Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Jones <michael_p_jones@apple.com>2022-04-26 21:00:35 +0300
committerMichael Jones <michael_p_jones@apple.com>2022-04-27 00:17:16 +0300
commitb82de02e7ce857e20b842a074c0068b146a9fd79 (patch)
treee1f00d1c2f3fa819e1025a302b660dcc3ade864e /intern/cycles/device
parent994da7077d4a683a122f40ad0c3b0585d4968fcc (diff)
Cycles: Enable inlining on Apple Silicon for 1.1x speedup
This is a stripped down version of D14645 without the scene specialisation optimisations. The two major changes in this patch are: - Enables more aggressive inlining on Apple Silicon resulting in a 1.1x speedup and 10% reduction in spill, at the cost of longer pipeline build times - Revival of shader binary archives through a new ShaderCache which is shared between MetalDevice instances using the same physical MTLDevice. This mitigates the extra compile times via explicit caching (rather than, as before, relying on the implicit system shader cache which can be purged without notice) Reviewed By: brecht Differential Revision: https://developer.blender.org/D14763
Diffstat (limited to 'intern/cycles/device')
-rw-r--r--intern/cycles/device/metal/device_impl.h9
-rw-r--r--intern/cycles/device/metal/device_impl.mm101
-rw-r--r--intern/cycles/device/metal/kernel.h103
-rw-r--r--intern/cycles/device/metal/kernel.mm849
-rw-r--r--intern/cycles/device/metal/queue.mm9
5 files changed, 536 insertions, 535 deletions
diff --git a/intern/cycles/device/metal/device_impl.h b/intern/cycles/device/metal/device_impl.h
index 27c58ce6d2f..d7311ee985f 100644
--- a/intern/cycles/device/metal/device_impl.h
+++ b/intern/cycles/device/metal/device_impl.h
@@ -28,7 +28,8 @@ class MetalDevice : public Device {
id<MTLCommandQueue> mtlGeneralCommandQueue = nil;
id<MTLArgumentEncoder> mtlAncillaryArgEncoder =
nil; /* encoder used for fetching device pointers from MTLBuffers */
- string source_used_for_compile[PSO_NUM];
+ string source[PSO_NUM];
+ string source_md5[PSO_NUM];
KernelParamsMetal launch_params = {0};
@@ -110,6 +111,12 @@ class MetalDevice : public Device {
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
+ id<MTLLibrary> compile(string const &source);
+
+ const MetalKernelPipeline &get_best_pipeline(DeviceKernel kernel) const;
+
+ bool kernel_available(DeviceKernel kernel) const;
+
/* ------------------------------------------------------------------ */
/* low-level memory management */
diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm
index c01f51fb506..7d1212cb37c 100644
--- a/intern/cycles/device/metal/device_impl.mm
+++ b/intern/cycles/device/metal/device_impl.mm
@@ -275,96 +275,44 @@ bool MetalDevice::load_kernels(const uint _kernel_features)
* active, but may still need to be rendered without motion blur if that isn't active as well. */
motion_blur = kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
- NSError *error = NULL;
+ source[PSO_GENERIC] = get_source(kernel_features);
+ mtlLibrary[PSO_GENERIC] = compile(source[PSO_GENERIC]);
- for (int i = 0; i < PSO_NUM; i++) {
- if (mtlLibrary[i]) {
- [mtlLibrary[i] release];
- mtlLibrary[i] = nil;
- }
- }
+ MD5Hash md5;
+ md5.append(source[PSO_GENERIC]);
+ source_md5[PSO_GENERIC] = md5.get_hex();
+
+ metal_printf("Front-end compilation finished (generic)\n");
+
+ bool result = kernels.load(this, false);
+ reserve_local_memory(kernel_features);
+
+ return result;
+}
+
+id<MTLLibrary> MetalDevice::compile(string const &source)
+{
MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
options.fastMathEnabled = YES;
if (@available(macOS 12.0, *)) {
options.languageVersion = MTLLanguageVersion2_4;
}
- else {
- return false;
- }
-
- string metalsrc;
-
- /* local helper: dump source to disk and return filepath */
- auto dump_source = [&](int kernel_type) -> string {
- string &source = source_used_for_compile[kernel_type];
- string metalsrc = path_cache_get(path_join("kernels",
- string_printf("%s.%s.metal",
- kernel_type_as_string(kernel_type),
- util_md5_string(source).c_str())));
- path_write_text(metalsrc, source);
- return metalsrc;
- };
-
- /* local helper: fetch the kernel source code, adjust it for specific PSO_.. kernel_type flavor,
- * then compile it into a MTLLibrary */
- auto fetch_and_compile_source = [&](int kernel_type) {
- /* Record the source used to compile this library, for hash building later. */
- string &source = source_used_for_compile[kernel_type];
-
- switch (kernel_type) {
- case PSO_GENERIC: {
- source = get_source(kernel_features);
- break;
- }
- case PSO_SPECIALISED: {
- /* PSO_SPECIALISED derives from PSO_GENERIC */
- string &generic_source = source_used_for_compile[PSO_GENERIC];
- if (generic_source.empty()) {
- generic_source = get_source(kernel_features);
- }
- source = "#define __KERNEL_METAL_USE_FUNCTION_SPECIALISATION__\n" + generic_source;
- break;
- }
- default:
- assert(0);
- }
- /* create MTLLibrary (front-end compilation) */
- mtlLibrary[kernel_type] = [mtlDevice newLibraryWithSource:@(source.c_str())
+ NSError *error = NULL;
+ id<MTLLibrary> mtlLibrary = [mtlDevice newLibraryWithSource:@(source.c_str())
options:options
error:&error];
- bool do_source_dump = (getenv("CYCLES_METAL_DUMP_SOURCE") != nullptr);
-
- if (!mtlLibrary[kernel_type] || do_source_dump) {
- string metalsrc = dump_source(kernel_type);
-
- if (!mtlLibrary[kernel_type]) {
- NSString *err = [error localizedDescription];
- set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
-
- return false;
- }
- }
- return true;
- };
-
- fetch_and_compile_source(PSO_GENERIC);
-
- if (use_function_specialisation) {
- fetch_and_compile_source(PSO_SPECIALISED);
+ if (!mtlLibrary) {
+ NSString *err = [error localizedDescription];
+ set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
}
- metal_printf("Front-end compilation finished\n");
-
- bool result = kernels.load(this, PSO_GENERIC);
-
[options release];
- reserve_local_memory(kernel_features);
- return result;
+ return mtlLibrary;
}
void MetalDevice::reserve_local_memory(const uint kernel_features)
@@ -671,6 +619,11 @@ device_ptr MetalDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, siz
return 0;
}
+const MetalKernelPipeline &MetalDevice::get_best_pipeline(DeviceKernel kernel) const
+{
+ return kernels.get_best_pipeline(this, kernel);
+}
+
void MetalDevice::const_copy_to(const char *name, void *host, size_t size)
{
if (strcmp(name, "__data") == 0) {
diff --git a/intern/cycles/device/metal/kernel.h b/intern/cycles/device/metal/kernel.h
index b12491d820d..7e398d1cf41 100644
--- a/intern/cycles/device/metal/kernel.h
+++ b/intern/cycles/device/metal/kernel.h
@@ -54,98 +54,41 @@ enum {
const char *kernel_type_as_string(int kernel_type);
struct MetalKernelPipeline {
- void release()
- {
- if (pipeline) {
- [pipeline release];
- pipeline = nil;
- if (@available(macOS 11.0, *)) {
- for (int i = 0; i < METALRT_TABLE_NUM; i++) {
- if (intersection_func_table[i]) {
- [intersection_func_table[i] release];
- intersection_func_table[i] = nil;
- }
- }
- }
- }
- if (function) {
- [function release];
- function = nil;
- }
- if (@available(macOS 11.0, *)) {
- for (int i = 0; i < METALRT_TABLE_NUM; i++) {
- if (intersection_func_table[i]) {
- [intersection_func_table[i] release];
- }
- }
- }
- }
- bool loaded = false;
- id<MTLFunction> function = nil;
- id<MTLComputePipelineState> pipeline = nil;
-
- API_AVAILABLE(macos(11.0))
- id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil};
-};
+ void compile();
-struct MetalKernelLoadDesc {
- int pso_index = 0;
- const char *function_name = nullptr;
- int kernel_index = 0;
- int threads_per_threadgroup = 0;
- MTLFunctionConstantValues *constant_values = nullptr;
- NSArray *linked_functions = nullptr;
-
- struct IntersectorFunctions {
- NSArray *defaults;
- NSArray *shadow;
- NSArray *local;
- NSArray *operator[](int index) const
- {
- if (index == METALRT_TABLE_DEFAULT)
- return defaults;
- if (index == METALRT_TABLE_SHADOW)
- return shadow;
- return local;
- }
- } intersector_functions = {nullptr};
-};
-
-/* Metal kernel and associate occupancy information. */
-class MetalDeviceKernel {
- public:
- ~MetalDeviceKernel();
+ id<MTLLibrary> mtlLibrary = nil;
+ bool scene_specialized;
+ string source_md5;
- bool load(MetalDevice *device, MetalKernelLoadDesc const &desc, class MD5Hash const &md5);
+ bool use_metalrt;
+ bool metalrt_hair;
+ bool metalrt_hair_thick;
+ bool metalrt_pointcloud;
- void mark_loaded(int pso_index)
- {
- pso[pso_index].loaded = true;
- }
+ int threads_per_threadgroup;
- int get_num_threads_per_block() const
- {
- return num_threads_per_block;
- }
- const MetalKernelPipeline &get_pso() const;
-
- double load_duration = 0.0;
+ DeviceKernel device_kernel;
+ bool loaded = false;
+ id<MTLDevice> mtlDevice = nil;
+ id<MTLFunction> function = nil;
+ id<MTLComputePipelineState> pipeline = nil;
+ int num_threads_per_block = 0;
- private:
- MetalKernelPipeline pso[PSO_NUM];
+ string error_str;
- int num_threads_per_block = 0;
+ API_AVAILABLE(macos(11.0))
+ id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil};
+ id<MTLFunction> rt_intersection_function[METALRT_FUNC_NUM] = {nil};
};
/* Cache of Metal kernels for each DeviceKernel. */
class MetalDeviceKernels {
public:
- bool load(MetalDevice *device, int kernel_type);
- bool available(DeviceKernel kernel) const;
- const MetalDeviceKernel &get(DeviceKernel kernel) const;
-
- MetalDeviceKernel kernels_[DEVICE_KERNEL_NUM];
+ bool load(MetalDevice *device, bool scene_specialized);
+ bool available(const MetalDevice *device, DeviceKernel kernel) const;
+ const MetalKernelPipeline &get_best_pipeline(const MetalDevice *device,
+ DeviceKernel kernel) const;
id<MTLFunction> rt_intersection_funcs[PSO_NUM][METALRT_FUNC_NUM] = {{nil}};
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index 9555ca03c8e..44a5e23d00f 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -9,6 +9,7 @@
# include "util/path.h"
# include "util/tbb.h"
# include "util/time.h"
+# include "util/unique_ptr.h"
CCL_NAMESPACE_BEGIN
@@ -28,82 +29,370 @@ const char *kernel_type_as_string(int kernel_type)
return "";
}
-MetalDeviceKernel::~MetalDeviceKernel()
+bool kernel_has_intersection(DeviceKernel device_kernel)
{
- for (int i = 0; i < PSO_NUM; i++) {
- pso[i].release();
+ return (device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
+ device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
+ device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
+ device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK ||
+ device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE);
+}
+
+struct ShaderCache {
+ ShaderCache(id<MTLDevice> _mtlDevice) : mtlDevice(_mtlDevice)
+ {
+ }
+ ~ShaderCache();
+
+ /* Get the fastest available pipeline for the specified kernel. */
+ MetalKernelPipeline *get_best_pipeline(DeviceKernel kernel, const MetalDevice *device);
+
+ /* Non-blocking request for a kernel, optionally specialized to the scene being rendered by
+ * device. */
+ void load_kernel(DeviceKernel kernel, MetalDevice *device, bool scene_specialized);
+
+ void wait_for_all();
+
+ private:
+ friend ShaderCache *get_shader_cache(id<MTLDevice> mtlDevice);
+
+ void compile_thread_func(int thread_index);
+
+ using PipelineCollection = std::vector<unique_ptr<MetalKernelPipeline>>;
+
+ struct PipelineRequest {
+ MetalKernelPipeline *pipeline = nullptr;
+ std::function<void(MetalKernelPipeline *)> completionHandler;
+ };
+
+ std::mutex cache_mutex;
+
+ PipelineCollection pipelines[DEVICE_KERNEL_NUM];
+ id<MTLDevice> mtlDevice;
+
+ bool running = false;
+ std::condition_variable cond_var;
+ std::deque<PipelineRequest> request_queue;
+ std::vector<std::thread> compile_threads;
+ std::atomic_int incomplete_requests = 0;
+};
+
+std::mutex g_shaderCacheMutex;
+std::map<id<MTLDevice>, unique_ptr<ShaderCache>> g_shaderCache;
+
+ShaderCache *get_shader_cache(id<MTLDevice> mtlDevice)
+{
+ thread_scoped_lock lock(g_shaderCacheMutex);
+ auto it = g_shaderCache.find(mtlDevice);
+ if (it != g_shaderCache.end()) {
+ return it->second.get();
+ }
+
+ g_shaderCache[mtlDevice] = make_unique<ShaderCache>(mtlDevice);
+ return g_shaderCache[mtlDevice].get();
+}
+
+ShaderCache::~ShaderCache()
+{
+ running = false;
+ cond_var.notify_all();
+ for (auto &thread : compile_threads) {
+ thread.join();
+ }
+}
+
+void ShaderCache::wait_for_all()
+{
+ while (incomplete_requests > 0) {
+ std::this_thread::sleep_for(std::chrono::milliseconds(100));
}
}
-bool MetalDeviceKernel::load(MetalDevice *device,
- MetalKernelLoadDesc const &desc_in,
- MD5Hash const &md5)
+void ShaderCache::compile_thread_func(int thread_index)
{
- __block MetalKernelLoadDesc const desc(desc_in);
- if (desc.kernel_index == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
+ while (1) {
+
+ /* wait for / acquire next request */
+ PipelineRequest request;
+ {
+ thread_scoped_lock lock(cache_mutex);
+ cond_var.wait(lock, [&] { return !running || !request_queue.empty(); });
+ if (!running) {
+ break;
+ }
+
+ if (!request_queue.empty()) {
+ request = request_queue.front();
+ request_queue.pop_front();
+ }
+ }
+
+ /* service request */
+ if (request.pipeline) {
+ request.pipeline->compile();
+ incomplete_requests--;
+ }
+ }
+}
+
+void ShaderCache::load_kernel(DeviceKernel device_kernel,
+ MetalDevice *device,
+ bool scene_specialized)
+{
+ {
+ /* create compiler threads on first run */
+ thread_scoped_lock lock(cache_mutex);
+ if (compile_threads.empty()) {
+ running = true;
+ for (int i = 0; i < max_mtlcompiler_threads; i++) {
+ compile_threads.push_back(std::thread([&] { compile_thread_func(i); }));
+ }
+ }
+ }
+
+ if (device_kernel == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
/* skip megakernel */
- return true;
+ return;
}
- bool use_binary_archive = true;
- if (device->device_vendor == METAL_GPU_APPLE) {
- /* Workaround for T94142: Cycles Metal crash with simultaneous viewport and final render */
- use_binary_archive = false;
+ if (scene_specialized) {
+ /* Only specialize kernels where it can make an impact. */
+ if (device_kernel < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
+ device_kernel > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
+ return;
+ }
}
- if (auto str = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
- use_binary_archive = (atoi(str) == 0);
+ {
+ /* check whether the kernel has already been requested / cached */
+ thread_scoped_lock lock(cache_mutex);
+ for (auto &pipeline : pipelines[device_kernel]) {
+ if (scene_specialized) {
+ if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) {
+ /* we already requested a pipeline that is specialised for this kernel data */
+ metal_printf("Specialized kernel already requested (%s)\n",
+ device_kernel_as_string(device_kernel));
+ return;
+ }
+ }
+ else {
+ if (pipeline->source_md5 == device->source_md5[PSO_GENERIC]) {
+ /* we already requested a generic pipeline for this kernel */
+ metal_printf("Generic kernel already requested (%s)\n",
+ device_kernel_as_string(device_kernel));
+ return;
+ }
+ }
+ }
}
- id<MTLBinaryArchive> archive = nil;
- string metalbin_path;
- if (use_binary_archive) {
- NSProcessInfo *processInfo = [NSProcessInfo processInfo];
- string osVersion = [[processInfo operatingSystemVersionString] UTF8String];
- MD5Hash local_md5(md5);
- local_md5.append(osVersion);
- string metalbin_name = string(desc.function_name) + "." + local_md5.get_hex() +
- to_string(desc.pso_index) + ".bin";
- metalbin_path = path_cache_get(path_join("kernels", metalbin_name));
- path_create_directories(metalbin_path);
+ incomplete_requests++;
+
+ PipelineRequest request;
+ request.pipeline = new MetalKernelPipeline;
+ request.pipeline->scene_specialized = scene_specialized;
+ request.pipeline->mtlDevice = mtlDevice;
+ request.pipeline->source_md5 =
+ device->source_md5[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC];
+ request.pipeline->mtlLibrary =
+ device->mtlLibrary[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC];
+ request.pipeline->device_kernel = device_kernel;
+ request.pipeline->threads_per_threadgroup = device->max_threads_per_threadgroup;
+
+ /* metalrt options */
+ request.pipeline->use_metalrt = device->use_metalrt;
+ request.pipeline->metalrt_hair = device->kernel_features & KERNEL_FEATURE_HAIR;
+ request.pipeline->metalrt_hair_thick = device->kernel_features & KERNEL_FEATURE_HAIR_THICK;
+ request.pipeline->metalrt_pointcloud = device->kernel_features & KERNEL_FEATURE_POINTCLOUD;
+
+ {
+ thread_scoped_lock lock(cache_mutex);
+ pipelines[device_kernel].push_back(unique_ptr<MetalKernelPipeline>(request.pipeline));
+ request_queue.push_back(request);
+ }
+ cond_var.notify_one();
+}
- if (path_exists(metalbin_path) && use_binary_archive) {
- if (@available(macOS 11.0, *)) {
- MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
- archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())];
- archive = [device->mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
- [archiveDesc release];
+MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const MetalDevice *device)
+{
+ thread_scoped_lock lock(cache_mutex);
+ auto &collection = pipelines[kernel];
+ if (collection.empty()) {
+ return nullptr;
+ }
+
+ /* metalrt options */
+ bool use_metalrt = device->use_metalrt;
+ bool metalrt_hair = device->kernel_features & KERNEL_FEATURE_HAIR;
+ bool metalrt_hair_thick = device->kernel_features & KERNEL_FEATURE_HAIR_THICK;
+ bool metalrt_pointcloud = device->kernel_features & KERNEL_FEATURE_POINTCLOUD;
+
+ MetalKernelPipeline *best_pipeline = nullptr;
+ for (auto &pipeline : collection) {
+ if (!pipeline->loaded) {
+ /* still loading - ignore */
+ continue;
+ }
+
+ if (pipeline->use_metalrt != use_metalrt || pipeline->metalrt_hair != metalrt_hair ||
+ pipeline->metalrt_hair_thick != metalrt_hair_thick ||
+ pipeline->metalrt_pointcloud != metalrt_pointcloud) {
+ /* wrong combination of metalrt options */
+ continue;
+ }
+
+ if (pipeline->scene_specialized) {
+ if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) {
+ best_pipeline = pipeline.get();
}
}
+ else if (!best_pipeline) {
+ best_pipeline = pipeline.get();
+ }
+ }
+
+ return best_pipeline;
+}
+
+void MetalKernelPipeline::compile()
+{
+ int pso_type = scene_specialized ? PSO_SPECIALISED : PSO_GENERIC;
+
+ const std::string function_name = std::string("cycles_metal_") +
+ device_kernel_as_string(device_kernel);
+
+ int threads_per_threadgroup = this->threads_per_threadgroup;
+ if (device_kernel > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL &&
+ device_kernel < DEVICE_KERNEL_INTEGRATOR_RESET) {
+ /* Always use 512 for the sorting kernels */
+ threads_per_threadgroup = 512;
}
- NSString *entryPoint = [@(desc.function_name) copy];
+ NSString *entryPoint = [@(function_name.c_str()) copy];
NSError *error = NULL;
if (@available(macOS 11.0, *)) {
MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
func_desc.name = entryPoint;
- if (desc.constant_values) {
- func_desc.constantValues = desc.constant_values;
- }
- pso[desc.pso_index].function = [device->mtlLibrary[desc.pso_index]
- newFunctionWithDescriptor:func_desc
- error:&error];
+ function = [mtlLibrary newFunctionWithDescriptor:func_desc error:&error];
}
+
[entryPoint release];
- if (pso[desc.pso_index].function == nil) {
+ if (function == nil) {
NSString *err = [error localizedDescription];
string errors = [err UTF8String];
+ metal_printf("Error getting function \"%s\": %s", function_name.c_str(), errors.c_str());
+ return;
+ }
- device->set_error(
- string_printf("Error getting function \"%s\": %s", desc.function_name, errors.c_str()));
- return false;
+ function.label = [entryPoint copy];
+
+ if (use_metalrt) {
+ if (@available(macOS 11.0, *)) {
+ /* create the id<MTLFunction> for each intersection function */
+ const char *function_names[] = {
+ "__anyhit__cycles_metalrt_visibility_test_tri",
+ "__anyhit__cycles_metalrt_visibility_test_box",
+ "__anyhit__cycles_metalrt_shadow_all_hit_tri",
+ "__anyhit__cycles_metalrt_shadow_all_hit_box",
+ "__anyhit__cycles_metalrt_local_hit_tri",
+ "__anyhit__cycles_metalrt_local_hit_box",
+ "__intersection__curve_ribbon",
+ "__intersection__curve_ribbon_shadow",
+ "__intersection__curve_all",
+ "__intersection__curve_all_shadow",
+ "__intersection__point",
+ "__intersection__point_shadow",
+ };
+ assert(sizeof(function_names) / sizeof(function_names[0]) == METALRT_FUNC_NUM);
+
+ MTLFunctionDescriptor *desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
+ for (int i = 0; i < METALRT_FUNC_NUM; i++) {
+ const char *function_name = function_names[i];
+ desc.name = [@(function_name) copy];
+
+ NSError *error = NULL;
+ rt_intersection_function[i] = [mtlLibrary newFunctionWithDescriptor:desc error:&error];
+
+ if (rt_intersection_function[i] == nil) {
+ NSString *err = [error localizedDescription];
+ string errors = [err UTF8String];
+
+ error_str = string_printf(
+ "Error getting intersection function \"%s\": %s", function_name, errors.c_str());
+ break;
+ }
+
+ rt_intersection_function[i].label = [@(function_name) copy];
+ }
+ }
}
- pso[desc.pso_index].function.label = [@(desc.function_name) copy];
+ NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
+ NSArray *linked_functions = nil;
+
+ if (use_metalrt) {
+ id<MTLFunction> curve_intersect_default = nil;
+ id<MTLFunction> curve_intersect_shadow = nil;
+ id<MTLFunction> point_intersect_default = nil;
+ id<MTLFunction> point_intersect_shadow = nil;
+ if (metalrt_hair) {
+ /* Add curve intersection programs. */
+ if (metalrt_hair_thick) {
+ /* Slower programs for thick hair since that also slows down ribbons.
+ * Ideally this should not be needed. */
+ curve_intersect_default = rt_intersection_function[METALRT_FUNC_CURVE_ALL];
+ curve_intersect_shadow = rt_intersection_function[METALRT_FUNC_CURVE_ALL_SHADOW];
+ }
+ else {
+ curve_intersect_default = rt_intersection_function[METALRT_FUNC_CURVE_RIBBON];
+ curve_intersect_shadow = rt_intersection_function[METALRT_FUNC_CURVE_RIBBON_SHADOW];
+ }
+ }
+ if (metalrt_pointcloud) {
+ point_intersect_default = rt_intersection_function[METALRT_FUNC_POINT];
+ point_intersect_shadow = rt_intersection_function[METALRT_FUNC_POINT_SHADOW];
+ }
+ table_functions[METALRT_TABLE_DEFAULT] = [NSArray
+ arrayWithObjects:rt_intersection_function[METALRT_FUNC_DEFAULT_TRI],
+ curve_intersect_default ?
+ curve_intersect_default :
+ rt_intersection_function[METALRT_FUNC_DEFAULT_BOX],
+ point_intersect_default ?
+ point_intersect_default :
+ rt_intersection_function[METALRT_FUNC_DEFAULT_BOX],
+ nil];
+ table_functions[METALRT_TABLE_SHADOW] = [NSArray
+ arrayWithObjects:rt_intersection_function[METALRT_FUNC_SHADOW_TRI],
+ curve_intersect_shadow ?
+ curve_intersect_shadow :
+ rt_intersection_function[METALRT_FUNC_SHADOW_BOX],
+ point_intersect_shadow ?
+ point_intersect_shadow :
+ rt_intersection_function[METALRT_FUNC_SHADOW_BOX],
+ nil];
+ table_functions[METALRT_TABLE_LOCAL] = [NSArray
+ arrayWithObjects:rt_intersection_function[METALRT_FUNC_LOCAL_TRI],
+ rt_intersection_function[METALRT_FUNC_LOCAL_BOX],
+ rt_intersection_function[METALRT_FUNC_LOCAL_BOX],
+ nil];
+
+ NSMutableSet *unique_functions = [NSMutableSet
+ setWithArray:table_functions[METALRT_TABLE_DEFAULT]];
+ [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_SHADOW]];
+ [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL]];
+
+ if (kernel_has_intersection(device_kernel)) {
+ linked_functions = [[NSArray arrayWithArray:[unique_functions allObjects]]
+ sortedArrayUsingComparator:^NSComparisonResult(id<MTLFunction> f1, id<MTLFunction> f2) {
+ return [f1.label compare:f2.label];
+ }];
+ }
+ unique_functions = nil;
+ }
- __block MTLComputePipelineDescriptor *computePipelineStateDescriptor =
+ MTLComputePipelineDescriptor *computePipelineStateDescriptor =
[[MTLComputePipelineDescriptor alloc] init];
computePipelineStateDescriptor.buffers[0].mutability = MTLMutabilityImmutable;
@@ -111,52 +400,86 @@ bool MetalDeviceKernel::load(MetalDevice *device,
computePipelineStateDescriptor.buffers[2].mutability = MTLMutabilityImmutable;
if (@available(macos 10.14, *)) {
- computePipelineStateDescriptor.maxTotalThreadsPerThreadgroup = desc.threads_per_threadgroup;
+ computePipelineStateDescriptor.maxTotalThreadsPerThreadgroup = threads_per_threadgroup;
}
computePipelineStateDescriptor.threadGroupSizeIsMultipleOfThreadExecutionWidth = true;
- computePipelineStateDescriptor.computeFunction = pso[desc.pso_index].function;
+ computePipelineStateDescriptor.computeFunction = function;
+
if (@available(macOS 11.0, *)) {
/* Attach the additional functions to an MTLLinkedFunctions object */
- if (desc.linked_functions) {
+ if (linked_functions) {
computePipelineStateDescriptor.linkedFunctions = [[MTLLinkedFunctions alloc] init];
- computePipelineStateDescriptor.linkedFunctions.functions = desc.linked_functions;
+ computePipelineStateDescriptor.linkedFunctions.functions = linked_functions;
}
-
computePipelineStateDescriptor.maxCallStackDepth = 1;
+ if (use_metalrt) {
+ computePipelineStateDescriptor.maxCallStackDepth = 8;
+ }
}
- /* Create a new Compute pipeline state object */
MTLPipelineOption pipelineOptions = MTLPipelineOptionNone;
- bool creating_new_archive = false;
+ bool use_binary_archive = true;
+ if (auto str = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
+ use_binary_archive = (atoi(str) == 0);
+ }
+
+ id<MTLBinaryArchive> archive = nil;
+ string metalbin_path;
+ string metalbin_name;
+ if (use_binary_archive) {
+ NSProcessInfo *processInfo = [NSProcessInfo processInfo];
+ string osVersion = [[processInfo operatingSystemVersionString] UTF8String];
+ MD5Hash local_md5;
+ local_md5.append(source_md5);
+ local_md5.append(osVersion);
+ local_md5.append((uint8_t *)&this->threads_per_threadgroup,
+ sizeof(this->threads_per_threadgroup));
+
+ string options;
+ if (use_metalrt && kernel_has_intersection(device_kernel)) {
+ /* incorporate any MetalRT specialisations into the archive name */
+ options += string_printf(".hair_%d.hair_thick_%d.pointcloud_%d",
+ metalrt_hair ? 1 : 0,
+ metalrt_hair_thick ? 1 : 0,
+ metalrt_pointcloud ? 1 : 0);
+ }
+
+ /* Replace non-alphanumerical characters with underscores. */
+ string device_name = [mtlDevice.name UTF8String];
+ for (char &c : device_name) {
+ if ((c < '0' || c > '9') && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z')) {
+ c = '_';
+ }
+ }
+
+ metalbin_name = device_name;
+ metalbin_name = path_join(metalbin_name, device_kernel_as_string(device_kernel));
+ metalbin_name = path_join(metalbin_name, kernel_type_as_string(pso_type));
+ metalbin_name = path_join(metalbin_name, local_md5.get_hex() + options + ".bin");
+
+ metalbin_path = path_cache_get(path_join("kernels", metalbin_name));
+ path_create_directories(metalbin_path);
+
+ if (path_exists(metalbin_path) && use_binary_archive) {
+ if (@available(macOS 11.0, *)) {
+ MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
+ archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())];
+ archive = [mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
+ [archiveDesc release];
+ }
+ }
+ }
+
+ __block bool creating_new_archive = false;
if (@available(macOS 11.0, *)) {
if (use_binary_archive) {
if (!archive) {
MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
archiveDesc.url = nil;
- archive = [device->mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
+ archive = [mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
creating_new_archive = true;
-
- double starttime = time_dt();
-
- if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor
- error:&error]) {
- NSString *errStr = [error localizedDescription];
- metal_printf("Failed to add PSO to archive:\n%s\n",
- errStr ? [errStr UTF8String] : "nil");
- }
- else {
- double duration = time_dt() - starttime;
- metal_printf("%2d | %-55s | %7.2fs\n",
- desc.kernel_index,
- device_kernel_as_string((DeviceKernel)desc.kernel_index),
- duration);
-
- if (desc.pso_index == PSO_GENERIC) {
- this->load_duration = duration;
- }
- }
}
computePipelineStateDescriptor.binaryArchives = [NSArray arrayWithObjects:archive, nil];
pipelineOptions = MTLPipelineOptionFailOnBinaryArchiveMiss;
@@ -170,17 +493,14 @@ bool MetalDeviceKernel::load(MetalDevice *device,
MTLComputePipelineReflection *reflection,
NSError *error) {
bool recreate_archive = false;
- if (computePipelineState == nil && archive && !creating_new_archive) {
-
- assert(0);
-
+ if (computePipelineState == nil && archive) {
NSString *errStr = [error localizedDescription];
metal_printf(
"Failed to create compute pipeline state \"%s\" from archive - attempting recreation... "
"(error: %s)\n",
- device_kernel_as_string((DeviceKernel)desc.kernel_index),
+ device_kernel_as_string((DeviceKernel)device_kernel),
errStr ? [errStr UTF8String] : "nil");
- computePipelineState = [device->mtlDevice
+ computePipelineState = [mtlDevice
newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
options:MTLPipelineOptionNone
reflection:nullptr
@@ -192,32 +512,23 @@ bool MetalDeviceKernel::load(MetalDevice *device,
if (computePipelineState == nil) {
NSString *errStr = [error localizedDescription];
- device->set_error(string_printf("Failed to create compute pipeline state \"%s\", error: \n",
- device_kernel_as_string((DeviceKernel)desc.kernel_index)) +
- (errStr ? [errStr UTF8String] : "nil"));
- metal_printf("%2d | %-55s | %7.2fs | FAILED!\n",
- desc.kernel_index,
- device_kernel_as_string((DeviceKernel)desc.kernel_index),
+ error_str = string_printf("Failed to create compute pipeline state \"%s\", error: \n",
+ device_kernel_as_string((DeviceKernel)device_kernel));
+ error_str += (errStr ? [errStr UTF8String] : "nil");
+ metal_printf("%16s | %2d | %-55s | %7.2fs | FAILED!\n",
+ kernel_type_as_string(pso_type),
+ device_kernel,
+ device_kernel_as_string((DeviceKernel)device_kernel),
duration);
return;
}
- pso[desc.pso_index].pipeline = computePipelineState;
- num_threads_per_block = round_down(computePipelineState.maxTotalThreadsPerThreadgroup,
- computePipelineState.threadExecutionWidth);
+ int num_threads_per_block = round_down(computePipelineState.maxTotalThreadsPerThreadgroup,
+ computePipelineState.threadExecutionWidth);
num_threads_per_block = std::max(num_threads_per_block,
(int)computePipelineState.threadExecutionWidth);
-
- if (!use_binary_archive) {
- metal_printf("%2d | %-55s | %7.2fs\n",
- desc.kernel_index,
- device_kernel_as_string((DeviceKernel)desc.kernel_index),
- duration);
-
- if (desc.pso_index == PSO_GENERIC) {
- this->load_duration = duration;
- }
- }
+ this->pipeline = computePipelineState;
+ this->num_threads_per_block = num_threads_per_block;
if (@available(macOS 11.0, *)) {
if (creating_new_archive || recreate_archive) {
@@ -228,304 +539,90 @@ bool MetalDeviceKernel::load(MetalDevice *device,
}
}
}
+ };
- [computePipelineStateDescriptor release];
- computePipelineStateDescriptor = nil;
-
- if (device->use_metalrt && desc.linked_functions) {
- for (int table = 0; table < METALRT_TABLE_NUM; table++) {
- if (@available(macOS 11.0, *)) {
- MTLIntersectionFunctionTableDescriptor *ift_desc =
- [[MTLIntersectionFunctionTableDescriptor alloc] init];
- ift_desc.functionCount = desc.intersector_functions[table].count;
-
- pso[desc.pso_index].intersection_func_table[table] = [pso[desc.pso_index].pipeline
- newIntersectionFunctionTableWithDescriptor:ift_desc];
-
- /* Finally write the function handles into this pipeline's table */
- for (int i = 0; i < 2; i++) {
- id<MTLFunctionHandle> handle = [pso[desc.pso_index].pipeline
- functionHandleWithFunction:desc.intersector_functions[table][i]];
- [pso[desc.pso_index].intersection_func_table[table] setFunction:handle atIndex:i];
- }
+ /* Block on load to ensure we continue with a valid kernel function */
+ if (creating_new_archive) {
+ starttime = time_dt();
+ NSError *error;
+ if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor
+ error:&error]) {
+ NSString *errStr = [error localizedDescription];
+ metal_printf("Failed to add PSO to archive:\n%s\n", errStr ? [errStr UTF8String] : "nil");
+ }
+ }
+ id<MTLComputePipelineState> pipeline = [mtlDevice
+ newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
+ options:pipelineOptions
+ reflection:nullptr
+ error:&error];
+ completionHandler(pipeline, nullptr, error);
+
+ this->loaded = true;
+ [computePipelineStateDescriptor release];
+ computePipelineStateDescriptor = nil;
+
+ if (use_metalrt && linked_functions) {
+ for (int table = 0; table < METALRT_TABLE_NUM; table++) {
+ if (@available(macOS 11.0, *)) {
+ MTLIntersectionFunctionTableDescriptor *ift_desc =
+ [[MTLIntersectionFunctionTableDescriptor alloc] init];
+ ift_desc.functionCount = table_functions[table].count;
+ intersection_func_table[table] = [this->pipeline
+ newIntersectionFunctionTableWithDescriptor:ift_desc];
+
+ /* Finally write the function handles into this pipeline's table */
+ for (int i = 0; i < 2; i++) {
+ id<MTLFunctionHandle> handle = [pipeline
+ functionHandleWithFunction:table_functions[table][i]];
+ [intersection_func_table[table] setFunction:handle atIndex:i];
}
}
}
+ }
- mark_loaded(desc.pso_index);
- };
+ double duration = time_dt() - starttime;
- if (desc.pso_index == PSO_SPECIALISED) {
- /* Asynchronous load */
- dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
- NSError *error;
- id<MTLComputePipelineState> pipeline = [device->mtlDevice
- newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
- options:pipelineOptions
- reflection:nullptr
- error:&error];
- completionHandler(pipeline, nullptr, error);
- });
+ if (!use_binary_archive) {
+ metal_printf("%16s | %2d | %-55s | %7.2fs\n",
+ kernel_type_as_string(pso_type),
+ int(device_kernel),
+ device_kernel_as_string(device_kernel),
+ duration);
}
else {
- /* Block on load to ensure we continue with a valid kernel function */
- id<MTLComputePipelineState> pipeline = [device->mtlDevice
- newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
- options:pipelineOptions
- reflection:nullptr
- error:&error];
- completionHandler(pipeline, nullptr, error);
+ metal_printf("%16s | %2d | %-55s | %7.2fs | %s: %s\n",
+ kernel_type_as_string(pso_type),
+ device_kernel,
+ device_kernel_as_string((DeviceKernel)device_kernel),
+ duration,
+ creating_new_archive ? " new" : "load",
+ metalbin_name.c_str());
}
-
- return true;
-}
-
-const MetalKernelPipeline &MetalDeviceKernel::get_pso() const
-{
- if (pso[PSO_SPECIALISED].loaded) {
- return pso[PSO_SPECIALISED];
- }
-
- assert(pso[PSO_GENERIC].loaded);
- return pso[PSO_GENERIC];
}
-bool MetalDeviceKernels::load(MetalDevice *device, int kernel_type)
+bool MetalDeviceKernels::load(MetalDevice *device, bool scene_specialized)
{
- bool any_error = false;
-
- MD5Hash md5;
-
- /* Build the function constant table */
- MTLFunctionConstantValues *constant_values = nullptr;
- if (kernel_type == PSO_SPECIALISED) {
- constant_values = [MTLFunctionConstantValues new];
-
-# define KERNEL_FILM(_type, name) \
- [constant_values setConstantValue:&data.film.name \
- type:get_MTLDataType_##_type() \
- atIndex:KernelData_film_##name]; \
- md5.append((uint8_t *)&data.film.name, sizeof(data.film.name));
-
-# define KERNEL_BACKGROUND(_type, name) \
- [constant_values setConstantValue:&data.background.name \
- type:get_MTLDataType_##_type() \
- atIndex:KernelData_background_##name]; \
- md5.append((uint8_t *)&data.background.name, sizeof(data.background.name));
-
-# define KERNEL_INTEGRATOR(_type, name) \
- [constant_values setConstantValue:&data.integrator.name \
- type:get_MTLDataType_##_type() \
- atIndex:KernelData_integrator_##name]; \
- md5.append((uint8_t *)&data.integrator.name, sizeof(data.integrator.name));
-
-# define KERNEL_BVH(_type, name) \
- [constant_values setConstantValue:&data.bvh.name \
- type:get_MTLDataType_##_type() \
- atIndex:KernelData_bvh_##name]; \
- md5.append((uint8_t *)&data.bvh.name, sizeof(data.bvh.name));
-
- /* METAL_WIP: populate constant_values based on KernelData */
- assert(0);
- /*
- const KernelData &data = device->launch_params.data;
- # include "kernel/types/background.h"
- # include "kernel/types/bvh.h"
- # include "kernel/types/film.h"
- # include "kernel/types/integrator.h"
- */
+ auto shader_cache = get_shader_cache(device->mtlDevice);
+ for (int i = 0; i < DEVICE_KERNEL_NUM; i++) {
+ shader_cache->load_kernel((DeviceKernel)i, device, scene_specialized);
}
- if (device->use_metalrt) {
- if (@available(macOS 11.0, *)) {
- /* create the id<MTLFunction> for each intersection function */
- const char *function_names[] = {
- "__anyhit__cycles_metalrt_visibility_test_tri",
- "__anyhit__cycles_metalrt_visibility_test_box",
- "__anyhit__cycles_metalrt_shadow_all_hit_tri",
- "__anyhit__cycles_metalrt_shadow_all_hit_box",
- "__anyhit__cycles_metalrt_local_hit_tri",
- "__anyhit__cycles_metalrt_local_hit_box",
- "__intersection__curve_ribbon",
- "__intersection__curve_ribbon_shadow",
- "__intersection__curve_all",
- "__intersection__curve_all_shadow",
- "__intersection__point",
- "__intersection__point_shadow",
- };
- assert(sizeof(function_names) / sizeof(function_names[0]) == METALRT_FUNC_NUM);
-
- MTLFunctionDescriptor *desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
- if (kernel_type == PSO_SPECIALISED) {
- desc.constantValues = constant_values;
- }
- for (int i = 0; i < METALRT_FUNC_NUM; i++) {
- const char *function_name = function_names[i];
- desc.name = [@(function_name) copy];
-
- NSError *error = NULL;
- rt_intersection_funcs[kernel_type][i] = [device->mtlLibrary[kernel_type]
- newFunctionWithDescriptor:desc
- error:&error];
-
- if (rt_intersection_funcs[kernel_type][i] == nil) {
- NSString *err = [error localizedDescription];
- string errors = [err UTF8String];
-
- device->set_error(string_printf(
- "Error getting intersection function \"%s\": %s", function_name, errors.c_str()));
- any_error = true;
- break;
- }
-
- rt_intersection_funcs[kernel_type][i].label = [@(function_name) copy];
- }
- }
- }
- md5.append(device->source_used_for_compile[kernel_type]);
-
- string hash = md5.get_hex();
- if (loaded_md5[kernel_type] == hash) {
- return true;
+ if (!scene_specialized || getenv("CYCLES_METAL_PROFILING")) {
+ shader_cache->wait_for_all();
}
-
- if (!any_error) {
- NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
- NSArray *function_list = nil;
-
- if (device->use_metalrt) {
- id<MTLFunction> curve_intersect_default = nil;
- id<MTLFunction> curve_intersect_shadow = nil;
- id<MTLFunction> point_intersect_default = nil;
- id<MTLFunction> point_intersect_shadow = nil;
- if (device->kernel_features & KERNEL_FEATURE_HAIR) {
- /* Add curve intersection programs. */
- if (device->kernel_features & KERNEL_FEATURE_HAIR_THICK) {
- /* Slower programs for thick hair since that also slows down ribbons.
- * Ideally this should not be needed. */
- curve_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL];
- curve_intersect_shadow =
- rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL_SHADOW];
- }
- else {
- curve_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON];
- curve_intersect_shadow =
- rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON_SHADOW];
- }
- }
- if (device->kernel_features & KERNEL_FEATURE_POINTCLOUD) {
- point_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_POINT];
- point_intersect_shadow = rt_intersection_funcs[kernel_type][METALRT_FUNC_POINT_SHADOW];
- }
- table_functions[METALRT_TABLE_DEFAULT] = [NSArray
- arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_TRI],
- curve_intersect_default ?
- curve_intersect_default :
- rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_BOX],
- point_intersect_default ?
- point_intersect_default :
- rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_BOX],
- nil];
- table_functions[METALRT_TABLE_SHADOW] = [NSArray
- arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_TRI],
- curve_intersect_shadow ?
- curve_intersect_shadow :
- rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_BOX],
- point_intersect_shadow ?
- point_intersect_shadow :
- rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_BOX],
- nil];
- table_functions[METALRT_TABLE_LOCAL] = [NSArray
- arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_TRI],
- rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_BOX],
- rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_BOX],
- nil];
-
- NSMutableSet *unique_functions = [NSMutableSet
- setWithArray:table_functions[METALRT_TABLE_DEFAULT]];
- [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_SHADOW]];
- [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL]];
-
- function_list = [[NSArray arrayWithArray:[unique_functions allObjects]]
- sortedArrayUsingComparator:^NSComparisonResult(id<MTLFunction> f1, id<MTLFunction> f2) {
- return [f1.label compare:f2.label];
- }];
-
- unique_functions = nil;
- }
-
- metal_printf("Starting %s \"cycles_metal_...\" pipeline builds\n",
- kernel_type_as_string(kernel_type));
-
- tbb::task_arena local_arena(max_mtlcompiler_threads);
- local_arena.execute([&]() {
- parallel_for(int(0), int(DEVICE_KERNEL_NUM), [&](int i) {
- /* skip megakernel */
- if (i == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
- return;
- }
-
- /* Only specialize kernels where it can make an impact. */
- if (kernel_type == PSO_SPECIALISED) {
- if (i < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
- i > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
- return;
- }
- }
-
- MetalDeviceKernel &kernel = kernels_[i];
-
- const std::string function_name = std::string("cycles_metal_") +
- device_kernel_as_string((DeviceKernel)i);
- int threads_per_threadgroup = device->max_threads_per_threadgroup;
- if (i > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL && i < DEVICE_KERNEL_INTEGRATOR_RESET) {
- /* Always use 512 for the sorting kernels */
- threads_per_threadgroup = 512;
- }
-
- NSArray *kernel_function_list = nil;
-
- if (i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
- i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
- i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
- i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK ||
- i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
- kernel_function_list = function_list;
- }
-
- MetalKernelLoadDesc desc;
- desc.pso_index = kernel_type;
- desc.kernel_index = i;
- desc.linked_functions = kernel_function_list;
- desc.intersector_functions.defaults = table_functions[METALRT_TABLE_DEFAULT];
- desc.intersector_functions.shadow = table_functions[METALRT_TABLE_SHADOW];
- desc.intersector_functions.local = table_functions[METALRT_TABLE_LOCAL];
- desc.constant_values = constant_values;
- desc.threads_per_threadgroup = threads_per_threadgroup;
- desc.function_name = function_name.c_str();
-
- bool success = kernel.load(device, desc, md5);
-
- any_error |= !success;
- });
- });
- }
-
- bool loaded = !any_error;
- if (loaded) {
- loaded_md5[kernel_type] = hash;
- }
- return loaded;
+ return true;
}
-const MetalDeviceKernel &MetalDeviceKernels::get(DeviceKernel kernel) const
+const MetalKernelPipeline &MetalDeviceKernels::get_best_pipeline(const MetalDevice *device,
+ DeviceKernel kernel) const
{
- return kernels_[(int)kernel];
+ return *get_shader_cache(device->mtlDevice)->get_best_pipeline(kernel, device);
}
-bool MetalDeviceKernels::available(DeviceKernel kernel) const
+bool MetalDeviceKernels::available(const MetalDevice *device, DeviceKernel kernel) const
{
- return kernels_[(int)kernel].get_pso().function != nil;
+ return get_shader_cache(device->mtlDevice)->get_best_pipeline(kernel, device) != nullptr;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm
index 1686ab95ffa..2079aa65499 100644
--- a/intern/cycles/device/metal/queue.mm
+++ b/intern/cycles/device/metal/queue.mm
@@ -108,9 +108,6 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
VLOG(3) << "Metal queue launch " << device_kernel_as_string(kernel) << ", work_size "
<< work_size;
- const MetalDeviceKernel &metal_kernel = metal_device->kernels.get(kernel);
- const MetalKernelPipeline &metal_kernel_pso = metal_kernel.get_pso();
-
id<MTLComputeCommandEncoder> mtlComputeCommandEncoder = get_compute_encoder(kernel);
/* Determine size requirement for argument buffer. */
@@ -212,6 +209,8 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
}
bytes_written = globals_offsets + sizeof(KernelParamsMetal);
+ const MetalKernelPipeline &metal_kernel_pso = metal_device->get_best_pipeline(kernel);
+
/* Encode ancillaries */
[metal_device->mtlAncillaryArgEncoder setArgumentBuffer:arg_buffer offset:metal_offsets];
[metal_device->mtlAncillaryArgEncoder setBuffer:metal_device->texture_bindings_2d
@@ -284,7 +283,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
[mtlComputeCommandEncoder setComputePipelineState:metal_kernel_pso.pipeline];
/* Compute kernel launch parameters. */
- const int num_threads_per_block = metal_kernel.get_num_threads_per_block();
+ const int num_threads_per_block = metal_kernel_pso.num_threads_per_block;
int shared_mem_bytes = 0;
@@ -547,6 +546,8 @@ id<MTLComputeCommandEncoder> MetalDeviceQueue::get_compute_encoder(DeviceKernel
computeCommandEncoderWithDispatchType:concurrent ? MTLDispatchTypeConcurrent :
MTLDispatchTypeSerial];
+ [mtlComputeEncoder setLabel:@(device_kernel_as_string(kernel))];
+
/* declare usage of MTLBuffers etc */
prepare_resources(kernel);
}