Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device/metal/kernel.mm')
-rw-r--r--intern/cycles/device/metal/kernel.mm221
1 files changed, 166 insertions, 55 deletions
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index fec4cd80466..385cb412b06 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -5,6 +5,7 @@
# include "device/metal/kernel.h"
# include "device/metal/device_impl.h"
+# include "kernel/device/metal/function_constants.h"
# include "util/md5.h"
# include "util/path.h"
# include "util/tbb.h"
@@ -16,13 +17,15 @@ CCL_NAMESPACE_BEGIN
/* limit to 2 MTLCompiler instances */
int max_mtlcompiler_threads = 2;
-const char *kernel_type_as_string(int kernel_type)
+const char *kernel_type_as_string(MetalPipelineType pso_type)
{
- switch (kernel_type) {
+ switch (pso_type) {
case PSO_GENERIC:
return "PSO_GENERIC";
- case PSO_SPECIALISED:
- return "PSO_SPECIALISED";
+ case PSO_SPECIALIZED_INTERSECT:
+ return "PSO_SPECIALIZED_INTERSECT";
+ case PSO_SPECIALIZED_SHADE:
+ return "PSO_SPECIALIZED_SHADE";
default:
assert(0);
}
@@ -50,7 +53,11 @@ struct ShaderCache {
/* Non-blocking request for a kernel, optionally specialized to the scene being rendered by
* device. */
- void load_kernel(DeviceKernel kernel, MetalDevice *device, bool scene_specialized);
+ void load_kernel(DeviceKernel kernel, MetalDevice *device, MetalPipelineType pso_type);
+
+ bool should_load_kernel(DeviceKernel device_kernel,
+ MetalDevice *device,
+ MetalPipelineType pso_type);
void wait_for_all();
@@ -139,31 +146,34 @@ void ShaderCache::compile_thread_func(int thread_index)
}
}
-void ShaderCache::load_kernel(DeviceKernel device_kernel,
- MetalDevice *device,
- bool scene_specialized)
+bool ShaderCache::should_load_kernel(DeviceKernel device_kernel,
+ MetalDevice *device,
+ MetalPipelineType pso_type)
{
- {
- /* create compiler threads on first run */
- thread_scoped_lock lock(cache_mutex);
- if (compile_threads.empty()) {
- running = true;
- for (int i = 0; i < max_mtlcompiler_threads; i++) {
- compile_threads.push_back(std::thread([&] { compile_thread_func(i); }));
- }
- }
+ if (device_kernel == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
+ /* Skip megakernel. */
+ return false;
}
- if (device_kernel == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
- /* skip megakernel */
- return;
+ if (device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
+ if ((device->kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0) {
+ /* Skip shade_surface_raytrace kernel if the scene doesn't require it. */
+ return false;
+ }
}
- if (scene_specialized) {
+ if (pso_type != PSO_GENERIC) {
/* Only specialize kernels where it can make an impact. */
if (device_kernel < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
device_kernel > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
- return;
+ return false;
+ }
+
+ /* Only specialize shading / intersection kernels as requested. */
+ bool is_shade_kernel = (device_kernel >= DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+ bool is_shade_pso = (pso_type == PSO_SPECIALIZED_SHADE);
+ if (is_shade_pso != is_shade_kernel) {
+ return false;
}
}
@@ -171,35 +181,45 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
/* check whether the kernel has already been requested / cached */
thread_scoped_lock lock(cache_mutex);
for (auto &pipeline : pipelines[device_kernel]) {
- if (scene_specialized) {
- if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) {
- /* we already requested a pipeline that is specialized for this kernel data */
- metal_printf("Specialized kernel already requested (%s)\n",
- device_kernel_as_string(device_kernel));
- return;
- }
+ if (pipeline->source_md5 == device->source_md5[pso_type]) {
+ return false;
}
- else {
- if (pipeline->source_md5 == device->source_md5[PSO_GENERIC]) {
- /* we already requested a generic pipeline for this kernel */
- metal_printf("Generic kernel already requested (%s)\n",
- device_kernel_as_string(device_kernel));
- return;
- }
+ }
+ }
+
+ return true;
+}
+
+void ShaderCache::load_kernel(DeviceKernel device_kernel,
+ MetalDevice *device,
+ MetalPipelineType pso_type)
+{
+ {
+ /* create compiler threads on first run */
+ thread_scoped_lock lock(cache_mutex);
+ if (compile_threads.empty()) {
+ running = true;
+ for (int i = 0; i < max_mtlcompiler_threads; i++) {
+ compile_threads.push_back(std::thread([&] { compile_thread_func(i); }));
}
}
}
+ if (!should_load_kernel(device_kernel, device, pso_type)) {
+ return;
+ }
+
incomplete_requests++;
PipelineRequest request;
request.pipeline = new MetalKernelPipeline;
- request.pipeline->scene_specialized = scene_specialized;
+ memcpy(&request.pipeline->kernel_data_,
+ &device->launch_params.data,
+ sizeof(request.pipeline->kernel_data_));
+ request.pipeline->pso_type = pso_type;
request.pipeline->mtlDevice = mtlDevice;
- request.pipeline->source_md5 =
- device->source_md5[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC];
- request.pipeline->mtlLibrary =
- device->mtlLibrary[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC];
+ request.pipeline->source_md5 = device->source_md5[pso_type];
+ request.pipeline->mtlLibrary = device->mtlLibrary[pso_type];
request.pipeline->device_kernel = device_kernel;
request.pipeline->threads_per_threadgroup = device->max_threads_per_threadgroup;
@@ -214,7 +234,24 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
{
thread_scoped_lock lock(cache_mutex);
- pipelines[device_kernel].push_back(unique_ptr<MetalKernelPipeline>(request.pipeline));
+ auto &collection = pipelines[device_kernel];
+
+ /* Cache up to 3 kernel variants with the same pso_type, purging oldest first. */
+ int max_entries_of_same_pso_type = 3;
+ for (int i = (int)collection.size() - 1; i >= 0; i--) {
+ if (collection[i]->pso_type == pso_type) {
+ max_entries_of_same_pso_type -= 1;
+ if (max_entries_of_same_pso_type == 0) {
+ metal_printf("Purging oldest %s:%s kernel from ShaderCache\n",
+ kernel_type_as_string(pso_type),
+ device_kernel_as_string(device_kernel));
+ collection.erase(collection.begin() + i);
+ break;
+ }
+ }
+ }
+
+ collection.push_back(unique_ptr<MetalKernelPipeline>(request.pipeline));
request_queue.push_back(request);
}
cond_var.notify_one();
@@ -248,8 +285,9 @@ MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const M
continue;
}
- if (pipeline->scene_specialized) {
- if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) {
+ if (pipeline->pso_type != PSO_GENERIC) {
+ if (pipeline->source_md5 == device->source_md5[PSO_SPECIALIZED_INTERSECT] ||
+ pipeline->source_md5 == device->source_md5[PSO_SPECIALIZED_SHADE]) {
best_pipeline = pipeline.get();
}
}
@@ -258,13 +296,65 @@ MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const M
}
}
+ if (best_pipeline->usage_count == 0 && best_pipeline->pso_type != PSO_GENERIC) {
+ metal_printf("Swapping in %s version of %s\n",
+ kernel_type_as_string(best_pipeline->pso_type),
+ device_kernel_as_string(kernel));
+ }
+ best_pipeline->usage_count += 1;
+
return best_pipeline;
}
-void MetalKernelPipeline::compile()
+bool MetalKernelPipeline::should_use_binary_archive() const
{
- int pso_type = scene_specialized ? PSO_SPECIALISED : PSO_GENERIC;
+ if (auto str = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
+ if (atoi(str) != 0) {
+ /* Don't archive if we have opted out by env var. */
+ return false;
+ }
+ }
+
+ if (pso_type == PSO_GENERIC) {
+ /* Archive the generic kernels. */
+ return true;
+ }
+
+ if (device_kernel >= DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND &&
+ device_kernel <= DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW) {
+ /* Archive all shade kernels - they take a long time to compile. */
+ return true;
+ }
+
+ /* The remaining kernels are all fast to compile. They may get cached by the system shader cache,
+ * but will be quick to regenerate if not. */
+ return false;
+}
+
+static MTLFunctionConstantValues *GetConstantValues(KernelData const *data = nullptr)
+{
+ MTLFunctionConstantValues *constant_values = [MTLFunctionConstantValues new];
+
+ MTLDataType MTLDataType_int = MTLDataTypeInt;
+ MTLDataType MTLDataType_float = MTLDataTypeFloat;
+ MTLDataType MTLDataType_float4 = MTLDataTypeFloat4;
+ KernelData zero_data = {0};
+ if (!data) {
+ data = &zero_data;
+ }
+# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
+ [constant_values setConstantValue:&data->parent.name \
+ type:MTLDataType_##_type \
+ atIndex:KernelData_##parent##_##name];
+
+# include "kernel/data_template.h"
+
+ return constant_values;
+}
+
+void MetalKernelPipeline::compile()
+{
const std::string function_name = std::string("cycles_metal_") +
device_kernel_as_string(device_kernel);
@@ -281,6 +371,17 @@ void MetalKernelPipeline::compile()
if (@available(macOS 11.0, *)) {
MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
func_desc.name = entryPoint;
+
+ if (pso_type == PSO_SPECIALIZED_SHADE) {
+ func_desc.constantValues = GetConstantValues(&kernel_data_);
+ }
+ else if (pso_type == PSO_SPECIALIZED_INTERSECT) {
+ func_desc.constantValues = GetConstantValues(&kernel_data_);
+ }
+ else {
+ func_desc.constantValues = GetConstantValues();
+ }
+
function = [mtlLibrary newFunctionWithDescriptor:func_desc error:&error];
}
@@ -427,10 +528,7 @@ void MetalKernelPipeline::compile()
MTLPipelineOption pipelineOptions = MTLPipelineOptionNone;
- bool use_binary_archive = true;
- if (auto str = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
- use_binary_archive = (atoi(str) == 0);
- }
+ bool use_binary_archive = should_use_binary_archive();
id<MTLBinaryArchive> archive = nil;
string metalbin_path;
@@ -608,19 +706,32 @@ void MetalKernelPipeline::compile()
}
}
-bool MetalDeviceKernels::load(MetalDevice *device, bool scene_specialized)
+bool MetalDeviceKernels::load(MetalDevice *device, MetalPipelineType pso_type)
{
+ const double starttime = time_dt();
auto shader_cache = get_shader_cache(device->mtlDevice);
for (int i = 0; i < DEVICE_KERNEL_NUM; i++) {
- shader_cache->load_kernel((DeviceKernel)i, device, scene_specialized);
+ shader_cache->load_kernel((DeviceKernel)i, device, pso_type);
}
- if (!scene_specialized || getenv("CYCLES_METAL_PROFILING")) {
- shader_cache->wait_for_all();
- }
+ shader_cache->wait_for_all();
+ metal_printf("Back-end compilation finished in %.1f seconds (%s)\n",
+ time_dt() - starttime,
+ kernel_type_as_string(pso_type));
return true;
}
+bool MetalDeviceKernels::should_load_kernels(MetalDevice *device, MetalPipelineType pso_type)
+{
+ auto shader_cache = get_shader_cache(device->mtlDevice);
+ for (int i = 0; i < DEVICE_KERNEL_NUM; i++) {
+ if (shader_cache->should_load_kernel((DeviceKernel)i, device, pso_type)) {
+ return true;
+ }
+ }
+ return false;
+}
+
const MetalKernelPipeline *MetalDeviceKernels::get_best_pipeline(const MetalDevice *device,
DeviceKernel kernel)
{