5 files changed, 338 insertions, 1 deletions
diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h
index 77cea30914c..61597a4acfc 100644
--- a/intern/cycles/kernel/device/metal/compat.h
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -34,6 +34,7 @@ using namespace metal;
 
 #pragma clang diagnostic ignored "-Wunused-variable"
 #pragma clang diagnostic ignored "-Wsign-compare"
+#pragma clang diagnostic ignored "-Wuninitialized"
 
 /* Qualifiers */
 
@@ -42,8 +43,9 @@ using namespace metal;
 #define ccl_device_forceinline ccl_device
 #define ccl_device_noinline ccl_device __attribute__((noinline))
 #define ccl_device_noinline_cpu ccl_device
+#define ccl_device_inline_method ccl_device
 #define ccl_global device
-#define ccl_static_constant static constant constexpr
+#define ccl_inline_constant static constant constexpr
 #define ccl_device_constant constant
 #define ccl_constant const device
 #define ccl_gpu_shared threadgroup
@@ -58,6 +60,122 @@ using namespace metal;
 
 #define kernel_assert(cond)
 
+#define ccl_gpu_global_id_x() metal_global_id
+#define ccl_gpu_warp_size simdgroup_size
+#define ccl_gpu_thread_idx_x simd_group_index
+#define ccl_gpu_thread_mask(thread_warp) uint64_t((1ull << thread_warp) - 1)
+
+#define ccl_gpu_ballot(predicate) ((uint64_t)((simd_vote::vote_t)simd_ballot(predicate)))
+#define ccl_gpu_syncthreads() threadgroup_barrier(mem_flags::mem_threadgroup);
+
+// clang-format off
+
+/* kernel.h adapters */
+
+#define ccl_gpu_kernel(block_num_threads, thread_num_registers)
+#define ccl_gpu_kernel_threads(block_num_threads)
+
+/* Convert a comma-separated list into a semicolon-separated list
+ * (so that we can generate a struct based on kernel entry-point parameters). */
+#define FN0()
+#define FN1(p1) p1;
+#define FN2(p1, p2) p1; p2;
+#define FN3(p1, p2, p3) p1; p2; p3;
+#define FN4(p1, p2, p3, p4) p1; p2; p3; p4;
+#define FN5(p1, p2, p3, p4, p5) p1; p2; p3; p4; p5;
+#define FN6(p1, p2, p3, p4, p5, p6) p1; p2; p3; p4; p5; p6;
+#define FN7(p1, p2, p3, p4, p5, p6, p7) p1; p2; p3; p4; p5; p6; p7;
+#define FN8(p1, p2, p3, p4, p5, p6, p7, p8) p1; p2; p3; p4; p5; p6; p7; p8;
+#define FN9(p1, p2, p3, p4, p5, p6, p7, p8, p9) p1; p2; p3; p4; p5; p6; p7; p8; p9;
+#define FN10(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10;
+#define FN11(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11;
+#define FN12(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12;
+#define FN13(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13;
+#define FN14(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13; p14;
+#define FN15(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13; p14; p15;
+#define FN16(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13; p14; p15; p16;
+#define GET_LAST_ARG(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, ...) p16
+#define PARAMS_MAKER(...) GET_LAST_ARG(__VA_ARGS__, FN16, FN15, FN14, FN13, FN12, FN11, FN10, FN9, FN8, FN7, FN6, FN5, FN4, FN3, FN2, FN1, FN0)
+
+/* Generate a struct containing the entry-point parameters and a "run"
+ * method which can access them implicitly via this-> */
+#define ccl_gpu_kernel_signature(name, ...) \
+struct kernel_gpu_##name \
+{ \
+  PARAMS_MAKER(__VA_ARGS__)(__VA_ARGS__) \
+  void run(thread MetalKernelContext& context, \
+           threadgroup int *simdgroup_offset, \
+           const uint metal_global_id, \
+           const ushort metal_local_id, \
+           const ushort metal_local_size, \
+           uint simdgroup_size, \
+           uint simd_lane_index, \
+           uint simd_group_index, \
+           uint num_simd_groups) ccl_global const; \
+}; \
+kernel void kernel_metal_##name(device const kernel_gpu_##name *params_struct, \
+                                constant KernelParamsMetal &ccl_restrict   _launch_params_metal, \
+                                constant MetalAncillaries *_metal_ancillaries, \
+                                threadgroup int *simdgroup_offset[[ threadgroup(0) ]], \
+                                const uint metal_global_id [[thread_position_in_grid]], \
+                                const ushort metal_local_id   [[thread_position_in_threadgroup]], \
+                                const ushort metal_local_size [[threads_per_threadgroup]], \
+                                uint simdgroup_size [[threads_per_simdgroup]], \
+                                uint simd_lane_index [[thread_index_in_simdgroup]], \
+                                uint simd_group_index [[simdgroup_index_in_threadgroup]], \
+                                uint num_simd_groups [[simdgroups_per_threadgroup]]) { \
+  MetalKernelContext context(_launch_params_metal, _metal_ancillaries); \
+  params_struct->run(context, simdgroup_offset, metal_global_id, metal_local_id, metal_local_size, simdgroup_size, simd_lane_index, simd_group_index, num_simd_groups); \
+} \
+void kernel_gpu_##name::run(thread MetalKernelContext& context, \
+                  threadgroup int *simdgroup_offset, \
+                  const uint metal_global_id, \
+                  const ushort metal_local_id, \
+                  const ushort metal_local_size, \
+                  uint simdgroup_size, \
+                  uint simd_lane_index, \
+                  uint simd_group_index, \
+                  uint num_simd_groups) ccl_global const
+
+#define ccl_gpu_kernel_call(x) context.x
+
+/* define a function object where "func" is the lambda body, and additional parameters are used to specify captured state  */
+#define ccl_gpu_kernel_lambda(func, ...) \
+  struct KernelLambda \
+  { \
+    KernelLambda(ccl_private MetalKernelContext &_context) : context(_context) {} \
+    ccl_private MetalKernelContext &context; \
+    __VA_ARGS__; \
+    int operator()(const int state) const { return (func); } \
+  } ccl_gpu_kernel_lambda_pass(context)
+
+// clang-format on
+
+/* volumetric lambda functions - use function objects for lambda-like functionality */
+#define VOLUME_READ_LAMBDA(function_call) \
+  struct FnObjectRead { \
+    KernelGlobals kg; \
+    ccl_private MetalKernelContext *context; \
+    int state; \
+\
+    VolumeStack operator()(const int i) const \
+    { \
+      return context->function_call; \
+    } \
+  } volume_read_lambda_pass{kg, this, state};
+
+#define VOLUME_WRITE_LAMBDA(function_call) \
+  struct FnObjectWrite { \
+    KernelGlobals kg; \
+    ccl_private MetalKernelContext *context; \
+    int state; \
+\
+    void operator()(const int i, VolumeStack entry) const \
+    { \
+      context->function_call; \
+    } \
+  } volume_write_lambda_pass{kg, this, state};
+
 /* make_type definitions with Metal style element initializers */
 #ifdef make_float2
 #  undef make_float2
@@ -112,6 +230,7 @@ using namespace metal;
 #define sinhf(x) sinh(float(x))
 #define coshf(x) cosh(float(x))
 #define tanhf(x) tanh(float(x))
+#define saturatef(x) saturate(float(x))
 
 /* Use native functions with possibly lower precision for performance,
  * no issues found so far. */
@@ -124,3 +243,43 @@ using namespace metal;
 #define logf(x) trigmode::log(float(x))
 
 #define NULL 0
+
+#define __device__
+
+/* texture bindings and sampler setup */
+
+struct Texture2DParamsMetal {
+  texture2d<float, access::sample> tex;
+};
+struct Texture3DParamsMetal {
+  texture3d<float, access::sample> tex;
+};
+
+struct MetalAncillaries {
+  device Texture2DParamsMetal *textures_2d;
+  device Texture3DParamsMetal *textures_3d;
+};
+
+#include "util/half.h"
+#include "util/types.h"
+
+enum SamplerType {
+  SamplerFilterNearest_AddressRepeat,
+  SamplerFilterNearest_AddressClampEdge,
+  SamplerFilterNearest_AddressClampZero,
+
+  SamplerFilterLinear_AddressRepeat,
+  SamplerFilterLinear_AddressClampEdge,
+  SamplerFilterLinear_AddressClampZero,
+
+  SamplerCount
+};
+
+constant constexpr array<sampler, SamplerCount> metal_samplers = {
+    sampler(address::repeat, filter::nearest),
+    sampler(address::clamp_to_edge, filter::nearest),
+    sampler(address::clamp_to_zero, filter::nearest),
+    sampler(address::repeat, filter::linear),
+    sampler(address::clamp_to_edge, filter::linear),
+    sampler(address::clamp_to_zero, filter::linear),
+};
diff --git a/intern/cycles/kernel/device/metal/context_begin.h b/intern/cycles/kernel/device/metal/context_begin.h
new file mode 100644
index 00000000000..8c9e1c54077
--- /dev/null
+++ b/intern/cycles/kernel/device/metal/context_begin.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// clang-format off
+
+/* Open the Metal kernel context class
+ * Necessary to access resource bindings */
+class MetalKernelContext {
+  public:
+    constant KernelParamsMetal &launch_params_metal;
+    constant MetalAncillaries *metal_ancillaries;
+
+    MetalKernelContext(constant KernelParamsMetal &_launch_params_metal, constant MetalAncillaries * _metal_ancillaries)
+    : launch_params_metal(_launch_params_metal), metal_ancillaries(_metal_ancillaries)
+    {}
+
+    /* texture fetch adapter functions */
+    typedef uint64_t ccl_gpu_tex_object;
+
+    template<typename T>
+    inline __attribute__((__always_inline__))
+    T ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object tex, float x, float y) const {
+      kernel_assert(0);
+      return 0;
+    }
+    template<typename T>
+    inline __attribute__((__always_inline__))
+    T ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object tex, float x, float y, float z) const {
+      kernel_assert(0);
+      return 0;
+    }
+
+    // texture2d
+    template<>
+    inline __attribute__((__always_inline__))
+    float4 ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object tex, float x, float y) const {
+      const uint tid(tex);
+      const uint sid(tex >> 32);
+      return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, y));
+    }
+    template<>
+    inline __attribute__((__always_inline__))
+    float ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object tex, float x, float y) const {
+      const uint tid(tex);
+      const uint sid(tex >> 32);
+      return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, y)).x;
+    }
+
+    // texture3d
+    template<>
+    inline __attribute__((__always_inline__))
+    float4 ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object tex, float x, float y, float z) const {
+      const uint tid(tex);
+      const uint sid(tex >> 32);
+      return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, y, z));
+    }
+    template<>
+    inline __attribute__((__always_inline__))
+    float ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object tex, float x, float y, float z) const {
+      const uint tid(tex);
+      const uint sid(tex >> 32);
+      return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, y, z)).x;
+    }
+#    include "kernel/device/gpu/image.h"
+
+  // clang-format on
diff --git a/intern/cycles/kernel/device/metal/context_end.h b/intern/cycles/kernel/device/metal/context_end.h
new file mode 100644
index 00000000000..e700f294440
--- /dev/null
+++ b/intern/cycles/kernel/device/metal/context_end.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+}
+; /* end of MetalKernelContext class definition */
+
+/* Silently redirect into the MetalKernelContext instance */
+/* NOTE: These macros will need maintaining as entry-points change. */
+
+#undef kernel_integrator_state
+#define kernel_integrator_state context.launch_params_metal.__integrator_state
diff --git a/intern/cycles/kernel/device/metal/globals.h b/intern/cycles/kernel/device/metal/globals.h
new file mode 100644
index 00000000000..1aea36589d0
--- /dev/null
+++ b/intern/cycles/kernel/device/metal/globals.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Constant Globals */
+
+#include "kernel/types.h"
+#include "kernel/util/profiling.h"
+
+#include "kernel/integrator/state.h"
+
+CCL_NAMESPACE_BEGIN
+
+typedef struct KernelParamsMetal {
+
+#define KERNEL_TEX(type, name) ccl_global const type *name;
+#include "kernel/textures.h"
+#undef KERNEL_TEX
+
+  const IntegratorStateGPU __integrator_state;
+  const KernelData data;
+
+} KernelParamsMetal;
+
+typedef struct KernelGlobalsGPU {
+  int unused[1];
+} KernelGlobalsGPU;
+
+typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals;
+
+#define kernel_data launch_params_metal.data
+#define kernel_integrator_state launch_params_metal.__integrator_state
+
+/* data lookup defines */
+
+#define kernel_tex_fetch(tex, index) launch_params_metal.tex[index]
+#define kernel_tex_array(tex) launch_params_metal.tex
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/metal/kernel.metal b/intern/cycles/kernel/device/metal/kernel.metal
new file mode 100644
index 00000000000..feca20ff475
--- /dev/null
+++ b/intern/cycles/kernel/device/metal/kernel.metal
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Metal kernel entry points */
+
+// clang-format off
+
+#include "kernel/device/metal/compat.h"
+#include "kernel/device/metal/globals.h"
+#include "kernel/device/gpu/kernel.h"
+
+// clang-format on
+\ No newline at end of file