diff options
author | Michael Jones <michael_p_jones@apple.com> | 2021-11-18 16:25:30 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-11-18 16:38:05 +0300 |
commit | d1f944c18634f215c3da0484ac3b80e994118680 (patch) | |
tree | 7d2b1f4fa3f3ea0673e9eaafdfca533547f7323b /intern | |
parent | d19e35873f67c90b251ca38e007a83aa1eada211 (diff) |
Cycles: declare constants at program scope on Metal
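MSL (Metal Shading Language) requires that constant address space literals be
declared at program scope. This patch moves the `blackbody_table_r/g/b` and
`cie_colour_match` constants out of their function bodies into a separate header,
`kernel/tables.h`, so they can be declared at the appropriate scope. It also
renames the `ccl_static_constant` qualifier macro to `ccl_inline_constant` on all
devices.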
Ref T92212
Differential Revision: https://developer.blender.org/D13241
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/device/cpu/globals.h | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/device/cuda/compat.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/device/gpu/kernel.h | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/device/hip/compat.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/device/metal/compat.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/device/optix/compat.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/device/optix/kernel.cu | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/math_util.h | 27 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/wavelength.h | 35 | ||||
-rw-r--r-- | intern/cycles/kernel/tables.h | 76 | ||||
-rw-r--r-- | intern/cycles/scene/shader_nodes.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/util/defines.h | 2 |
13 files changed, 90 insertions, 67 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 36335d4c377..0b650b70961 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -273,6 +273,7 @@ set(SRC_KERNEL_UTIL_HEADERS ) set(SRC_KERNEL_TYPES_HEADERS + tables.h textures.h types.h ) diff --git a/intern/cycles/kernel/device/cpu/globals.h b/intern/cycles/kernel/device/cpu/globals.h index dd0327b3f94..746e48b9880 100644 --- a/intern/cycles/kernel/device/cpu/globals.h +++ b/intern/cycles/kernel/device/cpu/globals.h @@ -18,6 +18,7 @@ #pragma once +#include "kernel/tables.h" #include "kernel/types.h" #include "kernel/util/profiling.h" diff --git a/intern/cycles/kernel/device/cuda/compat.h b/intern/cycles/kernel/device/cuda/compat.h index 7f901510329..658dec102b1 100644 --- a/intern/cycles/kernel/device/cuda/compat.h +++ b/intern/cycles/kernel/device/cuda/compat.h @@ -54,7 +54,7 @@ typedef unsigned long long uint64_t; #define ccl_device_noinline_cpu ccl_device #define ccl_device_inline_method ccl_device #define ccl_global -#define ccl_static_constant __constant__ +#define ccl_inline_constant __constant__ #define ccl_device_constant __constant__ __device__ #define ccl_constant const #define ccl_gpu_shared __shared__ diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index 60332af752c..22e2a61a06d 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -21,6 +21,9 @@ #include "kernel/device/gpu/parallel_sorted_index.h" #include "kernel/device/gpu/work_stealing.h" +/* Include constant tables before entering Metal's context class scope (context_begin.h) */ +#include "kernel/tables.h" + #ifdef __KERNEL_METAL__ # include "kernel/device/metal/context_begin.h" #endif diff --git a/intern/cycles/kernel/device/hip/compat.h b/intern/cycles/kernel/device/hip/compat.h index 39bf2131c22..fff7a09e884 100644 --- a/intern/cycles/kernel/device/hip/compat.h +++ 
b/intern/cycles/kernel/device/hip/compat.h @@ -47,7 +47,7 @@ typedef unsigned long long uint64_t; #define ccl_device_noinline_cpu ccl_device #define ccl_device_inline_method ccl_device #define ccl_global -#define ccl_static_constant __constant__ +#define ccl_inline_constant __constant__ #define ccl_device_constant __constant__ __device__ #define ccl_constant const #define ccl_gpu_shared __shared__ diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h index 080109e3b83..61597a4acfc 100644 --- a/intern/cycles/kernel/device/metal/compat.h +++ b/intern/cycles/kernel/device/metal/compat.h @@ -45,7 +45,7 @@ using namespace metal; #define ccl_device_noinline_cpu ccl_device #define ccl_device_inline_method ccl_device #define ccl_global device -#define ccl_static_constant static constant constexpr +#define ccl_inline_constant static constant constexpr #define ccl_device_constant constant #define ccl_constant const device #define ccl_gpu_shared threadgroup diff --git a/intern/cycles/kernel/device/optix/compat.h b/intern/cycles/kernel/device/optix/compat.h index bebb1e458eb..0619c135c39 100644 --- a/intern/cycles/kernel/device/optix/compat.h +++ b/intern/cycles/kernel/device/optix/compat.h @@ -53,7 +53,7 @@ typedef unsigned long long uint64_t; #define ccl_device_noinline __device__ __noinline__ #define ccl_device_noinline_cpu ccl_device #define ccl_global -#define ccl_static_constant __constant__ +#define ccl_inline_constant __constant__ #define ccl_device_constant __constant__ __device__ #define ccl_constant const #define ccl_gpu_shared __shared__ diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu index 849710ffe61..3f360e56483 100644 --- a/intern/cycles/kernel/device/optix/kernel.cu +++ b/intern/cycles/kernel/device/optix/kernel.cu @@ -21,6 +21,8 @@ #include "kernel/device/gpu/image.h" /* Texture lookup uses normal CUDA intrinsics. 
*/ +#include "kernel/tables.h" + #include "kernel/integrator/state.h" #include "kernel/integrator/state_flow.h" #include "kernel/integrator/state_util.h" diff --git a/intern/cycles/kernel/svm/math_util.h b/intern/cycles/kernel/svm/math_util.h index b2e539cdd1f..20817cd0fd3 100644 --- a/intern/cycles/kernel/svm/math_util.h +++ b/intern/cycles/kernel/svm/math_util.h @@ -212,33 +212,6 @@ ccl_device float3 svm_math_blackbody_color(float t) * which is enough to get the same 8 bit/channel color. */ - const float blackbody_table_r[6][3] = { - {2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f}, - {3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f}, - {4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f}, - {4.66849800e+03f, 2.85655028e-05f, 1.29075375e-01f}, - {4.60124770e+03f, 2.89727618e-05f, 1.48001316e-01f}, - {3.78765709e+03f, 9.36026367e-06f, 3.98995841e-01f}, - }; - - const float blackbody_table_g[6][3] = { - {-7.50343014e+02f, 3.15679613e-04f, 4.73464526e-01f}, - {-1.00402363e+03f, 1.29189794e-04f, 9.08181524e-01f}, - {-1.22075471e+03f, 2.56245413e-05f, 1.20753416e+00f}, - {-1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f}, - {-1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f}, - {-5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f}, - }; - - const float blackbody_table_b[6][4] = { - {0.0f, 0.0f, 0.0f, 0.0f}, /* zeros should be optimized by compiler */ - {0.0f, 0.0f, 0.0f, 0.0f}, - {0.0f, 0.0f, 0.0f, 0.0f}, - {-2.02524603e-11f, 1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f}, - {-2.22463426e-13f, -1.55078698e-08f, 3.81675160e-04f, -7.30646033e-01f}, - {6.72595954e-13f, -2.73059993e-08f, 4.24068546e-04f, -7.52204323e-01f}, - }; - if (t >= 12000.0f) { return make_float3(0.826270103f, 0.994478524f, 1.56626022f); } diff --git a/intern/cycles/kernel/svm/wavelength.h b/intern/cycles/kernel/svm/wavelength.h index 28fd172abc7..6e25224243f 100644 --- a/intern/cycles/kernel/svm/wavelength.h +++ b/intern/cycles/kernel/svm/wavelength.h @@ -42,41 +42,6 @@ 
ccl_device_noinline void svm_node_wavelength(KernelGlobals kg, uint wavelength, uint color_out) { - // CIE colour matching functions xBar, yBar, and zBar for - // wavelengths from 380 through 780 nanometers, every 5 - // nanometers. For a wavelength lambda in this range: - // cie_colour_match[(lambda - 380) / 5][0] = xBar - // cie_colour_match[(lambda - 380) / 5][1] = yBar - // cie_colour_match[(lambda - 380) / 5][2] = zBar - const float cie_colour_match[81][3] = { - {0.0014f, 0.0000f, 0.0065f}, {0.0022f, 0.0001f, 0.0105f}, {0.0042f, 0.0001f, 0.0201f}, - {0.0076f, 0.0002f, 0.0362f}, {0.0143f, 0.0004f, 0.0679f}, {0.0232f, 0.0006f, 0.1102f}, - {0.0435f, 0.0012f, 0.2074f}, {0.0776f, 0.0022f, 0.3713f}, {0.1344f, 0.0040f, 0.6456f}, - {0.2148f, 0.0073f, 1.0391f}, {0.2839f, 0.0116f, 1.3856f}, {0.3285f, 0.0168f, 1.6230f}, - {0.3483f, 0.0230f, 1.7471f}, {0.3481f, 0.0298f, 1.7826f}, {0.3362f, 0.0380f, 1.7721f}, - {0.3187f, 0.0480f, 1.7441f}, {0.2908f, 0.0600f, 1.6692f}, {0.2511f, 0.0739f, 1.5281f}, - {0.1954f, 0.0910f, 1.2876f}, {0.1421f, 0.1126f, 1.0419f}, {0.0956f, 0.1390f, 0.8130f}, - {0.0580f, 0.1693f, 0.6162f}, {0.0320f, 0.2080f, 0.4652f}, {0.0147f, 0.2586f, 0.3533f}, - {0.0049f, 0.3230f, 0.2720f}, {0.0024f, 0.4073f, 0.2123f}, {0.0093f, 0.5030f, 0.1582f}, - {0.0291f, 0.6082f, 0.1117f}, {0.0633f, 0.7100f, 0.0782f}, {0.1096f, 0.7932f, 0.0573f}, - {0.1655f, 0.8620f, 0.0422f}, {0.2257f, 0.9149f, 0.0298f}, {0.2904f, 0.9540f, 0.0203f}, - {0.3597f, 0.9803f, 0.0134f}, {0.4334f, 0.9950f, 0.0087f}, {0.5121f, 1.0000f, 0.0057f}, - {0.5945f, 0.9950f, 0.0039f}, {0.6784f, 0.9786f, 0.0027f}, {0.7621f, 0.9520f, 0.0021f}, - {0.8425f, 0.9154f, 0.0018f}, {0.9163f, 0.8700f, 0.0017f}, {0.9786f, 0.8163f, 0.0014f}, - {1.0263f, 0.7570f, 0.0011f}, {1.0567f, 0.6949f, 0.0010f}, {1.0622f, 0.6310f, 0.0008f}, - {1.0456f, 0.5668f, 0.0006f}, {1.0026f, 0.5030f, 0.0003f}, {0.9384f, 0.4412f, 0.0002f}, - {0.8544f, 0.3810f, 0.0002f}, {0.7514f, 0.3210f, 0.0001f}, {0.6424f, 0.2650f, 0.0000f}, - {0.5419f, 
0.2170f, 0.0000f}, {0.4479f, 0.1750f, 0.0000f}, {0.3608f, 0.1382f, 0.0000f}, - {0.2835f, 0.1070f, 0.0000f}, {0.2187f, 0.0816f, 0.0000f}, {0.1649f, 0.0610f, 0.0000f}, - {0.1212f, 0.0446f, 0.0000f}, {0.0874f, 0.0320f, 0.0000f}, {0.0636f, 0.0232f, 0.0000f}, - {0.0468f, 0.0170f, 0.0000f}, {0.0329f, 0.0119f, 0.0000f}, {0.0227f, 0.0082f, 0.0000f}, - {0.0158f, 0.0057f, 0.0000f}, {0.0114f, 0.0041f, 0.0000f}, {0.0081f, 0.0029f, 0.0000f}, - {0.0058f, 0.0021f, 0.0000f}, {0.0041f, 0.0015f, 0.0000f}, {0.0029f, 0.0010f, 0.0000f}, - {0.0020f, 0.0007f, 0.0000f}, {0.0014f, 0.0005f, 0.0000f}, {0.0010f, 0.0004f, 0.0000f}, - {0.0007f, 0.0002f, 0.0000f}, {0.0005f, 0.0002f, 0.0000f}, {0.0003f, 0.0001f, 0.0000f}, - {0.0002f, 0.0001f, 0.0000f}, {0.0002f, 0.0001f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, - {0.0001f, 0.0000f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, {0.0000f, 0.0000f, 0.0000f}}; - float lambda_nm = stack_load_float(stack, wavelength); float ii = (lambda_nm - 380.0f) * (1.0f / 5.0f); // scaled 0..80 int i = float_to_int(ii); diff --git a/intern/cycles/kernel/tables.h b/intern/cycles/kernel/tables.h new file mode 100644 index 00000000000..768033d4ffe --- /dev/null +++ b/intern/cycles/kernel/tables.h @@ -0,0 +1,76 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* clang-format off */ + +ccl_inline_constant float blackbody_table_r[][3] = { + {2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f}, + {3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f}, + {4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f}, + {4.66849800e+03f, 2.85655028e-05f, 1.29075375e-01f}, + {4.60124770e+03f, 2.89727618e-05f, 1.48001316e-01f}, + {3.78765709e+03f, 9.36026367e-06f, 3.98995841e-01f} +}; + +ccl_inline_constant float blackbody_table_g[][3] = { + {-7.50343014e+02f, 3.15679613e-04f, 4.73464526e-01f}, + {-1.00402363e+03f, 1.29189794e-04f, 9.08181524e-01f}, + {-1.22075471e+03f, 2.56245413e-05f, 1.20753416e+00f}, + {-1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f}, + {-1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f}, + {-5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f} +}; + +ccl_inline_constant float blackbody_table_b[][4] = { + {0.0f, 0.0f, 0.0f, 0.0f}, /* zeros should be optimized by compiler */ + {0.0f, 0.0f, 0.0f, 0.0f}, + {0.0f, 0.0f, 0.0f, 0.0f}, + {-2.02524603e-11f, 1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f}, + {-2.22463426e-13f, -1.55078698e-08f, 3.81675160e-04f, -7.30646033e-01f}, + {6.72595954e-13f, -2.73059993e-08f, 4.24068546e-04f, -7.52204323e-01f} +}; + +ccl_inline_constant float cie_colour_match[][3] = { + {0.0014f, 0.0000f, 0.0065f}, {0.0022f, 0.0001f, 0.0105f}, {0.0042f, 0.0001f, 0.0201f}, + {0.0076f, 0.0002f, 0.0362f}, {0.0143f, 0.0004f, 0.0679f}, {0.0232f, 0.0006f, 0.1102f}, + {0.0435f, 0.0012f, 0.2074f}, {0.0776f, 0.0022f, 0.3713f}, {0.1344f, 0.0040f, 0.6456f}, + {0.2148f, 0.0073f, 1.0391f}, {0.2839f, 0.0116f, 1.3856f}, {0.3285f, 0.0168f, 1.6230f}, + {0.3483f, 0.0230f, 1.7471f}, {0.3481f, 0.0298f, 1.7826f}, {0.3362f, 0.0380f, 1.7721f}, + {0.3187f, 0.0480f, 1.7441f}, {0.2908f, 0.0600f, 1.6692f}, {0.2511f, 0.0739f, 1.5281f}, + {0.1954f, 0.0910f, 1.2876f}, {0.1421f, 0.1126f, 1.0419f}, {0.0956f, 0.1390f, 0.8130f}, + {0.0580f, 0.1693f, 0.6162f}, {0.0320f, 0.2080f, 0.4652f}, {0.0147f, 0.2586f, 0.3533f}, 
+ {0.0049f, 0.3230f, 0.2720f}, {0.0024f, 0.4073f, 0.2123f}, {0.0093f, 0.5030f, 0.1582f}, + {0.0291f, 0.6082f, 0.1117f}, {0.0633f, 0.7100f, 0.0782f}, {0.1096f, 0.7932f, 0.0573f}, + {0.1655f, 0.8620f, 0.0422f}, {0.2257f, 0.9149f, 0.0298f}, {0.2904f, 0.9540f, 0.0203f}, + {0.3597f, 0.9803f, 0.0134f}, {0.4334f, 0.9950f, 0.0087f}, {0.5121f, 1.0000f, 0.0057f}, + {0.5945f, 0.9950f, 0.0039f}, {0.6784f, 0.9786f, 0.0027f}, {0.7621f, 0.9520f, 0.0021f}, + {0.8425f, 0.9154f, 0.0018f}, {0.9163f, 0.8700f, 0.0017f}, {0.9786f, 0.8163f, 0.0014f}, + {1.0263f, 0.7570f, 0.0011f}, {1.0567f, 0.6949f, 0.0010f}, {1.0622f, 0.6310f, 0.0008f}, + {1.0456f, 0.5668f, 0.0006f}, {1.0026f, 0.5030f, 0.0003f}, {0.9384f, 0.4412f, 0.0002f}, + {0.8544f, 0.3810f, 0.0002f}, {0.7514f, 0.3210f, 0.0001f}, {0.6424f, 0.2650f, 0.0000f}, + {0.5419f, 0.2170f, 0.0000f}, {0.4479f, 0.1750f, 0.0000f}, {0.3608f, 0.1382f, 0.0000f}, + {0.2835f, 0.1070f, 0.0000f}, {0.2187f, 0.0816f, 0.0000f}, {0.1649f, 0.0610f, 0.0000f}, + {0.1212f, 0.0446f, 0.0000f}, {0.0874f, 0.0320f, 0.0000f}, {0.0636f, 0.0232f, 0.0000f}, + {0.0468f, 0.0170f, 0.0000f}, {0.0329f, 0.0119f, 0.0000f}, {0.0227f, 0.0082f, 0.0000f}, + {0.0158f, 0.0057f, 0.0000f}, {0.0114f, 0.0041f, 0.0000f}, {0.0081f, 0.0029f, 0.0000f}, + {0.0058f, 0.0021f, 0.0000f}, {0.0041f, 0.0015f, 0.0000f}, {0.0029f, 0.0010f, 0.0000f}, + {0.0020f, 0.0007f, 0.0000f}, {0.0014f, 0.0005f, 0.0000f}, {0.0010f, 0.0004f, 0.0000f}, + {0.0007f, 0.0002f, 0.0000f}, {0.0005f, 0.0002f, 0.0000f}, {0.0003f, 0.0001f, 0.0000f}, + {0.0002f, 0.0001f, 0.0000f}, {0.0002f, 0.0001f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, + {0.0001f, 0.0000f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, {0.0000f, 0.0000f, 0.0000f} +}; + +/* clang-format on */
\ No newline at end of file diff --git a/intern/cycles/scene/shader_nodes.cpp b/intern/cycles/scene/shader_nodes.cpp index 8a9ef56b0ae..8c20807a52b 100644 --- a/intern/cycles/scene/shader_nodes.cpp +++ b/intern/cycles/scene/shader_nodes.cpp @@ -34,6 +34,8 @@ #include "util/log.h" #include "util/transform.h" +#include "kernel/tables.h" + #include "kernel/svm/color_util.h" #include "kernel/svm/mapping_util.h" #include "kernel/svm/math_util.h" diff --git a/intern/cycles/util/defines.h b/intern/cycles/util/defines.h index edc36b14745..a2e8d83adb7 100644 --- a/intern/cycles/util/defines.h +++ b/intern/cycles/util/defines.h @@ -72,7 +72,7 @@ /* Address spaces for GPU. */ # define ccl_global -# define ccl_static_constant static const +# define ccl_inline_constant inline constexpr # define ccl_constant const # define ccl_private |