diff options
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/device/metal/compat.h | 24 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/mnee.h | 13 | ||||
-rw-r--r-- | intern/cycles/kernel/osl/services.cpp | 38 | ||||
-rw-r--r-- | intern/cycles/kernel/osl/services.h | 14 | ||||
-rw-r--r-- | intern/cycles/kernel/osl/shaders/CMakeLists.txt | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/osl/shaders/node_color.h | 50 | ||||
-rw-r--r-- | intern/cycles/kernel/osl/shaders/node_combine_color.osl | 16 | ||||
-rw-r--r-- | intern/cycles/kernel/osl/shaders/node_separate_color.osl | 26 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/color_util.h | 26 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/sepcomb_color.h | 54 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm.h | 7 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/types.h | 8 |
13 files changed, 258 insertions, 21 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index d97854a52d0..473bdb67920 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -145,6 +145,7 @@ set(SRC_KERNEL_SVM_HEADERS svm/normal.h svm/ramp.h svm/ramp_util.h + svm/sepcomb_color.h svm/sepcomb_hsv.h svm/sepcomb_vector.h svm/sky.h diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h index 4e309f16c08..0ed52074a90 100644 --- a/intern/cycles/kernel/device/metal/compat.h +++ b/intern/cycles/kernel/device/metal/compat.h @@ -29,10 +29,26 @@ using namespace metal::raytracing; /* Qualifiers */ -#define ccl_device -#define ccl_device_inline ccl_device -#define ccl_device_forceinline ccl_device -#define ccl_device_noinline ccl_device __attribute__((noinline)) +#if defined(__KERNEL_METAL_APPLE__) + +/* Inline everything for Apple GPUs. + * This gives ~1.1x speedup and 10% spill reduction for integator_shade_surface + * at the cost of longer compile times (~4.5 minutes on M1 Max). */ + +# define ccl_device __attribute__((always_inline)) +# define ccl_device_inline __attribute__((always_inline)) +# define ccl_device_forceinline __attribute__((always_inline)) +# define ccl_device_noinline __attribute__((always_inline)) + +#else + +# define ccl_device +# define ccl_device_inline ccl_device +# define ccl_device_forceinline ccl_device +# define ccl_device_noinline ccl_device __attribute__((noinline)) + +#endif + #define ccl_device_noinline_cpu ccl_device #define ccl_device_inline_method ccl_device #define ccl_global device diff --git a/intern/cycles/kernel/integrator/mnee.h b/intern/cycles/kernel/integrator/mnee.h index 7b86c660380..2f7b711e28c 100644 --- a/intern/cycles/kernel/integrator/mnee.h +++ b/intern/cycles/kernel/integrator/mnee.h @@ -534,14 +534,8 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg, tv.dp_dv = mv.dp_dv; /* Setup corrected manifold vertex. */ - mnee_setup_manifold_vertex(kg, - &tv, - mv.bsdf, - mv.eta, - mv.n_offset, - &projection_ray, - &projection_isect, - sd_vtx); + mnee_setup_manifold_vertex( + kg, &tv, mv.bsdf, mv.eta, mv.n_offset, &projection_ray, &projection_isect, sd_vtx); /* Fail newton solve if we are not making progress, probably stuck trying to move off the * edge of the mesh. */ @@ -1013,8 +1007,7 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg, } /* Setup differential geometry on vertex. */ - mnee_setup_manifold_vertex( - kg, &mv, bsdf, eta, h, &probe_ray, &probe_isect, sd_mnee); + mnee_setup_manifold_vertex(kg, &mv, bsdf, eta, h, &probe_ray, &probe_isect, sd_mnee); break; } } diff --git a/intern/cycles/kernel/osl/services.cpp b/intern/cycles/kernel/osl/services.cpp index 832498f1f73..e2e10b5b83f 100644 --- a/intern/cycles/kernel/osl/services.cpp +++ b/intern/cycles/kernel/osl/services.cpp @@ -1304,8 +1304,38 @@ bool OSLRenderServices::texture(ustring filename, break; } case OSLTextureHandle::SVM: { - /* Packed texture. */ - float4 rgba = kernel_tex_image_interp(kernel_globals, handle->svm_slot, s, 1.0f - t); + int id = -1; + if (handle->svm_slots[0].w == -1) { + /* Packed single texture. */ + id = handle->svm_slots[0].y; + } + else { + /* Packed tiled texture. */ + int tx = (int)s; + int ty = (int)t; + int tile = 1001 + 10 * ty + tx; + for (int4 tile_node : handle->svm_slots) { + if (tile_node.x == tile) { + id = tile_node.y; + break; + } + if (tile_node.z == tile) { + id = tile_node.w; + break; + } + } + s -= tx; + t -= ty; + } + + float4 rgba; + if (id == -1) { + rgba = make_float4( + TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); + } + else { + rgba = kernel_tex_image_interp(kernel_globals, id, s, 1.0f - t); + } result[0] = rgba[0]; if (nchannels > 1) @@ -1319,7 +1349,7 @@ bool OSLRenderServices::texture(ustring filename, } case OSLTextureHandle::IES: { /* IES light. */ - result[0] = kernel_ies_interp(kernel_globals, handle->svm_slot, s, t); + result[0] = kernel_ies_interp(kernel_globals, handle->svm_slots[0].y, s, t); status = true; break; } @@ -1413,7 +1443,7 @@ bool OSLRenderServices::texture3d(ustring filename, /* Packed texture. */ ShaderData *sd = (ShaderData *)(sg->renderstate); KernelGlobals kernel_globals = sd->osl_globals; - int slot = handle->svm_slot; + int slot = handle->svm_slots[0].y; float3 P_float3 = make_float3(P.x, P.y, P.z); float4 rgba = kernel_tex_image_interp_3d(kernel_globals, slot, P_float3, INTERPOLATION_NONE); diff --git a/intern/cycles/kernel/osl/services.h b/intern/cycles/kernel/osl/services.h index 653fa017140..edffd912bad 100644 --- a/intern/cycles/kernel/osl/services.h +++ b/intern/cycles/kernel/osl/services.h @@ -39,18 +39,26 @@ struct KernelGlobalsCPU; * with additional data. * * These are stored in a concurrent hash map, because OSL can compile multiple - * shaders in parallel. */ + * shaders in parallel. + * + * NOTE: The svm_slots array contains a compressed mapping of tile to svm_slot pairs + * stored as follows: x:tile_a, y:svm_slot_a, z:tile_b, w:svm_slot_b etc. */ struct OSLTextureHandle : public OIIO::RefCnt { enum Type { OIIO, SVM, IES, BEVEL, AO }; + OSLTextureHandle(Type type, const vector<int4> &svm_slots) + : type(type), svm_slots(svm_slots), oiio_handle(NULL), processor(NULL) + { + } + OSLTextureHandle(Type type = OIIO, int svm_slot = -1) - : type(type), svm_slot(svm_slot), oiio_handle(NULL), processor(NULL) + : OSLTextureHandle(type, {make_int4(0, svm_slot, -1, -1)}) { } Type type; - int svm_slot; + vector<int4> svm_slots; OSL::TextureSystem::TextureHandle *oiio_handle; ColorSpaceProcessor *processor; }; diff --git a/intern/cycles/kernel/osl/shaders/CMakeLists.txt b/intern/cycles/kernel/osl/shaders/CMakeLists.txt index 7ced21c5670..741bce7c399 100644 --- a/intern/cycles/kernel/osl/shaders/CMakeLists.txt +++ b/intern/cycles/kernel/osl/shaders/CMakeLists.txt @@ -16,6 +16,7 @@ set(SRC_OSL node_camera.osl node_checker_texture.osl node_clamp.osl + node_combine_color.osl node_combine_rgb.osl node_combine_hsv.osl node_combine_xyz.osl @@ -68,6 +69,7 @@ set(SRC_OSL node_refraction_bsdf.osl node_rgb_curves.osl node_rgb_ramp.osl + node_separate_color.osl node_separate_rgb.osl node_separate_hsv.osl node_separate_xyz.osl diff --git a/intern/cycles/kernel/osl/shaders/node_color.h b/intern/cycles/kernel/osl/shaders/node_color.h index 388dd114e9a..06735f5b03d 100644 --- a/intern/cycles/kernel/osl/shaders/node_color.h +++ b/intern/cycles/kernel/osl/shaders/node_color.h @@ -148,3 +148,53 @@ color hsv_to_rgb(color hsv) return rgb; } + +color rgb_to_hsl(color rgb) +{ + float cmax, cmin, h, s, l; + + cmax = max(rgb[0], max(rgb[1], rgb[2])); + cmin = min(rgb[0], min(rgb[1], rgb[2])); + l = min(1.0, (cmax + cmin) / 2.0); + + if (cmax == cmin) { + h = s = 0.0; /* achromatic */ + } + else { + float cdelta = cmax - cmin; + s = l > 0.5 ? cdelta / (2.0 - cmax - cmin) : cdelta / (cmax + cmin); + if (cmax == rgb[0]) { + h = (rgb[1] - rgb[2]) / cdelta + (rgb[1] < rgb[2] ? 6.0 : 0.0); + } + else if (cmax == rgb[1]) { + h = (rgb[2] - rgb[0]) / cdelta + 2.0; + } + else { + h = (rgb[0] - rgb[1]) / cdelta + 4.0; + } + } + h /= 6.0; + + return color(h, s, l); +} + +color hsl_to_rgb(color hsl) +{ + float nr, ng, nb, chroma, h, s, l; + + h = hsl[0]; + s = hsl[1]; + l = hsl[2]; + + nr = abs(h * 6.0 - 3.0) - 1.0; + ng = 2.0 - abs(h * 6.0 - 2.0); + nb = 2.0 - abs(h * 6.0 - 4.0); + + nr = clamp(nr, 0.0, 1.0); + nb = clamp(nb, 0.0, 1.0); + ng = clamp(ng, 0.0, 1.0); + + chroma = (1.0 - abs(2.0 * l - 1.0)) * s; + + return color((nr - 0.5) * chroma + l, (ng - 0.5) * chroma + l, (nb - 0.5) * chroma + l); +} diff --git a/intern/cycles/kernel/osl/shaders/node_combine_color.osl b/intern/cycles/kernel/osl/shaders/node_combine_color.osl new file mode 100644 index 00000000000..681a592d2bb --- /dev/null +++ b/intern/cycles/kernel/osl/shaders/node_combine_color.osl @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#include "stdcycles.h" + +shader node_combine_color(string color_type = "rgb", + float Red = 0.0, + float Green = 0.0, + float Blue = 0.0, + output color Color = 0.8) +{ + if (color_type == "rgb" || color_type == "hsv" || color_type == "hsl") + Color = color(color_type, Red, Green, Blue); + else + warning("%s", "Unknown color space!"); +} diff --git a/intern/cycles/kernel/osl/shaders/node_separate_color.osl b/intern/cycles/kernel/osl/shaders/node_separate_color.osl new file mode 100644 index 00000000000..6f3e3149d8e --- /dev/null +++ b/intern/cycles/kernel/osl/shaders/node_separate_color.osl @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#include "node_color.h" +#include "stdcycles.h" + +shader node_separate_color(string color_type = "rgb", + color Color = 0.8, + output float Red = 0.0, + output float Green = 0.0, + output float Blue = 0.0) +{ + color col; + if (color_type == "rgb") + col = Color; + else if (color_type == "hsv") + col = rgb_to_hsv(Color); + else if (color_type == "hsl") + col = rgb_to_hsl(Color); + else + warning("%s", "Unknown color space!"); + + Red = col[0]; + Green = col[1]; + Blue = col[2]; +} diff --git a/intern/cycles/kernel/svm/color_util.h b/intern/cycles/kernel/svm/color_util.h index b439721383c..fa22d4bc8c2 100644 --- a/intern/cycles/kernel/svm/color_util.h +++ b/intern/cycles/kernel/svm/color_util.h @@ -307,4 +307,30 @@ ccl_device_inline float3 svm_brightness_contrast(float3 color, float brightness, return color; } +ccl_device float3 svm_combine_color(NodeCombSepColorType type, float3 color) +{ + switch (type) { + case NODE_COMBSEP_COLOR_HSV: + return hsv_to_rgb(color); + case NODE_COMBSEP_COLOR_HSL: + return hsl_to_rgb(color); + case NODE_COMBSEP_COLOR_RGB: + default: + return color; + } +} + +ccl_device float3 svm_separate_color(NodeCombSepColorType type, float3 color) +{ + switch (type) { + case NODE_COMBSEP_COLOR_HSV: + return rgb_to_hsv(color); + case NODE_COMBSEP_COLOR_HSL: + return rgb_to_hsl(color); + case NODE_COMBSEP_COLOR_RGB: + default: + return color; + } +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/sepcomb_color.h b/intern/cycles/kernel/svm/sepcomb_color.h new file mode 100644 index 00000000000..d186e7f163b --- /dev/null +++ b/intern/cycles/kernel/svm/sepcomb_color.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +ccl_device_noinline void svm_node_combine_color(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint color_type, + uint inputs_stack_offsets, + uint result_stack_offset) +{ + uint red_stack_offset, green_stack_offset, blue_stack_offset; + svm_unpack_node_uchar3( + inputs_stack_offsets, &red_stack_offset, &green_stack_offset, &blue_stack_offset); + + float r = stack_load_float(stack, red_stack_offset); + float g = stack_load_float(stack, green_stack_offset); + float b = stack_load_float(stack, blue_stack_offset); + + /* Combine, and convert back to RGB */ + float3 color = svm_combine_color((NodeCombSepColorType)color_type, make_float3(r, g, b)); + + if (stack_valid(result_stack_offset)) + stack_store_float3(stack, result_stack_offset, color); +} + +ccl_device_noinline void svm_node_separate_color(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint color_type, + uint input_stack_offset, + uint results_stack_offsets) +{ + float3 color = stack_load_float3(stack, input_stack_offset); + + /* Convert color space */ + color = svm_separate_color((NodeCombSepColorType)color_type, color); + + uint red_stack_offset, green_stack_offset, blue_stack_offset; + svm_unpack_node_uchar3( + results_stack_offsets, &red_stack_offset, &green_stack_offset, &blue_stack_offset); + + if (stack_valid(red_stack_offset)) + stack_store_float(stack, red_stack_offset, color.x); + if (stack_valid(green_stack_offset)) + stack_store_float(stack, green_stack_offset, color.y); + if (stack_valid(blue_stack_offset)) + stack_store_float(stack, blue_stack_offset, color.z); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index 08352a6231f..5def943c87f 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -181,6 +181,7 @@ CCL_NAMESPACE_END #include "kernel/svm/noisetex.h" #include "kernel/svm/normal.h" #include "kernel/svm/ramp.h" +#include "kernel/svm/sepcomb_color.h" #include "kernel/svm/sepcomb_hsv.h" #include "kernel/svm/sepcomb_vector.h" #include "kernel/svm/sky.h" @@ -508,6 +509,12 @@ ccl_device void svm_eval_nodes(KernelGlobals kg, case NODE_MIX: offset = svm_node_mix(kg, sd, stack, node.y, node.z, node.w, offset); break; + case NODE_SEPARATE_COLOR: + svm_node_separate_color(kg, sd, stack, node.y, node.z, node.w); + break; + case NODE_COMBINE_COLOR: + svm_node_combine_color(kg, sd, stack, node.y, node.z, node.w); + break; case NODE_SEPARATE_VECTOR: svm_node_separate_vector(sd, stack, node.y, node.z, node.w); break; diff --git a/intern/cycles/kernel/svm/types.h b/intern/cycles/kernel/svm/types.h index bede58f7a54..82109ec4c4f 100644 --- a/intern/cycles/kernel/svm/types.h +++ b/intern/cycles/kernel/svm/types.h @@ -92,6 +92,8 @@ typedef enum ShaderNodeType { NODE_NORMAL_MAP, NODE_INVERT, NODE_MIX, + NODE_SEPARATE_COLOR, + NODE_COMBINE_COLOR, NODE_SEPARATE_VECTOR, NODE_COMBINE_VECTOR, NODE_SEPARATE_HSV, @@ -487,6 +489,12 @@ typedef enum NodePrincipledHairParametrization { NODE_PRINCIPLED_HAIR_NUM, } NodePrincipledHairParametrization; +typedef enum NodeCombSepColorType { + NODE_COMBSEP_COLOR_RGB, + NODE_COMBSEP_COLOR_HSV, + NODE_COMBSEP_COLOR_HSL, +} NodeCombSepColorType; + /* Closure */ typedef enum ClosureType { |