Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/svm')
-rw-r--r--intern/cycles/kernel/svm/svm.h227
-rw-r--r--intern/cycles/kernel/svm/svm_ao.h53
-rw-r--r--intern/cycles/kernel/svm/svm_aov.h42
-rw-r--r--intern/cycles/kernel/svm/svm_attribute.h57
-rw-r--r--intern/cycles/kernel/svm/svm_bevel.h145
-rw-r--r--intern/cycles/kernel/svm/svm_blackbody.h7
-rw-r--r--intern/cycles/kernel/svm/svm_brick.h11
-rw-r--r--intern/cycles/kernel/svm/svm_brightness.h2
-rw-r--r--intern/cycles/kernel/svm/svm_bump.h16
-rw-r--r--intern/cycles/kernel/svm/svm_camera.h12
-rw-r--r--intern/cycles/kernel/svm/svm_checker.h5
-rw-r--r--intern/cycles/kernel/svm/svm_clamp.h17
-rw-r--r--intern/cycles/kernel/svm/svm_closure.h121
-rw-r--r--intern/cycles/kernel/svm/svm_convert.h4
-rw-r--r--intern/cycles/kernel/svm/svm_displace.h21
-rw-r--r--intern/cycles/kernel/svm/svm_fresnel.h4
-rw-r--r--intern/cycles/kernel/svm/svm_gamma.h2
-rw-r--r--intern/cycles/kernel/svm/svm_geometry.h24
-rw-r--r--intern/cycles/kernel/svm/svm_gradient.h2
-rw-r--r--intern/cycles/kernel/svm/svm_hsv.h6
-rw-r--r--intern/cycles/kernel/svm/svm_ies.h10
-rw-r--r--intern/cycles/kernel/svm/svm_image.h26
-rw-r--r--intern/cycles/kernel/svm/svm_invert.h2
-rw-r--r--intern/cycles/kernel/svm/svm_light_path.h50
-rw-r--r--intern/cycles/kernel/svm/svm_magic.h7
-rw-r--r--intern/cycles/kernel/svm/svm_map_range.h19
-rw-r--r--intern/cycles/kernel/svm/svm_mapping.h41
-rw-r--r--intern/cycles/kernel/svm/svm_math.h30
-rw-r--r--intern/cycles/kernel/svm/svm_mix.h17
-rw-r--r--intern/cycles/kernel/svm/svm_musgrave.h19
-rw-r--r--intern/cycles/kernel/svm/svm_noise.h10
-rw-r--r--intern/cycles/kernel/svm/svm_noisetex.h19
-rw-r--r--intern/cycles/kernel/svm/svm_normal.h17
-rw-r--r--intern/cycles/kernel/svm/svm_ramp.h34
-rw-r--r--intern/cycles/kernel/svm/svm_sepcomb_hsv.h34
-rw-r--r--intern/cycles/kernel/svm/svm_sky.h33
-rw-r--r--intern/cycles/kernel/svm/svm_tex_coord.h55
-rw-r--r--intern/cycles/kernel/svm/svm_types.h43
-rw-r--r--intern/cycles/kernel/svm/svm_value.h9
-rw-r--r--intern/cycles/kernel/svm/svm_vector_rotate.h10
-rw-r--r--intern/cycles/kernel/svm/svm_vector_transform.h8
-rw-r--r--intern/cycles/kernel/svm/svm_vertex_color.h48
-rw-r--r--intern/cycles/kernel/svm/svm_voronoi.h148
-rw-r--r--intern/cycles/kernel/svm/svm_voxel.h11
-rw-r--r--intern/cycles/kernel/svm/svm_wave.h9
-rw-r--r--intern/cycles/kernel/svm/svm_wavelength.h4
-rw-r--r--intern/cycles/kernel/svm/svm_white_noise.h13
-rw-r--r--intern/cycles/kernel/svm/svm_wireframe.h18
48 files changed, 861 insertions, 661 deletions
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index 000da1fa615..4aee1ef11b3 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -48,16 +48,18 @@ ccl_device_inline float3 stack_load_float3(float *stack, uint a)
{
kernel_assert(a + 2 < SVM_STACK_SIZE);
- return make_float3(stack[a + 0], stack[a + 1], stack[a + 2]);
+ float *stack_a = stack + a;
+ return make_float3(stack_a[0], stack_a[1], stack_a[2]);
}
ccl_device_inline void stack_store_float3(float *stack, uint a, float3 f)
{
kernel_assert(a + 2 < SVM_STACK_SIZE);
- stack[a + 0] = f.x;
- stack[a + 1] = f.y;
- stack[a + 2] = f.z;
+ float *stack_a = stack + a;
+ stack_a[0] = f.x;
+ stack_a[1] = f.y;
+ stack_a[2] = f.z;
}
ccl_device_inline float stack_load_float(float *stack, uint a)
@@ -105,14 +107,14 @@ ccl_device_inline bool stack_valid(uint a)
/* Reading Nodes */
-ccl_device_inline uint4 read_node(KernelGlobals *kg, int *offset)
+ccl_device_inline uint4 read_node(const KernelGlobals *kg, int *offset)
{
uint4 node = kernel_tex_fetch(__svm_nodes, *offset);
(*offset)++;
return node;
}
-ccl_device_inline float4 read_node_float(KernelGlobals *kg, int *offset)
+ccl_device_inline float4 read_node_float(const KernelGlobals *kg, int *offset)
{
uint4 node = kernel_tex_fetch(__svm_nodes, *offset);
float4 f = make_float4(__uint_as_float(node.x),
@@ -123,7 +125,7 @@ ccl_device_inline float4 read_node_float(KernelGlobals *kg, int *offset)
return f;
}
-ccl_device_inline float4 fetch_node_float(KernelGlobals *kg, int offset)
+ccl_device_inline float4 fetch_node_float(const KernelGlobals *kg, int offset)
{
uint4 node = kernel_tex_fetch(__svm_nodes, offset);
return make_float4(__uint_as_float(node.x),
@@ -217,26 +219,11 @@ CCL_NAMESPACE_END
CCL_NAMESPACE_BEGIN
/* Main Interpreter Loop */
-#if defined(__KERNEL_OPTIX__) && defined(__SHADER_RAYTRACE__)
-ccl_device_inline void svm_eval_nodes(KernelGlobals *kg,
- ShaderData *sd,
- ccl_addr_space PathState *state,
- ccl_global float *buffer,
- ShaderType type,
- int path_flag)
-{
- optixDirectCall<void>(0, kg, sd, state, buffer, type, path_flag);
-}
-extern "C" __device__ void __direct_callable__svm_eval_nodes(
-#else
-ccl_device_noinline void svm_eval_nodes(
-#endif
- KernelGlobals *kg,
- ShaderData *sd,
- ccl_addr_space PathState *state,
- ccl_global float *buffer,
- ShaderType type,
- int path_flag)
+template<uint node_feature_mask, ShaderType type>
+ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS,
+ ShaderData *sd,
+ ccl_global float *render_buffer,
+ int path_flag)
{
float stack[SVM_STACK_SIZE];
int offset = sd->shader & SHADER_MASK;
@@ -247,7 +234,6 @@ ccl_device_noinline void svm_eval_nodes(
switch (node.x) {
case NODE_END:
return;
-#if NODES_GROUP(NODE_GROUP_LEVEL_0)
case NODE_SHADER_JUMP: {
if (type == SHADER_TYPE_SURFACE)
offset = node.y;
@@ -260,13 +246,18 @@ ccl_device_noinline void svm_eval_nodes(
break;
}
case NODE_CLOSURE_BSDF:
- svm_node_closure_bsdf(kg, sd, stack, node, type, path_flag, &offset);
+ offset = svm_node_closure_bsdf<node_feature_mask, type>(
+ kg, sd, stack, node, path_flag, offset);
break;
case NODE_CLOSURE_EMISSION:
- svm_node_closure_emission(sd, stack, node);
+ if (KERNEL_NODES_FEATURE(EMISSION)) {
+ svm_node_closure_emission(sd, stack, node);
+ }
break;
case NODE_CLOSURE_BACKGROUND:
- svm_node_closure_background(sd, stack, node);
+ if (KERNEL_NODES_FEATURE(EMISSION)) {
+ svm_node_closure_background(sd, stack, node);
+ }
break;
case NODE_CLOSURE_SET_WEIGHT:
svm_node_closure_set_weight(sd, node.y, node.z, node.w);
@@ -275,7 +266,9 @@ ccl_device_noinline void svm_eval_nodes(
svm_node_closure_weight(sd, stack, node.y);
break;
case NODE_EMISSION_WEIGHT:
- svm_node_emission_weight(kg, sd, stack, node);
+ if (KERNEL_NODES_FEATURE(EMISSION)) {
+ svm_node_emission_weight(kg, sd, stack, node);
+ }
break;
case NODE_MIX_CLOSURE:
svm_node_mix_closure(sd, stack, node);
@@ -295,86 +288,108 @@ ccl_device_noinline void svm_eval_nodes(
svm_node_convert(kg, sd, stack, node.y, node.z, node.w);
break;
case NODE_TEX_COORD:
- svm_node_tex_coord(kg, sd, path_flag, stack, node, &offset);
+ offset = svm_node_tex_coord(kg, sd, path_flag, stack, node, offset);
break;
case NODE_VALUE_F:
svm_node_value_f(kg, sd, stack, node.y, node.z);
break;
case NODE_VALUE_V:
- svm_node_value_v(kg, sd, stack, node.y, &offset);
+ offset = svm_node_value_v(kg, sd, stack, node.y, offset);
break;
case NODE_ATTR:
- svm_node_attr(kg, sd, stack, node);
+ svm_node_attr<node_feature_mask>(kg, sd, stack, node);
break;
case NODE_VERTEX_COLOR:
svm_node_vertex_color(kg, sd, stack, node.y, node.z, node.w);
break;
-# if NODES_FEATURE(NODE_FEATURE_BUMP)
case NODE_GEOMETRY_BUMP_DX:
- svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z);
+ if (KERNEL_NODES_FEATURE(BUMP)) {
+ svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z);
+ }
break;
case NODE_GEOMETRY_BUMP_DY:
- svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z);
+ if (KERNEL_NODES_FEATURE(BUMP)) {
+ svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z);
+ }
break;
case NODE_SET_DISPLACEMENT:
- svm_node_set_displacement(kg, sd, stack, node.y);
+ if (KERNEL_NODES_FEATURE(BUMP)) {
+ svm_node_set_displacement(kg, sd, stack, node.y);
+ }
break;
case NODE_DISPLACEMENT:
- svm_node_displacement(kg, sd, stack, node);
+ if (KERNEL_NODES_FEATURE(BUMP)) {
+ svm_node_displacement(kg, sd, stack, node);
+ }
break;
case NODE_VECTOR_DISPLACEMENT:
- svm_node_vector_displacement(kg, sd, stack, node, &offset);
+ if (KERNEL_NODES_FEATURE(BUMP)) {
+ offset = svm_node_vector_displacement(kg, sd, stack, node, offset);
+ }
break;
-# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */
case NODE_TEX_IMAGE:
- svm_node_tex_image(kg, sd, stack, node, &offset);
+ offset = svm_node_tex_image(kg, sd, stack, node, offset);
break;
case NODE_TEX_IMAGE_BOX:
svm_node_tex_image_box(kg, sd, stack, node);
break;
case NODE_TEX_NOISE:
- svm_node_tex_noise(kg, sd, stack, node.y, node.z, node.w, &offset);
+ offset = svm_node_tex_noise(kg, sd, stack, node.y, node.z, node.w, offset);
break;
-# if NODES_FEATURE(NODE_FEATURE_BUMP)
case NODE_SET_BUMP:
- svm_node_set_bump(kg, sd, stack, node);
+ if (KERNEL_NODES_FEATURE(BUMP)) {
+ svm_node_set_bump(kg, sd, stack, node);
+ }
break;
case NODE_ATTR_BUMP_DX:
- svm_node_attr_bump_dx(kg, sd, stack, node);
+ if (KERNEL_NODES_FEATURE(BUMP)) {
+ svm_node_attr_bump_dx(kg, sd, stack, node);
+ }
break;
case NODE_ATTR_BUMP_DY:
- svm_node_attr_bump_dy(kg, sd, stack, node);
+ if (KERNEL_NODES_FEATURE(BUMP)) {
+ svm_node_attr_bump_dy(kg, sd, stack, node);
+ }
break;
case NODE_VERTEX_COLOR_BUMP_DX:
- svm_node_vertex_color_bump_dx(kg, sd, stack, node.y, node.z, node.w);
+ if (KERNEL_NODES_FEATURE(BUMP)) {
+ svm_node_vertex_color_bump_dx(kg, sd, stack, node.y, node.z, node.w);
+ }
break;
case NODE_VERTEX_COLOR_BUMP_DY:
- svm_node_vertex_color_bump_dy(kg, sd, stack, node.y, node.z, node.w);
+ if (KERNEL_NODES_FEATURE(BUMP)) {
+ svm_node_vertex_color_bump_dy(kg, sd, stack, node.y, node.z, node.w);
+ }
break;
case NODE_TEX_COORD_BUMP_DX:
- svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, &offset);
+ if (KERNEL_NODES_FEATURE(BUMP)) {
+ offset = svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, offset);
+ }
break;
case NODE_TEX_COORD_BUMP_DY:
- svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, &offset);
+ if (KERNEL_NODES_FEATURE(BUMP)) {
+ offset = svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, offset);
+ }
break;
case NODE_CLOSURE_SET_NORMAL:
- svm_node_set_normal(kg, sd, stack, node.y, node.z);
+ if (KERNEL_NODES_FEATURE(BUMP)) {
+ svm_node_set_normal(kg, sd, stack, node.y, node.z);
+ }
break;
-# if NODES_FEATURE(NODE_FEATURE_BUMP_STATE)
case NODE_ENTER_BUMP_EVAL:
- svm_node_enter_bump_eval(kg, sd, stack, node.y);
+ if (KERNEL_NODES_FEATURE(BUMP_STATE)) {
+ svm_node_enter_bump_eval(kg, sd, stack, node.y);
+ }
break;
case NODE_LEAVE_BUMP_EVAL:
- svm_node_leave_bump_eval(kg, sd, stack, node.y);
+ if (KERNEL_NODES_FEATURE(BUMP_STATE)) {
+ svm_node_leave_bump_eval(kg, sd, stack, node.y);
+ }
break;
-# endif /* NODES_FEATURE(NODE_FEATURE_BUMP_STATE) */
-# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */
case NODE_HSV:
- svm_node_hsv(kg, sd, stack, node, &offset);
+ svm_node_hsv(kg, sd, stack, node);
break;
-#endif /* NODES_GROUP(NODE_GROUP_LEVEL_0) */
-#if NODES_GROUP(NODE_GROUP_LEVEL_1)
case NODE_CLOSURE_HOLDOUT:
svm_node_closure_holdout(sd, stack, node);
break;
@@ -384,22 +399,24 @@ ccl_device_noinline void svm_eval_nodes(
case NODE_LAYER_WEIGHT:
svm_node_layer_weight(sd, stack, node);
break;
-# if NODES_FEATURE(NODE_FEATURE_VOLUME)
case NODE_CLOSURE_VOLUME:
- svm_node_closure_volume(kg, sd, stack, node, type);
+ if (KERNEL_NODES_FEATURE(VOLUME)) {
+ svm_node_closure_volume<type>(kg, sd, stack, node);
+ }
break;
case NODE_PRINCIPLED_VOLUME:
- svm_node_principled_volume(kg, sd, stack, node, type, path_flag, &offset);
+ if (KERNEL_NODES_FEATURE(VOLUME)) {
+ offset = svm_node_principled_volume<type>(kg, sd, stack, node, path_flag, offset);
+ }
break;
-# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */
case NODE_MATH:
- svm_node_math(kg, sd, stack, node.y, node.z, node.w, &offset);
+ svm_node_math(kg, sd, stack, node.y, node.z, node.w);
break;
case NODE_VECTOR_MATH:
- svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, &offset);
+ offset = svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, offset);
break;
case NODE_RGB_RAMP:
- svm_node_rgb_ramp(kg, sd, stack, node, &offset);
+ offset = svm_node_rgb_ramp(kg, sd, stack, node, offset);
break;
case NODE_GAMMA:
svm_node_gamma(sd, stack, node.y, node.z, node.w);
@@ -408,7 +425,7 @@ ccl_device_noinline void svm_eval_nodes(
svm_node_brightness(sd, stack, node.y, node.z, node.w);
break;
case NODE_LIGHT_PATH:
- svm_node_light_path(sd, state, stack, node.y, node.z, path_flag);
+ svm_node_light_path(INTEGRATOR_STATE_PASS, sd, stack, node.y, node.z, path_flag);
break;
case NODE_OBJECT_INFO:
svm_node_object_info(kg, sd, stack, node.y, node.z);
@@ -416,22 +433,22 @@ ccl_device_noinline void svm_eval_nodes(
case NODE_PARTICLE_INFO:
svm_node_particle_info(kg, sd, stack, node.y, node.z);
break;
-# if defined(__HAIR__) && NODES_FEATURE(NODE_FEATURE_HAIR)
+#if defined(__HAIR__)
case NODE_HAIR_INFO:
- svm_node_hair_info(kg, sd, stack, node.y, node.z);
+ if (KERNEL_NODES_FEATURE(HAIR)) {
+ svm_node_hair_info(kg, sd, stack, node.y, node.z);
+ }
break;
-# endif /* NODES_FEATURE(NODE_FEATURE_HAIR) */
-#endif /* NODES_GROUP(NODE_GROUP_LEVEL_1) */
+#endif
-#if NODES_GROUP(NODE_GROUP_LEVEL_2)
case NODE_TEXTURE_MAPPING:
- svm_node_texture_mapping(kg, sd, stack, node.y, node.z, &offset);
+ offset = svm_node_texture_mapping(kg, sd, stack, node.y, node.z, offset);
break;
case NODE_MAPPING:
- svm_node_mapping(kg, sd, stack, node.y, node.z, node.w, &offset);
+ svm_node_mapping(kg, sd, stack, node.y, node.z, node.w);
break;
case NODE_MIN_MAX:
- svm_node_min_max(kg, sd, stack, node.y, node.z, &offset);
+ offset = svm_node_min_max(kg, sd, stack, node.y, node.z, offset);
break;
case NODE_CAMERA:
svm_node_camera(kg, sd, stack, node.y, node.z, node.w);
@@ -440,47 +457,46 @@ ccl_device_noinline void svm_eval_nodes(
svm_node_tex_environment(kg, sd, stack, node);
break;
case NODE_TEX_SKY:
- svm_node_tex_sky(kg, sd, stack, node, &offset);
+ offset = svm_node_tex_sky(kg, sd, stack, node, offset);
break;
case NODE_TEX_GRADIENT:
svm_node_tex_gradient(sd, stack, node);
break;
case NODE_TEX_VORONOI:
- svm_node_tex_voronoi(kg, sd, stack, node.y, node.z, node.w, &offset);
+ offset = svm_node_tex_voronoi<node_feature_mask>(
+ kg, sd, stack, node.y, node.z, node.w, offset);
break;
case NODE_TEX_MUSGRAVE:
- svm_node_tex_musgrave(kg, sd, stack, node.y, node.z, node.w, &offset);
+ offset = svm_node_tex_musgrave(kg, sd, stack, node.y, node.z, node.w, offset);
break;
case NODE_TEX_WAVE:
- svm_node_tex_wave(kg, sd, stack, node, &offset);
+ offset = svm_node_tex_wave(kg, sd, stack, node, offset);
break;
case NODE_TEX_MAGIC:
- svm_node_tex_magic(kg, sd, stack, node, &offset);
+ offset = svm_node_tex_magic(kg, sd, stack, node, offset);
break;
case NODE_TEX_CHECKER:
svm_node_tex_checker(kg, sd, stack, node);
break;
case NODE_TEX_BRICK:
- svm_node_tex_brick(kg, sd, stack, node, &offset);
+ offset = svm_node_tex_brick(kg, sd, stack, node, offset);
break;
case NODE_TEX_WHITE_NOISE:
- svm_node_tex_white_noise(kg, sd, stack, node.y, node.z, node.w, &offset);
+ svm_node_tex_white_noise(kg, sd, stack, node.y, node.z, node.w);
break;
case NODE_NORMAL:
- svm_node_normal(kg, sd, stack, node.y, node.z, node.w, &offset);
+ offset = svm_node_normal(kg, sd, stack, node.y, node.z, node.w, offset);
break;
case NODE_LIGHT_FALLOFF:
svm_node_light_falloff(sd, stack, node);
break;
case NODE_IES:
- svm_node_ies(kg, sd, stack, node, &offset);
+ svm_node_ies(kg, sd, stack, node);
break;
-#endif /* NODES_GROUP(NODE_GROUP_LEVEL_2) */
-#if NODES_GROUP(NODE_GROUP_LEVEL_3)
case NODE_RGB_CURVES:
case NODE_VECTOR_CURVES:
- svm_node_curves(kg, sd, stack, node, &offset);
+ offset = svm_node_curves(kg, sd, stack, node, offset);
break;
case NODE_TANGENT:
svm_node_tangent(kg, sd, stack, node);
@@ -492,7 +508,7 @@ ccl_device_noinline void svm_eval_nodes(
svm_node_invert(sd, stack, node.y, node.z, node.w);
break;
case NODE_MIX:
- svm_node_mix(kg, sd, stack, node.y, node.z, node.w, &offset);
+ offset = svm_node_mix(kg, sd, stack, node.y, node.z, node.w, offset);
break;
case NODE_SEPARATE_VECTOR:
svm_node_separate_vector(sd, stack, node.y, node.z, node.w);
@@ -501,10 +517,10 @@ ccl_device_noinline void svm_eval_nodes(
svm_node_combine_vector(sd, stack, node.y, node.z, node.w);
break;
case NODE_SEPARATE_HSV:
- svm_node_separate_hsv(kg, sd, stack, node.y, node.z, node.w, &offset);
+ offset = svm_node_separate_hsv(kg, sd, stack, node.y, node.z, node.w, offset);
break;
case NODE_COMBINE_HSV:
- svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, &offset);
+ offset = svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, offset);
break;
case NODE_VECTOR_ROTATE:
svm_node_vector_rotate(sd, stack, node.y, node.z, node.w);
@@ -522,39 +538,36 @@ ccl_device_noinline void svm_eval_nodes(
svm_node_blackbody(kg, sd, stack, node.y, node.z);
break;
case NODE_MAP_RANGE:
- svm_node_map_range(kg, sd, stack, node.y, node.z, node.w, &offset);
+ offset = svm_node_map_range(kg, sd, stack, node.y, node.z, node.w, offset);
break;
case NODE_CLAMP:
- svm_node_clamp(kg, sd, stack, node.y, node.z, node.w, &offset);
+ offset = svm_node_clamp(kg, sd, stack, node.y, node.z, node.w, offset);
break;
-# ifdef __SHADER_RAYTRACE__
+#ifdef __SHADER_RAYTRACE__
case NODE_BEVEL:
- svm_node_bevel(kg, sd, state, stack, node);
+ svm_node_bevel<node_feature_mask>(INTEGRATOR_STATE_PASS, sd, stack, node);
break;
case NODE_AMBIENT_OCCLUSION:
- svm_node_ao(kg, sd, state, stack, node);
+ svm_node_ao<node_feature_mask>(INTEGRATOR_STATE_PASS, sd, stack, node);
break;
-# endif /* __SHADER_RAYTRACE__ */
-#endif /* NODES_GROUP(NODE_GROUP_LEVEL_3) */
+#endif
-#if NODES_GROUP(NODE_GROUP_LEVEL_4)
-# if NODES_FEATURE(NODE_FEATURE_VOLUME)
case NODE_TEX_VOXEL:
- svm_node_tex_voxel(kg, sd, stack, node, &offset);
+ if (KERNEL_NODES_FEATURE(VOLUME)) {
+ offset = svm_node_tex_voxel(kg, sd, stack, node, offset);
+ }
break;
-# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */
case NODE_AOV_START:
- if (!svm_node_aov_check(state, buffer)) {
+ if (!svm_node_aov_check(path_flag, render_buffer)) {
return;
}
break;
case NODE_AOV_COLOR:
- svm_node_aov_color(kg, sd, stack, node, buffer);
+ svm_node_aov_color(INTEGRATOR_STATE_PASS, sd, stack, node, render_buffer);
break;
case NODE_AOV_VALUE:
- svm_node_aov_value(kg, sd, stack, node, buffer);
+ svm_node_aov_value(INTEGRATOR_STATE_PASS, sd, stack, node, render_buffer);
break;
-#endif /* NODES_GROUP(NODE_GROUP_LEVEL_4) */
default:
kernel_assert(!"Unknown node type was passed to the SVM machine");
return;
diff --git a/intern/cycles/kernel/svm/svm_ao.h b/intern/cycles/kernel/svm/svm_ao.h
index 4cb986b897a..34ac2cb8fbf 100644
--- a/intern/cycles/kernel/svm/svm_ao.h
+++ b/intern/cycles/kernel/svm/svm_ao.h
@@ -14,20 +14,25 @@
* limitations under the License.
*/
+#include "kernel/bvh/bvh.h"
+
CCL_NAMESPACE_BEGIN
#ifdef __SHADER_RAYTRACE__
-ccl_device_noinline float svm_ao(KernelGlobals *kg,
- ShaderData *sd,
- float3 N,
- ccl_addr_space PathState *state,
- float max_dist,
- int num_samples,
- int flags)
+# ifdef __KERNEL_OPTIX__
+extern "C" __device__ float __direct_callable__svm_node_ao(INTEGRATOR_STATE_CONST_ARGS,
+# else
+ccl_device float svm_ao(INTEGRATOR_STATE_CONST_ARGS,
+# endif
+ ShaderData *sd,
+ float3 N,
+ float max_dist,
+ int num_samples,
+ int flags)
{
if (flags & NODE_AO_GLOBAL_RADIUS) {
- max_dist = kernel_data.background.ao_distance;
+ max_dist = kernel_data.integrator.ao_bounces_distance;
}
/* Early out if no sampling needed. */
@@ -47,11 +52,14 @@ ccl_device_noinline float svm_ao(KernelGlobals *kg,
float3 T, B;
make_orthonormals(N, &T, &B);
+ /* TODO: support ray-tracing in shadow shader evaluation? */
+ RNGState rng_state;
+ path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
+
int unoccluded = 0;
for (int sample = 0; sample < num_samples; sample++) {
float disk_u, disk_v;
- path_branched_rng_2D(
- kg, state->rng_hash, state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v);
+ path_branched_rng_2D(kg, &rng_state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v);
float2 d = concentric_sample_disk(disk_u, disk_v);
float3 D = make_float3(d.x, d.y, safe_sqrtf(1.0f - dot(d, d)));
@@ -62,8 +70,8 @@ ccl_device_noinline float svm_ao(KernelGlobals *kg,
ray.D = D.x * T + D.y * B + D.z * N;
ray.t = max_dist;
ray.time = sd->time;
- ray.dP = sd->dP;
- ray.dD = differential3_zero();
+ ray.dP = differential_zero_compact();
+ ray.dD = differential_zero_compact();
if (flags & NODE_AO_ONLY_LOCAL) {
if (!scene_intersect_local(kg, &ray, NULL, sd->object, NULL, 0)) {
@@ -81,8 +89,14 @@ ccl_device_noinline float svm_ao(KernelGlobals *kg,
return ((float)unoccluded) / num_samples;
}
-ccl_device void svm_node_ao(
- KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float *stack, uint4 node)
+template<uint node_feature_mask>
+# if defined(__KERNEL_OPTIX__)
+ccl_device_inline
+# else
+ccl_device_noinline
+# endif
+ void
+ svm_node_ao(INTEGRATOR_STATE_CONST_ARGS, ShaderData *sd, float *stack, uint4 node)
{
uint flags, dist_offset, normal_offset, out_ao_offset;
svm_unpack_node_uchar4(node.y, &flags, &dist_offset, &normal_offset, &out_ao_offset);
@@ -92,7 +106,16 @@ ccl_device void svm_node_ao(
float dist = stack_load_float_default(stack, dist_offset, node.w);
float3 normal = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N;
- float ao = svm_ao(kg, sd, normal, state, dist, samples, flags);
+
+ float ao = 1.0f;
+
+ if (KERNEL_NODES_FEATURE(RAYTRACE)) {
+# ifdef __KERNEL_OPTIX__
+ ao = optixDirectCall<float>(0, INTEGRATOR_STATE_PASS, sd, normal, dist, samples, flags);
+# else
+ ao = svm_ao(INTEGRATOR_STATE_PASS, sd, normal, dist, samples, flags);
+# endif
+ }
if (stack_valid(out_ao_offset)) {
stack_store_float(stack, out_ao_offset, ao);
diff --git a/intern/cycles/kernel/svm/svm_aov.h b/intern/cycles/kernel/svm/svm_aov.h
index 899e466d099..26dec9717b3 100644
--- a/intern/cycles/kernel/svm/svm_aov.h
+++ b/intern/cycles/kernel/svm/svm_aov.h
@@ -14,36 +14,50 @@
* limitations under the License.
*/
+#include "kernel/kernel_write_passes.h"
+
CCL_NAMESPACE_BEGIN
-ccl_device_inline bool svm_node_aov_check(ccl_addr_space PathState *state,
- ccl_global float *buffer)
+ccl_device_inline bool svm_node_aov_check(const int path_flag, ccl_global float *render_buffer)
{
- int path_flag = state->flag;
-
bool is_primary = (path_flag & PATH_RAY_CAMERA) && (!(path_flag & PATH_RAY_SINGLE_PASS_DONE));
- return ((buffer != NULL) && is_primary);
+ return ((render_buffer != NULL) && is_primary);
}
-ccl_device void svm_node_aov_color(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ccl_global float *buffer)
+ccl_device void svm_node_aov_color(INTEGRATOR_STATE_CONST_ARGS,
+ ShaderData *sd,
+ float *stack,
+ uint4 node,
+ ccl_global float *render_buffer)
{
float3 val = stack_load_float3(stack, node.y);
- if (buffer) {
- kernel_write_pass_float4(buffer + kernel_data.film.pass_aov_color + 4 * node.z,
- make_float4(val.x, val.y, val.z, 1.0f));
+ if (render_buffer && !INTEGRATOR_STATE_IS_NULL) {
+ const uint32_t render_pixel_index = INTEGRATOR_STATE(path, render_pixel_index);
+ const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
+ kernel_data.film.pass_stride;
+ ccl_global float *buffer = render_buffer + render_buffer_offset +
+ (kernel_data.film.pass_aov_color + node.z);
+ kernel_write_pass_float3(buffer, make_float3(val.x, val.y, val.z));
}
}
-ccl_device void svm_node_aov_value(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ccl_global float *buffer)
+ccl_device void svm_node_aov_value(INTEGRATOR_STATE_CONST_ARGS,
+ ShaderData *sd,
+ float *stack,
+ uint4 node,
+ ccl_global float *render_buffer)
{
float val = stack_load_float(stack, node.y);
- if (buffer) {
- kernel_write_pass_float(buffer + kernel_data.film.pass_aov_value + node.z, val);
+ if (render_buffer && !INTEGRATOR_STATE_IS_NULL) {
+ const uint32_t render_pixel_index = INTEGRATOR_STATE(path, render_pixel_index);
+ const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
+ kernel_data.film.pass_stride;
+ ccl_global float *buffer = render_buffer + render_buffer_offset +
+ (kernel_data.film.pass_aov_value + node.z);
+ kernel_write_pass_float(buffer, val);
}
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h
index 62740824ad1..5f94b20af73 100644
--- a/intern/cycles/kernel/svm/svm_attribute.h
+++ b/intern/cycles/kernel/svm/svm_attribute.h
@@ -18,8 +18,11 @@ CCL_NAMESPACE_BEGIN
/* Attribute Node */
-ccl_device AttributeDescriptor svm_node_attr_init(
- KernelGlobals *kg, ShaderData *sd, uint4 node, NodeAttributeOutputType *type, uint *out_offset)
+ccl_device AttributeDescriptor svm_node_attr_init(const KernelGlobals *kg,
+ ShaderData *sd,
+ uint4 node,
+ NodeAttributeOutputType *type,
+ uint *out_offset)
{
*out_offset = node.z;
*type = (NodeAttributeOutputType)node.w;
@@ -44,31 +47,37 @@ ccl_device AttributeDescriptor svm_node_attr_init(
return desc;
}
-ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+template<uint node_feature_mask>
+ccl_device_noinline void svm_node_attr(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node)
{
NodeAttributeOutputType type = NODE_ATTR_OUTPUT_FLOAT;
uint out_offset = 0;
AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
#ifdef __VOLUME__
- /* Volumes
- * NOTE: moving this into its own node type might help improve performance. */
- if (primitive_is_volume_attribute(sd, desc)) {
- const float4 value = volume_attribute_float4(kg, sd, desc);
+ if (KERNEL_NODES_FEATURE(VOLUME)) {
+ /* Volumes
+ * NOTE: moving this into its own node type might help improve performance. */
+ if (primitive_is_volume_attribute(sd, desc)) {
+ const float4 value = volume_attribute_float4(kg, sd, desc);
- if (type == NODE_ATTR_OUTPUT_FLOAT) {
- const float f = volume_attribute_value_to_float(value);
- stack_store_float(stack, out_offset, f);
- }
- else if (type == NODE_ATTR_OUTPUT_FLOAT3) {
- const float3 f = volume_attribute_value_to_float3(value);
- stack_store_float3(stack, out_offset, f);
+ if (type == NODE_ATTR_OUTPUT_FLOAT) {
+ const float f = volume_attribute_value_to_float(value);
+ stack_store_float(stack, out_offset, f);
+ }
+ else if (type == NODE_ATTR_OUTPUT_FLOAT3) {
+ const float3 f = volume_attribute_value_to_float3(value);
+ stack_store_float3(stack, out_offset, f);
+ }
+ else {
+ const float f = volume_attribute_value_to_alpha(value);
+ stack_store_float(stack, out_offset, f);
+ }
+ return;
}
- else {
- const float f = volume_attribute_value_to_alpha(value);
- stack_store_float(stack, out_offset, f);
- }
- return;
}
#endif
@@ -139,7 +148,10 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u
}
}
-ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_attr_bump_dx(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node)
{
NodeAttributeOutputType type = NODE_ATTR_OUTPUT_FLOAT;
uint out_offset = 0;
@@ -232,7 +244,10 @@ ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *
}
}
-ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_attr_bump_dy(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node)
{
NodeAttributeOutputType type = NODE_ATTR_OUTPUT_FLOAT;
uint out_offset = 0;
diff --git a/intern/cycles/kernel/svm/svm_bevel.h b/intern/cycles/kernel/svm/svm_bevel.h
index bf5957ec9e4..9d7ce202d49 100644
--- a/intern/cycles/kernel/svm/svm_bevel.h
+++ b/intern/cycles/kernel/svm/svm_bevel.h
@@ -14,21 +14,95 @@
* limitations under the License.
*/
+#include "kernel/bvh/bvh.h"
+#include "kernel/kernel_montecarlo.h"
+#include "kernel/kernel_random.h"
+
CCL_NAMESPACE_BEGIN
#ifdef __SHADER_RAYTRACE__
+/* Planar Cubic BSSRDF falloff, reused for bevel.
+ *
+ * This is basically (Rm - x)^3, with some factors to normalize it. For sampling
+ * we integrate 2*pi*x * (Rm - x)^3, which gives us a quintic equation that as
+ * far as I can tell has no closed form solution. So we get an iterative solution
+ * instead with newton-raphson. */
+
+ccl_device float svm_bevel_cubic_eval(const float radius, float r)
+{
+ const float Rm = radius;
+
+ if (r >= Rm)
+ return 0.0f;
+
+ /* integrate (2*pi*r * 10*(R - r)^3)/(pi * R^5) from 0 to R = 1 */
+ const float Rm5 = (Rm * Rm) * (Rm * Rm) * Rm;
+ const float f = Rm - r;
+ const float num = f * f * f;
+
+ return (10.0f * num) / (Rm5 * M_PI_F);
+}
+
+ccl_device float svm_bevel_cubic_pdf(const float radius, float r)
+{
+ return svm_bevel_cubic_eval(radius, r);
+}
+
+/* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */
+ccl_device_forceinline float svm_bevel_cubic_quintic_root_find(float xi)
+{
+ /* newton-raphson iteration, usually succeeds in 2-4 iterations, except
+ * outside 0.02 ... 0.98 where it can go up to 10, so overall performance
+ * should not be too bad */
+ const float tolerance = 1e-6f;
+ const int max_iteration_count = 10;
+ float x = 0.25f;
+ int i;
+
+ for (i = 0; i < max_iteration_count; i++) {
+ float x2 = x * x;
+ float x3 = x2 * x;
+ float nx = (1.0f - x);
+
+ float f = 10.0f * x2 - 20.0f * x3 + 15.0f * x2 * x2 - 4.0f * x2 * x3 - xi;
+ float f_ = 20.0f * (x * nx) * (nx * nx);
+
+ if (fabsf(f) < tolerance || f_ == 0.0f)
+ break;
+
+ x = saturate(x - f / f_);
+ }
+
+ return x;
+}
+
+ccl_device void svm_bevel_cubic_sample(const float radius, float xi, float *r, float *h)
+{
+ float Rm = radius;
+ float r_ = svm_bevel_cubic_quintic_root_find(xi);
+
+ r_ *= Rm;
+ *r = r_;
+
+ /* h^2 + r^2 = Rm^2 */
+ *h = safe_sqrtf(Rm * Rm - r_ * r_);
+}
+
/* Bevel shader averaging normals from nearby surfaces.
*
* Sampling strategy from: BSSRDF Importance Sampling, SIGGRAPH 2013
* http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf
*/
-ccl_device_noinline float3 svm_bevel(KernelGlobals *kg,
- ShaderData *sd,
- ccl_addr_space PathState *state,
- float radius,
- int num_samples)
+# ifdef __KERNEL_OPTIX__
+extern "C" __device__ float3 __direct_callable__svm_node_bevel(INTEGRATOR_STATE_CONST_ARGS,
+# else
+ccl_device float3 svm_bevel(INTEGRATOR_STATE_CONST_ARGS,
+# endif
+ ShaderData *sd,
+ float radius,
+ int num_samples)
{
/* Early out if no sampling needed. */
if (radius <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) {
@@ -41,21 +115,27 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg,
}
/* Don't bevel for blurry indirect rays. */
- if (state->min_ray_pdf < 8.0f) {
+ if (INTEGRATOR_STATE(path, min_ray_pdf) < 8.0f) {
return sd->N;
}
/* Setup for multi intersection. */
LocalIntersection isect;
- uint lcg_state = lcg_state_init_addrspace(state, 0x64c6a40e);
+ uint lcg_state = lcg_state_init(INTEGRATOR_STATE(path, rng_hash),
+ INTEGRATOR_STATE(path, rng_offset),
+ INTEGRATOR_STATE(path, sample),
+ 0x64c6a40e);
/* Sample normals from surrounding points on surface. */
float3 sum_N = make_float3(0.0f, 0.0f, 0.0f);
+ /* TODO: support ray-tracing in shadow shader evaluation? */
+ RNGState rng_state;
+ path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
+
for (int sample = 0; sample < num_samples; sample++) {
float disk_u, disk_v;
- path_branched_rng_2D(
- kg, state->rng_hash, state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v);
+ path_branched_rng_2D(kg, &rng_state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v);
/* Pick random axis in local frame and point on disk. */
float3 disk_N, disk_T, disk_B;
@@ -97,7 +177,7 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg,
float disk_height;
/* Perhaps find something better than Cubic BSSRDF, but happens to work well. */
- bssrdf_cubic_sample(radius, 0.0f, disk_r, &disk_r, &disk_height);
+ svm_bevel_cubic_sample(radius, disk_r, &disk_r, &disk_height);
float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B;
@@ -106,8 +186,8 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg,
ray->P = sd->P + disk_N * disk_height + disk_P;
ray->D = -disk_N;
ray->t = 2.0f * disk_height;
- ray->dP = sd->dP;
- ray->dD = differential3_zero();
+ ray->dP = differential_zero_compact();
+ ray->dD = differential_zero_compact();
ray->time = sd->time;
/* Intersect with the same object. if multiple intersections are found it
@@ -120,14 +200,16 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg,
/* Quickly retrieve P and Ng without setting up ShaderData. */
float3 hit_P;
if (sd->type & PRIMITIVE_TRIANGLE) {
- hit_P = triangle_refine_local(kg, sd, &isect.hits[hit], ray);
+ hit_P = triangle_refine_local(
+ kg, sd, ray->P, ray->D, ray->t, isect.hits[hit].object, isect.hits[hit].prim);
}
# ifdef __OBJECT_MOTION__
else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) {
float3 verts[3];
motion_triangle_vertices(
kg, sd->object, kernel_tex_fetch(__prim_index, isect.hits[hit].prim), sd->time, verts);
- hit_P = motion_triangle_refine_local(kg, sd, &isect.hits[hit], ray, verts);
+ hit_P = motion_triangle_refine_local(
+ kg, sd, ray->P, ray->D, ray->t, isect.hits[hit].object, isect.hits[hit].prim, verts);
}
# endif /* __OBJECT_MOTION__ */
@@ -173,7 +255,7 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg,
/* Multiple importance sample between 3 axes, power heuristic
* found to be slightly better than balance heuristic. pdf_N
- * in the MIS weight and denominator cancelled out. */
+ * in the MIS weight and denominator canceled out. */
float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B));
if (isect.num_hits > LOCAL_MAX_HITS) {
w *= isect.num_hits / (float)LOCAL_MAX_HITS;
@@ -183,8 +265,8 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg,
float r = len(hit_P - sd->P);
/* Compute weight. */
- float pdf = bssrdf_cubic_pdf(radius, 0.0f, r);
- float disk_pdf = bssrdf_cubic_pdf(radius, 0.0f, disk_r);
+ float pdf = svm_bevel_cubic_pdf(radius, r);
+ float disk_pdf = svm_bevel_cubic_pdf(radius, disk_r);
w *= pdf / disk_pdf;
@@ -198,19 +280,34 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg,
return is_zero(N) ? sd->N : (sd->flag & SD_BACKFACING) ? -N : N;
}
-ccl_device void svm_node_bevel(
- KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float *stack, uint4 node)
+template<uint node_feature_mask>
+# if defined(__KERNEL_OPTIX__)
+ccl_device_inline
+# else
+ccl_device_noinline
+# endif
+ void
+ svm_node_bevel(INTEGRATOR_STATE_CONST_ARGS, ShaderData *sd, float *stack, uint4 node)
{
uint num_samples, radius_offset, normal_offset, out_offset;
svm_unpack_node_uchar4(node.y, &num_samples, &radius_offset, &normal_offset, &out_offset);
float radius = stack_load_float(stack, radius_offset);
- float3 bevel_N = svm_bevel(kg, sd, state, radius, num_samples);
- if (stack_valid(normal_offset)) {
- /* Preserve input normal. */
- float3 ref_N = stack_load_float3(stack, normal_offset);
- bevel_N = normalize(ref_N + (bevel_N - sd->N));
+ float3 bevel_N = sd->N;
+
+ if (KERNEL_NODES_FEATURE(RAYTRACE)) {
+# ifdef __KERNEL_OPTIX__
+ bevel_N = optixDirectCall<float3>(1, INTEGRATOR_STATE_PASS, sd, radius, num_samples);
+# else
+ bevel_N = svm_bevel(INTEGRATOR_STATE_PASS, sd, radius, num_samples);
+# endif
+
+ if (stack_valid(normal_offset)) {
+ /* Preserve input normal. */
+ float3 ref_N = stack_load_float3(stack, normal_offset);
+ bevel_N = normalize(ref_N + (bevel_N - sd->N));
+ }
}
stack_store_float3(stack, out_offset, bevel_N);
diff --git a/intern/cycles/kernel/svm/svm_blackbody.h b/intern/cycles/kernel/svm/svm_blackbody.h
index adfc50d961e..96b3703b954 100644
--- a/intern/cycles/kernel/svm/svm_blackbody.h
+++ b/intern/cycles/kernel/svm/svm_blackbody.h
@@ -34,8 +34,11 @@ CCL_NAMESPACE_BEGIN
/* Blackbody Node */
-ccl_device void svm_node_blackbody(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint temperature_offset, uint col_offset)
+ccl_device_noinline void svm_node_blackbody(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint temperature_offset,
+ uint col_offset)
{
/* Input */
float temperature = stack_load_float(stack, temperature_offset);
diff --git a/intern/cycles/kernel/svm/svm_brick.h b/intern/cycles/kernel/svm/svm_brick.h
index 6984afa30a5..dca1b220dd5 100644
--- a/intern/cycles/kernel/svm/svm_brick.h
+++ b/intern/cycles/kernel/svm/svm_brick.h
@@ -72,12 +72,12 @@ ccl_device_noinline_cpu float2 svm_brick(float3 p,
return make_float2(tint, mortar);
}
-ccl_device void svm_node_tex_brick(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device_noinline int svm_node_tex_brick(
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
{
- uint4 node2 = read_node(kg, offset);
- uint4 node3 = read_node(kg, offset);
- uint4 node4 = read_node(kg, offset);
+ uint4 node2 = read_node(kg, &offset);
+ uint4 node3 = read_node(kg, &offset);
+ uint4 node4 = read_node(kg, &offset);
/* Input and Output Sockets */
uint co_offset, color1_offset, color2_offset, mortar_offset, scale_offset;
@@ -133,6 +133,7 @@ ccl_device void svm_node_tex_brick(
stack_store_float3(stack, color_offset, color1 * (1.0f - f) + mortar * f);
if (stack_valid(fac_offset))
stack_store_float(stack, fac_offset, f);
+ return offset;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_brightness.h b/intern/cycles/kernel/svm/svm_brightness.h
index 9554b5946fb..2ed812acd71 100644
--- a/intern/cycles/kernel/svm/svm_brightness.h
+++ b/intern/cycles/kernel/svm/svm_brightness.h
@@ -16,7 +16,7 @@
CCL_NAMESPACE_BEGIN
-ccl_device void svm_node_brightness(
+ccl_device_noinline void svm_node_brightness(
ShaderData *sd, float *stack, uint in_color, uint out_color, uint node)
{
uint bright_offset, contrast_offset;
diff --git a/intern/cycles/kernel/svm/svm_bump.h b/intern/cycles/kernel/svm/svm_bump.h
index c9d430a2bba..8672839dbab 100644
--- a/intern/cycles/kernel/svm/svm_bump.h
+++ b/intern/cycles/kernel/svm/svm_bump.h
@@ -18,10 +18,10 @@ CCL_NAMESPACE_BEGIN
/* Bump Eval Nodes */
-ccl_device void svm_node_enter_bump_eval(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint offset)
+ccl_device_noinline void svm_node_enter_bump_eval(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint offset)
{
/* save state */
stack_store_float3(stack, offset + 0, sd->P);
@@ -45,10 +45,10 @@ ccl_device void svm_node_enter_bump_eval(KernelGlobals *kg,
}
}
-ccl_device void svm_node_leave_bump_eval(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint offset)
+ccl_device_noinline void svm_node_leave_bump_eval(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint offset)
{
/* restore state */
sd->P = stack_load_float3(stack, offset + 0);
diff --git a/intern/cycles/kernel/svm/svm_camera.h b/intern/cycles/kernel/svm/svm_camera.h
index 21a17acf5f1..40c0edcdad0 100644
--- a/intern/cycles/kernel/svm/svm_camera.h
+++ b/intern/cycles/kernel/svm/svm_camera.h
@@ -16,12 +16,12 @@
CCL_NAMESPACE_BEGIN
-ccl_device void svm_node_camera(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint out_vector,
- uint out_zdepth,
- uint out_distance)
+ccl_device_noinline void svm_node_camera(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint out_vector,
+ uint out_zdepth,
+ uint out_distance)
{
float distance;
float zdepth;
diff --git a/intern/cycles/kernel/svm/svm_checker.h b/intern/cycles/kernel/svm/svm_checker.h
index d54cb73df91..a9919c9ddc9 100644
--- a/intern/cycles/kernel/svm/svm_checker.h
+++ b/intern/cycles/kernel/svm/svm_checker.h
@@ -32,7 +32,10 @@ ccl_device float svm_checker(float3 p)
return ((xi % 2 == yi % 2) == (zi % 2)) ? 1.0f : 0.0f;
}
-ccl_device void svm_node_tex_checker(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_tex_checker(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node)
{
uint co_offset, color1_offset, color2_offset, scale_offset;
uint color_offset, fac_offset;
diff --git a/intern/cycles/kernel/svm/svm_clamp.h b/intern/cycles/kernel/svm/svm_clamp.h
index a85fd82754e..656bd31c085 100644
--- a/intern/cycles/kernel/svm/svm_clamp.h
+++ b/intern/cycles/kernel/svm/svm_clamp.h
@@ -18,18 +18,18 @@ CCL_NAMESPACE_BEGIN
/* Clamp Node */
-ccl_device void svm_node_clamp(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint value_stack_offset,
- uint parameters_stack_offsets,
- uint result_stack_offset,
- int *offset)
+ccl_device_noinline int svm_node_clamp(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint value_stack_offset,
+ uint parameters_stack_offsets,
+ uint result_stack_offset,
+ int offset)
{
uint min_stack_offset, max_stack_offset, type;
svm_unpack_node_uchar3(parameters_stack_offsets, &min_stack_offset, &max_stack_offset, &type);
- uint4 defaults = read_node(kg, offset);
+ uint4 defaults = read_node(kg, &offset);
float value = stack_load_float(stack, value_stack_offset);
float min = stack_load_float_default(stack, min_stack_offset, defaults.x);
@@ -41,6 +41,7 @@ ccl_device void svm_node_clamp(KernelGlobals *kg,
else {
stack_store_float(stack, result_stack_offset, clamp(value, min, max));
}
+ return offset;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index bbe8d72edf0..e2f6dde4ace 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -57,13 +57,9 @@ ccl_device void svm_node_glass_setup(
}
}
-ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint4 node,
- ShaderType shader_type,
- int path_flag,
- int *offset)
+template<uint node_feature_mask, ShaderType shader_type>
+ccl_device_noinline int svm_node_closure_bsdf(
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int path_flag, int offset)
{
uint type, param1_offset, param2_offset;
@@ -73,19 +69,19 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
1.0f);
/* note we read this extra node before weight check, so offset is added */
- uint4 data_node = read_node(kg, offset);
+ uint4 data_node = read_node(kg, &offset);
/* Only compute BSDF for surfaces, transparent variable is shared with volume extinction. */
- if (mix_weight == 0.0f || shader_type != SHADER_TYPE_SURFACE) {
+ if ((!KERNEL_NODES_FEATURE(BSDF) || shader_type != SHADER_TYPE_SURFACE) || mix_weight == 0.0f) {
if (type == CLOSURE_BSDF_PRINCIPLED_ID) {
/* Read all principled BSDF extra data to get the right offset. */
- read_node(kg, offset);
- read_node(kg, offset);
- read_node(kg, offset);
- read_node(kg, offset);
+ read_node(kg, &offset);
+ read_node(kg, &offset);
+ read_node(kg, &offset);
+ read_node(kg, &offset);
}
- return;
+ return offset;
}
float3 N = stack_valid(data_node.x) ? stack_load_float3(stack, data_node.x) : sd->N;
@@ -102,7 +98,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
sheen_offset, sheen_tint_offset, clearcoat_offset, clearcoat_roughness_offset,
eta_offset, transmission_offset, anisotropic_rotation_offset,
transmission_roughness_offset;
- uint4 data_node2 = read_node(kg, offset);
+ uint4 data_node2 = read_node(kg, &offset);
float3 T = stack_load_float3(stack, data_node.y);
svm_unpack_node_uchar4(data_node.z,
@@ -158,7 +154,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
float specular_weight = (1.0f - final_transmission);
// get the base color
- uint4 data_base_color = read_node(kg, offset);
+ uint4 data_base_color = read_node(kg, &offset);
float3 base_color = stack_valid(data_base_color.x) ?
stack_load_float3(stack, data_base_color.x) :
make_float3(__uint_as_float(data_base_color.y),
@@ -166,16 +162,21 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
__uint_as_float(data_base_color.w));
// get the additional clearcoat normal and subsurface scattering radius
- uint4 data_cn_ssr = read_node(kg, offset);
+ uint4 data_cn_ssr = read_node(kg, &offset);
float3 clearcoat_normal = stack_valid(data_cn_ssr.x) ?
stack_load_float3(stack, data_cn_ssr.x) :
sd->N;
float3 subsurface_radius = stack_valid(data_cn_ssr.y) ?
stack_load_float3(stack, data_cn_ssr.y) :
make_float3(1.0f, 1.0f, 1.0f);
+ float subsurface_ior = stack_valid(data_cn_ssr.z) ? stack_load_float(stack, data_cn_ssr.z) :
+ 1.4f;
+ float subsurface_anisotropy = stack_valid(data_cn_ssr.w) ?
+ stack_load_float(stack, data_cn_ssr.w) :
+ 0.0f;
// get the subsurface color
- uint4 data_subsurface_color = read_node(kg, offset);
+ uint4 data_subsurface_color = read_node(kg, &offset);
float3 subsurface_color = stack_valid(data_subsurface_color.x) ?
stack_load_float3(stack, data_subsurface_color.x) :
make_float3(__uint_as_float(data_subsurface_color.y),
@@ -222,16 +223,16 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
if (bssrdf) {
bssrdf->radius = subsurface_radius * subsurface;
- bssrdf->albedo = (subsurface_method == CLOSURE_BSSRDF_PRINCIPLED_ID) ?
- subsurface_color :
- mixed_ss_base_color;
- bssrdf->texture_blur = 0.0f;
- bssrdf->sharpness = 0.0f;
+ bssrdf->albedo = mixed_ss_base_color;
bssrdf->N = N;
bssrdf->roughness = roughness;
+ /* Clamps protecting against bad/extreme and non physical values. */
+ subsurface_ior = clamp(subsurface_ior, 1.01f, 3.8f);
+ bssrdf->anisotropy = clamp(subsurface_anisotropy, 0.0f, 0.9f);
+
/* setup bsdf */
- sd->flag |= bssrdf_setup(sd, bssrdf, subsurface_method);
+ sd->flag |= bssrdf_setup(sd, bssrdf, subsurface_method, subsurface_ior);
}
}
}
@@ -733,9 +734,9 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
}
#ifdef __HAIR__
case CLOSURE_BSDF_HAIR_PRINCIPLED_ID: {
- uint4 data_node2 = read_node(kg, offset);
- uint4 data_node3 = read_node(kg, offset);
- uint4 data_node4 = read_node(kg, offset);
+ uint4 data_node2 = read_node(kg, &offset);
+ uint4 data_node3 = read_node(kg, &offset);
+ uint4 data_node4 = read_node(kg, &offset);
float3 weight = sd->svm_closure_weight * mix_weight;
@@ -878,10 +879,8 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
#endif /* __HAIR__ */
#ifdef __SUBSURFACE__
- case CLOSURE_BSSRDF_CUBIC_ID:
- case CLOSURE_BSSRDF_GAUSSIAN_ID:
- case CLOSURE_BSSRDF_BURLEY_ID:
- case CLOSURE_BSSRDF_RANDOM_WALK_ID: {
+ case CLOSURE_BSSRDF_RANDOM_WALK_ID:
+ case CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID: {
float3 weight = sd->svm_closure_weight * mix_weight;
Bssrdf *bssrdf = bssrdf_alloc(sd, weight);
@@ -894,11 +893,14 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
bssrdf->radius = stack_load_float3(stack, data_node.z) * param1;
bssrdf->albedo = sd->svm_closure_weight;
- bssrdf->texture_blur = param2;
- bssrdf->sharpness = stack_load_float(stack, data_node.w);
bssrdf->N = N;
- bssrdf->roughness = 0.0f;
- sd->flag |= bssrdf_setup(sd, bssrdf, (ClosureType)type);
+ bssrdf->roughness = FLT_MAX;
+
+ const float subsurface_ior = clamp(param2, 1.01f, 3.8f);
+ const float subsurface_anisotropy = stack_load_float(stack, data_node.w);
+ bssrdf->anisotropy = clamp(subsurface_anisotropy, 0.0f, 0.9f);
+
+ sd->flag |= bssrdf_setup(sd, bssrdf, (ClosureType)type, subsurface_ior);
}
break;
@@ -907,10 +909,15 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
default:
break;
}
+
+ return offset;
}
-ccl_device void svm_node_closure_volume(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type)
+template<ShaderType shader_type>
+ccl_device_noinline void svm_node_closure_volume(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node)
{
#ifdef __VOLUME__
/* Only sum extinction for volumes, variable is shared with surface transparency. */
@@ -961,21 +968,17 @@ ccl_device void svm_node_closure_volume(
#endif
}
-ccl_device void svm_node_principled_volume(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint4 node,
- ShaderType shader_type,
- int path_flag,
- int *offset)
+template<ShaderType shader_type>
+ccl_device_noinline int svm_node_principled_volume(
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int path_flag, int offset)
{
#ifdef __VOLUME__
- uint4 value_node = read_node(kg, offset);
- uint4 attr_node = read_node(kg, offset);
+ uint4 value_node = read_node(kg, &offset);
+ uint4 attr_node = read_node(kg, &offset);
/* Only sum extinction for volumes, variable is shared with surface transparency. */
if (shader_type != SHADER_TYPE_VOLUME) {
- return;
+ return offset;
}
uint density_offset, anisotropy_offset, absorption_color_offset, mix_weight_offset;
@@ -985,7 +988,7 @@ ccl_device void svm_node_principled_volume(KernelGlobals *kg,
1.0f);
if (mix_weight == 0.0f) {
- return;
+ return offset;
}
/* Compute density. */
@@ -1034,7 +1037,7 @@ ccl_device void svm_node_principled_volume(KernelGlobals *kg,
/* Compute emission. */
if (path_flag & PATH_RAY_SHADOW) {
/* Don't need emission for shadows. */
- return;
+ return offset;
}
uint emission_offset, emission_color_offset, blackbody_offset, temperature_offset;
@@ -1074,9 +1077,10 @@ ccl_device void svm_node_principled_volume(KernelGlobals *kg,
}
}
#endif
+ return offset;
}
-ccl_device void svm_node_closure_emission(ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_closure_emission(ShaderData *sd, float *stack, uint4 node)
{
uint mix_weight_offset = node.y;
float3 weight = sd->svm_closure_weight;
@@ -1093,7 +1097,7 @@ ccl_device void svm_node_closure_emission(ShaderData *sd, float *stack, uint4 no
emission_setup(sd, weight);
}
-ccl_device void svm_node_closure_background(ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_closure_background(ShaderData *sd, float *stack, uint4 node)
{
uint mix_weight_offset = node.y;
float3 weight = sd->svm_closure_weight;
@@ -1110,7 +1114,7 @@ ccl_device void svm_node_closure_background(ShaderData *sd, float *stack, uint4
background_setup(sd, weight);
}
-ccl_device void svm_node_closure_holdout(ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_closure_holdout(ShaderData *sd, float *stack, uint4 node)
{
uint mix_weight_offset = node.y;
@@ -1145,14 +1149,13 @@ ccl_device void svm_node_closure_set_weight(ShaderData *sd, uint r, uint g, uint
ccl_device void svm_node_closure_weight(ShaderData *sd, float *stack, uint weight_offset)
{
float3 weight = stack_load_float3(stack, weight_offset);
-
svm_node_closure_store_weight(sd, weight);
}
-ccl_device void svm_node_emission_weight(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint4 node)
+ccl_device_noinline void svm_node_emission_weight(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node)
{
uint color_offset = node.y;
uint strength_offset = node.z;
@@ -1163,7 +1166,7 @@ ccl_device void svm_node_emission_weight(KernelGlobals *kg,
svm_node_closure_store_weight(sd, weight);
}
-ccl_device void svm_node_mix_closure(ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_mix_closure(ShaderData *sd, float *stack, uint4 node)
{
/* fetch weight from blend input, previous mix closures,
* and write to stack to be used by closure nodes later */
@@ -1186,7 +1189,7 @@ ccl_device void svm_node_mix_closure(ShaderData *sd, float *stack, uint4 node)
/* (Bump) normal */
ccl_device void svm_node_set_normal(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint in_direction, uint out_normal)
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint in_direction, uint out_normal)
{
float3 normal = stack_load_float3(stack, in_direction);
sd->N = normal;
diff --git a/intern/cycles/kernel/svm/svm_convert.h b/intern/cycles/kernel/svm/svm_convert.h
index 5df6c9fb755..37d40167ccc 100644
--- a/intern/cycles/kernel/svm/svm_convert.h
+++ b/intern/cycles/kernel/svm/svm_convert.h
@@ -18,8 +18,8 @@ CCL_NAMESPACE_BEGIN
/* Conversion Nodes */
-ccl_device void svm_node_convert(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint from, uint to)
+ccl_device_noinline void svm_node_convert(
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint from, uint to)
{
switch (type) {
case NODE_CONVERT_FI: {
diff --git a/intern/cycles/kernel/svm/svm_displace.h b/intern/cycles/kernel/svm/svm_displace.h
index 250fac6bcb8..a1d952173d8 100644
--- a/intern/cycles/kernel/svm/svm_displace.h
+++ b/intern/cycles/kernel/svm/svm_displace.h
@@ -14,11 +14,16 @@
* limitations under the License.
*/
+#include "kernel/kernel_montecarlo.h"
+
CCL_NAMESPACE_BEGIN
/* Bump Node */
-ccl_device void svm_node_set_bump(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_set_bump(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node)
{
#ifdef __RAY_DIFFERENTIALS__
/* get normal input */
@@ -83,7 +88,7 @@ ccl_device void svm_node_set_bump(KernelGlobals *kg, ShaderData *sd, float *stac
/* Displacement Node */
-ccl_device void svm_node_set_displacement(KernelGlobals *kg,
+ccl_device void svm_node_set_displacement(const KernelGlobals *kg,
ShaderData *sd,
float *stack,
uint fac_offset)
@@ -92,7 +97,10 @@ ccl_device void svm_node_set_displacement(KernelGlobals *kg,
sd->P += dP;
}
-ccl_device void svm_node_displacement(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_displacement(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node)
{
uint height_offset, midlevel_offset, scale_offset, normal_offset;
svm_unpack_node_uchar4(node.y, &height_offset, &midlevel_offset, &scale_offset, &normal_offset);
@@ -119,10 +127,10 @@ ccl_device void svm_node_displacement(KernelGlobals *kg, ShaderData *sd, float *
stack_store_float3(stack, node.z, dP);
}
-ccl_device void svm_node_vector_displacement(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device_noinline int svm_node_vector_displacement(
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
{
- uint4 data_node = read_node(kg, offset);
+ uint4 data_node = read_node(kg, &offset);
uint space = data_node.x;
uint vector_offset, midlevel_offset, scale_offset, displacement_offset;
@@ -164,6 +172,7 @@ ccl_device void svm_node_vector_displacement(
}
stack_store_float3(stack, displacement_offset, dP);
+ return offset;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_fresnel.h b/intern/cycles/kernel/svm/svm_fresnel.h
index 96d602e35bf..b5ecdbe2abf 100644
--- a/intern/cycles/kernel/svm/svm_fresnel.h
+++ b/intern/cycles/kernel/svm/svm_fresnel.h
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
/* Fresnel Node */
-ccl_device void svm_node_fresnel(
+ccl_device_noinline void svm_node_fresnel(
ShaderData *sd, float *stack, uint ior_offset, uint ior_value, uint node)
{
uint normal_offset, out_offset;
@@ -37,7 +37,7 @@ ccl_device void svm_node_fresnel(
/* Layer Weight Node */
-ccl_device void svm_node_layer_weight(ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_layer_weight(ShaderData *sd, float *stack, uint4 node)
{
uint blend_offset = node.y;
uint blend_value = node.z;
diff --git a/intern/cycles/kernel/svm/svm_gamma.h b/intern/cycles/kernel/svm/svm_gamma.h
index 65eb08eb0eb..f6fafdee941 100644
--- a/intern/cycles/kernel/svm/svm_gamma.h
+++ b/intern/cycles/kernel/svm/svm_gamma.h
@@ -16,7 +16,7 @@
CCL_NAMESPACE_BEGIN
-ccl_device void svm_node_gamma(
+ccl_device_noinline void svm_node_gamma(
ShaderData *sd, float *stack, uint in_gamma, uint in_color, uint out_color)
{
float3 color = stack_load_float3(stack, in_color);
diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h
index e48e96dcfa4..10e9f291d0e 100644
--- a/intern/cycles/kernel/svm/svm_geometry.h
+++ b/intern/cycles/kernel/svm/svm_geometry.h
@@ -18,8 +18,8 @@ CCL_NAMESPACE_BEGIN
/* Geometry Node */
-ccl_device_inline void svm_node_geometry(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device_noinline void svm_node_geometry(
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
{
float3 data;
@@ -51,8 +51,8 @@ ccl_device_inline void svm_node_geometry(
stack_store_float3(stack, out_offset, data);
}
-ccl_device void svm_node_geometry_bump_dx(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device_noinline void svm_node_geometry_bump_dx(
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
{
#ifdef __RAY_DIFFERENTIALS__
float3 data;
@@ -75,8 +75,8 @@ ccl_device void svm_node_geometry_bump_dx(
#endif
}
-ccl_device void svm_node_geometry_bump_dy(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device_noinline void svm_node_geometry_bump_dy(
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
{
#ifdef __RAY_DIFFERENTIALS__
float3 data;
@@ -101,8 +101,8 @@ ccl_device void svm_node_geometry_bump_dy(
/* Object Info */
-ccl_device void svm_node_object_info(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device_noinline void svm_node_object_info(
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
{
float data;
@@ -140,8 +140,8 @@ ccl_device void svm_node_object_info(
/* Particle Info */
-ccl_device void svm_node_particle_info(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device_noinline void svm_node_particle_info(
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
{
switch (type) {
case NODE_INFO_PAR_INDEX: {
@@ -199,8 +199,8 @@ ccl_device void svm_node_particle_info(
/* Hair Info */
-ccl_device void svm_node_hair_info(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device_noinline void svm_node_hair_info(
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
{
float data;
float3 data3;
diff --git a/intern/cycles/kernel/svm/svm_gradient.h b/intern/cycles/kernel/svm/svm_gradient.h
index 08304bc47e8..cd15f7097e7 100644
--- a/intern/cycles/kernel/svm/svm_gradient.h
+++ b/intern/cycles/kernel/svm/svm_gradient.h
@@ -60,7 +60,7 @@ ccl_device float svm_gradient(float3 p, NodeGradientType type)
return 0.0f;
}
-ccl_device void svm_node_tex_gradient(ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_tex_gradient(ShaderData *sd, float *stack, uint4 node)
{
uint type, co_offset, color_offset, fac_offset;
diff --git a/intern/cycles/kernel/svm/svm_hsv.h b/intern/cycles/kernel/svm/svm_hsv.h
index c299cf58c7f..6f49a8385aa 100644
--- a/intern/cycles/kernel/svm/svm_hsv.h
+++ b/intern/cycles/kernel/svm/svm_hsv.h
@@ -19,8 +19,10 @@
CCL_NAMESPACE_BEGIN
-ccl_device void svm_node_hsv(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device_noinline void svm_node_hsv(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node)
{
uint in_color_offset, fac_offset, out_color_offset;
uint hue_offset, sat_offset, val_offset;
diff --git a/intern/cycles/kernel/svm/svm_ies.h b/intern/cycles/kernel/svm/svm_ies.h
index 56c804b44d0..9c13734ecf0 100644
--- a/intern/cycles/kernel/svm/svm_ies.h
+++ b/intern/cycles/kernel/svm/svm_ies.h
@@ -19,7 +19,7 @@ CCL_NAMESPACE_BEGIN
/* IES Light */
ccl_device_inline float interpolate_ies_vertical(
- KernelGlobals *kg, int ofs, int v, int v_num, float v_frac, int h)
+ const KernelGlobals *kg, int ofs, int v, int v_num, float v_frac, int h)
{
/* Since lookups are performed in spherical coordinates, clamping the coordinates at the low end
* of v (corresponding to the north pole) would result in artifacts. The proper way of dealing
@@ -39,7 +39,7 @@ ccl_device_inline float interpolate_ies_vertical(
return cubic_interp(a, b, c, d, v_frac);
}
-ccl_device_inline float kernel_ies_interp(KernelGlobals *kg,
+ccl_device_inline float kernel_ies_interp(const KernelGlobals *kg,
int slot,
float h_angle,
float v_angle)
@@ -98,8 +98,10 @@ ccl_device_inline float kernel_ies_interp(KernelGlobals *kg,
return max(cubic_interp(a, b, c, d, h_frac), 0.0f);
}
-ccl_device void svm_node_ies(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device_noinline void svm_node_ies(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node)
{
uint vector_offset, strength_offset, fac_offset, slot = node.z;
svm_unpack_node_uchar3(node.y, &strength_offset, &vector_offset, &fac_offset);
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index 9348ddabde5..a344f36977a 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -16,7 +16,7 @@
CCL_NAMESPACE_BEGIN
-ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint flags)
+ccl_device float4 svm_image_texture(const KernelGlobals *kg, int id, float x, float y, uint flags)
{
if (id == -1) {
return make_float4(
@@ -44,8 +44,8 @@ ccl_device_inline float3 texco_remap_square(float3 co)
return (co - make_float3(0.5f, 0.5f, 0.5f)) * 2.0f;
}
-ccl_device void svm_node_tex_image(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device_noinline int svm_node_tex_image(
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
{
uint co_offset, out_offset, alpha_offset, flags;
@@ -71,7 +71,7 @@ ccl_device void svm_node_tex_image(
int num_nodes = (int)node.y;
if (num_nodes > 0) {
/* Remember the offset of the node following the tile nodes. */
- int next_offset = (*offset) + num_nodes;
+ int next_offset = offset + num_nodes;
/* Find the tile that the UV lies in. */
int tx = (int)tex_co.x;
@@ -83,7 +83,7 @@ ccl_device void svm_node_tex_image(
/* Find the index of the tile. */
for (int i = 0; i < num_nodes; i++) {
- uint4 tile_node = read_node(kg, offset);
+ uint4 tile_node = read_node(kg, &offset);
if (tile_node.x == tile) {
id = tile_node.y;
break;
@@ -102,7 +102,7 @@ ccl_device void svm_node_tex_image(
}
/* Skip over the remaining nodes. */
- *offset = next_offset;
+ offset = next_offset;
}
else {
id = -num_nodes;
@@ -114,9 +114,13 @@ ccl_device void svm_node_tex_image(
stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z));
if (stack_valid(alpha_offset))
stack_store_float(stack, alpha_offset, f.w);
+ return offset;
}
-ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_tex_image_box(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node)
{
/* get object space normal */
float3 N = sd->N;
@@ -215,10 +219,10 @@ ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float
stack_store_float(stack, alpha_offset, f.w);
}
-ccl_device void svm_node_tex_environment(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint4 node)
+ccl_device_noinline void svm_node_tex_environment(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node)
{
uint id = node.y;
uint co_offset, out_offset, alpha_offset, flags;
diff --git a/intern/cycles/kernel/svm/svm_invert.h b/intern/cycles/kernel/svm/svm_invert.h
index 02024742b13..27cdaaff473 100644
--- a/intern/cycles/kernel/svm/svm_invert.h
+++ b/intern/cycles/kernel/svm/svm_invert.h
@@ -21,7 +21,7 @@ ccl_device float invert(float color, float factor)
return factor * (1.0f - color) + (1.0f - factor) * color;
}
-ccl_device void svm_node_invert(
+ccl_device_noinline void svm_node_invert(
ShaderData *sd, float *stack, uint in_fac, uint in_color, uint out_color)
{
float factor = stack_load_float(stack, in_fac);
diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h
index 768c65918cd..49fabad1cc5 100644
--- a/intern/cycles/kernel/svm/svm_light_path.h
+++ b/intern/cycles/kernel/svm/svm_light_path.h
@@ -18,12 +18,12 @@ CCL_NAMESPACE_BEGIN
/* Light Path Node */
-ccl_device void svm_node_light_path(ShaderData *sd,
- ccl_addr_space PathState *state,
- float *stack,
- uint type,
- uint out_offset,
- int path_flag)
+ccl_device_noinline void svm_node_light_path(INTEGRATOR_STATE_CONST_ARGS,
+ const ShaderData *sd,
+ float *stack,
+ uint type,
+ uint out_offset,
+ int path_flag)
{
float info = 0.0f;
@@ -58,21 +58,47 @@ ccl_device void svm_node_light_path(ShaderData *sd,
case NODE_LP_ray_length:
info = sd->ray_length;
break;
- case NODE_LP_ray_depth:
- info = (float)state->bounce;
+ case NODE_LP_ray_depth: {
+ /* Read bounce from difference location depending if this is a shadow
+ * path. It's a bit dubious to have integrate state details leak into
+ * this function but hard to avoid currently. */
+ int bounce = (INTEGRATOR_STATE_IS_NULL) ? 0 :
+ (path_flag & PATH_RAY_SHADOW) ? INTEGRATOR_STATE(shadow_path, bounce) :
+ INTEGRATOR_STATE(path, bounce);
+
+ /* For background, light emission and shadow evaluation we from a
+ * surface or volume we are effective one bounce further. */
+ if (path_flag & (PATH_RAY_SHADOW | PATH_RAY_EMISSION)) {
+ bounce++;
+ }
+
+ info = (float)bounce;
break;
+ }
+ /* TODO */
+ case NODE_LP_ray_transparent: {
+ const int bounce = (INTEGRATOR_STATE_IS_NULL) ?
+ 0 :
+ (path_flag & PATH_RAY_SHADOW) ?
+ INTEGRATOR_STATE(shadow_path, transparent_bounce) :
+ INTEGRATOR_STATE(path, transparent_bounce);
+
+ info = (float)bounce;
+ break;
+ }
+#if 0
case NODE_LP_ray_diffuse:
info = (float)state->diffuse_bounce;
break;
case NODE_LP_ray_glossy:
info = (float)state->glossy_bounce;
break;
- case NODE_LP_ray_transparent:
- info = (float)state->transparent_bounce;
- break;
+#endif
+#if 0
case NODE_LP_ray_transmission:
info = (float)state->transmission_bounce;
break;
+#endif
}
stack_store_float(stack, out_offset, info);
@@ -80,7 +106,7 @@ ccl_device void svm_node_light_path(ShaderData *sd,
/* Light Falloff Node */
-ccl_device void svm_node_light_falloff(ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_light_falloff(ShaderData *sd, float *stack, uint4 node)
{
uint strength_offset, out_offset, smooth_offset;
diff --git a/intern/cycles/kernel/svm/svm_magic.h b/intern/cycles/kernel/svm/svm_magic.h
index 9c160e6d8cc..8784c760860 100644
--- a/intern/cycles/kernel/svm/svm_magic.h
+++ b/intern/cycles/kernel/svm/svm_magic.h
@@ -87,8 +87,8 @@ ccl_device_noinline_cpu float3 svm_magic(float3 p, int n, float distortion)
return make_float3(0.5f - x, 0.5f - y, 0.5f - z);
}
-ccl_device void svm_node_tex_magic(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device_noinline int svm_node_tex_magic(
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
{
uint depth;
uint scale_offset, distortion_offset, co_offset, fac_offset, color_offset;
@@ -96,7 +96,7 @@ ccl_device void svm_node_tex_magic(
svm_unpack_node_uchar3(node.y, &depth, &color_offset, &fac_offset);
svm_unpack_node_uchar3(node.z, &co_offset, &scale_offset, &distortion_offset);
- uint4 node2 = read_node(kg, offset);
+ uint4 node2 = read_node(kg, &offset);
float3 co = stack_load_float3(stack, co_offset);
float scale = stack_load_float_default(stack, scale_offset, node2.x);
float distortion = stack_load_float_default(stack, distortion_offset, node2.y);
@@ -107,6 +107,7 @@ ccl_device void svm_node_tex_magic(
stack_store_float(stack, fac_offset, average(color));
if (stack_valid(color_offset))
stack_store_float3(stack, color_offset, color);
+ return offset;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_map_range.h b/intern/cycles/kernel/svm/svm_map_range.h
index 533a631c837..c8684981e31 100644
--- a/intern/cycles/kernel/svm/svm_map_range.h
+++ b/intern/cycles/kernel/svm/svm_map_range.h
@@ -24,13 +24,13 @@ ccl_device_inline float smootherstep(float edge0, float edge1, float x)
return x * x * x * (x * (x * 6.0f - 15.0f) + 10.0f);
}
-ccl_device void svm_node_map_range(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint value_stack_offset,
- uint parameters_stack_offsets,
- uint results_stack_offsets,
- int *offset)
+ccl_device_noinline int svm_node_map_range(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint value_stack_offset,
+ uint parameters_stack_offsets,
+ uint results_stack_offsets,
+ int offset)
{
uint from_min_stack_offset, from_max_stack_offset, to_min_stack_offset, to_max_stack_offset;
uint type_stack_offset, steps_stack_offset, result_stack_offset;
@@ -42,8 +42,8 @@ ccl_device void svm_node_map_range(KernelGlobals *kg,
svm_unpack_node_uchar3(
results_stack_offsets, &type_stack_offset, &steps_stack_offset, &result_stack_offset);
- uint4 defaults = read_node(kg, offset);
- uint4 defaults2 = read_node(kg, offset);
+ uint4 defaults = read_node(kg, &offset);
+ uint4 defaults2 = read_node(kg, &offset);
float value = stack_load_float(stack, value_stack_offset);
float from_min = stack_load_float_default(stack, from_min_stack_offset, defaults.x);
@@ -83,6 +83,7 @@ ccl_device void svm_node_map_range(KernelGlobals *kg,
result = 0.0f;
}
stack_store_float(stack, result_stack_offset, result);
+ return offset;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_mapping.h b/intern/cycles/kernel/svm/svm_mapping.h
index 6e19c859e19..fcc724405f5 100644
--- a/intern/cycles/kernel/svm/svm_mapping.h
+++ b/intern/cycles/kernel/svm/svm_mapping.h
@@ -18,13 +18,12 @@ CCL_NAMESPACE_BEGIN
/* Mapping Node */
-ccl_device void svm_node_mapping(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint type,
- uint inputs_stack_offsets,
- uint result_stack_offset,
- int *offset)
+ccl_device_noinline void svm_node_mapping(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint type,
+ uint inputs_stack_offsets,
+ uint result_stack_offset)
{
uint vector_stack_offset, location_stack_offset, rotation_stack_offset, scale_stack_offset;
svm_unpack_node_uchar4(inputs_stack_offsets,
@@ -44,30 +43,40 @@ ccl_device void svm_node_mapping(KernelGlobals *kg,
/* Texture Mapping */
-ccl_device void svm_node_texture_mapping(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint vec_offset, uint out_offset, int *offset)
+ccl_device_noinline int svm_node_texture_mapping(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint vec_offset,
+ uint out_offset,
+ int offset)
{
float3 v = stack_load_float3(stack, vec_offset);
Transform tfm;
- tfm.x = read_node_float(kg, offset);
- tfm.y = read_node_float(kg, offset);
- tfm.z = read_node_float(kg, offset);
+ tfm.x = read_node_float(kg, &offset);
+ tfm.y = read_node_float(kg, &offset);
+ tfm.z = read_node_float(kg, &offset);
float3 r = transform_point(&tfm, v);
stack_store_float3(stack, out_offset, r);
+ return offset;
}
-ccl_device void svm_node_min_max(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint vec_offset, uint out_offset, int *offset)
+ccl_device_noinline int svm_node_min_max(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint vec_offset,
+ uint out_offset,
+ int offset)
{
float3 v = stack_load_float3(stack, vec_offset);
- float3 mn = float4_to_float3(read_node_float(kg, offset));
- float3 mx = float4_to_float3(read_node_float(kg, offset));
+ float3 mn = float4_to_float3(read_node_float(kg, &offset));
+ float3 mx = float4_to_float3(read_node_float(kg, &offset));
float3 r = min(max(mn, v), mx);
stack_store_float3(stack, out_offset, r);
+ return offset;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_math.h b/intern/cycles/kernel/svm/svm_math.h
index 733ea28f9e5..99e7a8f2bda 100644
--- a/intern/cycles/kernel/svm/svm_math.h
+++ b/intern/cycles/kernel/svm/svm_math.h
@@ -16,13 +16,12 @@
CCL_NAMESPACE_BEGIN
-ccl_device void svm_node_math(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint type,
- uint inputs_stack_offsets,
- uint result_stack_offset,
- int *offset)
+ccl_device_noinline void svm_node_math(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint type,
+ uint inputs_stack_offsets,
+ uint result_stack_offset)
{
uint a_stack_offset, b_stack_offset, c_stack_offset;
svm_unpack_node_uchar3(inputs_stack_offsets, &a_stack_offset, &b_stack_offset, &c_stack_offset);
@@ -35,13 +34,13 @@ ccl_device void svm_node_math(KernelGlobals *kg,
stack_store_float(stack, result_stack_offset, result);
}
-ccl_device void svm_node_vector_math(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint type,
- uint inputs_stack_offsets,
- uint outputs_stack_offsets,
- int *offset)
+ccl_device_noinline int svm_node_vector_math(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint type,
+ uint inputs_stack_offsets,
+ uint outputs_stack_offsets,
+ int offset)
{
uint value_stack_offset, vector_stack_offset;
uint a_stack_offset, b_stack_offset, param1_stack_offset;
@@ -60,7 +59,7 @@ ccl_device void svm_node_vector_math(KernelGlobals *kg,
/* 3 Vector Operators */
if (type == NODE_VECTOR_MATH_WRAP || type == NODE_VECTOR_MATH_FACEFORWARD ||
type == NODE_VECTOR_MATH_MULTIPLY_ADD) {
- uint4 extra_node = read_node(kg, offset);
+ uint4 extra_node = read_node(kg, &offset);
c = stack_load_float3(stack, extra_node.x);
}
@@ -70,6 +69,7 @@ ccl_device void svm_node_vector_math(KernelGlobals *kg,
stack_store_float(stack, value_stack_offset, value);
if (stack_valid(vector_stack_offset))
stack_store_float3(stack, vector_stack_offset, vector);
+ return offset;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_mix.h b/intern/cycles/kernel/svm/svm_mix.h
index 15114bfd5e4..3e38080977f 100644
--- a/intern/cycles/kernel/svm/svm_mix.h
+++ b/intern/cycles/kernel/svm/svm_mix.h
@@ -18,16 +18,16 @@ CCL_NAMESPACE_BEGIN
/* Node */
-ccl_device void svm_node_mix(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint fac_offset,
- uint c1_offset,
- uint c2_offset,
- int *offset)
+ccl_device_noinline int svm_node_mix(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint fac_offset,
+ uint c1_offset,
+ uint c2_offset,
+ int offset)
{
/* read extra data */
- uint4 node1 = read_node(kg, offset);
+ uint4 node1 = read_node(kg, &offset);
float fac = stack_load_float(stack, fac_offset);
float3 c1 = stack_load_float3(stack, c1_offset);
@@ -35,6 +35,7 @@ ccl_device void svm_node_mix(KernelGlobals *kg,
float3 result = svm_mix((NodeMix)node1.y, fac, c1, c2);
stack_store_float3(stack, node1.z, result);
+ return offset;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_musgrave.h b/intern/cycles/kernel/svm/svm_musgrave.h
index 571f62fe27f..03a8b68b3ef 100644
--- a/intern/cycles/kernel/svm/svm_musgrave.h
+++ b/intern/cycles/kernel/svm/svm_musgrave.h
@@ -700,13 +700,13 @@ ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_4d(
return value;
}
-ccl_device void svm_node_tex_musgrave(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint offsets1,
- uint offsets2,
- uint offsets3,
- int *offset)
+ccl_device_noinline int svm_node_tex_musgrave(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint offsets1,
+ uint offsets2,
+ uint offsets3,
+ int offset)
{
uint type, dimensions, co_stack_offset, w_stack_offset;
uint scale_stack_offset, detail_stack_offset, dimension_stack_offset, lacunarity_stack_offset;
@@ -720,8 +720,8 @@ ccl_device void svm_node_tex_musgrave(KernelGlobals *kg,
&lacunarity_stack_offset);
svm_unpack_node_uchar3(offsets3, &offset_stack_offset, &gain_stack_offset, &fac_stack_offset);
- uint4 defaults1 = read_node(kg, offset);
- uint4 defaults2 = read_node(kg, offset);
+ uint4 defaults1 = read_node(kg, &offset);
+ uint4 defaults2 = read_node(kg, &offset);
float3 co = stack_load_float3(stack, co_stack_offset);
float w = stack_load_float_default(stack, w_stack_offset, defaults1.x);
@@ -844,6 +844,7 @@ ccl_device void svm_node_tex_musgrave(KernelGlobals *kg,
}
stack_store_float(stack, fac_stack_offset, fac);
+ return offset;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h
index 94d8bfde555..ecb4df6afdf 100644
--- a/intern/cycles/kernel/svm/svm_noise.h
+++ b/intern/cycles/kernel/svm/svm_noise.h
@@ -330,7 +330,7 @@ ccl_device_inline ssef grad(const ssei &hash, const ssef &x, const ssef &y)
* |__________________________|
*
*/
-ccl_device_noinline float perlin_2d(float x, float y)
+ccl_device_noinline_cpu float perlin_2d(float x, float y)
{
ssei XY;
ssef fxy = floorfrac(ssef(x, y, 0.0f, 0.0f), &XY);
@@ -447,7 +447,7 @@ ccl_device_inline ssef quad_mix(ssef p, ssef q, ssef r, ssef s, ssef f)
* v7 (1, 1, 1)
*
*/
-ccl_device_noinline float perlin_3d(float x, float y, float z)
+ccl_device_noinline_cpu float perlin_3d(float x, float y, float z)
{
ssei XYZ;
ssef fxyz = floorfrac(ssef(x, y, z, 0.0f), &XYZ);
@@ -501,7 +501,7 @@ ccl_device_noinline float perlin_3d(float x, float y, float z)
* v15 (1, 1, 1, 1)
*
*/
-ccl_device_noinline float perlin_4d(float x, float y, float z, float w)
+ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w)
{
ssei XYZW;
ssef fxyzw = floorfrac(ssef(x, y, z, w), &XYZW);
@@ -585,7 +585,7 @@ ccl_device_inline ssef quad_mix(avxf p, avxf q, ssef f)
* |__________________________|
*
*/
-ccl_device_noinline float perlin_3d(float x, float y, float z)
+ccl_device_noinline_cpu float perlin_3d(float x, float y, float z)
{
ssei XYZ;
ssef fxyz = floorfrac(ssef(x, y, z, 0.0f), &XYZ);
@@ -637,7 +637,7 @@ ccl_device_noinline float perlin_3d(float x, float y, float z)
* v15 (1, 1, 1, 1)
*
*/
-ccl_device_noinline float perlin_4d(float x, float y, float z, float w)
+ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w)
{
ssei XYZW;
ssef fxyzw = floorfrac(ssef(x, y, z, w), &XYZW);
diff --git a/intern/cycles/kernel/svm/svm_noisetex.h b/intern/cycles/kernel/svm/svm_noisetex.h
index 61fd9553802..29b262ac06e 100644
--- a/intern/cycles/kernel/svm/svm_noisetex.h
+++ b/intern/cycles/kernel/svm/svm_noisetex.h
@@ -140,13 +140,13 @@ ccl_device void noise_texture_4d(float4 co,
}
}
-ccl_device void svm_node_tex_noise(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint dimensions,
- uint offsets1,
- uint offsets2,
- int *offset)
+ccl_device_noinline int svm_node_tex_noise(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint dimensions,
+ uint offsets1,
+ uint offsets2,
+ int offset)
{
uint vector_stack_offset, w_stack_offset, scale_stack_offset;
uint detail_stack_offset, roughness_stack_offset, distortion_stack_offset;
@@ -160,8 +160,8 @@ ccl_device void svm_node_tex_noise(KernelGlobals *kg,
&value_stack_offset,
&color_stack_offset);
- uint4 defaults1 = read_node(kg, offset);
- uint4 defaults2 = read_node(kg, offset);
+ uint4 defaults1 = read_node(kg, &offset);
+ uint4 defaults2 = read_node(kg, &offset);
float3 vector = stack_load_float3(stack, vector_stack_offset);
float w = stack_load_float_default(stack, w_stack_offset, defaults1.x);
@@ -212,6 +212,7 @@ ccl_device void svm_node_tex_noise(KernelGlobals *kg,
if (stack_valid(color_stack_offset)) {
stack_store_float3(stack, color_stack_offset, color);
}
+ return offset;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_normal.h b/intern/cycles/kernel/svm/svm_normal.h
index 4cd3eab0ed2..724b5f281f9 100644
--- a/intern/cycles/kernel/svm/svm_normal.h
+++ b/intern/cycles/kernel/svm/svm_normal.h
@@ -16,16 +16,16 @@
CCL_NAMESPACE_BEGIN
-ccl_device void svm_node_normal(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint in_normal_offset,
- uint out_normal_offset,
- uint out_dot_offset,
- int *offset)
+ccl_device_noinline int svm_node_normal(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint in_normal_offset,
+ uint out_normal_offset,
+ uint out_dot_offset,
+ int offset)
{
/* read extra data */
- uint4 node1 = read_node(kg, offset);
+ uint4 node1 = read_node(kg, &offset);
float3 normal = stack_load_float3(stack, in_normal_offset);
float3 direction;
@@ -39,6 +39,7 @@ ccl_device void svm_node_normal(KernelGlobals *kg,
if (stack_valid(out_dot_offset))
stack_store_float(stack, out_dot_offset, dot(direction, normalize(normal)));
+ return offset;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h
index 85ccf39144b..e92df3c093c 100644
--- a/intern/cycles/kernel/svm/svm_ramp.h
+++ b/intern/cycles/kernel/svm/svm_ramp.h
@@ -21,8 +21,12 @@ CCL_NAMESPACE_BEGIN
/* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */
-ccl_device_inline float4 rgb_ramp_lookup(
- KernelGlobals *kg, int offset, float f, bool interpolate, bool extrapolate, int table_size)
+ccl_device_inline float4 rgb_ramp_lookup(const KernelGlobals *kg,
+ int offset,
+ float f,
+ bool interpolate,
+ bool extrapolate,
+ int table_size)
{
if ((f < 0.0f || f > 1.0f) && extrapolate) {
float4 t0, dy;
@@ -53,34 +57,35 @@ ccl_device_inline float4 rgb_ramp_lookup(
return a;
}
-ccl_device void svm_node_rgb_ramp(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device_noinline int svm_node_rgb_ramp(
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
{
uint fac_offset, color_offset, alpha_offset;
uint interpolate = node.z;
svm_unpack_node_uchar3(node.y, &fac_offset, &color_offset, &alpha_offset);
- uint table_size = read_node(kg, offset).x;
+ uint table_size = read_node(kg, &offset).x;
float fac = stack_load_float(stack, fac_offset);
- float4 color = rgb_ramp_lookup(kg, *offset, fac, interpolate, false, table_size);
+ float4 color = rgb_ramp_lookup(kg, offset, fac, interpolate, false, table_size);
if (stack_valid(color_offset))
stack_store_float3(stack, color_offset, float4_to_float3(color));
if (stack_valid(alpha_offset))
stack_store_float(stack, alpha_offset, color.w);
- *offset += table_size;
+ offset += table_size;
+ return offset;
}
-ccl_device void svm_node_curves(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device_noinline int svm_node_curves(
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
{
uint fac_offset, color_offset, out_offset;
svm_unpack_node_uchar3(node.y, &fac_offset, &color_offset, &out_offset);
- uint table_size = read_node(kg, offset).x;
+ uint table_size = read_node(kg, &offset).x;
float fac = stack_load_float(stack, fac_offset);
float3 color = stack_load_float3(stack, color_offset);
@@ -89,14 +94,15 @@ ccl_device void svm_node_curves(
const float range_x = max_x - min_x;
const float3 relpos = (color - make_float3(min_x, min_x, min_x)) / range_x;
- float r = rgb_ramp_lookup(kg, *offset, relpos.x, true, true, table_size).x;
- float g = rgb_ramp_lookup(kg, *offset, relpos.y, true, true, table_size).y;
- float b = rgb_ramp_lookup(kg, *offset, relpos.z, true, true, table_size).z;
+ float r = rgb_ramp_lookup(kg, offset, relpos.x, true, true, table_size).x;
+ float g = rgb_ramp_lookup(kg, offset, relpos.y, true, true, table_size).y;
+ float b = rgb_ramp_lookup(kg, offset, relpos.z, true, true, table_size).z;
color = (1.0f - fac) * color + fac * make_float3(r, g, b);
stack_store_float3(stack, out_offset, color);
- *offset += table_size;
+ offset += table_size;
+ return offset;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
index f501252062e..8d52845ea3d 100644
--- a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
+++ b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
@@ -16,15 +16,15 @@
CCL_NAMESPACE_BEGIN
-ccl_device void svm_node_combine_hsv(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint hue_in,
- uint saturation_in,
- uint value_in,
- int *offset)
+ccl_device_noinline int svm_node_combine_hsv(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint hue_in,
+ uint saturation_in,
+ uint value_in,
+ int offset)
{
- uint4 node1 = read_node(kg, offset);
+ uint4 node1 = read_node(kg, &offset);
uint color_out = node1.y;
float hue = stack_load_float(stack, hue_in);
@@ -36,17 +36,18 @@ ccl_device void svm_node_combine_hsv(KernelGlobals *kg,
if (stack_valid(color_out))
stack_store_float3(stack, color_out, color);
+ return offset;
}
-ccl_device void svm_node_separate_hsv(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint color_in,
- uint hue_out,
- uint saturation_out,
- int *offset)
+ccl_device_noinline int svm_node_separate_hsv(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint color_in,
+ uint hue_out,
+ uint saturation_out,
+ int offset)
{
- uint4 node1 = read_node(kg, offset);
+ uint4 node1 = read_node(kg, &offset);
uint value_out = node1.y;
float3 color = stack_load_float3(stack, color_in);
@@ -60,6 +61,7 @@ ccl_device void svm_node_separate_hsv(KernelGlobals *kg,
stack_store_float(stack, saturation_out, color.y);
if (stack_valid(value_out))
stack_store_float(stack, value_out, color.z);
+ return offset;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_sky.h b/intern/cycles/kernel/svm/svm_sky.h
index b908732f026..b77c4311e72 100644
--- a/intern/cycles/kernel/svm/svm_sky.h
+++ b/intern/cycles/kernel/svm/svm_sky.h
@@ -37,7 +37,7 @@ ccl_device float sky_perez_function(float *lam, float theta, float gamma)
(1.0f + lam[2] * expf(lam[3] * gamma) + lam[4] * cgamma * cgamma);
}
-ccl_device float3 sky_radiance_preetham(KernelGlobals *kg,
+ccl_device float3 sky_radiance_preetham(const KernelGlobals *kg,
float3 dir,
float sunphi,
float suntheta,
@@ -90,7 +90,7 @@ ccl_device float sky_radiance_internal(float *configuration, float theta, float
configuration[6] * mieM + configuration[7] * zenith);
}
-ccl_device float3 sky_radiance_hosek(KernelGlobals *kg,
+ccl_device float3 sky_radiance_hosek(const KernelGlobals *kg,
float3 dir,
float sunphi,
float suntheta,
@@ -127,7 +127,7 @@ ccl_device float3 geographical_to_direction(float lat, float lon)
return make_float3(cos(lat) * cos(lon), cos(lat) * sin(lon), sin(lat));
}
-ccl_device float3 sky_radiance_nishita(KernelGlobals *kg,
+ccl_device float3 sky_radiance_nishita(const KernelGlobals *kg,
float3 dir,
float *nishita_data,
uint texture_id)
@@ -209,8 +209,8 @@ ccl_device float3 sky_radiance_nishita(KernelGlobals *kg,
return xyz_to_rgb(kg, xyz);
}
-ccl_device void svm_node_tex_sky(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device_noinline int svm_node_tex_sky(
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
{
/* Load data */
uint dir_offset = node.y;
@@ -226,49 +226,49 @@ ccl_device void svm_node_tex_sky(
float sunphi, suntheta, radiance_x, radiance_y, radiance_z;
float config_x[9], config_y[9], config_z[9];
- float4 data = read_node_float(kg, offset);
+ float4 data = read_node_float(kg, &offset);
sunphi = data.x;
suntheta = data.y;
radiance_x = data.z;
radiance_y = data.w;
- data = read_node_float(kg, offset);
+ data = read_node_float(kg, &offset);
radiance_z = data.x;
config_x[0] = data.y;
config_x[1] = data.z;
config_x[2] = data.w;
- data = read_node_float(kg, offset);
+ data = read_node_float(kg, &offset);
config_x[3] = data.x;
config_x[4] = data.y;
config_x[5] = data.z;
config_x[6] = data.w;
- data = read_node_float(kg, offset);
+ data = read_node_float(kg, &offset);
config_x[7] = data.x;
config_x[8] = data.y;
config_y[0] = data.z;
config_y[1] = data.w;
- data = read_node_float(kg, offset);
+ data = read_node_float(kg, &offset);
config_y[2] = data.x;
config_y[3] = data.y;
config_y[4] = data.z;
config_y[5] = data.w;
- data = read_node_float(kg, offset);
+ data = read_node_float(kg, &offset);
config_y[6] = data.x;
config_y[7] = data.y;
config_y[8] = data.z;
config_z[0] = data.w;
- data = read_node_float(kg, offset);
+ data = read_node_float(kg, &offset);
config_z[1] = data.x;
config_z[2] = data.y;
config_z[3] = data.z;
config_z[4] = data.w;
- data = read_node_float(kg, offset);
+ data = read_node_float(kg, &offset);
config_z[5] = data.x;
config_z[6] = data.y;
config_z[7] = data.z;
@@ -305,19 +305,19 @@ ccl_device void svm_node_tex_sky(
/* Define variables */
float nishita_data[10];
- float4 data = read_node_float(kg, offset);
+ float4 data = read_node_float(kg, &offset);
nishita_data[0] = data.x;
nishita_data[1] = data.y;
nishita_data[2] = data.z;
nishita_data[3] = data.w;
- data = read_node_float(kg, offset);
+ data = read_node_float(kg, &offset);
nishita_data[4] = data.x;
nishita_data[5] = data.y;
nishita_data[6] = data.z;
nishita_data[7] = data.w;
- data = read_node_float(kg, offset);
+ data = read_node_float(kg, &offset);
nishita_data[8] = data.x;
nishita_data[9] = data.y;
uint texture_id = __float_as_uint(data.z);
@@ -327,6 +327,7 @@ ccl_device void svm_node_tex_sky(
}
stack_store_float3(stack, out_offset, f);
+ return offset;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h
index 46600551cc4..a35253080da 100644
--- a/intern/cycles/kernel/svm/svm_tex_coord.h
+++ b/intern/cycles/kernel/svm/svm_tex_coord.h
@@ -14,12 +14,16 @@
* limitations under the License.
*/
+#include "kernel/geom/geom.h"
+#include "kernel/kernel_camera.h"
+#include "kernel/kernel_montecarlo.h"
+
CCL_NAMESPACE_BEGIN
/* Texture Coordinate Node */
-ccl_device void svm_node_tex_coord(
- KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int *offset)
+ccl_device_noinline int svm_node_tex_coord(
+ const KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int offset)
{
float3 data;
uint type = node.y;
@@ -35,9 +39,9 @@ ccl_device void svm_node_tex_coord(
}
else {
Transform tfm;
- tfm.x = read_node_float(kg, offset);
- tfm.y = read_node_float(kg, offset);
- tfm.z = read_node_float(kg, offset);
+ tfm.x = read_node_float(kg, &offset);
+ tfm.y = read_node_float(kg, &offset);
+ tfm.z = read_node_float(kg, &offset);
data = transform_point(&tfm, data);
}
break;
@@ -92,10 +96,11 @@ ccl_device void svm_node_tex_coord(
}
stack_store_float3(stack, out_offset, data);
+ return offset;
}
-ccl_device void svm_node_tex_coord_bump_dx(
- KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int *offset)
+ccl_device_noinline int svm_node_tex_coord_bump_dx(
+ const KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int offset)
{
#ifdef __RAY_DIFFERENTIALS__
float3 data;
@@ -112,9 +117,9 @@ ccl_device void svm_node_tex_coord_bump_dx(
}
else {
Transform tfm;
- tfm.x = read_node_float(kg, offset);
- tfm.y = read_node_float(kg, offset);
- tfm.z = read_node_float(kg, offset);
+ tfm.x = read_node_float(kg, &offset);
+ tfm.y = read_node_float(kg, &offset);
+ tfm.z = read_node_float(kg, &offset);
data = transform_point(&tfm, data);
}
break;
@@ -136,7 +141,7 @@ ccl_device void svm_node_tex_coord_bump_dx(
case NODE_TEXCO_WINDOW: {
if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE &&
kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
- data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dx);
+ data = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(sd->ray_dP, 0.0f, 0.0f));
else
data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx);
data.z = 0.0f;
@@ -169,13 +174,14 @@ ccl_device void svm_node_tex_coord_bump_dx(
}
stack_store_float3(stack, out_offset, data);
+ return offset;
#else
- svm_node_tex_coord(kg, sd, path_flag, stack, node, offset);
+ return svm_node_tex_coord(kg, sd, path_flag, stack, node, offset);
#endif
}
-ccl_device void svm_node_tex_coord_bump_dy(
- KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int *offset)
+ccl_device_noinline int svm_node_tex_coord_bump_dy(
+ const KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int offset)
{
#ifdef __RAY_DIFFERENTIALS__
float3 data;
@@ -192,9 +198,9 @@ ccl_device void svm_node_tex_coord_bump_dy(
}
else {
Transform tfm;
- tfm.x = read_node_float(kg, offset);
- tfm.y = read_node_float(kg, offset);
- tfm.z = read_node_float(kg, offset);
+ tfm.x = read_node_float(kg, &offset);
+ tfm.y = read_node_float(kg, &offset);
+ tfm.z = read_node_float(kg, &offset);
data = transform_point(&tfm, data);
}
break;
@@ -216,7 +222,7 @@ ccl_device void svm_node_tex_coord_bump_dy(
case NODE_TEXCO_WINDOW: {
if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE &&
kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
- data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dy);
+ data = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(0.0f, sd->ray_dP, 0.0f));
else
data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy);
data.z = 0.0f;
@@ -249,12 +255,16 @@ ccl_device void svm_node_tex_coord_bump_dy(
}
stack_store_float3(stack, out_offset, data);
+ return offset;
#else
- svm_node_tex_coord(kg, sd, path_flag, stack, node, offset);
+ return svm_node_tex_coord(kg, sd, path_flag, stack, node, offset);
#endif
}
-ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_normal_map(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node)
{
uint color_offset, strength_offset, normal_offset, space;
svm_unpack_node_uchar4(node.y, &color_offset, &strength_offset, &normal_offset, &space);
@@ -346,7 +356,10 @@ ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *st
stack_store_float3(stack, normal_offset, N);
}
-ccl_device void svm_node_tangent(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_tangent(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node)
{
uint tangent_offset, direction_type, axis;
svm_unpack_node_uchar3(node.y, &tangent_offset, &direction_type, &axis);
diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h
index 062afcfa5ac..c053be96c51 100644
--- a/intern/cycles/kernel/svm/svm_types.h
+++ b/intern/cycles/kernel/svm/svm_types.h
@@ -30,37 +30,6 @@ CCL_NAMESPACE_BEGIN
/* Nodes */
-/* Known frequencies of used nodes, used for selective nodes compilation
- * in the kernel. Currently only affects split OpenCL kernel.
- *
- * Keep as defines so it's easy to check which nodes are to be compiled
- * from preprocessor.
- *
- * Lower the number of group more often the node is used.
- */
-#define NODE_GROUP_LEVEL_0 0
-#define NODE_GROUP_LEVEL_1 1
-#define NODE_GROUP_LEVEL_2 2
-#define NODE_GROUP_LEVEL_3 3
-#define NODE_GROUP_LEVEL_4 4
-#define NODE_GROUP_LEVEL_MAX NODE_GROUP_LEVEL_4
-
-#define NODE_FEATURE_VOLUME (1 << 0)
-#define NODE_FEATURE_HAIR (1 << 1)
-#define NODE_FEATURE_BUMP (1 << 2)
-#define NODE_FEATURE_BUMP_STATE (1 << 3)
-#define NODE_FEATURE_VORONOI_EXTRA (1 << 4)
-/* TODO(sergey): Consider using something like ((uint)(-1)).
- * Need to check carefully operand types around usage of this
- * define first.
- */
-#define NODE_FEATURE_ALL \
- (NODE_FEATURE_VOLUME | NODE_FEATURE_HAIR | NODE_FEATURE_BUMP | NODE_FEATURE_BUMP_STATE | \
- NODE_FEATURE_VORONOI_EXTRA)
-
-#define NODES_GROUP(group) ((group) <= __NODES_MAX_GROUP__)
-#define NODES_FEATURE(feature) ((__NODES_FEATURES__ & (feature)) != 0)
-
typedef enum ShaderNodeType {
NODE_END = 0,
NODE_SHADER_JUMP,
@@ -572,12 +541,8 @@ typedef enum ClosureType {
CLOSURE_BSDF_TRANSPARENT_ID,
/* BSSRDF */
- CLOSURE_BSSRDF_CUBIC_ID,
- CLOSURE_BSSRDF_GAUSSIAN_ID,
- CLOSURE_BSSRDF_PRINCIPLED_ID,
- CLOSURE_BSSRDF_BURLEY_ID,
CLOSURE_BSSRDF_RANDOM_WALK_ID,
- CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID,
+ CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID,
/* Other */
CLOSURE_HOLDOUT_ID,
@@ -620,11 +585,9 @@ typedef enum ClosureType {
type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID || \
type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID || \
type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID)
-#define CLOSURE_IS_BSDF_OR_BSSRDF(type) (type <= CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
+#define CLOSURE_IS_BSDF_OR_BSSRDF(type) (type <= CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID)
#define CLOSURE_IS_BSSRDF(type) \
- (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
-#define CLOSURE_IS_DISK_BSSRDF(type) \
- (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_BURLEY_ID)
+ (type >= CLOSURE_BSSRDF_RANDOM_WALK_ID && type <= CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID)
#define CLOSURE_IS_VOLUME(type) \
(type >= CLOSURE_VOLUME_ID && type <= CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID)
#define CLOSURE_IS_VOLUME_SCATTER(type) (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID)
diff --git a/intern/cycles/kernel/svm/svm_value.h b/intern/cycles/kernel/svm/svm_value.h
index 5b76f2c8832..d0478660094 100644
--- a/intern/cycles/kernel/svm/svm_value.h
+++ b/intern/cycles/kernel/svm/svm_value.h
@@ -19,20 +19,21 @@ CCL_NAMESPACE_BEGIN
/* Value Nodes */
ccl_device void svm_node_value_f(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint ivalue, uint out_offset)
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint ivalue, uint out_offset)
{
stack_store_float(stack, out_offset, __uint_as_float(ivalue));
}
-ccl_device void svm_node_value_v(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint out_offset, int *offset)
+ccl_device int svm_node_value_v(
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint out_offset, int offset)
{
/* read extra data */
- uint4 node1 = read_node(kg, offset);
+ uint4 node1 = read_node(kg, &offset);
float3 p = make_float3(
__uint_as_float(node1.y), __uint_as_float(node1.z), __uint_as_float(node1.w));
stack_store_float3(stack, out_offset, p);
+ return offset;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_vector_rotate.h b/intern/cycles/kernel/svm/svm_vector_rotate.h
index 50045752484..55e1bce0158 100644
--- a/intern/cycles/kernel/svm/svm_vector_rotate.h
+++ b/intern/cycles/kernel/svm/svm_vector_rotate.h
@@ -18,11 +18,11 @@ CCL_NAMESPACE_BEGIN
/* Vector Rotate */
-ccl_device void svm_node_vector_rotate(ShaderData *sd,
- float *stack,
- uint input_stack_offsets,
- uint axis_stack_offsets,
- uint result_stack_offset)
+ccl_device_noinline void svm_node_vector_rotate(ShaderData *sd,
+ float *stack,
+ uint input_stack_offsets,
+ uint axis_stack_offsets,
+ uint result_stack_offset)
{
uint type, vector_stack_offset, rotation_stack_offset, center_stack_offset, axis_stack_offset,
angle_stack_offset, invert;
diff --git a/intern/cycles/kernel/svm/svm_vector_transform.h b/intern/cycles/kernel/svm/svm_vector_transform.h
index 1e95492cf1b..8aedb7e0f54 100644
--- a/intern/cycles/kernel/svm/svm_vector_transform.h
+++ b/intern/cycles/kernel/svm/svm_vector_transform.h
@@ -18,10 +18,10 @@ CCL_NAMESPACE_BEGIN
/* Vector Transform */
-ccl_device void svm_node_vector_transform(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint4 node)
+ccl_device_noinline void svm_node_vector_transform(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node)
{
uint itype, ifrom, ito;
uint vector_in, vector_out;
diff --git a/intern/cycles/kernel/svm/svm_vertex_color.h b/intern/cycles/kernel/svm/svm_vertex_color.h
index 0aa45835522..986ea244f3a 100644
--- a/intern/cycles/kernel/svm/svm_vertex_color.h
+++ b/intern/cycles/kernel/svm/svm_vertex_color.h
@@ -16,12 +16,12 @@
CCL_NAMESPACE_BEGIN
-ccl_device void svm_node_vertex_color(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint layer_id,
- uint color_offset,
- uint alpha_offset)
+ccl_device_noinline void svm_node_vertex_color(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint layer_id,
+ uint color_offset,
+ uint alpha_offset)
{
AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id);
if (descriptor.offset != ATTR_STD_NOT_FOUND) {
@@ -35,18 +35,12 @@ ccl_device void svm_node_vertex_color(KernelGlobals *kg,
}
}
-#ifndef __KERNEL_CUDA__
-ccl_device
-#else
-ccl_device_noinline
-#endif
- void
- svm_node_vertex_color_bump_dx(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint layer_id,
- uint color_offset,
- uint alpha_offset)
+ccl_device_noinline void svm_node_vertex_color_bump_dx(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint layer_id,
+ uint color_offset,
+ uint alpha_offset)
{
AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id);
if (descriptor.offset != ATTR_STD_NOT_FOUND) {
@@ -62,18 +56,12 @@ ccl_device_noinline
}
}
-#ifndef __KERNEL_CUDA__
-ccl_device
-#else
-ccl_device_noinline
-#endif
- void
- svm_node_vertex_color_bump_dy(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint layer_id,
- uint color_offset,
- uint alpha_offset)
+ccl_device_noinline void svm_node_vertex_color_bump_dy(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint layer_id,
+ uint color_offset,
+ uint alpha_offset)
{
AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id);
if (descriptor.offset != ATTR_STD_NOT_FOUND) {
diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h
index d0e7db35fab..b1d2eff7f37 100644
--- a/intern/cycles/kernel/svm/svm_voronoi.h
+++ b/intern/cycles/kernel/svm/svm_voronoi.h
@@ -902,16 +902,17 @@ ccl_device void voronoi_n_sphere_radius_4d(float4 coord, float randomness, float
*outRadius = distance(closestPointToClosestPoint, closestPoint) / 2.0f;
}
-ccl_device void svm_node_tex_voronoi(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint dimensions,
- uint feature,
- uint metric,
- int *offset)
+template<uint node_feature_mask>
+ccl_device_noinline int svm_node_tex_voronoi(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint dimensions,
+ uint feature,
+ uint metric,
+ int offset)
{
- uint4 stack_offsets = read_node(kg, offset);
- uint4 defaults = read_node(kg, offset);
+ uint4 stack_offsets = read_node(kg, &offset);
+ uint4 defaults = read_node(kg, &offset);
uint coord_stack_offset, w_stack_offset, scale_stack_offset, smoothness_stack_offset;
uint exponent_stack_offset, randomness_stack_offset, distance_out_stack_offset,
@@ -997,18 +998,18 @@ ccl_device void svm_node_tex_voronoi(KernelGlobals *kg,
&color_out,
&position_out_2d);
break;
-#if NODES_FEATURE(NODE_FEATURE_VORONOI_EXTRA)
case NODE_VORONOI_SMOOTH_F1:
- voronoi_smooth_f1_2d(coord_2d,
- smoothness,
- exponent,
- randomness,
- voronoi_metric,
- &distance_out,
- &color_out,
- &position_out_2d);
+ if (KERNEL_NODES_FEATURE(VORONOI_EXTRA)) {
+ voronoi_smooth_f1_2d(coord_2d,
+ smoothness,
+ exponent,
+ randomness,
+ voronoi_metric,
+ &distance_out,
+ &color_out,
+ &position_out_2d);
+ }
break;
-#endif
case NODE_VORONOI_F2:
voronoi_f2_2d(coord_2d,
exponent,
@@ -1042,18 +1043,18 @@ ccl_device void svm_node_tex_voronoi(KernelGlobals *kg,
&color_out,
&position_out);
break;
-#if NODES_FEATURE(NODE_FEATURE_VORONOI_EXTRA)
case NODE_VORONOI_SMOOTH_F1:
- voronoi_smooth_f1_3d(coord,
- smoothness,
- exponent,
- randomness,
- voronoi_metric,
- &distance_out,
- &color_out,
- &position_out);
+ if (KERNEL_NODES_FEATURE(VORONOI_EXTRA)) {
+ voronoi_smooth_f1_3d(coord,
+ smoothness,
+ exponent,
+ randomness,
+ voronoi_metric,
+ &distance_out,
+ &color_out,
+ &position_out);
+ }
break;
-#endif
case NODE_VORONOI_F2:
voronoi_f2_3d(coord,
exponent,
@@ -1076,54 +1077,54 @@ ccl_device void svm_node_tex_voronoi(KernelGlobals *kg,
break;
}
-#if NODES_FEATURE(NODE_FEATURE_VORONOI_EXTRA)
case 4: {
- float4 coord_4d = make_float4(coord.x, coord.y, coord.z, w);
- float4 position_out_4d;
- switch (voronoi_feature) {
- case NODE_VORONOI_F1:
- voronoi_f1_4d(coord_4d,
- exponent,
- randomness,
- voronoi_metric,
- &distance_out,
- &color_out,
- &position_out_4d);
- break;
- case NODE_VORONOI_SMOOTH_F1:
- voronoi_smooth_f1_4d(coord_4d,
- smoothness,
- exponent,
- randomness,
- voronoi_metric,
- &distance_out,
- &color_out,
- &position_out_4d);
- break;
- case NODE_VORONOI_F2:
- voronoi_f2_4d(coord_4d,
- exponent,
- randomness,
- voronoi_metric,
- &distance_out,
- &color_out,
- &position_out_4d);
- break;
- case NODE_VORONOI_DISTANCE_TO_EDGE:
- voronoi_distance_to_edge_4d(coord_4d, randomness, &distance_out);
- break;
- case NODE_VORONOI_N_SPHERE_RADIUS:
- voronoi_n_sphere_radius_4d(coord_4d, randomness, &radius_out);
- break;
- default:
- kernel_assert(0);
+ if (KERNEL_NODES_FEATURE(VORONOI_EXTRA)) {
+ float4 coord_4d = make_float4(coord.x, coord.y, coord.z, w);
+ float4 position_out_4d;
+ switch (voronoi_feature) {
+ case NODE_VORONOI_F1:
+ voronoi_f1_4d(coord_4d,
+ exponent,
+ randomness,
+ voronoi_metric,
+ &distance_out,
+ &color_out,
+ &position_out_4d);
+ break;
+ case NODE_VORONOI_SMOOTH_F1:
+ voronoi_smooth_f1_4d(coord_4d,
+ smoothness,
+ exponent,
+ randomness,
+ voronoi_metric,
+ &distance_out,
+ &color_out,
+ &position_out_4d);
+ break;
+ case NODE_VORONOI_F2:
+ voronoi_f2_4d(coord_4d,
+ exponent,
+ randomness,
+ voronoi_metric,
+ &distance_out,
+ &color_out,
+ &position_out_4d);
+ break;
+ case NODE_VORONOI_DISTANCE_TO_EDGE:
+ voronoi_distance_to_edge_4d(coord_4d, randomness, &distance_out);
+ break;
+ case NODE_VORONOI_N_SPHERE_RADIUS:
+ voronoi_n_sphere_radius_4d(coord_4d, randomness, &radius_out);
+ break;
+ default:
+ kernel_assert(0);
+ }
+ position_out_4d = safe_divide_float4_float(position_out_4d, scale);
+ position_out = make_float3(position_out_4d.x, position_out_4d.y, position_out_4d.z);
+ w_out = position_out_4d.w;
}
- position_out_4d = safe_divide_float4_float(position_out_4d, scale);
- position_out = make_float3(position_out_4d.x, position_out_4d.y, position_out_4d.z);
- w_out = position_out_4d.w;
break;
}
-#endif
default:
kernel_assert(0);
}
@@ -1138,6 +1139,7 @@ ccl_device void svm_node_tex_voronoi(KernelGlobals *kg,
stack_store_float(stack, w_out_stack_offset, w_out);
if (stack_valid(radius_out_stack_offset))
stack_store_float(stack, radius_out_stack_offset, radius_out);
+ return offset;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h
index 4bc14f82382..78b75405356 100644
--- a/intern/cycles/kernel/svm/svm_voxel.h
+++ b/intern/cycles/kernel/svm/svm_voxel.h
@@ -19,8 +19,8 @@ CCL_NAMESPACE_BEGIN
/* TODO(sergey): Think of making it more generic volume-type attribute
* sampler.
*/
-ccl_device void svm_node_tex_voxel(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device_noinline int svm_node_tex_voxel(
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
{
uint co_offset, density_out_offset, color_out_offset, space;
svm_unpack_node_uchar4(node.z, &co_offset, &density_out_offset, &color_out_offset, &space);
@@ -33,9 +33,9 @@ ccl_device void svm_node_tex_voxel(
else {
kernel_assert(space == NODE_TEX_VOXEL_SPACE_WORLD);
Transform tfm;
- tfm.x = read_node_float(kg, offset);
- tfm.y = read_node_float(kg, offset);
- tfm.z = read_node_float(kg, offset);
+ tfm.x = read_node_float(kg, &offset);
+ tfm.y = read_node_float(kg, &offset);
+ tfm.z = read_node_float(kg, &offset);
co = transform_point(&tfm, co);
}
@@ -47,6 +47,7 @@ ccl_device void svm_node_tex_voxel(
stack_store_float(stack, density_out_offset, r.w);
if (stack_valid(color_out_offset))
stack_store_float3(stack, color_out_offset, make_float3(r.x, r.y, r.z));
+ return offset;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_wave.h b/intern/cycles/kernel/svm/svm_wave.h
index c4763475b47..00f980c16df 100644
--- a/intern/cycles/kernel/svm/svm_wave.h
+++ b/intern/cycles/kernel/svm/svm_wave.h
@@ -82,11 +82,11 @@ ccl_device_noinline_cpu float svm_wave(NodeWaveType type,
}
}
-ccl_device void svm_node_tex_wave(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device_noinline int svm_node_tex_wave(
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
{
- uint4 node2 = read_node(kg, offset);
- uint4 node3 = read_node(kg, offset);
+ uint4 node2 = read_node(kg, &offset);
+ uint4 node3 = read_node(kg, &offset);
/* RNA properties */
uint type_offset, bands_dir_offset, rings_dir_offset, profile_offset;
@@ -125,6 +125,7 @@ ccl_device void svm_node_tex_wave(
stack_store_float(stack, fac_offset, f);
if (stack_valid(color_offset))
stack_store_float3(stack, color_offset, make_float3(f, f, f));
+ return offset;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_wavelength.h b/intern/cycles/kernel/svm/svm_wavelength.h
index d6144802559..fba8aa63d31 100644
--- a/intern/cycles/kernel/svm/svm_wavelength.h
+++ b/intern/cycles/kernel/svm/svm_wavelength.h
@@ -69,8 +69,8 @@ ccl_static_constant float cie_colour_match[81][3] = {
{0.0002f, 0.0001f, 0.0000f}, {0.0002f, 0.0001f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f},
{0.0001f, 0.0000f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, {0.0000f, 0.0000f, 0.0000f}};
-ccl_device void svm_node_wavelength(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint wavelength, uint color_out)
+ccl_device_noinline void svm_node_wavelength(
+ const KernelGlobals *kg, ShaderData *sd, float *stack, uint wavelength, uint color_out)
{
float lambda_nm = stack_load_float(stack, wavelength);
float ii = (lambda_nm - 380.0f) * (1.0f / 5.0f); // scaled 0..80
diff --git a/intern/cycles/kernel/svm/svm_white_noise.h b/intern/cycles/kernel/svm/svm_white_noise.h
index b30d85acaec..0306d2e7b9c 100644
--- a/intern/cycles/kernel/svm/svm_white_noise.h
+++ b/intern/cycles/kernel/svm/svm_white_noise.h
@@ -16,13 +16,12 @@
CCL_NAMESPACE_BEGIN
-ccl_device void svm_node_tex_white_noise(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint dimensions,
- uint inputs_stack_offsets,
- uint ouptuts_stack_offsets,
- int *offset)
+ccl_device_noinline void svm_node_tex_white_noise(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint dimensions,
+ uint inputs_stack_offsets,
+ uint ouptuts_stack_offsets)
{
uint vector_stack_offset, w_stack_offset, value_stack_offset, color_stack_offset;
svm_unpack_node_uchar2(inputs_stack_offsets, &vector_stack_offset, &w_stack_offset);
diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h
index 49158bd86d5..7ec913789d2 100644
--- a/intern/cycles/kernel/svm/svm_wireframe.h
+++ b/intern/cycles/kernel/svm/svm_wireframe.h
@@ -35,7 +35,7 @@ CCL_NAMESPACE_BEGIN
/* Wireframe Node */
ccl_device_inline float wireframe(
- KernelGlobals *kg, ShaderData *sd, float size, int pixel_size, float3 *P)
+ const KernelGlobals *kg, ShaderData *sd, float size, int pixel_size, float3 *P)
{
#ifdef __HAIR__
if (sd->prim != PRIM_NONE && sd->type & PRIMITIVE_ALL_TRIANGLE)
@@ -88,7 +88,10 @@ ccl_device_inline float wireframe(
return 0.0f;
}
-ccl_device void svm_node_wireframe(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_wireframe(const KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node)
{
uint in_size = node.y;
uint out_fac = node.z;
@@ -100,18 +103,7 @@ ccl_device void svm_node_wireframe(KernelGlobals *kg, ShaderData *sd, float *sta
int pixel_size = (int)use_pixel_size;
/* Calculate wireframe */
-#ifdef __SPLIT_KERNEL__
- /* TODO(sergey): This is because sd is actually a global space,
- * which makes it difficult to re-use same wireframe() function.
- *
- * With OpenCL 2.0 it's possible to avoid this change, but for until
- * then we'll be living with such an exception.
- */
- float3 P = sd->P;
- float f = wireframe(kg, sd, size, pixel_size, &P);
-#else
float f = wireframe(kg, sd, size, pixel_size, &sd->P);
-#endif
/* TODO(sergey): Think of faster way to calculate derivatives. */
if (bump_offset == NODE_BUMP_OFFSET_DX) {