diff options
Diffstat (limited to 'intern/cycles/kernel/svm')
48 files changed, 861 insertions, 661 deletions
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index 000da1fa615..4aee1ef11b3 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -48,16 +48,18 @@ ccl_device_inline float3 stack_load_float3(float *stack, uint a) { kernel_assert(a + 2 < SVM_STACK_SIZE); - return make_float3(stack[a + 0], stack[a + 1], stack[a + 2]); + float *stack_a = stack + a; + return make_float3(stack_a[0], stack_a[1], stack_a[2]); } ccl_device_inline void stack_store_float3(float *stack, uint a, float3 f) { kernel_assert(a + 2 < SVM_STACK_SIZE); - stack[a + 0] = f.x; - stack[a + 1] = f.y; - stack[a + 2] = f.z; + float *stack_a = stack + a; + stack_a[0] = f.x; + stack_a[1] = f.y; + stack_a[2] = f.z; } ccl_device_inline float stack_load_float(float *stack, uint a) @@ -105,14 +107,14 @@ ccl_device_inline bool stack_valid(uint a) /* Reading Nodes */ -ccl_device_inline uint4 read_node(KernelGlobals *kg, int *offset) +ccl_device_inline uint4 read_node(const KernelGlobals *kg, int *offset) { uint4 node = kernel_tex_fetch(__svm_nodes, *offset); (*offset)++; return node; } -ccl_device_inline float4 read_node_float(KernelGlobals *kg, int *offset) +ccl_device_inline float4 read_node_float(const KernelGlobals *kg, int *offset) { uint4 node = kernel_tex_fetch(__svm_nodes, *offset); float4 f = make_float4(__uint_as_float(node.x), @@ -123,7 +125,7 @@ ccl_device_inline float4 read_node_float(KernelGlobals *kg, int *offset) return f; } -ccl_device_inline float4 fetch_node_float(KernelGlobals *kg, int offset) +ccl_device_inline float4 fetch_node_float(const KernelGlobals *kg, int offset) { uint4 node = kernel_tex_fetch(__svm_nodes, offset); return make_float4(__uint_as_float(node.x), @@ -217,26 +219,11 @@ CCL_NAMESPACE_END CCL_NAMESPACE_BEGIN /* Main Interpreter Loop */ -#if defined(__KERNEL_OPTIX__) && defined(__SHADER_RAYTRACE__) -ccl_device_inline void svm_eval_nodes(KernelGlobals *kg, - ShaderData *sd, - ccl_addr_space PathState *state, - ccl_global float *buffer, - ShaderType type, - int path_flag) -{ - optixDirectCall<void>(0, kg, sd, state, buffer, type, path_flag); -} -extern "C" __device__ void __direct_callable__svm_eval_nodes( -#else -ccl_device_noinline void svm_eval_nodes( -#endif - KernelGlobals *kg, - ShaderData *sd, - ccl_addr_space PathState *state, - ccl_global float *buffer, - ShaderType type, - int path_flag) +template<uint node_feature_mask, ShaderType type> +ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS, + ShaderData *sd, + ccl_global float *render_buffer, + int path_flag) { float stack[SVM_STACK_SIZE]; int offset = sd->shader & SHADER_MASK; @@ -247,7 +234,6 @@ ccl_device_noinline void svm_eval_nodes( switch (node.x) { case NODE_END: return; -#if NODES_GROUP(NODE_GROUP_LEVEL_0) case NODE_SHADER_JUMP: { if (type == SHADER_TYPE_SURFACE) offset = node.y; @@ -260,13 +246,18 @@ ccl_device_noinline void svm_eval_nodes( break; } case NODE_CLOSURE_BSDF: - svm_node_closure_bsdf(kg, sd, stack, node, type, path_flag, &offset); + offset = svm_node_closure_bsdf<node_feature_mask, type>( + kg, sd, stack, node, path_flag, offset); break; case NODE_CLOSURE_EMISSION: - svm_node_closure_emission(sd, stack, node); + if (KERNEL_NODES_FEATURE(EMISSION)) { + svm_node_closure_emission(sd, stack, node); + } break; case NODE_CLOSURE_BACKGROUND: - svm_node_closure_background(sd, stack, node); + if (KERNEL_NODES_FEATURE(EMISSION)) { + svm_node_closure_background(sd, stack, node); + } break; case NODE_CLOSURE_SET_WEIGHT: svm_node_closure_set_weight(sd, node.y, node.z, node.w); @@ -275,7 +266,9 @@ ccl_device_noinline void svm_eval_nodes( svm_node_closure_weight(sd, stack, node.y); break; case NODE_EMISSION_WEIGHT: - svm_node_emission_weight(kg, sd, stack, node); + if (KERNEL_NODES_FEATURE(EMISSION)) { + svm_node_emission_weight(kg, sd, stack, node); + } break; case NODE_MIX_CLOSURE: svm_node_mix_closure(sd, stack, node); @@ -295,86 +288,108 @@ ccl_device_noinline void svm_eval_nodes( svm_node_convert(kg, sd, stack, node.y, node.z, node.w); break; case NODE_TEX_COORD: - svm_node_tex_coord(kg, sd, path_flag, stack, node, &offset); + offset = svm_node_tex_coord(kg, sd, path_flag, stack, node, offset); break; case NODE_VALUE_F: svm_node_value_f(kg, sd, stack, node.y, node.z); break; case NODE_VALUE_V: - svm_node_value_v(kg, sd, stack, node.y, &offset); + offset = svm_node_value_v(kg, sd, stack, node.y, offset); break; case NODE_ATTR: - svm_node_attr(kg, sd, stack, node); + svm_node_attr<node_feature_mask>(kg, sd, stack, node); break; case NODE_VERTEX_COLOR: svm_node_vertex_color(kg, sd, stack, node.y, node.z, node.w); break; -# if NODES_FEATURE(NODE_FEATURE_BUMP) case NODE_GEOMETRY_BUMP_DX: - svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z); + if (KERNEL_NODES_FEATURE(BUMP)) { + svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z); + } break; case NODE_GEOMETRY_BUMP_DY: - svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z); + if (KERNEL_NODES_FEATURE(BUMP)) { + svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z); + } break; case NODE_SET_DISPLACEMENT: - svm_node_set_displacement(kg, sd, stack, node.y); + if (KERNEL_NODES_FEATURE(BUMP)) { + svm_node_set_displacement(kg, sd, stack, node.y); + } break; case NODE_DISPLACEMENT: - svm_node_displacement(kg, sd, stack, node); + if (KERNEL_NODES_FEATURE(BUMP)) { + svm_node_displacement(kg, sd, stack, node); + } break; case NODE_VECTOR_DISPLACEMENT: - svm_node_vector_displacement(kg, sd, stack, node, &offset); + if (KERNEL_NODES_FEATURE(BUMP)) { + offset = svm_node_vector_displacement(kg, sd, stack, node, offset); + } break; -# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */ case NODE_TEX_IMAGE: - svm_node_tex_image(kg, sd, stack, node, &offset); + offset = svm_node_tex_image(kg, sd, stack, node, offset); break; case NODE_TEX_IMAGE_BOX: svm_node_tex_image_box(kg, sd, stack, node); break; case NODE_TEX_NOISE: - svm_node_tex_noise(kg, sd, stack, node.y, node.z, node.w, &offset); + offset = svm_node_tex_noise(kg, sd, stack, node.y, node.z, node.w, offset); break; -# if NODES_FEATURE(NODE_FEATURE_BUMP) case NODE_SET_BUMP: - svm_node_set_bump(kg, sd, stack, node); + if (KERNEL_NODES_FEATURE(BUMP)) { + svm_node_set_bump(kg, sd, stack, node); + } break; case NODE_ATTR_BUMP_DX: - svm_node_attr_bump_dx(kg, sd, stack, node); + if (KERNEL_NODES_FEATURE(BUMP)) { + svm_node_attr_bump_dx(kg, sd, stack, node); + } break; case NODE_ATTR_BUMP_DY: - svm_node_attr_bump_dy(kg, sd, stack, node); + if (KERNEL_NODES_FEATURE(BUMP)) { + svm_node_attr_bump_dy(kg, sd, stack, node); + } break; case NODE_VERTEX_COLOR_BUMP_DX: - svm_node_vertex_color_bump_dx(kg, sd, stack, node.y, node.z, node.w); + if (KERNEL_NODES_FEATURE(BUMP)) { + svm_node_vertex_color_bump_dx(kg, sd, stack, node.y, node.z, node.w); + } break; case NODE_VERTEX_COLOR_BUMP_DY: - svm_node_vertex_color_bump_dy(kg, sd, stack, node.y, node.z, node.w); + if (KERNEL_NODES_FEATURE(BUMP)) { + svm_node_vertex_color_bump_dy(kg, sd, stack, node.y, node.z, node.w); + } break; case NODE_TEX_COORD_BUMP_DX: - svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, &offset); + if (KERNEL_NODES_FEATURE(BUMP)) { + offset = svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, offset); + } break; case NODE_TEX_COORD_BUMP_DY: - svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, &offset); + if (KERNEL_NODES_FEATURE(BUMP)) { + offset = svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, offset); + } break; case NODE_CLOSURE_SET_NORMAL: - svm_node_set_normal(kg, sd, stack, node.y, node.z); + if (KERNEL_NODES_FEATURE(BUMP)) { + svm_node_set_normal(kg, sd, stack, node.y, node.z); + } break; -# if NODES_FEATURE(NODE_FEATURE_BUMP_STATE) case NODE_ENTER_BUMP_EVAL: - svm_node_enter_bump_eval(kg, sd, stack, node.y); + if (KERNEL_NODES_FEATURE(BUMP_STATE)) { + svm_node_enter_bump_eval(kg, sd, stack, node.y); + } break; case NODE_LEAVE_BUMP_EVAL: - svm_node_leave_bump_eval(kg, sd, stack, node.y); + if (KERNEL_NODES_FEATURE(BUMP_STATE)) { + svm_node_leave_bump_eval(kg, sd, stack, node.y); + } break; -# endif /* NODES_FEATURE(NODE_FEATURE_BUMP_STATE) */ -# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */ case NODE_HSV: - svm_node_hsv(kg, sd, stack, node, &offset); + svm_node_hsv(kg, sd, stack, node); break; -#endif /* NODES_GROUP(NODE_GROUP_LEVEL_0) */ -#if NODES_GROUP(NODE_GROUP_LEVEL_1) case NODE_CLOSURE_HOLDOUT: svm_node_closure_holdout(sd, stack, node); break; @@ -384,22 +399,24 @@ ccl_device_noinline void svm_eval_nodes( case NODE_LAYER_WEIGHT: svm_node_layer_weight(sd, stack, node); break; -# if NODES_FEATURE(NODE_FEATURE_VOLUME) case NODE_CLOSURE_VOLUME: - svm_node_closure_volume(kg, sd, stack, node, type); + if (KERNEL_NODES_FEATURE(VOLUME)) { + svm_node_closure_volume<type>(kg, sd, stack, node); + } break; case NODE_PRINCIPLED_VOLUME: - svm_node_principled_volume(kg, sd, stack, node, type, path_flag, &offset); + if (KERNEL_NODES_FEATURE(VOLUME)) { + offset = svm_node_principled_volume<type>(kg, sd, stack, node, path_flag, offset); + } break; -# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */ case NODE_MATH: - svm_node_math(kg, sd, stack, node.y, node.z, node.w, &offset); + svm_node_math(kg, sd, stack, node.y, node.z, node.w); break; case NODE_VECTOR_MATH: - svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, &offset); + offset = svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, offset); break; case NODE_RGB_RAMP: - svm_node_rgb_ramp(kg, sd, stack, node, &offset); + offset = svm_node_rgb_ramp(kg, sd, stack, node, offset); break; case NODE_GAMMA: svm_node_gamma(sd, stack, node.y, node.z, node.w); @@ -408,7 +425,7 @@ ccl_device_noinline void svm_eval_nodes( svm_node_brightness(sd, stack, node.y, node.z, node.w); break; case NODE_LIGHT_PATH: - svm_node_light_path(sd, state, stack, node.y, node.z, path_flag); + svm_node_light_path(INTEGRATOR_STATE_PASS, sd, stack, node.y, node.z, path_flag); break; case NODE_OBJECT_INFO: svm_node_object_info(kg, sd, stack, node.y, node.z); @@ -416,22 +433,22 @@ ccl_device_noinline void svm_eval_nodes( case NODE_PARTICLE_INFO: svm_node_particle_info(kg, sd, stack, node.y, node.z); break; -# if defined(__HAIR__) && NODES_FEATURE(NODE_FEATURE_HAIR) +#if defined(__HAIR__) case NODE_HAIR_INFO: - svm_node_hair_info(kg, sd, stack, node.y, node.z); + if (KERNEL_NODES_FEATURE(HAIR)) { + svm_node_hair_info(kg, sd, stack, node.y, node.z); + } break; -# endif /* NODES_FEATURE(NODE_FEATURE_HAIR) */ -#endif /* NODES_GROUP(NODE_GROUP_LEVEL_1) */ +#endif -#if NODES_GROUP(NODE_GROUP_LEVEL_2) case NODE_TEXTURE_MAPPING: - svm_node_texture_mapping(kg, sd, stack, node.y, node.z, &offset); + offset = svm_node_texture_mapping(kg, sd, stack, node.y, node.z, offset); break; case NODE_MAPPING: - svm_node_mapping(kg, sd, stack, node.y, node.z, node.w, &offset); + svm_node_mapping(kg, sd, stack, node.y, node.z, node.w); break; case NODE_MIN_MAX: - svm_node_min_max(kg, sd, stack, node.y, node.z, &offset); + offset = svm_node_min_max(kg, sd, stack, node.y, node.z, offset); break; case NODE_CAMERA: svm_node_camera(kg, sd, stack, node.y, node.z, node.w); @@ -440,47 +457,46 @@ ccl_device_noinline void svm_eval_nodes( svm_node_tex_environment(kg, sd, stack, node); break; case NODE_TEX_SKY: - svm_node_tex_sky(kg, sd, stack, node, &offset); + offset = svm_node_tex_sky(kg, sd, stack, node, offset); break; case NODE_TEX_GRADIENT: svm_node_tex_gradient(sd, stack, node); break; case NODE_TEX_VORONOI: - svm_node_tex_voronoi(kg, sd, stack, node.y, node.z, node.w, &offset); + offset = svm_node_tex_voronoi<node_feature_mask>( + kg, sd, stack, node.y, node.z, node.w, offset); break; case NODE_TEX_MUSGRAVE: - svm_node_tex_musgrave(kg, sd, stack, node.y, node.z, node.w, &offset); + offset = svm_node_tex_musgrave(kg, sd, stack, node.y, node.z, node.w, offset); break; case NODE_TEX_WAVE: - svm_node_tex_wave(kg, sd, stack, node, &offset); + offset = svm_node_tex_wave(kg, sd, stack, node, offset); break; case NODE_TEX_MAGIC: - svm_node_tex_magic(kg, sd, stack, node, &offset); + offset = svm_node_tex_magic(kg, sd, stack, node, offset); break; case NODE_TEX_CHECKER: svm_node_tex_checker(kg, sd, stack, node); break; case NODE_TEX_BRICK: - svm_node_tex_brick(kg, sd, stack, node, &offset); + offset = svm_node_tex_brick(kg, sd, stack, node, offset); break; case NODE_TEX_WHITE_NOISE: - svm_node_tex_white_noise(kg, sd, stack, node.y, node.z, node.w, &offset); + svm_node_tex_white_noise(kg, sd, stack, node.y, node.z, node.w); break; case NODE_NORMAL: - svm_node_normal(kg, sd, stack, node.y, node.z, node.w, &offset); + offset = svm_node_normal(kg, sd, stack, node.y, node.z, node.w, offset); break; case NODE_LIGHT_FALLOFF: svm_node_light_falloff(sd, stack, node); break; case NODE_IES: - svm_node_ies(kg, sd, stack, node, &offset); + svm_node_ies(kg, sd, stack, node); break; -#endif /* NODES_GROUP(NODE_GROUP_LEVEL_2) */ -#if NODES_GROUP(NODE_GROUP_LEVEL_3) case NODE_RGB_CURVES: case NODE_VECTOR_CURVES: - svm_node_curves(kg, sd, stack, node, &offset); + offset = svm_node_curves(kg, sd, stack, node, offset); break; case NODE_TANGENT: svm_node_tangent(kg, sd, stack, node); @@ -492,7 +508,7 @@ ccl_device_noinline void svm_eval_nodes( svm_node_invert(sd, stack, node.y, node.z, node.w); break; case NODE_MIX: - svm_node_mix(kg, sd, stack, node.y, node.z, node.w, &offset); + offset = svm_node_mix(kg, sd, stack, node.y, node.z, node.w, offset); break; case NODE_SEPARATE_VECTOR: svm_node_separate_vector(sd, stack, node.y, node.z, node.w); @@ -501,10 +517,10 @@ ccl_device_noinline void svm_eval_nodes( svm_node_combine_vector(sd, stack, node.y, node.z, node.w); break; case NODE_SEPARATE_HSV: - svm_node_separate_hsv(kg, sd, stack, node.y, node.z, node.w, &offset); + offset = svm_node_separate_hsv(kg, sd, stack, node.y, node.z, node.w, offset); break; case NODE_COMBINE_HSV: - svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, &offset); + offset = svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, offset); break; case NODE_VECTOR_ROTATE: svm_node_vector_rotate(sd, stack, node.y, node.z, node.w); @@ -522,39 +538,36 @@ ccl_device_noinline void svm_eval_nodes( svm_node_blackbody(kg, sd, stack, node.y, node.z); break; case NODE_MAP_RANGE: - svm_node_map_range(kg, sd, stack, node.y, node.z, node.w, &offset); + offset = svm_node_map_range(kg, sd, stack, node.y, node.z, node.w, offset); break; case NODE_CLAMP: - svm_node_clamp(kg, sd, stack, node.y, node.z, node.w, &offset); + offset = svm_node_clamp(kg, sd, stack, node.y, node.z, node.w, offset); break; -# ifdef __SHADER_RAYTRACE__ +#ifdef __SHADER_RAYTRACE__ case NODE_BEVEL: - svm_node_bevel(kg, sd, state, stack, node); + svm_node_bevel<node_feature_mask>(INTEGRATOR_STATE_PASS, sd, stack, node); break; case NODE_AMBIENT_OCCLUSION: - svm_node_ao(kg, sd, state, stack, node); + svm_node_ao<node_feature_mask>(INTEGRATOR_STATE_PASS, sd, stack, node); break; -# endif /* __SHADER_RAYTRACE__ */ -#endif /* NODES_GROUP(NODE_GROUP_LEVEL_3) */ +#endif -#if NODES_GROUP(NODE_GROUP_LEVEL_4) -# if NODES_FEATURE(NODE_FEATURE_VOLUME) case NODE_TEX_VOXEL: - svm_node_tex_voxel(kg, sd, stack, node, &offset); + if (KERNEL_NODES_FEATURE(VOLUME)) { + offset = svm_node_tex_voxel(kg, sd, stack, node, offset); + } break; -# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */ case NODE_AOV_START: - if (!svm_node_aov_check(state, buffer)) { + if (!svm_node_aov_check(path_flag, render_buffer)) { return; } break; case NODE_AOV_COLOR: - svm_node_aov_color(kg, sd, stack, node, buffer); + svm_node_aov_color(INTEGRATOR_STATE_PASS, sd, stack, node, render_buffer); break; case NODE_AOV_VALUE: - svm_node_aov_value(kg, sd, stack, node, buffer); + svm_node_aov_value(INTEGRATOR_STATE_PASS, sd, stack, node, render_buffer); break; -#endif /* NODES_GROUP(NODE_GROUP_LEVEL_4) */ default: kernel_assert(!"Unknown node type was passed to the SVM machine"); return; diff --git a/intern/cycles/kernel/svm/svm_ao.h b/intern/cycles/kernel/svm/svm_ao.h index 4cb986b897a..34ac2cb8fbf 100644 --- a/intern/cycles/kernel/svm/svm_ao.h +++ b/intern/cycles/kernel/svm/svm_ao.h @@ -14,20 +14,25 @@ * limitations under the License. */ +#include "kernel/bvh/bvh.h" + CCL_NAMESPACE_BEGIN #ifdef __SHADER_RAYTRACE__ -ccl_device_noinline float svm_ao(KernelGlobals *kg, - ShaderData *sd, - float3 N, - ccl_addr_space PathState *state, - float max_dist, - int num_samples, - int flags) +# ifdef __KERNEL_OPTIX__ +extern "C" __device__ float __direct_callable__svm_node_ao(INTEGRATOR_STATE_CONST_ARGS, +# else +ccl_device float svm_ao(INTEGRATOR_STATE_CONST_ARGS, +# endif + ShaderData *sd, + float3 N, + float max_dist, + int num_samples, + int flags) { if (flags & NODE_AO_GLOBAL_RADIUS) { - max_dist = kernel_data.background.ao_distance; + max_dist = kernel_data.integrator.ao_bounces_distance; } /* Early out if no sampling needed. */ @@ -47,11 +52,14 @@ ccl_device_noinline float svm_ao(KernelGlobals *kg, float3 T, B; make_orthonormals(N, &T, &B); + /* TODO: support ray-tracing in shadow shader evaluation? */ + RNGState rng_state; + path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state); + int unoccluded = 0; for (int sample = 0; sample < num_samples; sample++) { float disk_u, disk_v; - path_branched_rng_2D( - kg, state->rng_hash, state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v); + path_branched_rng_2D(kg, &rng_state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v); float2 d = concentric_sample_disk(disk_u, disk_v); float3 D = make_float3(d.x, d.y, safe_sqrtf(1.0f - dot(d, d))); @@ -62,8 +70,8 @@ ccl_device_noinline float svm_ao(KernelGlobals *kg, ray.D = D.x * T + D.y * B + D.z * N; ray.t = max_dist; ray.time = sd->time; - ray.dP = sd->dP; - ray.dD = differential3_zero(); + ray.dP = differential_zero_compact(); + ray.dD = differential_zero_compact(); if (flags & NODE_AO_ONLY_LOCAL) { if (!scene_intersect_local(kg, &ray, NULL, sd->object, NULL, 0)) { @@ -81,8 +89,14 @@ ccl_device_noinline float svm_ao(KernelGlobals *kg, return ((float)unoccluded) / num_samples; } -ccl_device void svm_node_ao( - KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float *stack, uint4 node) +template<uint node_feature_mask> +# if defined(__KERNEL_OPTIX__) +ccl_device_inline +# else +ccl_device_noinline +# endif + void + svm_node_ao(INTEGRATOR_STATE_CONST_ARGS, ShaderData *sd, float *stack, uint4 node) { uint flags, dist_offset, normal_offset, out_ao_offset; svm_unpack_node_uchar4(node.y, &flags, &dist_offset, &normal_offset, &out_ao_offset); @@ -92,7 +106,16 @@ ccl_device void svm_node_ao( float dist = stack_load_float_default(stack, dist_offset, node.w); float3 normal = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N; - float ao = svm_ao(kg, sd, normal, state, dist, samples, flags); + + float ao = 1.0f; + + if (KERNEL_NODES_FEATURE(RAYTRACE)) { +# ifdef __KERNEL_OPTIX__ + ao = optixDirectCall<float>(0, INTEGRATOR_STATE_PASS, sd, normal, dist, samples, flags); +# else + ao = svm_ao(INTEGRATOR_STATE_PASS, sd, normal, dist, samples, flags); +# endif + } if (stack_valid(out_ao_offset)) { stack_store_float(stack, out_ao_offset, ao); diff --git a/intern/cycles/kernel/svm/svm_aov.h b/intern/cycles/kernel/svm/svm_aov.h index 899e466d099..26dec9717b3 100644 --- a/intern/cycles/kernel/svm/svm_aov.h +++ b/intern/cycles/kernel/svm/svm_aov.h @@ -14,36 +14,50 @@ * limitations under the License. */ +#include "kernel/kernel_write_passes.h" + CCL_NAMESPACE_BEGIN -ccl_device_inline bool svm_node_aov_check(ccl_addr_space PathState *state, - ccl_global float *buffer) +ccl_device_inline bool svm_node_aov_check(const int path_flag, ccl_global float *render_buffer) { - int path_flag = state->flag; - bool is_primary = (path_flag & PATH_RAY_CAMERA) && (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)); - return ((buffer != NULL) && is_primary); + return ((render_buffer != NULL) && is_primary); } -ccl_device void svm_node_aov_color( - KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ccl_global float *buffer) +ccl_device void svm_node_aov_color(INTEGRATOR_STATE_CONST_ARGS, + ShaderData *sd, + float *stack, + uint4 node, + ccl_global float *render_buffer) { float3 val = stack_load_float3(stack, node.y); - if (buffer) { - kernel_write_pass_float4(buffer + kernel_data.film.pass_aov_color + 4 * node.z, - make_float4(val.x, val.y, val.z, 1.0f)); + if (render_buffer && !INTEGRATOR_STATE_IS_NULL) { + const uint32_t render_pixel_index = INTEGRATOR_STATE(path, render_pixel_index); + const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * + kernel_data.film.pass_stride; + ccl_global float *buffer = render_buffer + render_buffer_offset + + (kernel_data.film.pass_aov_color + node.z); + kernel_write_pass_float3(buffer, make_float3(val.x, val.y, val.z)); } } -ccl_device void svm_node_aov_value( - KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ccl_global float *buffer) +ccl_device void svm_node_aov_value(INTEGRATOR_STATE_CONST_ARGS, + ShaderData *sd, + float *stack, + uint4 node, + ccl_global float *render_buffer) { float val = stack_load_float(stack, node.y); - if (buffer) { - kernel_write_pass_float(buffer + kernel_data.film.pass_aov_value + node.z, val); + if (render_buffer && !INTEGRATOR_STATE_IS_NULL) { + const uint32_t render_pixel_index = INTEGRATOR_STATE(path, render_pixel_index); + const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * + kernel_data.film.pass_stride; + ccl_global float *buffer = render_buffer + render_buffer_offset + + (kernel_data.film.pass_aov_value + node.z); + kernel_write_pass_float(buffer, val); } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h index 62740824ad1..5f94b20af73 100644 --- a/intern/cycles/kernel/svm/svm_attribute.h +++ b/intern/cycles/kernel/svm/svm_attribute.h @@ -18,8 +18,11 @@ CCL_NAMESPACE_BEGIN /* Attribute Node */ -ccl_device AttributeDescriptor svm_node_attr_init( - KernelGlobals *kg, ShaderData *sd, uint4 node, NodeAttributeOutputType *type, uint *out_offset) +ccl_device AttributeDescriptor svm_node_attr_init(const KernelGlobals *kg, + ShaderData *sd, + uint4 node, + NodeAttributeOutputType *type, + uint *out_offset) { *out_offset = node.z; *type = (NodeAttributeOutputType)node.w; @@ -44,31 +47,37 @@ ccl_device AttributeDescriptor svm_node_attr_init( return desc; } -ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +template<uint node_feature_mask> +ccl_device_noinline void svm_node_attr(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { NodeAttributeOutputType type = NODE_ATTR_OUTPUT_FLOAT; uint out_offset = 0; AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); #ifdef __VOLUME__ - /* Volumes - * NOTE: moving this into its own node type might help improve performance. */ - if (primitive_is_volume_attribute(sd, desc)) { - const float4 value = volume_attribute_float4(kg, sd, desc); + if (KERNEL_NODES_FEATURE(VOLUME)) { + /* Volumes + * NOTE: moving this into its own node type might help improve performance. */ + if (primitive_is_volume_attribute(sd, desc)) { + const float4 value = volume_attribute_float4(kg, sd, desc); - if (type == NODE_ATTR_OUTPUT_FLOAT) { - const float f = volume_attribute_value_to_float(value); - stack_store_float(stack, out_offset, f); - } - else if (type == NODE_ATTR_OUTPUT_FLOAT3) { - const float3 f = volume_attribute_value_to_float3(value); - stack_store_float3(stack, out_offset, f); + if (type == NODE_ATTR_OUTPUT_FLOAT) { + const float f = volume_attribute_value_to_float(value); + stack_store_float(stack, out_offset, f); + } + else if (type == NODE_ATTR_OUTPUT_FLOAT3) { + const float3 f = volume_attribute_value_to_float3(value); + stack_store_float3(stack, out_offset, f); + } + else { + const float f = volume_attribute_value_to_alpha(value); + stack_store_float(stack, out_offset, f); + } + return; } - else { - const float f = volume_attribute_value_to_alpha(value); - stack_store_float(stack, out_offset, f); - } - return; } #endif @@ -139,7 +148,10 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u } } -ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +ccl_device_noinline void svm_node_attr_bump_dx(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { NodeAttributeOutputType type = NODE_ATTR_OUTPUT_FLOAT; uint out_offset = 0; @@ -232,7 +244,10 @@ ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float * } } -ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +ccl_device_noinline void svm_node_attr_bump_dy(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { NodeAttributeOutputType type = NODE_ATTR_OUTPUT_FLOAT; uint out_offset = 0; diff --git a/intern/cycles/kernel/svm/svm_bevel.h b/intern/cycles/kernel/svm/svm_bevel.h index bf5957ec9e4..9d7ce202d49 100644 --- a/intern/cycles/kernel/svm/svm_bevel.h +++ b/intern/cycles/kernel/svm/svm_bevel.h @@ -14,21 +14,95 @@ * limitations under the License. */ +#include "kernel/bvh/bvh.h" +#include "kernel/kernel_montecarlo.h" +#include "kernel/kernel_random.h" + CCL_NAMESPACE_BEGIN #ifdef __SHADER_RAYTRACE__ +/* Planar Cubic BSSRDF falloff, reused for bevel. + * + * This is basically (Rm - x)^3, with some factors to normalize it. For sampling + * we integrate 2*pi*x * (Rm - x)^3, which gives us a quintic equation that as + * far as I can tell has no closed form solution. So we get an iterative solution + * instead with newton-raphson. */ + +ccl_device float svm_bevel_cubic_eval(const float radius, float r) +{ + const float Rm = radius; + + if (r >= Rm) + return 0.0f; + + /* integrate (2*pi*r * 10*(R - r)^3)/(pi * R^5) from 0 to R = 1 */ + const float Rm5 = (Rm * Rm) * (Rm * Rm) * Rm; + const float f = Rm - r; + const float num = f * f * f; + + return (10.0f * num) / (Rm5 * M_PI_F); +} + +ccl_device float svm_bevel_cubic_pdf(const float radius, float r) +{ + return svm_bevel_cubic_eval(radius, r); +} + +/* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */ +ccl_device_forceinline float svm_bevel_cubic_quintic_root_find(float xi) +{ + /* newton-raphson iteration, usually succeeds in 2-4 iterations, except + * outside 0.02 ... 0.98 where it can go up to 10, so overall performance + * should not be too bad */ + const float tolerance = 1e-6f; + const int max_iteration_count = 10; + float x = 0.25f; + int i; + + for (i = 0; i < max_iteration_count; i++) { + float x2 = x * x; + float x3 = x2 * x; + float nx = (1.0f - x); + + float f = 10.0f * x2 - 20.0f * x3 + 15.0f * x2 * x2 - 4.0f * x2 * x3 - xi; + float f_ = 20.0f * (x * nx) * (nx * nx); + + if (fabsf(f) < tolerance || f_ == 0.0f) + break; + + x = saturate(x - f / f_); + } + + return x; +} + +ccl_device void svm_bevel_cubic_sample(const float radius, float xi, float *r, float *h) +{ + float Rm = radius; + float r_ = svm_bevel_cubic_quintic_root_find(xi); + + r_ *= Rm; + *r = r_; + + /* h^2 + r^2 = Rm^2 */ + *h = safe_sqrtf(Rm * Rm - r_ * r_); +} + /* Bevel shader averaging normals from nearby surfaces. * * Sampling strategy from: BSSRDF Importance Sampling, SIGGRAPH 2013 * http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf */ -ccl_device_noinline float3 svm_bevel(KernelGlobals *kg, - ShaderData *sd, - ccl_addr_space PathState *state, - float radius, - int num_samples) +# ifdef __KERNEL_OPTIX__ +extern "C" __device__ float3 __direct_callable__svm_node_bevel(INTEGRATOR_STATE_CONST_ARGS, +# else +ccl_device float3 svm_bevel(INTEGRATOR_STATE_CONST_ARGS, +# endif + ShaderData *sd, + float radius, + int num_samples) { /* Early out if no sampling needed. */ if (radius <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) { @@ -41,21 +115,27 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg, } /* Don't bevel for blurry indirect rays. */ - if (state->min_ray_pdf < 8.0f) { + if (INTEGRATOR_STATE(path, min_ray_pdf) < 8.0f) { return sd->N; } /* Setup for multi intersection. */ LocalIntersection isect; - uint lcg_state = lcg_state_init_addrspace(state, 0x64c6a40e); + uint lcg_state = lcg_state_init(INTEGRATOR_STATE(path, rng_hash), + INTEGRATOR_STATE(path, rng_offset), + INTEGRATOR_STATE(path, sample), + 0x64c6a40e); /* Sample normals from surrounding points on surface. */ float3 sum_N = make_float3(0.0f, 0.0f, 0.0f); + /* TODO: support ray-tracing in shadow shader evaluation? */ + RNGState rng_state; + path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state); + for (int sample = 0; sample < num_samples; sample++) { float disk_u, disk_v; - path_branched_rng_2D( - kg, state->rng_hash, state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v); + path_branched_rng_2D(kg, &rng_state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v); /* Pick random axis in local frame and point on disk. */ float3 disk_N, disk_T, disk_B; @@ -97,7 +177,7 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg, float disk_height; /* Perhaps find something better than Cubic BSSRDF, but happens to work well. */ - bssrdf_cubic_sample(radius, 0.0f, disk_r, &disk_r, &disk_height); + svm_bevel_cubic_sample(radius, disk_r, &disk_r, &disk_height); float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B; @@ -106,8 +186,8 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg, ray->P = sd->P + disk_N * disk_height + disk_P; ray->D = -disk_N; ray->t = 2.0f * disk_height; - ray->dP = sd->dP; - ray->dD = differential3_zero(); + ray->dP = differential_zero_compact(); + ray->dD = differential_zero_compact(); ray->time = sd->time; /* Intersect with the same object. if multiple intersections are found it @@ -120,14 +200,16 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg, /* Quickly retrieve P and Ng without setting up ShaderData. */ float3 hit_P; if (sd->type & PRIMITIVE_TRIANGLE) { - hit_P = triangle_refine_local(kg, sd, &isect.hits[hit], ray); + hit_P = triangle_refine_local( + kg, sd, ray->P, ray->D, ray->t, isect.hits[hit].object, isect.hits[hit].prim); } # ifdef __OBJECT_MOTION__ else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) { float3 verts[3]; motion_triangle_vertices( kg, sd->object, kernel_tex_fetch(__prim_index, isect.hits[hit].prim), sd->time, verts); - hit_P = motion_triangle_refine_local(kg, sd, &isect.hits[hit], ray, verts); + hit_P = motion_triangle_refine_local( + kg, sd, ray->P, ray->D, ray->t, isect.hits[hit].object, isect.hits[hit].prim, verts); } # endif /* __OBJECT_MOTION__ */ @@ -173,7 +255,7 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg, /* Multiple importance sample between 3 axes, power heuristic * found to be slightly better than balance heuristic. pdf_N - * in the MIS weight and denominator cancelled out. */ + * in the MIS weight and denominator canceled out. */ float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B)); if (isect.num_hits > LOCAL_MAX_HITS) { w *= isect.num_hits / (float)LOCAL_MAX_HITS; @@ -183,8 +265,8 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg, float r = len(hit_P - sd->P); /* Compute weight. */ - float pdf = bssrdf_cubic_pdf(radius, 0.0f, r); - float disk_pdf = bssrdf_cubic_pdf(radius, 0.0f, disk_r); + float pdf = svm_bevel_cubic_pdf(radius, r); + float disk_pdf = svm_bevel_cubic_pdf(radius, disk_r); w *= pdf / disk_pdf; @@ -198,19 +280,34 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg, return is_zero(N) ? sd->N : (sd->flag & SD_BACKFACING) ? -N : N; } -ccl_device void svm_node_bevel( - KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float *stack, uint4 node) +template<uint node_feature_mask> +# if defined(__KERNEL_OPTIX__) +ccl_device_inline +# else +ccl_device_noinline +# endif + void + svm_node_bevel(INTEGRATOR_STATE_CONST_ARGS, ShaderData *sd, float *stack, uint4 node) { uint num_samples, radius_offset, normal_offset, out_offset; svm_unpack_node_uchar4(node.y, &num_samples, &radius_offset, &normal_offset, &out_offset); float radius = stack_load_float(stack, radius_offset); - float3 bevel_N = svm_bevel(kg, sd, state, radius, num_samples); - if (stack_valid(normal_offset)) { - /* Preserve input normal. */ - float3 ref_N = stack_load_float3(stack, normal_offset); - bevel_N = normalize(ref_N + (bevel_N - sd->N)); + float3 bevel_N = sd->N; + + if (KERNEL_NODES_FEATURE(RAYTRACE)) { +# ifdef __KERNEL_OPTIX__ + bevel_N = optixDirectCall<float3>(1, INTEGRATOR_STATE_PASS, sd, radius, num_samples); +# else + bevel_N = svm_bevel(INTEGRATOR_STATE_PASS, sd, radius, num_samples); +# endif + + if (stack_valid(normal_offset)) { + /* Preserve input normal. */ + float3 ref_N = stack_load_float3(stack, normal_offset); + bevel_N = normalize(ref_N + (bevel_N - sd->N)); + } } stack_store_float3(stack, out_offset, bevel_N); diff --git a/intern/cycles/kernel/svm/svm_blackbody.h b/intern/cycles/kernel/svm/svm_blackbody.h index adfc50d961e..96b3703b954 100644 --- a/intern/cycles/kernel/svm/svm_blackbody.h +++ b/intern/cycles/kernel/svm/svm_blackbody.h @@ -34,8 +34,11 @@ CCL_NAMESPACE_BEGIN /* Blackbody Node */ -ccl_device void svm_node_blackbody( - KernelGlobals *kg, ShaderData *sd, float *stack, uint temperature_offset, uint col_offset) +ccl_device_noinline void svm_node_blackbody(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint temperature_offset, + uint col_offset) { /* Input */ float temperature = stack_load_float(stack, temperature_offset); diff --git a/intern/cycles/kernel/svm/svm_brick.h b/intern/cycles/kernel/svm/svm_brick.h index 6984afa30a5..dca1b220dd5 100644 --- a/intern/cycles/kernel/svm/svm_brick.h +++ b/intern/cycles/kernel/svm/svm_brick.h @@ -72,12 +72,12 @@ ccl_device_noinline_cpu float2 svm_brick(float3 p, return make_float2(tint, mortar); } -ccl_device void svm_node_tex_brick( - KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device_noinline int svm_node_tex_brick( + const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset) { - uint4 node2 = read_node(kg, offset); - uint4 node3 = read_node(kg, offset); - uint4 node4 = read_node(kg, offset); + uint4 node2 = read_node(kg, &offset); + uint4 node3 = read_node(kg, &offset); + uint4 node4 = read_node(kg, &offset); /* Input and Output Sockets */ uint co_offset, color1_offset, color2_offset, mortar_offset, scale_offset; @@ -133,6 +133,7 @@ ccl_device void svm_node_tex_brick( stack_store_float3(stack, color_offset, color1 * (1.0f - f) + mortar * f); if (stack_valid(fac_offset)) stack_store_float(stack, fac_offset, f); + return offset; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_brightness.h b/intern/cycles/kernel/svm/svm_brightness.h index 9554b5946fb..2ed812acd71 100644 --- a/intern/cycles/kernel/svm/svm_brightness.h +++ b/intern/cycles/kernel/svm/svm_brightness.h @@ -16,7 +16,7 @@ CCL_NAMESPACE_BEGIN -ccl_device void svm_node_brightness( +ccl_device_noinline void svm_node_brightness( ShaderData *sd, float *stack, uint in_color, uint out_color, uint node) { uint bright_offset, contrast_offset; diff --git a/intern/cycles/kernel/svm/svm_bump.h b/intern/cycles/kernel/svm/svm_bump.h index c9d430a2bba..8672839dbab 100644 --- a/intern/cycles/kernel/svm/svm_bump.h +++ b/intern/cycles/kernel/svm/svm_bump.h @@ -18,10 +18,10 @@ CCL_NAMESPACE_BEGIN /* Bump Eval Nodes */ -ccl_device void svm_node_enter_bump_eval(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint offset) +ccl_device_noinline void svm_node_enter_bump_eval(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint offset) { /* save state */ stack_store_float3(stack, offset + 0, sd->P); @@ -45,10 +45,10 @@ ccl_device void svm_node_enter_bump_eval(KernelGlobals *kg, } } -ccl_device void svm_node_leave_bump_eval(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint offset) +ccl_device_noinline void svm_node_leave_bump_eval(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint offset) { /* restore state */ sd->P = stack_load_float3(stack, offset + 0); diff --git a/intern/cycles/kernel/svm/svm_camera.h b/intern/cycles/kernel/svm/svm_camera.h index 21a17acf5f1..40c0edcdad0 100644 --- a/intern/cycles/kernel/svm/svm_camera.h +++ b/intern/cycles/kernel/svm/svm_camera.h @@ -16,12 +16,12 @@ CCL_NAMESPACE_BEGIN -ccl_device void svm_node_camera(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint out_vector, - uint out_zdepth, - uint out_distance) +ccl_device_noinline void svm_node_camera(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint out_vector, + uint out_zdepth, + uint out_distance) { float distance; float zdepth; diff --git a/intern/cycles/kernel/svm/svm_checker.h b/intern/cycles/kernel/svm/svm_checker.h index d54cb73df91..a9919c9ddc9 100644 --- a/intern/cycles/kernel/svm/svm_checker.h +++ b/intern/cycles/kernel/svm/svm_checker.h @@ -32,7 +32,10 @@ ccl_device float svm_checker(float3 p) return ((xi % 2 == yi % 2) == (zi % 2)) ? 1.0f : 0.0f; } -ccl_device void svm_node_tex_checker(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +ccl_device_noinline void svm_node_tex_checker(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { uint co_offset, color1_offset, color2_offset, scale_offset; uint color_offset, fac_offset; diff --git a/intern/cycles/kernel/svm/svm_clamp.h b/intern/cycles/kernel/svm/svm_clamp.h index a85fd82754e..656bd31c085 100644 --- a/intern/cycles/kernel/svm/svm_clamp.h +++ b/intern/cycles/kernel/svm/svm_clamp.h @@ -18,18 +18,18 @@ CCL_NAMESPACE_BEGIN /* Clamp Node */ -ccl_device void svm_node_clamp(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint value_stack_offset, - uint parameters_stack_offsets, - uint result_stack_offset, - int *offset) +ccl_device_noinline int svm_node_clamp(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint value_stack_offset, + uint parameters_stack_offsets, + uint result_stack_offset, + int offset) { uint min_stack_offset, max_stack_offset, type; svm_unpack_node_uchar3(parameters_stack_offsets, &min_stack_offset, &max_stack_offset, &type); - uint4 defaults = read_node(kg, offset); + uint4 defaults = read_node(kg, &offset); float value = stack_load_float(stack, value_stack_offset); float min = stack_load_float_default(stack, min_stack_offset, defaults.x); @@ -41,6 +41,7 @@ ccl_device void svm_node_clamp(KernelGlobals *kg, else { stack_store_float(stack, result_stack_offset, clamp(value, min, max)); } + return offset; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index bbe8d72edf0..e2f6dde4ace 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -57,13 +57,9 @@ ccl_device void svm_node_glass_setup( } } -ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint4 node, - ShaderType shader_type, - int path_flag, - int *offset) +template<uint node_feature_mask, ShaderType shader_type> +ccl_device_noinline int svm_node_closure_bsdf( + const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int path_flag, int offset) { uint type, param1_offset, param2_offset; @@ -73,19 +69,19 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, 1.0f); /* note we read this extra node before weight check, so offset is added */ - uint4 data_node = read_node(kg, offset); + uint4 data_node = read_node(kg, &offset); /* Only compute BSDF for surfaces, transparent variable is shared with volume extinction. */ - if (mix_weight == 0.0f || shader_type != SHADER_TYPE_SURFACE) { + if ((!KERNEL_NODES_FEATURE(BSDF) || shader_type != SHADER_TYPE_SURFACE) || mix_weight == 0.0f) { if (type == CLOSURE_BSDF_PRINCIPLED_ID) { /* Read all principled BSDF extra data to get the right offset. */ - read_node(kg, offset); - read_node(kg, offset); - read_node(kg, offset); - read_node(kg, offset); + read_node(kg, &offset); + read_node(kg, &offset); + read_node(kg, &offset); + read_node(kg, &offset); } - return; + return offset; } float3 N = stack_valid(data_node.x) ? stack_load_float3(stack, data_node.x) : sd->N; @@ -102,7 +98,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, sheen_offset, sheen_tint_offset, clearcoat_offset, clearcoat_roughness_offset, eta_offset, transmission_offset, anisotropic_rotation_offset, transmission_roughness_offset; - uint4 data_node2 = read_node(kg, offset); + uint4 data_node2 = read_node(kg, &offset); float3 T = stack_load_float3(stack, data_node.y); svm_unpack_node_uchar4(data_node.z, @@ -158,7 +154,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, float specular_weight = (1.0f - final_transmission); // get the base color - uint4 data_base_color = read_node(kg, offset); + uint4 data_base_color = read_node(kg, &offset); float3 base_color = stack_valid(data_base_color.x) ? stack_load_float3(stack, data_base_color.x) : make_float3(__uint_as_float(data_base_color.y), @@ -166,16 +162,21 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, __uint_as_float(data_base_color.w)); // get the additional clearcoat normal and subsurface scattering radius - uint4 data_cn_ssr = read_node(kg, offset); + uint4 data_cn_ssr = read_node(kg, &offset); float3 clearcoat_normal = stack_valid(data_cn_ssr.x) ? stack_load_float3(stack, data_cn_ssr.x) : sd->N; float3 subsurface_radius = stack_valid(data_cn_ssr.y) ? stack_load_float3(stack, data_cn_ssr.y) : make_float3(1.0f, 1.0f, 1.0f); + float subsurface_ior = stack_valid(data_cn_ssr.z) ? stack_load_float(stack, data_cn_ssr.z) : + 1.4f; + float subsurface_anisotropy = stack_valid(data_cn_ssr.w) ? + stack_load_float(stack, data_cn_ssr.w) : + 0.0f; // get the subsurface color - uint4 data_subsurface_color = read_node(kg, offset); + uint4 data_subsurface_color = read_node(kg, &offset); float3 subsurface_color = stack_valid(data_subsurface_color.x) ? stack_load_float3(stack, data_subsurface_color.x) : make_float3(__uint_as_float(data_subsurface_color.y), @@ -222,16 +223,16 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, if (bssrdf) { bssrdf->radius = subsurface_radius * subsurface; - bssrdf->albedo = (subsurface_method == CLOSURE_BSSRDF_PRINCIPLED_ID) ? - subsurface_color : - mixed_ss_base_color; - bssrdf->texture_blur = 0.0f; - bssrdf->sharpness = 0.0f; + bssrdf->albedo = mixed_ss_base_color; bssrdf->N = N; bssrdf->roughness = roughness; + /* Clamps protecting against bad/extreme and non physical values. */ + subsurface_ior = clamp(subsurface_ior, 1.01f, 3.8f); + bssrdf->anisotropy = clamp(subsurface_anisotropy, 0.0f, 0.9f); + /* setup bsdf */ - sd->flag |= bssrdf_setup(sd, bssrdf, subsurface_method); + sd->flag |= bssrdf_setup(sd, bssrdf, subsurface_method, subsurface_ior); } } } @@ -733,9 +734,9 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, } #ifdef __HAIR__ case CLOSURE_BSDF_HAIR_PRINCIPLED_ID: { - uint4 data_node2 = read_node(kg, offset); - uint4 data_node3 = read_node(kg, offset); - uint4 data_node4 = read_node(kg, offset); + uint4 data_node2 = read_node(kg, &offset); + uint4 data_node3 = read_node(kg, &offset); + uint4 data_node4 = read_node(kg, &offset); float3 weight = sd->svm_closure_weight * mix_weight; @@ -878,10 +879,8 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, #endif /* __HAIR__ */ #ifdef __SUBSURFACE__ - case CLOSURE_BSSRDF_CUBIC_ID: - case CLOSURE_BSSRDF_GAUSSIAN_ID: - case CLOSURE_BSSRDF_BURLEY_ID: - case CLOSURE_BSSRDF_RANDOM_WALK_ID: { + case CLOSURE_BSSRDF_RANDOM_WALK_ID: + case CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID: { float3 weight = sd->svm_closure_weight * mix_weight; Bssrdf *bssrdf = bssrdf_alloc(sd, weight); @@ -894,11 +893,14 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, bssrdf->radius = stack_load_float3(stack, data_node.z) * param1; bssrdf->albedo = sd->svm_closure_weight; - bssrdf->texture_blur = param2; - bssrdf->sharpness = stack_load_float(stack, data_node.w); bssrdf->N = N; - bssrdf->roughness = 0.0f; - sd->flag |= bssrdf_setup(sd, bssrdf, (ClosureType)type); + bssrdf->roughness = FLT_MAX; + + const float subsurface_ior = clamp(param2, 1.01f, 3.8f); + const float subsurface_anisotropy = stack_load_float(stack, data_node.w); + bssrdf->anisotropy = clamp(subsurface_anisotropy, 0.0f, 0.9f); + + sd->flag |= bssrdf_setup(sd, bssrdf, (ClosureType)type, subsurface_ior); } break; @@ -907,10 +909,15 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, default: break; } + + return offset; } -ccl_device void svm_node_closure_volume( - KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type) +template<ShaderType shader_type> +ccl_device_noinline void svm_node_closure_volume(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { #ifdef __VOLUME__ /* Only sum extinction for volumes, variable is shared with surface transparency. */ @@ -961,21 +968,17 @@ ccl_device void svm_node_closure_volume( #endif } -ccl_device void svm_node_principled_volume(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint4 node, - ShaderType shader_type, - int path_flag, - int *offset) +template<ShaderType shader_type> +ccl_device_noinline int svm_node_principled_volume( + const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int path_flag, int offset) { #ifdef __VOLUME__ - uint4 value_node = read_node(kg, offset); - uint4 attr_node = read_node(kg, offset); + uint4 value_node = read_node(kg, &offset); + uint4 attr_node = read_node(kg, &offset); /* Only sum extinction for volumes, variable is shared with surface transparency. */ if (shader_type != SHADER_TYPE_VOLUME) { - return; + return offset; } uint density_offset, anisotropy_offset, absorption_color_offset, mix_weight_offset; @@ -985,7 +988,7 @@ ccl_device void svm_node_principled_volume(KernelGlobals *kg, 1.0f); if (mix_weight == 0.0f) { - return; + return offset; } /* Compute density. */ @@ -1034,7 +1037,7 @@ ccl_device void svm_node_principled_volume(KernelGlobals *kg, /* Compute emission. */ if (path_flag & PATH_RAY_SHADOW) { /* Don't need emission for shadows. */ - return; + return offset; } uint emission_offset, emission_color_offset, blackbody_offset, temperature_offset; @@ -1074,9 +1077,10 @@ ccl_device void svm_node_principled_volume(KernelGlobals *kg, } } #endif + return offset; } -ccl_device void svm_node_closure_emission(ShaderData *sd, float *stack, uint4 node) +ccl_device_noinline void svm_node_closure_emission(ShaderData *sd, float *stack, uint4 node) { uint mix_weight_offset = node.y; float3 weight = sd->svm_closure_weight; @@ -1093,7 +1097,7 @@ ccl_device void svm_node_closure_emission(ShaderData *sd, float *stack, uint4 no emission_setup(sd, weight); } -ccl_device void svm_node_closure_background(ShaderData *sd, float *stack, uint4 node) +ccl_device_noinline void svm_node_closure_background(ShaderData *sd, float *stack, uint4 node) { uint mix_weight_offset = node.y; float3 weight = sd->svm_closure_weight; @@ -1110,7 +1114,7 @@ ccl_device void svm_node_closure_background(ShaderData *sd, float *stack, uint4 background_setup(sd, weight); } -ccl_device void svm_node_closure_holdout(ShaderData *sd, float *stack, uint4 node) +ccl_device_noinline void svm_node_closure_holdout(ShaderData *sd, float *stack, uint4 node) { uint mix_weight_offset = node.y; @@ -1145,14 +1149,13 @@ ccl_device void svm_node_closure_set_weight(ShaderData *sd, uint r, uint g, uint ccl_device void svm_node_closure_weight(ShaderData *sd, float *stack, uint weight_offset) { float3 weight = stack_load_float3(stack, weight_offset); - svm_node_closure_store_weight(sd, weight); } -ccl_device void svm_node_emission_weight(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint4 node) +ccl_device_noinline void svm_node_emission_weight(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { uint color_offset = node.y; uint strength_offset = node.z; @@ -1163,7 +1166,7 @@ ccl_device void svm_node_emission_weight(KernelGlobals *kg, svm_node_closure_store_weight(sd, weight); } -ccl_device void svm_node_mix_closure(ShaderData *sd, float *stack, uint4 node) +ccl_device_noinline void svm_node_mix_closure(ShaderData *sd, float *stack, uint4 node) { /* fetch weight from blend input, previous mix closures, * and write to stack to be used by closure nodes later */ @@ -1186,7 +1189,7 @@ ccl_device void svm_node_mix_closure(ShaderData *sd, float *stack, uint4 node) /* (Bump) normal */ ccl_device void svm_node_set_normal( - KernelGlobals *kg, ShaderData *sd, float *stack, uint in_direction, uint out_normal) + const KernelGlobals *kg, ShaderData *sd, float *stack, uint in_direction, uint out_normal) { float3 normal = stack_load_float3(stack, in_direction); sd->N = normal; diff --git a/intern/cycles/kernel/svm/svm_convert.h b/intern/cycles/kernel/svm/svm_convert.h index 5df6c9fb755..37d40167ccc 100644 --- a/intern/cycles/kernel/svm/svm_convert.h +++ b/intern/cycles/kernel/svm/svm_convert.h @@ -18,8 +18,8 @@ CCL_NAMESPACE_BEGIN /* Conversion Nodes */ -ccl_device void svm_node_convert( - KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint from, uint to) +ccl_device_noinline void svm_node_convert( + const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint from, uint to) { switch (type) { case NODE_CONVERT_FI: { diff --git a/intern/cycles/kernel/svm/svm_displace.h b/intern/cycles/kernel/svm/svm_displace.h index 250fac6bcb8..a1d952173d8 100644 --- a/intern/cycles/kernel/svm/svm_displace.h +++ b/intern/cycles/kernel/svm/svm_displace.h @@ -14,11 +14,16 @@ * limitations under the License. */ +#include "kernel/kernel_montecarlo.h" + CCL_NAMESPACE_BEGIN /* Bump Node */ -ccl_device void svm_node_set_bump(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +ccl_device_noinline void svm_node_set_bump(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { #ifdef __RAY_DIFFERENTIALS__ /* get normal input */ @@ -83,7 +88,7 @@ ccl_device void svm_node_set_bump(KernelGlobals *kg, ShaderData *sd, float *stac /* Displacement Node */ -ccl_device void svm_node_set_displacement(KernelGlobals *kg, +ccl_device void svm_node_set_displacement(const KernelGlobals *kg, ShaderData *sd, float *stack, uint fac_offset) @@ -92,7 +97,10 @@ ccl_device void svm_node_set_displacement(KernelGlobals *kg, sd->P += dP; } -ccl_device void svm_node_displacement(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +ccl_device_noinline void svm_node_displacement(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { uint height_offset, midlevel_offset, scale_offset, normal_offset; svm_unpack_node_uchar4(node.y, &height_offset, &midlevel_offset, &scale_offset, &normal_offset); @@ -119,10 +127,10 @@ ccl_device void svm_node_displacement(KernelGlobals *kg, ShaderData *sd, float * stack_store_float3(stack, node.z, dP); } -ccl_device void svm_node_vector_displacement( - KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device_noinline int svm_node_vector_displacement( + const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset) { - uint4 data_node = read_node(kg, offset); + uint4 data_node = read_node(kg, &offset); uint space = data_node.x; uint vector_offset, midlevel_offset, scale_offset, displacement_offset; @@ -164,6 +172,7 @@ ccl_device void svm_node_vector_displacement( } stack_store_float3(stack, displacement_offset, dP); + return offset; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_fresnel.h b/intern/cycles/kernel/svm/svm_fresnel.h index 96d602e35bf..b5ecdbe2abf 100644 --- a/intern/cycles/kernel/svm/svm_fresnel.h +++ b/intern/cycles/kernel/svm/svm_fresnel.h @@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN /* Fresnel Node */ -ccl_device void svm_node_fresnel( +ccl_device_noinline void svm_node_fresnel( ShaderData *sd, float *stack, uint ior_offset, uint ior_value, uint node) { uint normal_offset, out_offset; @@ -37,7 +37,7 @@ ccl_device void svm_node_fresnel( /* Layer Weight Node */ -ccl_device void svm_node_layer_weight(ShaderData *sd, float *stack, uint4 node) +ccl_device_noinline void svm_node_layer_weight(ShaderData *sd, float *stack, uint4 node) { uint blend_offset = node.y; uint blend_value = node.z; diff --git a/intern/cycles/kernel/svm/svm_gamma.h b/intern/cycles/kernel/svm/svm_gamma.h index 65eb08eb0eb..f6fafdee941 100644 --- a/intern/cycles/kernel/svm/svm_gamma.h +++ b/intern/cycles/kernel/svm/svm_gamma.h @@ -16,7 +16,7 @@ CCL_NAMESPACE_BEGIN -ccl_device void svm_node_gamma( +ccl_device_noinline void svm_node_gamma( ShaderData *sd, float *stack, uint in_gamma, uint in_color, uint out_color) { float3 color = stack_load_float3(stack, in_color); diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h index e48e96dcfa4..10e9f291d0e 100644 --- a/intern/cycles/kernel/svm/svm_geometry.h +++ b/intern/cycles/kernel/svm/svm_geometry.h @@ -18,8 +18,8 @@ CCL_NAMESPACE_BEGIN /* Geometry Node */ -ccl_device_inline void svm_node_geometry( - KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) +ccl_device_noinline void svm_node_geometry( + const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) { float3 data; @@ -51,8 +51,8 @@ ccl_device_inline void svm_node_geometry( stack_store_float3(stack, out_offset, data); } -ccl_device void svm_node_geometry_bump_dx( - KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) +ccl_device_noinline void svm_node_geometry_bump_dx( + const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) { #ifdef __RAY_DIFFERENTIALS__ float3 data; @@ -75,8 +75,8 @@ ccl_device void svm_node_geometry_bump_dx( #endif } -ccl_device void svm_node_geometry_bump_dy( - KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) +ccl_device_noinline void svm_node_geometry_bump_dy( + const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) { #ifdef __RAY_DIFFERENTIALS__ float3 data; @@ -101,8 +101,8 @@ ccl_device void svm_node_geometry_bump_dy( /* Object Info */ -ccl_device void svm_node_object_info( - KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) +ccl_device_noinline void svm_node_object_info( + const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) { float data; @@ -140,8 +140,8 @@ ccl_device void svm_node_object_info( /* Particle Info */ -ccl_device void svm_node_particle_info( - KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) +ccl_device_noinline void svm_node_particle_info( + const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) { switch (type) { case NODE_INFO_PAR_INDEX: { @@ -199,8 +199,8 @@ ccl_device void svm_node_particle_info( /* Hair Info */ -ccl_device void svm_node_hair_info( - KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) +ccl_device_noinline void svm_node_hair_info( + const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) { float data; float3 data3; diff --git a/intern/cycles/kernel/svm/svm_gradient.h b/intern/cycles/kernel/svm/svm_gradient.h index 08304bc47e8..cd15f7097e7 100644 --- a/intern/cycles/kernel/svm/svm_gradient.h +++ b/intern/cycles/kernel/svm/svm_gradient.h @@ -60,7 +60,7 @@ ccl_device float svm_gradient(float3 p, NodeGradientType type) return 0.0f; } -ccl_device void svm_node_tex_gradient(ShaderData *sd, float *stack, uint4 node) +ccl_device_noinline void svm_node_tex_gradient(ShaderData *sd, float *stack, uint4 node) { uint type, co_offset, color_offset, fac_offset; diff --git a/intern/cycles/kernel/svm/svm_hsv.h b/intern/cycles/kernel/svm/svm_hsv.h index c299cf58c7f..6f49a8385aa 100644 --- a/intern/cycles/kernel/svm/svm_hsv.h +++ b/intern/cycles/kernel/svm/svm_hsv.h @@ -19,8 +19,10 @@ CCL_NAMESPACE_BEGIN -ccl_device void svm_node_hsv( - KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device_noinline void svm_node_hsv(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { uint in_color_offset, fac_offset, out_color_offset; uint hue_offset, sat_offset, val_offset; diff --git a/intern/cycles/kernel/svm/svm_ies.h b/intern/cycles/kernel/svm/svm_ies.h index 56c804b44d0..9c13734ecf0 100644 --- a/intern/cycles/kernel/svm/svm_ies.h +++ b/intern/cycles/kernel/svm/svm_ies.h @@ -19,7 +19,7 @@ CCL_NAMESPACE_BEGIN /* IES Light */ ccl_device_inline float interpolate_ies_vertical( - KernelGlobals *kg, int ofs, int v, int v_num, float v_frac, int h) + const KernelGlobals *kg, int ofs, int v, int v_num, float v_frac, int h) { /* Since lookups are performed in spherical coordinates, clamping the coordinates at the low end * of v (corresponding to the north pole) would result in artifacts. The proper way of dealing @@ -39,7 +39,7 @@ ccl_device_inline float interpolate_ies_vertical( return cubic_interp(a, b, c, d, v_frac); } -ccl_device_inline float kernel_ies_interp(KernelGlobals *kg, +ccl_device_inline float kernel_ies_interp(const KernelGlobals *kg, int slot, float h_angle, float v_angle) @@ -98,8 +98,10 @@ ccl_device_inline float kernel_ies_interp(KernelGlobals *kg, return max(cubic_interp(a, b, c, d, h_frac), 0.0f); } -ccl_device void svm_node_ies( - KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device_noinline void svm_node_ies(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { uint vector_offset, strength_offset, fac_offset, slot = node.z; svm_unpack_node_uchar3(node.y, &strength_offset, &vector_offset, &fac_offset); diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index 9348ddabde5..a344f36977a 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -16,7 +16,7 @@ CCL_NAMESPACE_BEGIN -ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint flags) +ccl_device float4 svm_image_texture(const KernelGlobals *kg, int id, float x, float y, uint flags) { if (id == -1) { return make_float4( @@ -44,8 +44,8 @@ ccl_device_inline float3 texco_remap_square(float3 co) return (co - make_float3(0.5f, 0.5f, 0.5f)) * 2.0f; } -ccl_device void svm_node_tex_image( - KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device_noinline int svm_node_tex_image( + const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset) { uint co_offset, out_offset, alpha_offset, flags; @@ -71,7 +71,7 @@ ccl_device void svm_node_tex_image( int num_nodes = (int)node.y; if (num_nodes > 0) { /* Remember the offset of the node following the tile nodes. */ - int next_offset = (*offset) + num_nodes; + int next_offset = offset + num_nodes; /* Find the tile that the UV lies in. */ int tx = (int)tex_co.x; @@ -83,7 +83,7 @@ ccl_device void svm_node_tex_image( /* Find the index of the tile. */ for (int i = 0; i < num_nodes; i++) { - uint4 tile_node = read_node(kg, offset); + uint4 tile_node = read_node(kg, &offset); if (tile_node.x == tile) { id = tile_node.y; break; @@ -102,7 +102,7 @@ ccl_device void svm_node_tex_image( } /* Skip over the remaining nodes. */ - *offset = next_offset; + offset = next_offset; } else { id = -num_nodes; @@ -114,9 +114,13 @@ ccl_device void svm_node_tex_image( stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z)); if (stack_valid(alpha_offset)) stack_store_float(stack, alpha_offset, f.w); + return offset; } -ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +ccl_device_noinline void svm_node_tex_image_box(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { /* get object space normal */ float3 N = sd->N; @@ -215,10 +219,10 @@ ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float stack_store_float(stack, alpha_offset, f.w); } -ccl_device void svm_node_tex_environment(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint4 node) +ccl_device_noinline void svm_node_tex_environment(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { uint id = node.y; uint co_offset, out_offset, alpha_offset, flags; diff --git a/intern/cycles/kernel/svm/svm_invert.h b/intern/cycles/kernel/svm/svm_invert.h index 02024742b13..27cdaaff473 100644 --- a/intern/cycles/kernel/svm/svm_invert.h +++ b/intern/cycles/kernel/svm/svm_invert.h @@ -21,7 +21,7 @@ ccl_device float invert(float color, float factor) return factor * (1.0f - color) + (1.0f - factor) * color; } -ccl_device void svm_node_invert( +ccl_device_noinline void svm_node_invert( ShaderData *sd, float *stack, uint in_fac, uint in_color, uint out_color) { float factor = stack_load_float(stack, in_fac); diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h index 768c65918cd..49fabad1cc5 100644 --- a/intern/cycles/kernel/svm/svm_light_path.h +++ b/intern/cycles/kernel/svm/svm_light_path.h @@ -18,12 +18,12 @@ CCL_NAMESPACE_BEGIN /* Light Path Node */ -ccl_device void svm_node_light_path(ShaderData *sd, - ccl_addr_space PathState *state, - float *stack, - uint type, - uint out_offset, - int path_flag) +ccl_device_noinline void svm_node_light_path(INTEGRATOR_STATE_CONST_ARGS, + const ShaderData *sd, + float *stack, + uint type, + uint out_offset, + int path_flag) { float info = 0.0f; @@ -58,21 +58,47 @@ ccl_device void svm_node_light_path(ShaderData *sd, case NODE_LP_ray_length: info = sd->ray_length; break; - case NODE_LP_ray_depth: - info = (float)state->bounce; + case NODE_LP_ray_depth: { + /* Read bounce from difference location depending if this is a shadow + * path. It's a bit dubious to have integrate state details leak into + * this function but hard to avoid currently. */ + int bounce = (INTEGRATOR_STATE_IS_NULL) ? 0 : + (path_flag & PATH_RAY_SHADOW) ? INTEGRATOR_STATE(shadow_path, bounce) : + INTEGRATOR_STATE(path, bounce); + + /* For background, light emission and shadow evaluation we from a + * surface or volume we are effective one bounce further. */ + if (path_flag & (PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { + bounce++; + } + + info = (float)bounce; break; + } + /* TODO */ + case NODE_LP_ray_transparent: { + const int bounce = (INTEGRATOR_STATE_IS_NULL) ? + 0 : + (path_flag & PATH_RAY_SHADOW) ? + INTEGRATOR_STATE(shadow_path, transparent_bounce) : + INTEGRATOR_STATE(path, transparent_bounce); + + info = (float)bounce; + break; + } +#if 0 case NODE_LP_ray_diffuse: info = (float)state->diffuse_bounce; break; case NODE_LP_ray_glossy: info = (float)state->glossy_bounce; break; - case NODE_LP_ray_transparent: - info = (float)state->transparent_bounce; - break; +#endif +#if 0 case NODE_LP_ray_transmission: info = (float)state->transmission_bounce; break; +#endif } stack_store_float(stack, out_offset, info); @@ -80,7 +106,7 @@ ccl_device void svm_node_light_path(ShaderData *sd, /* Light Falloff Node */ -ccl_device void svm_node_light_falloff(ShaderData *sd, float *stack, uint4 node) +ccl_device_noinline void svm_node_light_falloff(ShaderData *sd, float *stack, uint4 node) { uint strength_offset, out_offset, smooth_offset; diff --git a/intern/cycles/kernel/svm/svm_magic.h b/intern/cycles/kernel/svm/svm_magic.h index 9c160e6d8cc..8784c760860 100644 --- a/intern/cycles/kernel/svm/svm_magic.h +++ b/intern/cycles/kernel/svm/svm_magic.h @@ -87,8 +87,8 @@ ccl_device_noinline_cpu float3 svm_magic(float3 p, int n, float distortion) return make_float3(0.5f - x, 0.5f - y, 0.5f - z); } -ccl_device void svm_node_tex_magic( - KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device_noinline int svm_node_tex_magic( + const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset) { uint depth; uint scale_offset, distortion_offset, co_offset, fac_offset, color_offset; @@ -96,7 +96,7 @@ ccl_device void svm_node_tex_magic( svm_unpack_node_uchar3(node.y, &depth, &color_offset, &fac_offset); svm_unpack_node_uchar3(node.z, &co_offset, &scale_offset, &distortion_offset); - uint4 node2 = read_node(kg, offset); + uint4 node2 = read_node(kg, &offset); float3 co = stack_load_float3(stack, co_offset); float scale = stack_load_float_default(stack, scale_offset, node2.x); float distortion = stack_load_float_default(stack, distortion_offset, node2.y); @@ -107,6 +107,7 @@ ccl_device void svm_node_tex_magic( stack_store_float(stack, fac_offset, average(color)); if (stack_valid(color_offset)) stack_store_float3(stack, color_offset, color); + return offset; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_map_range.h b/intern/cycles/kernel/svm/svm_map_range.h index 533a631c837..c8684981e31 100644 --- a/intern/cycles/kernel/svm/svm_map_range.h +++ b/intern/cycles/kernel/svm/svm_map_range.h @@ -24,13 +24,13 @@ ccl_device_inline float smootherstep(float edge0, float edge1, float x) return x * x * x * (x * (x * 6.0f - 15.0f) + 10.0f); } -ccl_device void svm_node_map_range(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint value_stack_offset, - uint parameters_stack_offsets, - uint results_stack_offsets, - int *offset) +ccl_device_noinline int svm_node_map_range(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint value_stack_offset, + uint parameters_stack_offsets, + uint results_stack_offsets, + int offset) { uint from_min_stack_offset, from_max_stack_offset, to_min_stack_offset, to_max_stack_offset; uint type_stack_offset, steps_stack_offset, result_stack_offset; @@ -42,8 +42,8 @@ ccl_device void svm_node_map_range(KernelGlobals *kg, svm_unpack_node_uchar3( results_stack_offsets, &type_stack_offset, &steps_stack_offset, &result_stack_offset); - uint4 defaults = read_node(kg, offset); - uint4 defaults2 = read_node(kg, offset); + uint4 defaults = read_node(kg, &offset); + uint4 defaults2 = read_node(kg, &offset); float value = stack_load_float(stack, value_stack_offset); float from_min = stack_load_float_default(stack, from_min_stack_offset, defaults.x); @@ -83,6 +83,7 @@ ccl_device void svm_node_map_range(KernelGlobals *kg, result = 0.0f; } stack_store_float(stack, result_stack_offset, result); + return offset; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_mapping.h b/intern/cycles/kernel/svm/svm_mapping.h index 6e19c859e19..fcc724405f5 100644 --- a/intern/cycles/kernel/svm/svm_mapping.h +++ b/intern/cycles/kernel/svm/svm_mapping.h @@ -18,13 +18,12 @@ CCL_NAMESPACE_BEGIN /* Mapping Node */ -ccl_device void svm_node_mapping(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint type, - uint inputs_stack_offsets, - uint result_stack_offset, - int *offset) +ccl_device_noinline void svm_node_mapping(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint type, + uint inputs_stack_offsets, + uint result_stack_offset) { uint vector_stack_offset, location_stack_offset, rotation_stack_offset, scale_stack_offset; svm_unpack_node_uchar4(inputs_stack_offsets, @@ -44,30 +43,40 @@ ccl_device void svm_node_mapping(KernelGlobals *kg, /* Texture Mapping */ -ccl_device void svm_node_texture_mapping( - KernelGlobals *kg, ShaderData *sd, float *stack, uint vec_offset, uint out_offset, int *offset) +ccl_device_noinline int svm_node_texture_mapping(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint vec_offset, + uint out_offset, + int offset) { float3 v = stack_load_float3(stack, vec_offset); Transform tfm; - tfm.x = read_node_float(kg, offset); - tfm.y = read_node_float(kg, offset); - tfm.z = read_node_float(kg, offset); + tfm.x = read_node_float(kg, &offset); + tfm.y = read_node_float(kg, &offset); + tfm.z = read_node_float(kg, &offset); float3 r = transform_point(&tfm, v); stack_store_float3(stack, out_offset, r); + return offset; } -ccl_device void svm_node_min_max( - KernelGlobals *kg, ShaderData *sd, float *stack, uint vec_offset, uint out_offset, int *offset) +ccl_device_noinline int svm_node_min_max(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint vec_offset, + uint out_offset, + int offset) { float3 v = stack_load_float3(stack, vec_offset); - float3 mn = float4_to_float3(read_node_float(kg, offset)); - float3 mx = float4_to_float3(read_node_float(kg, offset)); + float3 mn = float4_to_float3(read_node_float(kg, &offset)); + float3 mx = float4_to_float3(read_node_float(kg, &offset)); float3 r = min(max(mn, v), mx); stack_store_float3(stack, out_offset, r); + return offset; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_math.h b/intern/cycles/kernel/svm/svm_math.h index 733ea28f9e5..99e7a8f2bda 100644 --- a/intern/cycles/kernel/svm/svm_math.h +++ b/intern/cycles/kernel/svm/svm_math.h @@ -16,13 +16,12 @@ CCL_NAMESPACE_BEGIN -ccl_device void svm_node_math(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint type, - uint inputs_stack_offsets, - uint result_stack_offset, - int *offset) +ccl_device_noinline void svm_node_math(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint type, + uint inputs_stack_offsets, + uint result_stack_offset) { uint a_stack_offset, b_stack_offset, c_stack_offset; svm_unpack_node_uchar3(inputs_stack_offsets, &a_stack_offset, &b_stack_offset, &c_stack_offset); @@ -35,13 +34,13 @@ ccl_device void svm_node_math(KernelGlobals *kg, stack_store_float(stack, result_stack_offset, result); } -ccl_device void svm_node_vector_math(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint type, - uint inputs_stack_offsets, - uint outputs_stack_offsets, - int *offset) +ccl_device_noinline int svm_node_vector_math(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint type, + uint inputs_stack_offsets, + uint outputs_stack_offsets, + int offset) { uint value_stack_offset, vector_stack_offset; uint a_stack_offset, b_stack_offset, param1_stack_offset; @@ -60,7 +59,7 @@ ccl_device void svm_node_vector_math(KernelGlobals *kg, /* 3 Vector Operators */ if (type == NODE_VECTOR_MATH_WRAP || type == NODE_VECTOR_MATH_FACEFORWARD || type == NODE_VECTOR_MATH_MULTIPLY_ADD) { - uint4 extra_node = read_node(kg, offset); + uint4 extra_node = read_node(kg, &offset); c = stack_load_float3(stack, extra_node.x); } @@ -70,6 +69,7 @@ ccl_device void svm_node_vector_math(KernelGlobals *kg, stack_store_float(stack, value_stack_offset, value); if (stack_valid(vector_stack_offset)) stack_store_float3(stack, vector_stack_offset, vector); + return offset; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_mix.h b/intern/cycles/kernel/svm/svm_mix.h index 15114bfd5e4..3e38080977f 100644 --- a/intern/cycles/kernel/svm/svm_mix.h +++ b/intern/cycles/kernel/svm/svm_mix.h @@ -18,16 +18,16 @@ CCL_NAMESPACE_BEGIN /* Node */ -ccl_device void svm_node_mix(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint fac_offset, - uint c1_offset, - uint c2_offset, - int *offset) +ccl_device_noinline int svm_node_mix(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint fac_offset, + uint c1_offset, + uint c2_offset, + int offset) { /* read extra data */ - uint4 node1 = read_node(kg, offset); + uint4 node1 = read_node(kg, &offset); float fac = stack_load_float(stack, fac_offset); float3 c1 = stack_load_float3(stack, c1_offset); @@ -35,6 +35,7 @@ ccl_device void svm_node_mix(KernelGlobals *kg, float3 result = svm_mix((NodeMix)node1.y, fac, c1, c2); stack_store_float3(stack, node1.z, result); + return offset; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_musgrave.h b/intern/cycles/kernel/svm/svm_musgrave.h index 571f62fe27f..03a8b68b3ef 100644 --- a/intern/cycles/kernel/svm/svm_musgrave.h +++ b/intern/cycles/kernel/svm/svm_musgrave.h @@ -700,13 +700,13 @@ ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_4d( return value; } -ccl_device void svm_node_tex_musgrave(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint offsets1, - uint offsets2, - uint offsets3, - int *offset) +ccl_device_noinline int svm_node_tex_musgrave(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint offsets1, + uint offsets2, + uint offsets3, + int offset) { uint type, dimensions, co_stack_offset, w_stack_offset; uint scale_stack_offset, detail_stack_offset, dimension_stack_offset, lacunarity_stack_offset; @@ -720,8 +720,8 @@ ccl_device void svm_node_tex_musgrave(KernelGlobals *kg, &lacunarity_stack_offset); svm_unpack_node_uchar3(offsets3, &offset_stack_offset, &gain_stack_offset, &fac_stack_offset); - uint4 defaults1 = read_node(kg, offset); - uint4 defaults2 = read_node(kg, offset); + uint4 defaults1 = read_node(kg, &offset); + uint4 defaults2 = read_node(kg, &offset); float3 co = stack_load_float3(stack, co_stack_offset); float w = stack_load_float_default(stack, w_stack_offset, defaults1.x); @@ -844,6 +844,7 @@ ccl_device void svm_node_tex_musgrave(KernelGlobals *kg, } stack_store_float(stack, fac_stack_offset, fac); + return offset; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h index 94d8bfde555..ecb4df6afdf 100644 --- a/intern/cycles/kernel/svm/svm_noise.h +++ b/intern/cycles/kernel/svm/svm_noise.h @@ -330,7 +330,7 @@ ccl_device_inline ssef grad(const ssei &hash, const ssef &x, const ssef &y) * |__________________________| * */ -ccl_device_noinline float perlin_2d(float x, float y) +ccl_device_noinline_cpu float perlin_2d(float x, float y) { ssei XY; ssef fxy = floorfrac(ssef(x, y, 0.0f, 0.0f), &XY); @@ -447,7 +447,7 @@ ccl_device_inline ssef quad_mix(ssef p, ssef q, ssef r, ssef s, ssef f) * v7 (1, 1, 1) * */ -ccl_device_noinline float perlin_3d(float x, float y, float z) +ccl_device_noinline_cpu float perlin_3d(float x, float y, float z) { ssei XYZ; ssef fxyz = floorfrac(ssef(x, y, z, 0.0f), &XYZ); @@ -501,7 +501,7 @@ ccl_device_noinline float perlin_3d(float x, float y, float z) * v15 (1, 1, 1, 1) * */ -ccl_device_noinline float perlin_4d(float x, float y, float z, float w) +ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w) { ssei XYZW; ssef fxyzw = floorfrac(ssef(x, y, z, w), &XYZW); @@ -585,7 +585,7 @@ ccl_device_inline ssef quad_mix(avxf p, avxf q, ssef f) * |__________________________| * */ -ccl_device_noinline float perlin_3d(float x, float y, float z) +ccl_device_noinline_cpu float perlin_3d(float x, float y, float z) { ssei XYZ; ssef fxyz = floorfrac(ssef(x, y, z, 0.0f), &XYZ); @@ -637,7 +637,7 @@ ccl_device_noinline float perlin_3d(float x, float y, float z) * v15 (1, 1, 1, 1) * */ -ccl_device_noinline float perlin_4d(float x, float y, float z, float w) +ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w) { ssei XYZW; ssef fxyzw = floorfrac(ssef(x, y, z, w), &XYZW); diff --git a/intern/cycles/kernel/svm/svm_noisetex.h b/intern/cycles/kernel/svm/svm_noisetex.h index 61fd9553802..29b262ac06e 100644 --- a/intern/cycles/kernel/svm/svm_noisetex.h +++ b/intern/cycles/kernel/svm/svm_noisetex.h @@ -140,13 +140,13 @@ ccl_device void noise_texture_4d(float4 co, } } -ccl_device void svm_node_tex_noise(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint dimensions, - uint offsets1, - uint offsets2, - int *offset) +ccl_device_noinline int svm_node_tex_noise(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint dimensions, + uint offsets1, + uint offsets2, + int offset) { uint vector_stack_offset, w_stack_offset, scale_stack_offset; uint detail_stack_offset, roughness_stack_offset, distortion_stack_offset; @@ -160,8 +160,8 @@ ccl_device void svm_node_tex_noise(KernelGlobals *kg, &value_stack_offset, &color_stack_offset); - uint4 defaults1 = read_node(kg, offset); - uint4 defaults2 = read_node(kg, offset); + uint4 defaults1 = read_node(kg, &offset); + uint4 defaults2 = read_node(kg, &offset); float3 vector = stack_load_float3(stack, vector_stack_offset); float w = stack_load_float_default(stack, w_stack_offset, defaults1.x); @@ -212,6 +212,7 @@ ccl_device void svm_node_tex_noise(KernelGlobals *kg, if (stack_valid(color_stack_offset)) { stack_store_float3(stack, color_stack_offset, color); } + return offset; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_normal.h b/intern/cycles/kernel/svm/svm_normal.h index 4cd3eab0ed2..724b5f281f9 100644 --- a/intern/cycles/kernel/svm/svm_normal.h +++ b/intern/cycles/kernel/svm/svm_normal.h @@ -16,16 +16,16 @@ CCL_NAMESPACE_BEGIN -ccl_device void svm_node_normal(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint in_normal_offset, - uint out_normal_offset, - uint out_dot_offset, - int *offset) +ccl_device_noinline int svm_node_normal(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint in_normal_offset, + uint out_normal_offset, + uint out_dot_offset, + int offset) { /* read extra data */ - uint4 node1 = read_node(kg, offset); + uint4 node1 = read_node(kg, &offset); float3 normal = stack_load_float3(stack, in_normal_offset); float3 direction; @@ -39,6 +39,7 @@ ccl_device void svm_node_normal(KernelGlobals *kg, if (stack_valid(out_dot_offset)) stack_store_float(stack, out_dot_offset, dot(direction, normalize(normal))); + return offset; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h index 85ccf39144b..e92df3c093c 100644 --- a/intern/cycles/kernel/svm/svm_ramp.h +++ b/intern/cycles/kernel/svm/svm_ramp.h @@ -21,8 +21,12 @@ CCL_NAMESPACE_BEGIN /* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */ -ccl_device_inline float4 rgb_ramp_lookup( - KernelGlobals *kg, int offset, float f, bool interpolate, bool extrapolate, int table_size) +ccl_device_inline float4 rgb_ramp_lookup(const KernelGlobals *kg, + int offset, + float f, + bool interpolate, + bool extrapolate, + int table_size) { if ((f < 0.0f || f > 1.0f) && extrapolate) { float4 t0, dy; @@ -53,34 +57,35 @@ ccl_device_inline float4 rgb_ramp_lookup( return a; } -ccl_device void svm_node_rgb_ramp( - KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device_noinline int svm_node_rgb_ramp( + const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset) { uint fac_offset, color_offset, alpha_offset; uint interpolate = node.z; svm_unpack_node_uchar3(node.y, &fac_offset, &color_offset, &alpha_offset); - uint table_size = read_node(kg, offset).x; + uint table_size = read_node(kg, &offset).x; float fac = stack_load_float(stack, fac_offset); - float4 color = rgb_ramp_lookup(kg, *offset, fac, interpolate, false, table_size); + float4 color = rgb_ramp_lookup(kg, offset, fac, interpolate, false, table_size); if (stack_valid(color_offset)) stack_store_float3(stack, color_offset, float4_to_float3(color)); if (stack_valid(alpha_offset)) stack_store_float(stack, alpha_offset, color.w); - *offset += table_size; + offset += table_size; + return offset; } -ccl_device void svm_node_curves( - KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device_noinline int svm_node_curves( + const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset) { uint fac_offset, color_offset, out_offset; svm_unpack_node_uchar3(node.y, &fac_offset, &color_offset, &out_offset); - uint table_size = read_node(kg, offset).x; + uint table_size = read_node(kg, &offset).x; float fac = stack_load_float(stack, fac_offset); float3 color = stack_load_float3(stack, color_offset); @@ -89,14 +94,15 @@ ccl_device void svm_node_curves( const float range_x = max_x - min_x; const float3 relpos = (color - make_float3(min_x, min_x, min_x)) / range_x; - float r = rgb_ramp_lookup(kg, *offset, relpos.x, true, true, table_size).x; - float g = rgb_ramp_lookup(kg, *offset, relpos.y, true, true, table_size).y; - float b = rgb_ramp_lookup(kg, *offset, relpos.z, true, true, table_size).z; + float r = rgb_ramp_lookup(kg, offset, relpos.x, true, true, table_size).x; + float g = rgb_ramp_lookup(kg, offset, relpos.y, true, true, table_size).y; + float b = rgb_ramp_lookup(kg, offset, relpos.z, true, true, table_size).z; color = (1.0f - fac) * color + fac * make_float3(r, g, b); stack_store_float3(stack, out_offset, color); - *offset += table_size; + offset += table_size; + return offset; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h index f501252062e..8d52845ea3d 100644 --- a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h +++ b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h @@ -16,15 +16,15 @@ CCL_NAMESPACE_BEGIN -ccl_device void svm_node_combine_hsv(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint hue_in, - uint saturation_in, - uint value_in, - int *offset) +ccl_device_noinline int svm_node_combine_hsv(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint hue_in, + uint saturation_in, + uint value_in, + int offset) { - uint4 node1 = read_node(kg, offset); + uint4 node1 = read_node(kg, &offset); uint color_out = node1.y; float hue = stack_load_float(stack, hue_in); @@ -36,17 +36,18 @@ ccl_device void svm_node_combine_hsv(KernelGlobals *kg, if (stack_valid(color_out)) stack_store_float3(stack, color_out, color); + return offset; } -ccl_device void svm_node_separate_hsv(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint color_in, - uint hue_out, - uint saturation_out, - int *offset) +ccl_device_noinline int svm_node_separate_hsv(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint color_in, + uint hue_out, + uint saturation_out, + int offset) { - uint4 node1 = read_node(kg, offset); + uint4 node1 = read_node(kg, &offset); uint value_out = node1.y; float3 color = stack_load_float3(stack, color_in); @@ -60,6 +61,7 @@ ccl_device void svm_node_separate_hsv(KernelGlobals *kg, stack_store_float(stack, saturation_out, color.y); if (stack_valid(value_out)) stack_store_float(stack, value_out, color.z); + return offset; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_sky.h b/intern/cycles/kernel/svm/svm_sky.h index b908732f026..b77c4311e72 100644 --- a/intern/cycles/kernel/svm/svm_sky.h +++ b/intern/cycles/kernel/svm/svm_sky.h @@ -37,7 +37,7 @@ ccl_device float sky_perez_function(float *lam, float theta, float gamma) (1.0f + lam[2] * expf(lam[3] * gamma) + lam[4] * cgamma * cgamma); } -ccl_device float3 sky_radiance_preetham(KernelGlobals *kg, +ccl_device float3 sky_radiance_preetham(const KernelGlobals *kg, float3 dir, float sunphi, float suntheta, @@ -90,7 +90,7 @@ ccl_device float sky_radiance_internal(float *configuration, float theta, float configuration[6] * mieM + configuration[7] * zenith); } -ccl_device float3 sky_radiance_hosek(KernelGlobals *kg, +ccl_device float3 sky_radiance_hosek(const KernelGlobals *kg, float3 dir, float sunphi, float suntheta, @@ -127,7 +127,7 @@ ccl_device float3 geographical_to_direction(float lat, float lon) return make_float3(cos(lat) * cos(lon), cos(lat) * sin(lon), sin(lat)); } -ccl_device float3 sky_radiance_nishita(KernelGlobals *kg, +ccl_device float3 sky_radiance_nishita(const KernelGlobals *kg, float3 dir, float *nishita_data, uint texture_id) @@ -209,8 +209,8 @@ ccl_device float3 sky_radiance_nishita(KernelGlobals *kg, return xyz_to_rgb(kg, xyz); } -ccl_device void svm_node_tex_sky( - KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device_noinline int svm_node_tex_sky( + const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset) { /* Load data */ uint dir_offset = node.y; @@ -226,49 +226,49 @@ ccl_device void svm_node_tex_sky( float sunphi, suntheta, radiance_x, radiance_y, radiance_z; float config_x[9], config_y[9], config_z[9]; - float4 data = read_node_float(kg, offset); + float4 data = read_node_float(kg, &offset); sunphi = data.x; suntheta = data.y; radiance_x = data.z; radiance_y = data.w; - data = read_node_float(kg, offset); + data = read_node_float(kg, &offset); radiance_z = data.x; config_x[0] = data.y; config_x[1] = data.z; config_x[2] = data.w; - data = read_node_float(kg, offset); + data = read_node_float(kg, &offset); config_x[3] = data.x; config_x[4] = data.y; config_x[5] = data.z; config_x[6] = data.w; - data = read_node_float(kg, offset); + data = read_node_float(kg, &offset); config_x[7] = data.x; config_x[8] = data.y; config_y[0] = data.z; config_y[1] = data.w; - data = read_node_float(kg, offset); + data = read_node_float(kg, &offset); config_y[2] = data.x; config_y[3] = data.y; config_y[4] = data.z; config_y[5] = data.w; - data = read_node_float(kg, offset); + data = read_node_float(kg, &offset); config_y[6] = data.x; config_y[7] = data.y; config_y[8] = data.z; config_z[0] = data.w; - data = read_node_float(kg, offset); + data = read_node_float(kg, &offset); config_z[1] = data.x; config_z[2] = data.y; config_z[3] = data.z; config_z[4] = data.w; - data = read_node_float(kg, offset); + data = read_node_float(kg, &offset); config_z[5] = data.x; config_z[6] = data.y; config_z[7] = data.z; @@ -305,19 +305,19 @@ ccl_device void svm_node_tex_sky( /* Define variables */ float nishita_data[10]; - float4 data = read_node_float(kg, offset); + float4 data = read_node_float(kg, &offset); nishita_data[0] = data.x; nishita_data[1] = data.y; nishita_data[2] = data.z; nishita_data[3] = data.w; - data = read_node_float(kg, offset); + data = read_node_float(kg, &offset); nishita_data[4] = data.x; nishita_data[5] = data.y; nishita_data[6] = data.z; nishita_data[7] = data.w; - data = read_node_float(kg, offset); + data = read_node_float(kg, &offset); nishita_data[8] = data.x; nishita_data[9] = data.y; uint texture_id = __float_as_uint(data.z); @@ -327,6 +327,7 @@ ccl_device void svm_node_tex_sky( } stack_store_float3(stack, out_offset, f); + return offset; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h index 46600551cc4..a35253080da 100644 --- a/intern/cycles/kernel/svm/svm_tex_coord.h +++ b/intern/cycles/kernel/svm/svm_tex_coord.h @@ -14,12 +14,16 @@ * limitations under the License. */ +#include "kernel/geom/geom.h" +#include "kernel/kernel_camera.h" +#include "kernel/kernel_montecarlo.h" + CCL_NAMESPACE_BEGIN /* Texture Coordinate Node */ -ccl_device void svm_node_tex_coord( - KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int *offset) +ccl_device_noinline int svm_node_tex_coord( + const KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int offset) { float3 data; uint type = node.y; @@ -35,9 +39,9 @@ ccl_device void svm_node_tex_coord( } else { Transform tfm; - tfm.x = read_node_float(kg, offset); - tfm.y = read_node_float(kg, offset); - tfm.z = read_node_float(kg, offset); + tfm.x = read_node_float(kg, &offset); + tfm.y = read_node_float(kg, &offset); + tfm.z = read_node_float(kg, &offset); data = transform_point(&tfm, data); } break; @@ -92,10 +96,11 @@ ccl_device void svm_node_tex_coord( } stack_store_float3(stack, out_offset, data); + return offset; } -ccl_device void svm_node_tex_coord_bump_dx( - KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int *offset) +ccl_device_noinline int svm_node_tex_coord_bump_dx( + const KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int offset) { #ifdef __RAY_DIFFERENTIALS__ float3 data; @@ -112,9 +117,9 @@ ccl_device void svm_node_tex_coord_bump_dx( } else { Transform tfm; - tfm.x = read_node_float(kg, offset); - tfm.y = read_node_float(kg, offset); - tfm.z = read_node_float(kg, offset); + tfm.x = read_node_float(kg, &offset); + tfm.y = read_node_float(kg, &offset); + tfm.z = read_node_float(kg, &offset); data = transform_point(&tfm, data); } break; @@ -136,7 +141,7 @@ ccl_device void svm_node_tex_coord_bump_dx( case NODE_TEXCO_WINDOW: { if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) - data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dx); + data = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(sd->ray_dP, 0.0f, 0.0f)); else data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx); data.z = 0.0f; @@ -169,13 +174,14 @@ ccl_device void svm_node_tex_coord_bump_dx( } stack_store_float3(stack, out_offset, data); + return offset; #else - svm_node_tex_coord(kg, sd, path_flag, stack, node, offset); + return svm_node_tex_coord(kg, sd, path_flag, stack, node, offset); #endif } -ccl_device void svm_node_tex_coord_bump_dy( - KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int *offset) +ccl_device_noinline int svm_node_tex_coord_bump_dy( + const KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int offset) { #ifdef __RAY_DIFFERENTIALS__ float3 data; @@ -192,9 +198,9 @@ ccl_device void svm_node_tex_coord_bump_dy( } else { Transform tfm; - tfm.x = read_node_float(kg, offset); - tfm.y = read_node_float(kg, offset); - tfm.z = read_node_float(kg, offset); + tfm.x = read_node_float(kg, &offset); + tfm.y = read_node_float(kg, &offset); + tfm.z = read_node_float(kg, &offset); data = transform_point(&tfm, data); } break; @@ -216,7 +222,7 @@ ccl_device void svm_node_tex_coord_bump_dy( case NODE_TEXCO_WINDOW: { if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) - data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dy); + data = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(0.0f, sd->ray_dP, 0.0f)); else data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy); data.z = 0.0f; @@ -249,12 +255,16 @@ ccl_device void svm_node_tex_coord_bump_dy( } stack_store_float3(stack, out_offset, data); + return offset; #else - svm_node_tex_coord(kg, sd, path_flag, stack, node, offset); + return svm_node_tex_coord(kg, sd, path_flag, stack, node, offset); #endif } -ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +ccl_device_noinline void svm_node_normal_map(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { uint color_offset, strength_offset, normal_offset, space; svm_unpack_node_uchar4(node.y, &color_offset, &strength_offset, &normal_offset, &space); @@ -346,7 +356,10 @@ ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *st stack_store_float3(stack, normal_offset, N); } -ccl_device void svm_node_tangent(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +ccl_device_noinline void svm_node_tangent(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { uint tangent_offset, direction_type, axis; svm_unpack_node_uchar3(node.y, &tangent_offset, &direction_type, &axis); diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h index 062afcfa5ac..c053be96c51 100644 --- a/intern/cycles/kernel/svm/svm_types.h +++ b/intern/cycles/kernel/svm/svm_types.h @@ -30,37 +30,6 @@ CCL_NAMESPACE_BEGIN /* Nodes */ -/* Known frequencies of used nodes, used for selective nodes compilation - * in the kernel. Currently only affects split OpenCL kernel. - * - * Keep as defines so it's easy to check which nodes are to be compiled - * from preprocessor. - * - * Lower the number of group more often the node is used. - */ -#define NODE_GROUP_LEVEL_0 0 -#define NODE_GROUP_LEVEL_1 1 -#define NODE_GROUP_LEVEL_2 2 -#define NODE_GROUP_LEVEL_3 3 -#define NODE_GROUP_LEVEL_4 4 -#define NODE_GROUP_LEVEL_MAX NODE_GROUP_LEVEL_4 - -#define NODE_FEATURE_VOLUME (1 << 0) -#define NODE_FEATURE_HAIR (1 << 1) -#define NODE_FEATURE_BUMP (1 << 2) -#define NODE_FEATURE_BUMP_STATE (1 << 3) -#define NODE_FEATURE_VORONOI_EXTRA (1 << 4) -/* TODO(sergey): Consider using something like ((uint)(-1)). - * Need to check carefully operand types around usage of this - * define first. - */ -#define NODE_FEATURE_ALL \ - (NODE_FEATURE_VOLUME | NODE_FEATURE_HAIR | NODE_FEATURE_BUMP | NODE_FEATURE_BUMP_STATE | \ - NODE_FEATURE_VORONOI_EXTRA) - -#define NODES_GROUP(group) ((group) <= __NODES_MAX_GROUP__) -#define NODES_FEATURE(feature) ((__NODES_FEATURES__ & (feature)) != 0) - typedef enum ShaderNodeType { NODE_END = 0, NODE_SHADER_JUMP, @@ -572,12 +541,8 @@ typedef enum ClosureType { CLOSURE_BSDF_TRANSPARENT_ID, /* BSSRDF */ - CLOSURE_BSSRDF_CUBIC_ID, - CLOSURE_BSSRDF_GAUSSIAN_ID, - CLOSURE_BSSRDF_PRINCIPLED_ID, - CLOSURE_BSSRDF_BURLEY_ID, CLOSURE_BSSRDF_RANDOM_WALK_ID, - CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID, + CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID, /* Other */ CLOSURE_HOLDOUT_ID, @@ -620,11 +585,9 @@ typedef enum ClosureType { type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID || \ type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID || \ type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) -#define CLOSURE_IS_BSDF_OR_BSSRDF(type) (type <= CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) +#define CLOSURE_IS_BSDF_OR_BSSRDF(type) (type <= CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID) #define CLOSURE_IS_BSSRDF(type) \ - (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) -#define CLOSURE_IS_DISK_BSSRDF(type) \ - (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_BURLEY_ID) + (type >= CLOSURE_BSSRDF_RANDOM_WALK_ID && type <= CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID) #define CLOSURE_IS_VOLUME(type) \ (type >= CLOSURE_VOLUME_ID && type <= CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) #define CLOSURE_IS_VOLUME_SCATTER(type) (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) diff --git a/intern/cycles/kernel/svm/svm_value.h b/intern/cycles/kernel/svm/svm_value.h index 5b76f2c8832..d0478660094 100644 --- a/intern/cycles/kernel/svm/svm_value.h +++ b/intern/cycles/kernel/svm/svm_value.h @@ -19,20 +19,21 @@ CCL_NAMESPACE_BEGIN /* Value Nodes */ ccl_device void svm_node_value_f( - KernelGlobals *kg, ShaderData *sd, float *stack, uint ivalue, uint out_offset) + const KernelGlobals *kg, ShaderData *sd, float *stack, uint ivalue, uint out_offset) { stack_store_float(stack, out_offset, __uint_as_float(ivalue)); } -ccl_device void svm_node_value_v( - KernelGlobals *kg, ShaderData *sd, float *stack, uint out_offset, int *offset) +ccl_device int svm_node_value_v( + const KernelGlobals *kg, ShaderData *sd, float *stack, uint out_offset, int offset) { /* read extra data */ - uint4 node1 = read_node(kg, offset); + uint4 node1 = read_node(kg, &offset); float3 p = make_float3( __uint_as_float(node1.y), __uint_as_float(node1.z), __uint_as_float(node1.w)); stack_store_float3(stack, out_offset, p); + return offset; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_vector_rotate.h b/intern/cycles/kernel/svm/svm_vector_rotate.h index 50045752484..55e1bce0158 100644 --- a/intern/cycles/kernel/svm/svm_vector_rotate.h +++ b/intern/cycles/kernel/svm/svm_vector_rotate.h @@ -18,11 +18,11 @@ CCL_NAMESPACE_BEGIN /* Vector Rotate */ -ccl_device void svm_node_vector_rotate(ShaderData *sd, - float *stack, - uint input_stack_offsets, - uint axis_stack_offsets, - uint result_stack_offset) +ccl_device_noinline void svm_node_vector_rotate(ShaderData *sd, + float *stack, + uint input_stack_offsets, + uint axis_stack_offsets, + uint result_stack_offset) { uint type, vector_stack_offset, rotation_stack_offset, center_stack_offset, axis_stack_offset, angle_stack_offset, invert; diff --git a/intern/cycles/kernel/svm/svm_vector_transform.h b/intern/cycles/kernel/svm/svm_vector_transform.h index 1e95492cf1b..8aedb7e0f54 100644 --- a/intern/cycles/kernel/svm/svm_vector_transform.h +++ b/intern/cycles/kernel/svm/svm_vector_transform.h @@ -18,10 +18,10 @@ CCL_NAMESPACE_BEGIN /* Vector Transform */ -ccl_device void svm_node_vector_transform(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint4 node) +ccl_device_noinline void svm_node_vector_transform(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { uint itype, ifrom, ito; uint vector_in, vector_out; diff --git a/intern/cycles/kernel/svm/svm_vertex_color.h b/intern/cycles/kernel/svm/svm_vertex_color.h index 0aa45835522..986ea244f3a 100644 --- a/intern/cycles/kernel/svm/svm_vertex_color.h +++ b/intern/cycles/kernel/svm/svm_vertex_color.h @@ -16,12 +16,12 @@ CCL_NAMESPACE_BEGIN -ccl_device void svm_node_vertex_color(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint layer_id, - uint color_offset, - uint alpha_offset) +ccl_device_noinline void svm_node_vertex_color(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint layer_id, + uint color_offset, + uint alpha_offset) { AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id); if (descriptor.offset != ATTR_STD_NOT_FOUND) { @@ -35,18 +35,12 @@ ccl_device void svm_node_vertex_color(KernelGlobals *kg, } } -#ifndef __KERNEL_CUDA__ -ccl_device -#else -ccl_device_noinline -#endif - void - svm_node_vertex_color_bump_dx(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint layer_id, - uint color_offset, - uint alpha_offset) +ccl_device_noinline void svm_node_vertex_color_bump_dx(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint layer_id, + uint color_offset, + uint alpha_offset) { AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id); if (descriptor.offset != ATTR_STD_NOT_FOUND) { @@ -62,18 +56,12 @@ ccl_device_noinline } } -#ifndef __KERNEL_CUDA__ -ccl_device -#else -ccl_device_noinline -#endif - void - svm_node_vertex_color_bump_dy(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint layer_id, - uint color_offset, - uint alpha_offset) +ccl_device_noinline void svm_node_vertex_color_bump_dy(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint layer_id, + uint color_offset, + uint alpha_offset) { AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id); if (descriptor.offset != ATTR_STD_NOT_FOUND) { diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h index d0e7db35fab..b1d2eff7f37 100644 --- a/intern/cycles/kernel/svm/svm_voronoi.h +++ b/intern/cycles/kernel/svm/svm_voronoi.h @@ -902,16 +902,17 @@ ccl_device void voronoi_n_sphere_radius_4d(float4 coord, float randomness, float *outRadius = distance(closestPointToClosestPoint, closestPoint) / 2.0f; } -ccl_device void svm_node_tex_voronoi(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint dimensions, - uint feature, - uint metric, - int *offset) +template<uint node_feature_mask> +ccl_device_noinline int svm_node_tex_voronoi(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint dimensions, + uint feature, + uint metric, + int offset) { - uint4 stack_offsets = read_node(kg, offset); - uint4 defaults = read_node(kg, offset); + uint4 stack_offsets = read_node(kg, &offset); + uint4 defaults = read_node(kg, &offset); uint coord_stack_offset, w_stack_offset, scale_stack_offset, smoothness_stack_offset; uint exponent_stack_offset, randomness_stack_offset, distance_out_stack_offset, @@ -997,18 +998,18 @@ ccl_device void svm_node_tex_voronoi(KernelGlobals *kg, &color_out, &position_out_2d); break; -#if NODES_FEATURE(NODE_FEATURE_VORONOI_EXTRA) case NODE_VORONOI_SMOOTH_F1: - voronoi_smooth_f1_2d(coord_2d, - smoothness, - exponent, - randomness, - voronoi_metric, - &distance_out, - &color_out, - &position_out_2d); + if (KERNEL_NODES_FEATURE(VORONOI_EXTRA)) { + voronoi_smooth_f1_2d(coord_2d, + smoothness, + exponent, + randomness, + voronoi_metric, + &distance_out, + &color_out, + &position_out_2d); + } break; -#endif case NODE_VORONOI_F2: voronoi_f2_2d(coord_2d, exponent, @@ -1042,18 +1043,18 @@ ccl_device void svm_node_tex_voronoi(KernelGlobals *kg, &color_out, &position_out); break; -#if NODES_FEATURE(NODE_FEATURE_VORONOI_EXTRA) case NODE_VORONOI_SMOOTH_F1: - voronoi_smooth_f1_3d(coord, - smoothness, - exponent, - randomness, - voronoi_metric, - &distance_out, - &color_out, - &position_out); + if (KERNEL_NODES_FEATURE(VORONOI_EXTRA)) { + voronoi_smooth_f1_3d(coord, + smoothness, + exponent, + randomness, + voronoi_metric, + &distance_out, + &color_out, + &position_out); + } break; -#endif case NODE_VORONOI_F2: voronoi_f2_3d(coord, exponent, @@ -1076,54 +1077,54 @@ ccl_device void svm_node_tex_voronoi(KernelGlobals *kg, break; } -#if NODES_FEATURE(NODE_FEATURE_VORONOI_EXTRA) case 4: { - float4 coord_4d = make_float4(coord.x, coord.y, coord.z, w); - float4 position_out_4d; - switch (voronoi_feature) { - case NODE_VORONOI_F1: - voronoi_f1_4d(coord_4d, - exponent, - randomness, - voronoi_metric, - &distance_out, - &color_out, - &position_out_4d); - break; - case NODE_VORONOI_SMOOTH_F1: - voronoi_smooth_f1_4d(coord_4d, - smoothness, - exponent, - randomness, - voronoi_metric, - &distance_out, - &color_out, - &position_out_4d); - break; - case NODE_VORONOI_F2: - voronoi_f2_4d(coord_4d, - exponent, - randomness, - voronoi_metric, - &distance_out, - &color_out, - &position_out_4d); - break; - case NODE_VORONOI_DISTANCE_TO_EDGE: - voronoi_distance_to_edge_4d(coord_4d, randomness, &distance_out); - break; - case NODE_VORONOI_N_SPHERE_RADIUS: - voronoi_n_sphere_radius_4d(coord_4d, randomness, &radius_out); - break; - default: - kernel_assert(0); + if (KERNEL_NODES_FEATURE(VORONOI_EXTRA)) { + float4 coord_4d = make_float4(coord.x, coord.y, coord.z, w); + float4 position_out_4d; + switch (voronoi_feature) { + case NODE_VORONOI_F1: + voronoi_f1_4d(coord_4d, + exponent, + randomness, + voronoi_metric, + &distance_out, + &color_out, + &position_out_4d); + break; + case NODE_VORONOI_SMOOTH_F1: + voronoi_smooth_f1_4d(coord_4d, + smoothness, + exponent, + randomness, + voronoi_metric, + &distance_out, + &color_out, + &position_out_4d); + break; + case NODE_VORONOI_F2: + voronoi_f2_4d(coord_4d, + exponent, + randomness, + voronoi_metric, + &distance_out, + &color_out, + &position_out_4d); + break; + case NODE_VORONOI_DISTANCE_TO_EDGE: + voronoi_distance_to_edge_4d(coord_4d, randomness, &distance_out); + break; + case NODE_VORONOI_N_SPHERE_RADIUS: + voronoi_n_sphere_radius_4d(coord_4d, randomness, &radius_out); + break; + default: + kernel_assert(0); + } + position_out_4d = safe_divide_float4_float(position_out_4d, scale); + position_out = make_float3(position_out_4d.x, position_out_4d.y, position_out_4d.z); + w_out = position_out_4d.w; } - position_out_4d = safe_divide_float4_float(position_out_4d, scale); - position_out = make_float3(position_out_4d.x, position_out_4d.y, position_out_4d.z); - w_out = position_out_4d.w; break; } -#endif default: kernel_assert(0); } @@ -1138,6 +1139,7 @@ ccl_device void svm_node_tex_voronoi(KernelGlobals *kg, stack_store_float(stack, w_out_stack_offset, w_out); if (stack_valid(radius_out_stack_offset)) stack_store_float(stack, radius_out_stack_offset, radius_out); + return offset; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h index 4bc14f82382..78b75405356 100644 --- a/intern/cycles/kernel/svm/svm_voxel.h +++ b/intern/cycles/kernel/svm/svm_voxel.h @@ -19,8 +19,8 @@ CCL_NAMESPACE_BEGIN /* TODO(sergey): Think of making it more generic volume-type attribute * sampler. */ -ccl_device void svm_node_tex_voxel( - KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device_noinline int svm_node_tex_voxel( + const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset) { uint co_offset, density_out_offset, color_out_offset, space; svm_unpack_node_uchar4(node.z, &co_offset, &density_out_offset, &color_out_offset, &space); @@ -33,9 +33,9 @@ ccl_device void svm_node_tex_voxel( else { kernel_assert(space == NODE_TEX_VOXEL_SPACE_WORLD); Transform tfm; - tfm.x = read_node_float(kg, offset); - tfm.y = read_node_float(kg, offset); - tfm.z = read_node_float(kg, offset); + tfm.x = read_node_float(kg, &offset); + tfm.y = read_node_float(kg, &offset); + tfm.z = read_node_float(kg, &offset); co = transform_point(&tfm, co); } @@ -47,6 +47,7 @@ ccl_device void svm_node_tex_voxel( stack_store_float(stack, density_out_offset, r.w); if (stack_valid(color_out_offset)) stack_store_float3(stack, color_out_offset, make_float3(r.x, r.y, r.z)); + return offset; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_wave.h b/intern/cycles/kernel/svm/svm_wave.h index c4763475b47..00f980c16df 100644 --- a/intern/cycles/kernel/svm/svm_wave.h +++ b/intern/cycles/kernel/svm/svm_wave.h @@ -82,11 +82,11 @@ ccl_device_noinline_cpu float svm_wave(NodeWaveType type, } } -ccl_device void svm_node_tex_wave( - KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device_noinline int svm_node_tex_wave( + const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset) { - uint4 node2 = read_node(kg, offset); - uint4 node3 = read_node(kg, offset); + uint4 node2 = read_node(kg, &offset); + uint4 node3 = read_node(kg, &offset); /* RNA properties */ uint type_offset, bands_dir_offset, rings_dir_offset, profile_offset; @@ -125,6 +125,7 @@ ccl_device void svm_node_tex_wave( stack_store_float(stack, fac_offset, f); if (stack_valid(color_offset)) stack_store_float3(stack, color_offset, make_float3(f, f, f)); + return offset; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_wavelength.h b/intern/cycles/kernel/svm/svm_wavelength.h index d6144802559..fba8aa63d31 100644 --- a/intern/cycles/kernel/svm/svm_wavelength.h +++ b/intern/cycles/kernel/svm/svm_wavelength.h @@ -69,8 +69,8 @@ ccl_static_constant float cie_colour_match[81][3] = { {0.0002f, 0.0001f, 0.0000f}, {0.0002f, 0.0001f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, {0.0000f, 0.0000f, 0.0000f}}; -ccl_device void svm_node_wavelength( - KernelGlobals *kg, ShaderData *sd, float *stack, uint wavelength, uint color_out) +ccl_device_noinline void svm_node_wavelength( + const KernelGlobals *kg, ShaderData *sd, float *stack, uint wavelength, uint color_out) { float lambda_nm = stack_load_float(stack, wavelength); float ii = (lambda_nm - 380.0f) * (1.0f / 5.0f); // scaled 0..80 diff --git a/intern/cycles/kernel/svm/svm_white_noise.h b/intern/cycles/kernel/svm/svm_white_noise.h index b30d85acaec..0306d2e7b9c 100644 --- a/intern/cycles/kernel/svm/svm_white_noise.h +++ b/intern/cycles/kernel/svm/svm_white_noise.h @@ -16,13 +16,12 @@ CCL_NAMESPACE_BEGIN -ccl_device void svm_node_tex_white_noise(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint dimensions, - uint inputs_stack_offsets, - uint ouptuts_stack_offsets, - int *offset) +ccl_device_noinline void svm_node_tex_white_noise(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint dimensions, + uint inputs_stack_offsets, + uint ouptuts_stack_offsets) { uint vector_stack_offset, w_stack_offset, value_stack_offset, color_stack_offset; svm_unpack_node_uchar2(inputs_stack_offsets, &vector_stack_offset, &w_stack_offset); diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h index 49158bd86d5..7ec913789d2 100644 --- a/intern/cycles/kernel/svm/svm_wireframe.h +++ b/intern/cycles/kernel/svm/svm_wireframe.h @@ -35,7 +35,7 @@ CCL_NAMESPACE_BEGIN /* Wireframe Node */ ccl_device_inline float wireframe( - KernelGlobals *kg, ShaderData *sd, float size, int pixel_size, float3 *P) + const KernelGlobals *kg, ShaderData *sd, float size, int pixel_size, float3 *P) { #ifdef __HAIR__ if (sd->prim != PRIM_NONE && sd->type & PRIMITIVE_ALL_TRIANGLE) @@ -88,7 +88,10 @@ ccl_device_inline float wireframe( return 0.0f; } -ccl_device void svm_node_wireframe(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +ccl_device_noinline void svm_node_wireframe(const KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { uint in_size = node.y; uint out_fac = node.z; @@ -100,18 +103,7 @@ ccl_device void svm_node_wireframe(KernelGlobals *kg, ShaderData *sd, float *sta int pixel_size = (int)use_pixel_size; /* Calculate wireframe */ -#ifdef __SPLIT_KERNEL__ - /* TODO(sergey): This is because sd is actually a global space, - * which makes it difficult to re-use same wireframe() function. - * - * With OpenCL 2.0 it's possible to avoid this change, but for until - * then we'll be living with such an exception. - */ - float3 P = sd->P; - float f = wireframe(kg, sd, size, pixel_size, &P); -#else float f = wireframe(kg, sd, size, pixel_size, &sd->P); -#endif /* TODO(sergey): Think of faster way to calculate derivatives. */ if (bump_offset == NODE_BUMP_OFFSET_DX) { |