Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCampbell Barton <ideasman42@gmail.com>2019-04-17 07:17:24 +0300
committerCampbell Barton <ideasman42@gmail.com>2019-04-17 07:21:24 +0300
commite12c08e8d170b7ca40f204a5b0423c23a9fbc2c1 (patch)
tree8cf3453d12edb177a218ef8009357518ec6cab6a /intern/cycles/kernel
parentb3dabc200a4b0399ec6b81f2ff2730d07b44fcaa (diff)
ClangFormat: apply to source, most of intern
Apply clang format as proposed in T53211. For details on usage and instructions for migrating branches without conflicts, see: https://wiki.blender.org/wiki/Tools/ClangFormat
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r--intern/cycles/kernel/CMakeLists.txt898
-rw-r--r--intern/cycles/kernel/bvh/bvh.h577
-rw-r--r--intern/cycles/kernel/bvh/bvh_embree.h173
-rw-r--r--intern/cycles/kernel/bvh/bvh_local.h358
-rw-r--r--intern/cycles/kernel/bvh/bvh_nodes.h899
-rw-r--r--intern/cycles/kernel/bvh/bvh_shadow_all.h563
-rw-r--r--intern/cycles/kernel/bvh/bvh_traversal.h632
-rw-r--r--intern/cycles/kernel/bvh/bvh_types.h46
-rw-r--r--intern/cycles/kernel/bvh/bvh_volume.h438
-rw-r--r--intern/cycles/kernel/bvh/bvh_volume_all.h557
-rw-r--r--intern/cycles/kernel/bvh/obvh_local.h641
-rw-r--r--intern/cycles/kernel/bvh/obvh_nodes.h817
-rw-r--r--intern/cycles/kernel/bvh/obvh_shadow_all.h1060
-rw-r--r--intern/cycles/kernel/bvh/obvh_traversal.h1013
-rw-r--r--intern/cycles/kernel/bvh/obvh_volume.h760
-rw-r--r--intern/cycles/kernel/bvh/obvh_volume_all.h866
-rw-r--r--intern/cycles/kernel/bvh/qbvh_local.h427
-rw-r--r--intern/cycles/kernel/bvh/qbvh_nodes.h700
-rw-r--r--intern/cycles/kernel/bvh/qbvh_shadow_all.h685
-rw-r--r--intern/cycles/kernel/bvh/qbvh_traversal.h731
-rw-r--r--intern/cycles/kernel/bvh/qbvh_volume.h538
-rw-r--r--intern/cycles/kernel/bvh/qbvh_volume_all.h650
-rw-r--r--intern/cycles/kernel/closure/alloc.h85
-rw-r--r--intern/cycles/kernel/closure/bsdf.h1015
-rw-r--r--intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h353
-rw-r--r--intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h168
-rw-r--r--intern/cycles/kernel/closure/bsdf_diffuse.h146
-rw-r--r--intern/cycles/kernel/closure/bsdf_diffuse_ramp.h101
-rw-r--r--intern/cycles/kernel/closure/bsdf_hair.h365
-rw-r--r--intern/cycles/kernel/closure/bsdf_hair_principled.h679
-rw-r--r--intern/cycles/kernel/closure/bsdf_microfacet.h1847
-rw-r--r--intern/cycles/kernel/closure/bsdf_microfacet_multi.h972
-rw-r--r--intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h398
-rw-r--r--intern/cycles/kernel/closure/bsdf_oren_nayar.h124
-rw-r--r--intern/cycles/kernel/closure/bsdf_phong_ramp.h171
-rw-r--r--intern/cycles/kernel/closure/bsdf_principled_diffuse.h130
-rw-r--r--intern/cycles/kernel/closure/bsdf_principled_sheen.h116
-rw-r--r--intern/cycles/kernel/closure/bsdf_reflection.h63
-rw-r--r--intern/cycles/kernel/closure/bsdf_refraction.h80
-rw-r--r--intern/cycles/kernel/closure/bsdf_toon.h242
-rw-r--r--intern/cycles/kernel/closure/bsdf_transparent.h116
-rw-r--r--intern/cycles/kernel/closure/bsdf_util.h183
-rw-r--r--intern/cycles/kernel/closure/bssrdf.h637
-rw-r--r--intern/cycles/kernel/closure/emissive.h42
-rw-r--r--intern/cycles/kernel/closure/volume.h209
-rw-r--r--intern/cycles/kernel/filter/filter.h6
-rw-r--r--intern/cycles/kernel/filter/filter_defines.h75
-rw-r--r--intern/cycles/kernel/filter/filter_features.h168
-rw-r--r--intern/cycles/kernel/filter/filter_features_sse.h129
-rw-r--r--intern/cycles/kernel/filter/filter_nlm_cpu.h334
-rw-r--r--intern/cycles/kernel/filter/filter_nlm_gpu.h365
-rw-r--r--intern/cycles/kernel/filter/filter_prefilter.h325
-rw-r--r--intern/cycles/kernel/filter/filter_reconstruction.h142
-rw-r--r--intern/cycles/kernel/filter/filter_transform.h177
-rw-r--r--intern/cycles/kernel/filter/filter_transform_gpu.h186
-rw-r--r--intern/cycles/kernel/filter/filter_transform_sse.h192
-rw-r--r--intern/cycles/kernel/geom/geom_attribute.h110
-rw-r--r--intern/cycles/kernel/geom/geom_curve.h441
-rw-r--r--intern/cycles/kernel/geom/geom_curve_intersect.h1770
-rw-r--r--intern/cycles/kernel/geom/geom_motion_curve.h306
-rw-r--r--intern/cycles/kernel/geom/geom_motion_triangle.h228
-rw-r--r--intern/cycles/kernel/geom/geom_motion_triangle_intersect.h418
-rw-r--r--intern/cycles/kernel/geom/geom_motion_triangle_shader.h151
-rw-r--r--intern/cycles/kernel/geom/geom_object.h503
-rw-r--r--intern/cycles/kernel/geom/geom_patch.h554
-rw-r--r--intern/cycles/kernel/geom/geom_primitive.h484
-rw-r--r--intern/cycles/kernel/geom/geom_subd_triangle.h765
-rw-r--r--intern/cycles/kernel/geom/geom_triangle.h353
-rw-r--r--intern/cycles/kernel/geom/geom_triangle_intersect.h1229
-rw-r--r--intern/cycles/kernel/geom/geom_volume.h56
-rw-r--r--intern/cycles/kernel/kernel.h11
-rw-r--r--intern/cycles/kernel/kernel_accumulate.h1093
-rw-r--r--intern/cycles/kernel/kernel_bake.h944
-rw-r--r--intern/cycles/kernel/kernel_camera.h696
-rw-r--r--intern/cycles/kernel/kernel_color.h10
-rw-r--r--intern/cycles/kernel/kernel_compat_cpu.h94
-rw-r--r--intern/cycles/kernel/kernel_compat_cuda.h87
-rw-r--r--intern/cycles/kernel/kernel_compat_opencl.h11
-rw-r--r--intern/cycles/kernel/kernel_differential.h132
-rw-r--r--intern/cycles/kernel/kernel_emission.h455
-rw-r--r--intern/cycles/kernel/kernel_film.h92
-rw-r--r--intern/cycles/kernel/kernel_globals.h165
-rw-r--r--intern/cycles/kernel/kernel_id_passes.h123
-rw-r--r--intern/cycles/kernel/kernel_jitter.h258
-rw-r--r--intern/cycles/kernel/kernel_light.h1946
-rw-r--r--intern/cycles/kernel/kernel_math.h2
-rw-r--r--intern/cycles/kernel/kernel_montecarlo.h349
-rw-r--r--intern/cycles/kernel/kernel_passes.h647
-rw-r--r--intern/cycles/kernel/kernel_path.h1210
-rw-r--r--intern/cycles/kernel/kernel_path_branched.h1032
-rw-r--r--intern/cycles/kernel/kernel_path_common.h31
-rw-r--r--intern/cycles/kernel/kernel_path_state.h367
-rw-r--r--intern/cycles/kernel/kernel_path_subsurface.h213
-rw-r--r--intern/cycles/kernel/kernel_path_surface.h603
-rw-r--r--intern/cycles/kernel/kernel_path_volume.h544
-rw-r--r--intern/cycles/kernel/kernel_profiling.h14
-rw-r--r--intern/cycles/kernel/kernel_projection.h272
-rw-r--r--intern/cycles/kernel/kernel_queues.h145
-rw-r--r--intern/cycles/kernel/kernel_random.h294
-rw-r--r--intern/cycles/kernel/kernel_shader.h1749
-rw-r--r--intern/cycles/kernel/kernel_shadow.h637
-rw-r--r--intern/cycles/kernel/kernel_subsurface.h822
-rw-r--r--intern/cycles/kernel/kernel_types.h2105
-rw-r--r--intern/cycles/kernel/kernel_volume.h2244
-rw-r--r--intern/cycles/kernel/kernel_work_stealing.h59
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter.cpp2
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_avx.cpp2
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_avx2.cpp2
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu.h42
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h229
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_sse2.cpp2
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_sse3.cpp2
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_sse41.cpp2
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel.cpp34
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_avx.cpp2
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp2
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu.h53
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h965
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h182
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_split.cpp2
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp2
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp2
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp2
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp2
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp2
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp2
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp2
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp2
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel_config.h14
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h237
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h425
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_split_function.h59
-rw-r--r--intern/cycles/kernel/osl/CMakeLists.txt28
-rw-r--r--intern/cycles/kernel/osl/background.cpp37
-rw-r--r--intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp43
-rw-r--r--intern/cycles/kernel/osl/bsdf_phong_ramp.cpp45
-rw-r--r--intern/cycles/kernel/osl/emissive.cpp18
-rw-r--r--intern/cycles/kernel/osl/osl_bssrdf.cpp133
-rw-r--r--intern/cycles/kernel/osl/osl_closures.cpp1300
-rw-r--r--intern/cycles/kernel/osl/osl_closures.h86
-rw-r--r--intern/cycles/kernel/osl/osl_globals.h106
-rw-r--r--intern/cycles/kernel/osl/osl_services.cpp2064
-rw-r--r--intern/cycles/kernel/osl/osl_services.h388
-rw-r--r--intern/cycles/kernel/osl/osl_shader.cpp580
-rw-r--r--intern/cycles/kernel/osl/osl_shader.h35
-rw-r--r--intern/cycles/kernel/shaders/CMakeLists.txt212
-rw-r--r--intern/cycles/kernel/shaders/node_absorption_volume.osl10
-rw-r--r--intern/cycles/kernel/shaders/node_add_closure.osl10
-rw-r--r--intern/cycles/kernel/shaders/node_ambient_occlusion.osl36
-rw-r--r--intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl70
-rw-r--r--intern/cycles/kernel/shaders/node_attribute.osl38
-rw-r--r--intern/cycles/kernel/shaders/node_background.osl10
-rw-r--r--intern/cycles/kernel/shaders/node_bevel.osl18
-rw-r--r--intern/cycles/kernel/shaders/node_blackbody.osl19
-rw-r--r--intern/cycles/kernel/shaders/node_brick_texture.osl165
-rw-r--r--intern/cycles/kernel/shaders/node_brightness.osl20
-rw-r--r--intern/cycles/kernel/shaders/node_bump.osl74
-rw-r--r--intern/cycles/kernel/shaders/node_camera.osl16
-rw-r--r--intern/cycles/kernel/shaders/node_checker_texture.osl71
-rw-r--r--intern/cycles/kernel/shaders/node_color.h197
-rw-r--r--intern/cycles/kernel/shaders/node_combine_hsv.osl9
-rw-r--r--intern/cycles/kernel/shaders/node_combine_rgb.osl9
-rw-r--r--intern/cycles/kernel/shaders/node_combine_xyz.osl9
-rw-r--r--intern/cycles/kernel/shaders/node_convert_from_color.osl26
-rw-r--r--intern/cycles/kernel/shaders/node_convert_from_float.osl26
-rw-r--r--intern/cycles/kernel/shaders/node_convert_from_int.osl28
-rw-r--r--intern/cycles/kernel/shaders/node_convert_from_normal.osl26
-rw-r--r--intern/cycles/kernel/shaders/node_convert_from_point.osl26
-rw-r--r--intern/cycles/kernel/shaders/node_convert_from_string.osl16
-rw-r--r--intern/cycles/kernel/shaders/node_convert_from_vector.osl26
-rw-r--r--intern/cycles/kernel/shaders/node_diffuse_bsdf.osl18
-rw-r--r--intern/cycles/kernel/shaders/node_displacement.osl30
-rw-r--r--intern/cycles/kernel/shaders/node_emission.osl8
-rw-r--r--intern/cycles/kernel/shaders/node_environment_texture.osl82
-rw-r--r--intern/cycles/kernel/shaders/node_fresnel.h44
-rw-r--r--intern/cycles/kernel/shaders/node_fresnel.osl14
-rw-r--r--intern/cycles/kernel/shaders/node_gamma.osl7
-rw-r--r--intern/cycles/kernel/shaders/node_geometry.osl88
-rw-r--r--intern/cycles/kernel/shaders/node_glass_bsdf.osl44
-rw-r--r--intern/cycles/kernel/shaders/node_glossy_bsdf.osl35
-rw-r--r--intern/cycles/kernel/shaders/node_gradient_texture.osl89
-rw-r--r--intern/cycles/kernel/shaders/node_hair_bsdf.osl65
-rw-r--r--intern/cycles/kernel/shaders/node_hair_info.osl22
-rw-r--r--intern/cycles/kernel/shaders/node_holdout.osl5
-rw-r--r--intern/cycles/kernel/shaders/node_hsv.osl36
-rw-r--r--intern/cycles/kernel/shaders/node_ies_light.osl29
-rw-r--r--intern/cycles/kernel/shaders/node_image_texture.osl378
-rw-r--r--intern/cycles/kernel/shaders/node_invert.osl10
-rw-r--r--intern/cycles/kernel/shaders/node_layer_weight.osl37
-rw-r--r--intern/cycles/kernel/shaders/node_light_falloff.osl40
-rw-r--r--intern/cycles/kernel/shaders/node_light_path.osl78
-rw-r--r--intern/cycles/kernel/shaders/node_magic_texture.osl162
-rw-r--r--intern/cycles/kernel/shaders/node_mapping.osl23
-rw-r--r--intern/cycles/kernel/shaders/node_math.osl162
-rw-r--r--intern/cycles/kernel/shaders/node_mix.osl421
-rw-r--r--intern/cycles/kernel/shaders/node_mix_closure.osl14
-rw-r--r--intern/cycles/kernel/shaders/node_musgrave_texture.osl302
-rw-r--r--intern/cycles/kernel/shaders/node_noise_texture.osl63
-rw-r--r--intern/cycles/kernel/shaders/node_normal.osl14
-rw-r--r--intern/cycles/kernel/shaders/node_normal_map.osl128
-rw-r--r--intern/cycles/kernel/shaders/node_object_info.osl18
-rw-r--r--intern/cycles/kernel/shaders/node_output_displacement.osl3
-rw-r--r--intern/cycles/kernel/shaders/node_output_surface.osl3
-rw-r--r--intern/cycles/kernel/shaders/node_output_volume.osl3
-rw-r--r--intern/cycles/kernel/shaders/node_particle_info.osl34
-rw-r--r--intern/cycles/kernel/shaders/node_principled_bsdf.osl245
-rw-r--r--intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl128
-rw-r--r--intern/cycles/kernel/shaders/node_principled_volume.osl126
-rw-r--r--intern/cycles/kernel/shaders/node_ramp_util.h108
-rw-r--r--intern/cycles/kernel/shaders/node_refraction_bsdf.osl32
-rw-r--r--intern/cycles/kernel/shaders/node_rgb_curves.osl30
-rw-r--r--intern/cycles/kernel/shaders/node_rgb_ramp.osl18
-rw-r--r--intern/cycles/kernel/shaders/node_rgb_to_bw.osl7
-rw-r--r--intern/cycles/kernel/shaders/node_scatter_volume.osl12
-rw-r--r--intern/cycles/kernel/shaders/node_separate_hsv.osl19
-rw-r--r--intern/cycles/kernel/shaders/node_separate_rgb.osl15
-rw-r--r--intern/cycles/kernel/shaders/node_separate_xyz.osl15
-rw-r--r--intern/cycles/kernel/shaders/node_set_normal.osl9
-rw-r--r--intern/cycles/kernel/shaders/node_sky_texture.osl157
-rw-r--r--intern/cycles/kernel/shaders/node_subsurface_scattering.osl43
-rw-r--r--intern/cycles/kernel/shaders/node_tangent.osl46
-rw-r--r--intern/cycles/kernel/shaders/node_texture.h210
-rw-r--r--intern/cycles/kernel/shaders/node_texture_coordinate.osl141
-rw-r--r--intern/cycles/kernel/shaders/node_toon_bsdf.osl22
-rw-r--r--intern/cycles/kernel/shaders/node_translucent_bsdf.osl8
-rw-r--r--intern/cycles/kernel/shaders/node_transparent_bsdf.osl8
-rw-r--r--intern/cycles/kernel/shaders/node_uv_map.osl47
-rw-r--r--intern/cycles/kernel/shaders/node_value.osl20
-rw-r--r--intern/cycles/kernel/shaders/node_vector_curves.osl30
-rw-r--r--intern/cycles/kernel/shaders/node_vector_displacement.osl78
-rw-r--r--intern/cycles/kernel/shaders/node_vector_math.osl60
-rw-r--r--intern/cycles/kernel/shaders/node_vector_transform.osl30
-rw-r--r--intern/cycles/kernel/shaders/node_velvet_bsdf.osl14
-rw-r--r--intern/cycles/kernel/shaders/node_voronoi_texture.osl273
-rw-r--r--intern/cycles/kernel/shaders/node_voxel_texture.osl52
-rw-r--r--intern/cycles/kernel/shaders/node_wave_texture.osl72
-rw-r--r--intern/cycles/kernel/shaders/node_wavelength.osl7
-rw-r--r--intern/cycles/kernel/shaders/node_wireframe.osl39
-rw-r--r--intern/cycles/kernel/shaders/oslutil.h100
-rw-r--r--intern/cycles/kernel/shaders/stdosl.h1160
-rw-r--r--intern/cycles/kernel/split/kernel_branched.h350
-rw-r--r--intern/cycles/kernel/split/kernel_buffer_update.h227
-rw-r--r--intern/cycles/kernel/split/kernel_data_init.h122
-rw-r--r--intern/cycles/kernel/split/kernel_direct_lighting.h195
-rw-r--r--intern/cycles/kernel/split/kernel_do_volume.h325
-rw-r--r--intern/cycles/kernel/split/kernel_enqueue_inactive.h36
-rw-r--r--intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h186
-rw-r--r--intern/cycles/kernel/split/kernel_indirect_background.h72
-rw-r--r--intern/cycles/kernel/split/kernel_indirect_subsurface.h79
-rw-r--r--intern/cycles/kernel/split/kernel_lamp_emission.h62
-rw-r--r--intern/cycles/kernel/split/kernel_next_iteration_setup.h350
-rw-r--r--intern/cycles/kernel/split/kernel_path_init.h88
-rw-r--r--intern/cycles/kernel/split/kernel_queue_enqueue.h85
-rw-r--r--intern/cycles/kernel/split/kernel_scene_intersect.h83
-rw-r--r--intern/cycles/kernel/split/kernel_shader_eval.h57
-rw-r--r--intern/cycles/kernel/split/kernel_shader_setup.h78
-rw-r--r--intern/cycles/kernel/split/kernel_shader_sort.h134
-rw-r--r--intern/cycles/kernel/split/kernel_shadow_blocked_ao.h49
-rw-r--r--intern/cycles/kernel/split/kernel_shadow_blocked_dl.h135
-rw-r--r--intern/cycles/kernel/split/kernel_split_common.h75
-rw-r--r--intern/cycles/kernel/split/kernel_split_data.h39
-rw-r--r--intern/cycles/kernel/split/kernel_split_data_types.h170
-rw-r--r--intern/cycles/kernel/split/kernel_subsurface_scatter.h475
-rw-r--r--intern/cycles/kernel/svm/svm.h624
-rw-r--r--intern/cycles/kernel/svm/svm_ao.h151
-rw-r--r--intern/cycles/kernel/svm/svm_attribute.h248
-rw-r--r--intern/cycles/kernel/svm/svm_bevel.h379
-rw-r--r--intern/cycles/kernel/svm/svm_blackbody.h11
-rw-r--r--intern/cycles/kernel/svm/svm_brick.h196
-rw-r--r--intern/cycles/kernel/svm/svm_brightness.h19
-rw-r--r--intern/cycles/kernel/svm/svm_bump.h58
-rw-r--r--intern/cycles/kernel/svm/svm_camera.h33
-rw-r--r--intern/cycles/kernel/svm/svm_checker.h42
-rw-r--r--intern/cycles/kernel/svm/svm_closure.h2147
-rw-r--r--intern/cycles/kernel/svm/svm_color_util.h366
-rw-r--r--intern/cycles/kernel/svm/svm_convert.h95
-rw-r--r--intern/cycles/kernel/svm/svm_displace.h225
-rw-r--r--intern/cycles/kernel/svm/svm_fresnel.h66
-rw-r--r--intern/cycles/kernel/svm/svm_gamma.h13
-rw-r--r--intern/cycles/kernel/svm/svm_geometry.h325
-rw-r--r--intern/cycles/kernel/svm/svm_gradient.h84
-rw-r--r--intern/cycles/kernel/svm/svm_hsv.h55
-rw-r--r--intern/cycles/kernel/svm/svm_ies.h149
-rw-r--r--intern/cycles/kernel/svm/svm_image.h324
-rw-r--r--intern/cycles/kernel/svm/svm_invert.h19
-rw-r--r--intern/cycles/kernel/svm/svm_light_path.h116
-rw-r--r--intern/cycles/kernel/svm/svm_magic.h159
-rw-r--r--intern/cycles/kernel/svm/svm_mapping.h30
-rw-r--r--intern/cycles/kernel/svm/svm_math.h48
-rw-r--r--intern/cycles/kernel/svm/svm_math_util.h269
-rw-r--r--intern/cycles/kernel/svm/svm_mix.h22
-rw-r--r--intern/cycles/kernel/svm/svm_musgrave.h319
-rw-r--r--intern/cycles/kernel/svm/svm_noise.h330
-rw-r--r--intern/cycles/kernel/svm/svm_noisetex.h55
-rw-r--r--intern/cycles/kernel/svm/svm_normal.h32
-rw-r--r--intern/cycles/kernel/svm/svm_ramp.h125
-rw-r--r--intern/cycles/kernel/svm/svm_ramp_util.h126
-rw-r--r--intern/cycles/kernel/svm/svm_sepcomb_hsv.h56
-rw-r--r--intern/cycles/kernel/svm/svm_sepcomb_vector.h30
-rw-r--r--intern/cycles/kernel/svm/svm_sky.h283
-rw-r--r--intern/cycles/kernel/svm/svm_tex_coord.h709
-rw-r--r--intern/cycles/kernel/svm/svm_texture.h56
-rw-r--r--intern/cycles/kernel/svm/svm_types.h745
-rw-r--r--intern/cycles/kernel/svm/svm_value.h17
-rw-r--r--intern/cycles/kernel/svm/svm_vector_transform.h141
-rw-r--r--intern/cycles/kernel/svm/svm_voronoi.h290
-rw-r--r--intern/cycles/kernel/svm/svm_voxel.h49
-rw-r--r--intern/cycles/kernel/svm/svm_wave.h70
-rw-r--r--intern/cycles/kernel/svm/svm_wavelength.h112
-rw-r--r--intern/cycles/kernel/svm/svm_wireframe.h150
310 files changed, 40081 insertions, 38747 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 01552dff9bb..8a8fee108ae 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -1,7 +1,7 @@
remove_extra_strict_flags()
set(INC
- ..
+ ..
)
set(INC_SYS
@@ -9,328 +9,328 @@ set(INC_SYS
)
set(SRC_CPU_KERNELS
- kernels/cpu/kernel.cpp
- kernels/cpu/kernel_sse2.cpp
- kernels/cpu/kernel_sse3.cpp
- kernels/cpu/kernel_sse41.cpp
- kernels/cpu/kernel_avx.cpp
- kernels/cpu/kernel_avx2.cpp
- kernels/cpu/kernel_split.cpp
- kernels/cpu/kernel_split_sse2.cpp
- kernels/cpu/kernel_split_sse3.cpp
- kernels/cpu/kernel_split_sse41.cpp
- kernels/cpu/kernel_split_avx.cpp
- kernels/cpu/kernel_split_avx2.cpp
- kernels/cpu/filter.cpp
- kernels/cpu/filter_sse2.cpp
- kernels/cpu/filter_sse3.cpp
- kernels/cpu/filter_sse41.cpp
- kernels/cpu/filter_avx.cpp
- kernels/cpu/filter_avx2.cpp
+ kernels/cpu/kernel.cpp
+ kernels/cpu/kernel_sse2.cpp
+ kernels/cpu/kernel_sse3.cpp
+ kernels/cpu/kernel_sse41.cpp
+ kernels/cpu/kernel_avx.cpp
+ kernels/cpu/kernel_avx2.cpp
+ kernels/cpu/kernel_split.cpp
+ kernels/cpu/kernel_split_sse2.cpp
+ kernels/cpu/kernel_split_sse3.cpp
+ kernels/cpu/kernel_split_sse41.cpp
+ kernels/cpu/kernel_split_avx.cpp
+ kernels/cpu/kernel_split_avx2.cpp
+ kernels/cpu/filter.cpp
+ kernels/cpu/filter_sse2.cpp
+ kernels/cpu/filter_sse3.cpp
+ kernels/cpu/filter_sse41.cpp
+ kernels/cpu/filter_avx.cpp
+ kernels/cpu/filter_avx2.cpp
)
set(SRC_CUDA_KERNELS
- kernels/cuda/kernel.cu
- kernels/cuda/kernel_split.cu
- kernels/cuda/filter.cu
+ kernels/cuda/kernel.cu
+ kernels/cuda/kernel_split.cu
+ kernels/cuda/filter.cu
)
set(SRC_OPENCL_KERNELS
- kernels/opencl/kernel_bake.cl
- kernels/opencl/kernel_base.cl
- kernels/opencl/kernel_displace.cl
- kernels/opencl/kernel_background.cl
- kernels/opencl/kernel_state_buffer_size.cl
- kernels/opencl/kernel_split_bundle.cl
- kernels/opencl/kernel_data_init.cl
- kernels/opencl/kernel_path_init.cl
- kernels/opencl/kernel_queue_enqueue.cl
- kernels/opencl/kernel_scene_intersect.cl
- kernels/opencl/kernel_lamp_emission.cl
- kernels/opencl/kernel_do_volume.cl
- kernels/opencl/kernel_indirect_background.cl
- kernels/opencl/kernel_shader_setup.cl
- kernels/opencl/kernel_shader_sort.cl
- kernels/opencl/kernel_shader_eval.cl
- kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
- kernels/opencl/kernel_subsurface_scatter.cl
- kernels/opencl/kernel_direct_lighting.cl
- kernels/opencl/kernel_shadow_blocked_ao.cl
- kernels/opencl/kernel_shadow_blocked_dl.cl
- kernels/opencl/kernel_enqueue_inactive.cl
- kernels/opencl/kernel_next_iteration_setup.cl
- kernels/opencl/kernel_indirect_subsurface.cl
- kernels/opencl/kernel_buffer_update.cl
- kernels/opencl/filter.cl
+ kernels/opencl/kernel_bake.cl
+ kernels/opencl/kernel_base.cl
+ kernels/opencl/kernel_displace.cl
+ kernels/opencl/kernel_background.cl
+ kernels/opencl/kernel_state_buffer_size.cl
+ kernels/opencl/kernel_split_bundle.cl
+ kernels/opencl/kernel_data_init.cl
+ kernels/opencl/kernel_path_init.cl
+ kernels/opencl/kernel_queue_enqueue.cl
+ kernels/opencl/kernel_scene_intersect.cl
+ kernels/opencl/kernel_lamp_emission.cl
+ kernels/opencl/kernel_do_volume.cl
+ kernels/opencl/kernel_indirect_background.cl
+ kernels/opencl/kernel_shader_setup.cl
+ kernels/opencl/kernel_shader_sort.cl
+ kernels/opencl/kernel_shader_eval.cl
+ kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
+ kernels/opencl/kernel_subsurface_scatter.cl
+ kernels/opencl/kernel_direct_lighting.cl
+ kernels/opencl/kernel_shadow_blocked_ao.cl
+ kernels/opencl/kernel_shadow_blocked_dl.cl
+ kernels/opencl/kernel_enqueue_inactive.cl
+ kernels/opencl/kernel_next_iteration_setup.cl
+ kernels/opencl/kernel_indirect_subsurface.cl
+ kernels/opencl/kernel_buffer_update.cl
+ kernels/opencl/filter.cl
)
set(SRC_BVH_HEADERS
- bvh/bvh.h
- bvh/bvh_nodes.h
- bvh/bvh_shadow_all.h
- bvh/bvh_local.h
- bvh/bvh_traversal.h
- bvh/bvh_types.h
- bvh/bvh_volume.h
- bvh/bvh_volume_all.h
- bvh/qbvh_nodes.h
- bvh/qbvh_shadow_all.h
- bvh/qbvh_local.h
- bvh/qbvh_traversal.h
- bvh/qbvh_volume.h
- bvh/qbvh_volume_all.h
- bvh/obvh_nodes.h
- bvh/obvh_shadow_all.h
- bvh/obvh_local.h
- bvh/obvh_traversal.h
- bvh/obvh_volume.h
- bvh/obvh_volume_all.h
- bvh/bvh_embree.h
+ bvh/bvh.h
+ bvh/bvh_nodes.h
+ bvh/bvh_shadow_all.h
+ bvh/bvh_local.h
+ bvh/bvh_traversal.h
+ bvh/bvh_types.h
+ bvh/bvh_volume.h
+ bvh/bvh_volume_all.h
+ bvh/qbvh_nodes.h
+ bvh/qbvh_shadow_all.h
+ bvh/qbvh_local.h
+ bvh/qbvh_traversal.h
+ bvh/qbvh_volume.h
+ bvh/qbvh_volume_all.h
+ bvh/obvh_nodes.h
+ bvh/obvh_shadow_all.h
+ bvh/obvh_local.h
+ bvh/obvh_traversal.h
+ bvh/obvh_volume.h
+ bvh/obvh_volume_all.h
+ bvh/bvh_embree.h
)
set(SRC_HEADERS
- kernel_accumulate.h
- kernel_bake.h
- kernel_camera.h
- kernel_color.h
- kernel_compat_cpu.h
- kernel_compat_cuda.h
- kernel_compat_opencl.h
- kernel_differential.h
- kernel_emission.h
- kernel_film.h
- kernel_globals.h
- kernel_id_passes.h
- kernel_jitter.h
- kernel_light.h
- kernel_math.h
- kernel_montecarlo.h
- kernel_passes.h
- kernel_path.h
- kernel_path_branched.h
- kernel_path_common.h
- kernel_path_state.h
- kernel_path_surface.h
- kernel_path_subsurface.h
- kernel_path_volume.h
- kernel_profiling.h
- kernel_projection.h
- kernel_queues.h
- kernel_random.h
- kernel_shader.h
- kernel_shadow.h
- kernel_subsurface.h
- kernel_textures.h
- kernel_types.h
- kernel_volume.h
- kernel_work_stealing.h
+ kernel_accumulate.h
+ kernel_bake.h
+ kernel_camera.h
+ kernel_color.h
+ kernel_compat_cpu.h
+ kernel_compat_cuda.h
+ kernel_compat_opencl.h
+ kernel_differential.h
+ kernel_emission.h
+ kernel_film.h
+ kernel_globals.h
+ kernel_id_passes.h
+ kernel_jitter.h
+ kernel_light.h
+ kernel_math.h
+ kernel_montecarlo.h
+ kernel_passes.h
+ kernel_path.h
+ kernel_path_branched.h
+ kernel_path_common.h
+ kernel_path_state.h
+ kernel_path_surface.h
+ kernel_path_subsurface.h
+ kernel_path_volume.h
+ kernel_profiling.h
+ kernel_projection.h
+ kernel_queues.h
+ kernel_random.h
+ kernel_shader.h
+ kernel_shadow.h
+ kernel_subsurface.h
+ kernel_textures.h
+ kernel_types.h
+ kernel_volume.h
+ kernel_work_stealing.h
)
set(SRC_KERNELS_CPU_HEADERS
- kernel.h
- kernels/cpu/kernel_cpu.h
- kernels/cpu/kernel_cpu_impl.h
- kernels/cpu/kernel_cpu_image.h
- kernels/cpu/filter_cpu.h
- kernels/cpu/filter_cpu_impl.h
+ kernel.h
+ kernels/cpu/kernel_cpu.h
+ kernels/cpu/kernel_cpu_impl.h
+ kernels/cpu/kernel_cpu_image.h
+ kernels/cpu/filter_cpu.h
+ kernels/cpu/filter_cpu_impl.h
)
set(SRC_KERNELS_CUDA_HEADERS
- kernels/cuda/kernel_config.h
- kernels/cuda/kernel_cuda_image.h
+ kernels/cuda/kernel_config.h
+ kernels/cuda/kernel_cuda_image.h
)
set(SRC_KERNELS_OPENCL_HEADERS
- kernels/opencl/kernel_split_function.h
- kernels/opencl/kernel_opencl_image.h
+ kernels/opencl/kernel_split_function.h
+ kernels/opencl/kernel_opencl_image.h
)
set(SRC_CLOSURE_HEADERS
- closure/alloc.h
- closure/bsdf.h
- closure/bsdf_ashikhmin_velvet.h
- closure/bsdf_diffuse.h
- closure/bsdf_diffuse_ramp.h
- closure/bsdf_microfacet.h
- closure/bsdf_microfacet_multi.h
- closure/bsdf_microfacet_multi_impl.h
- closure/bsdf_oren_nayar.h
- closure/bsdf_phong_ramp.h
- closure/bsdf_reflection.h
- closure/bsdf_refraction.h
- closure/bsdf_toon.h
- closure/bsdf_transparent.h
- closure/bsdf_util.h
- closure/bsdf_ashikhmin_shirley.h
- closure/bsdf_hair.h
- closure/bssrdf.h
- closure/emissive.h
- closure/volume.h
- closure/bsdf_principled_diffuse.h
- closure/bsdf_principled_sheen.h
+ closure/alloc.h
+ closure/bsdf.h
+ closure/bsdf_ashikhmin_velvet.h
+ closure/bsdf_diffuse.h
+ closure/bsdf_diffuse_ramp.h
+ closure/bsdf_microfacet.h
+ closure/bsdf_microfacet_multi.h
+ closure/bsdf_microfacet_multi_impl.h
+ closure/bsdf_oren_nayar.h
+ closure/bsdf_phong_ramp.h
+ closure/bsdf_reflection.h
+ closure/bsdf_refraction.h
+ closure/bsdf_toon.h
+ closure/bsdf_transparent.h
+ closure/bsdf_util.h
+ closure/bsdf_ashikhmin_shirley.h
+ closure/bsdf_hair.h
+ closure/bssrdf.h
+ closure/emissive.h
+ closure/volume.h
+ closure/bsdf_principled_diffuse.h
+ closure/bsdf_principled_sheen.h
closure/bsdf_hair_principled.h
)
set(SRC_SVM_HEADERS
- svm/svm.h
- svm/svm_ao.h
- svm/svm_attribute.h
- svm/svm_bevel.h
- svm/svm_blackbody.h
- svm/svm_bump.h
- svm/svm_camera.h
- svm/svm_closure.h
- svm/svm_convert.h
- svm/svm_checker.h
- svm/svm_color_util.h
- svm/svm_brick.h
- svm/svm_displace.h
- svm/svm_fresnel.h
- svm/svm_wireframe.h
- svm/svm_wavelength.h
- svm/svm_gamma.h
- svm/svm_brightness.h
- svm/svm_geometry.h
- svm/svm_gradient.h
- svm/svm_hsv.h
- svm/svm_ies.h
- svm/svm_image.h
- svm/svm_invert.h
- svm/svm_light_path.h
- svm/svm_magic.h
- svm/svm_mapping.h
- svm/svm_math.h
- svm/svm_math_util.h
- svm/svm_mix.h
- svm/svm_musgrave.h
- svm/svm_noise.h
- svm/svm_noisetex.h
- svm/svm_normal.h
- svm/svm_ramp.h
- svm/svm_ramp_util.h
- svm/svm_sepcomb_hsv.h
- svm/svm_sepcomb_vector.h
- svm/svm_sky.h
- svm/svm_tex_coord.h
- svm/svm_texture.h
- svm/svm_types.h
- svm/svm_value.h
- svm/svm_vector_transform.h
- svm/svm_voronoi.h
- svm/svm_voxel.h
- svm/svm_wave.h
+ svm/svm.h
+ svm/svm_ao.h
+ svm/svm_attribute.h
+ svm/svm_bevel.h
+ svm/svm_blackbody.h
+ svm/svm_bump.h
+ svm/svm_camera.h
+ svm/svm_closure.h
+ svm/svm_convert.h
+ svm/svm_checker.h
+ svm/svm_color_util.h
+ svm/svm_brick.h
+ svm/svm_displace.h
+ svm/svm_fresnel.h
+ svm/svm_wireframe.h
+ svm/svm_wavelength.h
+ svm/svm_gamma.h
+ svm/svm_brightness.h
+ svm/svm_geometry.h
+ svm/svm_gradient.h
+ svm/svm_hsv.h
+ svm/svm_ies.h
+ svm/svm_image.h
+ svm/svm_invert.h
+ svm/svm_light_path.h
+ svm/svm_magic.h
+ svm/svm_mapping.h
+ svm/svm_math.h
+ svm/svm_math_util.h
+ svm/svm_mix.h
+ svm/svm_musgrave.h
+ svm/svm_noise.h
+ svm/svm_noisetex.h
+ svm/svm_normal.h
+ svm/svm_ramp.h
+ svm/svm_ramp_util.h
+ svm/svm_sepcomb_hsv.h
+ svm/svm_sepcomb_vector.h
+ svm/svm_sky.h
+ svm/svm_tex_coord.h
+ svm/svm_texture.h
+ svm/svm_types.h
+ svm/svm_value.h
+ svm/svm_vector_transform.h
+ svm/svm_voronoi.h
+ svm/svm_voxel.h
+ svm/svm_wave.h
)
set(SRC_GEOM_HEADERS
- geom/geom.h
- geom/geom_attribute.h
- geom/geom_curve.h
- geom/geom_curve_intersect.h
- geom/geom_motion_curve.h
- geom/geom_motion_triangle.h
- geom/geom_motion_triangle_intersect.h
- geom/geom_motion_triangle_shader.h
- geom/geom_object.h
- geom/geom_patch.h
- geom/geom_primitive.h
- geom/geom_subd_triangle.h
- geom/geom_triangle.h
- geom/geom_triangle_intersect.h
- geom/geom_volume.h
+ geom/geom.h
+ geom/geom_attribute.h
+ geom/geom_curve.h
+ geom/geom_curve_intersect.h
+ geom/geom_motion_curve.h
+ geom/geom_motion_triangle.h
+ geom/geom_motion_triangle_intersect.h
+ geom/geom_motion_triangle_shader.h
+ geom/geom_object.h
+ geom/geom_patch.h
+ geom/geom_primitive.h
+ geom/geom_subd_triangle.h
+ geom/geom_triangle.h
+ geom/geom_triangle_intersect.h
+ geom/geom_volume.h
)
set(SRC_FILTER_HEADERS
- filter/filter.h
- filter/filter_defines.h
- filter/filter_features.h
- filter/filter_features_sse.h
- filter/filter_kernel.h
- filter/filter_nlm_cpu.h
- filter/filter_nlm_gpu.h
- filter/filter_prefilter.h
- filter/filter_reconstruction.h
- filter/filter_transform.h
- filter/filter_transform_gpu.h
- filter/filter_transform_sse.h
+ filter/filter.h
+ filter/filter_defines.h
+ filter/filter_features.h
+ filter/filter_features_sse.h
+ filter/filter_kernel.h
+ filter/filter_nlm_cpu.h
+ filter/filter_nlm_gpu.h
+ filter/filter_prefilter.h
+ filter/filter_reconstruction.h
+ filter/filter_transform.h
+ filter/filter_transform_gpu.h
+ filter/filter_transform_sse.h
)
set(SRC_UTIL_HEADERS
- ../util/util_atomic.h
- ../util/util_color.h
- ../util/util_defines.h
- ../util/util_half.h
- ../util/util_hash.h
- ../util/util_math.h
- ../util/util_math_fast.h
- ../util/util_math_intersect.h
- ../util/util_math_float2.h
- ../util/util_math_float3.h
- ../util/util_math_float4.h
- ../util/util_math_int2.h
- ../util/util_math_int3.h
- ../util/util_math_int4.h
- ../util/util_math_matrix.h
- ../util/util_projection.h
- ../util/util_rect.h
- ../util/util_static_assert.h
- ../util/util_transform.h
- ../util/util_texture.h
- ../util/util_types.h
- ../util/util_types_float2.h
- ../util/util_types_float2_impl.h
- ../util/util_types_float3.h
- ../util/util_types_float3_impl.h
- ../util/util_types_float4.h
- ../util/util_types_float4_impl.h
- ../util/util_types_float8.h
- ../util/util_types_float8_impl.h
- ../util/util_types_int2.h
- ../util/util_types_int2_impl.h
- ../util/util_types_int3.h
- ../util/util_types_int3_impl.h
- ../util/util_types_int4.h
- ../util/util_types_int4_impl.h
- ../util/util_types_uchar2.h
- ../util/util_types_uchar2_impl.h
- ../util/util_types_uchar3.h
- ../util/util_types_uchar3_impl.h
- ../util/util_types_uchar4.h
- ../util/util_types_uchar4_impl.h
- ../util/util_types_uint2.h
- ../util/util_types_uint2_impl.h
- ../util/util_types_uint3.h
- ../util/util_types_uint3_impl.h
- ../util/util_types_uint4.h
- ../util/util_types_uint4_impl.h
- ../util/util_types_ushort4.h
- ../util/util_types_vector3.h
- ../util/util_types_vector3_impl.h
+ ../util/util_atomic.h
+ ../util/util_color.h
+ ../util/util_defines.h
+ ../util/util_half.h
+ ../util/util_hash.h
+ ../util/util_math.h
+ ../util/util_math_fast.h
+ ../util/util_math_intersect.h
+ ../util/util_math_float2.h
+ ../util/util_math_float3.h
+ ../util/util_math_float4.h
+ ../util/util_math_int2.h
+ ../util/util_math_int3.h
+ ../util/util_math_int4.h
+ ../util/util_math_matrix.h
+ ../util/util_projection.h
+ ../util/util_rect.h
+ ../util/util_static_assert.h
+ ../util/util_transform.h
+ ../util/util_texture.h
+ ../util/util_types.h
+ ../util/util_types_float2.h
+ ../util/util_types_float2_impl.h
+ ../util/util_types_float3.h
+ ../util/util_types_float3_impl.h
+ ../util/util_types_float4.h
+ ../util/util_types_float4_impl.h
+ ../util/util_types_float8.h
+ ../util/util_types_float8_impl.h
+ ../util/util_types_int2.h
+ ../util/util_types_int2_impl.h
+ ../util/util_types_int3.h
+ ../util/util_types_int3_impl.h
+ ../util/util_types_int4.h
+ ../util/util_types_int4_impl.h
+ ../util/util_types_uchar2.h
+ ../util/util_types_uchar2_impl.h
+ ../util/util_types_uchar3.h
+ ../util/util_types_uchar3_impl.h
+ ../util/util_types_uchar4.h
+ ../util/util_types_uchar4_impl.h
+ ../util/util_types_uint2.h
+ ../util/util_types_uint2_impl.h
+ ../util/util_types_uint3.h
+ ../util/util_types_uint3_impl.h
+ ../util/util_types_uint4.h
+ ../util/util_types_uint4_impl.h
+ ../util/util_types_ushort4.h
+ ../util/util_types_vector3.h
+ ../util/util_types_vector3_impl.h
)
set(SRC_SPLIT_HEADERS
- split/kernel_branched.h
- split/kernel_buffer_update.h
- split/kernel_data_init.h
- split/kernel_direct_lighting.h
- split/kernel_do_volume.h
- split/kernel_enqueue_inactive.h
- split/kernel_holdout_emission_blurring_pathtermination_ao.h
- split/kernel_indirect_background.h
- split/kernel_indirect_subsurface.h
- split/kernel_lamp_emission.h
- split/kernel_next_iteration_setup.h
- split/kernel_path_init.h
- split/kernel_queue_enqueue.h
- split/kernel_scene_intersect.h
- split/kernel_shader_setup.h
- split/kernel_shader_sort.h
- split/kernel_shader_eval.h
- split/kernel_shadow_blocked_ao.h
- split/kernel_shadow_blocked_dl.h
- split/kernel_split_common.h
- split/kernel_split_data.h
- split/kernel_split_data_types.h
- split/kernel_subsurface_scatter.h
+ split/kernel_branched.h
+ split/kernel_buffer_update.h
+ split/kernel_data_init.h
+ split/kernel_direct_lighting.h
+ split/kernel_do_volume.h
+ split/kernel_enqueue_inactive.h
+ split/kernel_holdout_emission_blurring_pathtermination_ao.h
+ split/kernel_indirect_background.h
+ split/kernel_indirect_subsurface.h
+ split/kernel_lamp_emission.h
+ split/kernel_next_iteration_setup.h
+ split/kernel_path_init.h
+ split/kernel_queue_enqueue.h
+ split/kernel_scene_intersect.h
+ split/kernel_shader_setup.h
+ split/kernel_shader_sort.h
+ split/kernel_shader_eval.h
+ split/kernel_shadow_blocked_ao.h
+ split/kernel_shadow_blocked_dl.h
+ split/kernel_split_common.h
+ split/kernel_split_data.h
+ split/kernel_split_data_types.h
+ split/kernel_subsurface_scatter.h
)
set(LIB
@@ -340,145 +340,145 @@ set(LIB
# CUDA module
if(WITH_CYCLES_CUDA_BINARIES)
- # 64 bit only
- set(CUDA_BITS 64)
-
- # CUDA version
- execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT)
- string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR "${NVCC_OUT}")
- string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${NVCC_OUT}")
- set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")
-
- # warn for other versions
- if(CUDA_VERSION MATCHES "101")
- else()
- message(WARNING
- "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, "
- "build may succeed but only CUDA 10.1 is officially supported")
- endif()
-
- # build for each arch
- set(cuda_sources kernels/cuda/kernel.cu kernels/cuda/kernel_split.cu
- ${SRC_HEADERS}
- ${SRC_KERNELS_CUDA_HEADERS}
- ${SRC_BVH_HEADERS}
- ${SRC_SVM_HEADERS}
- ${SRC_GEOM_HEADERS}
- ${SRC_CLOSURE_HEADERS}
- ${SRC_UTIL_HEADERS}
- )
- set(cuda_filter_sources kernels/cuda/filter.cu
- ${SRC_HEADERS}
- ${SRC_KERNELS_CUDA_HEADERS}
- ${SRC_FILTER_HEADERS}
- ${SRC_UTIL_HEADERS}
- )
- set(cuda_cubins)
-
- macro(CYCLES_CUDA_KERNEL_ADD arch prev_arch name flags sources experimental)
- set(cuda_cubin ${name}_${arch}.cubin)
-
- set(kernel_sources ${sources})
- if(NOT ${prev_arch} STREQUAL "none")
- set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin)
- endif()
-
- set(cuda_kernel_src "/kernels/cuda/${name}.cu")
-
- set(cuda_flags
- -D CCL_NAMESPACE_BEGIN=
- -D CCL_NAMESPACE_END=
- -D NVCC
- -m ${CUDA_BITS}
- -I ${CMAKE_CURRENT_SOURCE_DIR}/..
- -I ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda
- --use_fast_math
- -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin})
-
- if(${experimental})
- set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__)
- set(name ${name}_experimental)
- endif()
-
- if(WITH_CYCLES_DEBUG)
- set(cuda_flags ${cuda_flags} -D __KERNEL_DEBUG__)
- endif()
-
- if(WITH_CYCLES_CUBIN_COMPILER)
- string(SUBSTRING ${arch} 3 -1 CUDA_ARCH)
-
- # Needed to find libnvrtc-builtins.so. Can't do it from inside
- # cycles_cubin_cc since the env variable is read before main()
- if(APPLE)
- set(CUBIN_CC_ENV ${CMAKE_COMMAND}
- -E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib")
- elseif(UNIX)
- set(CUBIN_CC_ENV ${CMAKE_COMMAND}
- -E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64")
- endif()
-
- add_custom_command(
- OUTPUT ${cuda_cubin}
- COMMAND ${CUBIN_CC_ENV}
- "$<TARGET_FILE:cycles_cubin_cc>"
- -target ${CUDA_ARCH}
- -i ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
- ${cuda_flags}
- -v
- -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}"
- DEPENDS ${kernel_sources} cycles_cubin_cc)
- else()
- add_custom_command(
- OUTPUT ${cuda_cubin}
- COMMAND ${CUDA_NVCC_EXECUTABLE}
- -arch=${arch}
- ${CUDA_NVCC_FLAGS}
- --cubin
- ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
- --ptxas-options="-v"
- ${cuda_flags}
- DEPENDS ${kernel_sources})
- endif()
- delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
- list(APPEND cuda_cubins ${cuda_cubin})
-
- unset(cuda_debug_flags)
- endmacro()
-
- set(prev_arch "none")
- foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
- if(${arch} MATCHES "sm_2.")
- message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.")
- elseif(${arch} MATCHES "sm_7." AND ${CUDA_VERSION} LESS 100)
- message(STATUS "CUDA binaries for ${arch} require CUDA 10.0+, skipped.")
- else()
- # Compile regular kernel
- CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} filter "" "${cuda_filter_sources}" FALSE)
- CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} kernel "" "${cuda_sources}" FALSE)
-
- if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES)
- # Compile split kernel
- CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} kernel_split "-D __SPLIT__" "${cuda_sources}" FALSE)
- endif()
-
- if(WITH_CYCLES_CUDA_BUILD_SERIAL)
- set(prev_arch ${arch})
- endif()
- endif()
- endforeach()
-
- add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins})
- cycles_set_solution_folder(cycles_kernel_cuda)
+ # 64 bit only
+ set(CUDA_BITS 64)
+
+ # CUDA version
+ execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT)
+ string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR "${NVCC_OUT}")
+ string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${NVCC_OUT}")
+ set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")
+
+ # warn for other versions
+ if(CUDA_VERSION MATCHES "101")
+ else()
+ message(WARNING
+ "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, "
+ "build may succeed but only CUDA 10.1 is officially supported")
+ endif()
+
+ # build for each arch
+ set(cuda_sources kernels/cuda/kernel.cu kernels/cuda/kernel_split.cu
+ ${SRC_HEADERS}
+ ${SRC_KERNELS_CUDA_HEADERS}
+ ${SRC_BVH_HEADERS}
+ ${SRC_SVM_HEADERS}
+ ${SRC_GEOM_HEADERS}
+ ${SRC_CLOSURE_HEADERS}
+ ${SRC_UTIL_HEADERS}
+ )
+ set(cuda_filter_sources kernels/cuda/filter.cu
+ ${SRC_HEADERS}
+ ${SRC_KERNELS_CUDA_HEADERS}
+ ${SRC_FILTER_HEADERS}
+ ${SRC_UTIL_HEADERS}
+ )
+ set(cuda_cubins)
+
+ macro(CYCLES_CUDA_KERNEL_ADD arch prev_arch name flags sources experimental)
+ set(cuda_cubin ${name}_${arch}.cubin)
+
+ set(kernel_sources ${sources})
+ if(NOT ${prev_arch} STREQUAL "none")
+ set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin)
+ endif()
+
+ set(cuda_kernel_src "/kernels/cuda/${name}.cu")
+
+ set(cuda_flags
+ -D CCL_NAMESPACE_BEGIN=
+ -D CCL_NAMESPACE_END=
+ -D NVCC
+ -m ${CUDA_BITS}
+ -I ${CMAKE_CURRENT_SOURCE_DIR}/..
+ -I ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda
+ --use_fast_math
+ -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin})
+
+ if(${experimental})
+ set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__)
+ set(name ${name}_experimental)
+ endif()
+
+ if(WITH_CYCLES_DEBUG)
+ set(cuda_flags ${cuda_flags} -D __KERNEL_DEBUG__)
+ endif()
+
+ if(WITH_CYCLES_CUBIN_COMPILER)
+ string(SUBSTRING ${arch} 3 -1 CUDA_ARCH)
+
+ # Needed to find libnvrtc-builtins.so. Can't do it from inside
+ # cycles_cubin_cc since the env variable is read before main()
+ if(APPLE)
+ set(CUBIN_CC_ENV ${CMAKE_COMMAND}
+ -E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib")
+ elseif(UNIX)
+ set(CUBIN_CC_ENV ${CMAKE_COMMAND}
+ -E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64")
+ endif()
+
+ add_custom_command(
+ OUTPUT ${cuda_cubin}
+ COMMAND ${CUBIN_CC_ENV}
+ "$<TARGET_FILE:cycles_cubin_cc>"
+ -target ${CUDA_ARCH}
+ -i ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
+ ${cuda_flags}
+ -v
+ -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}"
+ DEPENDS ${kernel_sources} cycles_cubin_cc)
+ else()
+ add_custom_command(
+ OUTPUT ${cuda_cubin}
+ COMMAND ${CUDA_NVCC_EXECUTABLE}
+ -arch=${arch}
+ ${CUDA_NVCC_FLAGS}
+ --cubin
+ ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
+ --ptxas-options="-v"
+ ${cuda_flags}
+ DEPENDS ${kernel_sources})
+ endif()
+ delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
+ list(APPEND cuda_cubins ${cuda_cubin})
+
+ unset(cuda_debug_flags)
+ endmacro()
+
+ set(prev_arch "none")
+ foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
+ if(${arch} MATCHES "sm_2.")
+ message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.")
+ elseif(${arch} MATCHES "sm_7." AND ${CUDA_VERSION} LESS 100)
+ message(STATUS "CUDA binaries for ${arch} require CUDA 10.0+, skipped.")
+ else()
+ # Compile regular kernel
+ CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} filter "" "${cuda_filter_sources}" FALSE)
+ CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} kernel "" "${cuda_sources}" FALSE)
+
+ if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES)
+ # Compile split kernel
+ CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} kernel_split "-D __SPLIT__" "${cuda_sources}" FALSE)
+ endif()
+
+ if(WITH_CYCLES_CUDA_BUILD_SERIAL)
+ set(prev_arch ${arch})
+ endif()
+ endif()
+ endforeach()
+
+ add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins})
+ cycles_set_solution_folder(cycles_kernel_cuda)
endif()
# OSL module
if(WITH_CYCLES_OSL)
- list(APPEND LIB
- cycles_kernel_osl
- )
- add_subdirectory(osl)
- add_subdirectory(shaders)
+ list(APPEND LIB
+ cycles_kernel_osl
+ )
+ add_subdirectory(osl)
+ add_subdirectory(shaders)
endif()
# CPU module
@@ -491,56 +491,56 @@ set_source_files_properties(kernels/cpu/kernel_split.cpp PROPERTIES COMPILE_FLAG
set_source_files_properties(kernels/cpu/filter.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}")
if(CXX_HAS_SSE)
- set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
- set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
- set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
- set_source_files_properties(kernels/cpu/kernel_split_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
- set_source_files_properties(kernels/cpu/kernel_split_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
- set_source_files_properties(kernels/cpu/kernel_split_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
- set_source_files_properties(kernels/cpu/filter_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
- set_source_files_properties(kernels/cpu/filter_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
- set_source_files_properties(kernels/cpu/filter_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/kernel_split_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/kernel_split_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/kernel_split_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/filter_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/filter_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/filter_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
endif()
if(CXX_HAS_AVX)
- set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
- set_source_files_properties(kernels/cpu/kernel_split_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
- set_source_files_properties(kernels/cpu/filter_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/kernel_split_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/filter_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
endif()
if(CXX_HAS_AVX2)
- set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
- set_source_files_properties(kernels/cpu/kernel_split_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
- set_source_files_properties(kernels/cpu/filter_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/kernel_split_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/filter_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
endif()
cycles_add_library(cycles_kernel "${LIB}"
- ${SRC_CPU_KERNELS}
- ${SRC_CUDA_KERNELS}
- ${SRC_OPENCL_KERNELS}
- ${SRC_HEADERS}
- ${SRC_KERNELS_CPU_HEADERS}
- ${SRC_KERNELS_CUDA_HEADERS}
- ${SRC_KERNELS_OPENCL_HEADERS}
- ${SRC_BVH_HEADERS}
- ${SRC_CLOSURE_HEADERS}
- ${SRC_FILTER_HEADERS}
- ${SRC_SVM_HEADERS}
- ${SRC_GEOM_HEADERS}
- ${SRC_SPLIT_HEADERS}
+ ${SRC_CPU_KERNELS}
+ ${SRC_CUDA_KERNELS}
+ ${SRC_OPENCL_KERNELS}
+ ${SRC_HEADERS}
+ ${SRC_KERNELS_CPU_HEADERS}
+ ${SRC_KERNELS_CUDA_HEADERS}
+ ${SRC_KERNELS_OPENCL_HEADERS}
+ ${SRC_BVH_HEADERS}
+ ${SRC_CLOSURE_HEADERS}
+ ${SRC_FILTER_HEADERS}
+ ${SRC_SVM_HEADERS}
+ ${SRC_GEOM_HEADERS}
+ ${SRC_SPLIT_HEADERS}
)
if(WITH_CYCLES_CUDA)
- add_dependencies(cycles_kernel cycles_kernel_cuda)
+ add_dependencies(cycles_kernel cycles_kernel_cuda)
endif()
# OpenCL kernel
#set(KERNEL_PREPROCESSED ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl)
#add_custom_command(
-# OUTPUT ${KERNEL_PREPROCESSED}
-# COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -o ${KERNEL_PREPROCESSED}
-# DEPENDS ${SRC_KERNEL} ${SRC_UTIL_HEADERS})
+# OUTPUT ${KERNEL_PREPROCESSED}
+# COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -o ${KERNEL_PREPROCESSED}
+# DEPENDS ${SRC_KERNEL} ${SRC_UTIL_HEADERS})
#add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED})
#delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel)
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index e5f807833f3..13e72ed299f 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -57,19 +57,19 @@ CCL_NAMESPACE_BEGIN
#if defined(__HAIR__)
# define BVH_FUNCTION_NAME bvh_intersect_hair
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_HAIR_MINIMUM_WIDTH
# include "kernel/bvh/bvh_traversal.h"
#endif
#if defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION
# include "kernel/bvh/bvh_traversal.h"
#endif
#if defined(__HAIR__) && defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_hair_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_HAIR_MINIMUM_WIDTH | BVH_MOTION
# include "kernel/bvh/bvh_traversal.h"
#endif
@@ -82,10 +82,10 @@ CCL_NAMESPACE_BEGIN
# if defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_local_motion
-# define BVH_FUNCTION_FEATURES BVH_MOTION|BVH_HAIR
+# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
# include "kernel/bvh/bvh_local.h"
# endif
-#endif /* __BVH_LOCAL__ */
+#endif /* __BVH_LOCAL__ */
/* Volume BVH traversal */
@@ -96,16 +96,16 @@ CCL_NAMESPACE_BEGIN
# if defined(__INSTANCING__)
# define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
# include "kernel/bvh/bvh_volume.h"
# endif
# if defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR
# include "kernel/bvh/bvh_volume.h"
# endif
-#endif /* __VOLUME__ */
+#endif /* __VOLUME__ */
/* Record all intersections - Shadow BVH traversal */
@@ -122,22 +122,22 @@ CCL_NAMESPACE_BEGIN
# if defined(__HAIR__)
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
# include "kernel/bvh/bvh_shadow_all.h"
# endif
# if defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION
# include "kernel/bvh/bvh_shadow_all.h"
# endif
# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION
# include "kernel/bvh/bvh_shadow_all.h"
# endif
-#endif /* __SHADOW_RECORD_ALL__ */
+#endif /* __SHADOW_RECORD_ALL__ */
/* Record all intersections - Volume BVH traversal */
@@ -148,16 +148,16 @@ CCL_NAMESPACE_BEGIN
# if defined(__INSTANCING__)
# define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
# include "kernel/bvh/bvh_volume_all.h"
# endif
# if defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR
# include "kernel/bvh/bvh_volume_all.h"
# endif
-#endif /* __VOLUME_RECORD_ALL__ */
+#endif /* __VOLUME_RECORD_ALL__ */
#undef BVH_FEATURE
#undef BVH_NAME_JOIN
@@ -166,15 +166,15 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline bool scene_intersect_valid(const Ray *ray)
{
- /* NOTE: Due to some vectorization code non-finite origin point might
- * cause lots of false-positive intersections which will overflow traversal
- * stack.
- * This code is a quick way to perform early output, to avoid crashes in
- * such cases.
- * From production scenes so far it seems it's enough to test first element
- * only.
- */
- return isfinite(ray->P.x);
+ /* NOTE: Due to some vectorization code non-finite origin point might
+ * cause lots of false-positive intersections which will overflow traversal
+ * stack.
+ * This code is a quick way to perform early output, to avoid crashes in
+ * such cases.
+ * From production scenes so far it seems it's enough to test first element
+ * only.
+ */
+ return isfinite(ray->P.x);
}
/* Note: ray is passed by value to work around a possible CUDA compiler bug. */
@@ -186,59 +186,60 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
float difl,
float extmax)
{
- PROFILING_INIT(kg, PROFILING_INTERSECT);
+ PROFILING_INIT(kg, PROFILING_INTERSECT);
- if(!scene_intersect_valid(&ray)) {
- return false;
- }
+ if (!scene_intersect_valid(&ray)) {
+ return false;
+ }
#ifdef __EMBREE__
- if(kernel_data.bvh.scene) {
- isect->t = ray.t;
- CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
- IntersectContext rtc_ctx(&ctx);
- RTCRayHit ray_hit;
- kernel_embree_setup_rayhit(ray, ray_hit, visibility);
- rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit);
- if(ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID && ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) {
- kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect);
- return true;
- }
- return false;
- }
-#endif /* __EMBREE__ */
+ if (kernel_data.bvh.scene) {
+ isect->t = ray.t;
+ CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
+ IntersectContext rtc_ctx(&ctx);
+ RTCRayHit ray_hit;
+ kernel_embree_setup_rayhit(ray, ray_hit, visibility);
+ rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit);
+ if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID &&
+ ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) {
+ kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect);
+ return true;
+ }
+ return false;
+ }
+#endif /* __EMBREE__ */
#ifdef __OBJECT_MOTION__
- if(kernel_data.bvh.have_motion) {
+ if (kernel_data.bvh.have_motion) {
# ifdef __HAIR__
- if(kernel_data.bvh.have_curves)
- return bvh_intersect_hair_motion(kg, &ray, isect, visibility, lcg_state, difl, extmax);
-# endif /* __HAIR__ */
+ if (kernel_data.bvh.have_curves)
+ return bvh_intersect_hair_motion(kg, &ray, isect, visibility, lcg_state, difl, extmax);
+# endif /* __HAIR__ */
- return bvh_intersect_motion(kg, &ray, isect, visibility);
- }
-#endif /* __OBJECT_MOTION__ */
+ return bvh_intersect_motion(kg, &ray, isect, visibility);
+ }
+#endif /* __OBJECT_MOTION__ */
#ifdef __HAIR__
- if(kernel_data.bvh.have_curves)
- return bvh_intersect_hair(kg, &ray, isect, visibility, lcg_state, difl, extmax);
-#endif /* __HAIR__ */
+ if (kernel_data.bvh.have_curves)
+ return bvh_intersect_hair(kg, &ray, isect, visibility, lcg_state, difl, extmax);
+#endif /* __HAIR__ */
#ifdef __KERNEL_CPU__
# ifdef __INSTANCING__
- if(kernel_data.bvh.have_instancing)
- return bvh_intersect_instancing(kg, &ray, isect, visibility);
-# endif /* __INSTANCING__ */
+ if (kernel_data.bvh.have_instancing)
+ return bvh_intersect_instancing(kg, &ray, isect, visibility);
+# endif /* __INSTANCING__ */
- return bvh_intersect(kg, &ray, isect, visibility);
-#else /* __KERNEL_CPU__ */
+ return bvh_intersect(kg, &ray, isect, visibility);
+#else /* __KERNEL_CPU__ */
# ifdef __INSTANCING__
- return bvh_intersect_instancing(kg, &ray, isect, visibility);
+ return bvh_intersect_instancing(kg, &ray, isect, visibility);
# else
- return bvh_intersect(kg, &ray, isect, visibility);
-# endif /* __INSTANCING__ */
+ return bvh_intersect(kg, &ray, isect, visibility);
+# endif /* __INSTANCING__ */
-#endif /* __KERNEL_CPU__ */
+#endif /* __KERNEL_CPU__ */
}
#ifdef __BVH_LOCAL__
@@ -250,77 +251,61 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
uint *lcg_state,
int max_hits)
{
- PROFILING_INIT(kg, PROFILING_INTERSECT_LOCAL);
+ PROFILING_INIT(kg, PROFILING_INTERSECT_LOCAL);
- if(!scene_intersect_valid(&ray)) {
- local_isect->num_hits = 0;
- return false;
- }
-#ifdef __EMBREE__
- if(kernel_data.bvh.scene) {
- CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SSS);
- ctx.lcg_state = lcg_state;
- ctx.max_hits = max_hits;
- ctx.ss_isect = local_isect;
- local_isect->num_hits = 0;
- ctx.sss_object_id = local_object;
- IntersectContext rtc_ctx(&ctx);
- RTCRay rtc_ray;
- kernel_embree_setup_ray(ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);
-
- /* Get the Embree scene for this intersection. */
- RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2);
- if(geom) {
- float3 P = ray.P;
- float3 dir = ray.D;
- float3 idir = ray.D;
- const int object_flag = kernel_tex_fetch(__object_flag, local_object);
- if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
- Transform ob_itfm;
- rtc_ray.tfar = bvh_instance_motion_push(kg,
- local_object,
- &ray,
- &P,
- &dir,
- &idir,
- ray.t,
- &ob_itfm);
- /* bvh_instance_motion_push() returns the inverse transform but
- * it's not needed here. */
- (void) ob_itfm;
-
- rtc_ray.org_x = P.x;
- rtc_ray.org_y = P.y;
- rtc_ray.org_z = P.z;
- rtc_ray.dir_x = dir.x;
- rtc_ray.dir_y = dir.y;
- rtc_ray.dir_z = dir.z;
- }
- RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom);
- if(scene) {
- rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray);
- }
- }
-
- return local_isect->num_hits > 0;
- }
-#endif /* __EMBREE__ */
-#ifdef __OBJECT_MOTION__
- if(kernel_data.bvh.have_motion) {
- return bvh_intersect_local_motion(kg,
- &ray,
- local_isect,
- local_object,
- lcg_state,
- max_hits);
- }
-#endif /* __OBJECT_MOTION__ */
- return bvh_intersect_local(kg,
- &ray,
- local_isect,
- local_object,
- lcg_state,
- max_hits);
+ if (!scene_intersect_valid(&ray)) {
+ local_isect->num_hits = 0;
+ return false;
+ }
+# ifdef __EMBREE__
+ if (kernel_data.bvh.scene) {
+ CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SSS);
+ ctx.lcg_state = lcg_state;
+ ctx.max_hits = max_hits;
+ ctx.ss_isect = local_isect;
+ local_isect->num_hits = 0;
+ ctx.sss_object_id = local_object;
+ IntersectContext rtc_ctx(&ctx);
+ RTCRay rtc_ray;
+ kernel_embree_setup_ray(ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);
+
+ /* Get the Embree scene for this intersection. */
+ RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2);
+ if (geom) {
+ float3 P = ray.P;
+ float3 dir = ray.D;
+ float3 idir = ray.D;
+ const int object_flag = kernel_tex_fetch(__object_flag, local_object);
+ if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+ Transform ob_itfm;
+ rtc_ray.tfar = bvh_instance_motion_push(
+ kg, local_object, &ray, &P, &dir, &idir, ray.t, &ob_itfm);
+ /* bvh_instance_motion_push() returns the inverse transform but
+ * it's not needed here. */
+ (void)ob_itfm;
+
+ rtc_ray.org_x = P.x;
+ rtc_ray.org_y = P.y;
+ rtc_ray.org_z = P.z;
+ rtc_ray.dir_x = dir.x;
+ rtc_ray.dir_y = dir.y;
+ rtc_ray.dir_z = dir.z;
+ }
+ RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom);
+ if (scene) {
+ rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray);
+ }
+ }
+
+ return local_isect->num_hits > 0;
+ }
+# endif /* __EMBREE__ */
+# ifdef __OBJECT_MOTION__
+ if (kernel_data.bvh.have_motion) {
+ return bvh_intersect_local_motion(kg, &ray, local_isect, local_object, lcg_state, max_hits);
+ }
+# endif /* __OBJECT_MOTION__ */
+ return bvh_intersect_local(kg, &ray, local_isect, local_object, lcg_state, max_hits);
}
#endif
@@ -332,82 +317,57 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
uint max_hits,
uint *num_hits)
{
- PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW_ALL);
+ PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW_ALL);
- if(!scene_intersect_valid(ray)) {
- *num_hits = 0;
- return false;
- }
+ if (!scene_intersect_valid(ray)) {
+ *num_hits = 0;
+ return false;
+ }
# ifdef __EMBREE__
- if(kernel_data.bvh.scene) {
- CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
- ctx.isect_s = isect;
- ctx.max_hits = max_hits;
- ctx.num_hits = 0;
- IntersectContext rtc_ctx(&ctx);
- RTCRay rtc_ray;
- kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_SHADOW);
- rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
-
- if(ctx.num_hits > max_hits) {
- return true;
- }
- *num_hits = ctx.num_hits;
- return rtc_ray.tfar == -INFINITY;
- }
+ if (kernel_data.bvh.scene) {
+ CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
+ ctx.isect_s = isect;
+ ctx.max_hits = max_hits;
+ ctx.num_hits = 0;
+ IntersectContext rtc_ctx(&ctx);
+ RTCRay rtc_ray;
+ kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_SHADOW);
+ rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
+
+ if (ctx.num_hits > max_hits) {
+ return true;
+ }
+ *num_hits = ctx.num_hits;
+ return rtc_ray.tfar == -INFINITY;
+ }
# endif
# ifdef __OBJECT_MOTION__
- if(kernel_data.bvh.have_motion) {
+ if (kernel_data.bvh.have_motion) {
# ifdef __HAIR__
- if(kernel_data.bvh.have_curves) {
- return bvh_intersect_shadow_all_hair_motion(kg,
- ray,
- isect,
- visibility,
- max_hits,
- num_hits);
- }
-# endif /* __HAIR__ */
-
- return bvh_intersect_shadow_all_motion(kg,
- ray,
- isect,
- visibility,
- max_hits,
- num_hits);
- }
-# endif /* __OBJECT_MOTION__ */
+ if (kernel_data.bvh.have_curves) {
+ return bvh_intersect_shadow_all_hair_motion(kg, ray, isect, visibility, max_hits, num_hits);
+ }
+# endif /* __HAIR__ */
+
+ return bvh_intersect_shadow_all_motion(kg, ray, isect, visibility, max_hits, num_hits);
+ }
+# endif /* __OBJECT_MOTION__ */
# ifdef __HAIR__
- if(kernel_data.bvh.have_curves) {
- return bvh_intersect_shadow_all_hair(kg,
- ray,
- isect,
- visibility,
- max_hits,
- num_hits);
- }
-# endif /* __HAIR__ */
+ if (kernel_data.bvh.have_curves) {
+ return bvh_intersect_shadow_all_hair(kg, ray, isect, visibility, max_hits, num_hits);
+ }
+# endif /* __HAIR__ */
# ifdef __INSTANCING__
- if(kernel_data.bvh.have_instancing) {
- return bvh_intersect_shadow_all_instancing(kg,
- ray,
- isect,
- visibility,
- max_hits,
- num_hits);
- }
-# endif /* __INSTANCING__ */
-
- return bvh_intersect_shadow_all(kg,
- ray,
- isect,
- visibility,
- max_hits,
- num_hits);
+ if (kernel_data.bvh.have_instancing) {
+ return bvh_intersect_shadow_all_instancing(kg, ray, isect, visibility, max_hits, num_hits);
+ }
+# endif /* __INSTANCING__ */
+
+ return bvh_intersect_shadow_all(kg, ray, isect, visibility, max_hits, num_hits);
}
-#endif /* __SHADOW_RECORD_ALL__ */
+#endif /* __SHADOW_RECORD_ALL__ */
#ifdef __VOLUME__
ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
@@ -415,31 +375,31 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
Intersection *isect,
const uint visibility)
{
- PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME);
+ PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME);
- if(!scene_intersect_valid(ray)) {
- return false;
- }
+ if (!scene_intersect_valid(ray)) {
+ return false;
+ }
# ifdef __OBJECT_MOTION__
- if(kernel_data.bvh.have_motion) {
- return bvh_intersect_volume_motion(kg, ray, isect, visibility);
- }
-# endif /* __OBJECT_MOTION__ */
+ if (kernel_data.bvh.have_motion) {
+ return bvh_intersect_volume_motion(kg, ray, isect, visibility);
+ }
+# endif /* __OBJECT_MOTION__ */
# ifdef __KERNEL_CPU__
# ifdef __INSTANCING__
- if(kernel_data.bvh.have_instancing)
- return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
-# endif /* __INSTANCING__ */
- return bvh_intersect_volume(kg, ray, isect, visibility);
-# else /* __KERNEL_CPU__ */
+ if (kernel_data.bvh.have_instancing)
+ return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
+# endif /* __INSTANCING__ */
+ return bvh_intersect_volume(kg, ray, isect, visibility);
+# else /* __KERNEL_CPU__ */
# ifdef __INSTANCING__
- return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
+ return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
# else
- return bvh_intersect_volume(kg, ray, isect, visibility);
-# endif /* __INSTANCING__ */
-# endif /* __KERNEL_CPU__ */
+ return bvh_intersect_volume(kg, ray, isect, visibility);
+# endif /* __INSTANCING__ */
+# endif /* __KERNEL_CPU__ */
}
-#endif /* __VOLUME__ */
+#endif /* __VOLUME__ */
#ifdef __VOLUME_RECORD_ALL__
ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
@@ -448,37 +408,36 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
const uint max_hits,
const uint visibility)
{
- PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_ALL);
+ PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_ALL);
- if(!scene_intersect_valid(ray)) {
- return false;
- }
+ if (!scene_intersect_valid(ray)) {
+ return false;
+ }
# ifdef __EMBREE__
- if(kernel_data.bvh.scene) {
- CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
- ctx.isect_s = isect;
- ctx.max_hits = max_hits;
- ctx.num_hits = 0;
- IntersectContext rtc_ctx(&ctx);
- RTCRay rtc_ray;
- kernel_embree_setup_ray(*ray, rtc_ray, visibility);
- rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
- return rtc_ray.tfar == -INFINITY;
- }
+ if (kernel_data.bvh.scene) {
+ CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
+ ctx.isect_s = isect;
+ ctx.max_hits = max_hits;
+ ctx.num_hits = 0;
+ IntersectContext rtc_ctx(&ctx);
+ RTCRay rtc_ray;
+ kernel_embree_setup_ray(*ray, rtc_ray, visibility);
+ rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
+ return rtc_ray.tfar == -INFINITY;
+ }
# endif
# ifdef __OBJECT_MOTION__
- if(kernel_data.bvh.have_motion) {
- return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
- }
-# endif /* __OBJECT_MOTION__ */
+ if (kernel_data.bvh.have_motion) {
+ return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
+ }
+# endif /* __OBJECT_MOTION__ */
# ifdef __INSTANCING__
- if(kernel_data.bvh.have_instancing)
- return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits, visibility);
-# endif /* __INSTANCING__ */
- return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
+ if (kernel_data.bvh.have_instancing)
+ return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits, visibility);
+# endif /* __INSTANCING__ */
+ return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
}
-#endif /* __VOLUME_RECORD_ALL__ */
-
+#endif /* __VOLUME_RECORD_ALL__ */
/* Ray offset to avoid self intersection.
*
@@ -488,48 +447,48 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
ccl_device_inline float3 ray_offset(float3 P, float3 Ng)
{
#ifdef __INTERSECTION_REFINE__
- const float epsilon_f = 1e-5f;
- /* ideally this should match epsilon_f, but instancing and motion blur
- * precision makes it problematic */
- const float epsilon_test = 1.0f;
- const int epsilon_i = 32;
-
- float3 res;
-
- /* x component */
- if(fabsf(P.x) < epsilon_test) {
- res.x = P.x + Ng.x*epsilon_f;
- }
- else {
- uint ix = __float_as_uint(P.x);
- ix += ((ix ^ __float_as_uint(Ng.x)) >> 31)? -epsilon_i: epsilon_i;
- res.x = __uint_as_float(ix);
- }
-
- /* y component */
- if(fabsf(P.y) < epsilon_test) {
- res.y = P.y + Ng.y*epsilon_f;
- }
- else {
- uint iy = __float_as_uint(P.y);
- iy += ((iy ^ __float_as_uint(Ng.y)) >> 31)? -epsilon_i: epsilon_i;
- res.y = __uint_as_float(iy);
- }
-
- /* z component */
- if(fabsf(P.z) < epsilon_test) {
- res.z = P.z + Ng.z*epsilon_f;
- }
- else {
- uint iz = __float_as_uint(P.z);
- iz += ((iz ^ __float_as_uint(Ng.z)) >> 31)? -epsilon_i: epsilon_i;
- res.z = __uint_as_float(iz);
- }
-
- return res;
+ const float epsilon_f = 1e-5f;
+ /* ideally this should match epsilon_f, but instancing and motion blur
+ * precision makes it problematic */
+ const float epsilon_test = 1.0f;
+ const int epsilon_i = 32;
+
+ float3 res;
+
+ /* x component */
+ if (fabsf(P.x) < epsilon_test) {
+ res.x = P.x + Ng.x * epsilon_f;
+ }
+ else {
+ uint ix = __float_as_uint(P.x);
+ ix += ((ix ^ __float_as_uint(Ng.x)) >> 31) ? -epsilon_i : epsilon_i;
+ res.x = __uint_as_float(ix);
+ }
+
+ /* y component */
+ if (fabsf(P.y) < epsilon_test) {
+ res.y = P.y + Ng.y * epsilon_f;
+ }
+ else {
+ uint iy = __float_as_uint(P.y);
+ iy += ((iy ^ __float_as_uint(Ng.y)) >> 31) ? -epsilon_i : epsilon_i;
+ res.y = __uint_as_float(iy);
+ }
+
+ /* z component */
+ if (fabsf(P.z) < epsilon_test) {
+ res.z = P.z + Ng.z * epsilon_f;
+ }
+ else {
+ uint iz = __float_as_uint(P.z);
+ iz += ((iz ^ __float_as_uint(Ng.z)) >> 31) ? -epsilon_i : epsilon_i;
+ res.z = __uint_as_float(iz);
+ }
+
+ return res;
#else
- const float epsilon_f = 1e-4f;
- return P + epsilon_f*Ng;
+ const float epsilon_f = 1e-4f;
+ return P + epsilon_f * Ng;
#endif
}
@@ -537,40 +496,40 @@ ccl_device_inline float3 ray_offset(float3 P, float3 Ng)
/* ToDo: Move to another file? */
ccl_device int intersections_compare(const void *a, const void *b)
{
- const Intersection *isect_a = (const Intersection*)a;
- const Intersection *isect_b = (const Intersection*)b;
-
- if(isect_a->t < isect_b->t)
- return -1;
- else if(isect_a->t > isect_b->t)
- return 1;
- else
- return 0;
+ const Intersection *isect_a = (const Intersection *)a;
+ const Intersection *isect_b = (const Intersection *)b;
+
+ if (isect_a->t < isect_b->t)
+ return -1;
+ else if (isect_a->t > isect_b->t)
+ return 1;
+ else
+ return 0;
}
#endif
#if defined(__SHADOW_RECORD_ALL__)
ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits)
{
-#ifdef __KERNEL_GPU__
- /* Use bubble sort which has more friendly memory pattern on GPU. */
- bool swapped;
- do {
- swapped = false;
- for(int j = 0; j < num_hits - 1; ++j) {
- if(hits[j].t > hits[j + 1].t) {
- struct Intersection tmp = hits[j];
- hits[j] = hits[j + 1];
- hits[j + 1] = tmp;
- swapped = true;
- }
- }
- --num_hits;
- } while(swapped);
-#else
- qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
-#endif
+# ifdef __KERNEL_GPU__
+ /* Use bubble sort which has more friendly memory pattern on GPU. */
+ bool swapped;
+ do {
+ swapped = false;
+ for (int j = 0; j < num_hits - 1; ++j) {
+ if (hits[j].t > hits[j + 1].t) {
+ struct Intersection tmp = hits[j];
+ hits[j] = hits[j + 1];
+ hits[j + 1] = tmp;
+ swapped = true;
+ }
+ }
+ --num_hits;
+ } while (swapped);
+# else
+ qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
+# endif
}
-#endif /* __SHADOW_RECORD_ALL__ | __VOLUME_RECORD_ALL__ */
+#endif /* __SHADOW_RECORD_ALL__ | __VOLUME_RECORD_ALL__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/bvh/bvh_embree.h b/intern/cycles/kernel/bvh/bvh_embree.h
index bfc911a1e76..661bba54fd4 100644
--- a/intern/cycles/kernel/bvh/bvh_embree.h
+++ b/intern/cycles/kernel/bvh/bvh_embree.h
@@ -24,103 +24,120 @@
CCL_NAMESPACE_BEGIN
-struct CCLIntersectContext {
- typedef enum {
- RAY_REGULAR = 0,
- RAY_SHADOW_ALL = 1,
- RAY_SSS = 2,
- RAY_VOLUME_ALL = 3,
+struct CCLIntersectContext {
+ typedef enum {
+ RAY_REGULAR = 0,
+ RAY_SHADOW_ALL = 1,
+ RAY_SSS = 2,
+ RAY_VOLUME_ALL = 3,
- } RayType;
+ } RayType;
- KernelGlobals *kg;
- RayType type;
+ KernelGlobals *kg;
+ RayType type;
- /* for shadow rays */
- Intersection *isect_s;
- int max_hits;
- int num_hits;
+ /* for shadow rays */
+ Intersection *isect_s;
+ int max_hits;
+ int num_hits;
- /* for SSS Rays: */
- LocalIntersection *ss_isect;
- int sss_object_id;
- uint *lcg_state;
+ /* for SSS Rays: */
+ LocalIntersection *ss_isect;
+ int sss_object_id;
+ uint *lcg_state;
- CCLIntersectContext(KernelGlobals *kg_, RayType type_)
- {
- kg = kg_;
- type = type_;
- max_hits = 1;
- num_hits = 0;
- isect_s = NULL;
- ss_isect = NULL;
- sss_object_id = -1;
- lcg_state = NULL;
- }
+ CCLIntersectContext(KernelGlobals *kg_, RayType type_)
+ {
+ kg = kg_;
+ type = type_;
+ max_hits = 1;
+ num_hits = 0;
+ isect_s = NULL;
+ ss_isect = NULL;
+ sss_object_id = -1;
+ lcg_state = NULL;
+ }
};
-class IntersectContext
-{
-public:
- IntersectContext(CCLIntersectContext* ctx)
- {
- rtcInitIntersectContext(&context);
- userRayExt = ctx;
- }
- RTCIntersectContext context;
- CCLIntersectContext* userRayExt;
+class IntersectContext {
+ public:
+ IntersectContext(CCLIntersectContext *ctx)
+ {
+ rtcInitIntersectContext(&context);
+ userRayExt = ctx;
+ }
+ RTCIntersectContext context;
+ CCLIntersectContext *userRayExt;
};
-ccl_device_inline void kernel_embree_setup_ray(const Ray& ray, RTCRay& rtc_ray, const uint visibility)
+ccl_device_inline void kernel_embree_setup_ray(const Ray &ray,
+ RTCRay &rtc_ray,
+ const uint visibility)
{
- rtc_ray.org_x = ray.P.x;
- rtc_ray.org_y = ray.P.y;
- rtc_ray.org_z = ray.P.z;
- rtc_ray.dir_x = ray.D.x;
- rtc_ray.dir_y = ray.D.y;
- rtc_ray.dir_z = ray.D.z;
- rtc_ray.tnear = 0.0f;
- rtc_ray.tfar = ray.t;
- rtc_ray.time = ray.time;
- rtc_ray.mask = visibility;
+ rtc_ray.org_x = ray.P.x;
+ rtc_ray.org_y = ray.P.y;
+ rtc_ray.org_z = ray.P.z;
+ rtc_ray.dir_x = ray.D.x;
+ rtc_ray.dir_y = ray.D.y;
+ rtc_ray.dir_z = ray.D.z;
+ rtc_ray.tnear = 0.0f;
+ rtc_ray.tfar = ray.t;
+ rtc_ray.time = ray.time;
+ rtc_ray.mask = visibility;
}
-ccl_device_inline void kernel_embree_setup_rayhit(const Ray& ray, RTCRayHit& rayhit, const uint visibility)
+ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray,
+ RTCRayHit &rayhit,
+ const uint visibility)
{
- kernel_embree_setup_ray(ray, rayhit.ray, visibility);
- rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID;
- rayhit.hit.primID = RTC_INVALID_GEOMETRY_ID;
+ kernel_embree_setup_ray(ray, rayhit.ray, visibility);
+ rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID;
+ rayhit.hit.primID = RTC_INVALID_GEOMETRY_ID;
}
-ccl_device_inline void kernel_embree_convert_hit(KernelGlobals *kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect)
+ccl_device_inline void kernel_embree_convert_hit(KernelGlobals *kg,
+ const RTCRay *ray,
+ const RTCHit *hit,
+ Intersection *isect)
{
- bool is_hair = hit->geomID & 1;
- isect->u = is_hair ? hit->u : 1.0f - hit->v - hit->u;
- isect->v = is_hair ? hit->v : hit->u;
- isect->t = ray->tfar;
- isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
- if(hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
- RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
- isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) + kernel_tex_fetch(__object_node, hit->instID[0]/2);
- isect->object = hit->instID[0]/2;
- }
- else {
- isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
- isect->object = OBJECT_NONE;
- }
- isect->type = kernel_tex_fetch(__prim_type, isect->prim);
+ bool is_hair = hit->geomID & 1;
+ isect->u = is_hair ? hit->u : 1.0f - hit->v - hit->u;
+ isect->v = is_hair ? hit->v : hit->u;
+ isect->t = ray->tfar;
+ isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
+ if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
+ RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
+ rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
+ isect->prim = hit->primID +
+ (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) +
+ kernel_tex_fetch(__object_node, hit->instID[0] / 2);
+ isect->object = hit->instID[0] / 2;
+ }
+ else {
+ isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(
+ rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
+ isect->object = OBJECT_NONE;
+ }
+ isect->type = kernel_tex_fetch(__prim_type, isect->prim);
}
-ccl_device_inline void kernel_embree_convert_local_hit(KernelGlobals *kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int local_object_id)
+ccl_device_inline void kernel_embree_convert_local_hit(KernelGlobals *kg,
+ const RTCRay *ray,
+ const RTCHit *hit,
+ Intersection *isect,
+ int local_object_id)
{
- isect->u = 1.0f - hit->v - hit->u;
- isect->v = hit->u;
- isect->t = ray->tfar;
- isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
- RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.bvh.scene, local_object_id * 2));
- isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) + kernel_tex_fetch(__object_node, local_object_id);
- isect->object = local_object_id;
- isect->type = kernel_tex_fetch(__prim_type, isect->prim);
+ isect->u = 1.0f - hit->v - hit->u;
+ isect->v = hit->u;
+ isect->t = ray->tfar;
+ isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
+ RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
+ rtcGetGeometry(kernel_data.bvh.scene, local_object_id * 2));
+ isect->prim = hit->primID +
+ (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) +
+ kernel_tex_fetch(__object_node, local_object_id);
+ isect->object = local_object_id;
+ isect->type = kernel_tex_fetch(__prim_type, isect->prim);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/bvh/bvh_local.h b/intern/cycles/kernel/bvh/bvh_local.h
index 3bdc9293a6c..7a069ef1108 100644
--- a/intern/cycles/kernel/bvh/bvh_local.h
+++ b/intern/cycles/kernel/bvh/bvh_local.h
@@ -43,208 +43,201 @@ ccl_device
#else
ccl_device_inline
#endif
-bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
- const Ray *ray,
- LocalIntersection *local_isect,
- int local_object,
- uint *lcg_state,
- int max_hits)
+ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
+ const Ray *ray,
+ LocalIntersection *local_isect,
+ int local_object,
+ uint *lcg_state,
+ int max_hits)
{
- /* todo:
- * - test if pushing distance on the stack helps (for non shadow rays)
- * - separate version for shadow rays
- * - likely and unlikely for if() statements
- * - test restrict attribute for pointers
- */
+ /* todo:
+ * - test if pushing distance on the stack helps (for non shadow rays)
+ * - separate version for shadow rays
+ * - likely and unlikely for if() statements
+ * - test restrict attribute for pointers
+ */
- /* traversal stack in CUDA thread-local memory */
- int traversal_stack[BVH_STACK_SIZE];
- traversal_stack[0] = ENTRYPOINT_SENTINEL;
+ /* traversal stack in CUDA thread-local memory */
+ int traversal_stack[BVH_STACK_SIZE];
+ traversal_stack[0] = ENTRYPOINT_SENTINEL;
- /* traversal variables in registers */
- int stack_ptr = 0;
- int node_addr = kernel_tex_fetch(__object_node, local_object);
+ /* traversal variables in registers */
+ int stack_ptr = 0;
+ int node_addr = kernel_tex_fetch(__object_node, local_object);
- /* ray parameters in registers */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = ray->t;
+ /* ray parameters in registers */
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
+ float isect_t = ray->t;
- if(local_isect != NULL) {
- local_isect->num_hits = 0;
- }
- kernel_assert((local_isect == NULL) == (max_hits == 0));
+ if (local_isect != NULL) {
+ local_isect->num_hits = 0;
+ }
+ kernel_assert((local_isect == NULL) == (max_hits == 0));
- const int object_flag = kernel_tex_fetch(__object_flag, local_object);
- if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+ const int object_flag = kernel_tex_fetch(__object_flag, local_object);
+ if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
- isect_t = bvh_instance_motion_push(kg,
- local_object,
- ray,
- &P,
- &dir,
- &idir,
- isect_t,
- &ob_itfm);
+ Transform ob_itfm;
+ isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
#else
- isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
+ isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
#endif
- object = local_object;
- }
+ object = local_object;
+ }
#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
+ const shuffle_swap_t shuf_identity = shuffle_swap_identity();
+ const shuffle_swap_t shuf_swap = shuffle_swap_swap();
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
+ const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+ ssef Psplat[3], idirsplat[3];
# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect_t);
+ ssef tnear(0.0f), tfar(isect_t);
# endif
- shuffle_swap_t shufflexyz[3];
+ shuffle_swap_t shufflexyz[3];
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
+ ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
- /* traversal loop */
- do {
- do {
- /* traverse internal nodes */
- while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- int node_addr_child1, traverse_mask;
- float dist[2];
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ /* traversal loop */
+ do {
+ do {
+ /* traverse internal nodes */
+ while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ int node_addr_child1, traverse_mask;
+ float dist[2];
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
#if !defined(__KERNEL_SSE2__)
- traverse_mask = NODE_INTERSECT(kg,
- P,
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
# if BVH_FEATURE(BVH_HAIR)
- dir,
+ dir,
# endif
- idir,
- isect_t,
- node_addr,
- PATH_RAY_ALL_VISIBILITY,
- dist);
+ idir,
+ isect_t,
+ node_addr,
+ PATH_RAY_ALL_VISIBILITY,
+ dist);
#else // __KERNEL_SSE2__
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
+ dir,
# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
+ tnear,
+ tfar,
# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- PATH_RAY_ALL_VISIBILITY,
- dist);
+ tsplat,
+ Psplat,
+ idirsplat,
+ shufflexyz,
+ node_addr,
+ PATH_RAY_ALL_VISIBILITY,
+ dist);
#endif // __KERNEL_SSE2__
- node_addr = __float_as_int(cnodes.z);
- node_addr_child1 = __float_as_int(cnodes.w);
+ node_addr = __float_as_int(cnodes.z);
+ node_addr_child1 = __float_as_int(cnodes.w);
- if(traverse_mask == 3) {
- /* Both children were intersected, push the farther one. */
- bool is_closest_child1 = (dist[1] < dist[0]);
- if(is_closest_child1) {
- int tmp = node_addr;
- node_addr = node_addr_child1;
- node_addr_child1 = tmp;
- }
+ if (traverse_mask == 3) {
+ /* Both children were intersected, push the farther one. */
+ bool is_closest_child1 = (dist[1] < dist[0]);
+ if (is_closest_child1) {
+ int tmp = node_addr;
+ node_addr = node_addr_child1;
+ node_addr_child1 = tmp;
+ }
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_STACK_SIZE);
- traversal_stack[stack_ptr] = node_addr_child1;
- }
- else {
- /* One child was intersected. */
- if(traverse_mask == 2) {
- node_addr = node_addr_child1;
- }
- else if(traverse_mask == 0) {
- /* Neither child was intersected. */
- node_addr = traversal_stack[stack_ptr];
- --stack_ptr;
- }
- }
- }
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_STACK_SIZE);
+ traversal_stack[stack_ptr] = node_addr_child1;
+ }
+ else {
+ /* One child was intersected. */
+ if (traverse_mask == 2) {
+ node_addr = node_addr_child1;
+ }
+ else if (traverse_mask == 0) {
+ /* Neither child was intersected. */
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
+ }
+ }
+ }
- /* if node is leaf, fetch triangle list */
- if(node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
- int prim_addr = __float_as_int(leaf.x);
+ /* if node is leaf, fetch triangle list */
+ if (node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+ int prim_addr = __float_as_int(leaf.x);
- const int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
+ const int prim_addr2 = __float_as_int(leaf.y);
+ const uint type = __float_as_int(leaf.w);
- /* pop */
- node_addr = traversal_stack[stack_ptr];
- --stack_ptr;
+ /* pop */
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
- /* primitive intersection */
- switch(type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- /* intersect ray against primitive */
- for(; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if(triangle_intersect_local(kg,
- local_isect,
- P,
- dir,
- object,
- local_object,
- prim_addr,
- isect_t,
- lcg_state,
- max_hits)) {
- return true;
- }
- }
- break;
- }
+ /* primitive intersection */
+ switch (type & PRIMITIVE_ALL) {
+ case PRIMITIVE_TRIANGLE: {
+ /* intersect ray against primitive */
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ if (triangle_intersect_local(kg,
+ local_isect,
+ P,
+ dir,
+ object,
+ local_object,
+ prim_addr,
+ isect_t,
+ lcg_state,
+ max_hits)) {
+ return true;
+ }
+ }
+ break;
+ }
#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- /* intersect ray against primitive */
- for(; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if(motion_triangle_intersect_local(kg,
- local_isect,
- P,
- dir,
- ray->time,
- object,
- local_object,
- prim_addr,
- isect_t,
- lcg_state,
- max_hits)) {
- return true;
- }
- }
- break;
- }
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ /* intersect ray against primitive */
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ if (motion_triangle_intersect_local(kg,
+ local_isect,
+ P,
+ dir,
+ ray->time,
+ object,
+ local_object,
+ prim_addr,
+ isect_t,
+ lcg_state,
+ max_hits)) {
+ return true;
+ }
+ }
+ break;
+ }
#endif
- default: {
- break;
- }
- }
- }
- } while(node_addr != ENTRYPOINT_SENTINEL);
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ default: {
+ break;
+ }
+ }
+ }
+ } while (node_addr != ENTRYPOINT_SENTINEL);
+ } while (node_addr != ENTRYPOINT_SENTINEL);
- return false;
+ return false;
}
ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
@@ -254,35 +247,20 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
uint *lcg_state,
int max_hits)
{
- switch(kernel_data.bvh.bvh_layout) {
+ switch (kernel_data.bvh.bvh_layout) {
#ifdef __KERNEL_AVX2__
- case BVH_LAYOUT_BVH8:
- return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
- ray,
- local_isect,
- local_object,
- lcg_state,
- max_hits);
+ case BVH_LAYOUT_BVH8:
+ return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
#endif
#ifdef __QBVH__
- case BVH_LAYOUT_BVH4:
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
- ray,
- local_isect,
- local_object,
- lcg_state,
- max_hits);
+ case BVH_LAYOUT_BVH4:
+ return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
#endif
- case BVH_LAYOUT_BVH2:
- return BVH_FUNCTION_FULL_NAME(BVH)(kg,
- ray,
- local_isect,
- local_object,
- lcg_state,
- max_hits);
- }
- kernel_assert(!"Should not happen");
- return false;
+ case BVH_LAYOUT_BVH2:
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
+ }
+ kernel_assert(!"Should not happen");
+ return false;
}
#undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h
index 060b3934a41..042630121c8 100644
--- a/intern/cycles/kernel/bvh/bvh_nodes.h
+++ b/intern/cycles/kernel/bvh/bvh_nodes.h
@@ -20,12 +20,12 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals *k
int node_addr,
int child)
{
- Transform space;
- const int child_addr = node_addr + child * 3;
- space.x = kernel_tex_fetch(__bvh_nodes, child_addr+1);
- space.y = kernel_tex_fetch(__bvh_nodes, child_addr+2);
- space.z = kernel_tex_fetch(__bvh_nodes, child_addr+3);
- return space;
+ Transform space;
+ const int child_addr = node_addr + child * 3;
+ space.x = kernel_tex_fetch(__bvh_nodes, child_addr + 1);
+ space.y = kernel_tex_fetch(__bvh_nodes, child_addr + 2);
+ space.z = kernel_tex_fetch(__bvh_nodes, child_addr + 3);
+ return space;
}
#if !defined(__KERNEL_SSE2__)
@@ -38,42 +38,41 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
float dist[2])
{
- /* fetch node data */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr+1);
- float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr+2);
- float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr+3);
-
- /* intersect ray against child nodes */
- float c0lox = (node0.x - P.x) * idir.x;
- float c0hix = (node0.z - P.x) * idir.x;
- float c0loy = (node1.x - P.y) * idir.y;
- float c0hiy = (node1.z - P.y) * idir.y;
- float c0loz = (node2.x - P.z) * idir.z;
- float c0hiz = (node2.z - P.z) * idir.z;
- float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
- float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
-
- float c1lox = (node0.y - P.x) * idir.x;
- float c1hix = (node0.w - P.x) * idir.x;
- float c1loy = (node1.y - P.y) * idir.y;
- float c1hiy = (node1.w - P.y) * idir.y;
- float c1loz = (node2.y - P.z) * idir.z;
- float c1hiz = (node2.w - P.z) * idir.z;
- float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
- float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
-
- dist[0] = c0min;
- dist[1] = c1min;
-
-#ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
- (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
-#else
- return ((c0max >= c0min)? 1: 0) |
- ((c1max >= c1min)? 2: 0);
-#endif
+ /* fetch node data */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+ float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1);
+ float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2);
+ float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3);
+
+ /* intersect ray against child nodes */
+ float c0lox = (node0.x - P.x) * idir.x;
+ float c0hix = (node0.z - P.x) * idir.x;
+ float c0loy = (node1.x - P.y) * idir.y;
+ float c0hiy = (node1.z - P.y) * idir.y;
+ float c0loz = (node2.x - P.z) * idir.z;
+ float c0hiz = (node2.z - P.z) * idir.z;
+ float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
+ float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
+
+ float c1lox = (node0.y - P.x) * idir.x;
+ float c1hix = (node0.w - P.x) * idir.x;
+ float c1loy = (node1.y - P.y) * idir.y;
+ float c1hiy = (node1.w - P.y) * idir.y;
+ float c1loz = (node2.y - P.z) * idir.z;
+ float c1hiz = (node2.w - P.z) * idir.z;
+ float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
+ float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
+
+ dist[0] = c0min;
+ dist[1] = c1min;
+
+# ifdef __VISIBILITY_FLAG__
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
+ (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
+# else
+ return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 2 : 0);
+# endif
}
ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
@@ -87,118 +86,115 @@ ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
float dist[2])
{
- /* fetch node data */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr+1);
- float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr+2);
- float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr+3);
-
- /* intersect ray against child nodes */
- float c0lox = (node0.x - P.x) * idir.x;
- float c0hix = (node0.z - P.x) * idir.x;
- float c0loy = (node1.x - P.y) * idir.y;
- float c0hiy = (node1.z - P.y) * idir.y;
- float c0loz = (node2.x - P.z) * idir.z;
- float c0hiz = (node2.z - P.z) * idir.z;
- float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
- float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
-
- float c1lox = (node0.y - P.x) * idir.x;
- float c1hix = (node0.w - P.x) * idir.x;
- float c1loy = (node1.y - P.y) * idir.y;
- float c1hiy = (node1.w - P.y) * idir.y;
- float c1loz = (node2.y - P.z) * idir.z;
- float c1hiz = (node2.w - P.z) * idir.z;
- float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
- float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
-
- if(difl != 0.0f) {
- float hdiff = 1.0f + difl;
- float ldiff = 1.0f - difl;
- if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) {
- c0min = max(ldiff * c0min, c0min - extmax);
- c0max = min(hdiff * c0max, c0max + extmax);
- }
- if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) {
- c1min = max(ldiff * c1min, c1min - extmax);
- c1max = min(hdiff * c1max, c1max + extmax);
- }
- }
-
- dist[0] = c0min;
- dist[1] = c1min;
-
-#ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
- (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
-#else
- return ((c0max >= c0min)? 1: 0) |
- ((c1max >= c1min)? 2: 0);
-#endif
+ /* fetch node data */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+ float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1);
+ float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2);
+ float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3);
+
+ /* intersect ray against child nodes */
+ float c0lox = (node0.x - P.x) * idir.x;
+ float c0hix = (node0.z - P.x) * idir.x;
+ float c0loy = (node1.x - P.y) * idir.y;
+ float c0hiy = (node1.z - P.y) * idir.y;
+ float c0loz = (node2.x - P.z) * idir.z;
+ float c0hiz = (node2.z - P.z) * idir.z;
+ float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
+ float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
+
+ float c1lox = (node0.y - P.x) * idir.x;
+ float c1hix = (node0.w - P.x) * idir.x;
+ float c1loy = (node1.y - P.y) * idir.y;
+ float c1hiy = (node1.w - P.y) * idir.y;
+ float c1loz = (node2.y - P.z) * idir.z;
+ float c1hiz = (node2.w - P.z) * idir.z;
+ float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
+ float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
+
+ if (difl != 0.0f) {
+ float hdiff = 1.0f + difl;
+ float ldiff = 1.0f - difl;
+ if (__float_as_int(cnodes.z) & PATH_RAY_CURVE) {
+ c0min = max(ldiff * c0min, c0min - extmax);
+ c0max = min(hdiff * c0max, c0max + extmax);
+ }
+ if (__float_as_int(cnodes.w) & PATH_RAY_CURVE) {
+ c1min = max(ldiff * c1min, c1min - extmax);
+ c1max = min(hdiff * c1max, c1max + extmax);
+ }
+ }
+
+ dist[0] = c0min;
+ dist[1] = c1min;
+
+# ifdef __VISIBILITY_FLAG__
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
+ (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
+# else
+ return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 2 : 0);
+# endif
}
-ccl_device_forceinline bool bvh_unaligned_node_intersect_child(
- KernelGlobals *kg,
- const float3 P,
- const float3 dir,
- const float t,
- int node_addr,
- int child,
- float dist[2])
+ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals *kg,
+ const float3 P,
+ const float3 dir,
+ const float t,
+ int node_addr,
+ int child,
+ float dist[2])
{
- Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child);
- float3 aligned_dir = transform_direction(&space, dir);
- float3 aligned_P = transform_point(&space, P);
- float3 nrdir = -bvh_inverse_direction(aligned_dir);
- float3 lower_xyz = aligned_P * nrdir;
- float3 upper_xyz = lower_xyz - nrdir;
- const float near_x = min(lower_xyz.x, upper_xyz.x);
- const float near_y = min(lower_xyz.y, upper_xyz.y);
- const float near_z = min(lower_xyz.z, upper_xyz.z);
- const float far_x = max(lower_xyz.x, upper_xyz.x);
- const float far_y = max(lower_xyz.y, upper_xyz.y);
- const float far_z = max(lower_xyz.z, upper_xyz.z);
- const float tnear = max4(0.0f, near_x, near_y, near_z);
- const float tfar = min4(t, far_x, far_y, far_z);
- *dist = tnear;
- return tnear <= tfar;
+ Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child);
+ float3 aligned_dir = transform_direction(&space, dir);
+ float3 aligned_P = transform_point(&space, P);
+ float3 nrdir = -bvh_inverse_direction(aligned_dir);
+ float3 lower_xyz = aligned_P * nrdir;
+ float3 upper_xyz = lower_xyz - nrdir;
+ const float near_x = min(lower_xyz.x, upper_xyz.x);
+ const float near_y = min(lower_xyz.y, upper_xyz.y);
+ const float near_z = min(lower_xyz.z, upper_xyz.z);
+ const float far_x = max(lower_xyz.x, upper_xyz.x);
+ const float far_y = max(lower_xyz.y, upper_xyz.y);
+ const float far_z = max(lower_xyz.z, upper_xyz.z);
+ const float tnear = max4(0.0f, near_x, near_y, near_z);
+ const float tfar = min4(t, far_x, far_y, far_z);
+ *dist = tnear;
+ return tnear <= tfar;
}
-ccl_device_forceinline bool bvh_unaligned_node_intersect_child_robust(
- KernelGlobals *kg,
- const float3 P,
- const float3 dir,
- const float t,
- const float difl,
- int node_addr,
- int child,
- float dist[2])
+ccl_device_forceinline bool bvh_unaligned_node_intersect_child_robust(KernelGlobals *kg,
+ const float3 P,
+ const float3 dir,
+ const float t,
+ const float difl,
+ int node_addr,
+ int child,
+ float dist[2])
{
- Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child);
- float3 aligned_dir = transform_direction(&space, dir);
- float3 aligned_P = transform_point(&space, P);
- float3 nrdir = -bvh_inverse_direction(aligned_dir);
- float3 tLowerXYZ = aligned_P * nrdir;
- float3 tUpperXYZ = tLowerXYZ - nrdir;
- const float near_x = min(tLowerXYZ.x, tUpperXYZ.x);
- const float near_y = min(tLowerXYZ.y, tUpperXYZ.y);
- const float near_z = min(tLowerXYZ.z, tUpperXYZ.z);
- const float far_x = max(tLowerXYZ.x, tUpperXYZ.x);
- const float far_y = max(tLowerXYZ.y, tUpperXYZ.y);
- const float far_z = max(tLowerXYZ.z, tUpperXYZ.z);
- const float tnear = max4(0.0f, near_x, near_y, near_z);
- const float tfar = min4(t, far_x, far_y, far_z);
- *dist = tnear;
- if(difl != 0.0f) {
- /* TODO(sergey): Same as for QBVH, needs a proper use. */
- const float round_down = 1.0f - difl;
- const float round_up = 1.0f + difl;
- return round_down*tnear <= round_up*tfar;
- }
- else {
- return tnear <= tfar;
- }
+ Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child);
+ float3 aligned_dir = transform_direction(&space, dir);
+ float3 aligned_P = transform_point(&space, P);
+ float3 nrdir = -bvh_inverse_direction(aligned_dir);
+ float3 tLowerXYZ = aligned_P * nrdir;
+ float3 tUpperXYZ = tLowerXYZ - nrdir;
+ const float near_x = min(tLowerXYZ.x, tUpperXYZ.x);
+ const float near_y = min(tLowerXYZ.y, tUpperXYZ.y);
+ const float near_z = min(tLowerXYZ.z, tUpperXYZ.z);
+ const float far_x = max(tLowerXYZ.x, tUpperXYZ.x);
+ const float far_y = max(tLowerXYZ.y, tUpperXYZ.y);
+ const float far_z = max(tLowerXYZ.z, tUpperXYZ.z);
+ const float tnear = max4(0.0f, near_x, near_y, near_z);
+ const float tfar = min4(t, far_x, far_y, far_z);
+ *dist = tnear;
+ if (difl != 0.0f) {
+ /* TODO(sergey): Same as for QBVH, needs a proper use. */
+ const float round_down = 1.0f - difl;
+ const float round_up = 1.0f + difl;
+ return round_down * tnear <= round_up * tfar;
+ }
+ else {
+ return tnear <= tfar;
+ }
}
ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
@@ -210,25 +206,25 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
const uint visibility,
float dist[2])
{
- int mask = 0;
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- if(bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
-#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(cnodes.x) & visibility))
-#endif
- {
- mask |= 1;
- }
- }
- if(bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) {
-#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(cnodes.y) & visibility))
-#endif
- {
- mask |= 2;
- }
- }
- return mask;
+ int mask = 0;
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+ if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
+# ifdef __VISIBILITY_FLAG__
+ if ((__float_as_uint(cnodes.x) & visibility))
+# endif
+ {
+ mask |= 1;
+ }
+ }
+ if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) {
+# ifdef __VISIBILITY_FLAG__
+ if ((__float_as_uint(cnodes.y) & visibility))
+# endif
+ {
+ mask |= 2;
+ }
+ }
+ return mask;
}
ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
@@ -242,25 +238,25 @@ ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg
const uint visibility,
float dist[2])
{
- int mask = 0;
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 0, &dist[0])) {
-#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(cnodes.x) & visibility))
-#endif
- {
- mask |= 1;
- }
- }
- if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 1, &dist[1])) {
-#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(cnodes.y) & visibility))
-#endif
- {
- mask |= 2;
- }
- }
- return mask;
+ int mask = 0;
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+ if (bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 0, &dist[0])) {
+# ifdef __VISIBILITY_FLAG__
+ if ((__float_as_uint(cnodes.x) & visibility))
+# endif
+ {
+ mask |= 1;
+ }
+ }
+ if (bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 1, &dist[1])) {
+# ifdef __VISIBILITY_FLAG__
+ if ((__float_as_uint(cnodes.y) & visibility))
+# endif
+ {
+ mask |= 2;
+ }
+ }
+ return mask;
}
ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
@@ -272,26 +268,13 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
const uint visibility,
float dist[2])
{
- float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
- if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return bvh_unaligned_node_intersect(kg,
- P,
- dir,
- idir,
- t,
- node_addr,
- visibility,
- dist);
- }
- else {
- return bvh_aligned_node_intersect(kg,
- P,
- idir,
- t,
- node_addr,
- visibility,
- dist);
- }
+ float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
+ if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+ return bvh_unaligned_node_intersect(kg, P, dir, idir, t, node_addr, visibility, dist);
+ }
+ else {
+ return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist);
+ }
}
ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
@@ -305,279 +288,244 @@ ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
const uint visibility,
float dist[2])
{
- float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
- if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return bvh_unaligned_node_intersect_robust(kg,
- P,
- dir,
- idir,
- t,
- difl,
- extmax,
- node_addr,
- visibility,
- dist);
- }
- else {
- return bvh_aligned_node_intersect_robust(kg,
- P,
- idir,
- t,
- difl,
- extmax,
- node_addr,
- visibility,
- dist);
- }
+ float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
+ if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+ return bvh_unaligned_node_intersect_robust(
+ kg, P, dir, idir, t, difl, extmax, node_addr, visibility, dist);
+ }
+ else {
+ return bvh_aligned_node_intersect_robust(
+ kg, P, idir, t, difl, extmax, node_addr, visibility, dist);
+ }
}
-#else /* !defined(__KERNEL_SSE2__) */
-
-int ccl_device_forceinline bvh_aligned_node_intersect(
- KernelGlobals *kg,
- const float3& P,
- const float3& dir,
- const ssef& tsplat,
- const ssef Psplat[3],
- const ssef idirsplat[3],
- const shuffle_swap_t shufflexyz[3],
- const int node_addr,
- const uint visibility,
- float dist[2])
+#else /* !defined(__KERNEL_SSE2__) */
+
+int ccl_device_forceinline bvh_aligned_node_intersect(KernelGlobals *kg,
+ const float3 &P,
+ const float3 &dir,
+ const ssef &tsplat,
+ const ssef Psplat[3],
+ const ssef idirsplat[3],
+ const shuffle_swap_t shufflexyz[3],
+ const int node_addr,
+ const uint visibility,
+ float dist[2])
{
- /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+ /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
+ const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- /* fetch node data */
- const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + node_addr;
+ /* fetch node data */
+ const ssef *bvh_nodes = (ssef *)kg->__bvh_nodes.data + node_addr;
- /* intersect ray against child nodes */
- const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
- const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
- const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
+ /* intersect ray against child nodes */
+ const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
+ const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
+ const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
- /* calculate { c0min, c1min, -c0max, -c1max} */
- ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
- const ssef tminmax = minmax ^ pn;
- const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
+ /* calculate { c0min, c1min, -c0max, -c1max} */
+ ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
+ const ssef tminmax = minmax ^ pn;
+ const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
- dist[0] = tminmax[0];
- dist[1] = tminmax[1];
+ dist[0] = tminmax[0];
+ dist[1] = tminmax[1];
- int mask = movemask(lrhit);
+ int mask = movemask(lrhit);
# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
- return cmask;
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+ int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
+ (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
+ return cmask;
# else
- return mask & 3;
+ return mask & 3;
# endif
}
-ccl_device_forceinline int bvh_aligned_node_intersect_robust(
- KernelGlobals *kg,
- const float3& P,
- const float3& dir,
- const ssef& tsplat,
- const ssef Psplat[3],
- const ssef idirsplat[3],
- const shuffle_swap_t shufflexyz[3],
- const float difl,
- const float extmax,
- const int nodeAddr,
- const uint visibility,
- float dist[2])
+ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
+ const float3 &P,
+ const float3 &dir,
+ const ssef &tsplat,
+ const ssef Psplat[3],
+ const ssef idirsplat[3],
+ const shuffle_swap_t shufflexyz[3],
+ const float difl,
+ const float extmax,
+ const int nodeAddr,
+ const uint visibility,
+ float dist[2])
{
- /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
-
- /* fetch node data */
- const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr;
-
- /* intersect ray against child nodes */
- const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
- const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
- const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
-
- /* calculate { c0min, c1min, -c0max, -c1max} */
- ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
- const ssef tminmax = minmax ^ pn;
-
- if(difl != 0.0f) {
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
- float4 *tminmaxview = (float4*)&tminmax;
- float& c0min = tminmaxview->x, &c1min = tminmaxview->y;
- float& c0max = tminmaxview->z, &c1max = tminmaxview->w;
- float hdiff = 1.0f + difl;
- float ldiff = 1.0f - difl;
- if(__float_as_int(cnodes.x) & PATH_RAY_CURVE) {
- c0min = max(ldiff * c0min, c0min - extmax);
- c0max = min(hdiff * c0max, c0max + extmax);
- }
- if(__float_as_int(cnodes.y) & PATH_RAY_CURVE) {
- c1min = max(ldiff * c1min, c1min - extmax);
- c1max = min(hdiff * c1max, c1max + extmax);
- }
- }
-
- const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
-
- dist[0] = tminmax[0];
- dist[1] = tminmax[1];
-
- int mask = movemask(lrhit);
+ /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
+ const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+
+ /* fetch node data */
+ const ssef *bvh_nodes = (ssef *)kg->__bvh_nodes.data + nodeAddr;
+
+ /* intersect ray against child nodes */
+ const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
+ const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
+ const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
+
+ /* calculate { c0min, c1min, -c0max, -c1max} */
+ ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
+ const ssef tminmax = minmax ^ pn;
+
+ if (difl != 0.0f) {
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr + 0);
+ float4 *tminmaxview = (float4 *)&tminmax;
+ float &c0min = tminmaxview->x, &c1min = tminmaxview->y;
+ float &c0max = tminmaxview->z, &c1max = tminmaxview->w;
+ float hdiff = 1.0f + difl;
+ float ldiff = 1.0f - difl;
+ if (__float_as_int(cnodes.x) & PATH_RAY_CURVE) {
+ c0min = max(ldiff * c0min, c0min - extmax);
+ c0max = min(hdiff * c0max, c0max + extmax);
+ }
+ if (__float_as_int(cnodes.y) & PATH_RAY_CURVE) {
+ c1min = max(ldiff * c1min, c1min - extmax);
+ c1max = min(hdiff * c1max, c1max + extmax);
+ }
+ }
+
+ const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
+
+ dist[0] = tminmax[0];
+ dist[1] = tminmax[1];
+
+ int mask = movemask(lrhit);
# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
- return cmask;
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr + 0);
+ int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
+ (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
+ return cmask;
# else
- return mask & 3;
+ return mask & 3;
# endif
}
ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
const float3 P,
const float3 dir,
- const ssef& isect_near,
- const ssef& isect_far,
+ const ssef &isect_near,
+ const ssef &isect_far,
const int node_addr,
const uint visibility,
float dist[2])
{
- Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
- Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
-
- float3 aligned_dir0 = transform_direction(&space0, dir),
- aligned_dir1 = transform_direction(&space1, dir);
- float3 aligned_P0 = transform_point(&space0, P),
- aligned_P1 = transform_point(&space1, P);
- float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
- nrdir1 = -bvh_inverse_direction(aligned_dir1);
-
- ssef lower_x = ssef(aligned_P0.x * nrdir0.x,
- aligned_P1.x * nrdir1.x,
- 0.0f, 0.0f),
- lower_y = ssef(aligned_P0.y * nrdir0.y,
- aligned_P1.y * nrdir1.y,
- 0.0f,
- 0.0f),
- lower_z = ssef(aligned_P0.z * nrdir0.z,
- aligned_P1.z * nrdir1.z,
- 0.0f,
- 0.0f);
-
- ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
- upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
- upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
-
- ssef tnear_x = min(lower_x, upper_x);
- ssef tnear_y = min(lower_y, upper_y);
- ssef tnear_z = min(lower_z, upper_z);
- ssef tfar_x = max(lower_x, upper_x);
- ssef tfar_y = max(lower_y, upper_y);
- ssef tfar_z = max(lower_z, upper_z);
-
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- sseb vmask = tnear <= tfar;
- dist[0] = tnear.f[0];
- dist[1] = tnear.f[1];
-
- int mask = (int)movemask(vmask);
+ Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
+ Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
+
+ float3 aligned_dir0 = transform_direction(&space0, dir),
+ aligned_dir1 = transform_direction(&space1, dir);
+ float3 aligned_P0 = transform_point(&space0, P), aligned_P1 = transform_point(&space1, P);
+ float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
+ nrdir1 = -bvh_inverse_direction(aligned_dir1);
+
+ ssef lower_x = ssef(aligned_P0.x * nrdir0.x, aligned_P1.x * nrdir1.x, 0.0f, 0.0f),
+ lower_y = ssef(aligned_P0.y * nrdir0.y, aligned_P1.y * nrdir1.y, 0.0f, 0.0f),
+ lower_z = ssef(aligned_P0.z * nrdir0.z, aligned_P1.z * nrdir1.z, 0.0f, 0.0f);
+
+ ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
+ upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
+ upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
+
+ ssef tnear_x = min(lower_x, upper_x);
+ ssef tnear_y = min(lower_y, upper_y);
+ ssef tnear_z = min(lower_z, upper_z);
+ ssef tfar_x = max(lower_x, upper_x);
+ ssef tfar_y = max(lower_y, upper_y);
+ ssef tfar_z = max(lower_z, upper_z);
+
+ const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+ const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+ sseb vmask = tnear <= tfar;
+ dist[0] = tnear.f[0];
+ dist[1] = tnear.f[1];
+
+ int mask = (int)movemask(vmask);
# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
- return cmask;
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+ int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
+ (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
+ return cmask;
# else
- return mask & 3;
+ return mask & 3;
# endif
}
ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
const float3 P,
const float3 dir,
- const ssef& isect_near,
- const ssef& isect_far,
+ const ssef &isect_near,
+ const ssef &isect_far,
const float difl,
const int node_addr,
const uint visibility,
float dist[2])
{
- Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
- Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
-
- float3 aligned_dir0 = transform_direction(&space0, dir),
- aligned_dir1 = transform_direction(&space1, dir);
- float3 aligned_P0 = transform_point(&space0, P),
- aligned_P1 = transform_point(&space1, P);
- float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
- nrdir1 = -bvh_inverse_direction(aligned_dir1);
-
- ssef lower_x = ssef(aligned_P0.x * nrdir0.x,
- aligned_P1.x * nrdir1.x,
- 0.0f, 0.0f),
- lower_y = ssef(aligned_P0.y * nrdir0.y,
- aligned_P1.y * nrdir1.y,
- 0.0f,
- 0.0f),
- lower_z = ssef(aligned_P0.z * nrdir0.z,
- aligned_P1.z * nrdir1.z,
- 0.0f,
- 0.0f);
-
- ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
- upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
- upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
-
- ssef tnear_x = min(lower_x, upper_x);
- ssef tnear_y = min(lower_y, upper_y);
- ssef tnear_z = min(lower_z, upper_z);
- ssef tfar_x = max(lower_x, upper_x);
- ssef tfar_y = max(lower_y, upper_y);
- ssef tfar_z = max(lower_z, upper_z);
-
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- sseb vmask;
- if(difl != 0.0f) {
- const float round_down = 1.0f - difl;
- const float round_up = 1.0f + difl;
- vmask = round_down*tnear <= round_up*tfar;
- }
- else {
- vmask = tnear <= tfar;
- }
-
- dist[0] = tnear.f[0];
- dist[1] = tnear.f[1];
-
- int mask = (int)movemask(vmask);
+ Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
+ Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
+
+ float3 aligned_dir0 = transform_direction(&space0, dir),
+ aligned_dir1 = transform_direction(&space1, dir);
+ float3 aligned_P0 = transform_point(&space0, P), aligned_P1 = transform_point(&space1, P);
+ float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
+ nrdir1 = -bvh_inverse_direction(aligned_dir1);
+
+ ssef lower_x = ssef(aligned_P0.x * nrdir0.x, aligned_P1.x * nrdir1.x, 0.0f, 0.0f),
+ lower_y = ssef(aligned_P0.y * nrdir0.y, aligned_P1.y * nrdir1.y, 0.0f, 0.0f),
+ lower_z = ssef(aligned_P0.z * nrdir0.z, aligned_P1.z * nrdir1.z, 0.0f, 0.0f);
+
+ ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
+ upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
+ upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
+
+ ssef tnear_x = min(lower_x, upper_x);
+ ssef tnear_y = min(lower_y, upper_y);
+ ssef tnear_z = min(lower_z, upper_z);
+ ssef tfar_x = max(lower_x, upper_x);
+ ssef tfar_y = max(lower_y, upper_y);
+ ssef tfar_z = max(lower_z, upper_z);
+
+ const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+ const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+ sseb vmask;
+ if (difl != 0.0f) {
+ const float round_down = 1.0f - difl;
+ const float round_up = 1.0f + difl;
+ vmask = round_down * tnear <= round_up * tfar;
+ }
+ else {
+ vmask = tnear <= tfar;
+ }
+
+ dist[0] = tnear.f[0];
+ dist[1] = tnear.f[1];
+
+ int mask = (int)movemask(vmask);
# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
- return cmask;
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+ int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
+ (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
+ return cmask;
# else
- return mask & 3;
+ return mask & 3;
# endif
}
ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
- const float3& P,
- const float3& dir,
- const ssef& isect_near,
- const ssef& isect_far,
- const ssef& tsplat,
+ const float3 &P,
+ const float3 &dir,
+ const ssef &isect_near,
+ const ssef &isect_far,
+ const ssef &tsplat,
const ssef Psplat[3],
const ssef idirsplat[3],
const shuffle_swap_t shufflexyz[3],
@@ -585,37 +533,23 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
const uint visibility,
float dist[2])
{
- float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
- if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return bvh_unaligned_node_intersect(kg,
- P,
- dir,
- isect_near,
- isect_far,
- node_addr,
- visibility,
- dist);
- }
- else {
- return bvh_aligned_node_intersect(kg,
- P,
- dir,
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- visibility,
- dist);
- }
+ float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
+ if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+ return bvh_unaligned_node_intersect(
+ kg, P, dir, isect_near, isect_far, node_addr, visibility, dist);
+ }
+ else {
+ return bvh_aligned_node_intersect(
+ kg, P, dir, tsplat, Psplat, idirsplat, shufflexyz, node_addr, visibility, dist);
+ }
}
ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
- const float3& P,
- const float3& dir,
- const ssef& isect_near,
- const ssef& isect_far,
- const ssef& tsplat,
+ const float3 &P,
+ const float3 &dir,
+ const ssef &isect_near,
+ const ssef &isect_far,
+ const ssef &tsplat,
const ssef Psplat[3],
const ssef idirsplat[3],
const shuffle_swap_t shufflexyz[3],
@@ -625,31 +559,24 @@ ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
const uint visibility,
float dist[2])
{
- float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
- if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return bvh_unaligned_node_intersect_robust(kg,
- P,
- dir,
- isect_near,
- isect_far,
- difl,
- node_addr,
- visibility,
- dist);
- }
- else {
- return bvh_aligned_node_intersect_robust(kg,
- P,
- dir,
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- difl,
- extmax,
- node_addr,
- visibility,
- dist);
- }
+ float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
+ if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+ return bvh_unaligned_node_intersect_robust(
+ kg, P, dir, isect_near, isect_far, difl, node_addr, visibility, dist);
+ }
+ else {
+ return bvh_aligned_node_intersect_robust(kg,
+ P,
+ dir,
+ tsplat,
+ Psplat,
+ idirsplat,
+ shufflexyz,
+ difl,
+ extmax,
+ node_addr,
+ visibility,
+ dist);
+ }
}
-#endif /* !defined(__KERNEL_SSE2__) */
+#endif /* !defined(__KERNEL_SSE2__) */
diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index d8e089711ee..b362779549c 100644
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -19,9 +19,9 @@
#ifdef __QBVH__
# include "kernel/bvh/qbvh_shadow_all.h"
-#ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_shadow_all.h"
-#endif
+# ifdef __KERNEL_AVX2__
+# include "kernel/bvh/obvh_shadow_all.h"
+# endif
#endif
#if BVH_FEATURE(BVH_HAIR)
@@ -44,350 +44,340 @@ ccl_device
#else
ccl_device_inline
#endif
-bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect_array,
- const uint visibility,
- const uint max_hits,
- uint *num_hits)
+ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
+ const Ray *ray,
+ Intersection *isect_array,
+ const uint visibility,
+ const uint max_hits,
+ uint *num_hits)
{
- /* todo:
- * - likely and unlikely for if() statements
- * - test restrict attribute for pointers
- */
-
- /* traversal stack in CUDA thread-local memory */
- int traversal_stack[BVH_STACK_SIZE];
- traversal_stack[0] = ENTRYPOINT_SENTINEL;
-
- /* traversal variables in registers */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* ray parameters in registers */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
+ /* todo:
+ * - likely and unlikely for if() statements
+ * - test restrict attribute for pointers
+ */
+
+ /* traversal stack in CUDA thread-local memory */
+ int traversal_stack[BVH_STACK_SIZE];
+ traversal_stack[0] = ENTRYPOINT_SENTINEL;
+
+ /* traversal variables in registers */
+ int stack_ptr = 0;
+ int node_addr = kernel_data.bvh.root;
+
+ /* ray parameters in registers */
+ const float tmax = ray->t;
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
+ float isect_t = tmax;
#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
+ Transform ob_itfm;
#endif
#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
+ int num_hits_in_instance = 0;
#endif
- *num_hits = 0;
- isect_array->t = tmax;
+ *num_hits = 0;
+ isect_array->t = tmax;
#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
+ const shuffle_swap_t shuf_identity = shuffle_swap_identity();
+ const shuffle_swap_t shuf_swap = shuffle_swap_swap();
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
+ const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+ ssef Psplat[3], idirsplat[3];
# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect_t);
+ ssef tnear(0.0f), tfar(isect_t);
# endif
- shuffle_swap_t shufflexyz[3];
+ shuffle_swap_t shufflexyz[3];
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
+ ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif /* __KERNEL_SSE2__ */
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+#endif /* __KERNEL_SSE2__ */
- /* traversal loop */
- do {
- do {
- /* traverse internal nodes */
- while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- int node_addr_child1, traverse_mask;
- float dist[2];
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ /* traversal loop */
+ do {
+ do {
+ /* traverse internal nodes */
+ while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ int node_addr_child1, traverse_mask;
+ float dist[2];
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
#if !defined(__KERNEL_SSE2__)
- traverse_mask = NODE_INTERSECT(kg,
- P,
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
# if BVH_FEATURE(BVH_HAIR)
- dir,
+ dir,
# endif
- idir,
- isect_t,
- node_addr,
- visibility,
- dist);
+ idir,
+ isect_t,
+ node_addr,
+ visibility,
+ dist);
#else // __KERNEL_SSE2__
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
+ dir,
# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
+ tnear,
+ tfar,
# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- visibility,
- dist);
+ tsplat,
+ Psplat,
+ idirsplat,
+ shufflexyz,
+ node_addr,
+ visibility,
+ dist);
#endif // __KERNEL_SSE2__
- node_addr = __float_as_int(cnodes.z);
- node_addr_child1 = __float_as_int(cnodes.w);
-
- if(traverse_mask == 3) {
- /* Both children were intersected, push the farther one. */
- bool is_closest_child1 = (dist[1] < dist[0]);
- if(is_closest_child1) {
- int tmp = node_addr;
- node_addr = node_addr_child1;
- node_addr_child1 = tmp;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_STACK_SIZE);
- traversal_stack[stack_ptr] = node_addr_child1;
- }
- else {
- /* One child was intersected. */
- if(traverse_mask == 2) {
- node_addr = node_addr_child1;
- }
- else if(traverse_mask == 0) {
- /* Neither child was intersected. */
- node_addr = traversal_stack[stack_ptr];
- --stack_ptr;
- }
- }
- }
-
- /* if node is leaf, fetch triangle list */
- if(node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
- int prim_addr = __float_as_int(leaf.x);
+ node_addr = __float_as_int(cnodes.z);
+ node_addr_child1 = __float_as_int(cnodes.w);
+
+ if (traverse_mask == 3) {
+ /* Both children were intersected, push the farther one. */
+ bool is_closest_child1 = (dist[1] < dist[0]);
+ if (is_closest_child1) {
+ int tmp = node_addr;
+ node_addr = node_addr_child1;
+ node_addr_child1 = tmp;
+ }
+
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_STACK_SIZE);
+ traversal_stack[stack_ptr] = node_addr_child1;
+ }
+ else {
+ /* One child was intersected. */
+ if (traverse_mask == 2) {
+ node_addr = node_addr_child1;
+ }
+ else if (traverse_mask == 0) {
+ /* Neither child was intersected. */
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
+ }
+ }
+ }
+
+ /* if node is leaf, fetch triangle list */
+ if (node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+ int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
- if(prim_addr >= 0) {
+ if (prim_addr >= 0) {
#endif
- const int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
-
- /* pop */
- node_addr = traversal_stack[stack_ptr];
- --stack_ptr;
-
- /* primitive intersection */
- while(prim_addr < prim_addr2) {
- kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
- bool hit;
-
- /* todo: specialized intersect functions which don't fill in
- * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
- * might give a few % performance improvement */
-
- switch(p_type) {
- case PRIMITIVE_TRIANGLE: {
- hit = triangle_intersect(kg,
- isect_array,
- P,
- dir,
- visibility,
- object,
- prim_addr);
- break;
- }
+ const int prim_addr2 = __float_as_int(leaf.y);
+ const uint type = __float_as_int(leaf.w);
+ const uint p_type = type & PRIMITIVE_ALL;
+
+ /* pop */
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
+
+ /* primitive intersection */
+ while (prim_addr < prim_addr2) {
+ kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
+ bool hit;
+
+ /* todo: specialized intersect functions which don't fill in
+ * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
+ * might give a few % performance improvement */
+
+ switch (p_type) {
+ case PRIMITIVE_TRIANGLE: {
+ hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
+ break;
+ }
#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- hit = motion_triangle_intersect(kg,
- isect_array,
- P,
- dir,
- ray->time,
- visibility,
- object,
- prim_addr);
- break;
- }
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ hit = motion_triangle_intersect(
+ kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
+ break;
+ }
#endif
#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
- if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(kg,
- isect_array,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type,
- NULL,
- 0, 0);
- }
- else {
- hit = curve_intersect(kg,
- isect_array,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type,
- NULL,
- 0, 0);
- }
- break;
- }
+ case PRIMITIVE_CURVE:
+ case PRIMITIVE_MOTION_CURVE: {
+ const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
+ if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
+ hit = cardinal_curve_intersect(kg,
+ isect_array,
+ P,
+ dir,
+ visibility,
+ object,
+ prim_addr,
+ ray->time,
+ curve_type,
+ NULL,
+ 0,
+ 0);
+ }
+ else {
+ hit = curve_intersect(kg,
+ isect_array,
+ P,
+ dir,
+ visibility,
+ object,
+ prim_addr,
+ ray->time,
+ curve_type,
+ NULL,
+ 0,
+ 0);
+ }
+ break;
+ }
#endif
- default: {
- hit = false;
- break;
- }
- }
+ default: {
+ hit = false;
+ break;
+ }
+ }
- /* shadow ray early termination */
- if(hit) {
- /* detect if this surface has a shader with transparent shadows */
+ /* shadow ray early termination */
+ if (hit) {
+ /* detect if this surface has a shader with transparent shadows */
- /* todo: optimize so primitive visibility flag indicates if
- * the primitive has a transparent shadow shader? */
- int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
- int shader = 0;
+ /* todo: optimize so primitive visibility flag indicates if
+ * the primitive has a transparent shadow shader? */
+ int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
+ int shader = 0;
#ifdef __HAIR__
- if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
+ if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
#endif
- {
- shader = kernel_tex_fetch(__tri_shader, prim);
- }
+ {
+ shader = kernel_tex_fetch(__tri_shader, prim);
+ }
#ifdef __HAIR__
- else {
- float4 str = kernel_tex_fetch(__curves, prim);
- shader = __float_as_int(str.z);
- }
+ else {
+ float4 str = kernel_tex_fetch(__curves, prim);
+ shader = __float_as_int(str.z);
+ }
#endif
- int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-
- /* if no transparent shadows, all light is blocked */
- if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
- return true;
- }
- /* if maximum number of hits reached, block all light */
- else if(*num_hits == max_hits) {
- return true;
- }
-
- /* move on to next entry in intersections array */
- isect_array++;
- (*num_hits)++;
+ int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
+
+ /* if no transparent shadows, all light is blocked */
+ if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
+ return true;
+ }
+ /* if maximum number of hits reached, block all light */
+ else if (*num_hits == max_hits) {
+ return true;
+ }
+
+ /* move on to next entry in intersections array */
+ isect_array++;
+ (*num_hits)++;
#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
+ num_hits_in_instance++;
#endif
- isect_array->t = isect_t;
- }
+ isect_array->t = isect_t;
+ }
- prim_addr++;
- }
- }
+ prim_addr++;
+ }
+ }
#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* instance push */
- object = kernel_tex_fetch(__prim_object, -prim_addr-1);
+ else {
+ /* instance push */
+ object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
# if BVH_FEATURE(BVH_MOTION)
- isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
+ isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
# else
- isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
+ isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
# endif
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
+ num_hits_in_instance = 0;
+ isect_array->t = isect_t;
# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
+ tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect_t);
+ tfar = ssef(isect_t);
# endif
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
# endif
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_STACK_SIZE);
- traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_STACK_SIZE);
+ traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ node_addr = kernel_tex_fetch(__object_node, object);
+ }
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
- if(stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
+ if (stack_ptr >= 0) {
+ kernel_assert(object != OBJECT_NONE);
- /* Instance pop. */
- if(num_hits_in_instance) {
- float t_fac;
+ /* Instance pop. */
+ if (num_hits_in_instance) {
+ float t_fac;
# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
+ bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
+ bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
# endif
- /* scale isect->t to adjust for instancing */
- for(int i = 0; i < num_hits_in_instance; i++) {
- (isect_array-i-1)->t *= t_fac;
- }
- }
- else {
+ /* scale isect->t to adjust for instancing */
+ for (int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array - i - 1)->t *= t_fac;
+ }
+ }
+ else {
# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
+ bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
+ bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
# endif
- }
+ }
- isect_t = tmax;
- isect_array->t = isect_t;
+ isect_t = tmax;
+ isect_array->t = isect_t;
# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
+ tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect_t);
+ tfar = ssef(isect_t);
# endif
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
# endif
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr];
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ object = OBJECT_NONE;
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
- return false;
+ return false;
}
ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
@@ -397,35 +387,20 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
const uint max_hits,
uint *num_hits)
{
- switch(kernel_data.bvh.bvh_layout) {
+ switch (kernel_data.bvh.bvh_layout) {
#ifdef __KERNEL_AVX2__
- case BVH_LAYOUT_BVH8:
- return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
- ray,
- isect_array,
- visibility,
- max_hits,
- num_hits);
+ case BVH_LAYOUT_BVH8:
+ return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
#endif
#ifdef __QBVH__
- case BVH_LAYOUT_BVH4:
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
- ray,
- isect_array,
- visibility,
- max_hits,
- num_hits);
+ case BVH_LAYOUT_BVH4:
+ return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
#endif
- case BVH_LAYOUT_BVH2:
- return BVH_FUNCTION_FULL_NAME(BVH)(kg,
- ray,
- isect_array,
- visibility,
- max_hits,
- num_hits);
- }
- kernel_assert(!"Should not happen");
- return false;
+ case BVH_LAYOUT_BVH2:
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
+ }
+ kernel_assert(!"Should not happen");
+ return false;
}
#undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h
index 76d4cab663d..34a06d003bb 100644
--- a/intern/cycles/kernel/bvh/bvh_traversal.h
+++ b/intern/cycles/kernel/bvh/bvh_traversal.h
@@ -47,374 +47,362 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
Intersection *isect,
const uint visibility
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- , uint *lcg_state,
+ ,
+ uint *lcg_state,
float difl,
float extmax
#endif
- )
+)
{
- /* todo:
- * - test if pushing distance on the stack helps (for non shadow rays)
- * - separate version for shadow rays
- * - likely and unlikely for if() statements
- * - test restrict attribute for pointers
- */
-
- /* traversal stack in CUDA thread-local memory */
- int traversal_stack[BVH_STACK_SIZE];
- traversal_stack[0] = ENTRYPOINT_SENTINEL;
-
- /* traversal variables in registers */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* ray parameters in registers */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
+ /* todo:
+ * - test if pushing distance on the stack helps (for non shadow rays)
+ * - separate version for shadow rays
+ * - likely and unlikely for if() statements
+ * - test restrict attribute for pointers
+ */
+
+ /* traversal stack in CUDA thread-local memory */
+ int traversal_stack[BVH_STACK_SIZE];
+ traversal_stack[0] = ENTRYPOINT_SENTINEL;
+
+ /* traversal variables in registers */
+ int stack_ptr = 0;
+ int node_addr = kernel_data.bvh.root;
+
+ /* ray parameters in registers */
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
+ Transform ob_itfm;
#endif
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
+ isect->t = ray->t;
+ isect->u = 0.0f;
+ isect->v = 0.0f;
+ isect->prim = PRIM_NONE;
+ isect->object = OBJECT_NONE;
- BVH_DEBUG_INIT();
+ BVH_DEBUG_INIT();
#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
+ const shuffle_swap_t shuf_identity = shuffle_swap_identity();
+ const shuffle_swap_t shuf_swap = shuffle_swap_swap();
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
+ const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+ ssef Psplat[3], idirsplat[3];
# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect->t);
+ ssef tnear(0.0f), tfar(isect->t);
# endif
- shuffle_swap_t shufflexyz[3];
+ shuffle_swap_t shufflexyz[3];
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
+ ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
- /* traversal loop */
- do {
- do {
- /* traverse internal nodes */
- while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- int node_addr_child1, traverse_mask;
- float dist[2];
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ /* traversal loop */
+ do {
+ do {
+ /* traverse internal nodes */
+ while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ int node_addr_child1, traverse_mask;
+ float dist[2];
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
#if !defined(__KERNEL_SSE2__)
# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- if(difl != 0.0f) {
- traverse_mask = NODE_INTERSECT_ROBUST(kg,
- P,
+ if (difl != 0.0f) {
+ traverse_mask = NODE_INTERSECT_ROBUST(kg,
+ P,
# if BVH_FEATURE(BVH_HAIR)
- dir,
+ dir,
# endif
- idir,
- isect->t,
- difl,
- extmax,
- node_addr,
- visibility,
- dist);
- }
- else
+ idir,
+ isect->t,
+ difl,
+ extmax,
+ node_addr,
+ visibility,
+ dist);
+ }
+ else
# endif
- {
- traverse_mask = NODE_INTERSECT(kg,
- P,
-# if BVH_FEATURE(BVH_HAIR)
- dir,
-# endif
- idir,
- isect->t,
- node_addr,
- visibility,
- dist);
- }
+ {
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
+# if BVH_FEATURE(BVH_HAIR)
+ dir,
+# endif
+ idir,
+ isect->t,
+ node_addr,
+ visibility,
+ dist);
+ }
#else // __KERNEL_SSE2__
# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- if(difl != 0.0f) {
- traverse_mask = NODE_INTERSECT_ROBUST(kg,
- P,
- dir,
+ if (difl != 0.0f) {
+ traverse_mask = NODE_INTERSECT_ROBUST(kg,
+ P,
+ dir,
# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
+ tnear,
+ tfar,
# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- difl,
- extmax,
- node_addr,
- visibility,
- dist);
- }
- else
+ tsplat,
+ Psplat,
+ idirsplat,
+ shufflexyz,
+ difl,
+ extmax,
+ node_addr,
+ visibility,
+ dist);
+ }
+ else
# endif
- {
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- visibility,
- dist);
- }
+ {
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
+ dir,
+# if BVH_FEATURE(BVH_HAIR)
+ tnear,
+ tfar,
+# endif
+ tsplat,
+ Psplat,
+ idirsplat,
+ shufflexyz,
+ node_addr,
+ visibility,
+ dist);
+ }
#endif // __KERNEL_SSE2__
- node_addr = __float_as_int(cnodes.z);
- node_addr_child1 = __float_as_int(cnodes.w);
-
- if(traverse_mask == 3) {
- /* Both children were intersected, push the farther one. */
- bool is_closest_child1 = (dist[1] < dist[0]);
- if(is_closest_child1) {
- int tmp = node_addr;
- node_addr = node_addr_child1;
- node_addr_child1 = tmp;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_STACK_SIZE);
- traversal_stack[stack_ptr] = node_addr_child1;
- }
- else {
- /* One child was intersected. */
- if(traverse_mask == 2) {
- node_addr = node_addr_child1;
- }
- else if(traverse_mask == 0) {
- /* Neither child was intersected. */
- node_addr = traversal_stack[stack_ptr];
- --stack_ptr;
- }
- }
- BVH_DEBUG_NEXT_NODE();
- }
-
- /* if node is leaf, fetch triangle list */
- if(node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
- int prim_addr = __float_as_int(leaf.x);
+ node_addr = __float_as_int(cnodes.z);
+ node_addr_child1 = __float_as_int(cnodes.w);
+
+ if (traverse_mask == 3) {
+ /* Both children were intersected, push the farther one. */
+ bool is_closest_child1 = (dist[1] < dist[0]);
+ if (is_closest_child1) {
+ int tmp = node_addr;
+ node_addr = node_addr_child1;
+ node_addr_child1 = tmp;
+ }
+
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_STACK_SIZE);
+ traversal_stack[stack_ptr] = node_addr_child1;
+ }
+ else {
+ /* One child was intersected. */
+ if (traverse_mask == 2) {
+ node_addr = node_addr_child1;
+ }
+ else if (traverse_mask == 0) {
+ /* Neither child was intersected. */
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
+ }
+ }
+ BVH_DEBUG_NEXT_NODE();
+ }
+
+ /* if node is leaf, fetch triangle list */
+ if (node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+ int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
- if(prim_addr >= 0) {
+ if (prim_addr >= 0) {
#endif
- const int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* pop */
- node_addr = traversal_stack[stack_ptr];
- --stack_ptr;
-
- /* primitive intersection */
- switch(type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- for(; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if(triangle_intersect(kg,
- isect,
- P,
- dir,
- visibility,
- object,
- prim_addr))
- {
- /* shadow ray early termination */
+ const int prim_addr2 = __float_as_int(leaf.y);
+ const uint type = __float_as_int(leaf.w);
+
+ /* pop */
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
+
+ /* primitive intersection */
+ switch (type & PRIMITIVE_ALL) {
+ case PRIMITIVE_TRIANGLE: {
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ BVH_DEBUG_NEXT_INTERSECTION();
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
+ /* shadow ray early termination */
#if defined(__KERNEL_SSE2__)
- if(visibility & PATH_RAY_SHADOW_OPAQUE)
- return true;
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+ if (visibility & PATH_RAY_SHADOW_OPAQUE)
+ return true;
+ tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
+ tfar = ssef(isect->t);
# endif
#else
- if(visibility & PATH_RAY_SHADOW_OPAQUE)
- return true;
+ if (visibility & PATH_RAY_SHADOW_OPAQUE)
+ return true;
#endif
- }
- }
- break;
- }
+ }
+ }
+ break;
+ }
#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for(; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if(motion_triangle_intersect(kg,
- isect,
- P,
- dir,
- ray->time,
- visibility,
- object,
- prim_addr))
- {
- /* shadow ray early termination */
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ BVH_DEBUG_NEXT_INTERSECTION();
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ if (motion_triangle_intersect(
+ kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
+ /* shadow ray early termination */
# if defined(__KERNEL_SSE2__)
- if(visibility & PATH_RAY_SHADOW_OPAQUE)
- return true;
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+ if (visibility & PATH_RAY_SHADOW_OPAQUE)
+ return true;
+ tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
+ tfar = ssef(isect->t);
# endif
# else
- if(visibility & PATH_RAY_SHADOW_OPAQUE)
- return true;
+ if (visibility & PATH_RAY_SHADOW_OPAQUE)
+ return true;
# endif
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_MOTION) */
+ }
+ }
+ break;
+ }
+#endif /* BVH_FEATURE(BVH_MOTION) */
#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- for(; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
- kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
- bool hit;
- if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(kg,
- isect,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type,
- lcg_state,
- difl,
- extmax);
- }
- else {
- hit = curve_intersect(kg,
- isect,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type,
- lcg_state,
- difl,
- extmax);
- }
- if(hit) {
- /* shadow ray early termination */
+ case PRIMITIVE_CURVE:
+ case PRIMITIVE_MOTION_CURVE: {
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ BVH_DEBUG_NEXT_INTERSECTION();
+ const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
+ kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
+ bool hit;
+ if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
+ hit = cardinal_curve_intersect(kg,
+ isect,
+ P,
+ dir,
+ visibility,
+ object,
+ prim_addr,
+ ray->time,
+ curve_type,
+ lcg_state,
+ difl,
+ extmax);
+ }
+ else {
+ hit = curve_intersect(kg,
+ isect,
+ P,
+ dir,
+ visibility,
+ object,
+ prim_addr,
+ ray->time,
+ curve_type,
+ lcg_state,
+ difl,
+ extmax);
+ }
+ if (hit) {
+ /* shadow ray early termination */
# if defined(__KERNEL_SSE2__)
- if(visibility & PATH_RAY_SHADOW_OPAQUE)
- return true;
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+ if (visibility & PATH_RAY_SHADOW_OPAQUE)
+ return true;
+ tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
+ tfar = ssef(isect->t);
# endif
# else
- if(visibility & PATH_RAY_SHADOW_OPAQUE)
- return true;
+ if (visibility & PATH_RAY_SHADOW_OPAQUE)
+ return true;
# endif
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_HAIR) */
- }
- }
+ }
+ }
+ break;
+ }
+#endif /* BVH_FEATURE(BVH_HAIR) */
+ }
+ }
#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* instance push */
- object = kernel_tex_fetch(__prim_object, -prim_addr-1);
+ else {
+ /* instance push */
+ object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+ isect->t = bvh_instance_motion_push(
+ kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
- isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
+ isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+ tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
+ tfar = ssef(isect->t);
# endif
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
# endif
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_STACK_SIZE);
- traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_STACK_SIZE);
+ traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
- node_addr = kernel_tex_fetch(__object_node, object);
+ node_addr = kernel_tex_fetch(__object_node, object);
- BVH_DEBUG_NEXT_INSTANCE();
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ BVH_DEBUG_NEXT_INSTANCE();
+ }
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
- if(stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
+ if (stack_ptr >= 0) {
+ kernel_assert(object != OBJECT_NONE);
- /* instance pop */
+ /* instance pop */
# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+ isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
+ isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+ tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
+ tfar = ssef(isect->t);
# endif
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
# endif
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr];
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ object = OBJECT_NONE;
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
- return (isect->prim != PRIM_NONE);
+ return (isect->prim != PRIM_NONE);
}
ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
@@ -422,53 +410,57 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
Intersection *isect,
const uint visibility
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- , uint *lcg_state,
+ ,
+ uint *lcg_state,
float difl,
float extmax
#endif
- )
+)
{
- switch(kernel_data.bvh.bvh_layout) {
+ switch (kernel_data.bvh.bvh_layout) {
#ifdef __KERNEL_AVX2__
- case BVH_LAYOUT_BVH8:
- return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
- ray,
- isect,
- visibility
+ case BVH_LAYOUT_BVH8:
+ return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
+ ray,
+ isect,
+ visibility
# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- , lcg_state,
- difl,
- extmax
+ ,
+ lcg_state,
+ difl,
+ extmax
# endif
- );
+ );
#endif
#ifdef __QBVH__
- case BVH_LAYOUT_BVH4:
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
- ray,
- isect,
- visibility
+ case BVH_LAYOUT_BVH4:
+ return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
+ ray,
+ isect,
+ visibility
# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- , lcg_state,
- difl,
- extmax
+ ,
+ lcg_state,
+ difl,
+ extmax
# endif
- );
-#endif /* __QBVH__ */
- case BVH_LAYOUT_BVH2:
- return BVH_FUNCTION_FULL_NAME(BVH)(kg,
- ray,
- isect,
- visibility
+ );
+#endif /* __QBVH__ */
+ case BVH_LAYOUT_BVH2:
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg,
+ ray,
+ isect,
+ visibility
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- , lcg_state,
- difl,
- extmax
+ ,
+ lcg_state,
+ difl,
+ extmax
#endif
- );
- }
- kernel_assert(!"Should not happen");
- return false;
+ );
+ }
+ kernel_assert(!"Should not happen");
+ return false;
}
#undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_types.h b/intern/cycles/kernel/bvh/bvh_types.h
index 4ca0dc2225e..16f3b03f842 100644
--- a/intern/cycles/kernel/bvh/bvh_types.h
+++ b/intern/cycles/kernel/bvh/bvh_types.h
@@ -35,13 +35,13 @@ CCL_NAMESPACE_BEGIN
#define BVH_OSTACK_SIZE 768
/* BVH intersection function variations */
-#define BVH_INSTANCING 1
-#define BVH_MOTION 2
-#define BVH_HAIR 4
-#define BVH_HAIR_MINIMUM_WIDTH 8
+#define BVH_INSTANCING 1
+#define BVH_MOTION 2
+#define BVH_HAIR 4
+#define BVH_HAIR_MINIMUM_WIDTH 8
-#define BVH_NAME_JOIN(x,y) x ## _ ## y
-#define BVH_NAME_EVAL(x,y) BVH_NAME_JOIN(x,y)
+#define BVH_NAME_JOIN(x, y) x##_##y
+#define BVH_NAME_EVAL(x, y) BVH_NAME_JOIN(x, y)
#define BVH_FUNCTION_FULL_NAME(prefix) BVH_NAME_EVAL(prefix, BVH_FUNCTION_NAME)
#define BVH_FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0)
@@ -49,30 +49,30 @@ CCL_NAMESPACE_BEGIN
/* Debugging helpers */
#ifdef __KERNEL_DEBUG__
# define BVH_DEBUG_INIT() \
- do { \
- isect->num_traversed_nodes = 0; \
- isect->num_traversed_instances = 0; \
- isect->num_intersections = 0; \
- } while(0)
+ do { \
+ isect->num_traversed_nodes = 0; \
+ isect->num_traversed_instances = 0; \
+ isect->num_intersections = 0; \
+ } while (0)
# define BVH_DEBUG_NEXT_NODE() \
- do { \
- ++isect->num_traversed_nodes; \
- } while(0)
+ do { \
+ ++isect->num_traversed_nodes; \
+ } while (0)
# define BVH_DEBUG_NEXT_INTERSECTION() \
- do { \
- ++isect->num_intersections; \
- } while(0)
+ do { \
+ ++isect->num_intersections; \
+ } while (0)
# define BVH_DEBUG_NEXT_INSTANCE() \
- do { \
- ++isect->num_traversed_instances; \
- } while(0)
-#else /* __KERNEL_DEBUG__ */
+ do { \
+ ++isect->num_traversed_instances; \
+ } while (0)
+#else /* __KERNEL_DEBUG__ */
# define BVH_DEBUG_INIT()
# define BVH_DEBUG_NEXT_NODE()
# define BVH_DEBUG_NEXT_INTERSECTION()
# define BVH_DEBUG_NEXT_INSTANCE()
-#endif /* __KERNEL_DEBUG__ */
+#endif /* __KERNEL_DEBUG__ */
CCL_NAMESPACE_END
-#endif /* __BVH_TYPES__ */
+#endif /* __BVH_TYPES__ */
diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h
index b8257e3493e..c83b0d783f4 100644
--- a/intern/cycles/kernel/bvh/bvh_volume.h
+++ b/intern/cycles/kernel/bvh/bvh_volume.h
@@ -19,9 +19,9 @@
#ifdef __QBVH__
# include "kernel/bvh/qbvh_volume.h"
-#ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_volume.h"
-#endif
+# ifdef __KERNEL_AVX2__
+# include "kernel/bvh/obvh_volume.h"
+# endif
#endif
#if BVH_FEATURE(BVH_HAIR)
@@ -43,267 +43,260 @@ ccl_device
#else
ccl_device_inline
#endif
-bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility)
+ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
+ const Ray *ray,
+ Intersection *isect,
+ const uint visibility)
{
- /* todo:
- * - test if pushing distance on the stack helps (for non shadow rays)
- * - separate version for shadow rays
- * - likely and unlikely for if() statements
- * - test restrict attribute for pointers
- */
-
- /* traversal stack in CUDA thread-local memory */
- int traversal_stack[BVH_STACK_SIZE];
- traversal_stack[0] = ENTRYPOINT_SENTINEL;
-
- /* traversal variables in registers */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* ray parameters in registers */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
+ /* todo:
+ * - test if pushing distance on the stack helps (for non shadow rays)
+ * - separate version for shadow rays
+ * - likely and unlikely for if() statements
+ * - test restrict attribute for pointers
+ */
+
+ /* traversal stack in CUDA thread-local memory */
+ int traversal_stack[BVH_STACK_SIZE];
+ traversal_stack[0] = ENTRYPOINT_SENTINEL;
+
+ /* traversal variables in registers */
+ int stack_ptr = 0;
+ int node_addr = kernel_data.bvh.root;
+
+ /* ray parameters in registers */
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
+ Transform ob_itfm;
#endif
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
+ isect->t = ray->t;
+ isect->u = 0.0f;
+ isect->v = 0.0f;
+ isect->prim = PRIM_NONE;
+ isect->object = OBJECT_NONE;
#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
+ const shuffle_swap_t shuf_identity = shuffle_swap_identity();
+ const shuffle_swap_t shuf_swap = shuffle_swap_swap();
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
+ const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+ ssef Psplat[3], idirsplat[3];
# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect->t);
+ ssef tnear(0.0f), tfar(isect->t);
# endif
- shuffle_swap_t shufflexyz[3];
+ shuffle_swap_t shufflexyz[3];
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
+ ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
- /* traversal loop */
- do {
- do {
- /* traverse internal nodes */
- while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- int node_addr_child1, traverse_mask;
- float dist[2];
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ /* traversal loop */
+ do {
+ do {
+ /* traverse internal nodes */
+ while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ int node_addr_child1, traverse_mask;
+ float dist[2];
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
#if !defined(__KERNEL_SSE2__)
- traverse_mask = NODE_INTERSECT(kg,
- P,
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
# if BVH_FEATURE(BVH_HAIR)
- dir,
+ dir,
# endif
- idir,
- isect->t,
- node_addr,
- visibility,
- dist);
+ idir,
+ isect->t,
+ node_addr,
+ visibility,
+ dist);
#else // __KERNEL_SSE2__
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
+ dir,
# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
+ tnear,
+ tfar,
# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- visibility,
- dist);
+ tsplat,
+ Psplat,
+ idirsplat,
+ shufflexyz,
+ node_addr,
+ visibility,
+ dist);
#endif // __KERNEL_SSE2__
- node_addr = __float_as_int(cnodes.z);
- node_addr_child1 = __float_as_int(cnodes.w);
-
- if(traverse_mask == 3) {
- /* Both children were intersected, push the farther one. */
- bool is_closest_child1 = (dist[1] < dist[0]);
- if(is_closest_child1) {
- int tmp = node_addr;
- node_addr = node_addr_child1;
- node_addr_child1 = tmp;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_STACK_SIZE);
- traversal_stack[stack_ptr] = node_addr_child1;
- }
- else {
- /* One child was intersected. */
- if(traverse_mask == 2) {
- node_addr = node_addr_child1;
- }
- else if(traverse_mask == 0) {
- /* Neither child was intersected. */
- node_addr = traversal_stack[stack_ptr];
- --stack_ptr;
- }
- }
- }
-
- /* if node is leaf, fetch triangle list */
- if(node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
- int prim_addr = __float_as_int(leaf.x);
+ node_addr = __float_as_int(cnodes.z);
+ node_addr_child1 = __float_as_int(cnodes.w);
+
+ if (traverse_mask == 3) {
+ /* Both children were intersected, push the farther one. */
+ bool is_closest_child1 = (dist[1] < dist[0]);
+ if (is_closest_child1) {
+ int tmp = node_addr;
+ node_addr = node_addr_child1;
+ node_addr_child1 = tmp;
+ }
+
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_STACK_SIZE);
+ traversal_stack[stack_ptr] = node_addr_child1;
+ }
+ else {
+ /* One child was intersected. */
+ if (traverse_mask == 2) {
+ node_addr = node_addr_child1;
+ }
+ else if (traverse_mask == 0) {
+ /* Neither child was intersected. */
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
+ }
+ }
+ }
+
+ /* if node is leaf, fetch triangle list */
+ if (node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+ int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
- if(prim_addr >= 0) {
+ if (prim_addr >= 0) {
#endif
- const int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* pop */
- node_addr = traversal_stack[stack_ptr];
- --stack_ptr;
-
- /* primitive intersection */
- switch(type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- /* intersect ray against primitive */
- for(; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* only primitives from volume object */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- triangle_intersect(kg,
- isect,
- P,
- dir,
- visibility,
- object,
- prim_addr);
- }
- break;
- }
+ const int prim_addr2 = __float_as_int(leaf.y);
+ const uint type = __float_as_int(leaf.w);
+
+ /* pop */
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
+
+ /* primitive intersection */
+ switch (type & PRIMITIVE_ALL) {
+ case PRIMITIVE_TRIANGLE: {
+ /* intersect ray against primitive */
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ /* only primitives from volume object */
+ uint tri_object = (object == OBJECT_NONE) ?
+ kernel_tex_fetch(__prim_object, prim_addr) :
+ object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
+ }
+ break;
+ }
#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- /* intersect ray against primitive */
- for(; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* only primitives from volume object */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- motion_triangle_intersect(kg,
- isect,
- P,
- dir,
- ray->time,
- visibility,
- object,
- prim_addr);
- }
- break;
- }
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ /* intersect ray against primitive */
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ /* only primitives from volume object */
+ uint tri_object = (object == OBJECT_NONE) ?
+ kernel_tex_fetch(__prim_object, prim_addr) :
+ object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ motion_triangle_intersect(
+ kg, isect, P, dir, ray->time, visibility, object, prim_addr);
+ }
+ break;
+ }
#endif
- default: {
- break;
- }
- }
- }
+ default: {
+ break;
+ }
+ }
+ }
#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* instance push */
- object = kernel_tex_fetch(__prim_object, -prim_addr-1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
- if(object_flag & SD_OBJECT_HAS_VOLUME) {
+ else {
+ /* instance push */
+ object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
+ int object_flag = kernel_tex_fetch(__object_flag, object);
+ if (object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+ isect->t = bvh_instance_motion_push(
+ kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
- isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
+ isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+ tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
+ tfar = ssef(isect->t);
# endif
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
# endif
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_STACK_SIZE);
- traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* pop */
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr];
- --stack_ptr;
- }
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_STACK_SIZE);
+ traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
+
+ node_addr = kernel_tex_fetch(__object_node, object);
+ }
+ else {
+ /* pop */
+ object = OBJECT_NONE;
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
+ }
+ }
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
- if(stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
+ if (stack_ptr >= 0) {
+ kernel_assert(object != OBJECT_NONE);
- /* instance pop */
+ /* instance pop */
# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+ isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
+ isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+ tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
+ tfar = ssef(isect->t);
# endif
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
# endif
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr];
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_MOTION) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ object = OBJECT_NONE;
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
+ }
+#endif /* FEATURE(BVH_MOTION) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
- return (isect->prim != PRIM_NONE);
+ return (isect->prim != PRIM_NONE);
}
ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
@@ -311,29 +304,20 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
Intersection *isect,
const uint visibility)
{
- switch(kernel_data.bvh.bvh_layout) {
+ switch (kernel_data.bvh.bvh_layout) {
#ifdef __KERNEL_AVX2__
- case BVH_LAYOUT_BVH8:
- return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
- ray,
- isect,
- visibility);
+ case BVH_LAYOUT_BVH8:
+ return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect, visibility);
#endif
#ifdef __QBVH__
- case BVH_LAYOUT_BVH4:
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
- ray,
- isect,
- visibility);
+ case BVH_LAYOUT_BVH4:
+ return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect, visibility);
#endif
- case BVH_LAYOUT_BVH2:
- return BVH_FUNCTION_FULL_NAME(BVH)(kg,
- ray,
- isect,
- visibility);
- }
- kernel_assert(!"Should not happen");
- return false;
+ case BVH_LAYOUT_BVH2:
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
+ }
+ kernel_assert(!"Should not happen");
+ return false;
}
#undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h
index f3ca4058460..ae8c4d12e8a 100644
--- a/intern/cycles/kernel/bvh/bvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/bvh_volume_all.h
@@ -19,9 +19,9 @@
#ifdef __QBVH__
# include "kernel/bvh/qbvh_volume_all.h"
-#ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_volume_all.h"
-#endif
+# ifdef __KERNEL_AVX2__
+# include "kernel/bvh/obvh_volume_all.h"
+# endif
#endif
#if BVH_FEATURE(BVH_HAIR)
@@ -43,342 +43,337 @@ ccl_device
#else
ccl_device_inline
#endif
-uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect_array,
- const uint max_hits,
- const uint visibility)
+ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
+ const Ray *ray,
+ Intersection *isect_array,
+ const uint max_hits,
+ const uint visibility)
{
- /* todo:
- * - test if pushing distance on the stack helps (for non shadow rays)
- * - separate version for shadow rays
- * - likely and unlikely for if() statements
- * - test restrict attribute for pointers
- */
-
- /* traversal stack in CUDA thread-local memory */
- int traversal_stack[BVH_STACK_SIZE];
- traversal_stack[0] = ENTRYPOINT_SENTINEL;
-
- /* traversal variables in registers */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* ray parameters in registers */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
+ /* todo:
+ * - test if pushing distance on the stack helps (for non shadow rays)
+ * - separate version for shadow rays
+ * - likely and unlikely for if() statements
+ * - test restrict attribute for pointers
+ */
+
+ /* traversal stack in CUDA thread-local memory */
+ int traversal_stack[BVH_STACK_SIZE];
+ traversal_stack[0] = ENTRYPOINT_SENTINEL;
+
+ /* traversal variables in registers */
+ int stack_ptr = 0;
+ int node_addr = kernel_data.bvh.root;
+
+ /* ray parameters in registers */
+ const float tmax = ray->t;
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
+ float isect_t = tmax;
#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
+ Transform ob_itfm;
#endif
#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
+ int num_hits_in_instance = 0;
#endif
- uint num_hits = 0;
- isect_array->t = tmax;
+ uint num_hits = 0;
+ isect_array->t = tmax;
#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
+ const shuffle_swap_t shuf_identity = shuffle_swap_identity();
+ const shuffle_swap_t shuf_swap = shuffle_swap_swap();
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
+ const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+ ssef Psplat[3], idirsplat[3];
# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect_t);
+ ssef tnear(0.0f), tfar(isect_t);
# endif
- shuffle_swap_t shufflexyz[3];
+ shuffle_swap_t shufflexyz[3];
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
+ ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif /* __KERNEL_SSE2__ */
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+#endif /* __KERNEL_SSE2__ */
- /* traversal loop */
- do {
- do {
- /* traverse internal nodes */
- while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- int node_addr_child1, traverse_mask;
- float dist[2];
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ /* traversal loop */
+ do {
+ do {
+ /* traverse internal nodes */
+ while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ int node_addr_child1, traverse_mask;
+ float dist[2];
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
#if !defined(__KERNEL_SSE2__)
- traverse_mask = NODE_INTERSECT(kg,
- P,
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
# if BVH_FEATURE(BVH_HAIR)
- dir,
+ dir,
# endif
- idir,
- isect_t,
- node_addr,
- visibility,
- dist);
+ idir,
+ isect_t,
+ node_addr,
+ visibility,
+ dist);
#else // __KERNEL_SSE2__
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
+ dir,
# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
+ tnear,
+ tfar,
# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- visibility,
- dist);
+ tsplat,
+ Psplat,
+ idirsplat,
+ shufflexyz,
+ node_addr,
+ visibility,
+ dist);
#endif // __KERNEL_SSE2__
- node_addr = __float_as_int(cnodes.z);
- node_addr_child1 = __float_as_int(cnodes.w);
-
- if(traverse_mask == 3) {
- /* Both children were intersected, push the farther one. */
- bool is_closest_child1 = (dist[1] < dist[0]);
- if(is_closest_child1) {
- int tmp = node_addr;
- node_addr = node_addr_child1;
- node_addr_child1 = tmp;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_STACK_SIZE);
- traversal_stack[stack_ptr] = node_addr_child1;
- }
- else {
- /* One child was intersected. */
- if(traverse_mask == 2) {
- node_addr = node_addr_child1;
- }
- else if(traverse_mask == 0) {
- /* Neither child was intersected. */
- node_addr = traversal_stack[stack_ptr];
- --stack_ptr;
- }
- }
- }
-
- /* if node is leaf, fetch triangle list */
- if(node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
- int prim_addr = __float_as_int(leaf.x);
+ node_addr = __float_as_int(cnodes.z);
+ node_addr_child1 = __float_as_int(cnodes.w);
+
+ if (traverse_mask == 3) {
+ /* Both children were intersected, push the farther one. */
+ bool is_closest_child1 = (dist[1] < dist[0]);
+ if (is_closest_child1) {
+ int tmp = node_addr;
+ node_addr = node_addr_child1;
+ node_addr_child1 = tmp;
+ }
+
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_STACK_SIZE);
+ traversal_stack[stack_ptr] = node_addr_child1;
+ }
+ else {
+ /* One child was intersected. */
+ if (traverse_mask == 2) {
+ node_addr = node_addr_child1;
+ }
+ else if (traverse_mask == 0) {
+ /* Neither child was intersected. */
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
+ }
+ }
+ }
+
+ /* if node is leaf, fetch triangle list */
+ if (node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+ int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
- if(prim_addr >= 0) {
+ if (prim_addr >= 0) {
#endif
- const int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- bool hit;
-
- /* pop */
- node_addr = traversal_stack[stack_ptr];
- --stack_ptr;
-
- /* primitive intersection */
- switch(type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- /* intersect ray against primitive */
- for(; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* only primitives from volume object */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- hit = triangle_intersect(kg,
- isect_array,
- P,
- dir,
- visibility,
- object,
- prim_addr);
- if(hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
+ const int prim_addr2 = __float_as_int(leaf.y);
+ const uint type = __float_as_int(leaf.w);
+ bool hit;
+
+ /* pop */
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
+
+ /* primitive intersection */
+ switch (type & PRIMITIVE_ALL) {
+ case PRIMITIVE_TRIANGLE: {
+ /* intersect ray against primitive */
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ /* only primitives from volume object */
+ uint tri_object = (object == OBJECT_NONE) ?
+ kernel_tex_fetch(__prim_object, prim_addr) :
+ object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
+ if (hit) {
+ /* Move on to next entry in intersections array. */
+ isect_array++;
+ num_hits++;
#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
+ num_hits_in_instance++;
#endif
- isect_array->t = isect_t;
- if(num_hits == max_hits) {
+ isect_array->t = isect_t;
+ if (num_hits == max_hits) {
#if BVH_FEATURE(BVH_INSTANCING)
- if(object != OBJECT_NONE) {
+ if (object != OBJECT_NONE) {
# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
+ float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
# else
- Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
+ Transform itfm = object_fetch_transform(
+ kg, object, OBJECT_INVERSE_TRANSFORM);
+ float t_fac = 1.0f / len(transform_direction(&itfm, dir));
# endif
- for(int i = 0; i < num_hits_in_instance; i++) {
- (isect_array-i-1)->t *= t_fac;
- }
- }
-#endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
+ for (int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array - i - 1)->t *= t_fac;
+ }
+ }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+ return num_hits;
+ }
+ }
+ }
+ break;
+ }
#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- /* intersect ray against primitive */
- for(; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* only primitives from volume object */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- hit = motion_triangle_intersect(kg,
- isect_array,
- P,
- dir,
- ray->time,
- visibility,
- object,
- prim_addr);
- if(hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ /* intersect ray against primitive */
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ /* only primitives from volume object */
+ uint tri_object = (object == OBJECT_NONE) ?
+ kernel_tex_fetch(__prim_object, prim_addr) :
+ object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ hit = motion_triangle_intersect(
+ kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
+ if (hit) {
+ /* Move on to next entry in intersections array. */
+ isect_array++;
+ num_hits++;
# if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
+ num_hits_in_instance++;
# endif
- isect_array->t = isect_t;
- if(num_hits == max_hits) {
+ isect_array->t = isect_t;
+ if (num_hits == max_hits) {
# if BVH_FEATURE(BVH_INSTANCING)
- if(object != OBJECT_NONE) {
+ if (object != OBJECT_NONE) {
# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
+ float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
# else
- Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
+ Transform itfm = object_fetch_transform(
+ kg, object, OBJECT_INVERSE_TRANSFORM);
+ float t_fac = 1.0f / len(transform_direction(&itfm, dir));
# endif
- for(int i = 0; i < num_hits_in_instance; i++) {
- (isect_array-i-1)->t *= t_fac;
- }
- }
-# endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
-#endif /* BVH_MOTION */
- default: {
- break;
- }
- }
- }
+ for (int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array - i - 1)->t *= t_fac;
+ }
+ }
+# endif /* BVH_FEATURE(BVH_INSTANCING) */
+ return num_hits;
+ }
+ }
+ }
+ break;
+ }
+#endif /* BVH_MOTION */
+ default: {
+ break;
+ }
+ }
+ }
#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* instance push */
- object = kernel_tex_fetch(__prim_object, -prim_addr-1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
- if(object_flag & SD_OBJECT_HAS_VOLUME) {
+ else {
+ /* instance push */
+ object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
+ int object_flag = kernel_tex_fetch(__object_flag, object);
+ if (object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
- isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
+ isect_t = bvh_instance_motion_push(
+ kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
# else
- isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
+ isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
# endif
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
+ num_hits_in_instance = 0;
+ isect_array->t = isect_t;
# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
+ tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect_t);
+ tfar = ssef(isect_t);
# endif
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
# endif
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_STACK_SIZE);
- traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* pop */
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr];
- --stack_ptr;
- }
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_STACK_SIZE);
+ traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
+
+ node_addr = kernel_tex_fetch(__object_node, object);
+ }
+ else {
+ /* pop */
+ object = OBJECT_NONE;
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
+ }
+ }
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
- if(stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
+ if (stack_ptr >= 0) {
+ kernel_assert(object != OBJECT_NONE);
- /* Instance pop. */
- if(num_hits_in_instance) {
- float t_fac;
+ /* Instance pop. */
+ if (num_hits_in_instance) {
+ float t_fac;
# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
+ bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
+ bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
# endif
- /* Scale isect->t to adjust for instancing. */
- for(int i = 0; i < num_hits_in_instance; i++) {
- (isect_array-i-1)->t *= t_fac;
- }
- }
- else {
+ /* Scale isect->t to adjust for instancing. */
+ for (int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array - i - 1)->t *= t_fac;
+ }
+ }
+ else {
# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
+ bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
+ bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
# endif
- }
+ }
- isect_t = tmax;
- isect_array->t = isect_t;
+ isect_t = tmax;
+ isect_array->t = isect_t;
# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
+ tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect_t);
+ tfar = ssef(isect_t);
# endif
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
# endif
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr];
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ object = OBJECT_NONE;
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
- return num_hits;
+ return num_hits;
}
ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
@@ -387,32 +382,20 @@ ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
const uint max_hits,
const uint visibility)
{
- switch(kernel_data.bvh.bvh_layout) {
+ switch (kernel_data.bvh.bvh_layout) {
#ifdef __KERNEL_AVX2__
- case BVH_LAYOUT_BVH8:
- return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
- ray,
- isect_array,
- max_hits,
- visibility);
+ case BVH_LAYOUT_BVH8:
+ return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect_array, max_hits, visibility);
#endif
#ifdef __QBVH__
- case BVH_LAYOUT_BVH4:
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
- ray,
- isect_array,
- max_hits,
- visibility);
+ case BVH_LAYOUT_BVH4:
+ return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect_array, max_hits, visibility);
#endif
- case BVH_LAYOUT_BVH2:
- return BVH_FUNCTION_FULL_NAME(BVH)(kg,
- ray,
- isect_array,
- max_hits,
- visibility);
- }
- kernel_assert(!"Should not happen");
- return 0;
+ case BVH_LAYOUT_BVH2:
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, max_hits, visibility);
+ }
+ kernel_assert(!"Should not happen");
+ return 0;
}
#undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/obvh_local.h b/intern/cycles/kernel/bvh/obvh_local.h
index f449cefb335..e6bb548bc5b 100644
--- a/intern/cycles/kernel/bvh/obvh_local.h
+++ b/intern/cycles/kernel/bvh/obvh_local.h
@@ -34,372 +34,365 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
uint *lcg_state,
int max_hits)
{
- /* Traversal stack in CUDA thread-local memory. */
- OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+ /* Traversal stack in CUDA thread-local memory. */
+ OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
+ traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_tex_fetch(__object_node, local_object);
+ /* Traversal variables in registers. */
+ int stack_ptr = 0;
+ int node_addr = kernel_tex_fetch(__object_node, local_object);
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = ray->t;
+ /* Ray parameters in registers. */
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
+ float isect_t = ray->t;
- if(local_isect != NULL) {
- local_isect->num_hits = 0;
- }
- kernel_assert((local_isect == NULL) == (max_hits == 0));
+ if (local_isect != NULL) {
+ local_isect->num_hits = 0;
+ }
+ kernel_assert((local_isect == NULL) == (max_hits == 0));
- const int object_flag = kernel_tex_fetch(__object_flag, local_object);
- if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+ const int object_flag = kernel_tex_fetch(__object_flag, local_object);
+ if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
- isect_t = bvh_instance_motion_push(kg,
- local_object,
- ray,
- &P,
- &dir,
- &idir,
- isect_t,
- &ob_itfm);
+ Transform ob_itfm;
+ isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
#else
- isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
+ isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
#endif
- object = local_object;
- }
+ object = local_object;
+ }
- avxf tnear(0.0f), tfar(isect_t);
+ avxf tnear(0.0f), tfar(isect_t);
#if BVH_FEATURE(BVH_HAIR)
- avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+ avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
#endif
- avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+ avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
#ifdef __KERNEL_AVX2__
- float3 P_idir = P*idir;
- avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+ float3 P_idir = P * idir;
+ avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
+ avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
#endif
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- obvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
+ /* Offsets to select the side that becomes the lower or upper bound. */
+ int near_x, near_y, near_z;
+ int far_x, far_y, far_z;
+ obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- avxf dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
+ /* Traversal loop. */
+ do {
+ do {
+ /* Traverse internal nodes. */
+ while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ avxf dist;
+ int child_mask = NODE_INTERSECT(kg,
+ tnear,
+ tfar,
#ifdef __KERNEL_AVX2__
- P_idir4,
+ P_idir4,
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
+ org4,
#endif
#if BVH_FEATURE(BVH_HAIR)
- dir4,
+ dir4,
#endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- node_addr,
- &dist);
+ idir4,
+ near_x,
+ near_y,
+ near_z,
+ far_x,
+ far_y,
+ far_z,
+ node_addr,
+ &dist);
- if(child_mask != 0) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- avxf cnodes;
+ if (child_mask != 0) {
+ float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+ avxf cnodes;
#if BVH_FEATURE(BVH_HAIR)
- if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26);
- }
- else
+ if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+ cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
+ }
+ else
#endif
- {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14);
- }
+ {
+ cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
+ }
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if(child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
+ /* One child is hit, continue with that child. */
+ int r = __bscf(child_mask);
+ if (child_mask == 0) {
+ node_addr = __float_as_int(cnodes[r]);
+ continue;
+ }
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float*)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float*)&dist)[r];
- if(child_mask == 0) {
- if(d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
+ /* Two children are hit, push far child, and continue with
+ * closer child.
+ */
+ int c0 = __float_as_int(cnodes[r]);
+ float d0 = ((float *)&dist)[r];
+ r = __bscf(child_mask);
+ int c1 = __float_as_int(cnodes[r]);
+ float d1 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ if (d1 < d0) {
+ node_addr = c1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
+ continue;
+ }
+ else {
+ node_addr = c0;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ continue;
+ }
+ }
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
+ /* Here starts the slow path for 3 or 4 hit children. We push
+ * all nodes onto the stack to sort them there.
+ */
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
+ /* Three children are hit, push all onto stack and sort 3
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c2 = __float_as_int(cnodes[r]);
+ float d2 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
+ /* Four children are hit, push all onto stack and sort 4
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c3 = __float_as_int(cnodes[r]);
+ float d3 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c3;
+ traversal_stack[stack_ptr].dist = d3;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c3;
+ traversal_stack[stack_ptr].dist = d3;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
- /* Five children are hit, push all onto stack and sort 5
- * stack items, continue with closest child
- */
- r = __bscf(child_mask);
- int c4 = __float_as_int(cnodes[r]);
- float d4 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
- /* Six children are hit, push all onto stack and sort 6
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c5 = __float_as_int(cnodes[r]);
- float d5 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
+ /* Five children are hit, push all onto stack and sort 5
+ * stack items, continue with closest child
+ */
+ r = __bscf(child_mask);
+ int c4 = __float_as_int(cnodes[r]);
+ float d4 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c4;
+ traversal_stack[stack_ptr].dist = d4;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3],
+ &traversal_stack[stack_ptr - 4]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+ /* Six children are hit, push all onto stack and sort 6
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c5 = __float_as_int(cnodes[r]);
+ float d5 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c5;
+ traversal_stack[stack_ptr].dist = d5;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c4;
+ traversal_stack[stack_ptr].dist = d4;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3],
+ &traversal_stack[stack_ptr - 4],
+ &traversal_stack[stack_ptr - 5]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c5;
+ traversal_stack[stack_ptr].dist = d5;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c4;
+ traversal_stack[stack_ptr].dist = d4;
- /* Seven children are hit, push all onto stack and sort 7
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c6 = __float_as_int(cnodes[r]);
- float d6 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
- /* Eight children are hit, push all onto stack and sort 8
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c7 = __float_as_int(cnodes[r]);
- float d7 = ((float*)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c7;
- traversal_stack[stack_ptr].dist = d7;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6],
- &traversal_stack[stack_ptr - 7]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
+ /* Seven children are hit, push all onto stack and sort 7
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c6 = __float_as_int(cnodes[r]);
+ float d6 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c6;
+ traversal_stack[stack_ptr].dist = d6;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3],
+ &traversal_stack[stack_ptr - 4],
+ &traversal_stack[stack_ptr - 5],
+ &traversal_stack[stack_ptr - 6]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+ /* Eight children are hit, push all onto stack and sort 8
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c7 = __float_as_int(cnodes[r]);
+ float d7 = ((float *)&dist)[r];
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c7;
+ traversal_stack[stack_ptr].dist = d7;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c6;
+ traversal_stack[stack_ptr].dist = d6;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3],
+ &traversal_stack[stack_ptr - 4],
+ &traversal_stack[stack_ptr - 5],
+ &traversal_stack[stack_ptr - 6],
+ &traversal_stack[stack_ptr - 7]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ }
- /* If node is leaf, fetch triangle list. */
- if(node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
- int prim_addr = __float_as_int(leaf.x);
+ /* If node is leaf, fetch triangle list. */
+ if (node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+ int prim_addr = __float_as_int(leaf.x);
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
+ int prim_addr2 = __float_as_int(leaf.y);
+ const uint type = __float_as_int(leaf.w);
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
- /* Primitive intersection. */
- switch(type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- /* Intersect ray against primitive, */
- for(; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if(triangle_intersect_local(kg,
- local_isect,
- P,
- dir,
- object,
- local_object,
- prim_addr,
- isect_t,
- lcg_state,
- max_hits))
- {
- return true;
- }
- }
- break;
- }
+ /* Primitive intersection. */
+ switch (type & PRIMITIVE_ALL) {
+ case PRIMITIVE_TRIANGLE: {
+ /* Intersect ray against primitive, */
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ if (triangle_intersect_local(kg,
+ local_isect,
+ P,
+ dir,
+ object,
+ local_object,
+ prim_addr,
+ isect_t,
+ lcg_state,
+ max_hits)) {
+ return true;
+ }
+ }
+ break;
+ }
#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- /* Intersect ray against primitive. */
- for(; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if(motion_triangle_intersect_local(kg,
- local_isect,
- P,
- dir,
- ray->time,
- object,
- local_object,
- prim_addr,
- isect_t,
- lcg_state,
- max_hits))
- {
- return true;
- }
- }
- break;
- }
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ /* Intersect ray against primitive. */
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ if (motion_triangle_intersect_local(kg,
+ local_isect,
+ P,
+ dir,
+ ray->time,
+ object,
+ local_object,
+ prim_addr,
+ isect_t,
+ lcg_state,
+ max_hits)) {
+ return true;
+ }
+ }
+ break;
+ }
#endif
- default:
- break;
- }
- }
- } while(node_addr != ENTRYPOINT_SENTINEL);
- } while(node_addr != ENTRYPOINT_SENTINEL);
- return false;
+ default:
+ break;
+ }
+ }
+ } while (node_addr != ENTRYPOINT_SENTINEL);
+ } while (node_addr != ENTRYPOINT_SENTINEL);
+ return false;
}
#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/obvh_nodes.h b/intern/cycles/kernel/bvh/obvh_nodes.h
index 93f35f6dffb..6831562cade 100644
--- a/intern/cycles/kernel/bvh/obvh_nodes.h
+++ b/intern/cycles/kernel/bvh/obvh_nodes.h
@@ -17,11 +17,11 @@
*/
struct OBVHStackItem {
- int addr;
- float dist;
+ int addr;
+ float dist;
};
-ccl_device_inline void obvh_near_far_idx_calc(const float3& idir,
+ccl_device_inline void obvh_near_far_idx_calc(const float3 &idir,
int *ccl_restrict near_x,
int *ccl_restrict near_y,
int *ccl_restrict near_z,
@@ -31,41 +31,73 @@ ccl_device_inline void obvh_near_far_idx_calc(const float3& idir,
{
#ifdef __KERNEL_SSE__
- *near_x = 0; *far_x = 1;
- *near_y = 2; *far_y = 3;
- *near_z = 4; *far_z = 5;
-
- const size_t mask = movemask(ssef(idir.m128));
-
- const int mask_x = mask & 1;
- const int mask_y = (mask & 2) >> 1;
- const int mask_z = (mask & 4) >> 2;
-
- *near_x += mask_x; *far_x -= mask_x;
- *near_y += mask_y; *far_y -= mask_y;
- *near_z += mask_z; *far_z -= mask_z;
+ *near_x = 0;
+ *far_x = 1;
+ *near_y = 2;
+ *far_y = 3;
+ *near_z = 4;
+ *far_z = 5;
+
+ const size_t mask = movemask(ssef(idir.m128));
+
+ const int mask_x = mask & 1;
+ const int mask_y = (mask & 2) >> 1;
+ const int mask_z = (mask & 4) >> 2;
+
+ *near_x += mask_x;
+ *far_x -= mask_x;
+ *near_y += mask_y;
+ *far_y -= mask_y;
+ *near_z += mask_z;
+ *far_z -= mask_z;
#else
- if(idir.x >= 0.0f) { *near_x = 0; *far_x = 1; } else { *near_x = 1; *far_x = 0; }
- if(idir.y >= 0.0f) { *near_y = 2; *far_y = 3; } else { *near_y = 3; *far_y = 2; }
- if(idir.z >= 0.0f) { *near_z = 4; *far_z = 5; } else { *near_z = 5; *far_z = 4; }
+ if (idir.x >= 0.0f) {
+ *near_x = 0;
+ *far_x = 1;
+ }
+ else {
+ *near_x = 1;
+ *far_x = 0;
+ }
+ if (idir.y >= 0.0f) {
+ *near_y = 2;
+ *far_y = 3;
+ }
+ else {
+ *near_y = 3;
+ *far_y = 2;
+ }
+ if (idir.z >= 0.0f) {
+ *near_z = 4;
+ *far_z = 5;
+ }
+ else {
+ *near_z = 5;
+ *far_z = 4;
+ }
#endif
}
-ccl_device_inline void obvh_item_swap(OBVHStackItem *ccl_restrict a,
- OBVHStackItem *ccl_restrict b)
+ccl_device_inline void obvh_item_swap(OBVHStackItem *ccl_restrict a, OBVHStackItem *ccl_restrict b)
{
- OBVHStackItem tmp = *a;
- *a = *b;
- *b = tmp;
+ OBVHStackItem tmp = *a;
+ *a = *b;
+ *b = tmp;
}
ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
OBVHStackItem *ccl_restrict s2,
OBVHStackItem *ccl_restrict s3)
{
- if(s2->dist < s1->dist) { obvh_item_swap(s2, s1); }
- if(s3->dist < s2->dist) { obvh_item_swap(s3, s2); }
- if(s2->dist < s1->dist) { obvh_item_swap(s2, s1); }
+ if (s2->dist < s1->dist) {
+ obvh_item_swap(s2, s1);
+ }
+ if (s3->dist < s2->dist) {
+ obvh_item_swap(s3, s2);
+ }
+ if (s2->dist < s1->dist) {
+ obvh_item_swap(s2, s1);
+ }
}
ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
@@ -73,11 +105,21 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
OBVHStackItem *ccl_restrict s3,
OBVHStackItem *ccl_restrict s4)
{
- if(s2->dist < s1->dist) { obvh_item_swap(s2, s1); }
- if(s4->dist < s3->dist) { obvh_item_swap(s4, s3); }
- if(s3->dist < s1->dist) { obvh_item_swap(s3, s1); }
- if(s4->dist < s2->dist) { obvh_item_swap(s4, s2); }
- if(s3->dist < s2->dist) { obvh_item_swap(s3, s2); }
+ if (s2->dist < s1->dist) {
+ obvh_item_swap(s2, s1);
+ }
+ if (s4->dist < s3->dist) {
+ obvh_item_swap(s4, s3);
+ }
+ if (s3->dist < s1->dist) {
+ obvh_item_swap(s3, s1);
+ }
+ if (s4->dist < s2->dist) {
+ obvh_item_swap(s4, s2);
+ }
+ if (s3->dist < s2->dist) {
+ obvh_item_swap(s3, s2);
+ }
}
ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
@@ -86,19 +128,19 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
OBVHStackItem *ccl_restrict s4,
OBVHStackItem *ccl_restrict s5)
{
- obvh_stack_sort(s1, s2, s3, s4);
- if(s5->dist < s4->dist) {
- obvh_item_swap(s4, s5);
- if(s4->dist < s3->dist) {
- obvh_item_swap(s3, s4);
- if(s3->dist < s2->dist) {
- obvh_item_swap(s2, s3);
- if(s2->dist < s1->dist) {
- obvh_item_swap(s1, s2);
- }
- }
- }
- }
+ obvh_stack_sort(s1, s2, s3, s4);
+ if (s5->dist < s4->dist) {
+ obvh_item_swap(s4, s5);
+ if (s4->dist < s3->dist) {
+ obvh_item_swap(s3, s4);
+ if (s3->dist < s2->dist) {
+ obvh_item_swap(s2, s3);
+ if (s2->dist < s1->dist) {
+ obvh_item_swap(s1, s2);
+ }
+ }
+ }
+ }
}
ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
@@ -108,22 +150,22 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
OBVHStackItem *ccl_restrict s5,
OBVHStackItem *ccl_restrict s6)
{
- obvh_stack_sort(s1, s2, s3, s4, s5);
- if(s6->dist < s5->dist) {
- obvh_item_swap(s5, s6);
- if(s5->dist < s4->dist) {
- obvh_item_swap(s4, s5);
- if(s4->dist < s3->dist) {
- obvh_item_swap(s3, s4);
- if(s3->dist < s2->dist) {
- obvh_item_swap(s2, s3);
- if(s2->dist < s1->dist) {
- obvh_item_swap(s1, s2);
- }
- }
- }
- }
- }
+ obvh_stack_sort(s1, s2, s3, s4, s5);
+ if (s6->dist < s5->dist) {
+ obvh_item_swap(s5, s6);
+ if (s5->dist < s4->dist) {
+ obvh_item_swap(s4, s5);
+ if (s4->dist < s3->dist) {
+ obvh_item_swap(s3, s4);
+ if (s3->dist < s2->dist) {
+ obvh_item_swap(s2, s3);
+ if (s2->dist < s1->dist) {
+ obvh_item_swap(s1, s2);
+ }
+ }
+ }
+ }
+ }
}
ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
@@ -134,25 +176,25 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
OBVHStackItem *ccl_restrict s6,
OBVHStackItem *ccl_restrict s7)
{
- obvh_stack_sort(s1, s2, s3, s4, s5, s6);
- if(s7->dist < s6->dist) {
- obvh_item_swap(s6, s7);
- if(s6->dist < s5->dist) {
- obvh_item_swap(s5, s6);
- if(s5->dist < s4->dist) {
- obvh_item_swap(s4, s5);
- if(s4->dist < s3->dist) {
- obvh_item_swap(s3, s4);
- if(s3->dist < s2->dist) {
- obvh_item_swap(s2, s3);
- if(s2->dist < s1->dist) {
- obvh_item_swap(s1, s2);
- }
- }
- }
- }
- }
- }
+ obvh_stack_sort(s1, s2, s3, s4, s5, s6);
+ if (s7->dist < s6->dist) {
+ obvh_item_swap(s6, s7);
+ if (s6->dist < s5->dist) {
+ obvh_item_swap(s5, s6);
+ if (s5->dist < s4->dist) {
+ obvh_item_swap(s4, s5);
+ if (s4->dist < s3->dist) {
+ obvh_item_swap(s3, s4);
+ if (s3->dist < s2->dist) {
+ obvh_item_swap(s2, s3);
+ if (s2->dist < s1->dist) {
+ obvh_item_swap(s1, s2);
+ }
+ }
+ }
+ }
+ }
+ }
}
ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
@@ -164,41 +206,41 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
OBVHStackItem *ccl_restrict s7,
OBVHStackItem *ccl_restrict s8)
{
- obvh_stack_sort(s1, s2, s3, s4, s5, s6, s7);
- if(s8->dist < s7->dist) {
- obvh_item_swap(s7, s8);
- if(s7->dist < s6->dist) {
- obvh_item_swap(s6, s7);
- if(s6->dist < s5->dist) {
- obvh_item_swap(s5, s6);
- if(s5->dist < s4->dist) {
- obvh_item_swap(s4, s5);
- if(s4->dist < s3->dist) {
- obvh_item_swap(s3, s4);
- if(s3->dist < s2->dist) {
- obvh_item_swap(s2, s3);
- if(s2->dist < s1->dist) {
- obvh_item_swap(s1, s2);
- }
- }
- }
- }
- }
- }
- }
+ obvh_stack_sort(s1, s2, s3, s4, s5, s6, s7);
+ if (s8->dist < s7->dist) {
+ obvh_item_swap(s7, s8);
+ if (s7->dist < s6->dist) {
+ obvh_item_swap(s6, s7);
+ if (s6->dist < s5->dist) {
+ obvh_item_swap(s5, s6);
+ if (s5->dist < s4->dist) {
+ obvh_item_swap(s4, s5);
+ if (s4->dist < s3->dist) {
+ obvh_item_swap(s3, s4);
+ if (s3->dist < s2->dist) {
+ obvh_item_swap(s2, s3);
+ if (s2->dist < s1->dist) {
+ obvh_item_swap(s1, s2);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
}
/* Axis-aligned nodes intersection */
ccl_device_inline int obvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
- const avxf& isect_near,
- const avxf& isect_far,
+ const avxf &isect_near,
+ const avxf &isect_far,
#ifdef __KERNEL_AVX2__
- const avx3f& org_idir,
+ const avx3f &org_idir,
#else
- const avx3f& org,
+ const avx3f &org,
#endif
- const avx3f& idir,
+ const avx3f &idir,
const int near_x,
const int near_y,
const int near_z,
@@ -208,213 +250,216 @@ ccl_device_inline int obvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg
const int node_addr,
avxf *ccl_restrict dist)
{
- const int offset = node_addr + 2;
+ const int offset = node_addr + 2;
#ifdef __KERNEL_AVX2__
- const avxf tnear_x = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+near_x*2), idir.x, org_idir.x);
- const avxf tnear_y = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+near_y*2), idir.y, org_idir.y);
- const avxf tnear_z = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+near_z*2), idir.z, org_idir.z);
- const avxf tfar_x = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+far_x*2), idir.x, org_idir.x);
- const avxf tfar_y = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+far_y*2), idir.y, org_idir.y);
- const avxf tfar_z = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+far_z*2), idir.z, org_idir.z);
-
- const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near);
- const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far);
- const avxb vmask = tnear <= tfar;
- int mask = (int)movemask(vmask);
- *dist = tnear;
- return mask;
+ const avxf tnear_x = msub(
+ kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, org_idir.x);
+ const avxf tnear_y = msub(
+ kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, org_idir.y);
+ const avxf tnear_z = msub(
+ kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, org_idir.z);
+ const avxf tfar_x = msub(
+ kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, org_idir.x);
+ const avxf tfar_y = msub(
+ kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, org_idir.y);
+ const avxf tfar_z = msub(
+ kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, org_idir.z);
+
+ const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near);
+ const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far);
+ const avxb vmask = tnear <= tfar;
+ int mask = (int)movemask(vmask);
+ *dist = tnear;
+ return mask;
#else
- return 0;
+ return 0;
#endif
}
-ccl_device_inline int obvh_aligned_node_intersect_robust(
- KernelGlobals *ccl_restrict kg,
- const avxf& isect_near,
- const avxf& isect_far,
+ccl_device_inline int obvh_aligned_node_intersect_robust(KernelGlobals *ccl_restrict kg,
+ const avxf &isect_near,
+ const avxf &isect_far,
#ifdef __KERNEL_AVX2__
- const avx3f& P_idir,
+ const avx3f &P_idir,
#else
- const avx3f& P,
+ const avx3f &P,
#endif
- const avx3f& idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- const float difl,
- avxf *ccl_restrict dist)
+ const avx3f &idir,
+ const int near_x,
+ const int near_y,
+ const int near_z,
+ const int far_x,
+ const int far_y,
+ const int far_z,
+ const int node_addr,
+ const float difl,
+ avxf *ccl_restrict dist)
{
- const int offset = node_addr + 2;
+ const int offset = node_addr + 2;
#ifdef __KERNEL_AVX2__
- const avxf tnear_x = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, P_idir.x);
- const avxf tfar_x = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, P_idir.x);
- const avxf tnear_y = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, P_idir.y);
- const avxf tfar_y = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, P_idir.y);
- const avxf tnear_z = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, P_idir.z);
- const avxf tfar_z = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, P_idir.z);
-
- const float round_down = 1.0f - difl;
- const float round_up = 1.0f + difl;
- const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near);
- const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far);
- const avxb vmask = round_down*tnear <= round_up*tfar;
- int mask = (int)movemask(vmask);
- *dist = tnear;
- return mask;
+ const avxf tnear_x = msub(
+ kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, P_idir.x);
+ const avxf tfar_x = msub(
+ kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, P_idir.x);
+ const avxf tnear_y = msub(
+ kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, P_idir.y);
+ const avxf tfar_y = msub(
+ kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, P_idir.y);
+ const avxf tnear_z = msub(
+ kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, P_idir.z);
+ const avxf tfar_z = msub(
+ kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, P_idir.z);
+
+ const float round_down = 1.0f - difl;
+ const float round_up = 1.0f + difl;
+ const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near);
+ const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far);
+ const avxb vmask = round_down * tnear <= round_up * tfar;
+ int mask = (int)movemask(vmask);
+ *dist = tnear;
+ return mask;
#else
- return 0;
+ return 0;
#endif
}
/* Unaligned nodes intersection */
-ccl_device_inline int obvh_unaligned_node_intersect(
- KernelGlobals *ccl_restrict kg,
- const avxf& isect_near,
- const avxf& isect_far,
+ccl_device_inline int obvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg,
+ const avxf &isect_near,
+ const avxf &isect_far,
#ifdef __KERNEL_AVX2__
- const avx3f& org_idir,
+ const avx3f &org_idir,
#endif
- const avx3f& org,
- const avx3f& dir,
- const avx3f& idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- avxf *ccl_restrict dist)
+ const avx3f &org,
+ const avx3f &dir,
+ const avx3f &idir,
+ const int near_x,
+ const int near_y,
+ const int near_z,
+ const int far_x,
+ const int far_y,
+ const int far_z,
+ const int node_addr,
+ avxf *ccl_restrict dist)
{
- const int offset = node_addr;
- const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+2);
- const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+4);
- const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+6);
-
- const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+8);
- const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+10);
- const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+12);
-
- const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+14);
- const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+16);
- const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+18);
-
- const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+20);
- const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+22);
- const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+24);
-
- const avxf aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z,
- aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z,
- aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z;
-
- const avxf aligned_P_x = org.x*tfm_x_x + org.y*tfm_x_y + org.z*tfm_x_z + tfm_t_x,
- aligned_P_y = org.x*tfm_y_x + org.y*tfm_y_y + org.z*tfm_y_z + tfm_t_y,
- aligned_P_z = org.x*tfm_z_x + org.y*tfm_z_y + org.z*tfm_z_z + tfm_t_z;
-
- const avxf neg_one(-1.0f);
- const avxf nrdir_x = neg_one / aligned_dir_x,
- nrdir_y = neg_one / aligned_dir_y,
- nrdir_z = neg_one / aligned_dir_z;
-
- const avxf tlower_x = aligned_P_x * nrdir_x,
- tlower_y = aligned_P_y * nrdir_y,
- tlower_z = aligned_P_z * nrdir_z;
-
- const avxf tupper_x = tlower_x - nrdir_x,
- tupper_y = tlower_y - nrdir_y,
- tupper_z = tlower_z - nrdir_z;
-
- const avxf tnear_x = min(tlower_x, tupper_x);
- const avxf tnear_y = min(tlower_y, tupper_y);
- const avxf tnear_z = min(tlower_z, tupper_z);
- const avxf tfar_x = max(tlower_x, tupper_x);
- const avxf tfar_y = max(tlower_y, tupper_y);
- const avxf tfar_z = max(tlower_z, tupper_z);
- const avxf tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const avxf tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const avxb vmask = tnear <= tfar;
- *dist = tnear;
- return movemask(vmask);
+ const int offset = node_addr;
+ const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 2);
+ const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 4);
+ const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 6);
+
+ const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 8);
+ const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 10);
+ const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 12);
+
+ const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 14);
+ const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 16);
+ const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 18);
+
+ const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 20);
+ const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 22);
+ const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 24);
+
+ const avxf aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
+ aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
+ aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
+
+ const avxf aligned_P_x = org.x * tfm_x_x + org.y * tfm_x_y + org.z * tfm_x_z + tfm_t_x,
+ aligned_P_y = org.x * tfm_y_x + org.y * tfm_y_y + org.z * tfm_y_z + tfm_t_y,
+ aligned_P_z = org.x * tfm_z_x + org.y * tfm_z_y + org.z * tfm_z_z + tfm_t_z;
+
+ const avxf neg_one(-1.0f);
+ const avxf nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
+ nrdir_z = neg_one / aligned_dir_z;
+
+ const avxf tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
+ tlower_z = aligned_P_z * nrdir_z;
+
+ const avxf tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
+ tupper_z = tlower_z - nrdir_z;
+
+ const avxf tnear_x = min(tlower_x, tupper_x);
+ const avxf tnear_y = min(tlower_y, tupper_y);
+ const avxf tnear_z = min(tlower_z, tupper_z);
+ const avxf tfar_x = max(tlower_x, tupper_x);
+ const avxf tfar_y = max(tlower_y, tupper_y);
+ const avxf tfar_z = max(tlower_z, tupper_z);
+ const avxf tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+ const avxf tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+ const avxb vmask = tnear <= tfar;
+ *dist = tnear;
+ return movemask(vmask);
}
-ccl_device_inline int obvh_unaligned_node_intersect_robust(
- KernelGlobals *ccl_restrict kg,
- const avxf& isect_near,
- const avxf& isect_far,
+ccl_device_inline int obvh_unaligned_node_intersect_robust(KernelGlobals *ccl_restrict kg,
+ const avxf &isect_near,
+ const avxf &isect_far,
#ifdef __KERNEL_AVX2__
- const avx3f& P_idir,
+ const avx3f &P_idir,
#endif
- const avx3f& P,
- const avx3f& dir,
- const avx3f& idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- const float difl,
- avxf *ccl_restrict dist)
+ const avx3f &P,
+ const avx3f &dir,
+ const avx3f &idir,
+ const int near_x,
+ const int near_y,
+ const int near_z,
+ const int far_x,
+ const int far_y,
+ const int far_z,
+ const int node_addr,
+ const float difl,
+ avxf *ccl_restrict dist)
{
- const int offset = node_addr;
- const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+2);
- const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+4);
- const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+6);
-
- const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+8);
- const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+10);
- const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+12);
-
- const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+14);
- const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+16);
- const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+18);
-
- const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+20);
- const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+22);
- const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+24);
-
- const avxf aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z,
- aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z,
- aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z;
-
- const avxf aligned_P_x = P.x*tfm_x_x + P.y*tfm_x_y + P.z*tfm_x_z + tfm_t_x,
- aligned_P_y = P.x*tfm_y_x + P.y*tfm_y_y + P.z*tfm_y_z + tfm_t_y,
- aligned_P_z = P.x*tfm_z_x + P.y*tfm_z_y + P.z*tfm_z_z + tfm_t_z;
-
- const avxf neg_one(-1.0f);
- const avxf nrdir_x = neg_one / aligned_dir_x,
- nrdir_y = neg_one / aligned_dir_y,
- nrdir_z = neg_one / aligned_dir_z;
-
- const avxf tlower_x = aligned_P_x * nrdir_x,
- tlower_y = aligned_P_y * nrdir_y,
- tlower_z = aligned_P_z * nrdir_z;
-
- const avxf tupper_x = tlower_x - nrdir_x,
- tupper_y = tlower_y - nrdir_y,
- tupper_z = tlower_z - nrdir_z;
-
- const float round_down = 1.0f - difl;
- const float round_up = 1.0f + difl;
-
- const avxf tnear_x = min(tlower_x, tupper_x);
- const avxf tnear_y = min(tlower_y, tupper_y);
- const avxf tnear_z = min(tlower_z, tupper_z);
- const avxf tfar_x = max(tlower_x, tupper_x);
- const avxf tfar_y = max(tlower_y, tupper_y);
- const avxf tfar_z = max(tlower_z, tupper_z);
-
- const avxf tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const avxf tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const avxb vmask = round_down*tnear <= round_up*tfar;
- *dist = tnear;
- return movemask(vmask);
+ const int offset = node_addr;
+ const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 2);
+ const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 4);
+ const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 6);
+
+ const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 8);
+ const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 10);
+ const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 12);
+
+ const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 14);
+ const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 16);
+ const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 18);
+
+ const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 20);
+ const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 22);
+ const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 24);
+
+ const avxf aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
+ aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
+ aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
+
+ const avxf aligned_P_x = P.x * tfm_x_x + P.y * tfm_x_y + P.z * tfm_x_z + tfm_t_x,
+ aligned_P_y = P.x * tfm_y_x + P.y * tfm_y_y + P.z * tfm_y_z + tfm_t_y,
+ aligned_P_z = P.x * tfm_z_x + P.y * tfm_z_y + P.z * tfm_z_z + tfm_t_z;
+
+ const avxf neg_one(-1.0f);
+ const avxf nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
+ nrdir_z = neg_one / aligned_dir_z;
+
+ const avxf tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
+ tlower_z = aligned_P_z * nrdir_z;
+
+ const avxf tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
+ tupper_z = tlower_z - nrdir_z;
+
+ const float round_down = 1.0f - difl;
+ const float round_up = 1.0f + difl;
+
+ const avxf tnear_x = min(tlower_x, tupper_x);
+ const avxf tnear_y = min(tlower_y, tupper_y);
+ const avxf tnear_z = min(tlower_z, tupper_z);
+ const avxf tfar_x = max(tlower_x, tupper_x);
+ const avxf tfar_y = max(tlower_y, tupper_y);
+ const avxf tfar_z = max(tlower_z, tupper_z);
+
+ const avxf tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+ const avxf tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+ const avxb vmask = round_down * tnear <= round_up * tfar;
+ *dist = tnear;
+ return movemask(vmask);
}
/* Intersectors wrappers.
@@ -422,111 +467,125 @@ ccl_device_inline int obvh_unaligned_node_intersect_robust(
* They'll check node type and call appropriate intersection code.
*/
-ccl_device_inline int obvh_node_intersect(
- KernelGlobals *ccl_restrict kg,
- const avxf& isect_near,
- const avxf& isect_far,
+ccl_device_inline int obvh_node_intersect(KernelGlobals *ccl_restrict kg,
+ const avxf &isect_near,
+ const avxf &isect_far,
#ifdef __KERNEL_AVX2__
- const avx3f& org_idir,
+ const avx3f &org_idir,
#endif
- const avx3f& org,
- const avx3f& dir,
- const avx3f& idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- avxf *ccl_restrict dist)
+ const avx3f &org,
+ const avx3f &dir,
+ const avx3f &idir,
+ const int near_x,
+ const int near_y,
+ const int near_z,
+ const int far_x,
+ const int far_y,
+ const int far_z,
+ const int node_addr,
+ avxf *ccl_restrict dist)
{
- const int offset = node_addr;
- const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
- if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return obvh_unaligned_node_intersect(kg,
- isect_near,
- isect_far,
+ const int offset = node_addr;
+ const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
+ if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+ return obvh_unaligned_node_intersect(kg,
+ isect_near,
+ isect_far,
#ifdef __KERNEL_AVX2__
- org_idir,
+ org_idir,
#endif
- org,
- dir,
- idir,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- node_addr,
- dist);
- }
- else {
- return obvh_aligned_node_intersect(kg,
- isect_near,
- isect_far,
+ org,
+ dir,
+ idir,
+ near_x,
+ near_y,
+ near_z,
+ far_x,
+ far_y,
+ far_z,
+ node_addr,
+ dist);
+ }
+ else {
+ return obvh_aligned_node_intersect(kg,
+ isect_near,
+ isect_far,
#ifdef __KERNEL_AVX2__
- org_idir,
+ org_idir,
#else
- org,
+ org,
#endif
- idir,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- node_addr,
- dist);
- }
+ idir,
+ near_x,
+ near_y,
+ near_z,
+ far_x,
+ far_y,
+ far_z,
+ node_addr,
+ dist);
+ }
}
-ccl_device_inline int obvh_node_intersect_robust(
- KernelGlobals *ccl_restrict kg,
- const avxf& isect_near,
- const avxf& isect_far,
+ccl_device_inline int obvh_node_intersect_robust(KernelGlobals *ccl_restrict kg,
+ const avxf &isect_near,
+ const avxf &isect_far,
#ifdef __KERNEL_AVX2__
- const avx3f& P_idir,
+ const avx3f &P_idir,
#endif
- const avx3f& P,
- const avx3f& dir,
- const avx3f& idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- const float difl,
- avxf *ccl_restrict dist)
+ const avx3f &P,
+ const avx3f &dir,
+ const avx3f &idir,
+ const int near_x,
+ const int near_y,
+ const int near_z,
+ const int far_x,
+ const int far_y,
+ const int far_z,
+ const int node_addr,
+ const float difl,
+ avxf *ccl_restrict dist)
{
- const int offset = node_addr;
- const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
- if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return obvh_unaligned_node_intersect_robust(kg,
- isect_near,
- isect_far,
+ const int offset = node_addr;
+ const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
+ if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+ return obvh_unaligned_node_intersect_robust(kg,
+ isect_near,
+ isect_far,
#ifdef __KERNEL_AVX2__
- P_idir,
+ P_idir,
#endif
- P,
- dir,
- idir,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- node_addr,
- difl,
- dist);
- }
- else {
- return obvh_aligned_node_intersect_robust(kg,
- isect_near,
- isect_far,
+ P,
+ dir,
+ idir,
+ near_x,
+ near_y,
+ near_z,
+ far_x,
+ far_y,
+ far_z,
+ node_addr,
+ difl,
+ dist);
+ }
+ else {
+ return obvh_aligned_node_intersect_robust(kg,
+ isect_near,
+ isect_far,
#ifdef __KERNEL_AVX2__
- P_idir,
+ P_idir,
#else
- P,
+ P,
#endif
- idir,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- node_addr,
- difl,
- dist);
- }
+ idir,
+ near_x,
+ near_y,
+ near_z,
+ far_x,
+ far_y,
+ far_z,
+ node_addr,
+ difl,
+ dist);
+ }
}
diff --git a/intern/cycles/kernel/bvh/obvh_shadow_all.h b/intern/cycles/kernel/bvh/obvh_shadow_all.h
index 10d5422c31c..98efb003788 100644
--- a/intern/cycles/kernel/bvh/obvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/obvh_shadow_all.h
@@ -36,645 +36,635 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
const uint max_hits,
uint *num_hits)
{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
+ /* TODO(sergey):
+ * - Test if pushing distance on the stack helps.
+ * - Likely and unlikely for if() statements.
+ * - Test restrict attribute for pointers.
+ */
+
+ /* Traversal stack in CUDA thread-local memory. */
+ OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
+ traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+
+ /* Traversal variables in registers. */
+ int stack_ptr = 0;
+ int node_addr = kernel_data.bvh.root;
+
+ /* Ray parameters in registers. */
+ const float tmax = ray->t;
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
+ float isect_t = tmax;
#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
+ Transform ob_itfm;
#endif
- *num_hits = 0;
- isect_array->t = tmax;
+ *num_hits = 0;
+ isect_array->t = tmax;
#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
+ int num_hits_in_instance = 0;
#endif
- avxf tnear(0.0f), tfar(isect_t);
+ avxf tnear(0.0f), tfar(isect_t);
#if BVH_FEATURE(BVH_HAIR)
- avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+ avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
#endif
- avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+ avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
#ifdef __KERNEL_AVX2__
- float3 P_idir = P*idir;
- avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+ float3 P_idir = P * idir;
+ avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
+ avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
#endif
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- obvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- (void) inodes;
-
- if(false
+ /* Offsets to select the side that becomes the lower or upper bound. */
+ int near_x, near_y, near_z;
+ int far_x, far_y, far_z;
+ obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+
+ /* Traversal loop. */
+ do {
+ do {
+ /* Traverse internal nodes. */
+ while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+ (void)inodes;
+
+ if (false
#ifdef __VISIBILITY_FLAG__
- || ((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0)
+ || ((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0)
#endif
#if BVH_FEATURE(BVH_MOTION)
- || UNLIKELY(ray->time < inodes.y)
- || UNLIKELY(ray->time > inodes.z)
+ || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
#endif
- ) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- avxf dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
+ ) {
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ avxf dist;
+ int child_mask = NODE_INTERSECT(kg,
+ tnear,
+ tfar,
#ifdef __KERNEL_AVX2__
- P_idir4,
+ P_idir4,
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-//#if !defined(__KERNEL_AVX2__)
- org4,
+ //#if !defined(__KERNEL_AVX2__)
+ org4,
#endif
#if BVH_FEATURE(BVH_HAIR)
- dir4,
+ dir4,
#endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- node_addr,
- &dist);
-
- if(child_mask != 0) {
- avxf cnodes;
+ idir4,
+ near_x,
+ near_y,
+ near_z,
+ far_x,
+ far_y,
+ far_z,
+ node_addr,
+ &dist);
+
+ if (child_mask != 0) {
+ avxf cnodes;
#if BVH_FEATURE(BVH_HAIR)
- if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26);
- }
- else
+ if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+ cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
+ }
+ else
#endif
- {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if(child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float*)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float*)&dist)[r];
- if(child_mask == 0) {
- if(d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
-
- /* Five children are hit, push all onto stack and sort 5
- * stack items, continue with closest child
- */
- r = __bscf(child_mask);
- int c4 = __float_as_int(cnodes[r]);
- float d4 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Six children are hit, push all onto stack and sort 6
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c5 = __float_as_int(cnodes[r]);
- float d5 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
-
- /* Seven children are hit, push all onto stack and sort 7
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c6 = __float_as_int(cnodes[r]);
- float d6 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Eight children are hit, push all onto stack and sort 8
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c7 = __float_as_int(cnodes[r]);
- float d7 = ((float*)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c7;
- traversal_stack[stack_ptr].dist = d7;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6],
- &traversal_stack[stack_ptr - 7]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if(node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
+ {
+ cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
+ }
+
+ /* One child is hit, continue with that child. */
+ int r = __bscf(child_mask);
+ if (child_mask == 0) {
+ node_addr = __float_as_int(cnodes[r]);
+ continue;
+ }
+
+ /* Two children are hit, push far child, and continue with
+ * closer child.
+ */
+ int c0 = __float_as_int(cnodes[r]);
+ float d0 = ((float *)&dist)[r];
+ r = __bscf(child_mask);
+ int c1 = __float_as_int(cnodes[r]);
+ float d1 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ if (d1 < d0) {
+ node_addr = c1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
+ continue;
+ }
+ else {
+ node_addr = c0;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ continue;
+ }
+ }
+
+ /* Here starts the slow path for 3 or 4 hit children. We push
+ * all nodes onto the stack to sort them there.
+ */
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
+
+ /* Three children are hit, push all onto stack and sort 3
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c2 = __float_as_int(cnodes[r]);
+ float d2 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ /* Four children are hit, push all onto stack and sort 4
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c3 = __float_as_int(cnodes[r]);
+ float d3 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c3;
+ traversal_stack[stack_ptr].dist = d3;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c3;
+ traversal_stack[stack_ptr].dist = d3;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+
+ /* Five children are hit, push all onto stack and sort 5
+ * stack items, continue with closest child
+ */
+ r = __bscf(child_mask);
+ int c4 = __float_as_int(cnodes[r]);
+ float d4 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c4;
+ traversal_stack[stack_ptr].dist = d4;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3],
+ &traversal_stack[stack_ptr - 4]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ /* Six children are hit, push all onto stack and sort 6
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c5 = __float_as_int(cnodes[r]);
+ float d5 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c5;
+ traversal_stack[stack_ptr].dist = d5;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c4;
+ traversal_stack[stack_ptr].dist = d4;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3],
+ &traversal_stack[stack_ptr - 4],
+ &traversal_stack[stack_ptr - 5]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c5;
+ traversal_stack[stack_ptr].dist = d5;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c4;
+ traversal_stack[stack_ptr].dist = d4;
+
+ /* Seven children are hit, push all onto stack and sort 7
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c6 = __float_as_int(cnodes[r]);
+ float d6 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c6;
+ traversal_stack[stack_ptr].dist = d6;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3],
+ &traversal_stack[stack_ptr - 4],
+ &traversal_stack[stack_ptr - 5],
+ &traversal_stack[stack_ptr - 6]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ /* Eight children are hit, push all onto stack and sort 8
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c7 = __float_as_int(cnodes[r]);
+ float d7 = ((float *)&dist)[r];
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c7;
+ traversal_stack[stack_ptr].dist = d7;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c6;
+ traversal_stack[stack_ptr].dist = d6;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3],
+ &traversal_stack[stack_ptr - 4],
+ &traversal_stack[stack_ptr - 5],
+ &traversal_stack[stack_ptr - 6],
+ &traversal_stack[stack_ptr - 7]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ }
+
+ /* If node is leaf, fetch triangle list. */
+ if (node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
+ if ((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) {
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
#endif
- int prim_addr = __float_as_int(leaf.x);
+ int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
- if(prim_addr >= 0) {
+ if (prim_addr >= 0) {
#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- if(p_type == PRIMITIVE_TRIANGLE) {
- int prim_count = prim_addr2 - prim_addr;
- if(prim_count < 3) {
- while(prim_addr < prim_addr2) {
- kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
- int hit = triangle_intersect(kg,
- isect_array,
- P,
- dir,
- PATH_RAY_SHADOW,
- object,
- prim_addr);
- /* Shadow ray early termination. */
- if(hit) {
- /* detect if this surface has a shader with transparent shadows */
-
- /* todo: optimize so primitive visibility flag indicates if
- * the primitive has a transparent shadow shader? */
- int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
- int shader = 0;
+ int prim_addr2 = __float_as_int(leaf.y);
+ const uint type = __float_as_int(leaf.w);
+ const uint p_type = type & PRIMITIVE_ALL;
+
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+
+ /* Primitive intersection. */
+ if (p_type == PRIMITIVE_TRIANGLE) {
+ int prim_count = prim_addr2 - prim_addr;
+ if (prim_count < 3) {
+ while (prim_addr < prim_addr2) {
+ kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) ==
+ p_type);
+ int hit = triangle_intersect(
+ kg, isect_array, P, dir, PATH_RAY_SHADOW, object, prim_addr);
+ /* Shadow ray early termination. */
+ if (hit) {
+ /* detect if this surface has a shader with transparent shadows */
+
+ /* todo: optimize so primitive visibility flag indicates if
+ * the primitive has a transparent shadow shader? */
+ int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
+ int shader = 0;
#ifdef __HAIR__
- if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
+ if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
#endif
- {
- shader = kernel_tex_fetch(__tri_shader, prim);
- }
+ {
+ shader = kernel_tex_fetch(__tri_shader, prim);
+ }
#ifdef __HAIR__
- else {
- float4 str = kernel_tex_fetch(__curves, prim);
- shader = __float_as_int(str.z);
- }
+ else {
+ float4 str = kernel_tex_fetch(__curves, prim);
+ shader = __float_as_int(str.z);
+ }
#endif
- int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-
- /* if no transparent shadows, all light is blocked */
- if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
- return true;
- }
- /* if maximum number of hits reached, block all light */
- else if(*num_hits == max_hits) {
- return true;
- }
-
- /* move on to next entry in intersections array */
- isect_array++;
- (*num_hits)++;
+ int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
+
+ /* if no transparent shadows, all light is blocked */
+ if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
+ return true;
+ }
+ /* if maximum number of hits reached, block all light */
+ else if (*num_hits == max_hits) {
+ return true;
+ }
+
+ /* move on to next entry in intersections array */
+ isect_array++;
+ (*num_hits)++;
#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
+ num_hits_in_instance++;
#endif
- isect_array->t = isect_t;
- }
+ isect_array->t = isect_t;
+ }
- prim_addr++;
- } //while
- } else {
- kernel_assert((kernel_tex_fetch(__prim_type, (prim_addr)) & PRIMITIVE_ALL) == p_type);
+ prim_addr++;
+ } //while
+ }
+ else {
+ kernel_assert((kernel_tex_fetch(__prim_type, (prim_addr)) & PRIMITIVE_ALL) ==
+ p_type);
#if BVH_FEATURE(BVH_INSTANCING)
- int* nhiptr = &num_hits_in_instance;
+ int *nhiptr = &num_hits_in_instance;
#else
- int nhi= 0;
- int *nhiptr = &nhi;
+ int nhi = 0;
+ int *nhiptr = &nhi;
#endif
- int result = triangle_intersect8(kg,
- &isect_array,
- P,
- dir,
- PATH_RAY_SHADOW,
- object,
- prim_addr,
- prim_count,
- num_hits,
- max_hits,
- nhiptr,
- isect_t);
- if(result == 2) {
- return true;
- }
- } // prim_count
- } // PRIMITIVE_TRIANGLE
- else {
- while(prim_addr < prim_addr2) {
- kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
+ int result = triangle_intersect8(kg,
+ &isect_array,
+ P,
+ dir,
+ PATH_RAY_SHADOW,
+ object,
+ prim_addr,
+ prim_count,
+ num_hits,
+ max_hits,
+ nhiptr,
+ isect_t);
+ if (result == 2) {
+ return true;
+ }
+ } // prim_count
+ } // PRIMITIVE_TRIANGLE
+ else {
+ while (prim_addr < prim_addr2) {
+ kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
#ifdef __SHADOW_TRICKS__
- uint tri_object = (object == OBJECT_NONE)
- ? kernel_tex_fetch(__prim_object, prim_addr)
- : object;
- if(tri_object == skip_object) {
- ++prim_addr;
- continue;
- }
+ uint tri_object = (object == OBJECT_NONE) ?
+ kernel_tex_fetch(__prim_object, prim_addr) :
+ object;
+ if (tri_object == skip_object) {
+ ++prim_addr;
+ continue;
+ }
#endif
- bool hit;
+ bool hit;
- /* todo: specialized intersect functions which don't fill in
- * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
- * might give a few % performance improvement */
+ /* todo: specialized intersect functions which don't fill in
+ * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
+ * might give a few % performance improvement */
- switch(p_type) {
+ switch (p_type) {
#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- hit = motion_triangle_intersect(kg,
- isect_array,
- P,
- dir,
- ray->time,
- PATH_RAY_SHADOW,
- object,
- prim_addr);
- break;
- }
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ hit = motion_triangle_intersect(
+ kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, prim_addr);
+ break;
+ }
#endif
#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
- if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(kg,
- isect_array,
- P,
- dir,
- PATH_RAY_SHADOW,
- object,
- prim_addr,
- ray->time,
- curve_type,
- NULL,
- 0, 0);
- }
- else {
- hit = curve_intersect(kg,
- isect_array,
- P,
- dir,
- PATH_RAY_SHADOW,
- object,
- prim_addr,
- ray->time,
- curve_type,
- NULL,
- 0, 0);
- }
- break;
- }
+ case PRIMITIVE_CURVE:
+ case PRIMITIVE_MOTION_CURVE: {
+ const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
+ if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
+ hit = cardinal_curve_intersect(kg,
+ isect_array,
+ P,
+ dir,
+ PATH_RAY_SHADOW,
+ object,
+ prim_addr,
+ ray->time,
+ curve_type,
+ NULL,
+ 0,
+ 0);
+ }
+ else {
+ hit = curve_intersect(kg,
+ isect_array,
+ P,
+ dir,
+ PATH_RAY_SHADOW,
+ object,
+ prim_addr,
+ ray->time,
+ curve_type,
+ NULL,
+ 0,
+ 0);
+ }
+ break;
+ }
#endif
- default: {
- hit = false;
- break;
- }
- }
+ default: {
+ hit = false;
+ break;
+ }
+ }
- /* Shadow ray early termination. */
- if(hit) {
- /* detect if this surface has a shader with transparent shadows */
+ /* Shadow ray early termination. */
+ if (hit) {
+ /* detect if this surface has a shader with transparent shadows */
- /* todo: optimize so primitive visibility flag indicates if
- * the primitive has a transparent shadow shader? */
- int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
- int shader = 0;
+ /* todo: optimize so primitive visibility flag indicates if
+ * the primitive has a transparent shadow shader? */
+ int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
+ int shader = 0;
#ifdef __HAIR__
- if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
+ if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
#endif
- {
- shader = kernel_tex_fetch(__tri_shader, prim);
- }
+ {
+ shader = kernel_tex_fetch(__tri_shader, prim);
+ }
#ifdef __HAIR__
- else {
- float4 str = kernel_tex_fetch(__curves, prim);
- shader = __float_as_int(str.z);
- }
+ else {
+ float4 str = kernel_tex_fetch(__curves, prim);
+ shader = __float_as_int(str.z);
+ }
#endif
- int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-
- /* if no transparent shadows, all light is blocked */
- if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
- return true;
- }
- /* if maximum number of hits reached, block all light */
- else if(*num_hits == max_hits) {
- return true;
- }
-
- /* move on to next entry in intersections array */
- isect_array++;
- (*num_hits)++;
+ int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
+
+ /* if no transparent shadows, all light is blocked */
+ if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
+ return true;
+ }
+ /* if maximum number of hits reached, block all light */
+ else if (*num_hits == max_hits) {
+ return true;
+ }
+
+ /* move on to next entry in intersections array */
+ isect_array++;
+ (*num_hits)++;
#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
+ num_hits_in_instance++;
#endif
- isect_array->t = isect_t;
- }
+ isect_array->t = isect_t;
+ }
- prim_addr++;
- }//while prim
- }
- }
+ prim_addr++;
+ } //while prim
+ }
+ }
#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr-1);
+ else {
+ /* Instance push. */
+ object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
# if BVH_FEATURE(BVH_MOTION)
- isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
+ isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
# else
- isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
+ isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
# endif
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
+ num_hits_in_instance = 0;
+ isect_array->t = isect_t;
- obvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
- tfar = avxf(isect_t);
+ obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+ tfar = avxf(isect_t);
# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+ dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+ idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
+ P_idir = P * idir;
+ P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
+ org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
# endif
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ node_addr = kernel_tex_fetch(__object_node, object);
+ }
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
- if(stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
+ if (stack_ptr >= 0) {
+ kernel_assert(object != OBJECT_NONE);
- /* Instance pop. */
- if(num_hits_in_instance) {
- float t_fac;
+ /* Instance pop. */
+ if (num_hits_in_instance) {
+ float t_fac;
# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
+ bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
+ bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
# endif
- /* Scale isect->t to adjust for instancing. */
- for(int i = 0; i < num_hits_in_instance; i++) {
- (isect_array-i-1)->t *= t_fac;
- }
- }
- else {
+ /* Scale isect->t to adjust for instancing. */
+ for (int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array - i - 1)->t *= t_fac;
+ }
+ }
+ else {
# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
+ bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
+ bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
# endif
- }
+ }
- isect_t = tmax;
- isect_array->t = isect_t;
+ isect_t = tmax;
+ isect_array->t = isect_t;
- obvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
- tfar = avxf(isect_t);
+ obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+ tfar = avxf(isect_t);
# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+ dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+ idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
+ P_idir = P * idir;
+ P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
+ org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
# endif
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ object = OBJECT_NONE;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
- return false;
+ return false;
}
#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/obvh_traversal.h b/intern/cycles/kernel/bvh/obvh_traversal.h
index 5df7a3be515..86b1de48aaa 100644
--- a/intern/cycles/kernel/bvh/obvh_traversal.h
+++ b/intern/cycles/kernel/bvh/obvh_traversal.h
@@ -37,598 +37,583 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
Intersection *isect,
const uint visibility
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- ,uint *lcg_state,
+ ,
+ uint *lcg_state,
float difl,
float extmax
#endif
- )
+)
{
- /* Traversal stack in CUDA thread-local memory. */
- OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
- traversal_stack[0].dist = -FLT_MAX;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
- float node_dist = -FLT_MAX;
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
+ /* Traversal stack in CUDA thread-local memory. */
+ OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
+ traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+ traversal_stack[0].dist = -FLT_MAX;
+
+ /* Traversal variables in registers. */
+ int stack_ptr = 0;
+ int node_addr = kernel_data.bvh.root;
+ float node_dist = -FLT_MAX;
+
+ /* Ray parameters in registers. */
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
+ Transform ob_itfm;
#endif
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
+ isect->t = ray->t;
+ isect->u = 0.0f;
+ isect->v = 0.0f;
+ isect->prim = PRIM_NONE;
+ isect->object = OBJECT_NONE;
- BVH_DEBUG_INIT();
- avxf tnear(0.0f), tfar(ray->t);
+ BVH_DEBUG_INIT();
+ avxf tnear(0.0f), tfar(ray->t);
#if BVH_FEATURE(BVH_HAIR)
- avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+ avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
#endif
- avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+ avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
#ifdef __KERNEL_AVX2__
- float3 P_idir = P*idir;
- avx3f P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
+ float3 P_idir = P * idir;
+ avx3f P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- avx3f org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
+ avx3f org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
#endif
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- obvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- (void) inodes;
-
- if(UNLIKELY(node_dist > isect->t)
+ /* Offsets to select the side that becomes the lower or upper bound. */
+ int near_x, near_y, near_z;
+ int far_x, far_y, far_z;
+ obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+ /* Traversal loop. */
+ do {
+ do {
+ /* Traverse internal nodes. */
+ while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+ (void)inodes;
+
+ if (UNLIKELY(node_dist > isect->t)
#if BVH_FEATURE(BVH_MOTION)
- || UNLIKELY(ray->time < inodes.y)
- || UNLIKELY(ray->time > inodes.z)
+ || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
#endif
#ifdef __VISIBILITY_FLAG__
- || (__float_as_uint(inodes.x) & visibility) == 0
+ || (__float_as_uint(inodes.x) & visibility) == 0
#endif
- )
- {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
+ ) {
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+ continue;
+ }
- int child_mask;
- avxf dist;
+ int child_mask;
+ avxf dist;
- BVH_DEBUG_NEXT_NODE();
+ BVH_DEBUG_NEXT_NODE();
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- if(difl != 0.0f) {
- /* NOTE: We extend all the child BB instead of fetching
- * and checking visibility flags for each of the,
- *
- * Need to test if doing opposite would be any faster.
- */
- child_mask = NODE_INTERSECT_ROBUST(kg,
- tnear,
- tfar,
+ if (difl != 0.0f) {
+ /* NOTE: We extend all the child BB instead of fetching
+ * and checking visibility flags for each of the,
+ *
+ * Need to test if doing opposite would be any faster.
+ */
+ child_mask = NODE_INTERSECT_ROBUST(kg,
+ tnear,
+ tfar,
# ifdef __KERNEL_AVX2__
- P_idir4,
+ P_idir4,
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
+ org4,
# endif
# if BVH_FEATURE(BVH_HAIR)
- dir4,
+ dir4,
# endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- node_addr,
- difl,
- &dist);
- }
- else
-#endif /* BVH_HAIR_MINIMUM_WIDTH */
- {
- child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
+ idir4,
+ near_x,
+ near_y,
+ near_z,
+ far_x,
+ far_y,
+ far_z,
+ node_addr,
+ difl,
+ &dist);
+ }
+ else
+#endif /* BVH_HAIR_MINIMUM_WIDTH */
+ {
+ child_mask = NODE_INTERSECT(kg,
+ tnear,
+ tfar,
#ifdef __KERNEL_AVX2__
- P_idir4,
+ P_idir4,
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
+ org4,
#endif
#if BVH_FEATURE(BVH_HAIR)
- dir4,
+ dir4,
#endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- node_addr,
- &dist);
- }
-
- if(child_mask != 0) {
- avxf cnodes;
- /* TODO(sergey): Investigate whether moving cnodes upwards
- * gives a speedup (will be different cache pattern but will
- * avoid extra check here).
- */
+ idir4,
+ near_x,
+ near_y,
+ near_z,
+ far_x,
+ far_y,
+ far_z,
+ node_addr,
+ &dist);
+ }
+
+ if (child_mask != 0) {
+ avxf cnodes;
+ /* TODO(sergey): Investigate whether moving cnodes upwards
+ * gives a speedup (will be different cache pattern but will
+ * avoid extra check here).
+ */
#if BVH_FEATURE(BVH_HAIR)
- if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26);
- }
- else
+ if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+ cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
+ }
+ else
#endif
- {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- float d0 = ((float*)&dist)[r];
- if(child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- node_dist = d0;
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float*)&dist)[r];
- if(child_mask == 0) {
- if(d1 < d0) {
- node_addr = c1;
- node_dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- node_dist = d0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
-
- /* Five children are hit, push all onto stack and sort 5
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c4 = __float_as_int(cnodes[r]);
- float d4 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- /* Six children are hit, push all onto stack and sort 6
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c5 = __float_as_int(cnodes[r]);
- float d5 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
-
- /* Seven children are hit, push all onto stack and sort 7
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c6 = __float_as_int(cnodes[r]);
- float d6 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- /* Eight children are hit, push all onto stack and sort 8
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c7 = __float_as_int(cnodes[r]);
- float d7 = ((float*)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c7;
- traversal_stack[stack_ptr].dist = d7;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6],
- &traversal_stack[stack_ptr - 7]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
-
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if(node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
+ {
+ cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
+ }
+
+ /* One child is hit, continue with that child. */
+ int r = __bscf(child_mask);
+ float d0 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ node_addr = __float_as_int(cnodes[r]);
+ node_dist = d0;
+ continue;
+ }
+
+ /* Two children are hit, push far child, and continue with
+ * closer child.
+ */
+ int c0 = __float_as_int(cnodes[r]);
+ r = __bscf(child_mask);
+ int c1 = __float_as_int(cnodes[r]);
+ float d1 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ if (d1 < d0) {
+ node_addr = c1;
+ node_dist = d1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
+ continue;
+ }
+ else {
+ node_addr = c0;
+ node_dist = d0;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ continue;
+ }
+ }
+
+ /* Here starts the slow path for 3 or 4 hit children. We push
+ * all nodes onto the stack to sort them there.
+ */
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
+
+ /* Three children are hit, push all onto stack and sort 3
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c2 = __float_as_int(cnodes[r]);
+ float d2 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+ continue;
+ }
+
+ /* Four children are hit, push all onto stack and sort 4
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c3 = __float_as_int(cnodes[r]);
+ float d3 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c3;
+ traversal_stack[stack_ptr].dist = d3;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+ continue;
+ }
+
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c3;
+ traversal_stack[stack_ptr].dist = d3;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+
+ /* Five children are hit, push all onto stack and sort 5
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c4 = __float_as_int(cnodes[r]);
+ float d4 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c4;
+ traversal_stack[stack_ptr].dist = d4;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3],
+ &traversal_stack[stack_ptr - 4]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+ continue;
+ }
+
+ /* Six children are hit, push all onto stack and sort 6
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c5 = __float_as_int(cnodes[r]);
+ float d5 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c5;
+ traversal_stack[stack_ptr].dist = d5;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c4;
+ traversal_stack[stack_ptr].dist = d4;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3],
+ &traversal_stack[stack_ptr - 4],
+ &traversal_stack[stack_ptr - 5]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+ continue;
+ }
+
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c5;
+ traversal_stack[stack_ptr].dist = d5;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c4;
+ traversal_stack[stack_ptr].dist = d4;
+
+ /* Seven children are hit, push all onto stack and sort 7
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c6 = __float_as_int(cnodes[r]);
+ float d6 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c6;
+ traversal_stack[stack_ptr].dist = d6;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3],
+ &traversal_stack[stack_ptr - 4],
+ &traversal_stack[stack_ptr - 5],
+ &traversal_stack[stack_ptr - 6]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+ continue;
+ }
+
+ /* Eight children are hit, push all onto stack and sort 8
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c7 = __float_as_int(cnodes[r]);
+ float d7 = ((float *)&dist)[r];
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c7;
+ traversal_stack[stack_ptr].dist = d7;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c6;
+ traversal_stack[stack_ptr].dist = d6;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3],
+ &traversal_stack[stack_ptr - 4],
+ &traversal_stack[stack_ptr - 5],
+ &traversal_stack[stack_ptr - 6],
+ &traversal_stack[stack_ptr - 7]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+ continue;
+ }
+
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+ }
+
+ /* If node is leaf, fetch triangle list. */
+ if (node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
#ifdef __VISIBILITY_FLAG__
- if(UNLIKELY((node_dist > isect->t) ||
- ((__float_as_uint(leaf.z) & visibility) == 0)))
+ if (UNLIKELY((node_dist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0)))
#else
- if(UNLIKELY((node_dist > isect->t)))
+ if (UNLIKELY((node_dist > isect->t)))
#endif
- {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
- int prim_addr = __float_as_int(leaf.x);
+ {
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+ continue;
+ }
+ int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
- if(prim_addr >= 0) {
+ if (prim_addr >= 0) {
#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch(type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- int prim_count = prim_addr2 - prim_addr;
- if(prim_count < 3) {
- for(; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if(triangle_intersect(kg,
- isect,
- P,
- dir,
- visibility,
- object,
- prim_addr))
- {
- tfar = avxf(isect->t);
- /* Shadow ray early termination. */
- if(visibility == PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }//for
- }
- else {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if(triangle_intersect8(kg,
- &isect,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- prim_count,
- 0,
- 0,
- NULL,
- 0.0f))
- {
- tfar = avxf(isect->t);
- if(visibility == PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }//prim count
- break;
- }
+ int prim_addr2 = __float_as_int(leaf.y);
+ const uint type = __float_as_int(leaf.w);
+
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+
+ /* Primitive intersection. */
+ switch (type & PRIMITIVE_ALL) {
+ case PRIMITIVE_TRIANGLE: {
+ int prim_count = prim_addr2 - prim_addr;
+ if (prim_count < 3) {
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ BVH_DEBUG_NEXT_INTERSECTION();
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
+ tfar = avxf(isect->t);
+ /* Shadow ray early termination. */
+ if (visibility == PATH_RAY_SHADOW_OPAQUE) {
+ return true;
+ }
+ }
+ } //for
+ }
+ else {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ if (triangle_intersect8(kg,
+ &isect,
+ P,
+ dir,
+ visibility,
+ object,
+ prim_addr,
+ prim_count,
+ 0,
+ 0,
+ NULL,
+ 0.0f)) {
+ tfar = avxf(isect->t);
+ if (visibility == PATH_RAY_SHADOW_OPAQUE) {
+ return true;
+ }
+ }
+ } //prim count
+ break;
+ }
#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for(; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if(motion_triangle_intersect(kg,
- isect,
- P,
- dir,
- ray->time,
- visibility,
- object,
- prim_addr))
- {
- tfar = avxf(isect->t);
- /* Shadow ray early termination. */
- if(visibility == PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_MOTION) */
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ BVH_DEBUG_NEXT_INTERSECTION();
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ if (motion_triangle_intersect(
+ kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
+ tfar = avxf(isect->t);
+ /* Shadow ray early termination. */
+ if (visibility == PATH_RAY_SHADOW_OPAQUE) {
+ return true;
+ }
+ }
+ }
+ break;
+ }
+#endif /* BVH_FEATURE(BVH_MOTION) */
#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- for(; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
- kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
- bool hit;
- if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(kg,
- isect,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type,
- lcg_state,
- difl,
- extmax);
- }
- else {
- hit = curve_intersect(kg,
- isect,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type,
- lcg_state,
- difl,
- extmax);
- }
- if(hit) {
- tfar = avxf(isect->t);
- /* Shadow ray early termination. */
- if(visibility == PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_HAIR) */
- }
- }
+ case PRIMITIVE_CURVE:
+ case PRIMITIVE_MOTION_CURVE: {
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ BVH_DEBUG_NEXT_INTERSECTION();
+ const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
+ kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
+ bool hit;
+ if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
+ hit = cardinal_curve_intersect(kg,
+ isect,
+ P,
+ dir,
+ visibility,
+ object,
+ prim_addr,
+ ray->time,
+ curve_type,
+ lcg_state,
+ difl,
+ extmax);
+ }
+ else {
+ hit = curve_intersect(kg,
+ isect,
+ P,
+ dir,
+ visibility,
+ object,
+ prim_addr,
+ ray->time,
+ curve_type,
+ lcg_state,
+ difl,
+ extmax);
+ }
+ if (hit) {
+ tfar = avxf(isect->t);
+ /* Shadow ray early termination. */
+ if (visibility == PATH_RAY_SHADOW_OPAQUE) {
+ return true;
+ }
+ }
+ }
+ break;
+ }
+#endif /* BVH_FEATURE(BVH_HAIR) */
+ }
+ }
#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr-1);
+ else {
+ /* Instance push. */
+ object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
# if BVH_FEATURE(BVH_MOTION)
- qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
+ qbvh_instance_motion_push(
+ kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
# else
- qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
+ qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
# endif
- obvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
- tfar = avxf(isect->t);
+ obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+ tfar = avxf(isect->t);
# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+ dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+ idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
+ P_idir = P * idir;
+ P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
+ org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
# endif
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
- traversal_stack[stack_ptr].dist = -FLT_MAX;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
+ traversal_stack[stack_ptr].dist = -FLT_MAX;
- node_addr = kernel_tex_fetch(__object_node, object);
+ node_addr = kernel_tex_fetch(__object_node, object);
- BVH_DEBUG_NEXT_INSTANCE();
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ BVH_DEBUG_NEXT_INSTANCE();
+ }
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
- if(stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
+ if (stack_ptr >= 0) {
+ kernel_assert(object != OBJECT_NONE);
- /* Instance pop. */
+ /* Instance pop. */
# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+ isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
+ isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
- obvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
- tfar = avxf(isect->t);
+ obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+ tfar = avxf(isect->t);
# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+ dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+ idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
+ P_idir = P * idir;
+ P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
+ org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
# endif
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ object = OBJECT_NONE;
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
- return (isect->prim != PRIM_NONE);
+ return (isect->prim != PRIM_NONE);
}
#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/obvh_volume.h b/intern/cycles/kernel/bvh/obvh_volume.h
index e66d499dccc..fb41ae783ab 100644
--- a/intern/cycles/kernel/bvh/obvh_volume.h
+++ b/intern/cycles/kernel/bvh/obvh_volume.h
@@ -33,444 +33,448 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
Intersection *isect,
const uint visibility)
{
- /* Traversal stack in CUDA thread-local memory. */
- OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+ /* Traversal stack in CUDA thread-local memory. */
+ OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
+ traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
+ /* Traversal variables in registers. */
+ int stack_ptr = 0;
+ int node_addr = kernel_data.bvh.root;
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
+ /* Ray parameters in registers. */
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
+ Transform ob_itfm;
#endif
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
+ isect->t = ray->t;
+ isect->u = 0.0f;
+ isect->v = 0.0f;
+ isect->prim = PRIM_NONE;
+ isect->object = OBJECT_NONE;
- avxf tnear(0.0f), tfar(ray->t);
+ avxf tnear(0.0f), tfar(ray->t);
#if BVH_FEATURE(BVH_HAIR)
- avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+ avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
#endif
- avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+ avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
#ifdef __KERNEL_AVX2__
- float3 P_idir = P*idir;
- avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+ float3 P_idir = P * idir;
+ avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
+ avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
#endif
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- obvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
+ /* Offsets to select the side that becomes the lower or upper bound. */
+ int near_x, near_y, near_z;
+ int far_x, far_y, far_z;
+ obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ /* Traversal loop. */
+ do {
+ do {
+ /* Traverse internal nodes. */
+ while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(inodes.x) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
+ if ((__float_as_uint(inodes.x) & visibility) == 0) {
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
#endif
- avxf dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
+ avxf dist;
+ int child_mask = NODE_INTERSECT(kg,
+ tnear,
+ tfar,
#ifdef __KERNEL_AVX2__
- P_idir4,
+ P_idir4,
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
+ org4,
#endif
#if BVH_FEATURE(BVH_HAIR)
- dir4,
+ dir4,
#endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- node_addr,
- &dist);
-
- if(child_mask != 0) {
- avxf cnodes;
+ idir4,
+ near_x,
+ near_y,
+ near_z,
+ far_x,
+ far_y,
+ far_z,
+ node_addr,
+ &dist);
+
+ if (child_mask != 0) {
+ avxf cnodes;
#if BVH_FEATURE(BVH_HAIR)
- if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26);
- }
- else
+ if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+ cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
+ }
+ else
#endif
- {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if(child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float*)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float*)&dist)[r];
- if(child_mask == 0) {
- if(d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
-
- /* Five children are hit, push all onto stack and sort 5
- * stack items, continue with closest child
- */
- r = __bscf(child_mask);
- int c4 = __float_as_int(cnodes[r]);
- float d4 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Six children are hit, push all onto stack and sort 6
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c5 = __float_as_int(cnodes[r]);
- float d5 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
-
- /* Seven children are hit, push all onto stack and sort 7
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c6 = __float_as_int(cnodes[r]);
- float d6 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Eight children are hit, push all onto stack and sort 8
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c7 = __float_as_int(cnodes[r]);
- float d7 = ((float*)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c7;
- traversal_stack[stack_ptr].dist = d7;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6],
- &traversal_stack[stack_ptr - 7]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if(node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
-
- if((__float_as_uint(leaf.z) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- int prim_addr = __float_as_int(leaf.x);
+ {
+ cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
+ }
+
+ /* One child is hit, continue with that child. */
+ int r = __bscf(child_mask);
+ if (child_mask == 0) {
+ node_addr = __float_as_int(cnodes[r]);
+ continue;
+ }
+
+ /* Two children are hit, push far child, and continue with
+ * closer child.
+ */
+ int c0 = __float_as_int(cnodes[r]);
+ float d0 = ((float *)&dist)[r];
+ r = __bscf(child_mask);
+ int c1 = __float_as_int(cnodes[r]);
+ float d1 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ if (d1 < d0) {
+ node_addr = c1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
+ continue;
+ }
+ else {
+ node_addr = c0;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ continue;
+ }
+ }
+
+ /* Here starts the slow path for 3 or 4 hit children. We push
+ * all nodes onto the stack to sort them there.
+ */
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
+
+ /* Three children are hit, push all onto stack and sort 3
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c2 = __float_as_int(cnodes[r]);
+ float d2 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ /* Four children are hit, push all onto stack and sort 4
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c3 = __float_as_int(cnodes[r]);
+ float d3 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c3;
+ traversal_stack[stack_ptr].dist = d3;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c3;
+ traversal_stack[stack_ptr].dist = d3;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+
+ /* Five children are hit, push all onto stack and sort 5
+ * stack items, continue with closest child
+ */
+ r = __bscf(child_mask);
+ int c4 = __float_as_int(cnodes[r]);
+ float d4 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c4;
+ traversal_stack[stack_ptr].dist = d4;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3],
+ &traversal_stack[stack_ptr - 4]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ /* Six children are hit, push all onto stack and sort 6
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c5 = __float_as_int(cnodes[r]);
+ float d5 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c5;
+ traversal_stack[stack_ptr].dist = d5;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c4;
+ traversal_stack[stack_ptr].dist = d4;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3],
+ &traversal_stack[stack_ptr - 4],
+ &traversal_stack[stack_ptr - 5]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c5;
+ traversal_stack[stack_ptr].dist = d5;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c4;
+ traversal_stack[stack_ptr].dist = d4;
+
+ /* Seven children are hit, push all onto stack and sort 7
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c6 = __float_as_int(cnodes[r]);
+ float d6 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c6;
+ traversal_stack[stack_ptr].dist = d6;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3],
+ &traversal_stack[stack_ptr - 4],
+ &traversal_stack[stack_ptr - 5],
+ &traversal_stack[stack_ptr - 6]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ /* Eight children are hit, push all onto stack and sort 8
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c7 = __float_as_int(cnodes[r]);
+ float d7 = ((float *)&dist)[r];
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c7;
+ traversal_stack[stack_ptr].dist = d7;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c6;
+ traversal_stack[stack_ptr].dist = d6;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3],
+ &traversal_stack[stack_ptr - 4],
+ &traversal_stack[stack_ptr - 5],
+ &traversal_stack[stack_ptr - 6],
+ &traversal_stack[stack_ptr - 7]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ }
+
+ /* If node is leaf, fetch triangle list. */
+ if (node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+
+ if ((__float_as_uint(leaf.z) & visibility) == 0) {
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
- if(prim_addr >= 0) {
+ if (prim_addr >= 0) {
#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch(p_type) {
- case PRIMITIVE_TRIANGLE: {
- for(; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
- }
- break;
- }
+ int prim_addr2 = __float_as_int(leaf.y);
+ const uint type = __float_as_int(leaf.w);
+ const uint p_type = type & PRIMITIVE_ALL;
+
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+
+ /* Primitive intersection. */
+ switch (p_type) {
+ case PRIMITIVE_TRIANGLE: {
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ /* Only primitives from volume object. */
+ uint tri_object = (object == OBJECT_NONE) ?
+ kernel_tex_fetch(__prim_object, prim_addr) :
+ object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ /* Intersect ray against primitive. */
+ triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
+ }
+ break;
+ }
#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for(; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, prim_addr);
- }
- break;
- }
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ /* Only primitives from volume object. */
+ uint tri_object = (object == OBJECT_NONE) ?
+ kernel_tex_fetch(__prim_object, prim_addr) :
+ object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ /* Intersect ray against primitive. */
+ motion_triangle_intersect(
+ kg, isect, P, dir, ray->time, visibility, object, prim_addr);
+ }
+ break;
+ }
#endif
- }
- }
+ }
+ }
#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr-1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
- if(object_flag & SD_OBJECT_HAS_VOLUME) {
+ else {
+ /* Instance push. */
+ object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
+ int object_flag = kernel_tex_fetch(__object_flag, object);
+ if (object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+ isect->t = bvh_instance_motion_push(
+ kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
- isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
+ isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
- obvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
- tfar = avxf(isect->t);
+ obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+ tfar = avxf(isect->t);
# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+ dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+ idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
+ P_idir = P * idir;
+ P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
+ org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
# endif
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* Pop. */
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
+
+ node_addr = kernel_tex_fetch(__object_node, object);
+ }
+ else {
+ /* Pop. */
+ object = OBJECT_NONE;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ }
+ }
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
- if(stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
+ if (stack_ptr >= 0) {
+ kernel_assert(object != OBJECT_NONE);
- /* Instance pop. */
+ /* Instance pop. */
# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+ isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
+ isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
- obvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
- tfar = avxf(isect->t);
+ obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+ tfar = avxf(isect->t);
# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+ dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+ idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
+ P_idir = P * idir;
+ P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
+ org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
# endif
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ object = OBJECT_NONE;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
- return (isect->prim != PRIM_NONE);
+ return (isect->prim != PRIM_NONE);
}
#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/obvh_volume_all.h b/intern/cycles/kernel/bvh/obvh_volume_all.h
index 5476f79712a..56e2afd4a11 100644
--- a/intern/cycles/kernel/bvh/obvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/obvh_volume_all.h
@@ -34,514 +34,518 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
const uint max_hits,
const uint visibility)
{
- /* Traversal stack in CUDA thread-local memory. */
- OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
+ /* Traversal stack in CUDA thread-local memory. */
+ OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
+ traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+
+ /* Traversal variables in registers. */
+ int stack_ptr = 0;
+ int node_addr = kernel_data.bvh.root;
+
+ /* Ray parameters in registers. */
+ const float tmax = ray->t;
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
+ float isect_t = tmax;
#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
+ Transform ob_itfm;
#endif
- uint num_hits = 0;
- isect_array->t = tmax;
+ uint num_hits = 0;
+ isect_array->t = tmax;
#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
+ int num_hits_in_instance = 0;
#endif
- avxf tnear(0.0f), tfar(isect_t);
+ avxf tnear(0.0f), tfar(isect_t);
#if BVH_FEATURE(BVH_HAIR)
- avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+ avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
#endif
- avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+ avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
#ifdef __KERNEL_AVX2__
- float3 P_idir = P*idir;
- avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+ float3 P_idir = P * idir;
+ avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
+ avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
#endif
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- obvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
+ /* Offsets to select the side that becomes the lower or upper bound. */
+ int near_x, near_y, near_z;
+ int far_x, far_y, far_z;
+ obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ /* Traversal loop. */
+ do {
+ do {
+ /* Traverse internal nodes. */
+ while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(inodes.x) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
+ if ((__float_as_uint(inodes.x) & visibility) == 0) {
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
#endif
- avxf dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
+ avxf dist;
+ int child_mask = NODE_INTERSECT(kg,
+ tnear,
+ tfar,
#ifdef __KERNEL_AVX2__
- P_idir4,
+ P_idir4,
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
+ org4,
#endif
#if BVH_FEATURE(BVH_HAIR)
- dir4,
+ dir4,
#endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- node_addr,
- &dist);
-
- if(child_mask != 0) {
- avxf cnodes;
+ idir4,
+ near_x,
+ near_y,
+ near_z,
+ far_x,
+ far_y,
+ far_z,
+ node_addr,
+ &dist);
+
+ if (child_mask != 0) {
+ avxf cnodes;
#if BVH_FEATURE(BVH_HAIR)
- if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26);
- }
- else
+ if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+ cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
+ }
+ else
#endif
- {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if(child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float*)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float*)&dist)[r];
- if(child_mask == 0) {
- if(d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
-
- /* Five children are hit, push all onto stack and sort 5
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c4 = __float_as_int(cnodes[r]);
- float d4 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Six children are hit, push all onto stack and sort 6
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c5 = __float_as_int(cnodes[r]);
- float d5 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
-
- /* Seven children are hit, push all onto stack and sort 7
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c6 = __float_as_int(cnodes[r]);
- float d6 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Eight children are hit, push all onto stack and sort 8
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c7 = __float_as_int(cnodes[r]);
- float d7 = ((float*)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c7;
- traversal_stack[stack_ptr].dist = d7;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6],
- &traversal_stack[stack_ptr - 7]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if(node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
-
- if((__float_as_uint(leaf.z) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- int prim_addr = __float_as_int(leaf.x);
+ {
+ cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
+ }
+
+ /* One child is hit, continue with that child. */
+ int r = __bscf(child_mask);
+ if (child_mask == 0) {
+ node_addr = __float_as_int(cnodes[r]);
+ continue;
+ }
+
+ /* Two children are hit, push far child, and continue with
+ * closer child.
+ */
+ int c0 = __float_as_int(cnodes[r]);
+ float d0 = ((float *)&dist)[r];
+ r = __bscf(child_mask);
+ int c1 = __float_as_int(cnodes[r]);
+ float d1 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ if (d1 < d0) {
+ node_addr = c1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
+ continue;
+ }
+ else {
+ node_addr = c0;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ continue;
+ }
+ }
+
+ /* Here starts the slow path for 3 or 4 hit children. We push
+ * all nodes onto the stack to sort them there.
+ */
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
+
+ /* Three children are hit, push all onto stack and sort 3
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c2 = __float_as_int(cnodes[r]);
+ float d2 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ /* Four children are hit, push all onto stack and sort 4
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c3 = __float_as_int(cnodes[r]);
+ float d3 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c3;
+ traversal_stack[stack_ptr].dist = d3;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c3;
+ traversal_stack[stack_ptr].dist = d3;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+
+ /* Five children are hit, push all onto stack and sort 5
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c4 = __float_as_int(cnodes[r]);
+ float d4 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c4;
+ traversal_stack[stack_ptr].dist = d4;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3],
+ &traversal_stack[stack_ptr - 4]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ /* Six children are hit, push all onto stack and sort 6
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c5 = __float_as_int(cnodes[r]);
+ float d5 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c5;
+ traversal_stack[stack_ptr].dist = d5;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c4;
+ traversal_stack[stack_ptr].dist = d4;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3],
+ &traversal_stack[stack_ptr - 4],
+ &traversal_stack[stack_ptr - 5]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c5;
+ traversal_stack[stack_ptr].dist = d5;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c4;
+ traversal_stack[stack_ptr].dist = d4;
+
+ /* Seven children are hit, push all onto stack and sort 7
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c6 = __float_as_int(cnodes[r]);
+ float d6 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c6;
+ traversal_stack[stack_ptr].dist = d6;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3],
+ &traversal_stack[stack_ptr - 4],
+ &traversal_stack[stack_ptr - 5],
+ &traversal_stack[stack_ptr - 6]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ /* Eight children are hit, push all onto stack and sort 8
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c7 = __float_as_int(cnodes[r]);
+ float d7 = ((float *)&dist)[r];
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c7;
+ traversal_stack[stack_ptr].dist = d7;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c6;
+ traversal_stack[stack_ptr].dist = d6;
+ obvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3],
+ &traversal_stack[stack_ptr - 4],
+ &traversal_stack[stack_ptr - 5],
+ &traversal_stack[stack_ptr - 6],
+ &traversal_stack[stack_ptr - 7]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ }
+
+ /* If node is leaf, fetch triangle list. */
+ if (node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+
+ if ((__float_as_uint(leaf.z) & visibility) == 0) {
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
- if(prim_addr >= 0) {
+ if (prim_addr >= 0) {
#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
- bool hit;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch(p_type) {
- case PRIMITIVE_TRIANGLE: {
- for(; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
- if(hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
+ int prim_addr2 = __float_as_int(leaf.y);
+ const uint type = __float_as_int(leaf.w);
+ const uint p_type = type & PRIMITIVE_ALL;
+ bool hit;
+
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+
+ /* Primitive intersection. */
+ switch (p_type) {
+ case PRIMITIVE_TRIANGLE: {
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ /* Only primitives from volume object. */
+ uint tri_object = (object == OBJECT_NONE) ?
+ kernel_tex_fetch(__prim_object, prim_addr) :
+ object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ /* Intersect ray against primitive. */
+ hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
+ if (hit) {
+ /* Move on to next entry in intersections array. */
+ isect_array++;
+ num_hits++;
#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
+ num_hits_in_instance++;
#endif
- isect_array->t = isect_t;
- if(num_hits == max_hits) {
+ isect_array->t = isect_t;
+ if (num_hits == max_hits) {
#if BVH_FEATURE(BVH_INSTANCING)
# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
+ float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
# else
- Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
+ Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+ float t_fac = 1.0f / len(transform_direction(&itfm, dir));
# endif
- for(int i = 0; i < num_hits_in_instance; i++) {
- (isect_array-i-1)->t *= t_fac;
- }
-#endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
+ for (int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array - i - 1)->t *= t_fac;
+ }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+ return num_hits;
+ }
+ }
+ }
+ break;
+ }
#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for(; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
- if(hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ /* Only primitives from volume object. */
+ uint tri_object = (object == OBJECT_NONE) ?
+ kernel_tex_fetch(__prim_object, prim_addr) :
+ object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ /* Intersect ray against primitive. */
+ hit = motion_triangle_intersect(
+ kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
+ if (hit) {
+ /* Move on to next entry in intersections array. */
+ isect_array++;
+ num_hits++;
# if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
+ num_hits_in_instance++;
# endif
- isect_array->t = isect_t;
- if(num_hits == max_hits) {
+ isect_array->t = isect_t;
+ if (num_hits == max_hits) {
# if BVH_FEATURE(BVH_INSTANCING)
# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
+ float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
# else
- Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
+ Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+ float t_fac = 1.0f / len(transform_direction(&itfm, dir));
# endif
- for(int i = 0; i < num_hits_in_instance; i++) {
- (isect_array-i-1)->t *= t_fac;
- }
-# endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
+ for (int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array - i - 1)->t *= t_fac;
+ }
+# endif /* BVH_FEATURE(BVH_INSTANCING) */
+ return num_hits;
+ }
+ }
+ }
+ break;
+ }
#endif
- }
- }
+ }
+ }
#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr-1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
- if(object_flag & SD_OBJECT_HAS_VOLUME) {
+ else {
+ /* Instance push. */
+ object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
+ int object_flag = kernel_tex_fetch(__object_flag, object);
+ if (object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
- isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
+ isect_t = bvh_instance_motion_push(
+ kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
# else
- isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
+ isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
# endif
- obvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
- tfar = avxf(isect_t);
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+ obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+ tfar = avxf(isect_t);
+ idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+ dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
# endif
# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
+ P_idir = P * idir;
+ P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
+ org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
# endif
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* Pop. */
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ num_hits_in_instance = 0;
+ isect_array->t = isect_t;
+
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
+
+ node_addr = kernel_tex_fetch(__object_node, object);
+ }
+ else {
+ /* Pop. */
+ object = OBJECT_NONE;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ }
+ }
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
- if(stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
+ if (stack_ptr >= 0) {
+ kernel_assert(object != OBJECT_NONE);
- /* Instance pop. */
- if(num_hits_in_instance) {
- float t_fac;
+ /* Instance pop. */
+ if (num_hits_in_instance) {
+ float t_fac;
# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
+ bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
+ bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
# endif
- /* Scale isect->t to adjust for instancing. */
- for(int i = 0; i < num_hits_in_instance; i++) {
- (isect_array-i-1)->t *= t_fac;
- }
- }
- else {
+ /* Scale isect->t to adjust for instancing. */
+ for (int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array - i - 1)->t *= t_fac;
+ }
+ }
+ else {
# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
+ bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
+ bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
# endif
- }
+ }
- isect_t = tmax;
- isect_array->t = isect_t;
+ isect_t = tmax;
+ isect_array->t = isect_t;
- obvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
- tfar = avxf(isect_t);
+ obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+ tfar = avxf(isect_t);
# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+ dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+ idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
+ P_idir = P * idir;
+ P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
+ org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
# endif
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ object = OBJECT_NONE;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
- return num_hits;
+ return num_hits;
}
#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_local.h b/intern/cycles/kernel/bvh/qbvh_local.h
index 661182e31b3..b21f79bd3a0 100644
--- a/intern/cycles/kernel/bvh/qbvh_local.h
+++ b/intern/cycles/kernel/bvh/qbvh_local.h
@@ -35,262 +35,257 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
uint *lcg_state,
int max_hits)
{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps (for non shadow rays).
- * - Separate version for shadow rays.
- * - Likely and unlikely for if() statements.
- * - SSE for hair.
- * - Test restrict attribute for pointers.
- */
+ /* TODO(sergey):
+ * - Test if pushing distance on the stack helps (for non shadow rays).
+ * - Separate version for shadow rays.
+ * - Likely and unlikely for if() statements.
+ * - SSE for hair.
+ * - Test restrict attribute for pointers.
+ */
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+ /* Traversal stack in CUDA thread-local memory. */
+ QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
+ traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_tex_fetch(__object_node, local_object);
+ /* Traversal variables in registers. */
+ int stack_ptr = 0;
+ int node_addr = kernel_tex_fetch(__object_node, local_object);
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = ray->t;
+ /* Ray parameters in registers. */
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
+ float isect_t = ray->t;
- if(local_isect != NULL) {
- local_isect->num_hits = 0;
- }
- kernel_assert((local_isect == NULL) == (max_hits == 0));
+ if (local_isect != NULL) {
+ local_isect->num_hits = 0;
+ }
+ kernel_assert((local_isect == NULL) == (max_hits == 0));
- const int object_flag = kernel_tex_fetch(__object_flag, local_object);
- if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+ const int object_flag = kernel_tex_fetch(__object_flag, local_object);
+ if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
- isect_t = bvh_instance_motion_push(kg,
- local_object,
- ray,
- &P,
- &dir,
- &idir,
- isect_t,
- &ob_itfm);
+ Transform ob_itfm;
+ isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
#else
- isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
+ isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
#endif
- object = local_object;
- }
+ object = local_object;
+ }
- ssef tnear(0.0f), tfar(isect_t);
+ ssef tnear(0.0f), tfar(isect_t);
#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+ sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+ sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
- float3 P_idir = P*idir;
- sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+ float3 P_idir = P * idir;
+ sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
+ sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- qbvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
+ /* Offsets to select the side that becomes the lower or upper bound. */
+ int near_x, near_y, near_z;
+ int far_x, far_y, far_z;
+ qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- ssef dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
+ /* Traversal loop. */
+ do {
+ do {
+ /* Traverse internal nodes. */
+ while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ ssef dist;
+ int child_mask = NODE_INTERSECT(kg,
+ tnear,
+ tfar,
#ifdef __KERNEL_AVX2__
- P_idir4,
+ P_idir4,
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
+ org4,
#endif
#if BVH_FEATURE(BVH_HAIR)
- dir4,
+ dir4,
#endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- node_addr,
- &dist);
+ idir4,
+ near_x,
+ near_y,
+ near_z,
+ far_x,
+ far_y,
+ far_z,
+ node_addr,
+ &dist);
- if(child_mask != 0) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- float4 cnodes;
+ if (child_mask != 0) {
+ float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+ float4 cnodes;
#if BVH_FEATURE(BVH_HAIR)
- if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
- }
- else
+ if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
+ }
+ else
#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
- }
+ {
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
+ }
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if(child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
+ /* One child is hit, continue with that child. */
+ int r = __bscf(child_mask);
+ if (child_mask == 0) {
+ node_addr = __float_as_int(cnodes[r]);
+ continue;
+ }
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float*)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float*)&dist)[r];
- if(child_mask == 0) {
- if(d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
+ /* Two children are hit, push far child, and continue with
+ * closer child.
+ */
+ int c0 = __float_as_int(cnodes[r]);
+ float d0 = ((float *)&dist)[r];
+ r = __bscf(child_mask);
+ int c1 = __float_as_int(cnodes[r]);
+ float d1 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ if (d1 < d0) {
+ node_addr = c1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
+ continue;
+ }
+ else {
+ node_addr = c0;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ continue;
+ }
+ }
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
+ /* Here starts the slow path for 3 or 4 hit children. We push
+ * all nodes onto the stack to sort them there.
+ */
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
+ /* Three children are hit, push all onto stack and sort 3
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c2 = __float_as_int(cnodes[r]);
+ float d2 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float*)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- }
+ /* Four children are hit, push all onto stack and sort 4
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c3 = __float_as_int(cnodes[r]);
+ float d3 = ((float *)&dist)[r];
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c3;
+ traversal_stack[stack_ptr].dist = d3;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3]);
+ }
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ }
- /* If node is leaf, fetch triangle list. */
- if(node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
- int prim_addr = __float_as_int(leaf.x);
+ /* If node is leaf, fetch triangle list. */
+ if (node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+ int prim_addr = __float_as_int(leaf.x);
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
+ int prim_addr2 = __float_as_int(leaf.y);
+ const uint type = __float_as_int(leaf.w);
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
- /* Primitive intersection. */
- switch(type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- /* Intersect ray against primitive, */
- for(; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if(triangle_intersect_local(kg,
- local_isect,
- P,
- dir,
- object,
- local_object,
- prim_addr,
- isect_t,
- lcg_state,
- max_hits)) {
- return true;
- }
- }
- break;
- }
+ /* Primitive intersection. */
+ switch (type & PRIMITIVE_ALL) {
+ case PRIMITIVE_TRIANGLE: {
+ /* Intersect ray against primitive, */
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ if (triangle_intersect_local(kg,
+ local_isect,
+ P,
+ dir,
+ object,
+ local_object,
+ prim_addr,
+ isect_t,
+ lcg_state,
+ max_hits)) {
+ return true;
+ }
+ }
+ break;
+ }
#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- /* Intersect ray against primitive. */
- for(; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if(motion_triangle_intersect_local(kg,
- local_isect,
- P,
- dir,
- ray->time,
- object,
- local_object,
- prim_addr,
- isect_t,
- lcg_state,
- max_hits)) {
- return true;
- }
- }
- break;
- }
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ /* Intersect ray against primitive. */
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ if (motion_triangle_intersect_local(kg,
+ local_isect,
+ P,
+ dir,
+ ray->time,
+ object,
+ local_object,
+ prim_addr,
+ isect_t,
+ lcg_state,
+ max_hits)) {
+ return true;
+ }
+ }
+ break;
+ }
#endif
- default:
- break;
- }
- }
- } while(node_addr != ENTRYPOINT_SENTINEL);
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ default:
+ break;
+ }
+ }
+ } while (node_addr != ENTRYPOINT_SENTINEL);
+ } while (node_addr != ENTRYPOINT_SENTINEL);
- return false;
+ return false;
}
#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_nodes.h b/intern/cycles/kernel/bvh/qbvh_nodes.h
index 2e622af1758..7c1d8c8c72e 100644
--- a/intern/cycles/kernel/bvh/qbvh_nodes.h
+++ b/intern/cycles/kernel/bvh/qbvh_nodes.h
@@ -17,11 +17,11 @@
*/
struct QBVHStackItem {
- int addr;
- float dist;
+ int addr;
+ float dist;
};
-ccl_device_inline void qbvh_near_far_idx_calc(const float3& idir,
+ccl_device_inline void qbvh_near_far_idx_calc(const float3 &idir,
int *ccl_restrict near_x,
int *ccl_restrict near_y,
int *ccl_restrict near_z,
@@ -31,44 +31,76 @@ ccl_device_inline void qbvh_near_far_idx_calc(const float3& idir,
{
#ifdef __KERNEL_SSE__
- *near_x = 0; *far_x = 1;
- *near_y = 2; *far_y = 3;
- *near_z = 4; *far_z = 5;
-
- const size_t mask = movemask(ssef(idir.m128));
-
- const int mask_x = mask & 1;
- const int mask_y = (mask & 2) >> 1;
- const int mask_z = (mask & 4) >> 2;
-
- *near_x += mask_x; *far_x -= mask_x;
- *near_y += mask_y; *far_y -= mask_y;
- *near_z += mask_z; *far_z -= mask_z;
+ *near_x = 0;
+ *far_x = 1;
+ *near_y = 2;
+ *far_y = 3;
+ *near_z = 4;
+ *far_z = 5;
+
+ const size_t mask = movemask(ssef(idir.m128));
+
+ const int mask_x = mask & 1;
+ const int mask_y = (mask & 2) >> 1;
+ const int mask_z = (mask & 4) >> 2;
+
+ *near_x += mask_x;
+ *far_x -= mask_x;
+ *near_y += mask_y;
+ *far_y -= mask_y;
+ *near_z += mask_z;
+ *far_z -= mask_z;
#else
- if(idir.x >= 0.0f) { *near_x = 0; *far_x = 1; } else { *near_x = 1; *far_x = 0; }
- if(idir.y >= 0.0f) { *near_y = 2; *far_y = 3; } else { *near_y = 3; *far_y = 2; }
- if(idir.z >= 0.0f) { *near_z = 4; *far_z = 5; } else { *near_z = 5; *far_z = 4; }
+ if (idir.x >= 0.0f) {
+ *near_x = 0;
+ *far_x = 1;
+ }
+ else {
+ *near_x = 1;
+ *far_x = 0;
+ }
+ if (idir.y >= 0.0f) {
+ *near_y = 2;
+ *far_y = 3;
+ }
+ else {
+ *near_y = 3;
+ *far_y = 2;
+ }
+ if (idir.z >= 0.0f) {
+ *near_z = 4;
+ *far_z = 5;
+ }
+ else {
+ *near_z = 5;
+ *far_z = 4;
+ }
#endif
}
/* TODO(sergey): Investigate if using intrinsics helps for both
* stack item swap and float comparison.
*/
-ccl_device_inline void qbvh_item_swap(QBVHStackItem *ccl_restrict a,
- QBVHStackItem *ccl_restrict b)
+ccl_device_inline void qbvh_item_swap(QBVHStackItem *ccl_restrict a, QBVHStackItem *ccl_restrict b)
{
- QBVHStackItem tmp = *a;
- *a = *b;
- *b = tmp;
+ QBVHStackItem tmp = *a;
+ *a = *b;
+ *b = tmp;
}
ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
QBVHStackItem *ccl_restrict s2,
QBVHStackItem *ccl_restrict s3)
{
- if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
- if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); }
- if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
+ if (s2->dist < s1->dist) {
+ qbvh_item_swap(s2, s1);
+ }
+ if (s3->dist < s2->dist) {
+ qbvh_item_swap(s3, s2);
+ }
+ if (s2->dist < s1->dist) {
+ qbvh_item_swap(s2, s1);
+ }
}
ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
@@ -76,279 +108,283 @@ ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
QBVHStackItem *ccl_restrict s3,
QBVHStackItem *ccl_restrict s4)
{
- if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
- if(s4->dist < s3->dist) { qbvh_item_swap(s4, s3); }
- if(s3->dist < s1->dist) { qbvh_item_swap(s3, s1); }
- if(s4->dist < s2->dist) { qbvh_item_swap(s4, s2); }
- if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); }
+ if (s2->dist < s1->dist) {
+ qbvh_item_swap(s2, s1);
+ }
+ if (s4->dist < s3->dist) {
+ qbvh_item_swap(s4, s3);
+ }
+ if (s3->dist < s1->dist) {
+ qbvh_item_swap(s3, s1);
+ }
+ if (s4->dist < s2->dist) {
+ qbvh_item_swap(s4, s2);
+ }
+ if (s3->dist < s2->dist) {
+ qbvh_item_swap(s3, s2);
+ }
}
/* Axis-aligned nodes intersection */
//ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
static int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
- const ssef& isect_near,
- const ssef& isect_far,
+ const ssef &isect_near,
+ const ssef &isect_far,
#ifdef __KERNEL_AVX2__
- const sse3f& org_idir,
+ const sse3f &org_idir,
#else
- const sse3f& org,
+ const sse3f &org,
#endif
- const sse3f& idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- ssef *ccl_restrict dist)
+ const sse3f &idir,
+ const int near_x,
+ const int near_y,
+ const int near_z,
+ const int far_x,
+ const int far_y,
+ const int far_z,
+ const int node_addr,
+ ssef *ccl_restrict dist)
{
- const int offset = node_addr + 1;
+ const int offset = node_addr + 1;
#ifdef __KERNEL_AVX2__
- const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, org_idir.x);
- const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, org_idir.y);
- const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, org_idir.z);
- const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, org_idir.x);
- const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, org_idir.y);
- const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, org_idir.z);
+ const ssef tnear_x = msub(
+ kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x), idir.x, org_idir.x);
+ const ssef tnear_y = msub(
+ kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y), idir.y, org_idir.y);
+ const ssef tnear_z = msub(
+ kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z), idir.z, org_idir.z);
+ const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x), idir.x, org_idir.x);
+ const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y), idir.y, org_idir.y);
+ const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z), idir.z, org_idir.z);
#else
- const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - org.x) * idir.x;
- const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - org.y) * idir.y;
- const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - org.z) * idir.z;
- const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - org.x) * idir.x;
- const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - org.y) * idir.y;
- const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - org.z) * idir.z;
+ const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x) - org.x) * idir.x;
+ const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y) - org.y) * idir.y;
+ const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z) - org.z) * idir.z;
+ const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x) - org.x) * idir.x;
+ const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y) - org.y) * idir.y;
+ const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z) - org.z) * idir.z;
#endif
#ifdef __KERNEL_SSE41__
- const ssef tnear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, isect_near));
- const ssef tfar = mini(mini(tfar_x, tfar_y), mini(tfar_z, isect_far));
- const sseb vmask = cast(tnear) > cast(tfar);
- int mask = (int)movemask(vmask)^0xf;
+ const ssef tnear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, isect_near));
+ const ssef tfar = mini(mini(tfar_x, tfar_y), mini(tfar_z, isect_far));
+ const sseb vmask = cast(tnear) > cast(tfar);
+ int mask = (int)movemask(vmask) ^ 0xf;
#else
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const sseb vmask = tnear <= tfar;
- int mask = (int)movemask(vmask);
+ const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+ const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+ const sseb vmask = tnear <= tfar;
+ int mask = (int)movemask(vmask);
#endif
- *dist = tnear;
- return mask;
+ *dist = tnear;
+ return mask;
}
-ccl_device_inline int qbvh_aligned_node_intersect_robust(
- KernelGlobals *ccl_restrict kg,
- const ssef& isect_near,
- const ssef& isect_far,
+ccl_device_inline int qbvh_aligned_node_intersect_robust(KernelGlobals *ccl_restrict kg,
+ const ssef &isect_near,
+ const ssef &isect_far,
#ifdef __KERNEL_AVX2__
- const sse3f& P_idir,
+ const sse3f &P_idir,
#else
- const sse3f& P,
+ const sse3f &P,
#endif
- const sse3f& idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- const float difl,
- ssef *ccl_restrict dist)
+ const sse3f &idir,
+ const int near_x,
+ const int near_y,
+ const int near_z,
+ const int far_x,
+ const int far_y,
+ const int far_z,
+ const int node_addr,
+ const float difl,
+ ssef *ccl_restrict dist)
{
- const int offset = node_addr + 1;
+ const int offset = node_addr + 1;
#ifdef __KERNEL_AVX2__
- const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, P_idir.x);
- const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, P_idir.y);
- const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, P_idir.z);
- const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, P_idir.x);
- const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, P_idir.y);
- const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, P_idir.z);
+ const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x), idir.x, P_idir.x);
+ const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y), idir.y, P_idir.y);
+ const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z), idir.z, P_idir.z);
+ const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x), idir.x, P_idir.x);
+ const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y), idir.y, P_idir.y);
+ const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z), idir.z, P_idir.z);
#else
- const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - P.x) * idir.x;
- const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - P.y) * idir.y;
- const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - P.z) * idir.z;
- const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - P.x) * idir.x;
- const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - P.y) * idir.y;
- const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - P.z) * idir.z;
+ const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x) - P.x) * idir.x;
+ const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y) - P.y) * idir.y;
+ const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z) - P.z) * idir.z;
+ const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x) - P.x) * idir.x;
+ const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y) - P.y) * idir.y;
+ const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z) - P.z) * idir.z;
#endif
- const float round_down = 1.0f - difl;
- const float round_up = 1.0f + difl;
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const sseb vmask = round_down*tnear <= round_up*tfar;
- *dist = tnear;
- return (int)movemask(vmask);
+ const float round_down = 1.0f - difl;
+ const float round_up = 1.0f + difl;
+ const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+ const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+ const sseb vmask = round_down * tnear <= round_up * tfar;
+ *dist = tnear;
+ return (int)movemask(vmask);
}
/* Unaligned nodes intersection */
-ccl_device_inline int qbvh_unaligned_node_intersect(
- KernelGlobals *ccl_restrict kg,
- const ssef& isect_near,
- const ssef& isect_far,
+ccl_device_inline int qbvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg,
+ const ssef &isect_near,
+ const ssef &isect_far,
#ifdef __KERNEL_AVX2__
- const sse3f& org_idir,
+ const sse3f &org_idir,
#endif
- const sse3f& org,
- const sse3f& dir,
- const sse3f& idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- ssef *ccl_restrict dist)
+ const sse3f &org,
+ const sse3f &dir,
+ const sse3f &idir,
+ const int near_x,
+ const int near_y,
+ const int near_z,
+ const int far_x,
+ const int far_y,
+ const int far_z,
+ const int node_addr,
+ ssef *ccl_restrict dist)
{
- const int offset = node_addr;
- const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1);
- const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+2);
- const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+3);
+ const int offset = node_addr;
+ const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 1);
+ const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 2);
+ const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 3);
- const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+4);
- const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+5);
- const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+6);
+ const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 4);
+ const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 5);
+ const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 6);
- const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+7);
- const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+8);
- const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+9);
+ const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 7);
+ const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 8);
+ const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 9);
- const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+10);
- const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+11);
- const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+12);
+ const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 10);
+ const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 11);
+ const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 12);
- const ssef aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z,
- aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z,
- aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z;
+ const ssef aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
+ aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
+ aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
- const ssef aligned_P_x = org.x*tfm_x_x + org.y*tfm_x_y + org.z*tfm_x_z + tfm_t_x,
- aligned_P_y = org.x*tfm_y_x + org.y*tfm_y_y + org.z*tfm_y_z + tfm_t_y,
- aligned_P_z = org.x*tfm_z_x + org.y*tfm_z_y + org.z*tfm_z_z + tfm_t_z;
+ const ssef aligned_P_x = org.x * tfm_x_x + org.y * tfm_x_y + org.z * tfm_x_z + tfm_t_x,
+ aligned_P_y = org.x * tfm_y_x + org.y * tfm_y_y + org.z * tfm_y_z + tfm_t_y,
+ aligned_P_z = org.x * tfm_z_x + org.y * tfm_z_y + org.z * tfm_z_z + tfm_t_z;
- const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f);
- const ssef nrdir_x = neg_one / aligned_dir_x,
- nrdir_y = neg_one / aligned_dir_y,
- nrdir_z = neg_one / aligned_dir_z;
+ const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f);
+ const ssef nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
+ nrdir_z = neg_one / aligned_dir_z;
- const ssef tlower_x = aligned_P_x * nrdir_x,
- tlower_y = aligned_P_y * nrdir_y,
- tlower_z = aligned_P_z * nrdir_z;
+ const ssef tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
+ tlower_z = aligned_P_z * nrdir_z;
- const ssef tupper_x = tlower_x - nrdir_x,
- tupper_y = tlower_y - nrdir_y,
- tupper_z = tlower_z - nrdir_z;
+ const ssef tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
+ tupper_z = tlower_z - nrdir_z;
#ifdef __KERNEL_SSE41__
- const ssef tnear_x = mini(tlower_x, tupper_x);
- const ssef tnear_y = mini(tlower_y, tupper_y);
- const ssef tnear_z = mini(tlower_z, tupper_z);
- const ssef tfar_x = maxi(tlower_x, tupper_x);
- const ssef tfar_y = maxi(tlower_y, tupper_y);
- const ssef tfar_z = maxi(tlower_z, tupper_z);
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const sseb vmask = tnear <= tfar;
- *dist = tnear;
- return movemask(vmask);
+ const ssef tnear_x = mini(tlower_x, tupper_x);
+ const ssef tnear_y = mini(tlower_y, tupper_y);
+ const ssef tnear_z = mini(tlower_z, tupper_z);
+ const ssef tfar_x = maxi(tlower_x, tupper_x);
+ const ssef tfar_y = maxi(tlower_y, tupper_y);
+ const ssef tfar_z = maxi(tlower_z, tupper_z);
+ const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+ const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+ const sseb vmask = tnear <= tfar;
+ *dist = tnear;
+ return movemask(vmask);
#else
- const ssef tnear_x = min(tlower_x, tupper_x);
- const ssef tnear_y = min(tlower_y, tupper_y);
- const ssef tnear_z = min(tlower_z, tupper_z);
- const ssef tfar_x = max(tlower_x, tupper_x);
- const ssef tfar_y = max(tlower_y, tupper_y);
- const ssef tfar_z = max(tlower_z, tupper_z);
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const sseb vmask = tnear <= tfar;
- *dist = tnear;
- return movemask(vmask);
+ const ssef tnear_x = min(tlower_x, tupper_x);
+ const ssef tnear_y = min(tlower_y, tupper_y);
+ const ssef tnear_z = min(tlower_z, tupper_z);
+ const ssef tfar_x = max(tlower_x, tupper_x);
+ const ssef tfar_y = max(tlower_y, tupper_y);
+ const ssef tfar_z = max(tlower_z, tupper_z);
+ const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+ const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+ const sseb vmask = tnear <= tfar;
+ *dist = tnear;
+ return movemask(vmask);
#endif
}
-ccl_device_inline int qbvh_unaligned_node_intersect_robust(
- KernelGlobals *ccl_restrict kg,
- const ssef& isect_near,
- const ssef& isect_far,
+ccl_device_inline int qbvh_unaligned_node_intersect_robust(KernelGlobals *ccl_restrict kg,
+ const ssef &isect_near,
+ const ssef &isect_far,
#ifdef __KERNEL_AVX2__
- const sse3f& P_idir,
+ const sse3f &P_idir,
#endif
- const sse3f& P,
- const sse3f& dir,
- const sse3f& idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- const float difl,
- ssef *ccl_restrict dist)
+ const sse3f &P,
+ const sse3f &dir,
+ const sse3f &idir,
+ const int near_x,
+ const int near_y,
+ const int near_z,
+ const int far_x,
+ const int far_y,
+ const int far_z,
+ const int node_addr,
+ const float difl,
+ ssef *ccl_restrict dist)
{
- const int offset = node_addr;
- const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1);
- const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+2);
- const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+3);
+ const int offset = node_addr;
+ const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 1);
+ const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 2);
+ const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 3);
- const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+4);
- const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+5);
- const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+6);
+ const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 4);
+ const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 5);
+ const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 6);
- const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+7);
- const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+8);
- const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+9);
+ const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 7);
+ const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 8);
+ const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 9);
- const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+10);
- const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+11);
- const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+12);
+ const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 10);
+ const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 11);
+ const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 12);
- const ssef aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z,
- aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z,
- aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z;
+ const ssef aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
+ aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
+ aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
- const ssef aligned_P_x = P.x*tfm_x_x + P.y*tfm_x_y + P.z*tfm_x_z + tfm_t_x,
- aligned_P_y = P.x*tfm_y_x + P.y*tfm_y_y + P.z*tfm_y_z + tfm_t_y,
- aligned_P_z = P.x*tfm_z_x + P.y*tfm_z_y + P.z*tfm_z_z + tfm_t_z;
+ const ssef aligned_P_x = P.x * tfm_x_x + P.y * tfm_x_y + P.z * tfm_x_z + tfm_t_x,
+ aligned_P_y = P.x * tfm_y_x + P.y * tfm_y_y + P.z * tfm_y_z + tfm_t_y,
+ aligned_P_z = P.x * tfm_z_x + P.y * tfm_z_y + P.z * tfm_z_z + tfm_t_z;
- const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f);
- const ssef nrdir_x = neg_one / aligned_dir_x,
- nrdir_y = neg_one / aligned_dir_y,
- nrdir_z = neg_one / aligned_dir_z;
+ const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f);
+ const ssef nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
+ nrdir_z = neg_one / aligned_dir_z;
- const ssef tlower_x = aligned_P_x * nrdir_x,
- tlower_y = aligned_P_y * nrdir_y,
- tlower_z = aligned_P_z * nrdir_z;
+ const ssef tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
+ tlower_z = aligned_P_z * nrdir_z;
- const ssef tupper_x = tlower_x - nrdir_x,
- tupper_y = tlower_y - nrdir_y,
- tupper_z = tlower_z - nrdir_z;
+ const ssef tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
+ tupper_z = tlower_z - nrdir_z;
- const float round_down = 1.0f - difl;
- const float round_up = 1.0f + difl;
+ const float round_down = 1.0f - difl;
+ const float round_up = 1.0f + difl;
#ifdef __KERNEL_SSE41__
- const ssef tnear_x = mini(tlower_x, tupper_x);
- const ssef tnear_y = mini(tlower_y, tupper_y);
- const ssef tnear_z = mini(tlower_z, tupper_z);
- const ssef tfar_x = maxi(tlower_x, tupper_x);
- const ssef tfar_y = maxi(tlower_y, tupper_y);
- const ssef tfar_z = maxi(tlower_z, tupper_z);
+ const ssef tnear_x = mini(tlower_x, tupper_x);
+ const ssef tnear_y = mini(tlower_y, tupper_y);
+ const ssef tnear_z = mini(tlower_z, tupper_z);
+ const ssef tfar_x = maxi(tlower_x, tupper_x);
+ const ssef tfar_y = maxi(tlower_y, tupper_y);
+ const ssef tfar_z = maxi(tlower_z, tupper_z);
#else
- const ssef tnear_x = min(tlower_x, tupper_x);
- const ssef tnear_y = min(tlower_y, tupper_y);
- const ssef tnear_z = min(tlower_z, tupper_z);
- const ssef tfar_x = max(tlower_x, tupper_x);
- const ssef tfar_y = max(tlower_y, tupper_y);
- const ssef tfar_z = max(tlower_z, tupper_z);
+ const ssef tnear_x = min(tlower_x, tupper_x);
+ const ssef tnear_y = min(tlower_y, tupper_y);
+ const ssef tnear_z = min(tlower_z, tupper_z);
+ const ssef tfar_x = max(tlower_x, tupper_x);
+ const ssef tfar_y = max(tlower_y, tupper_y);
+ const ssef tfar_z = max(tlower_z, tupper_z);
#endif
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const sseb vmask = round_down*tnear <= round_up*tfar;
- *dist = tnear;
- return movemask(vmask);
+ const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+ const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+ const sseb vmask = round_down * tnear <= round_up * tfar;
+ *dist = tnear;
+ return movemask(vmask);
}
/* Intersectors wrappers.
@@ -356,111 +392,125 @@ ccl_device_inline int qbvh_unaligned_node_intersect_robust(
* They'll check node type and call appropriate intersection code.
*/
-ccl_device_inline int qbvh_node_intersect(
- KernelGlobals *ccl_restrict kg,
- const ssef& isect_near,
- const ssef& isect_far,
+ccl_device_inline int qbvh_node_intersect(KernelGlobals *ccl_restrict kg,
+ const ssef &isect_near,
+ const ssef &isect_far,
#ifdef __KERNEL_AVX2__
- const sse3f& org_idir,
+ const sse3f &org_idir,
#endif
- const sse3f& org,
- const sse3f& dir,
- const sse3f& idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- ssef *ccl_restrict dist)
+ const sse3f &org,
+ const sse3f &dir,
+ const sse3f &idir,
+ const int near_x,
+ const int near_y,
+ const int near_z,
+ const int far_x,
+ const int far_y,
+ const int far_z,
+ const int node_addr,
+ ssef *ccl_restrict dist)
{
- const int offset = node_addr;
- const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
- if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return qbvh_unaligned_node_intersect(kg,
- isect_near,
- isect_far,
+ const int offset = node_addr;
+ const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
+ if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+ return qbvh_unaligned_node_intersect(kg,
+ isect_near,
+ isect_far,
#ifdef __KERNEL_AVX2__
- org_idir,
+ org_idir,
#endif
- org,
- dir,
- idir,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- node_addr,
- dist);
- }
- else {
- return qbvh_aligned_node_intersect(kg,
- isect_near,
- isect_far,
+ org,
+ dir,
+ idir,
+ near_x,
+ near_y,
+ near_z,
+ far_x,
+ far_y,
+ far_z,
+ node_addr,
+ dist);
+ }
+ else {
+ return qbvh_aligned_node_intersect(kg,
+ isect_near,
+ isect_far,
#ifdef __KERNEL_AVX2__
- org_idir,
+ org_idir,
#else
- org,
+ org,
#endif
- idir,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- node_addr,
- dist);
- }
+ idir,
+ near_x,
+ near_y,
+ near_z,
+ far_x,
+ far_y,
+ far_z,
+ node_addr,
+ dist);
+ }
}
-ccl_device_inline int qbvh_node_intersect_robust(
- KernelGlobals *ccl_restrict kg,
- const ssef& isect_near,
- const ssef& isect_far,
+ccl_device_inline int qbvh_node_intersect_robust(KernelGlobals *ccl_restrict kg,
+ const ssef &isect_near,
+ const ssef &isect_far,
#ifdef __KERNEL_AVX2__
- const sse3f& P_idir,
+ const sse3f &P_idir,
#endif
- const sse3f& P,
- const sse3f& dir,
- const sse3f& idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- const float difl,
- ssef *ccl_restrict dist)
+ const sse3f &P,
+ const sse3f &dir,
+ const sse3f &idir,
+ const int near_x,
+ const int near_y,
+ const int near_z,
+ const int far_x,
+ const int far_y,
+ const int far_z,
+ const int node_addr,
+ const float difl,
+ ssef *ccl_restrict dist)
{
- const int offset = node_addr;
- const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
- if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return qbvh_unaligned_node_intersect_robust(kg,
- isect_near,
- isect_far,
+ const int offset = node_addr;
+ const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
+ if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+ return qbvh_unaligned_node_intersect_robust(kg,
+ isect_near,
+ isect_far,
#ifdef __KERNEL_AVX2__
- P_idir,
+ P_idir,
#endif
- P,
- dir,
- idir,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- node_addr,
- difl,
- dist);
- }
- else {
- return qbvh_aligned_node_intersect_robust(kg,
- isect_near,
- isect_far,
+ P,
+ dir,
+ idir,
+ near_x,
+ near_y,
+ near_z,
+ far_x,
+ far_y,
+ far_z,
+ node_addr,
+ difl,
+ dist);
+ }
+ else {
+ return qbvh_aligned_node_intersect_robust(kg,
+ isect_near,
+ isect_far,
#ifdef __KERNEL_AVX2__
- P_idir,
+ P_idir,
#else
- P,
+ P,
#endif
- idir,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- node_addr,
- difl,
- dist);
- }
+ idir,
+ near_x,
+ near_y,
+ near_z,
+ far_x,
+ far_y,
+ far_z,
+ node_addr,
+ difl,
+ dist);
+ }
}
diff --git a/intern/cycles/kernel/bvh/qbvh_shadow_all.h b/intern/cycles/kernel/bvh/qbvh_shadow_all.h
index dd977fb9e74..49e607bfbd0 100644
--- a/intern/cycles/kernel/bvh/qbvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/qbvh_shadow_all.h
@@ -36,439 +36,424 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
const uint max_hits,
uint *num_hits)
{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
+ /* TODO(sergey):
+ * - Test if pushing distance on the stack helps.
+ * - Likely and unlikely for if() statements.
+ * - Test restrict attribute for pointers.
+ */
+
+ /* Traversal stack in CUDA thread-local memory. */
+ QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
+ traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+
+ /* Traversal variables in registers. */
+ int stack_ptr = 0;
+ int node_addr = kernel_data.bvh.root;
+
+ /* Ray parameters in registers. */
+ const float tmax = ray->t;
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
+ float isect_t = tmax;
#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
+ Transform ob_itfm;
#endif
- *num_hits = 0;
- isect_array->t = tmax;
-
+ *num_hits = 0;
+ isect_array->t = tmax;
#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
+ int num_hits_in_instance = 0;
#endif
- ssef tnear(0.0f), tfar(isect_t);
+ ssef tnear(0.0f), tfar(isect_t);
#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+ sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+ sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
- float3 P_idir = P*idir;
- sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+ float3 P_idir = P * idir;
+ sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
+ sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- qbvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- (void) inodes;
-
- if(false
+ /* Offsets to select the side that becomes the lower or upper bound. */
+ int near_x, near_y, near_z;
+ int far_x, far_y, far_z;
+ qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+
+ /* Traversal loop. */
+ do {
+ do {
+ /* Traverse internal nodes. */
+ while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+ (void)inodes;
+
+ if (false
#ifdef __VISIBILITY_FLAG__
- || ((__float_as_uint(inodes.x) & visibility) == 0)
+ || ((__float_as_uint(inodes.x) & visibility) == 0)
#endif
#if BVH_FEATURE(BVH_MOTION)
- || UNLIKELY(ray->time < inodes.y)
- || UNLIKELY(ray->time > inodes.z)
+ || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
#endif
- ) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ssef dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
+ ) {
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ ssef dist;
+ int child_mask = NODE_INTERSECT(kg,
+ tnear,
+ tfar,
#ifdef __KERNEL_AVX2__
- P_idir4,
+ P_idir4,
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
+ org4,
#endif
#if BVH_FEATURE(BVH_HAIR)
- dir4,
+ dir4,
#endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- node_addr,
- &dist);
-
- if(child_mask != 0) {
- float4 cnodes;
+ idir4,
+ near_x,
+ near_y,
+ near_z,
+ far_x,
+ far_y,
+ far_z,
+ node_addr,
+ &dist);
+
+ if (child_mask != 0) {
+ float4 cnodes;
#if BVH_FEATURE(BVH_HAIR)
- if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
- }
- else
+ if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
+ }
+ else
#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if(child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float*)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float*)&dist)[r];
- if(child_mask == 0) {
- if(d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float*)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if(node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
+ {
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
+ }
+
+ /* One child is hit, continue with that child. */
+ int r = __bscf(child_mask);
+ if (child_mask == 0) {
+ node_addr = __float_as_int(cnodes[r]);
+ continue;
+ }
+
+ /* Two children are hit, push far child, and continue with
+ * closer child.
+ */
+ int c0 = __float_as_int(cnodes[r]);
+ float d0 = ((float *)&dist)[r];
+ r = __bscf(child_mask);
+ int c1 = __float_as_int(cnodes[r]);
+ float d1 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ if (d1 < d0) {
+ node_addr = c1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
+ continue;
+ }
+ else {
+ node_addr = c0;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ continue;
+ }
+ }
+
+ /* Here starts the slow path for 3 or 4 hit children. We push
+ * all nodes onto the stack to sort them there.
+ */
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
+
+ /* Three children are hit, push all onto stack and sort 3
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c2 = __float_as_int(cnodes[r]);
+ float d2 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ /* Four children are hit, push all onto stack and sort 4
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c3 = __float_as_int(cnodes[r]);
+ float d3 = ((float *)&dist)[r];
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c3;
+ traversal_stack[stack_ptr].dist = d3;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3]);
+ }
+
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ }
+
+ /* If node is leaf, fetch triangle list. */
+ if (node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(leaf.z) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
+ if ((__float_as_uint(leaf.z) & visibility) == 0) {
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
#endif
- int prim_addr = __float_as_int(leaf.x);
+ int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
- if(prim_addr >= 0) {
+ if (prim_addr >= 0) {
#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- while(prim_addr < prim_addr2) {
- kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
- bool hit;
-
- /* todo: specialized intersect functions which don't fill in
- * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
- * might give a few % performance improvement */
-
- switch(p_type) {
- case PRIMITIVE_TRIANGLE: {
- hit = triangle_intersect(kg,
- isect_array,
- P,
- dir,
- visibility,
- object,
- prim_addr);
- break;
- }
+ int prim_addr2 = __float_as_int(leaf.y);
+ const uint type = __float_as_int(leaf.w);
+ const uint p_type = type & PRIMITIVE_ALL;
+
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+
+ /* Primitive intersection. */
+ while (prim_addr < prim_addr2) {
+ kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
+ bool hit;
+
+ /* todo: specialized intersect functions which don't fill in
+ * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
+ * might give a few % performance improvement */
+
+ switch (p_type) {
+ case PRIMITIVE_TRIANGLE: {
+ hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
+ break;
+ }
#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- hit = motion_triangle_intersect(kg,
- isect_array,
- P,
- dir,
- ray->time,
- visibility,
- object,
- prim_addr);
- break;
- }
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ hit = motion_triangle_intersect(
+ kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
+ break;
+ }
#endif
#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
- if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(kg,
- isect_array,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type,
- NULL,
- 0, 0);
- }
- else {
- hit = curve_intersect(kg,
- isect_array,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type,
- NULL,
- 0, 0);
- }
- break;
- }
+ case PRIMITIVE_CURVE:
+ case PRIMITIVE_MOTION_CURVE: {
+ const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
+ if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
+ hit = cardinal_curve_intersect(kg,
+ isect_array,
+ P,
+ dir,
+ visibility,
+ object,
+ prim_addr,
+ ray->time,
+ curve_type,
+ NULL,
+ 0,
+ 0);
+ }
+ else {
+ hit = curve_intersect(kg,
+ isect_array,
+ P,
+ dir,
+ visibility,
+ object,
+ prim_addr,
+ ray->time,
+ curve_type,
+ NULL,
+ 0,
+ 0);
+ }
+ break;
+ }
#endif
- default: {
- hit = false;
- break;
- }
- }
+ default: {
+ hit = false;
+ break;
+ }
+ }
- /* Shadow ray early termination. */
- if(hit) {
- /* detect if this surface has a shader with transparent shadows */
+ /* Shadow ray early termination. */
+ if (hit) {
+ /* detect if this surface has a shader with transparent shadows */
- /* todo: optimize so primitive visibility flag indicates if
- * the primitive has a transparent shadow shader? */
- int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
- int shader = 0;
+ /* todo: optimize so primitive visibility flag indicates if
+ * the primitive has a transparent shadow shader? */
+ int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
+ int shader = 0;
#ifdef __HAIR__
- if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
+ if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
#endif
- {
- shader = kernel_tex_fetch(__tri_shader, prim);
- }
+ {
+ shader = kernel_tex_fetch(__tri_shader, prim);
+ }
#ifdef __HAIR__
- else {
- float4 str = kernel_tex_fetch(__curves, prim);
- shader = __float_as_int(str.z);
- }
+ else {
+ float4 str = kernel_tex_fetch(__curves, prim);
+ shader = __float_as_int(str.z);
+ }
#endif
- int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-
- /* if no transparent shadows, all light is blocked */
- if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
- return true;
- }
- /* if maximum number of hits reached, block all light */
- else if(*num_hits == max_hits) {
- return true;
- }
-
- /* move on to next entry in intersections array */
- isect_array++;
- (*num_hits)++;
+ int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
+
+ /* if no transparent shadows, all light is blocked */
+ if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
+ return true;
+ }
+ /* if maximum number of hits reached, block all light */
+ else if (*num_hits == max_hits) {
+ return true;
+ }
+
+ /* move on to next entry in intersections array */
+ isect_array++;
+ (*num_hits)++;
#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
+ num_hits_in_instance++;
#endif
- isect_array->t = isect_t;
- }
+ isect_array->t = isect_t;
+ }
- prim_addr++;
- }
- }
+ prim_addr++;
+ }
+ }
#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr-1);
+ else {
+ /* Instance push. */
+ object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
# if BVH_FEATURE(BVH_MOTION)
- isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
+ isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
# else
- isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
+ isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
# endif
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
+ num_hits_in_instance = 0;
+ isect_array->t = isect_t;
- qbvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
- tfar = ssef(isect_t);
+ qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+ tfar = ssef(isect_t);
# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+ idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+ P_idir = P * idir;
+ P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ node_addr = kernel_tex_fetch(__object_node, object);
+ }
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
- if(stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
+ if (stack_ptr >= 0) {
+ kernel_assert(object != OBJECT_NONE);
- /* Instance pop. */
- if(num_hits_in_instance) {
- float t_fac;
+ /* Instance pop. */
+ if (num_hits_in_instance) {
+ float t_fac;
# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
+ bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
+ bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
# endif
- /* Scale isect->t to adjust for instancing. */
- for(int i = 0; i < num_hits_in_instance; i++) {
- (isect_array-i-1)->t *= t_fac;
- }
- }
- else {
+ /* Scale isect->t to adjust for instancing. */
+ for (int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array - i - 1)->t *= t_fac;
+ }
+ }
+ else {
# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
+ bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
+ bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
# endif
- }
+ }
- isect_t = tmax;
- isect_array->t = isect_t;
+ isect_t = tmax;
+ isect_array->t = isect_t;
- qbvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
- tfar = ssef(isect_t);
+ qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+ tfar = ssef(isect_t);
# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+ idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+ P_idir = P * idir;
+ P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ object = OBJECT_NONE;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
- return false;
+ return false;
}
#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_traversal.h b/intern/cycles/kernel/bvh/qbvh_traversal.h
index 40cd57aad34..9ee0f7b5933 100644
--- a/intern/cycles/kernel/bvh/qbvh_traversal.h
+++ b/intern/cycles/kernel/bvh/qbvh_traversal.h
@@ -37,457 +37,446 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
Intersection *isect,
const uint visibility
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- ,uint *lcg_state,
+ ,
+ uint *lcg_state,
float difl,
float extmax
#endif
- )
+)
{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps (for non shadow rays).
- * - Separate version for shadow rays.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
- traversal_stack[0].dist = -FLT_MAX;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
- float node_dist = -FLT_MAX;
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
+ /* TODO(sergey):
+ * - Test if pushing distance on the stack helps (for non shadow rays).
+ * - Separate version for shadow rays.
+ * - Likely and unlikely for if() statements.
+ * - Test restrict attribute for pointers.
+ */
+
+ /* Traversal stack in CUDA thread-local memory. */
+ QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
+ traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+ traversal_stack[0].dist = -FLT_MAX;
+
+ /* Traversal variables in registers. */
+ int stack_ptr = 0;
+ int node_addr = kernel_data.bvh.root;
+ float node_dist = -FLT_MAX;
+
+ /* Ray parameters in registers. */
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
+ Transform ob_itfm;
#endif
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
+ isect->t = ray->t;
+ isect->u = 0.0f;
+ isect->v = 0.0f;
+ isect->prim = PRIM_NONE;
+ isect->object = OBJECT_NONE;
- BVH_DEBUG_INIT();
+ BVH_DEBUG_INIT();
- ssef tnear(0.0f), tfar(ray->t);
+ ssef tnear(0.0f), tfar(ray->t);
#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+ sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+ sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
- float3 P_idir = P*idir;
- sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+ float3 P_idir = P * idir;
+ sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+ sse3f org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- qbvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- (void) inodes;
-
- if(UNLIKELY(node_dist > isect->t)
+ /* Offsets to select the side that becomes the lower or upper bound. */
+ int near_x, near_y, near_z;
+ int far_x, far_y, far_z;
+ qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+
+ /* Traversal loop. */
+ do {
+ do {
+ /* Traverse internal nodes. */
+ while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+ (void)inodes;
+
+ if (UNLIKELY(node_dist > isect->t)
#if BVH_FEATURE(BVH_MOTION)
- || UNLIKELY(ray->time < inodes.y)
- || UNLIKELY(ray->time > inodes.z)
+ || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
#endif
#ifdef __VISIBILITY_FLAG__
- || (__float_as_uint(inodes.x) & visibility) == 0
+ || (__float_as_uint(inodes.x) & visibility) == 0
#endif
- )
- {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
+ ) {
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+ continue;
+ }
- int child_mask;
- ssef dist;
+ int child_mask;
+ ssef dist;
- BVH_DEBUG_NEXT_NODE();
+ BVH_DEBUG_NEXT_NODE();
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- if(difl != 0.0f) {
- /* NOTE: We extend all the child BB instead of fetching
- * and checking visibility flags for each of the,
- *
- * Need to test if doing opposite would be any faster.
- */
- child_mask = NODE_INTERSECT_ROBUST(kg,
- tnear,
- tfar,
+ if (difl != 0.0f) {
+ /* NOTE: We extend all the child BB instead of fetching
+ * and checking visibility flags for each of the,
+ *
+ * Need to test if doing opposite would be any faster.
+ */
+ child_mask = NODE_INTERSECT_ROBUST(kg,
+ tnear,
+ tfar,
# ifdef __KERNEL_AVX2__
- P_idir4,
+ P_idir4,
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
+ org4,
# endif
# if BVH_FEATURE(BVH_HAIR)
- dir4,
+ dir4,
# endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- node_addr,
- difl,
- &dist);
- }
- else
-#endif /* BVH_HAIR_MINIMUM_WIDTH */
- {
- child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
+ idir4,
+ near_x,
+ near_y,
+ near_z,
+ far_x,
+ far_y,
+ far_z,
+ node_addr,
+ difl,
+ &dist);
+ }
+ else
+#endif /* BVH_HAIR_MINIMUM_WIDTH */
+ {
+ child_mask = NODE_INTERSECT(kg,
+ tnear,
+ tfar,
#ifdef __KERNEL_AVX2__
- P_idir4,
+ P_idir4,
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
+ org4,
#endif
#if BVH_FEATURE(BVH_HAIR)
- dir4,
+ dir4,
#endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- node_addr,
- &dist);
- }
-
- if(child_mask != 0) {
- float4 cnodes;
- /* TODO(sergey): Investigate whether moving cnodes upwards
- * gives a speedup (will be different cache pattern but will
- * avoid extra check here).
- */
+ idir4,
+ near_x,
+ near_y,
+ near_z,
+ far_x,
+ far_y,
+ far_z,
+ node_addr,
+ &dist);
+ }
+
+ if (child_mask != 0) {
+ float4 cnodes;
+ /* TODO(sergey): Investigate whether moving cnodes upwards
+ * gives a speedup (will be different cache pattern but will
+ * avoid extra check here).
+ */
#if BVH_FEATURE(BVH_HAIR)
- if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
- }
- else
+ if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
+ }
+ else
#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- float d0 = ((float*)&dist)[r];
- if(child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- node_dist = d0;
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float*)&dist)[r];
- if(child_mask == 0) {
- if(d1 < d0) {
- node_addr = c1;
- node_dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- node_dist = d0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float*)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if(node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
+ {
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
+ }
+
+ /* One child is hit, continue with that child. */
+ int r = __bscf(child_mask);
+ float d0 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ node_addr = __float_as_int(cnodes[r]);
+ node_dist = d0;
+ continue;
+ }
+
+ /* Two children are hit, push far child, and continue with
+ * closer child.
+ */
+ int c0 = __float_as_int(cnodes[r]);
+ r = __bscf(child_mask);
+ int c1 = __float_as_int(cnodes[r]);
+ float d1 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ if (d1 < d0) {
+ node_addr = c1;
+ node_dist = d1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
+ continue;
+ }
+ else {
+ node_addr = c0;
+ node_dist = d0;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ continue;
+ }
+ }
+
+ /* Here starts the slow path for 3 or 4 hit children. We push
+ * all nodes onto the stack to sort them there.
+ */
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
+
+ /* Three children are hit, push all onto stack and sort 3
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c2 = __float_as_int(cnodes[r]);
+ float d2 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+ continue;
+ }
+
+ /* Four children are hit, push all onto stack and sort 4
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c3 = __float_as_int(cnodes[r]);
+ float d3 = ((float *)&dist)[r];
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c3;
+ traversal_stack[stack_ptr].dist = d3;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3]);
+ }
+
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+ }
+
+ /* If node is leaf, fetch triangle list. */
+ if (node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
#ifdef __VISIBILITY_FLAG__
- if(UNLIKELY((node_dist > isect->t) ||
- ((__float_as_uint(leaf.z) & visibility) == 0)))
+ if (UNLIKELY((node_dist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0)))
#else
- if(UNLIKELY((node_dist > isect->t)))
+ if (UNLIKELY((node_dist > isect->t)))
#endif
- {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
+ {
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+ continue;
+ }
- int prim_addr = __float_as_int(leaf.x);
+ int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
- if(prim_addr >= 0) {
+ if (prim_addr >= 0) {
#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch(type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- for(; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if(triangle_intersect(kg,
- isect,
- P,
- dir,
- visibility,
- object,
- prim_addr)) {
- tfar = ssef(isect->t);
- /* Shadow ray early termination. */
- if(visibility & PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }
- break;
- }
+ int prim_addr2 = __float_as_int(leaf.y);
+ const uint type = __float_as_int(leaf.w);
+
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+
+ /* Primitive intersection. */
+ switch (type & PRIMITIVE_ALL) {
+ case PRIMITIVE_TRIANGLE: {
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ BVH_DEBUG_NEXT_INTERSECTION();
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
+ tfar = ssef(isect->t);
+ /* Shadow ray early termination. */
+ if (visibility & PATH_RAY_SHADOW_OPAQUE) {
+ return true;
+ }
+ }
+ }
+ break;
+ }
#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for(; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if(motion_triangle_intersect(kg,
- isect,
- P,
- dir,
- ray->time,
- visibility,
- object,
- prim_addr)) {
- tfar = ssef(isect->t);
- /* Shadow ray early termination. */
- if(visibility & PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_MOTION) */
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ BVH_DEBUG_NEXT_INTERSECTION();
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ if (motion_triangle_intersect(
+ kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
+ tfar = ssef(isect->t);
+ /* Shadow ray early termination. */
+ if (visibility & PATH_RAY_SHADOW_OPAQUE) {
+ return true;
+ }
+ }
+ }
+ break;
+ }
+#endif /* BVH_FEATURE(BVH_MOTION) */
#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- for(; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
- kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
- bool hit;
- if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(kg,
- isect,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type,
- lcg_state,
- difl,
- extmax);
- }
- else {
- hit = curve_intersect(kg,
- isect,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type,
- lcg_state,
- difl,
- extmax);
- }
- if(hit) {
- tfar = ssef(isect->t);
- /* Shadow ray early termination. */
- if(visibility & PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_HAIR) */
- }
- }
+ case PRIMITIVE_CURVE:
+ case PRIMITIVE_MOTION_CURVE: {
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ BVH_DEBUG_NEXT_INTERSECTION();
+ const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
+ kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
+ bool hit;
+ if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
+ hit = cardinal_curve_intersect(kg,
+ isect,
+ P,
+ dir,
+ visibility,
+ object,
+ prim_addr,
+ ray->time,
+ curve_type,
+ lcg_state,
+ difl,
+ extmax);
+ }
+ else {
+ hit = curve_intersect(kg,
+ isect,
+ P,
+ dir,
+ visibility,
+ object,
+ prim_addr,
+ ray->time,
+ curve_type,
+ lcg_state,
+ difl,
+ extmax);
+ }
+ if (hit) {
+ tfar = ssef(isect->t);
+ /* Shadow ray early termination. */
+ if (visibility & PATH_RAY_SHADOW_OPAQUE) {
+ return true;
+ }
+ }
+ }
+ break;
+ }
+#endif /* BVH_FEATURE(BVH_HAIR) */
+ }
+ }
#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr-1);
+ else {
+ /* Instance push. */
+ object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
# if BVH_FEATURE(BVH_MOTION)
- qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
+ qbvh_instance_motion_push(
+ kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
# else
- qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
+ qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
# endif
- qbvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
- tfar = ssef(isect->t);
+ qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+ tfar = ssef(isect->t);
# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+ idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+ P_idir = P * idir;
+ P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
- traversal_stack[stack_ptr].dist = -FLT_MAX;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
+ traversal_stack[stack_ptr].dist = -FLT_MAX;
- node_addr = kernel_tex_fetch(__object_node, object);
+ node_addr = kernel_tex_fetch(__object_node, object);
- BVH_DEBUG_NEXT_INSTANCE();
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ BVH_DEBUG_NEXT_INSTANCE();
+ }
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
- if(stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
+ if (stack_ptr >= 0) {
+ kernel_assert(object != OBJECT_NONE);
- /* Instance pop. */
+ /* Instance pop. */
# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+ isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
+ isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
- qbvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
- tfar = ssef(isect->t);
+ qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+ tfar = ssef(isect->t);
# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+ idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+ P_idir = P * idir;
+ P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ object = OBJECT_NONE;
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
- return (isect->prim != PRIM_NONE);
+ return (isect->prim != PRIM_NONE);
}
#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_volume.h b/intern/cycles/kernel/bvh/qbvh_volume.h
index 6790bfa6c83..e4eaed04467 100644
--- a/intern/cycles/kernel/bvh/qbvh_volume.h
+++ b/intern/cycles/kernel/bvh/qbvh_volume.h
@@ -33,331 +33,335 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
Intersection *isect,
const uint visibility)
{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
+ /* TODO(sergey):
+ * - Test if pushing distance on the stack helps.
+ * - Likely and unlikely for if() statements.
+ * - Test restrict attribute for pointers.
+ */
+
+ /* Traversal stack in CUDA thread-local memory. */
+ QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
+ traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+
+ /* Traversal variables in registers. */
+ int stack_ptr = 0;
+ int node_addr = kernel_data.bvh.root;
+
+ /* Ray parameters in registers. */
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
+ Transform ob_itfm;
#endif
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
+ isect->t = ray->t;
+ isect->u = 0.0f;
+ isect->v = 0.0f;
+ isect->prim = PRIM_NONE;
+ isect->object = OBJECT_NONE;
- ssef tnear(0.0f), tfar(ray->t);
+ ssef tnear(0.0f), tfar(ray->t);
#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+ sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+ sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
- float3 P_idir = P*idir;
- sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+ float3 P_idir = P * idir;
+ sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
+ sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- qbvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
+ /* Offsets to select the side that becomes the lower or upper bound. */
+ int near_x, near_y, near_z;
+ int far_x, far_y, far_z;
+ qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ /* Traversal loop. */
+ do {
+ do {
+ /* Traverse internal nodes. */
+ while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(inodes.x) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
+ if ((__float_as_uint(inodes.x) & visibility) == 0) {
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
#endif
- ssef dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
+ ssef dist;
+ int child_mask = NODE_INTERSECT(kg,
+ tnear,
+ tfar,
#ifdef __KERNEL_AVX2__
- P_idir4,
+ P_idir4,
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
+ org4,
#endif
#if BVH_FEATURE(BVH_HAIR)
- dir4,
+ dir4,
#endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- node_addr,
- &dist);
-
- if(child_mask != 0) {
- float4 cnodes;
+ idir4,
+ near_x,
+ near_y,
+ near_z,
+ far_x,
+ far_y,
+ far_z,
+ node_addr,
+ &dist);
+
+ if (child_mask != 0) {
+ float4 cnodes;
#if BVH_FEATURE(BVH_HAIR)
- if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
- }
- else
+ if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
+ }
+ else
#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if(child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float*)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float*)&dist)[r];
- if(child_mask == 0) {
- if(d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float*)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if(node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
-
- if((__float_as_uint(leaf.z) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- int prim_addr = __float_as_int(leaf.x);
+ {
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
+ }
+
+ /* One child is hit, continue with that child. */
+ int r = __bscf(child_mask);
+ if (child_mask == 0) {
+ node_addr = __float_as_int(cnodes[r]);
+ continue;
+ }
+
+ /* Two children are hit, push far child, and continue with
+ * closer child.
+ */
+ int c0 = __float_as_int(cnodes[r]);
+ float d0 = ((float *)&dist)[r];
+ r = __bscf(child_mask);
+ int c1 = __float_as_int(cnodes[r]);
+ float d1 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ if (d1 < d0) {
+ node_addr = c1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
+ continue;
+ }
+ else {
+ node_addr = c0;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ continue;
+ }
+ }
+
+ /* Here starts the slow path for 3 or 4 hit children. We push
+ * all nodes onto the stack to sort them there.
+ */
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
+
+ /* Three children are hit, push all onto stack and sort 3
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c2 = __float_as_int(cnodes[r]);
+ float d2 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ /* Four children are hit, push all onto stack and sort 4
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c3 = __float_as_int(cnodes[r]);
+ float d3 = ((float *)&dist)[r];
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c3;
+ traversal_stack[stack_ptr].dist = d3;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3]);
+ }
+
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ }
+
+ /* If node is leaf, fetch triangle list. */
+ if (node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+
+ if ((__float_as_uint(leaf.z) & visibility) == 0) {
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
- if(prim_addr >= 0) {
+ if (prim_addr >= 0) {
#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch(p_type) {
- case PRIMITIVE_TRIANGLE: {
- for(; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
- }
- break;
- }
+ int prim_addr2 = __float_as_int(leaf.y);
+ const uint type = __float_as_int(leaf.w);
+ const uint p_type = type & PRIMITIVE_ALL;
+
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+
+ /* Primitive intersection. */
+ switch (p_type) {
+ case PRIMITIVE_TRIANGLE: {
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ /* Only primitives from volume object. */
+ uint tri_object = (object == OBJECT_NONE) ?
+ kernel_tex_fetch(__prim_object, prim_addr) :
+ object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ /* Intersect ray against primitive. */
+ triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
+ }
+ break;
+ }
#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for(; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, prim_addr);
- }
- break;
- }
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ /* Only primitives from volume object. */
+ uint tri_object = (object == OBJECT_NONE) ?
+ kernel_tex_fetch(__prim_object, prim_addr) :
+ object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ /* Intersect ray against primitive. */
+ motion_triangle_intersect(
+ kg, isect, P, dir, ray->time, visibility, object, prim_addr);
+ }
+ break;
+ }
#endif
- }
- }
+ }
+ }
#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr-1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
- if(object_flag & SD_OBJECT_HAS_VOLUME) {
+ else {
+ /* Instance push. */
+ object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
+ int object_flag = kernel_tex_fetch(__object_flag, object);
+ if (object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+ isect->t = bvh_instance_motion_push(
+ kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
- isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
+ isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
- qbvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
- tfar = ssef(isect->t);
+ qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+ tfar = ssef(isect->t);
# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+ idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+ P_idir = P * idir;
+ P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* Pop. */
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
+
+ node_addr = kernel_tex_fetch(__object_node, object);
+ }
+ else {
+ /* Pop. */
+ object = OBJECT_NONE;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ }
+ }
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
- if(stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
+ if (stack_ptr >= 0) {
+ kernel_assert(object != OBJECT_NONE);
- /* Instance pop. */
+ /* Instance pop. */
# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+ isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
+ isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
- qbvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
- tfar = ssef(isect->t);
+ qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+ tfar = ssef(isect->t);
# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+ idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+ P_idir = P * idir;
+ P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ object = OBJECT_NONE;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
- return (isect->prim != PRIM_NONE);
+ return (isect->prim != PRIM_NONE);
}
#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_volume_all.h b/intern/cycles/kernel/bvh/qbvh_volume_all.h
index 63d79b6fe34..eddc48c487e 100644
--- a/intern/cycles/kernel/bvh/qbvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/qbvh_volume_all.h
@@ -34,405 +34,411 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
const uint max_hits,
const uint visibility)
{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
+ /* TODO(sergey):
+ * - Test if pushing distance on the stack helps.
+ * - Likely and unlikely for if() statements.
+ * - Test restrict attribute for pointers.
+ */
+
+ /* Traversal stack in CUDA thread-local memory. */
+ QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
+ traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+
+ /* Traversal variables in registers. */
+ int stack_ptr = 0;
+ int node_addr = kernel_data.bvh.root;
+
+ /* Ray parameters in registers. */
+ const float tmax = ray->t;
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
+ float isect_t = tmax;
#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
+ Transform ob_itfm;
#endif
- uint num_hits = 0;
- isect_array->t = tmax;
+ uint num_hits = 0;
+ isect_array->t = tmax;
#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
+ int num_hits_in_instance = 0;
#endif
- ssef tnear(0.0f), tfar(isect_t);
+ ssef tnear(0.0f), tfar(isect_t);
#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+ sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+ sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
- float3 P_idir = P*idir;
- sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+ float3 P_idir = P * idir;
+ sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
+ sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- qbvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
+ /* Offsets to select the side that becomes the lower or upper bound. */
+ int near_x, near_y, near_z;
+ int far_x, far_y, far_z;
+ qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ /* Traversal loop. */
+ do {
+ do {
+ /* Traverse internal nodes. */
+ while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(inodes.x) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
+ if ((__float_as_uint(inodes.x) & visibility) == 0) {
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
#endif
- ssef dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
+ ssef dist;
+ int child_mask = NODE_INTERSECT(kg,
+ tnear,
+ tfar,
#ifdef __KERNEL_AVX2__
- P_idir4,
+ P_idir4,
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
+ org4,
#endif
#if BVH_FEATURE(BVH_HAIR)
- dir4,
+ dir4,
#endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- node_addr,
- &dist);
-
- if(child_mask != 0) {
- float4 cnodes;
+ idir4,
+ near_x,
+ near_y,
+ near_z,
+ far_x,
+ far_y,
+ far_z,
+ node_addr,
+ &dist);
+
+ if (child_mask != 0) {
+ float4 cnodes;
#if BVH_FEATURE(BVH_HAIR)
- if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
- }
- else
+ if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
+ }
+ else
#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if(child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float*)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float*)&dist)[r];
- if(child_mask == 0) {
- if(d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float*)&dist)[r];
- if(child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float*)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if(node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
-
- if((__float_as_uint(leaf.z) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- int prim_addr = __float_as_int(leaf.x);
+ {
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
+ }
+
+ /* One child is hit, continue with that child. */
+ int r = __bscf(child_mask);
+ if (child_mask == 0) {
+ node_addr = __float_as_int(cnodes[r]);
+ continue;
+ }
+
+ /* Two children are hit, push far child, and continue with
+ * closer child.
+ */
+ int c0 = __float_as_int(cnodes[r]);
+ float d0 = ((float *)&dist)[r];
+ r = __bscf(child_mask);
+ int c1 = __float_as_int(cnodes[r]);
+ float d1 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ if (d1 < d0) {
+ node_addr = c1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
+ continue;
+ }
+ else {
+ node_addr = c0;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ continue;
+ }
+ }
+
+ /* Here starts the slow path for 3 or 4 hit children. We push
+ * all nodes onto the stack to sort them there.
+ */
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
+
+ /* Three children are hit, push all onto stack and sort 3
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c2 = __float_as_int(cnodes[r]);
+ float d2 = ((float *)&dist)[r];
+ if (child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ /* Four children are hit, push all onto stack and sort 4
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c3 = __float_as_int(cnodes[r]);
+ float d3 = ((float *)&dist)[r];
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c3;
+ traversal_stack[stack_ptr].dist = d3;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3]);
+ }
+
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ }
+
+ /* If node is leaf, fetch triangle list. */
+ if (node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+
+ if ((__float_as_uint(leaf.z) & visibility) == 0) {
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+
+ int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
- if(prim_addr >= 0) {
+ if (prim_addr >= 0) {
#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
- bool hit;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch(p_type) {
- case PRIMITIVE_TRIANGLE: {
- for(; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
- if(hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
+ int prim_addr2 = __float_as_int(leaf.y);
+ const uint type = __float_as_int(leaf.w);
+ const uint p_type = type & PRIMITIVE_ALL;
+ bool hit;
+
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+
+ /* Primitive intersection. */
+ switch (p_type) {
+ case PRIMITIVE_TRIANGLE: {
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ /* Only primitives from volume object. */
+ uint tri_object = (object == OBJECT_NONE) ?
+ kernel_tex_fetch(__prim_object, prim_addr) :
+ object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ /* Intersect ray against primitive. */
+ hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
+ if (hit) {
+ /* Move on to next entry in intersections array. */
+ isect_array++;
+ num_hits++;
#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
+ num_hits_in_instance++;
#endif
- isect_array->t = isect_t;
- if(num_hits == max_hits) {
+ isect_array->t = isect_t;
+ if (num_hits == max_hits) {
#if BVH_FEATURE(BVH_INSTANCING)
- if(object != OBJECT_NONE) {
+ if (object != OBJECT_NONE) {
# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
+ float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
# else
- Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
+ Transform itfm = object_fetch_transform(
+ kg, object, OBJECT_INVERSE_TRANSFORM);
+ float t_fac = 1.0f / len(transform_direction(&itfm, dir));
# endif
- for(int i = 0; i < num_hits_in_instance; i++) {
- (isect_array-i-1)->t *= t_fac;
- }
- }
-#endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
+ for (int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array - i - 1)->t *= t_fac;
+ }
+ }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+ return num_hits;
+ }
+ }
+ }
+ break;
+ }
#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for(; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
- if(hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ for (; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ /* Only primitives from volume object. */
+ uint tri_object = (object == OBJECT_NONE) ?
+ kernel_tex_fetch(__prim_object, prim_addr) :
+ object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ /* Intersect ray against primitive. */
+ hit = motion_triangle_intersect(
+ kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
+ if (hit) {
+ /* Move on to next entry in intersections array. */
+ isect_array++;
+ num_hits++;
# if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
+ num_hits_in_instance++;
# endif
- isect_array->t = isect_t;
- if(num_hits == max_hits) {
+ isect_array->t = isect_t;
+ if (num_hits == max_hits) {
# if BVH_FEATURE(BVH_INSTANCING)
- if(object != OBJECT_NONE) {
+ if (object != OBJECT_NONE) {
# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
+ float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
# else
- Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
+ Transform itfm = object_fetch_transform(
+ kg, object, OBJECT_INVERSE_TRANSFORM);
+ float t_fac = 1.0f / len(transform_direction(&itfm, dir));
# endif
- for(int i = 0; i < num_hits_in_instance; i++) {
- (isect_array-i-1)->t *= t_fac;
- }
- }
-# endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
+ for (int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array - i - 1)->t *= t_fac;
+ }
+ }
+# endif /* BVH_FEATURE(BVH_INSTANCING) */
+ return num_hits;
+ }
+ }
+ }
+ break;
+ }
#endif
- }
- }
+ }
+ }
#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr-1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
- if(object_flag & SD_OBJECT_HAS_VOLUME) {
+ else {
+ /* Instance push. */
+ object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
+ int object_flag = kernel_tex_fetch(__object_flag, object);
+ if (object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
- isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
+ isect_t = bvh_instance_motion_push(
+ kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
# else
- isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
+ isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
# endif
- qbvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
- tfar = ssef(isect_t);
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+ qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+ tfar = ssef(isect_t);
+ idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
# endif
# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+ P_idir = P * idir;
+ P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* Pop. */
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ num_hits_in_instance = 0;
+ isect_array->t = isect_t;
+
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
+
+ node_addr = kernel_tex_fetch(__object_node, object);
+ }
+ else {
+ /* Pop. */
+ object = OBJECT_NONE;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ }
+ }
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
- if(stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
+ if (stack_ptr >= 0) {
+ kernel_assert(object != OBJECT_NONE);
- /* Instance pop. */
- if(num_hits_in_instance) {
- float t_fac;
+ /* Instance pop. */
+ if (num_hits_in_instance) {
+ float t_fac;
# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
+ bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
+ bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
# endif
- /* Scale isect->t to adjust for instancing. */
- for(int i = 0; i < num_hits_in_instance; i++) {
- (isect_array-i-1)->t *= t_fac;
- }
- }
- else {
+ /* Scale isect->t to adjust for instancing. */
+ for (int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array - i - 1)->t *= t_fac;
+ }
+ }
+ else {
# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
+ bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
+ bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
# endif
- }
+ }
- isect_t = tmax;
- isect_array->t = isect_t;
+ isect_t = tmax;
+ isect_array->t = isect_t;
- qbvh_near_far_idx_calc(idir,
- &near_x, &near_y, &near_z,
- &far_x, &far_y, &far_z);
- tfar = ssef(isect_t);
+ qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+ tfar = ssef(isect_t);
# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+ idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+ P_idir = P * idir;
+ P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(node_addr != ENTRYPOINT_SENTINEL);
+ object = OBJECT_NONE;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while (node_addr != ENTRYPOINT_SENTINEL);
- return num_hits;
+ return num_hits;
}
#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/closure/alloc.h b/intern/cycles/kernel/closure/alloc.h
index acccba9ecec..341d1e16eb1 100644
--- a/intern/cycles/kernel/closure/alloc.h
+++ b/intern/cycles/kernel/closure/alloc.h
@@ -18,69 +18,72 @@ CCL_NAMESPACE_BEGIN
ccl_device ShaderClosure *closure_alloc(ShaderData *sd, int size, ClosureType type, float3 weight)
{
- kernel_assert(size <= sizeof(ShaderClosure));
+ kernel_assert(size <= sizeof(ShaderClosure));
- if(sd->num_closure_left == 0)
- return NULL;
+ if (sd->num_closure_left == 0)
+ return NULL;
- ShaderClosure *sc = &sd->closure[sd->num_closure];
+ ShaderClosure *sc = &sd->closure[sd->num_closure];
- sc->type = type;
- sc->weight = weight;
+ sc->type = type;
+ sc->weight = weight;
- sd->num_closure++;
- sd->num_closure_left--;
+ sd->num_closure++;
+ sd->num_closure_left--;
- return sc;
+ return sc;
}
ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size)
{
- /* Allocate extra space for closure that need more parameters. We allocate
- * in chunks of sizeof(ShaderClosure) starting from the end of the closure
- * array.
- *
- * This lets us keep the same fast array iteration over closures, as we
- * found linked list iteration and iteration with skipping to be slower. */
- int num_extra = ((size + sizeof(ShaderClosure) - 1) / sizeof(ShaderClosure));
-
- if(num_extra > sd->num_closure_left) {
- /* Remove previous closure if it was allocated. */
- sd->num_closure--;
- sd->num_closure_left++;
- return NULL;
- }
-
- sd->num_closure_left -= num_extra;
- return (ccl_addr_space void*)(sd->closure + sd->num_closure + sd->num_closure_left);
+ /* Allocate extra space for closure that need more parameters. We allocate
+ * in chunks of sizeof(ShaderClosure) starting from the end of the closure
+ * array.
+ *
+ * This lets us keep the same fast array iteration over closures, as we
+ * found linked list iteration and iteration with skipping to be slower. */
+ int num_extra = ((size + sizeof(ShaderClosure) - 1) / sizeof(ShaderClosure));
+
+ if (num_extra > sd->num_closure_left) {
+ /* Remove previous closure if it was allocated. */
+ sd->num_closure--;
+ sd->num_closure_left++;
+ return NULL;
+ }
+
+ sd->num_closure_left -= num_extra;
+ return (ccl_addr_space void *)(sd->closure + sd->num_closure + sd->num_closure_left);
}
ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 weight)
{
- ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
+ ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
- if(sc == NULL)
- return NULL;
+ if (sc == NULL)
+ return NULL;
- float sample_weight = fabsf(average(weight));
- sc->sample_weight = sample_weight;
- return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? sc : NULL;
+ float sample_weight = fabsf(average(weight));
+ sc->sample_weight = sample_weight;
+ return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? sc : NULL;
}
#ifdef __OSL__
-ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd, int size, float3 weight, void *data)
+ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd,
+ int size,
+ float3 weight,
+ void *data)
{
- ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
+ ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
- if(!sc)
- return NULL;
+ if (!sc)
+ return NULL;
- memcpy((void *)sc, data, size);
+ memcpy((void *)sc, data, size);
- float sample_weight = fabsf(average(weight));
- sc->weight = weight;
- sc->sample_weight = sample_weight;
- return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? sc : NULL;
+ float sample_weight = fabsf(average(weight));
+ sc->weight = weight;
+ sc->sample_weight = sample_weight;
+ return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? sc : NULL;
}
#endif
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index 3a9629ea9d7..5e26f90a878 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -39,38 +39,38 @@ CCL_NAMESPACE_BEGIN
* 0 for singular closures and 1 otherwise. */
ccl_device_inline float bsdf_get_specular_roughness_squared(const ShaderClosure *sc)
{
- if(CLOSURE_IS_BSDF_SINGULAR(sc->type)) {
- return 0.0f;
- }
+ if (CLOSURE_IS_BSDF_SINGULAR(sc->type)) {
+ return 0.0f;
+ }
- if(CLOSURE_IS_BSDF_MICROFACET(sc->type)) {
- MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc;
- return bsdf->alpha_x*bsdf->alpha_y;
- }
+ if (CLOSURE_IS_BSDF_MICROFACET(sc->type)) {
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
+ return bsdf->alpha_x * bsdf->alpha_y;
+ }
- return 1.0f;
+ return 1.0f;
}
ccl_device_inline float bsdf_get_roughness_squared(const ShaderClosure *sc)
{
- /* This version includes diffuse, mainly for baking Principled BSDF
- * where specular and metallic zero otherwise does not bake the
- * specified roughness parameter. */
- if(sc->type == CLOSURE_BSDF_OREN_NAYAR_ID) {
- OrenNayarBsdf *bsdf = (OrenNayarBsdf*)sc;
- return sqr(sqr(bsdf->roughness));
- }
+ /* This version includes diffuse, mainly for baking Principled BSDF
+ * where specular and metallic zero otherwise does not bake the
+ * specified roughness parameter. */
+ if (sc->type == CLOSURE_BSDF_OREN_NAYAR_ID) {
+ OrenNayarBsdf *bsdf = (OrenNayarBsdf *)sc;
+ return sqr(sqr(bsdf->roughness));
+ }
- if(sc->type == CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID) {
- PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)sc;
- return sqr(sqr(bsdf->roughness));
- }
+ if (sc->type == CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID) {
+ PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)sc;
+ return sqr(sqr(bsdf->roughness));
+ }
- if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
- return 0.0f;
- }
+ if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
+ return 0.0f;
+ }
- return bsdf_get_specular_roughness_squared(sc);
+ return bsdf_get_specular_roughness_squared(sc);
}
ccl_device_inline int bsdf_sample(KernelGlobals *kg,
@@ -83,133 +83,349 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
differential3 *domega_in,
float *pdf)
{
- int label;
+ int label;
- switch(sc->type) {
- case CLOSURE_BSDF_DIFFUSE_ID:
- case CLOSURE_BSDF_BSSRDF_ID:
- label = bsdf_diffuse_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
+ switch (sc->type) {
+ case CLOSURE_BSDF_DIFFUSE_ID:
+ case CLOSURE_BSDF_BSSRDF_ID:
+ label = bsdf_diffuse_sample(sc,
+ sd->Ng,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf);
+ break;
#ifdef __SVM__
- case CLOSURE_BSDF_OREN_NAYAR_ID:
- label = bsdf_oren_nayar_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
-#ifdef __OSL__
- case CLOSURE_BSDF_PHONG_RAMP_ID:
- label = bsdf_phong_ramp_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
- case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
- label = bsdf_diffuse_ramp_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
-#endif
- case CLOSURE_BSDF_TRANSLUCENT_ID:
- label = bsdf_translucent_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
- case CLOSURE_BSDF_REFLECTION_ID:
- label = bsdf_reflection_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
- case CLOSURE_BSDF_REFRACTION_ID:
- label = bsdf_refraction_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
- case CLOSURE_BSDF_TRANSPARENT_ID:
- label = bsdf_transparent_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
- case CLOSURE_BSDF_MICROFACET_GGX_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
- label = bsdf_microfacet_ggx_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
- label = bsdf_microfacet_multi_ggx_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf, &sd->lcg_state);
- break;
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
- label = bsdf_microfacet_multi_ggx_glass_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf, &sd->lcg_state);
- break;
- case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
- label = bsdf_microfacet_beckmann_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
- case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
- case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
- label = bsdf_ashikhmin_shirley_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
- case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
- label = bsdf_ashikhmin_velvet_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
- case CLOSURE_BSDF_DIFFUSE_TOON_ID:
- label = bsdf_diffuse_toon_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
- case CLOSURE_BSDF_GLOSSY_TOON_ID:
- label = bsdf_glossy_toon_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
- case CLOSURE_BSDF_HAIR_REFLECTION_ID:
- label = bsdf_hair_reflection_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
- case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
- label = bsdf_hair_transmission_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
- case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
- label = bsdf_principled_hair_sample(kg, sc, sd, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
-#ifdef __PRINCIPLED__
- case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
- case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
- label = bsdf_principled_diffuse_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
- case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
- label = bsdf_principled_sheen_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
-#endif /* __PRINCIPLED__ */
+ case CLOSURE_BSDF_OREN_NAYAR_ID:
+ label = bsdf_oren_nayar_sample(sc,
+ sd->Ng,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf);
+ break;
+# ifdef __OSL__
+ case CLOSURE_BSDF_PHONG_RAMP_ID:
+ label = bsdf_phong_ramp_sample(sc,
+ sd->Ng,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf);
+ break;
+ case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
+ label = bsdf_diffuse_ramp_sample(sc,
+ sd->Ng,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf);
+ break;
+# endif
+ case CLOSURE_BSDF_TRANSLUCENT_ID:
+ label = bsdf_translucent_sample(sc,
+ sd->Ng,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf);
+ break;
+ case CLOSURE_BSDF_REFLECTION_ID:
+ label = bsdf_reflection_sample(sc,
+ sd->Ng,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf);
+ break;
+ case CLOSURE_BSDF_REFRACTION_ID:
+ label = bsdf_refraction_sample(sc,
+ sd->Ng,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf);
+ break;
+ case CLOSURE_BSDF_TRANSPARENT_ID:
+ label = bsdf_transparent_sample(sc,
+ sd->Ng,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf);
+ break;
+ case CLOSURE_BSDF_MICROFACET_GGX_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
+ label = bsdf_microfacet_ggx_sample(kg,
+ sc,
+ sd->Ng,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf);
+ break;
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
+ label = bsdf_microfacet_multi_ggx_sample(kg,
+ sc,
+ sd->Ng,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf,
+ &sd->lcg_state);
+ break;
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
+ label = bsdf_microfacet_multi_ggx_glass_sample(kg,
+ sc,
+ sd->Ng,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf,
+ &sd->lcg_state);
+ break;
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
+ label = bsdf_microfacet_beckmann_sample(kg,
+ sc,
+ sd->Ng,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf);
+ break;
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
+ label = bsdf_ashikhmin_shirley_sample(sc,
+ sd->Ng,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf);
+ break;
+ case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
+ label = bsdf_ashikhmin_velvet_sample(sc,
+ sd->Ng,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf);
+ break;
+ case CLOSURE_BSDF_DIFFUSE_TOON_ID:
+ label = bsdf_diffuse_toon_sample(sc,
+ sd->Ng,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf);
+ break;
+ case CLOSURE_BSDF_GLOSSY_TOON_ID:
+ label = bsdf_glossy_toon_sample(sc,
+ sd->Ng,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf);
+ break;
+ case CLOSURE_BSDF_HAIR_REFLECTION_ID:
+ label = bsdf_hair_reflection_sample(sc,
+ sd->Ng,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf);
+ break;
+ case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
+ label = bsdf_hair_transmission_sample(sc,
+ sd->Ng,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf);
+ break;
+ case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
+ label = bsdf_principled_hair_sample(
+ kg, sc, sd, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
+ break;
+# ifdef __PRINCIPLED__
+ case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
+ case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
+ label = bsdf_principled_diffuse_sample(sc,
+ sd->Ng,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf);
+ break;
+ case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
+ label = bsdf_principled_sheen_sample(sc,
+ sd->Ng,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf);
+ break;
+# endif /* __PRINCIPLED__ */
#endif
#ifdef __VOLUME__
- case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
- label = volume_henyey_greenstein_sample(sc, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
+ case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
+ label = volume_henyey_greenstein_sample(sc,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf);
+ break;
#endif
- default:
- label = LABEL_NONE;
- break;
- }
+ default:
+ label = LABEL_NONE;
+ break;
+ }
- /* Test if BSDF sample should be treated as transparent for background. */
- if(label & LABEL_TRANSMIT) {
- float threshold_squared = kernel_data.background.transparent_roughness_squared_threshold;
+ /* Test if BSDF sample should be treated as transparent for background. */
+ if (label & LABEL_TRANSMIT) {
+ float threshold_squared = kernel_data.background.transparent_roughness_squared_threshold;
- if(threshold_squared >= 0.0f) {
- if(bsdf_get_specular_roughness_squared(sc) <= threshold_squared) {
- label |= LABEL_TRANSMIT_TRANSPARENT;
- }
- }
- }
+ if (threshold_squared >= 0.0f) {
+ if (bsdf_get_specular_roughness_squared(sc) <= threshold_squared) {
+ label |= LABEL_TRANSMIT_TRANSPARENT;
+ }
+ }
+ }
- return label;
+ return label;
}
#ifndef __KERNEL_CUDA__
@@ -217,285 +433,288 @@ ccl_device
#else
ccl_device_inline
#endif
-float3 bsdf_eval(KernelGlobals *kg,
- ShaderData *sd,
- const ShaderClosure *sc,
- const float3 omega_in,
- float *pdf)
+ float3
+ bsdf_eval(KernelGlobals *kg,
+ ShaderData *sd,
+ const ShaderClosure *sc,
+ const float3 omega_in,
+ float *pdf)
{
- float3 eval;
+ float3 eval;
- if(dot(sd->Ng, omega_in) >= 0.0f) {
- switch(sc->type) {
- case CLOSURE_BSDF_DIFFUSE_ID:
- case CLOSURE_BSDF_BSSRDF_ID:
- eval = bsdf_diffuse_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
+ if (dot(sd->Ng, omega_in) >= 0.0f) {
+ switch (sc->type) {
+ case CLOSURE_BSDF_DIFFUSE_ID:
+ case CLOSURE_BSDF_BSSRDF_ID:
+ eval = bsdf_diffuse_eval_reflect(sc, sd->I, omega_in, pdf);
+ break;
#ifdef __SVM__
- case CLOSURE_BSDF_OREN_NAYAR_ID:
- eval = bsdf_oren_nayar_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
-#ifdef __OSL__
- case CLOSURE_BSDF_PHONG_RAMP_ID:
- eval = bsdf_phong_ramp_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
- eval = bsdf_diffuse_ramp_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
-#endif
- case CLOSURE_BSDF_TRANSLUCENT_ID:
- eval = bsdf_translucent_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_REFLECTION_ID:
- eval = bsdf_reflection_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_REFRACTION_ID:
- eval = bsdf_refraction_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_TRANSPARENT_ID:
- eval = bsdf_transparent_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_MICROFACET_GGX_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
- eval = bsdf_microfacet_ggx_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
- eval = bsdf_microfacet_multi_ggx_eval_reflect(sc, sd->I, omega_in, pdf, &sd->lcg_state);
- break;
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
- eval = bsdf_microfacet_multi_ggx_glass_eval_reflect(sc, sd->I, omega_in, pdf, &sd->lcg_state);
- break;
- case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
- eval = bsdf_microfacet_beckmann_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
- case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
- eval = bsdf_ashikhmin_shirley_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
- eval = bsdf_ashikhmin_velvet_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_DIFFUSE_TOON_ID:
- eval = bsdf_diffuse_toon_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_GLOSSY_TOON_ID:
- eval = bsdf_glossy_toon_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
- eval = bsdf_principled_hair_eval(kg, sd, sc, omega_in, pdf);
- break;
- case CLOSURE_BSDF_HAIR_REFLECTION_ID:
- eval = bsdf_hair_reflection_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
- eval = bsdf_hair_transmission_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
-#ifdef __PRINCIPLED__
- case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
- case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
- eval = bsdf_principled_diffuse_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
- eval = bsdf_principled_sheen_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
-#endif /* __PRINCIPLED__ */
+ case CLOSURE_BSDF_OREN_NAYAR_ID:
+ eval = bsdf_oren_nayar_eval_reflect(sc, sd->I, omega_in, pdf);
+ break;
+# ifdef __OSL__
+ case CLOSURE_BSDF_PHONG_RAMP_ID:
+ eval = bsdf_phong_ramp_eval_reflect(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
+ eval = bsdf_diffuse_ramp_eval_reflect(sc, sd->I, omega_in, pdf);
+ break;
+# endif
+ case CLOSURE_BSDF_TRANSLUCENT_ID:
+ eval = bsdf_translucent_eval_reflect(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_REFLECTION_ID:
+ eval = bsdf_reflection_eval_reflect(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_REFRACTION_ID:
+ eval = bsdf_refraction_eval_reflect(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_TRANSPARENT_ID:
+ eval = bsdf_transparent_eval_reflect(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_MICROFACET_GGX_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
+ eval = bsdf_microfacet_ggx_eval_reflect(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
+ eval = bsdf_microfacet_multi_ggx_eval_reflect(sc, sd->I, omega_in, pdf, &sd->lcg_state);
+ break;
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
+ eval = bsdf_microfacet_multi_ggx_glass_eval_reflect(
+ sc, sd->I, omega_in, pdf, &sd->lcg_state);
+ break;
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
+ eval = bsdf_microfacet_beckmann_eval_reflect(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
+ eval = bsdf_ashikhmin_shirley_eval_reflect(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
+ eval = bsdf_ashikhmin_velvet_eval_reflect(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_DIFFUSE_TOON_ID:
+ eval = bsdf_diffuse_toon_eval_reflect(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_GLOSSY_TOON_ID:
+ eval = bsdf_glossy_toon_eval_reflect(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
+ eval = bsdf_principled_hair_eval(kg, sd, sc, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_HAIR_REFLECTION_ID:
+ eval = bsdf_hair_reflection_eval_reflect(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
+ eval = bsdf_hair_transmission_eval_reflect(sc, sd->I, omega_in, pdf);
+ break;
+# ifdef __PRINCIPLED__
+ case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
+ case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
+ eval = bsdf_principled_diffuse_eval_reflect(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
+ eval = bsdf_principled_sheen_eval_reflect(sc, sd->I, omega_in, pdf);
+ break;
+# endif /* __PRINCIPLED__ */
#endif
#ifdef __VOLUME__
- case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
- eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf);
- break;
+ case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
+ eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf);
+ break;
#endif
- default:
- eval = make_float3(0.0f, 0.0f, 0.0f);
- break;
- }
- }
- else {
- switch(sc->type) {
- case CLOSURE_BSDF_DIFFUSE_ID:
- case CLOSURE_BSDF_BSSRDF_ID:
- eval = bsdf_diffuse_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
+ default:
+ eval = make_float3(0.0f, 0.0f, 0.0f);
+ break;
+ }
+ }
+ else {
+ switch (sc->type) {
+ case CLOSURE_BSDF_DIFFUSE_ID:
+ case CLOSURE_BSDF_BSSRDF_ID:
+ eval = bsdf_diffuse_eval_transmit(sc, sd->I, omega_in, pdf);
+ break;
#ifdef __SVM__
- case CLOSURE_BSDF_OREN_NAYAR_ID:
- eval = bsdf_oren_nayar_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_TRANSLUCENT_ID:
- eval = bsdf_translucent_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_REFLECTION_ID:
- eval = bsdf_reflection_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_REFRACTION_ID:
- eval = bsdf_refraction_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_TRANSPARENT_ID:
- eval = bsdf_transparent_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_MICROFACET_GGX_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
- eval = bsdf_microfacet_ggx_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
- eval = bsdf_microfacet_multi_ggx_eval_transmit(sc, sd->I, omega_in, pdf, &sd->lcg_state);
- break;
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
- eval = bsdf_microfacet_multi_ggx_glass_eval_transmit(sc, sd->I, omega_in, pdf, &sd->lcg_state);
- break;
- case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
- eval = bsdf_microfacet_beckmann_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
- case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
- eval = bsdf_ashikhmin_shirley_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
- eval = bsdf_ashikhmin_velvet_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_DIFFUSE_TOON_ID:
- eval = bsdf_diffuse_toon_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_GLOSSY_TOON_ID:
- eval = bsdf_glossy_toon_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
- eval = bsdf_principled_hair_eval(kg, sd, sc, omega_in, pdf);
- break;
- case CLOSURE_BSDF_HAIR_REFLECTION_ID:
- eval = bsdf_hair_reflection_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
- eval = bsdf_hair_transmission_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
-#ifdef __PRINCIPLED__
- case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
- case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
- eval = bsdf_principled_diffuse_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
- eval = bsdf_principled_sheen_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
-#endif /* __PRINCIPLED__ */
+ case CLOSURE_BSDF_OREN_NAYAR_ID:
+ eval = bsdf_oren_nayar_eval_transmit(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_TRANSLUCENT_ID:
+ eval = bsdf_translucent_eval_transmit(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_REFLECTION_ID:
+ eval = bsdf_reflection_eval_transmit(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_REFRACTION_ID:
+ eval = bsdf_refraction_eval_transmit(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_TRANSPARENT_ID:
+ eval = bsdf_transparent_eval_transmit(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_MICROFACET_GGX_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
+ eval = bsdf_microfacet_ggx_eval_transmit(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
+ eval = bsdf_microfacet_multi_ggx_eval_transmit(sc, sd->I, omega_in, pdf, &sd->lcg_state);
+ break;
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
+ eval = bsdf_microfacet_multi_ggx_glass_eval_transmit(
+ sc, sd->I, omega_in, pdf, &sd->lcg_state);
+ break;
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
+ eval = bsdf_microfacet_beckmann_eval_transmit(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
+ eval = bsdf_ashikhmin_shirley_eval_transmit(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
+ eval = bsdf_ashikhmin_velvet_eval_transmit(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_DIFFUSE_TOON_ID:
+ eval = bsdf_diffuse_toon_eval_transmit(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_GLOSSY_TOON_ID:
+ eval = bsdf_glossy_toon_eval_transmit(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
+ eval = bsdf_principled_hair_eval(kg, sd, sc, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_HAIR_REFLECTION_ID:
+ eval = bsdf_hair_reflection_eval_transmit(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
+ eval = bsdf_hair_transmission_eval_transmit(sc, sd->I, omega_in, pdf);
+ break;
+# ifdef __PRINCIPLED__
+ case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
+ case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
+ eval = bsdf_principled_diffuse_eval_transmit(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
+ eval = bsdf_principled_sheen_eval_transmit(sc, sd->I, omega_in, pdf);
+ break;
+# endif /* __PRINCIPLED__ */
#endif
#ifdef __VOLUME__
- case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
- eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf);
- break;
+ case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
+ eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf);
+ break;
#endif
- default:
- eval = make_float3(0.0f, 0.0f, 0.0f);
- break;
- }
- }
+ default:
+ eval = make_float3(0.0f, 0.0f, 0.0f);
+ break;
+ }
+ }
- return eval;
+ return eval;
}
ccl_device void bsdf_blur(KernelGlobals *kg, ShaderClosure *sc, float roughness)
{
- /* ToDo: do we want to blur volume closures? */
+ /* ToDo: do we want to blur volume closures? */
#ifdef __SVM__
- switch(sc->type) {
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
- bsdf_microfacet_multi_ggx_blur(sc, roughness);
- break;
- case CLOSURE_BSDF_MICROFACET_GGX_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
- bsdf_microfacet_ggx_blur(sc, roughness);
- break;
- case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
- bsdf_microfacet_beckmann_blur(sc, roughness);
- break;
- case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
- case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
- bsdf_ashikhmin_shirley_blur(sc, roughness);
- break;
- case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
- bsdf_principled_hair_blur(sc, roughness);
- break;
- default:
- break;
- }
+ switch (sc->type) {
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
+ bsdf_microfacet_multi_ggx_blur(sc, roughness);
+ break;
+ case CLOSURE_BSDF_MICROFACET_GGX_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
+ bsdf_microfacet_ggx_blur(sc, roughness);
+ break;
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
+ bsdf_microfacet_beckmann_blur(sc, roughness);
+ break;
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
+ bsdf_ashikhmin_shirley_blur(sc, roughness);
+ break;
+ case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
+ bsdf_principled_hair_blur(sc, roughness);
+ break;
+ default:
+ break;
+ }
#endif
}
ccl_device bool bsdf_merge(ShaderClosure *a, ShaderClosure *b)
{
#ifdef __SVM__
- switch(a->type) {
- case CLOSURE_BSDF_TRANSPARENT_ID:
- return true;
- case CLOSURE_BSDF_DIFFUSE_ID:
- case CLOSURE_BSDF_BSSRDF_ID:
- case CLOSURE_BSDF_TRANSLUCENT_ID:
- return bsdf_diffuse_merge(a, b);
- case CLOSURE_BSDF_OREN_NAYAR_ID:
- return bsdf_oren_nayar_merge(a, b);
- case CLOSURE_BSDF_REFLECTION_ID:
- case CLOSURE_BSDF_REFRACTION_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
- case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
- case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
- return bsdf_microfacet_merge(a, b);
- case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
- return bsdf_ashikhmin_velvet_merge(a, b);
- case CLOSURE_BSDF_DIFFUSE_TOON_ID:
- case CLOSURE_BSDF_GLOSSY_TOON_ID:
- return bsdf_toon_merge(a, b);
- case CLOSURE_BSDF_HAIR_REFLECTION_ID:
- case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
- return bsdf_hair_merge(a, b);
-#ifdef __PRINCIPLED__
- case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
- case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
- return bsdf_principled_diffuse_merge(a, b);
-#endif
-#ifdef __VOLUME__
- case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
- return volume_henyey_greenstein_merge(a, b);
-#endif
- default:
- return false;
- }
+ switch (a->type) {
+ case CLOSURE_BSDF_TRANSPARENT_ID:
+ return true;
+ case CLOSURE_BSDF_DIFFUSE_ID:
+ case CLOSURE_BSDF_BSSRDF_ID:
+ case CLOSURE_BSDF_TRANSLUCENT_ID:
+ return bsdf_diffuse_merge(a, b);
+ case CLOSURE_BSDF_OREN_NAYAR_ID:
+ return bsdf_oren_nayar_merge(a, b);
+ case CLOSURE_BSDF_REFLECTION_ID:
+ case CLOSURE_BSDF_REFRACTION_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
+ return bsdf_microfacet_merge(a, b);
+ case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
+ return bsdf_ashikhmin_velvet_merge(a, b);
+ case CLOSURE_BSDF_DIFFUSE_TOON_ID:
+ case CLOSURE_BSDF_GLOSSY_TOON_ID:
+ return bsdf_toon_merge(a, b);
+ case CLOSURE_BSDF_HAIR_REFLECTION_ID:
+ case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
+ return bsdf_hair_merge(a, b);
+# ifdef __PRINCIPLED__
+ case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
+ case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
+ return bsdf_principled_diffuse_merge(a, b);
+# endif
+# ifdef __VOLUME__
+ case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
+ return volume_henyey_greenstein_merge(a, b);
+# endif
+ default:
+ return false;
+ }
#else
- return false;
+ return false;
#endif
}
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
index 4e7425bd800..b3b1c37748d 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
@@ -33,203 +33,226 @@ CCL_NAMESPACE_BEGIN
ccl_device int bsdf_ashikhmin_shirley_setup(MicrofacetBsdf *bsdf)
{
- bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
- bsdf->alpha_y = bsdf->alpha_x;
+ bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
+ bsdf->alpha_y = bsdf->alpha_x;
- bsdf->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID;
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ bsdf->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID;
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device int bsdf_ashikhmin_shirley_aniso_setup(MicrofacetBsdf *bsdf)
{
- bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
- bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f);
+ bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
+ bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f);
- bsdf->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID;
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ bsdf->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID;
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device void bsdf_ashikhmin_shirley_blur(ShaderClosure *sc, float roughness)
{
- MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc;
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
- bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
- bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
+ bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
+ bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
}
ccl_device_inline float bsdf_ashikhmin_shirley_roughness_to_exponent(float roughness)
{
- return 2.0f / (roughness*roughness) - 2.0f;
+ return 2.0f / (roughness * roughness) - 2.0f;
}
-ccl_device_forceinline float3 bsdf_ashikhmin_shirley_eval_reflect(
- const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- float *pdf)
+ccl_device_forceinline float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
- float3 N = bsdf->N;
-
- float NdotI = dot(N, I); /* in Cycles/OSL convention I is omega_out */
- float NdotO = dot(N, omega_in); /* and consequently we use for O omaga_in ;) */
-
- float out = 0.0f;
-
- if(fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f)
- return make_float3(0.0f, 0.0f, 0.0f);
-
- if(NdotI > 0.0f && NdotO > 0.0f) {
- NdotI = fmaxf(NdotI, 1e-6f);
- NdotO = fmaxf(NdotO, 1e-6f);
- float3 H = normalize(omega_in + I);
- float HdotI = fmaxf(fabsf(dot(H, I)), 1e-6f);
- float HdotN = fmaxf(dot(H, N), 1e-6f);
-
- float pump = 1.0f / fmaxf(1e-6f, (HdotI*fmaxf(NdotO, NdotI))); /* pump from original paper (first derivative disc., but cancels the HdotI in the pdf nicely) */
- /*float pump = 1.0f / fmaxf(1e-4f, ((NdotO + NdotI) * (NdotO*NdotI))); */ /* pump from d-brdf paper */
-
- float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x);
- float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y);
-
- if(n_x == n_y) {
- /* isotropic */
- float e = n_x;
- float lobe = powf(HdotN, e);
- float norm = (n_x + 1.0f) / (8.0f * M_PI_F);
-
- out = NdotO * norm * lobe * pump;
- *pdf = norm * lobe / HdotI; /* this is p_h / 4(H.I) (conversion from 'wh measure' to 'wi measure', eq. 8 in paper) */
- }
- else {
- /* anisotropic */
- float3 X, Y;
- make_orthonormals_tangent(N, bsdf->T, &X, &Y);
-
- float HdotX = dot(H, X);
- float HdotY = dot(H, Y);
- float lobe;
- if(HdotN < 1.0f) {
- float e = (n_x * HdotX*HdotX + n_y * HdotY*HdotY) / (1.0f - HdotN*HdotN);
- lobe = powf(HdotN, e);
- }
- else {
- lobe = 1.0f;
- }
- float norm = sqrtf((n_x + 1.0f)*(n_y + 1.0f)) / (8.0f * M_PI_F);
-
- out = NdotO * norm * lobe * pump;
- *pdf = norm * lobe / HdotI;
- }
- }
-
- return make_float3(out, out, out);
+ const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+ float3 N = bsdf->N;
+
+ float NdotI = dot(N, I); /* in Cycles/OSL convention I is omega_out */
+ float NdotO = dot(N, omega_in); /* and consequently we use for O omaga_in ;) */
+
+ float out = 0.0f;
+
+ if (fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f)
+ return make_float3(0.0f, 0.0f, 0.0f);
+
+ if (NdotI > 0.0f && NdotO > 0.0f) {
+ NdotI = fmaxf(NdotI, 1e-6f);
+ NdotO = fmaxf(NdotO, 1e-6f);
+ float3 H = normalize(omega_in + I);
+ float HdotI = fmaxf(fabsf(dot(H, I)), 1e-6f);
+ float HdotN = fmaxf(dot(H, N), 1e-6f);
+
+ float pump =
+ 1.0f /
+ fmaxf(
+ 1e-6f,
+ (HdotI *
+ fmaxf(
+ NdotO,
+ NdotI))); /* pump from original paper (first derivative disc., but cancels the HdotI in the pdf nicely) */
+ /*float pump = 1.0f / fmaxf(1e-4f, ((NdotO + NdotI) * (NdotO*NdotI))); */ /* pump from d-brdf paper */
+
+ float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x);
+ float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y);
+
+ if (n_x == n_y) {
+ /* isotropic */
+ float e = n_x;
+ float lobe = powf(HdotN, e);
+ float norm = (n_x + 1.0f) / (8.0f * M_PI_F);
+
+ out = NdotO * norm * lobe * pump;
+ *pdf =
+ norm * lobe /
+ HdotI; /* this is p_h / 4(H.I) (conversion from 'wh measure' to 'wi measure', eq. 8 in paper) */
+ }
+ else {
+ /* anisotropic */
+ float3 X, Y;
+ make_orthonormals_tangent(N, bsdf->T, &X, &Y);
+
+ float HdotX = dot(H, X);
+ float HdotY = dot(H, Y);
+ float lobe;
+ if (HdotN < 1.0f) {
+ float e = (n_x * HdotX * HdotX + n_y * HdotY * HdotY) / (1.0f - HdotN * HdotN);
+ lobe = powf(HdotN, e);
+ }
+ else {
+ lobe = 1.0f;
+ }
+ float norm = sqrtf((n_x + 1.0f) * (n_y + 1.0f)) / (8.0f * M_PI_F);
+
+ out = NdotO * norm * lobe * pump;
+ *pdf = norm * lobe / HdotI;
+ }
+ }
+
+ return make_float3(out, out, out);
}
-ccl_device float3 bsdf_ashikhmin_shirley_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_ashikhmin_shirley_eval_transmit(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device_inline void bsdf_ashikhmin_shirley_sample_first_quadrant(float n_x, float n_y, float randu, float randv, float *phi, float *cos_theta)
+ccl_device_inline void bsdf_ashikhmin_shirley_sample_first_quadrant(
+ float n_x, float n_y, float randu, float randv, float *phi, float *cos_theta)
{
- *phi = atanf(sqrtf((n_x + 1.0f) / (n_y + 1.0f)) * tanf(M_PI_2_F * randu));
- float cos_phi = cosf(*phi);
- float sin_phi = sinf(*phi);
- *cos_theta = powf(randv, 1.0f / (n_x * cos_phi*cos_phi + n_y * sin_phi*sin_phi + 1.0f));
+ *phi = atanf(sqrtf((n_x + 1.0f) / (n_y + 1.0f)) * tanf(M_PI_2_F * randu));
+ float cos_phi = cosf(*phi);
+ float sin_phi = sinf(*phi);
+ *cos_theta = powf(randv, 1.0f / (n_x * cos_phi * cos_phi + n_y * sin_phi * sin_phi + 1.0f));
}
-ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc,
+ float3 Ng,
+ float3 I,
+ float3 dIdx,
+ float3 dIdy,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ float3 *domega_in_dx,
+ float3 *domega_in_dy,
+ float *pdf)
{
- const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
- float3 N = bsdf->N;
- int label = LABEL_REFLECT | LABEL_GLOSSY;
-
- float NdotI = dot(N, I);
- if(NdotI > 0.0f) {
-
- float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x);
- float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y);
-
- /* get x,y basis on the surface for anisotropy */
- float3 X, Y;
-
- if(n_x == n_y)
- make_orthonormals(N, &X, &Y);
- else
- make_orthonormals_tangent(N, bsdf->T, &X, &Y);
-
- /* sample spherical coords for h in tangent space */
- float phi;
- float cos_theta;
- if(n_x == n_y) {
- /* isotropic sampling */
- phi = M_2PI_F * randu;
- cos_theta = powf(randv, 1.0f / (n_x + 1.0f));
- }
- else {
- /* anisotropic sampling */
- if(randu < 0.25f) { /* first quadrant */
- float remapped_randu = 4.0f * randu;
- bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
- }
- else if(randu < 0.5f) { /* second quadrant */
- float remapped_randu = 4.0f * (.5f - randu);
- bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
- phi = M_PI_F - phi;
- }
- else if(randu < 0.75f) { /* third quadrant */
- float remapped_randu = 4.0f * (randu - 0.5f);
- bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
- phi = M_PI_F + phi;
- }
- else { /* fourth quadrant */
- float remapped_randu = 4.0f * (1.0f - randu);
- bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
- phi = 2.0f * M_PI_F - phi;
- }
- }
-
- /* get half vector in tangent space */
- float sin_theta = sqrtf(fmaxf(0.0f, 1.0f - cos_theta*cos_theta));
- float cos_phi = cosf(phi);
- float sin_phi = sinf(phi); /* no sqrt(1-cos^2) here b/c it causes artifacts */
- float3 h = make_float3(
- sin_theta * cos_phi,
- sin_theta * sin_phi,
- cos_theta
- );
-
- /* half vector to world space */
- float3 H = h.x*X + h.y*Y + h.z*N;
- float HdotI = dot(H, I);
- if(HdotI < 0.0f) H = -H;
-
- /* reflect I on H to get omega_in */
- *omega_in = -I + (2.0f * HdotI) * H;
-
- if(fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f) {
- /* Some high number for MIS. */
- *pdf = 1e6f;
- *eval = make_float3(1e6f, 1e6f, 1e6f);
- label = LABEL_REFLECT | LABEL_SINGULAR;
- }
- else {
- /* leave the rest to eval_reflect */
- *eval = bsdf_ashikhmin_shirley_eval_reflect(sc, I, *omega_in, pdf);
- }
+ const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+ float3 N = bsdf->N;
+ int label = LABEL_REFLECT | LABEL_GLOSSY;
+
+ float NdotI = dot(N, I);
+ if (NdotI > 0.0f) {
+
+ float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x);
+ float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y);
+
+ /* get x,y basis on the surface for anisotropy */
+ float3 X, Y;
+
+ if (n_x == n_y)
+ make_orthonormals(N, &X, &Y);
+ else
+ make_orthonormals_tangent(N, bsdf->T, &X, &Y);
+
+ /* sample spherical coords for h in tangent space */
+ float phi;
+ float cos_theta;
+ if (n_x == n_y) {
+ /* isotropic sampling */
+ phi = M_2PI_F * randu;
+ cos_theta = powf(randv, 1.0f / (n_x + 1.0f));
+ }
+ else {
+ /* anisotropic sampling */
+ if (randu < 0.25f) { /* first quadrant */
+ float remapped_randu = 4.0f * randu;
+ bsdf_ashikhmin_shirley_sample_first_quadrant(
+ n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
+ }
+ else if (randu < 0.5f) { /* second quadrant */
+ float remapped_randu = 4.0f * (.5f - randu);
+ bsdf_ashikhmin_shirley_sample_first_quadrant(
+ n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
+ phi = M_PI_F - phi;
+ }
+ else if (randu < 0.75f) { /* third quadrant */
+ float remapped_randu = 4.0f * (randu - 0.5f);
+ bsdf_ashikhmin_shirley_sample_first_quadrant(
+ n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
+ phi = M_PI_F + phi;
+ }
+ else { /* fourth quadrant */
+ float remapped_randu = 4.0f * (1.0f - randu);
+ bsdf_ashikhmin_shirley_sample_first_quadrant(
+ n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
+ phi = 2.0f * M_PI_F - phi;
+ }
+ }
+
+ /* get half vector in tangent space */
+ float sin_theta = sqrtf(fmaxf(0.0f, 1.0f - cos_theta * cos_theta));
+ float cos_phi = cosf(phi);
+ float sin_phi = sinf(phi); /* no sqrt(1-cos^2) here b/c it causes artifacts */
+ float3 h = make_float3(sin_theta * cos_phi, sin_theta * sin_phi, cos_theta);
+
+ /* half vector to world space */
+ float3 H = h.x * X + h.y * Y + h.z * N;
+ float HdotI = dot(H, I);
+ if (HdotI < 0.0f)
+ H = -H;
+
+ /* reflect I on H to get omega_in */
+ *omega_in = -I + (2.0f * HdotI) * H;
+
+ if (fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f) {
+ /* Some high number for MIS. */
+ *pdf = 1e6f;
+ *eval = make_float3(1e6f, 1e6f, 1e6f);
+ label = LABEL_REFLECT | LABEL_SINGULAR;
+ }
+ else {
+ /* leave the rest to eval_reflect */
+ *eval = bsdf_ashikhmin_shirley_eval_reflect(sc, I, *omega_in, pdf);
+ }
#ifdef __RAY_DIFFERENTIALS__
- /* just do the reflection thing for now */
- *domega_in_dx = (2.0f * dot(N, dIdx)) * N - dIdx;
- *domega_in_dy = (2.0f * dot(N, dIdy)) * N - dIdy;
+ /* just do the reflection thing for now */
+ *domega_in_dx = (2.0f * dot(N, dIdx)) * N - dIdx;
+ *domega_in_dy = (2.0f * dot(N, dIdy)) * N - dIdy;
#endif
- }
+ }
- return label;
+ return label;
}
-
CCL_NAMESPACE_END
-#endif /* __BSDF_ASHIKHMIN_SHIRLEY_H__ */
+#endif /* __BSDF_ASHIKHMIN_SHIRLEY_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
index 80fd9ba2b37..8122bcc1424 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
@@ -36,126 +36,142 @@
CCL_NAMESPACE_BEGIN
typedef ccl_addr_space struct VelvetBsdf {
- SHADER_CLOSURE_BASE;
+ SHADER_CLOSURE_BASE;
- float sigma;
- float invsigma2;
+ float sigma;
+ float invsigma2;
} VelvetBsdf;
ccl_device int bsdf_ashikhmin_velvet_setup(VelvetBsdf *bsdf)
{
- float sigma = fmaxf(bsdf->sigma, 0.01f);
- bsdf->invsigma2 = 1.0f/(sigma * sigma);
+ float sigma = fmaxf(bsdf->sigma, 0.01f);
+ bsdf->invsigma2 = 1.0f / (sigma * sigma);
- bsdf->type = CLOSURE_BSDF_ASHIKHMIN_VELVET_ID;
+ bsdf->type = CLOSURE_BSDF_ASHIKHMIN_VELVET_ID;
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device bool bsdf_ashikhmin_velvet_merge(const ShaderClosure *a, const ShaderClosure *b)
{
- const VelvetBsdf *bsdf_a = (const VelvetBsdf*)a;
- const VelvetBsdf *bsdf_b = (const VelvetBsdf*)b;
+ const VelvetBsdf *bsdf_a = (const VelvetBsdf *)a;
+ const VelvetBsdf *bsdf_b = (const VelvetBsdf *)b;
- return (isequal_float3(bsdf_a->N, bsdf_b->N)) &&
- (bsdf_a->sigma == bsdf_b->sigma);
+ return (isequal_float3(bsdf_a->N, bsdf_b->N)) && (bsdf_a->sigma == bsdf_b->sigma);
}
-ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- const VelvetBsdf *bsdf = (const VelvetBsdf*)sc;
- float m_invsigma2 = bsdf->invsigma2;
- float3 N = bsdf->N;
+ const VelvetBsdf *bsdf = (const VelvetBsdf *)sc;
+ float m_invsigma2 = bsdf->invsigma2;
+ float3 N = bsdf->N;
- float cosNO = dot(N, I);
- float cosNI = dot(N, omega_in);
- if(cosNO > 0 && cosNI > 0) {
- float3 H = normalize(omega_in + I);
+ float cosNO = dot(N, I);
+ float cosNI = dot(N, omega_in);
+ if (cosNO > 0 && cosNI > 0) {
+ float3 H = normalize(omega_in + I);
- float cosNH = dot(N, H);
- float cosHO = fabsf(dot(I, H));
+ float cosNH = dot(N, H);
+ float cosHO = fabsf(dot(I, H));
- if(!(fabsf(cosNH) < 1.0f-1e-5f && cosHO > 1e-5f))
- return make_float3(0.0f, 0.0f, 0.0f);
+ if (!(fabsf(cosNH) < 1.0f - 1e-5f && cosHO > 1e-5f))
+ return make_float3(0.0f, 0.0f, 0.0f);
- float cosNHdivHO = cosNH / cosHO;
- cosNHdivHO = fmaxf(cosNHdivHO, 1e-5f);
+ float cosNHdivHO = cosNH / cosHO;
+ cosNHdivHO = fmaxf(cosNHdivHO, 1e-5f);
- float fac1 = 2 * fabsf(cosNHdivHO * cosNO);
- float fac2 = 2 * fabsf(cosNHdivHO * cosNI);
+ float fac1 = 2 * fabsf(cosNHdivHO * cosNO);
+ float fac2 = 2 * fabsf(cosNHdivHO * cosNI);
- float sinNH2 = 1 - cosNH * cosNH;
- float sinNH4 = sinNH2 * sinNH2;
- float cotangent2 = (cosNH * cosNH) / sinNH2;
+ float sinNH2 = 1 - cosNH * cosNH;
+ float sinNH4 = sinNH2 * sinNH2;
+ float cotangent2 = (cosNH * cosNH) / sinNH2;
- float D = expf(-cotangent2 * m_invsigma2) * m_invsigma2 * M_1_PI_F / sinNH4;
- float G = min(1.0f, min(fac1, fac2)); // TODO: derive G from D analytically
+ float D = expf(-cotangent2 * m_invsigma2) * m_invsigma2 * M_1_PI_F / sinNH4;
+ float G = min(1.0f, min(fac1, fac2)); // TODO: derive G from D analytically
- float out = 0.25f * (D * G) / cosNO;
+ float out = 0.25f * (D * G) / cosNO;
- *pdf = 0.5f * M_1_PI_F;
- return make_float3(out, out, out);
- }
+ *pdf = 0.5f * M_1_PI_F;
+ return make_float3(out, out, out);
+ }
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device float3 bsdf_ashikhmin_velvet_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_ashikhmin_velvet_eval_transmit(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device int bsdf_ashikhmin_velvet_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_ashikhmin_velvet_sample(const ShaderClosure *sc,
+ float3 Ng,
+ float3 I,
+ float3 dIdx,
+ float3 dIdy,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ float3 *domega_in_dx,
+ float3 *domega_in_dy,
+ float *pdf)
{
- const VelvetBsdf *bsdf = (const VelvetBsdf*)sc;
- float m_invsigma2 = bsdf->invsigma2;
- float3 N = bsdf->N;
+ const VelvetBsdf *bsdf = (const VelvetBsdf *)sc;
+ float m_invsigma2 = bsdf->invsigma2;
+ float3 N = bsdf->N;
- // we are viewing the surface from above - send a ray out with uniform
- // distribution over the hemisphere
- sample_uniform_hemisphere(N, randu, randv, omega_in, pdf);
+ // we are viewing the surface from above - send a ray out with uniform
+ // distribution over the hemisphere
+ sample_uniform_hemisphere(N, randu, randv, omega_in, pdf);
- if(dot(Ng, *omega_in) > 0) {
- float3 H = normalize(*omega_in + I);
+ if (dot(Ng, *omega_in) > 0) {
+ float3 H = normalize(*omega_in + I);
- float cosNI = dot(N, *omega_in);
- float cosNO = dot(N, I);
- float cosNH = dot(N, H);
- float cosHO = fabsf(dot(I, H));
+ float cosNI = dot(N, *omega_in);
+ float cosNO = dot(N, I);
+ float cosNH = dot(N, H);
+ float cosHO = fabsf(dot(I, H));
- if(fabsf(cosNO) > 1e-5f && fabsf(cosNH) < 1.0f-1e-5f && cosHO > 1e-5f) {
- float cosNHdivHO = cosNH / cosHO;
- cosNHdivHO = fmaxf(cosNHdivHO, 1e-5f);
+ if (fabsf(cosNO) > 1e-5f && fabsf(cosNH) < 1.0f - 1e-5f && cosHO > 1e-5f) {
+ float cosNHdivHO = cosNH / cosHO;
+ cosNHdivHO = fmaxf(cosNHdivHO, 1e-5f);
- float fac1 = 2 * fabsf(cosNHdivHO * cosNO);
- float fac2 = 2 * fabsf(cosNHdivHO * cosNI);
+ float fac1 = 2 * fabsf(cosNHdivHO * cosNO);
+ float fac2 = 2 * fabsf(cosNHdivHO * cosNI);
- float sinNH2 = 1 - cosNH * cosNH;
- float sinNH4 = sinNH2 * sinNH2;
- float cotangent2 = (cosNH * cosNH) / sinNH2;
+ float sinNH2 = 1 - cosNH * cosNH;
+ float sinNH4 = sinNH2 * sinNH2;
+ float cotangent2 = (cosNH * cosNH) / sinNH2;
- float D = expf(-cotangent2 * m_invsigma2) * m_invsigma2 * M_1_PI_F / sinNH4;
- float G = min(1.0f, min(fac1, fac2)); // TODO: derive G from D analytically
+ float D = expf(-cotangent2 * m_invsigma2) * m_invsigma2 * M_1_PI_F / sinNH4;
+ float G = min(1.0f, min(fac1, fac2)); // TODO: derive G from D analytically
- float power = 0.25f * (D * G) / cosNO;
+ float power = 0.25f * (D * G) / cosNO;
- *eval = make_float3(power, power, power);
+ *eval = make_float3(power, power, power);
#ifdef __RAY_DIFFERENTIALS__
- // TODO: find a better approximation for the retroreflective bounce
- *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx;
- *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy;
+ // TODO: find a better approximation for the retroreflective bounce
+ *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx;
+ *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy;
#endif
- }
- else
- *pdf = 0.0f;
- }
- else
- *pdf = 0.0f;
-
- return LABEL_REFLECT|LABEL_DIFFUSE;
+ }
+ else
+ *pdf = 0.0f;
+ }
+ else
+ *pdf = 0.0f;
+
+ return LABEL_REFLECT | LABEL_DIFFUSE;
}
CCL_NAMESPACE_END
-#endif /* __BSDF_ASHIKHMIN_VELVET_H__ */
+#endif /* __BSDF_ASHIKHMIN_VELVET_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_diffuse.h b/intern/cycles/kernel/closure/bsdf_diffuse.h
index 946c460a70e..76b50548455 100644
--- a/intern/cycles/kernel/closure/bsdf_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_diffuse.h
@@ -36,107 +36,141 @@
CCL_NAMESPACE_BEGIN
typedef ccl_addr_space struct DiffuseBsdf {
- SHADER_CLOSURE_BASE;
+ SHADER_CLOSURE_BASE;
} DiffuseBsdf;
/* DIFFUSE */
ccl_device int bsdf_diffuse_setup(DiffuseBsdf *bsdf)
{
- bsdf->type = CLOSURE_BSDF_DIFFUSE_ID;
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ bsdf->type = CLOSURE_BSDF_DIFFUSE_ID;
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device bool bsdf_diffuse_merge(const ShaderClosure *a, const ShaderClosure *b)
{
- const DiffuseBsdf *bsdf_a = (const DiffuseBsdf*)a;
- const DiffuseBsdf *bsdf_b = (const DiffuseBsdf*)b;
+ const DiffuseBsdf *bsdf_a = (const DiffuseBsdf *)a;
+ const DiffuseBsdf *bsdf_b = (const DiffuseBsdf *)b;
- return (isequal_float3(bsdf_a->N, bsdf_b->N));
+ return (isequal_float3(bsdf_a->N, bsdf_b->N));
}
-ccl_device float3 bsdf_diffuse_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_diffuse_eval_reflect(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
- float3 N = bsdf->N;
+ const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc;
+ float3 N = bsdf->N;
- float cos_pi = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
- *pdf = cos_pi;
- return make_float3(cos_pi, cos_pi, cos_pi);
+ float cos_pi = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
+ *pdf = cos_pi;
+ return make_float3(cos_pi, cos_pi, cos_pi);
}
-ccl_device float3 bsdf_diffuse_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_diffuse_eval_transmit(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device int bsdf_diffuse_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_diffuse_sample(const ShaderClosure *sc,
+ float3 Ng,
+ float3 I,
+ float3 dIdx,
+ float3 dIdy,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ float3 *domega_in_dx,
+ float3 *domega_in_dy,
+ float *pdf)
{
- const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
- float3 N = bsdf->N;
+ const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc;
+ float3 N = bsdf->N;
- // distribution over the hemisphere
- sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
+ // distribution over the hemisphere
+ sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
- if(dot(Ng, *omega_in) > 0.0f) {
- *eval = make_float3(*pdf, *pdf, *pdf);
+ if (dot(Ng, *omega_in) > 0.0f) {
+ *eval = make_float3(*pdf, *pdf, *pdf);
#ifdef __RAY_DIFFERENTIALS__
- // TODO: find a better approximation for the diffuse bounce
- *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx;
- *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy;
+ // TODO: find a better approximation for the diffuse bounce
+ *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx;
+ *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy;
#endif
- }
- else
- *pdf = 0.0f;
+ }
+ else
+ *pdf = 0.0f;
- return LABEL_REFLECT|LABEL_DIFFUSE;
+ return LABEL_REFLECT | LABEL_DIFFUSE;
}
/* TRANSLUCENT */
ccl_device int bsdf_translucent_setup(DiffuseBsdf *bsdf)
{
- bsdf->type = CLOSURE_BSDF_TRANSLUCENT_ID;
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ bsdf->type = CLOSURE_BSDF_TRANSLUCENT_ID;
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
-ccl_device float3 bsdf_translucent_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_translucent_eval_reflect(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device float3 bsdf_translucent_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_translucent_eval_transmit(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
- float3 N = bsdf->N;
+ const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc;
+ float3 N = bsdf->N;
- float cos_pi = fmaxf(-dot(N, omega_in), 0.0f) * M_1_PI_F;
- *pdf = cos_pi;
- return make_float3 (cos_pi, cos_pi, cos_pi);
+ float cos_pi = fmaxf(-dot(N, omega_in), 0.0f) * M_1_PI_F;
+ *pdf = cos_pi;
+ return make_float3(cos_pi, cos_pi, cos_pi);
}
-ccl_device int bsdf_translucent_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_translucent_sample(const ShaderClosure *sc,
+ float3 Ng,
+ float3 I,
+ float3 dIdx,
+ float3 dIdy,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ float3 *domega_in_dx,
+ float3 *domega_in_dy,
+ float *pdf)
{
- const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
- float3 N = bsdf->N;
-
- // we are viewing the surface from the right side - send a ray out with cosine
- // distribution over the hemisphere
- sample_cos_hemisphere (-N, randu, randv, omega_in, pdf);
- if(dot(Ng, *omega_in) < 0) {
- *eval = make_float3(*pdf, *pdf, *pdf);
+ const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc;
+ float3 N = bsdf->N;
+
+ // we are viewing the surface from the right side - send a ray out with cosine
+ // distribution over the hemisphere
+ sample_cos_hemisphere(-N, randu, randv, omega_in, pdf);
+ if (dot(Ng, *omega_in) < 0) {
+ *eval = make_float3(*pdf, *pdf, *pdf);
#ifdef __RAY_DIFFERENTIALS__
- // TODO: find a better approximation for the diffuse bounce
- *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx);
- *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy);
+ // TODO: find a better approximation for the diffuse bounce
+ *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx);
+ *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy);
#endif
- }
- else {
- *pdf = 0;
- }
- return LABEL_TRANSMIT|LABEL_DIFFUSE;
+ }
+ else {
+ *pdf = 0;
+ }
+ return LABEL_TRANSMIT | LABEL_DIFFUSE;
}
CCL_NAMESPACE_END
-#endif /* __BSDF_DIFFUSE_H__ */
+#endif /* __BSDF_DIFFUSE_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
index ca33a5b275c..9d13eb8d4e0 100644
--- a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
+++ b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
@@ -38,73 +38,90 @@ CCL_NAMESPACE_BEGIN
#ifdef __OSL__
typedef ccl_addr_space struct DiffuseRampBsdf {
- SHADER_CLOSURE_BASE;
+ SHADER_CLOSURE_BASE;
- float3 *colors;
+ float3 *colors;
} DiffuseRampBsdf;
ccl_device float3 bsdf_diffuse_ramp_get_color(const float3 colors[8], float pos)
{
- int MAXCOLORS = 8;
-
- float npos = pos * (float)(MAXCOLORS - 1);
- int ipos = float_to_int(npos);
- if(ipos < 0)
- return colors[0];
- if(ipos >= (MAXCOLORS - 1))
- return colors[MAXCOLORS - 1];
- float offset = npos - (float)ipos;
- return colors[ipos] * (1.0f - offset) + colors[ipos+1] * offset;
+ int MAXCOLORS = 8;
+
+ float npos = pos * (float)(MAXCOLORS - 1);
+ int ipos = float_to_int(npos);
+ if (ipos < 0)
+ return colors[0];
+ if (ipos >= (MAXCOLORS - 1))
+ return colors[MAXCOLORS - 1];
+ float offset = npos - (float)ipos;
+ return colors[ipos] * (1.0f - offset) + colors[ipos + 1] * offset;
}
ccl_device int bsdf_diffuse_ramp_setup(DiffuseRampBsdf *bsdf)
{
- bsdf->type = CLOSURE_BSDF_DIFFUSE_RAMP_ID;
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ bsdf->type = CLOSURE_BSDF_DIFFUSE_RAMP_ID;
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device void bsdf_diffuse_ramp_blur(ShaderClosure *sc, float roughness)
{
}
-ccl_device float3 bsdf_diffuse_ramp_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_diffuse_ramp_eval_reflect(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf*)sc;
- float3 N = bsdf->N;
+ const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf *)sc;
+ float3 N = bsdf->N;
- float cos_pi = fmaxf(dot(N, omega_in), 0.0f);
- *pdf = cos_pi * M_1_PI_F;
- return bsdf_diffuse_ramp_get_color(bsdf->colors, cos_pi) * M_1_PI_F;
+ float cos_pi = fmaxf(dot(N, omega_in), 0.0f);
+ *pdf = cos_pi * M_1_PI_F;
+ return bsdf_diffuse_ramp_get_color(bsdf->colors, cos_pi) * M_1_PI_F;
}
-ccl_device float3 bsdf_diffuse_ramp_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_diffuse_ramp_eval_transmit(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device int bsdf_diffuse_ramp_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_diffuse_ramp_sample(const ShaderClosure *sc,
+ float3 Ng,
+ float3 I,
+ float3 dIdx,
+ float3 dIdy,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ float3 *domega_in_dx,
+ float3 *domega_in_dy,
+ float *pdf)
{
- const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf*)sc;
- float3 N = bsdf->N;
-
- // distribution over the hemisphere
- sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
-
- if(dot(Ng, *omega_in) > 0.0f) {
- *eval = bsdf_diffuse_ramp_get_color(bsdf->colors, *pdf * M_PI_F) * M_1_PI_F;
-#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx;
- *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy;
-#endif
- }
- else
- *pdf = 0.0f;
-
- return LABEL_REFLECT|LABEL_DIFFUSE;
+ const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf *)sc;
+ float3 N = bsdf->N;
+
+ // distribution over the hemisphere
+ sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
+
+ if (dot(Ng, *omega_in) > 0.0f) {
+ *eval = bsdf_diffuse_ramp_get_color(bsdf->colors, *pdf * M_PI_F) * M_1_PI_F;
+# ifdef __RAY_DIFFERENTIALS__
+ *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx;
+ *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy;
+# endif
+ }
+ else
+ *pdf = 0.0f;
+
+ return LABEL_REFLECT | LABEL_DIFFUSE;
}
-#endif /* __OSL__ */
+#endif /* __OSL__ */
CCL_NAMESPACE_END
-#endif /* __BSDF_DIFFUSE_RAMP_H__ */
+#endif /* __BSDF_DIFFUSE_RAMP_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h
index e1a0cfaa3f5..6b2a9a97d30 100644
--- a/intern/cycles/kernel/closure/bsdf_hair.h
+++ b/intern/cycles/kernel/closure/bsdf_hair.h
@@ -36,245 +36,276 @@
CCL_NAMESPACE_BEGIN
typedef ccl_addr_space struct HairBsdf {
- SHADER_CLOSURE_BASE;
+ SHADER_CLOSURE_BASE;
- float3 T;
- float roughness1;
- float roughness2;
- float offset;
+ float3 T;
+ float roughness1;
+ float roughness2;
+ float offset;
} HairBsdf;
ccl_device int bsdf_hair_reflection_setup(HairBsdf *bsdf)
{
- bsdf->type = CLOSURE_BSDF_HAIR_REFLECTION_ID;
- bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f);
- bsdf->roughness2 = clamp(bsdf->roughness2, 0.001f, 1.0f);
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ bsdf->type = CLOSURE_BSDF_HAIR_REFLECTION_ID;
+ bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f);
+ bsdf->roughness2 = clamp(bsdf->roughness2, 0.001f, 1.0f);
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device int bsdf_hair_transmission_setup(HairBsdf *bsdf)
{
- bsdf->type = CLOSURE_BSDF_HAIR_TRANSMISSION_ID;
- bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f);
- bsdf->roughness2 = clamp(bsdf->roughness2, 0.001f, 1.0f);
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ bsdf->type = CLOSURE_BSDF_HAIR_TRANSMISSION_ID;
+ bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f);
+ bsdf->roughness2 = clamp(bsdf->roughness2, 0.001f, 1.0f);
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device bool bsdf_hair_merge(const ShaderClosure *a, const ShaderClosure *b)
{
- const HairBsdf *bsdf_a = (const HairBsdf*)a;
- const HairBsdf *bsdf_b = (const HairBsdf*)b;
+ const HairBsdf *bsdf_a = (const HairBsdf *)a;
+ const HairBsdf *bsdf_b = (const HairBsdf *)b;
- return (isequal_float3(bsdf_a->T, bsdf_b->T)) &&
- (bsdf_a->roughness1 == bsdf_b->roughness1) &&
- (bsdf_a->roughness2 == bsdf_b->roughness2) &&
- (bsdf_a->offset == bsdf_b->offset);
+ return (isequal_float3(bsdf_a->T, bsdf_b->T)) && (bsdf_a->roughness1 == bsdf_b->roughness1) &&
+ (bsdf_a->roughness2 == bsdf_b->roughness2) && (bsdf_a->offset == bsdf_b->offset);
}
-ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- const HairBsdf *bsdf = (const HairBsdf*)sc;
- float offset = bsdf->offset;
- float3 Tg = bsdf->T;
- float roughness1 = bsdf->roughness1;
- float roughness2 = bsdf->roughness2;
+ const HairBsdf *bsdf = (const HairBsdf *)sc;
+ float offset = bsdf->offset;
+ float3 Tg = bsdf->T;
+ float roughness1 = bsdf->roughness1;
+ float roughness2 = bsdf->roughness2;
- float Iz = dot(Tg, I);
- float3 locy = normalize(I - Tg * Iz);
+ float Iz = dot(Tg, I);
+ float3 locy = normalize(I - Tg * Iz);
- float theta_r = M_PI_2_F - fast_acosf(Iz);
+ float theta_r = M_PI_2_F - fast_acosf(Iz);
- float omega_in_z = dot(Tg, omega_in);
- float3 omega_in_y = normalize(omega_in - Tg * omega_in_z);
+ float omega_in_z = dot(Tg, omega_in);
+ float3 omega_in_y = normalize(omega_in - Tg * omega_in_z);
- float theta_i = M_PI_2_F - fast_acosf(omega_in_z);
- float cosphi_i = dot(omega_in_y, locy);
+ float theta_i = M_PI_2_F - fast_acosf(omega_in_z);
+ float cosphi_i = dot(omega_in_y, locy);
- if(M_PI_2_F - fabsf(theta_i) < 0.001f || cosphi_i < 0.0f) {
- *pdf = 0.0f;
- return make_float3(*pdf, *pdf, *pdf);
- }
+ if (M_PI_2_F - fabsf(theta_i) < 0.001f || cosphi_i < 0.0f) {
+ *pdf = 0.0f;
+ return make_float3(*pdf, *pdf, *pdf);
+ }
- float roughness1_inv = 1.0f / roughness1;
- float roughness2_inv = 1.0f / roughness2;
- float phi_i = fast_acosf(cosphi_i) * roughness2_inv;
- phi_i = fabsf(phi_i) < M_PI_F ? phi_i : M_PI_F;
- float costheta_i = fast_cosf(theta_i);
+ float roughness1_inv = 1.0f / roughness1;
+ float roughness2_inv = 1.0f / roughness2;
+ float phi_i = fast_acosf(cosphi_i) * roughness2_inv;
+ phi_i = fabsf(phi_i) < M_PI_F ? phi_i : M_PI_F;
+ float costheta_i = fast_cosf(theta_i);
- float a_R = fast_atan2f(((M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f);
- float b_R = fast_atan2f(((-M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f);
+ float a_R = fast_atan2f(((M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f);
+ float b_R = fast_atan2f(((-M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f);
- float theta_h = (theta_i + theta_r) * 0.5f;
- float t = theta_h - offset;
+ float theta_h = (theta_i + theta_r) * 0.5f;
+ float t = theta_h - offset;
- float phi_pdf = fast_cosf(phi_i * 0.5f) * 0.25f * roughness2_inv;
- float theta_pdf = roughness1 / (2 * (t*t + roughness1*roughness1) * (a_R - b_R)* costheta_i);
- *pdf = phi_pdf * theta_pdf;
+ float phi_pdf = fast_cosf(phi_i * 0.5f) * 0.25f * roughness2_inv;
+ float theta_pdf = roughness1 /
+ (2 * (t * t + roughness1 * roughness1) * (a_R - b_R) * costheta_i);
+ *pdf = phi_pdf * theta_pdf;
- return make_float3(*pdf, *pdf, *pdf);
+ return make_float3(*pdf, *pdf, *pdf);
}
-ccl_device float3 bsdf_hair_transmission_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_hair_transmission_eval_reflect(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-
-ccl_device float3 bsdf_hair_reflection_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_hair_reflection_eval_transmit(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device float3 bsdf_hair_transmission_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_hair_transmission_eval_transmit(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- const HairBsdf *bsdf = (const HairBsdf*)sc;
- float offset = bsdf->offset;
- float3 Tg = bsdf->T;
- float roughness1 = bsdf->roughness1;
- float roughness2 = bsdf->roughness2;
- float Iz = dot(Tg, I);
- float3 locy = normalize(I - Tg * Iz);
+ const HairBsdf *bsdf = (const HairBsdf *)sc;
+ float offset = bsdf->offset;
+ float3 Tg = bsdf->T;
+ float roughness1 = bsdf->roughness1;
+ float roughness2 = bsdf->roughness2;
+ float Iz = dot(Tg, I);
+ float3 locy = normalize(I - Tg * Iz);
- float theta_r = M_PI_2_F - fast_acosf(Iz);
+ float theta_r = M_PI_2_F - fast_acosf(Iz);
- float omega_in_z = dot(Tg, omega_in);
- float3 omega_in_y = normalize(omega_in - Tg * omega_in_z);
+ float omega_in_z = dot(Tg, omega_in);
+ float3 omega_in_y = normalize(omega_in - Tg * omega_in_z);
- float theta_i = M_PI_2_F - fast_acosf(omega_in_z);
- float phi_i = fast_acosf(dot(omega_in_y, locy));
+ float theta_i = M_PI_2_F - fast_acosf(omega_in_z);
+ float phi_i = fast_acosf(dot(omega_in_y, locy));
- if(M_PI_2_F - fabsf(theta_i) < 0.001f) {
- *pdf = 0.0f;
- return make_float3(*pdf, *pdf, *pdf);
- }
+ if (M_PI_2_F - fabsf(theta_i) < 0.001f) {
+ *pdf = 0.0f;
+ return make_float3(*pdf, *pdf, *pdf);
+ }
- float costheta_i = fast_cosf(theta_i);
+ float costheta_i = fast_cosf(theta_i);
- float roughness1_inv = 1.0f / roughness1;
- float a_TT = fast_atan2f(((M_PI_2_F + theta_r)/2 - offset) * roughness1_inv, 1.0f);
- float b_TT = fast_atan2f(((-M_PI_2_F + theta_r)/2 - offset) * roughness1_inv, 1.0f);
- float c_TT = 2 * fast_atan2f(M_PI_2_F / roughness2, 1.0f);
+ float roughness1_inv = 1.0f / roughness1;
+ float a_TT = fast_atan2f(((M_PI_2_F + theta_r) / 2 - offset) * roughness1_inv, 1.0f);
+ float b_TT = fast_atan2f(((-M_PI_2_F + theta_r) / 2 - offset) * roughness1_inv, 1.0f);
+ float c_TT = 2 * fast_atan2f(M_PI_2_F / roughness2, 1.0f);
- float theta_h = (theta_i + theta_r) / 2;
- float t = theta_h - offset;
- float phi = fabsf(phi_i);
+ float theta_h = (theta_i + theta_r) / 2;
+ float t = theta_h - offset;
+ float phi = fabsf(phi_i);
- float p = M_PI_F - phi;
- float theta_pdf = roughness1 / (2 * (t*t + roughness1 * roughness1) * (a_TT - b_TT)*costheta_i);
- float phi_pdf = roughness2 / (c_TT * (p * p + roughness2 * roughness2));
+ float p = M_PI_F - phi;
+ float theta_pdf = roughness1 /
+ (2 * (t * t + roughness1 * roughness1) * (a_TT - b_TT) * costheta_i);
+ float phi_pdf = roughness2 / (c_TT * (p * p + roughness2 * roughness2));
- *pdf = phi_pdf * theta_pdf;
- return make_float3(*pdf, *pdf, *pdf);
+ *pdf = phi_pdf * theta_pdf;
+ return make_float3(*pdf, *pdf, *pdf);
}
-ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc,
+ float3 Ng,
+ float3 I,
+ float3 dIdx,
+ float3 dIdy,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ float3 *domega_in_dx,
+ float3 *domega_in_dy,
+ float *pdf)
{
- const HairBsdf *bsdf = (const HairBsdf*)sc;
- float offset = bsdf->offset;
- float3 Tg = bsdf->T;
- float roughness1 = bsdf->roughness1;
- float roughness2 = bsdf->roughness2;
- float Iz = dot(Tg, I);
- float3 locy = normalize(I - Tg * Iz);
- float3 locx = cross(locy, Tg);
- float theta_r = M_PI_2_F - fast_acosf(Iz);
+ const HairBsdf *bsdf = (const HairBsdf *)sc;
+ float offset = bsdf->offset;
+ float3 Tg = bsdf->T;
+ float roughness1 = bsdf->roughness1;
+ float roughness2 = bsdf->roughness2;
+ float Iz = dot(Tg, I);
+ float3 locy = normalize(I - Tg * Iz);
+ float3 locx = cross(locy, Tg);
+ float theta_r = M_PI_2_F - fast_acosf(Iz);
- float roughness1_inv = 1.0f / roughness1;
- float a_R = fast_atan2f(((M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f);
- float b_R = fast_atan2f(((-M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f);
+ float roughness1_inv = 1.0f / roughness1;
+ float a_R = fast_atan2f(((M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f);
+ float b_R = fast_atan2f(((-M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f);
- float t = roughness1 * tanf(randu * (a_R - b_R) + b_R);
+ float t = roughness1 * tanf(randu * (a_R - b_R) + b_R);
- float theta_h = t + offset;
- float theta_i = 2 * theta_h - theta_r;
+ float theta_h = t + offset;
+ float theta_i = 2 * theta_h - theta_r;
- float costheta_i, sintheta_i;
- fast_sincosf(theta_i, &sintheta_i, &costheta_i);
+ float costheta_i, sintheta_i;
+ fast_sincosf(theta_i, &sintheta_i, &costheta_i);
- float phi = 2 * safe_asinf(1 - 2 * randv) * roughness2;
+ float phi = 2 * safe_asinf(1 - 2 * randv) * roughness2;
- float phi_pdf = fast_cosf(phi * 0.5f) * 0.25f / roughness2;
+ float phi_pdf = fast_cosf(phi * 0.5f) * 0.25f / roughness2;
- float theta_pdf = roughness1 / (2 * (t*t + roughness1*roughness1) * (a_R - b_R)*costheta_i);
+ float theta_pdf = roughness1 /
+ (2 * (t * t + roughness1 * roughness1) * (a_R - b_R) * costheta_i);
- float sinphi, cosphi;
- fast_sincosf(phi, &sinphi, &cosphi);
- *omega_in =(cosphi * costheta_i) * locy -
- (sinphi * costheta_i) * locx +
- ( sintheta_i) * Tg;
+ float sinphi, cosphi;
+ fast_sincosf(phi, &sinphi, &cosphi);
+ *omega_in = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg;
- //differentials - TODO: find a better approximation for the reflective bounce
+ //differentials - TODO: find a better approximation for the reflective bounce
#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx;
- *domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy;
+ *domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx;
+ *domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy;
#endif
- *pdf = fabsf(phi_pdf * theta_pdf);
- if(M_PI_2_F - fabsf(theta_i) < 0.001f)
- *pdf = 0.0f;
+ *pdf = fabsf(phi_pdf * theta_pdf);
+ if (M_PI_2_F - fabsf(theta_i) < 0.001f)
+ *pdf = 0.0f;
- *eval = make_float3(*pdf, *pdf, *pdf);
+ *eval = make_float3(*pdf, *pdf, *pdf);
- return LABEL_REFLECT|LABEL_GLOSSY;
+ return LABEL_REFLECT | LABEL_GLOSSY;
}
-ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc,
+ float3 Ng,
+ float3 I,
+ float3 dIdx,
+ float3 dIdy,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ float3 *domega_in_dx,
+ float3 *domega_in_dy,
+ float *pdf)
{
- const HairBsdf *bsdf = (const HairBsdf*)sc;
- float offset = bsdf->offset;
- float3 Tg = bsdf->T;
- float roughness1 = bsdf->roughness1;
- float roughness2 = bsdf->roughness2;
- float Iz = dot(Tg, I);
- float3 locy = normalize(I - Tg * Iz);
- float3 locx = cross(locy, Tg);
- float theta_r = M_PI_2_F - fast_acosf(Iz);
-
- float roughness1_inv = 1.0f / roughness1;
- float a_TT = fast_atan2f(((M_PI_2_F + theta_r)/2 - offset) * roughness1_inv, 1.0f);
- float b_TT = fast_atan2f(((-M_PI_2_F + theta_r)/2 - offset) * roughness1_inv, 1.0f);
- float c_TT = 2 * fast_atan2f(M_PI_2_F / roughness2, 1.0f);
-
- float t = roughness1 * tanf(randu * (a_TT - b_TT) + b_TT);
-
- float theta_h = t + offset;
- float theta_i = 2 * theta_h - theta_r;
-
- float costheta_i, sintheta_i;
- fast_sincosf(theta_i, &sintheta_i, &costheta_i);
-
- float p = roughness2 * tanf(c_TT * (randv - 0.5f));
- float phi = p + M_PI_F;
- float theta_pdf = roughness1 / (2 * (t*t + roughness1*roughness1) * (a_TT - b_TT) * costheta_i);
- float phi_pdf = roughness2 / (c_TT * (p * p + roughness2 * roughness2));
-
- float sinphi, cosphi;
- fast_sincosf(phi, &sinphi, &cosphi);
- *omega_in =(cosphi * costheta_i) * locy -
- (sinphi * costheta_i) * locx +
- ( sintheta_i) * Tg;
-
- //differentials - TODO: find a better approximation for the transmission bounce
+ const HairBsdf *bsdf = (const HairBsdf *)sc;
+ float offset = bsdf->offset;
+ float3 Tg = bsdf->T;
+ float roughness1 = bsdf->roughness1;
+ float roughness2 = bsdf->roughness2;
+ float Iz = dot(Tg, I);
+ float3 locy = normalize(I - Tg * Iz);
+ float3 locx = cross(locy, Tg);
+ float theta_r = M_PI_2_F - fast_acosf(Iz);
+
+ float roughness1_inv = 1.0f / roughness1;
+ float a_TT = fast_atan2f(((M_PI_2_F + theta_r) / 2 - offset) * roughness1_inv, 1.0f);
+ float b_TT = fast_atan2f(((-M_PI_2_F + theta_r) / 2 - offset) * roughness1_inv, 1.0f);
+ float c_TT = 2 * fast_atan2f(M_PI_2_F / roughness2, 1.0f);
+
+ float t = roughness1 * tanf(randu * (a_TT - b_TT) + b_TT);
+
+ float theta_h = t + offset;
+ float theta_i = 2 * theta_h - theta_r;
+
+ float costheta_i, sintheta_i;
+ fast_sincosf(theta_i, &sintheta_i, &costheta_i);
+
+ float p = roughness2 * tanf(c_TT * (randv - 0.5f));
+ float phi = p + M_PI_F;
+ float theta_pdf = roughness1 /
+ (2 * (t * t + roughness1 * roughness1) * (a_TT - b_TT) * costheta_i);
+ float phi_pdf = roughness2 / (c_TT * (p * p + roughness2 * roughness2));
+
+ float sinphi, cosphi;
+ fast_sincosf(phi, &sinphi, &cosphi);
+ *omega_in = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg;
+
+ //differentials - TODO: find a better approximation for the transmission bounce
#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx;
- *domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy;
+ *domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx;
+ *domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy;
#endif
- *pdf = fabsf(phi_pdf * theta_pdf);
- if(M_PI_2_F - fabsf(theta_i) < 0.001f) {
- *pdf = 0.0f;
- }
+ *pdf = fabsf(phi_pdf * theta_pdf);
+ if (M_PI_2_F - fabsf(theta_i) < 0.001f) {
+ *pdf = 0.0f;
+ }
- *eval = make_float3(*pdf, *pdf, *pdf);
+ *eval = make_float3(*pdf, *pdf, *pdf);
- /* TODO(sergey): Should always be negative, but seems some precision issue
- * is involved here.
- */
- kernel_assert(dot(locy, *omega_in) < 1e-4f);
+ /* TODO(sergey): Should always be negative, but seems some precision issue
+ * is involved here.
+ */
+ kernel_assert(dot(locy, *omega_in) < 1e-4f);
- return LABEL_TRANSMIT|LABEL_GLOSSY;
+ return LABEL_TRANSMIT | LABEL_GLOSSY;
}
CCL_NAMESPACE_END
-#endif /* __BSDF_HAIR_H__ */
+#endif /* __BSDF_HAIR_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_hair_principled.h b/intern/cycles/kernel/closure/bsdf_hair_principled.h
index 68335ee887a..a4bba2fbf6c 100644
--- a/intern/cycles/kernel/closure/bsdf_hair_principled.h
+++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h
@@ -15,251 +15,245 @@
*/
#ifdef __KERNEL_CPU__
-#include <fenv.h>
+# include <fenv.h>
#endif
#include "kernel/kernel_color.h"
#ifndef __BSDF_HAIR_PRINCIPLED_H__
-#define __BSDF_HAIR_PRINCIPLED_H__
+# define __BSDF_HAIR_PRINCIPLED_H__
CCL_NAMESPACE_BEGIN
typedef ccl_addr_space struct PrincipledHairExtra {
- /* Geometry data. */
- float4 geom;
+ /* Geometry data. */
+ float4 geom;
} PrincipledHairExtra;
typedef ccl_addr_space struct PrincipledHairBSDF {
- SHADER_CLOSURE_BASE;
-
- /* Absorption coefficient. */
- float3 sigma;
- /* Variance of the underlying logistic distribution. */
- float v;
- /* Scale factor of the underlying logistic distribution. */
- float s;
- /* Cuticle tilt angle. */
- float alpha;
- /* IOR. */
- float eta;
- /* Effective variance for the diffuse bounce only. */
- float m0_roughness;
-
- /* Extra closure. */
- PrincipledHairExtra *extra;
+ SHADER_CLOSURE_BASE;
+
+ /* Absorption coefficient. */
+ float3 sigma;
+ /* Variance of the underlying logistic distribution. */
+ float v;
+ /* Scale factor of the underlying logistic distribution. */
+ float s;
+ /* Cuticle tilt angle. */
+ float alpha;
+ /* IOR. */
+ float eta;
+ /* Effective variance for the diffuse bounce only. */
+ float m0_roughness;
+
+ /* Extra closure. */
+ PrincipledHairExtra *extra;
} PrincipledHairBSDF;
-static_assert(sizeof(ShaderClosure) >= sizeof(PrincipledHairBSDF), "PrincipledHairBSDF is too large!");
-static_assert(sizeof(ShaderClosure) >= sizeof(PrincipledHairExtra), "PrincipledHairExtra is too large!");
+static_assert(sizeof(ShaderClosure) >= sizeof(PrincipledHairBSDF),
+ "PrincipledHairBSDF is too large!");
+static_assert(sizeof(ShaderClosure) >= sizeof(PrincipledHairExtra),
+ "PrincipledHairExtra is too large!");
ccl_device_inline float cos_from_sin(const float s)
{
- return safe_sqrtf(1.0f - s*s);
+ return safe_sqrtf(1.0f - s * s);
}
/* Gives the change in direction in the normal plane for the given angles and p-th-order scattering. */
ccl_device_inline float delta_phi(int p, float gamma_o, float gamma_t)
{
- return 2.0f * p * gamma_t - 2.0f * gamma_o + p * M_PI_F;
+ return 2.0f * p * gamma_t - 2.0f * gamma_o + p * M_PI_F;
}
/* Remaps the given angle to [-pi, pi]. */
ccl_device_inline float wrap_angle(float a)
{
- while(a > M_PI_F) {
- a -= M_2PI_F;
- }
- while(a < -M_PI_F) {
- a += M_2PI_F;
- }
- return a;
+ while (a > M_PI_F) {
+ a -= M_2PI_F;
+ }
+ while (a < -M_PI_F) {
+ a += M_2PI_F;
+ }
+ return a;
}
/* Logistic distribution function. */
ccl_device_inline float logistic(float x, float s)
{
- float v = expf(-fabsf(x)/s);
- return v / (s * sqr(1.0f + v));
+ float v = expf(-fabsf(x) / s);
+ return v / (s * sqr(1.0f + v));
}
/* Logistic cumulative density function. */
ccl_device_inline float logistic_cdf(float x, float s)
{
- float arg = -x/s;
- /* expf() overflows if arg >= 89.0. */
- if(arg > 88.0f) {
- return 0.0f;
- }
- else {
- return 1.0f / (1.0f + expf(arg));
- }
+ float arg = -x / s;
+ /* expf() overflows if arg >= 89.0. */
+ if (arg > 88.0f) {
+ return 0.0f;
+ }
+ else {
+ return 1.0f / (1.0f + expf(arg));
+ }
}
/* Numerical approximation to the Bessel function of the first kind. */
ccl_device_inline float bessel_I0(float x)
{
- x = sqr(x);
- float val = 1.0f + 0.25f*x;
- float pow_x_2i = sqr(x);
- uint64_t i_fac_2 = 1;
- int pow_4_i = 16;
- for(int i = 2; i < 10; i++) {
- i_fac_2 *= i*i;
- float newval = val + pow_x_2i / (pow_4_i * i_fac_2);
- if(val == newval) {
- return val;
- }
- val = newval;
- pow_x_2i *= x;
- pow_4_i *= 4;
- }
- return val;
+ x = sqr(x);
+ float val = 1.0f + 0.25f * x;
+ float pow_x_2i = sqr(x);
+ uint64_t i_fac_2 = 1;
+ int pow_4_i = 16;
+ for (int i = 2; i < 10; i++) {
+ i_fac_2 *= i * i;
+ float newval = val + pow_x_2i / (pow_4_i * i_fac_2);
+ if (val == newval) {
+ return val;
+ }
+ val = newval;
+ pow_x_2i *= x;
+ pow_4_i *= 4;
+ }
+ return val;
}
/* Logarithm of the Bessel function of the first kind. */
ccl_device_inline float log_bessel_I0(float x)
{
- if(x > 12.0f) {
- /* log(1/x) == -log(x) iff x > 0.
- * This is only used with positive cosines */
- return x + 0.5f * (1.f / (8.0f * x) - M_LN_2PI_F - logf(x));
- }
- else {
- return logf(bessel_I0(x));
- }
+ if (x > 12.0f) {
+ /* log(1/x) == -log(x) iff x > 0.
+ * This is only used with positive cosines */
+ return x + 0.5f * (1.f / (8.0f * x) - M_LN_2PI_F - logf(x));
+ }
+ else {
+ return logf(bessel_I0(x));
+ }
}
/* Logistic distribution limited to the interval [-pi, pi]. */
ccl_device_inline float trimmed_logistic(float x, float s)
{
- /* The logistic distribution is symmetric and centered around zero,
- * so logistic_cdf(x, s) = 1 - logistic_cdf(-x, s).
- * Therefore, logistic_cdf(x, s)-logistic_cdf(-x, s) = 1 - 2*logistic_cdf(-x, s) */
- float scaling_fac = 1.0f - 2.0f*logistic_cdf(-M_PI_F, s);
- float val = logistic(x, s);
- return safe_divide(val, scaling_fac);
+ /* The logistic distribution is symmetric and centered around zero,
+ * so logistic_cdf(x, s) = 1 - logistic_cdf(-x, s).
+ * Therefore, logistic_cdf(x, s)-logistic_cdf(-x, s) = 1 - 2*logistic_cdf(-x, s) */
+ float scaling_fac = 1.0f - 2.0f * logistic_cdf(-M_PI_F, s);
+ float val = logistic(x, s);
+ return safe_divide(val, scaling_fac);
}
/* Sampling function for the trimmed logistic function. */
ccl_device_inline float sample_trimmed_logistic(float u, float s)
{
- float cdf_minuspi = logistic_cdf(-M_PI_F, s);
- float x = -s*logf(1.0f / (u*(1.0f - 2.0f*cdf_minuspi) + cdf_minuspi) - 1.0f);
- return clamp(x, -M_PI_F, M_PI_F);
+ float cdf_minuspi = logistic_cdf(-M_PI_F, s);
+ float x = -s * logf(1.0f / (u * (1.0f - 2.0f * cdf_minuspi) + cdf_minuspi) - 1.0f);
+ return clamp(x, -M_PI_F, M_PI_F);
}
/* Azimuthal scattering function Np. */
-ccl_device_inline float azimuthal_scattering(float phi,
- int p,
- float s,
- float gamma_o,
- float gamma_t)
+ccl_device_inline float azimuthal_scattering(
+ float phi, int p, float s, float gamma_o, float gamma_t)
{
- float phi_o = wrap_angle(phi - delta_phi(p, gamma_o, gamma_t));
- float val = trimmed_logistic(phi_o, s);
- return val;
+ float phi_o = wrap_angle(phi - delta_phi(p, gamma_o, gamma_t));
+ float val = trimmed_logistic(phi_o, s);
+ return val;
}
/* Longitudinal scattering function Mp. */
-ccl_device_inline float longitudinal_scattering(float sin_theta_i,
- float cos_theta_i,
- float sin_theta_o,
- float cos_theta_o,
- float v)
+ccl_device_inline float longitudinal_scattering(
+ float sin_theta_i, float cos_theta_i, float sin_theta_o, float cos_theta_o, float v)
{
- float inv_v = 1.0f/v;
- float cos_arg = cos_theta_i * cos_theta_o * inv_v;
- float sin_arg = sin_theta_i * sin_theta_o * inv_v;
- if(v <= 0.1f) {
- float i0 = log_bessel_I0(cos_arg);
- float val = expf(i0 - sin_arg - inv_v + 0.6931f + logf(0.5f*inv_v));
- return val;
- }
- else {
- float i0 = bessel_I0(cos_arg);
- float val = (expf(-sin_arg) * i0) / (sinhf(inv_v) * 2.0f * v);
- return val;
- }
+ float inv_v = 1.0f / v;
+ float cos_arg = cos_theta_i * cos_theta_o * inv_v;
+ float sin_arg = sin_theta_i * sin_theta_o * inv_v;
+ if (v <= 0.1f) {
+ float i0 = log_bessel_I0(cos_arg);
+ float val = expf(i0 - sin_arg - inv_v + 0.6931f + logf(0.5f * inv_v));
+ return val;
+ }
+ else {
+ float i0 = bessel_I0(cos_arg);
+ float val = (expf(-sin_arg) * i0) / (sinhf(inv_v) * 2.0f * v);
+ return val;
+ }
}
/* Combine the three values using their luminances. */
ccl_device_inline float4 combine_with_energy(KernelGlobals *kg, float3 c)
{
- return make_float4(c.x, c.y, c.z, linear_rgb_to_gray(kg, c));
+ return make_float4(c.x, c.y, c.z, linear_rgb_to_gray(kg, c));
}
-#ifdef __HAIR__
+# ifdef __HAIR__
/* Set up the hair closure. */
ccl_device int bsdf_principled_hair_setup(ShaderData *sd, PrincipledHairBSDF *bsdf)
{
- bsdf->type = CLOSURE_BSDF_HAIR_PRINCIPLED_ID;
- bsdf->v = clamp(bsdf->v, 0.001f, 1.0f);
- bsdf->s = clamp(bsdf->s, 0.001f, 1.0f);
- /* Apply Primary Reflection Roughness modifier. */
- bsdf->m0_roughness = clamp(bsdf->m0_roughness*bsdf->v, 0.001f, 1.0f);
-
- /* Map from roughness_u and roughness_v to variance and scale factor. */
- bsdf->v = sqr(0.726f*bsdf->v + 0.812f*sqr(bsdf->v) + 3.700f*pow20(bsdf->v));
- bsdf->s = (0.265f*bsdf->s + 1.194f*sqr(bsdf->s) + 5.372f*pow22(bsdf->s))*M_SQRT_PI_8_F;
- bsdf->m0_roughness = sqr(0.726f*bsdf->m0_roughness + 0.812f*sqr(bsdf->m0_roughness) + 3.700f*pow20(bsdf->m0_roughness));
-
- /* Compute local frame, aligned to curve tangent and ray direction. */
- float3 X = safe_normalize(sd->dPdu);
- float3 Y = safe_normalize(cross(X, sd->I));
- float3 Z = safe_normalize(cross(X, Y));
- /* TODO: the solution below works where sd->Ng is the normal
- * pointing from the center of the curve to the shading point.
- * It doesn't work for triangles, see https://developer.blender.org/T43625 */
-
- /* h -1..0..1 means the rays goes from grazing the hair, to hitting it at
- * the center, to grazing the other edge. This is the sine of the angle
- * between sd->Ng and Z, as seen from the tangent X. */
-
- /* TODO: we convert this value to a cosine later and discard the sign, so
- * we could probably save some operations. */
- float h = dot(cross(sd->Ng, X), Z);
-
- kernel_assert(fabsf(h) < 1.0f + 1e-4f);
- kernel_assert(isfinite3_safe(Y));
- kernel_assert(isfinite_safe(h));
-
- bsdf->extra->geom = make_float4(Y.x, Y.y, Y.z, h);
-
- return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG;
+ bsdf->type = CLOSURE_BSDF_HAIR_PRINCIPLED_ID;
+ bsdf->v = clamp(bsdf->v, 0.001f, 1.0f);
+ bsdf->s = clamp(bsdf->s, 0.001f, 1.0f);
+ /* Apply Primary Reflection Roughness modifier. */
+ bsdf->m0_roughness = clamp(bsdf->m0_roughness * bsdf->v, 0.001f, 1.0f);
+
+ /* Map from roughness_u and roughness_v to variance and scale factor. */
+ bsdf->v = sqr(0.726f * bsdf->v + 0.812f * sqr(bsdf->v) + 3.700f * pow20(bsdf->v));
+ bsdf->s = (0.265f * bsdf->s + 1.194f * sqr(bsdf->s) + 5.372f * pow22(bsdf->s)) * M_SQRT_PI_8_F;
+ bsdf->m0_roughness = sqr(0.726f * bsdf->m0_roughness + 0.812f * sqr(bsdf->m0_roughness) +
+ 3.700f * pow20(bsdf->m0_roughness));
+
+ /* Compute local frame, aligned to curve tangent and ray direction. */
+ float3 X = safe_normalize(sd->dPdu);
+ float3 Y = safe_normalize(cross(X, sd->I));
+ float3 Z = safe_normalize(cross(X, Y));
+ /* TODO: the solution below works where sd->Ng is the normal
+ * pointing from the center of the curve to the shading point.
+ * It doesn't work for triangles, see https://developer.blender.org/T43625 */
+
+ /* h -1..0..1 means the rays goes from grazing the hair, to hitting it at
+ * the center, to grazing the other edge. This is the sine of the angle
+ * between sd->Ng and Z, as seen from the tangent X. */
+
+ /* TODO: we convert this value to a cosine later and discard the sign, so
+ * we could probably save some operations. */
+ float h = dot(cross(sd->Ng, X), Z);
+
+ kernel_assert(fabsf(h) < 1.0f + 1e-4f);
+ kernel_assert(isfinite3_safe(Y));
+ kernel_assert(isfinite_safe(h));
+
+ bsdf->extra->geom = make_float4(Y.x, Y.y, Y.z, h);
+
+ return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
}
-#endif /* __HAIR__ */
+# endif /* __HAIR__ */
/* Given the Fresnel term and transmittance, generate the attenuation terms for each bounce. */
-ccl_device_inline void hair_attenuation(KernelGlobals *kg,
- float f,
- float3 T,
- float4 *Ap)
+ccl_device_inline void hair_attenuation(KernelGlobals *kg, float f, float3 T, float4 *Ap)
{
- /* Primary specular (R). */
- Ap[0] = make_float4(f, f, f, f);
+ /* Primary specular (R). */
+ Ap[0] = make_float4(f, f, f, f);
- /* Transmission (TT). */
- float3 col = sqr(1.0f - f) * T;
- Ap[1] = combine_with_energy(kg, col);
+ /* Transmission (TT). */
+ float3 col = sqr(1.0f - f) * T;
+ Ap[1] = combine_with_energy(kg, col);
- /* Secondary specular (TRT). */
- col *= T*f;
- Ap[2] = combine_with_energy(kg, col);
+ /* Secondary specular (TRT). */
+ col *= T * f;
+ Ap[2] = combine_with_energy(kg, col);
- /* Residual component (TRRT+). */
- col *= safe_divide_color(T*f, make_float3(1.0f, 1.0f, 1.0f) - T*f);
- Ap[3] = combine_with_energy(kg, col);
+ /* Residual component (TRRT+). */
+ col *= safe_divide_color(T * f, make_float3(1.0f, 1.0f, 1.0f) - T * f);
+ Ap[3] = combine_with_energy(kg, col);
- /* Normalize sampling weights. */
- float totweight = Ap[0].w + Ap[1].w + Ap[2].w + Ap[3].w;
- float fac = safe_divide(1.0f, totweight);
+ /* Normalize sampling weights. */
+ float totweight = Ap[0].w + Ap[1].w + Ap[2].w + Ap[3].w;
+ float fac = safe_divide(1.0f, totweight);
- Ap[0].w *= fac;
- Ap[1].w *= fac;
- Ap[2].w *= fac;
- Ap[3].w *= fac;
+ Ap[0].w *= fac;
+ Ap[1].w *= fac;
+ Ap[2].w *= fac;
+ Ap[3].w *= fac;
}
/* Given the tilt angle, generate the rotated theta_i for the different bounces. */
@@ -268,19 +262,19 @@ ccl_device_inline void hair_alpha_angles(float sin_theta_i,
float alpha,
float *angles)
{
- float sin_1alpha = sinf(alpha);
- float cos_1alpha = cos_from_sin(sin_1alpha);
- float sin_2alpha = 2.0f*sin_1alpha*cos_1alpha;
- float cos_2alpha = sqr(cos_1alpha) - sqr(sin_1alpha);
- float sin_4alpha = 2.0f*sin_2alpha*cos_2alpha;
- float cos_4alpha = sqr(cos_2alpha) - sqr(sin_2alpha);
-
- angles[0] = sin_theta_i*cos_2alpha + cos_theta_i*sin_2alpha;
- angles[1] = fabsf(cos_theta_i*cos_2alpha - sin_theta_i*sin_2alpha);
- angles[2] = sin_theta_i*cos_1alpha - cos_theta_i*sin_1alpha;
- angles[3] = fabsf(cos_theta_i*cos_1alpha + sin_theta_i*sin_1alpha);
- angles[4] = sin_theta_i*cos_4alpha - cos_theta_i*sin_4alpha;
- angles[5] = fabsf(cos_theta_i*cos_4alpha + sin_theta_i*sin_4alpha);
+ float sin_1alpha = sinf(alpha);
+ float cos_1alpha = cos_from_sin(sin_1alpha);
+ float sin_2alpha = 2.0f * sin_1alpha * cos_1alpha;
+ float cos_2alpha = sqr(cos_1alpha) - sqr(sin_1alpha);
+ float sin_4alpha = 2.0f * sin_2alpha * cos_2alpha;
+ float cos_4alpha = sqr(cos_2alpha) - sqr(sin_2alpha);
+
+ angles[0] = sin_theta_i * cos_2alpha + cos_theta_i * sin_2alpha;
+ angles[1] = fabsf(cos_theta_i * cos_2alpha - sin_theta_i * sin_2alpha);
+ angles[2] = sin_theta_i * cos_1alpha - cos_theta_i * sin_1alpha;
+ angles[3] = fabsf(cos_theta_i * cos_1alpha + sin_theta_i * sin_1alpha);
+ angles[4] = sin_theta_i * cos_4alpha - cos_theta_i * sin_4alpha;
+ angles[5] = fabsf(cos_theta_i * cos_4alpha + sin_theta_i * sin_4alpha);
}
/* Evaluation function for our shader. */
@@ -290,75 +284,75 @@ ccl_device float3 bsdf_principled_hair_eval(KernelGlobals *kg,
const float3 omega_in,
float *pdf)
{
- kernel_assert(isfinite3_safe(sd->P) && isfinite_safe(sd->ray_length));
+ kernel_assert(isfinite3_safe(sd->P) && isfinite_safe(sd->ray_length));
- const PrincipledHairBSDF *bsdf = (const PrincipledHairBSDF*) sc;
- float3 Y = float4_to_float3(bsdf->extra->geom);
+ const PrincipledHairBSDF *bsdf = (const PrincipledHairBSDF *)sc;
+ float3 Y = float4_to_float3(bsdf->extra->geom);
- float3 X = safe_normalize(sd->dPdu);
- kernel_assert(fabsf(dot(X, Y)) < 1e-3f);
- float3 Z = safe_normalize(cross(X, Y));
+ float3 X = safe_normalize(sd->dPdu);
+ kernel_assert(fabsf(dot(X, Y)) < 1e-3f);
+ float3 Z = safe_normalize(cross(X, Y));
- float3 wo = make_float3(dot(sd->I, X), dot(sd->I, Y), dot(sd->I, Z));
- float3 wi = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
+ float3 wo = make_float3(dot(sd->I, X), dot(sd->I, Y), dot(sd->I, Z));
+ float3 wi = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
- float sin_theta_o = wo.x;
- float cos_theta_o = cos_from_sin(sin_theta_o);
- float phi_o = atan2f(wo.z, wo.y);
+ float sin_theta_o = wo.x;
+ float cos_theta_o = cos_from_sin(sin_theta_o);
+ float phi_o = atan2f(wo.z, wo.y);
- float sin_theta_t = sin_theta_o / bsdf->eta;
- float cos_theta_t = cos_from_sin(sin_theta_t);
+ float sin_theta_t = sin_theta_o / bsdf->eta;
+ float cos_theta_t = cos_from_sin(sin_theta_t);
- float sin_gamma_o = bsdf->extra->geom.w;
- float cos_gamma_o = cos_from_sin(sin_gamma_o);
- float gamma_o = safe_asinf(sin_gamma_o);
+ float sin_gamma_o = bsdf->extra->geom.w;
+ float cos_gamma_o = cos_from_sin(sin_gamma_o);
+ float gamma_o = safe_asinf(sin_gamma_o);
- float sin_gamma_t = sin_gamma_o * cos_theta_o / sqrtf(sqr(bsdf->eta) - sqr(sin_theta_o));
- float cos_gamma_t = cos_from_sin(sin_gamma_t);
- float gamma_t = safe_asinf(sin_gamma_t);
+ float sin_gamma_t = sin_gamma_o * cos_theta_o / sqrtf(sqr(bsdf->eta) - sqr(sin_theta_o));
+ float cos_gamma_t = cos_from_sin(sin_gamma_t);
+ float gamma_t = safe_asinf(sin_gamma_t);
- float3 T = exp3(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t));
- float4 Ap[4];
- hair_attenuation(kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap);
+ float3 T = exp3(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t));
+ float4 Ap[4];
+ hair_attenuation(kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap);
- float sin_theta_i = wi.x;
- float cos_theta_i = cos_from_sin(sin_theta_i);
- float phi_i = atan2f(wi.z, wi.y);
+ float sin_theta_i = wi.x;
+ float cos_theta_i = cos_from_sin(sin_theta_i);
+ float phi_i = atan2f(wi.z, wi.y);
- float phi = phi_i - phi_o;
+ float phi = phi_i - phi_o;
- float angles[6];
- hair_alpha_angles(sin_theta_i, cos_theta_i, bsdf->alpha, angles);
+ float angles[6];
+ hair_alpha_angles(sin_theta_i, cos_theta_i, bsdf->alpha, angles);
- float4 F;
- float Mp, Np;
+ float4 F;
+ float Mp, Np;
- /* Primary specular (R). */
- Mp = longitudinal_scattering(angles[0], angles[1], sin_theta_o, cos_theta_o, bsdf->m0_roughness);
- Np = azimuthal_scattering(phi, 0, bsdf->s, gamma_o, gamma_t);
- F = Ap[0] * Mp * Np;
- kernel_assert(isfinite3_safe(float4_to_float3(F)));
+ /* Primary specular (R). */
+ Mp = longitudinal_scattering(angles[0], angles[1], sin_theta_o, cos_theta_o, bsdf->m0_roughness);
+ Np = azimuthal_scattering(phi, 0, bsdf->s, gamma_o, gamma_t);
+ F = Ap[0] * Mp * Np;
+ kernel_assert(isfinite3_safe(float4_to_float3(F)));
- /* Transmission (TT). */
- Mp = longitudinal_scattering(angles[2], angles[3], sin_theta_o, cos_theta_o, 0.25f*bsdf->v);
- Np = azimuthal_scattering(phi, 1, bsdf->s, gamma_o, gamma_t);
- F += Ap[1] * Mp * Np;
- kernel_assert(isfinite3_safe(float4_to_float3(F)));
+ /* Transmission (TT). */
+ Mp = longitudinal_scattering(angles[2], angles[3], sin_theta_o, cos_theta_o, 0.25f * bsdf->v);
+ Np = azimuthal_scattering(phi, 1, bsdf->s, gamma_o, gamma_t);
+ F += Ap[1] * Mp * Np;
+ kernel_assert(isfinite3_safe(float4_to_float3(F)));
- /* Secondary specular (TRT). */
- Mp = longitudinal_scattering(angles[4], angles[5], sin_theta_o, cos_theta_o, 4.0f*bsdf->v);
- Np = azimuthal_scattering(phi, 2, bsdf->s, gamma_o, gamma_t);
- F += Ap[2] * Mp * Np;
- kernel_assert(isfinite3_safe(float4_to_float3(F)));
+ /* Secondary specular (TRT). */
+ Mp = longitudinal_scattering(angles[4], angles[5], sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
+ Np = azimuthal_scattering(phi, 2, bsdf->s, gamma_o, gamma_t);
+ F += Ap[2] * Mp * Np;
+ kernel_assert(isfinite3_safe(float4_to_float3(F)));
- /* Residual component (TRRT+). */
- Mp = longitudinal_scattering(sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f*bsdf->v);
- Np = M_1_2PI_F;
- F += Ap[3] * Mp * Np;
- kernel_assert(isfinite3_safe(float4_to_float3(F)));
+ /* Residual component (TRRT+). */
+ Mp = longitudinal_scattering(sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
+ Np = M_1_2PI_F;
+ F += Ap[3] * Mp * Np;
+ kernel_assert(isfinite3_safe(float4_to_float3(F)));
- *pdf = F.w;
- return float4_to_float3(F);
+ *pdf = F.w;
+ return float4_to_float3(F);
}
/* Sampling function for the hair shader. */
@@ -373,130 +367,131 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals *kg,
float3 *domega_in_dy,
float *pdf)
{
- PrincipledHairBSDF *bsdf = (PrincipledHairBSDF*) sc;
-
- float3 Y = float4_to_float3(bsdf->extra->geom);
-
- float3 X = safe_normalize(sd->dPdu);
- kernel_assert(fabsf(dot(X, Y)) < 1e-3f);
- float3 Z = safe_normalize(cross(X, Y));
-
- float3 wo = make_float3(dot(sd->I, X), dot(sd->I, Y), dot(sd->I, Z));
-
- float2 u[2];
- u[0] = make_float2(randu, randv);
- u[1].x = lcg_step_float_addrspace(&sd->lcg_state);
- u[1].y = lcg_step_float_addrspace(&sd->lcg_state);
-
- float sin_theta_o = wo.x;
- float cos_theta_o = cos_from_sin(sin_theta_o);
- float phi_o = atan2f(wo.z, wo.y);
-
- float sin_theta_t = sin_theta_o / bsdf->eta;
- float cos_theta_t = cos_from_sin(sin_theta_t);
-
- float sin_gamma_o = bsdf->extra->geom.w;
- float cos_gamma_o = cos_from_sin(sin_gamma_o);
- float gamma_o = safe_asinf(sin_gamma_o);
-
- float sin_gamma_t = sin_gamma_o * cos_theta_o / sqrtf(sqr(bsdf->eta) - sqr(sin_theta_o));
- float cos_gamma_t = cos_from_sin(sin_gamma_t);
- float gamma_t = safe_asinf(sin_gamma_t);
-
- float3 T = exp3(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t));
- float4 Ap[4];
- hair_attenuation(kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap);
-
- int p = 0;
- for(; p < 3; p++) {
- if(u[0].x < Ap[p].w) {
- break;
- }
- u[0].x -= Ap[p].w;
- }
-
- float v = bsdf->v;
- if(p == 1) {
- v *= 0.25f;
- }
- if(p >= 2) {
- v *= 4.0f;
- }
-
- u[1].x = max(u[1].x, 1e-5f);
- float fac = 1.0f + v*logf(u[1].x + (1.0f - u[1].x)*expf(-2.0f/v));
- float sin_theta_i = -fac * sin_theta_o + cos_from_sin(fac) * cosf(M_2PI_F * u[1].y) * cos_theta_o;
- float cos_theta_i = cos_from_sin(sin_theta_i);
-
- float angles[6];
- if(p < 3) {
- hair_alpha_angles(sin_theta_i, cos_theta_i, -bsdf->alpha, angles);
- sin_theta_i = angles[2*p];
- cos_theta_i = angles[2*p+1];
- }
-
- float phi;
- if(p < 3) {
- phi = delta_phi(p, gamma_o, gamma_t) + sample_trimmed_logistic(u[0].y, bsdf->s);
- }
- else {
- phi = M_2PI_F*u[0].y;
- }
- float phi_i = phi_o + phi;
-
- hair_alpha_angles(sin_theta_i, cos_theta_i, bsdf->alpha, angles);
-
- float4 F;
- float Mp, Np;
-
- /* Primary specular (R). */
- Mp = longitudinal_scattering(angles[0], angles[1], sin_theta_o, cos_theta_o, bsdf->m0_roughness);
- Np = azimuthal_scattering(phi, 0, bsdf->s, gamma_o, gamma_t);
- F = Ap[0] * Mp * Np;
- kernel_assert(isfinite3_safe(float4_to_float3(F)));
-
- /* Transmission (TT). */
- Mp = longitudinal_scattering(angles[2], angles[3], sin_theta_o, cos_theta_o, 0.25f*bsdf->v);
- Np = azimuthal_scattering(phi, 1, bsdf->s, gamma_o, gamma_t);
- F += Ap[1] * Mp * Np;
- kernel_assert(isfinite3_safe(float4_to_float3(F)));
-
- /* Secondary specular (TRT). */
- Mp = longitudinal_scattering(angles[4], angles[5], sin_theta_o, cos_theta_o, 4.0f*bsdf->v);
- Np = azimuthal_scattering(phi, 2, bsdf->s, gamma_o, gamma_t);
- F += Ap[2] * Mp * Np;
- kernel_assert(isfinite3_safe(float4_to_float3(F)));
-
- /* Residual component (TRRT+). */
- Mp = longitudinal_scattering(sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f*bsdf->v);
- Np = M_1_2PI_F;
- F += Ap[3] * Mp * Np;
- kernel_assert(isfinite3_safe(float4_to_float3(F)));
-
- *eval = float4_to_float3(F);
- *pdf = F.w;
-
- *omega_in = X*sin_theta_i + Y*cos_theta_i*cosf(phi_i) + Z*cos_theta_i*sinf(phi_i);
-
-#ifdef __RAY_DIFFERENTIALS__
- float3 N = safe_normalize(sd->I + *omega_in);
- *domega_in_dx = (2 * dot(N, sd->dI.dx)) * N - sd->dI.dx;
- *domega_in_dy = (2 * dot(N, sd->dI.dy)) * N - sd->dI.dy;
-#endif
-
- return LABEL_GLOSSY|((p == 0)? LABEL_REFLECT : LABEL_TRANSMIT);
+ PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)sc;
+
+ float3 Y = float4_to_float3(bsdf->extra->geom);
+
+ float3 X = safe_normalize(sd->dPdu);
+ kernel_assert(fabsf(dot(X, Y)) < 1e-3f);
+ float3 Z = safe_normalize(cross(X, Y));
+
+ float3 wo = make_float3(dot(sd->I, X), dot(sd->I, Y), dot(sd->I, Z));
+
+ float2 u[2];
+ u[0] = make_float2(randu, randv);
+ u[1].x = lcg_step_float_addrspace(&sd->lcg_state);
+ u[1].y = lcg_step_float_addrspace(&sd->lcg_state);
+
+ float sin_theta_o = wo.x;
+ float cos_theta_o = cos_from_sin(sin_theta_o);
+ float phi_o = atan2f(wo.z, wo.y);
+
+ float sin_theta_t = sin_theta_o / bsdf->eta;
+ float cos_theta_t = cos_from_sin(sin_theta_t);
+
+ float sin_gamma_o = bsdf->extra->geom.w;
+ float cos_gamma_o = cos_from_sin(sin_gamma_o);
+ float gamma_o = safe_asinf(sin_gamma_o);
+
+ float sin_gamma_t = sin_gamma_o * cos_theta_o / sqrtf(sqr(bsdf->eta) - sqr(sin_theta_o));
+ float cos_gamma_t = cos_from_sin(sin_gamma_t);
+ float gamma_t = safe_asinf(sin_gamma_t);
+
+ float3 T = exp3(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t));
+ float4 Ap[4];
+ hair_attenuation(kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap);
+
+ int p = 0;
+ for (; p < 3; p++) {
+ if (u[0].x < Ap[p].w) {
+ break;
+ }
+ u[0].x -= Ap[p].w;
+ }
+
+ float v = bsdf->v;
+ if (p == 1) {
+ v *= 0.25f;
+ }
+ if (p >= 2) {
+ v *= 4.0f;
+ }
+
+ u[1].x = max(u[1].x, 1e-5f);
+ float fac = 1.0f + v * logf(u[1].x + (1.0f - u[1].x) * expf(-2.0f / v));
+ float sin_theta_i = -fac * sin_theta_o +
+ cos_from_sin(fac) * cosf(M_2PI_F * u[1].y) * cos_theta_o;
+ float cos_theta_i = cos_from_sin(sin_theta_i);
+
+ float angles[6];
+ if (p < 3) {
+ hair_alpha_angles(sin_theta_i, cos_theta_i, -bsdf->alpha, angles);
+ sin_theta_i = angles[2 * p];
+ cos_theta_i = angles[2 * p + 1];
+ }
+
+ float phi;
+ if (p < 3) {
+ phi = delta_phi(p, gamma_o, gamma_t) + sample_trimmed_logistic(u[0].y, bsdf->s);
+ }
+ else {
+ phi = M_2PI_F * u[0].y;
+ }
+ float phi_i = phi_o + phi;
+
+ hair_alpha_angles(sin_theta_i, cos_theta_i, bsdf->alpha, angles);
+
+ float4 F;
+ float Mp, Np;
+
+ /* Primary specular (R). */
+ Mp = longitudinal_scattering(angles[0], angles[1], sin_theta_o, cos_theta_o, bsdf->m0_roughness);
+ Np = azimuthal_scattering(phi, 0, bsdf->s, gamma_o, gamma_t);
+ F = Ap[0] * Mp * Np;
+ kernel_assert(isfinite3_safe(float4_to_float3(F)));
+
+ /* Transmission (TT). */
+ Mp = longitudinal_scattering(angles[2], angles[3], sin_theta_o, cos_theta_o, 0.25f * bsdf->v);
+ Np = azimuthal_scattering(phi, 1, bsdf->s, gamma_o, gamma_t);
+ F += Ap[1] * Mp * Np;
+ kernel_assert(isfinite3_safe(float4_to_float3(F)));
+
+ /* Secondary specular (TRT). */
+ Mp = longitudinal_scattering(angles[4], angles[5], sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
+ Np = azimuthal_scattering(phi, 2, bsdf->s, gamma_o, gamma_t);
+ F += Ap[2] * Mp * Np;
+ kernel_assert(isfinite3_safe(float4_to_float3(F)));
+
+ /* Residual component (TRRT+). */
+ Mp = longitudinal_scattering(sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
+ Np = M_1_2PI_F;
+ F += Ap[3] * Mp * Np;
+ kernel_assert(isfinite3_safe(float4_to_float3(F)));
+
+ *eval = float4_to_float3(F);
+ *pdf = F.w;
+
+ *omega_in = X * sin_theta_i + Y * cos_theta_i * cosf(phi_i) + Z * cos_theta_i * sinf(phi_i);
+
+# ifdef __RAY_DIFFERENTIALS__
+ float3 N = safe_normalize(sd->I + *omega_in);
+ *domega_in_dx = (2 * dot(N, sd->dI.dx)) * N - sd->dI.dx;
+ *domega_in_dy = (2 * dot(N, sd->dI.dy)) * N - sd->dI.dy;
+# endif
+
+ return LABEL_GLOSSY | ((p == 0) ? LABEL_REFLECT : LABEL_TRANSMIT);
}
/* Implements Filter Glossy by capping the effective roughness. */
ccl_device void bsdf_principled_hair_blur(ShaderClosure *sc, float roughness)
{
- PrincipledHairBSDF *bsdf = (PrincipledHairBSDF*)sc;
+ PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)sc;
- bsdf->v = fmaxf(roughness, bsdf->v);
- bsdf->s = fmaxf(roughness, bsdf->s);
- bsdf->m0_roughness = fmaxf(roughness, bsdf->m0_roughness);
+ bsdf->v = fmaxf(roughness, bsdf->v);
+ bsdf->s = fmaxf(roughness, bsdf->s);
+ bsdf->m0_roughness = fmaxf(roughness, bsdf->m0_roughness);
}
CCL_NAMESPACE_END
-#endif /* __BSDF_HAIR_PRINCIPLED_H__ */
+#endif /* __BSDF_HAIR_PRINCIPLED_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index 32b6e50b09a..b4da3123f28 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -36,95 +36,98 @@
CCL_NAMESPACE_BEGIN
typedef ccl_addr_space struct MicrofacetExtra {
- float3 color, cspec0;
- float clearcoat;
+ float3 color, cspec0;
+ float clearcoat;
} MicrofacetExtra;
typedef ccl_addr_space struct MicrofacetBsdf {
- SHADER_CLOSURE_BASE;
+ SHADER_CLOSURE_BASE;
- float alpha_x, alpha_y, ior;
- MicrofacetExtra *extra;
- float3 T;
+ float alpha_x, alpha_y, ior;
+ MicrofacetExtra *extra;
+ float3 T;
} MicrofacetBsdf;
/* Beckmann and GGX microfacet importance sampling. */
-ccl_device_inline void microfacet_beckmann_sample_slopes(
- KernelGlobals *kg,
- const float cos_theta_i, const float sin_theta_i,
- float randu, float randv, float *slope_x, float *slope_y,
- float *G1i)
+ccl_device_inline void microfacet_beckmann_sample_slopes(KernelGlobals *kg,
+ const float cos_theta_i,
+ const float sin_theta_i,
+ float randu,
+ float randv,
+ float *slope_x,
+ float *slope_y,
+ float *G1i)
{
- /* special case (normal incidence) */
- if(cos_theta_i >= 0.99999f) {
- const float r = sqrtf(-logf(randu));
- const float phi = M_2PI_F * randv;
- *slope_x = r * cosf(phi);
- *slope_y = r * sinf(phi);
- *G1i = 1.0f;
- return;
- }
-
- /* precomputations */
- const float tan_theta_i = sin_theta_i/cos_theta_i;
- const float inv_a = tan_theta_i;
- const float cot_theta_i = 1.0f/tan_theta_i;
- const float erf_a = fast_erff(cot_theta_i);
- const float exp_a2 = expf(-cot_theta_i*cot_theta_i);
- const float SQRT_PI_INV = 0.56418958354f;
- const float Lambda = 0.5f*(erf_a - 1.0f) + (0.5f*SQRT_PI_INV)*(exp_a2*inv_a);
- const float G1 = 1.0f/(1.0f + Lambda); /* masking */
-
- *G1i = G1;
+ /* special case (normal incidence) */
+ if (cos_theta_i >= 0.99999f) {
+ const float r = sqrtf(-logf(randu));
+ const float phi = M_2PI_F * randv;
+ *slope_x = r * cosf(phi);
+ *slope_y = r * sinf(phi);
+ *G1i = 1.0f;
+ return;
+ }
+
+ /* precomputations */
+ const float tan_theta_i = sin_theta_i / cos_theta_i;
+ const float inv_a = tan_theta_i;
+ const float cot_theta_i = 1.0f / tan_theta_i;
+ const float erf_a = fast_erff(cot_theta_i);
+ const float exp_a2 = expf(-cot_theta_i * cot_theta_i);
+ const float SQRT_PI_INV = 0.56418958354f;
+ const float Lambda = 0.5f * (erf_a - 1.0f) + (0.5f * SQRT_PI_INV) * (exp_a2 * inv_a);
+ const float G1 = 1.0f / (1.0f + Lambda); /* masking */
+
+ *G1i = G1;
#if defined(__KERNEL_GPU__)
- /* Based on paper from Wenzel Jakob
- * An Improved Visible Normal Sampling Routine for the Beckmann Distribution
- *
- * http://www.mitsuba-renderer.org/~wenzel/files/visnormal.pdf
- *
- * Reformulation from OpenShadingLanguage which avoids using inverse
- * trigonometric functions.
- */
-
- /* Sample slope X.
- *
- * Compute a coarse approximation using the approximation:
- * exp(-ierf(x)^2) ~= 1 - x * x
- * solve y = 1 + b + K * (1 - b * b)
- */
- float K = tan_theta_i * SQRT_PI_INV;
- float y_approx = randu * (1.0f + erf_a + K * (1 - erf_a * erf_a));
- float y_exact = randu * (1.0f + erf_a + K * exp_a2);
- float b = K > 0 ? (0.5f - sqrtf(K * (K - y_approx + 1.0f) + 0.25f)) / K : y_approx - 1.0f;
-
- /* Perform newton step to refine toward the true root. */
- float inv_erf = fast_ierff(b);
- float value = 1.0f + b + K * expf(-inv_erf * inv_erf) - y_exact;
- /* Check if we are close enough already,
- * this also avoids NaNs as we get close to the root.
- */
- if(fabsf(value) > 1e-6f) {
- b -= value / (1.0f - inv_erf * tan_theta_i); /* newton step 1. */
- inv_erf = fast_ierff(b);
- value = 1.0f + b + K * expf(-inv_erf * inv_erf) - y_exact;
- b -= value / (1.0f - inv_erf * tan_theta_i); /* newton step 2. */
- /* Compute the slope from the refined value. */
- *slope_x = fast_ierff(b);
- }
- else {
- /* We are close enough already. */
- *slope_x = inv_erf;
- }
- *slope_y = fast_ierff(2.0f*randv - 1.0f);
+ /* Based on paper from Wenzel Jakob
+ * An Improved Visible Normal Sampling Routine for the Beckmann Distribution
+ *
+ * http://www.mitsuba-renderer.org/~wenzel/files/visnormal.pdf
+ *
+ * Reformulation from OpenShadingLanguage which avoids using inverse
+ * trigonometric functions.
+ */
+
+ /* Sample slope X.
+ *
+ * Compute a coarse approximation using the approximation:
+ * exp(-ierf(x)^2) ~= 1 - x * x
+ * solve y = 1 + b + K * (1 - b * b)
+ */
+ float K = tan_theta_i * SQRT_PI_INV;
+ float y_approx = randu * (1.0f + erf_a + K * (1 - erf_a * erf_a));
+ float y_exact = randu * (1.0f + erf_a + K * exp_a2);
+ float b = K > 0 ? (0.5f - sqrtf(K * (K - y_approx + 1.0f) + 0.25f)) / K : y_approx - 1.0f;
+
+ /* Perform newton step to refine toward the true root. */
+ float inv_erf = fast_ierff(b);
+ float value = 1.0f + b + K * expf(-inv_erf * inv_erf) - y_exact;
+ /* Check if we are close enough already,
+ * this also avoids NaNs as we get close to the root.
+ */
+ if (fabsf(value) > 1e-6f) {
+ b -= value / (1.0f - inv_erf * tan_theta_i); /* newton step 1. */
+ inv_erf = fast_ierff(b);
+ value = 1.0f + b + K * expf(-inv_erf * inv_erf) - y_exact;
+ b -= value / (1.0f - inv_erf * tan_theta_i); /* newton step 2. */
+ /* Compute the slope from the refined value. */
+ *slope_x = fast_ierff(b);
+ }
+ else {
+ /* We are close enough already. */
+ *slope_x = inv_erf;
+ }
+ *slope_y = fast_ierff(2.0f * randv - 1.0f);
#else
- /* Use precomputed table on CPU, it gives better perfomance. */
- int beckmann_table_offset = kernel_data.tables.beckmann_offset;
+ /* Use precomputed table on CPU, it gives better perfomance. */
+ int beckmann_table_offset = kernel_data.tables.beckmann_offset;
- *slope_x = lookup_table_read_2D(kg, randu, cos_theta_i,
- beckmann_table_offset, BECKMANN_TABLE_SIZE, BECKMANN_TABLE_SIZE);
- *slope_y = fast_ierff(2.0f*randv - 1.0f);
+ *slope_x = lookup_table_read_2D(
+ kg, randu, cos_theta_i, beckmann_table_offset, BECKMANN_TABLE_SIZE, BECKMANN_TABLE_SIZE);
+ *slope_y = fast_ierff(2.0f * randv - 1.0f);
#endif
}
@@ -134,103 +137,109 @@ ccl_device_inline void microfacet_beckmann_sample_slopes(
* E. Heitz and E. d'Eon, EGSR 2014
*/
-ccl_device_inline void microfacet_ggx_sample_slopes(
- const float cos_theta_i, const float sin_theta_i,
- float randu, float randv, float *slope_x, float *slope_y,
- float *G1i)
+ccl_device_inline void microfacet_ggx_sample_slopes(const float cos_theta_i,
+ const float sin_theta_i,
+ float randu,
+ float randv,
+ float *slope_x,
+ float *slope_y,
+ float *G1i)
{
- /* special case (normal incidence) */
- if(cos_theta_i >= 0.99999f) {
- const float r = sqrtf(randu/(1.0f - randu));
- const float phi = M_2PI_F * randv;
- *slope_x = r * cosf(phi);
- *slope_y = r * sinf(phi);
- *G1i = 1.0f;
-
- return;
- }
-
- /* precomputations */
- const float tan_theta_i = sin_theta_i/cos_theta_i;
- const float G1_inv = 0.5f * (1.0f + safe_sqrtf(1.0f + tan_theta_i*tan_theta_i));
-
- *G1i = 1.0f/G1_inv;
-
- /* sample slope_x */
- const float A = 2.0f*randu*G1_inv - 1.0f;
- const float AA = A*A;
- const float tmp = 1.0f/(AA - 1.0f);
- const float B = tan_theta_i;
- const float BB = B*B;
- const float D = safe_sqrtf(BB*(tmp*tmp) - (AA - BB)*tmp);
- const float slope_x_1 = B*tmp - D;
- const float slope_x_2 = B*tmp + D;
- *slope_x = (A < 0.0f || slope_x_2*tan_theta_i > 1.0f)? slope_x_1: slope_x_2;
-
- /* sample slope_y */
- float S;
-
- if(randv > 0.5f) {
- S = 1.0f;
- randv = 2.0f*(randv - 0.5f);
- }
- else {
- S = -1.0f;
- randv = 2.0f*(0.5f - randv);
- }
-
- const float z = (randv*(randv*(randv*0.27385f - 0.73369f) + 0.46341f)) / (randv*(randv*(randv*0.093073f + 0.309420f) - 1.000000f) + 0.597999f);
- *slope_y = S * z * safe_sqrtf(1.0f + (*slope_x)*(*slope_x));
+ /* special case (normal incidence) */
+ if (cos_theta_i >= 0.99999f) {
+ const float r = sqrtf(randu / (1.0f - randu));
+ const float phi = M_2PI_F * randv;
+ *slope_x = r * cosf(phi);
+ *slope_y = r * sinf(phi);
+ *G1i = 1.0f;
+
+ return;
+ }
+
+ /* precomputations */
+ const float tan_theta_i = sin_theta_i / cos_theta_i;
+ const float G1_inv = 0.5f * (1.0f + safe_sqrtf(1.0f + tan_theta_i * tan_theta_i));
+
+ *G1i = 1.0f / G1_inv;
+
+ /* sample slope_x */
+ const float A = 2.0f * randu * G1_inv - 1.0f;
+ const float AA = A * A;
+ const float tmp = 1.0f / (AA - 1.0f);
+ const float B = tan_theta_i;
+ const float BB = B * B;
+ const float D = safe_sqrtf(BB * (tmp * tmp) - (AA - BB) * tmp);
+ const float slope_x_1 = B * tmp - D;
+ const float slope_x_2 = B * tmp + D;
+ *slope_x = (A < 0.0f || slope_x_2 * tan_theta_i > 1.0f) ? slope_x_1 : slope_x_2;
+
+ /* sample slope_y */
+ float S;
+
+ if (randv > 0.5f) {
+ S = 1.0f;
+ randv = 2.0f * (randv - 0.5f);
+ }
+ else {
+ S = -1.0f;
+ randv = 2.0f * (0.5f - randv);
+ }
+
+ const float z = (randv * (randv * (randv * 0.27385f - 0.73369f) + 0.46341f)) /
+ (randv * (randv * (randv * 0.093073f + 0.309420f) - 1.000000f) + 0.597999f);
+ *slope_y = S * z * safe_sqrtf(1.0f + (*slope_x) * (*slope_x));
}
-ccl_device_forceinline float3 microfacet_sample_stretched(
- KernelGlobals *kg, const float3 omega_i,
- const float alpha_x, const float alpha_y,
- const float randu, const float randv,
- bool beckmann, float *G1i)
+ccl_device_forceinline float3 microfacet_sample_stretched(KernelGlobals *kg,
+ const float3 omega_i,
+ const float alpha_x,
+ const float alpha_y,
+ const float randu,
+ const float randv,
+ bool beckmann,
+ float *G1i)
{
- /* 1. stretch omega_i */
- float3 omega_i_ = make_float3(alpha_x * omega_i.x, alpha_y * omega_i.y, omega_i.z);
- omega_i_ = normalize(omega_i_);
-
- /* get polar coordinates of omega_i_ */
- float costheta_ = 1.0f;
- float sintheta_ = 0.0f;
- float cosphi_ = 1.0f;
- float sinphi_ = 0.0f;
-
- if(omega_i_.z < 0.99999f) {
- costheta_ = omega_i_.z;
- sintheta_ = safe_sqrtf(1.0f - costheta_*costheta_);
-
- float invlen = 1.0f/sintheta_;
- cosphi_ = omega_i_.x * invlen;
- sinphi_ = omega_i_.y * invlen;
- }
-
- /* 2. sample P22_{omega_i}(x_slope, y_slope, 1, 1) */
- float slope_x, slope_y;
-
- if(beckmann) {
- microfacet_beckmann_sample_slopes(kg, costheta_, sintheta_,
- randu, randv, &slope_x, &slope_y, G1i);
- }
- else {
- microfacet_ggx_sample_slopes(costheta_, sintheta_,
- randu, randv, &slope_x, &slope_y, G1i);
- }
-
- /* 3. rotate */
- float tmp = cosphi_*slope_x - sinphi_*slope_y;
- slope_y = sinphi_*slope_x + cosphi_*slope_y;
- slope_x = tmp;
-
- /* 4. unstretch */
- slope_x = alpha_x * slope_x;
- slope_y = alpha_y * slope_y;
-
- /* 5. compute normal */
- return normalize(make_float3(-slope_x, -slope_y, 1.0f));
+ /* 1. stretch omega_i */
+ float3 omega_i_ = make_float3(alpha_x * omega_i.x, alpha_y * omega_i.y, omega_i.z);
+ omega_i_ = normalize(omega_i_);
+
+ /* get polar coordinates of omega_i_ */
+ float costheta_ = 1.0f;
+ float sintheta_ = 0.0f;
+ float cosphi_ = 1.0f;
+ float sinphi_ = 0.0f;
+
+ if (omega_i_.z < 0.99999f) {
+ costheta_ = omega_i_.z;
+ sintheta_ = safe_sqrtf(1.0f - costheta_ * costheta_);
+
+ float invlen = 1.0f / sintheta_;
+ cosphi_ = omega_i_.x * invlen;
+ sinphi_ = omega_i_.y * invlen;
+ }
+
+ /* 2. sample P22_{omega_i}(x_slope, y_slope, 1, 1) */
+ float slope_x, slope_y;
+
+ if (beckmann) {
+ microfacet_beckmann_sample_slopes(
+ kg, costheta_, sintheta_, randu, randv, &slope_x, &slope_y, G1i);
+ }
+ else {
+ microfacet_ggx_sample_slopes(costheta_, sintheta_, randu, randv, &slope_x, &slope_y, G1i);
+ }
+
+ /* 3. rotate */
+ float tmp = cosphi_ * slope_x - sinphi_ * slope_y;
+ slope_y = sinphi_ * slope_x + cosphi_ * slope_y;
+ slope_x = tmp;
+
+ /* 4. unstretch */
+ slope_x = alpha_x * slope_x;
+ slope_y = alpha_y * slope_y;
+
+ /* 5. compute normal */
+ return normalize(make_float3(-slope_x, -slope_y, 1.0f));
}
/* Calculate the reflection color
@@ -240,27 +249,29 @@ ccl_device_forceinline float3 microfacet_sample_stretched(
*
* Else it is simply white
*/
-ccl_device_forceinline float3 reflection_color(const MicrofacetBsdf *bsdf, float3 L, float3 H) {
- float3 F = make_float3(1.0f, 1.0f, 1.0f);
- bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID
- || bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID
- || bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID);
+ccl_device_forceinline float3 reflection_color(const MicrofacetBsdf *bsdf, float3 L, float3 H)
+{
+ float3 F = make_float3(1.0f, 1.0f, 1.0f);
+ bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID ||
+ bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID ||
+ bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID);
- if(use_fresnel) {
- float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+ if (use_fresnel) {
+ float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
- F = interpolate_fresnel_color(L, H, bsdf->ior, F0, bsdf->extra->cspec0);
- }
+ F = interpolate_fresnel_color(L, H, bsdf->ior, F0, bsdf->extra->cspec0);
+ }
- return F;
+ return F;
}
ccl_device_forceinline float D_GTR1(float NdotH, float alpha)
{
- if(alpha >= 1.0f) return M_1_PI_F;
- float alpha2 = alpha*alpha;
- float t = 1.0f + (alpha2 - 1.0f) * NdotH*NdotH;
- return (alpha2 - 1.0f) / (M_PI_F * logf(alpha2) * t);
+ if (alpha >= 1.0f)
+ return M_1_PI_F;
+ float alpha2 = alpha * alpha;
+ float t = 1.0f + (alpha2 - 1.0f) * NdotH * NdotH;
+ return (alpha2 - 1.0f) / (M_PI_F * logf(alpha2) * t);
}
/* GGX microfacet with Smith shadow-masking from:
@@ -278,483 +289,511 @@ ccl_device_forceinline float D_GTR1(float NdotH, float alpha)
ccl_device int bsdf_microfacet_ggx_setup(MicrofacetBsdf *bsdf)
{
- bsdf->extra = NULL;
+ bsdf->extra = NULL;
- bsdf->alpha_x = saturate(bsdf->alpha_x);
- bsdf->alpha_y = bsdf->alpha_x;
+ bsdf->alpha_x = saturate(bsdf->alpha_x);
+ bsdf->alpha_y = bsdf->alpha_x;
- bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ID;
+ bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ID;
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device int bsdf_microfacet_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
{
- bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
- bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
- bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+ bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
+ bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
+ bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
- float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
- float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
- bsdf->sample_weight *= F;
+ float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+ float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+ bsdf->sample_weight *= F;
- bsdf->alpha_x = saturate(bsdf->alpha_x);
- bsdf->alpha_y = bsdf->alpha_x;
+ bsdf->alpha_x = saturate(bsdf->alpha_x);
+ bsdf->alpha_y = bsdf->alpha_x;
- bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID;
+ bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID;
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device int bsdf_microfacet_ggx_clearcoat_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
{
- bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
- bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
- bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+ bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
+ bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
+ bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
- float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
- float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
- bsdf->sample_weight *= 0.25f * bsdf->extra->clearcoat * F;
+ float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+ float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+ bsdf->sample_weight *= 0.25f * bsdf->extra->clearcoat * F;
- bsdf->alpha_x = saturate(bsdf->alpha_x);
- bsdf->alpha_y = bsdf->alpha_x;
+ bsdf->alpha_x = saturate(bsdf->alpha_x);
+ bsdf->alpha_y = bsdf->alpha_x;
- bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID;
+ bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID;
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device bool bsdf_microfacet_merge(const ShaderClosure *a, const ShaderClosure *b)
{
- const MicrofacetBsdf *bsdf_a = (const MicrofacetBsdf*)a;
- const MicrofacetBsdf *bsdf_b = (const MicrofacetBsdf*)b;
-
- return (isequal_float3(bsdf_a->N, bsdf_b->N)) &&
- (bsdf_a->alpha_x == bsdf_b->alpha_x) &&
- (bsdf_a->alpha_y == bsdf_b->alpha_y) &&
- (isequal_float3(bsdf_a->T, bsdf_b->T)) &&
- (bsdf_a->ior == bsdf_b->ior) &&
- ((bsdf_a->extra == NULL && bsdf_b->extra == NULL) ||
- ((bsdf_a->extra && bsdf_b->extra) &&
- (isequal_float3(bsdf_a->extra->color, bsdf_b->extra->color)) &&
- (isequal_float3(bsdf_a->extra->cspec0, bsdf_b->extra->cspec0)) &&
- (bsdf_a->extra->clearcoat == bsdf_b->extra->clearcoat)));
+ const MicrofacetBsdf *bsdf_a = (const MicrofacetBsdf *)a;
+ const MicrofacetBsdf *bsdf_b = (const MicrofacetBsdf *)b;
+
+ return (isequal_float3(bsdf_a->N, bsdf_b->N)) && (bsdf_a->alpha_x == bsdf_b->alpha_x) &&
+ (bsdf_a->alpha_y == bsdf_b->alpha_y) && (isequal_float3(bsdf_a->T, bsdf_b->T)) &&
+ (bsdf_a->ior == bsdf_b->ior) &&
+ ((bsdf_a->extra == NULL && bsdf_b->extra == NULL) ||
+ ((bsdf_a->extra && bsdf_b->extra) &&
+ (isequal_float3(bsdf_a->extra->color, bsdf_b->extra->color)) &&
+ (isequal_float3(bsdf_a->extra->cspec0, bsdf_b->extra->cspec0)) &&
+ (bsdf_a->extra->clearcoat == bsdf_b->extra->clearcoat)));
}
ccl_device int bsdf_microfacet_ggx_aniso_setup(MicrofacetBsdf *bsdf)
{
- bsdf->extra = NULL;
+ bsdf->extra = NULL;
- bsdf->alpha_x = saturate(bsdf->alpha_x);
- bsdf->alpha_y = saturate(bsdf->alpha_y);
+ bsdf->alpha_x = saturate(bsdf->alpha_x);
+ bsdf->alpha_y = saturate(bsdf->alpha_y);
- bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID;
+ bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID;
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device int bsdf_microfacet_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
{
- bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
- bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
- bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+ bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
+ bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
+ bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
- float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
- float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
- bsdf->sample_weight *= F;
+ float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+ float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+ bsdf->sample_weight *= F;
- bsdf->alpha_x = saturate(bsdf->alpha_x);
- bsdf->alpha_y = saturate(bsdf->alpha_y);
+ bsdf->alpha_x = saturate(bsdf->alpha_x);
+ bsdf->alpha_y = saturate(bsdf->alpha_y);
- bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID;
+ bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID;
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device int bsdf_microfacet_ggx_refraction_setup(MicrofacetBsdf *bsdf)
{
- bsdf->extra = NULL;
+ bsdf->extra = NULL;
- bsdf->alpha_x = saturate(bsdf->alpha_x);
- bsdf->alpha_y = bsdf->alpha_x;
+ bsdf->alpha_x = saturate(bsdf->alpha_x);
+ bsdf->alpha_y = bsdf->alpha_x;
- bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
+ bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device void bsdf_microfacet_ggx_blur(ShaderClosure *sc, float roughness)
{
- MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc;
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
- bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
- bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
+ bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
+ bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
}
-ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
- float alpha_x = bsdf->alpha_x;
- float alpha_y = bsdf->alpha_y;
- bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
- float3 N = bsdf->N;
-
- if(m_refractive || alpha_x*alpha_y <= 1e-7f)
- return make_float3(0.0f, 0.0f, 0.0f);
-
- float cosNO = dot(N, I);
- float cosNI = dot(N, omega_in);
-
- if(cosNI > 0 && cosNO > 0) {
- /* get half vector */
- float3 m = normalize(omega_in + I);
- float alpha2 = alpha_x * alpha_y;
- float D, G1o, G1i;
-
- if(alpha_x == alpha_y) {
- /* isotropic
- * eq. 20: (F*G*D)/(4*in*on)
- * eq. 33: first we calculate D(m) */
- float cosThetaM = dot(N, m);
- float cosThetaM2 = cosThetaM * cosThetaM;
- float cosThetaM4 = cosThetaM2 * cosThetaM2;
- float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
-
- if(bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
- /* use GTR1 for clearcoat */
- D = D_GTR1(cosThetaM, bsdf->alpha_x);
-
- /* the alpha value for clearcoat is a fixed 0.25 => alpha2 = 0.25 * 0.25 */
- alpha2 = 0.0625f;
- }
- else {
- /* use GTR2 otherwise */
- D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
- }
-
- /* eq. 34: now calculate G1(i,m) and G1(o,m) */
- G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO)));
- G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
- }
- else {
- /* anisotropic */
- float3 X, Y, Z = N;
- make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
-
- /* distribution */
- float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
- float slope_x = -local_m.x/(local_m.z*alpha_x);
- float slope_y = -local_m.y/(local_m.z*alpha_y);
- float slope_len = 1 + slope_x*slope_x + slope_y*slope_y;
-
- float cosThetaM = local_m.z;
- float cosThetaM2 = cosThetaM * cosThetaM;
- float cosThetaM4 = cosThetaM2 * cosThetaM2;
-
- D = 1 / ((slope_len * slope_len) * M_PI_F * alpha2 * cosThetaM4);
-
- /* G1(i,m) and G1(o,m) */
- float tanThetaO2 = (1 - cosNO * cosNO) / (cosNO * cosNO);
- float cosPhiO = dot(I, X);
- float sinPhiO = dot(I, Y);
-
- float alphaO2 = (cosPhiO*cosPhiO)*(alpha_x*alpha_x) + (sinPhiO*sinPhiO)*(alpha_y*alpha_y);
- alphaO2 /= cosPhiO*cosPhiO + sinPhiO*sinPhiO;
-
- G1o = 2 / (1 + safe_sqrtf(1 + alphaO2 * tanThetaO2));
-
- float tanThetaI2 = (1 - cosNI * cosNI) / (cosNI * cosNI);
- float cosPhiI = dot(omega_in, X);
- float sinPhiI = dot(omega_in, Y);
-
- float alphaI2 = (cosPhiI*cosPhiI)*(alpha_x*alpha_x) + (sinPhiI*sinPhiI)*(alpha_y*alpha_y);
- alphaI2 /= cosPhiI*cosPhiI + sinPhiI*sinPhiI;
-
- G1i = 2 / (1 + safe_sqrtf(1 + alphaI2 * tanThetaI2));
- }
-
- float G = G1o * G1i;
-
- /* eq. 20 */
- float common = D * 0.25f / cosNO;
-
- float3 F = reflection_color(bsdf, omega_in, m);
- if(bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
- F *= 0.25f * bsdf->extra->clearcoat;
- }
-
- float3 out = F * G * common;
-
- /* eq. 2 in distribution of visible normals sampling
- * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
-
- /* eq. 38 - but see also:
- * eq. 17 in http://www.graphics.cornell.edu/~bjw/wardnotes.pdf
- * pdf = pm * 0.25 / dot(m, I); */
- *pdf = G1o * common;
-
- return out;
- }
-
- return make_float3(0.0f, 0.0f, 0.0f);
+ const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+ float alpha_x = bsdf->alpha_x;
+ float alpha_y = bsdf->alpha_y;
+ bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
+ float3 N = bsdf->N;
+
+ if (m_refractive || alpha_x * alpha_y <= 1e-7f)
+ return make_float3(0.0f, 0.0f, 0.0f);
+
+ float cosNO = dot(N, I);
+ float cosNI = dot(N, omega_in);
+
+ if (cosNI > 0 && cosNO > 0) {
+ /* get half vector */
+ float3 m = normalize(omega_in + I);
+ float alpha2 = alpha_x * alpha_y;
+ float D, G1o, G1i;
+
+ if (alpha_x == alpha_y) {
+ /* isotropic
+ * eq. 20: (F*G*D)/(4*in*on)
+ * eq. 33: first we calculate D(m) */
+ float cosThetaM = dot(N, m);
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
+ float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
+
+ if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
+ /* use GTR1 for clearcoat */
+ D = D_GTR1(cosThetaM, bsdf->alpha_x);
+
+ /* the alpha value for clearcoat is a fixed 0.25 => alpha2 = 0.25 * 0.25 */
+ alpha2 = 0.0625f;
+ }
+ else {
+ /* use GTR2 otherwise */
+ D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
+ }
+
+ /* eq. 34: now calculate G1(i,m) and G1(o,m) */
+ G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO)));
+ G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
+ }
+ else {
+ /* anisotropic */
+ float3 X, Y, Z = N;
+ make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
+
+ /* distribution */
+ float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
+ float slope_x = -local_m.x / (local_m.z * alpha_x);
+ float slope_y = -local_m.y / (local_m.z * alpha_y);
+ float slope_len = 1 + slope_x * slope_x + slope_y * slope_y;
+
+ float cosThetaM = local_m.z;
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
+
+ D = 1 / ((slope_len * slope_len) * M_PI_F * alpha2 * cosThetaM4);
+
+ /* G1(i,m) and G1(o,m) */
+ float tanThetaO2 = (1 - cosNO * cosNO) / (cosNO * cosNO);
+ float cosPhiO = dot(I, X);
+ float sinPhiO = dot(I, Y);
+
+ float alphaO2 = (cosPhiO * cosPhiO) * (alpha_x * alpha_x) +
+ (sinPhiO * sinPhiO) * (alpha_y * alpha_y);
+ alphaO2 /= cosPhiO * cosPhiO + sinPhiO * sinPhiO;
+
+ G1o = 2 / (1 + safe_sqrtf(1 + alphaO2 * tanThetaO2));
+
+ float tanThetaI2 = (1 - cosNI * cosNI) / (cosNI * cosNI);
+ float cosPhiI = dot(omega_in, X);
+ float sinPhiI = dot(omega_in, Y);
+
+ float alphaI2 = (cosPhiI * cosPhiI) * (alpha_x * alpha_x) +
+ (sinPhiI * sinPhiI) * (alpha_y * alpha_y);
+ alphaI2 /= cosPhiI * cosPhiI + sinPhiI * sinPhiI;
+
+ G1i = 2 / (1 + safe_sqrtf(1 + alphaI2 * tanThetaI2));
+ }
+
+ float G = G1o * G1i;
+
+ /* eq. 20 */
+ float common = D * 0.25f / cosNO;
+
+ float3 F = reflection_color(bsdf, omega_in, m);
+ if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
+ F *= 0.25f * bsdf->extra->clearcoat;
+ }
+
+ float3 out = F * G * common;
+
+ /* eq. 2 in distribution of visible normals sampling
+ * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
+
+ /* eq. 38 - but see also:
+ * eq. 17 in http://www.graphics.cornell.edu/~bjw/wardnotes.pdf
+ * pdf = pm * 0.25 / dot(m, I); */
+ *pdf = G1o * common;
+
+ return out;
+ }
+
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
- float alpha_x = bsdf->alpha_x;
- float alpha_y = bsdf->alpha_y;
- float m_eta = bsdf->ior;
- bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
- float3 N = bsdf->N;
+ const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+ float alpha_x = bsdf->alpha_x;
+ float alpha_y = bsdf->alpha_y;
+ float m_eta = bsdf->ior;
+ bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
+ float3 N = bsdf->N;
- if(!m_refractive || alpha_x*alpha_y <= 1e-7f)
- return make_float3(0.0f, 0.0f, 0.0f);
+ if (!m_refractive || alpha_x * alpha_y <= 1e-7f)
+ return make_float3(0.0f, 0.0f, 0.0f);
- float cosNO = dot(N, I);
- float cosNI = dot(N, omega_in);
+ float cosNO = dot(N, I);
+ float cosNI = dot(N, omega_in);
- if(cosNO <= 0 || cosNI >= 0)
- return make_float3(0.0f, 0.0f, 0.0f); /* vectors on same side -- not possible */
+ if (cosNO <= 0 || cosNI >= 0)
+ return make_float3(0.0f, 0.0f, 0.0f); /* vectors on same side -- not possible */
- /* compute half-vector of the refraction (eq. 16) */
- float3 ht = -(m_eta * omega_in + I);
- float3 Ht = normalize(ht);
- float cosHO = dot(Ht, I);
- float cosHI = dot(Ht, omega_in);
+ /* compute half-vector of the refraction (eq. 16) */
+ float3 ht = -(m_eta * omega_in + I);
+ float3 Ht = normalize(ht);
+ float cosHO = dot(Ht, I);
+ float cosHI = dot(Ht, omega_in);
- float D, G1o, G1i;
+ float D, G1o, G1i;
- /* eq. 33: first we calculate D(m) with m=Ht: */
- float alpha2 = alpha_x * alpha_y;
- float cosThetaM = dot(N, Ht);
- float cosThetaM2 = cosThetaM * cosThetaM;
- float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
- float cosThetaM4 = cosThetaM2 * cosThetaM2;
- D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
+ /* eq. 33: first we calculate D(m) with m=Ht: */
+ float alpha2 = alpha_x * alpha_y;
+ float cosThetaM = dot(N, Ht);
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
+ D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
- /* eq. 34: now calculate G1(i,m) and G1(o,m) */
- G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO)));
- G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
+ /* eq. 34: now calculate G1(i,m) and G1(o,m) */
+ G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO)));
+ G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
- float G = G1o * G1i;
+ float G = G1o * G1i;
- /* probability */
- float Ht2 = dot(ht, ht);
+ /* probability */
+ float Ht2 = dot(ht, ht);
- /* eq. 2 in distribution of visible normals sampling
- * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
+ /* eq. 2 in distribution of visible normals sampling
+ * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
- /* out = fabsf(cosHI * cosHO) * (m_eta * m_eta) * G * D / (cosNO * Ht2)
- * pdf = pm * (m_eta * m_eta) * fabsf(cosHI) / Ht2 */
- float common = D * (m_eta * m_eta) / (cosNO * Ht2);
- float out = G * fabsf(cosHI * cosHO) * common;
- *pdf = G1o * fabsf(cosHO * cosHI) * common;
+ /* out = fabsf(cosHI * cosHO) * (m_eta * m_eta) * G * D / (cosNO * Ht2)
+ * pdf = pm * (m_eta * m_eta) * fabsf(cosHI) / Ht2 */
+ float common = D * (m_eta * m_eta) / (cosNO * Ht2);
+ float out = G * fabsf(cosHI * cosHO) * common;
+ *pdf = G1o * fabsf(cosHO * cosHI) * common;
- return make_float3(out, out, out);
+ return make_float3(out, out, out);
}
-ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg,
+ const ShaderClosure *sc,
+ float3 Ng,
+ float3 I,
+ float3 dIdx,
+ float3 dIdy,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ float3 *domega_in_dx,
+ float3 *domega_in_dy,
+ float *pdf)
{
- const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
- float alpha_x = bsdf->alpha_x;
- float alpha_y = bsdf->alpha_y;
- bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
- float3 N = bsdf->N;
- int label;
-
- float cosNO = dot(N, I);
- if(cosNO > 0) {
- float3 X, Y, Z = N;
-
- if(alpha_x == alpha_y)
- make_orthonormals(Z, &X, &Y);
- else
- make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
-
- /* importance sampling with distribution of visible normals. vectors are
- * transformed to local space before and after */
- float3 local_I = make_float3(dot(X, I), dot(Y, I), cosNO);
- float3 local_m;
- float G1o;
-
- local_m = microfacet_sample_stretched(kg, local_I, alpha_x, alpha_y,
- randu, randv, false, &G1o);
-
- float3 m = X*local_m.x + Y*local_m.y + Z*local_m.z;
- float cosThetaM = local_m.z;
-
- /* reflection or refraction? */
- if(!m_refractive) {
- float cosMO = dot(m, I);
- label = LABEL_REFLECT | LABEL_GLOSSY;
-
- if(cosMO > 0) {
- /* eq. 39 - compute actual reflected direction */
- *omega_in = 2 * cosMO * m - I;
-
- if(dot(Ng, *omega_in) > 0) {
- if(alpha_x*alpha_y <= 1e-7f) {
- /* some high number for MIS */
- *pdf = 1e6f;
- *eval = make_float3(1e6f, 1e6f, 1e6f);
-
- bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID
- || bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID
- || bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID);
-
- /* if fresnel is used, calculate the color with reflection_color(...) */
- if(use_fresnel) {
- *eval *= reflection_color(bsdf, *omega_in, m);
- }
-
- label = LABEL_REFLECT | LABEL_SINGULAR;
- }
- else {
- /* microfacet normal is visible to this ray */
- /* eq. 33 */
- float alpha2 = alpha_x * alpha_y;
- float D, G1i;
-
- if(alpha_x == alpha_y) {
- /* isotropic */
- float cosThetaM2 = cosThetaM * cosThetaM;
- float cosThetaM4 = cosThetaM2 * cosThetaM2;
- float tanThetaM2 = 1/(cosThetaM2) - 1;
-
- /* eval BRDF*cosNI */
- float cosNI = dot(N, *omega_in);
-
- if(bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
- /* use GTR1 for clearcoat */
- D = D_GTR1(cosThetaM, bsdf->alpha_x);
-
- /* the alpha value for clearcoat is a fixed 0.25 => alpha2 = 0.25 * 0.25 */
- alpha2 = 0.0625f;
-
- /* recalculate G1o */
- G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO)));
- }
- else {
- /* use GTR2 otherwise */
- D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
- }
-
- /* eq. 34: now calculate G1(i,m) */
- G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
- }
- else {
- /* anisotropic distribution */
- float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
- float slope_x = -local_m.x/(local_m.z*alpha_x);
- float slope_y = -local_m.y/(local_m.z*alpha_y);
- float slope_len = 1 + slope_x*slope_x + slope_y*slope_y;
-
- float cosThetaM = local_m.z;
- float cosThetaM2 = cosThetaM * cosThetaM;
- float cosThetaM4 = cosThetaM2 * cosThetaM2;
-
- D = 1 / ((slope_len * slope_len) * M_PI_F * alpha2 * cosThetaM4);
-
- /* calculate G1(i,m) */
- float cosNI = dot(N, *omega_in);
-
- float tanThetaI2 = (1 - cosNI * cosNI) / (cosNI * cosNI);
- float cosPhiI = dot(*omega_in, X);
- float sinPhiI = dot(*omega_in, Y);
-
- float alphaI2 = (cosPhiI*cosPhiI)*(alpha_x*alpha_x) + (sinPhiI*sinPhiI)*(alpha_y*alpha_y);
- alphaI2 /= cosPhiI*cosPhiI + sinPhiI*sinPhiI;
-
- G1i = 2 / (1 + safe_sqrtf(1 + alphaI2 * tanThetaI2));
- }
-
- /* see eval function for derivation */
- float common = (G1o * D) * 0.25f / cosNO;
- *pdf = common;
-
- float3 F = reflection_color(bsdf, *omega_in, m);
-
- *eval = G1i * common * F;
- }
-
- if(bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
- *eval *= 0.25f * bsdf->extra->clearcoat;
- }
+ const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+ float alpha_x = bsdf->alpha_x;
+ float alpha_y = bsdf->alpha_y;
+ bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
+ float3 N = bsdf->N;
+ int label;
+
+ float cosNO = dot(N, I);
+ if (cosNO > 0) {
+ float3 X, Y, Z = N;
+
+ if (alpha_x == alpha_y)
+ make_orthonormals(Z, &X, &Y);
+ else
+ make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
+
+ /* importance sampling with distribution of visible normals. vectors are
+ * transformed to local space before and after */
+ float3 local_I = make_float3(dot(X, I), dot(Y, I), cosNO);
+ float3 local_m;
+ float G1o;
+
+ local_m = microfacet_sample_stretched(
+ kg, local_I, alpha_x, alpha_y, randu, randv, false, &G1o);
+
+ float3 m = X * local_m.x + Y * local_m.y + Z * local_m.z;
+ float cosThetaM = local_m.z;
+
+ /* reflection or refraction? */
+ if (!m_refractive) {
+ float cosMO = dot(m, I);
+ label = LABEL_REFLECT | LABEL_GLOSSY;
+
+ if (cosMO > 0) {
+ /* eq. 39 - compute actual reflected direction */
+ *omega_in = 2 * cosMO * m - I;
+
+ if (dot(Ng, *omega_in) > 0) {
+ if (alpha_x * alpha_y <= 1e-7f) {
+ /* some high number for MIS */
+ *pdf = 1e6f;
+ *eval = make_float3(1e6f, 1e6f, 1e6f);
+
+ bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID ||
+ bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID ||
+ bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID);
+
+ /* if fresnel is used, calculate the color with reflection_color(...) */
+ if (use_fresnel) {
+ *eval *= reflection_color(bsdf, *omega_in, m);
+ }
+
+ label = LABEL_REFLECT | LABEL_SINGULAR;
+ }
+ else {
+ /* microfacet normal is visible to this ray */
+ /* eq. 33 */
+ float alpha2 = alpha_x * alpha_y;
+ float D, G1i;
+
+ if (alpha_x == alpha_y) {
+ /* isotropic */
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
+ float tanThetaM2 = 1 / (cosThetaM2)-1;
+
+ /* eval BRDF*cosNI */
+ float cosNI = dot(N, *omega_in);
+
+ if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
+ /* use GTR1 for clearcoat */
+ D = D_GTR1(cosThetaM, bsdf->alpha_x);
+
+ /* the alpha value for clearcoat is a fixed 0.25 => alpha2 = 0.25 * 0.25 */
+ alpha2 = 0.0625f;
+
+ /* recalculate G1o */
+ G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO)));
+ }
+ else {
+ /* use GTR2 otherwise */
+ D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
+ }
+
+ /* eq. 34: now calculate G1(i,m) */
+ G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
+ }
+ else {
+ /* anisotropic distribution */
+ float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
+ float slope_x = -local_m.x / (local_m.z * alpha_x);
+ float slope_y = -local_m.y / (local_m.z * alpha_y);
+ float slope_len = 1 + slope_x * slope_x + slope_y * slope_y;
+
+ float cosThetaM = local_m.z;
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
+
+ D = 1 / ((slope_len * slope_len) * M_PI_F * alpha2 * cosThetaM4);
+
+ /* calculate G1(i,m) */
+ float cosNI = dot(N, *omega_in);
+
+ float tanThetaI2 = (1 - cosNI * cosNI) / (cosNI * cosNI);
+ float cosPhiI = dot(*omega_in, X);
+ float sinPhiI = dot(*omega_in, Y);
+
+ float alphaI2 = (cosPhiI * cosPhiI) * (alpha_x * alpha_x) +
+ (sinPhiI * sinPhiI) * (alpha_y * alpha_y);
+ alphaI2 /= cosPhiI * cosPhiI + sinPhiI * sinPhiI;
+
+ G1i = 2 / (1 + safe_sqrtf(1 + alphaI2 * tanThetaI2));
+ }
+
+ /* see eval function for derivation */
+ float common = (G1o * D) * 0.25f / cosNO;
+ *pdf = common;
+
+ float3 F = reflection_color(bsdf, *omega_in, m);
+
+ *eval = G1i * common * F;
+ }
+
+ if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
+ *eval *= 0.25f * bsdf->extra->clearcoat;
+ }
#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = (2 * dot(m, dIdx)) * m - dIdx;
- *domega_in_dy = (2 * dot(m, dIdy)) * m - dIdy;
+ *domega_in_dx = (2 * dot(m, dIdx)) * m - dIdx;
+ *domega_in_dy = (2 * dot(m, dIdy)) * m - dIdy;
#endif
- }
- }
- }
- else {
- label = LABEL_TRANSMIT | LABEL_GLOSSY;
-
- /* CAUTION: the i and o variables are inverted relative to the paper
- * eq. 39 - compute actual refractive direction */
- float3 R, T;
+ }
+ }
+ }
+ else {
+ label = LABEL_TRANSMIT | LABEL_GLOSSY;
+
+ /* CAUTION: the i and o variables are inverted relative to the paper
+ * eq. 39 - compute actual refractive direction */
+ float3 R, T;
#ifdef __RAY_DIFFERENTIALS__
- float3 dRdx, dRdy, dTdx, dTdy;
+ float3 dRdx, dRdy, dTdx, dTdy;
#endif
- float m_eta = bsdf->ior, fresnel;
- bool inside;
-
- fresnel = fresnel_dielectric(m_eta, m, I, &R, &T,
+ float m_eta = bsdf->ior, fresnel;
+ bool inside;
+
+ fresnel = fresnel_dielectric(m_eta,
+ m,
+ I,
+ &R,
+ &T,
#ifdef __RAY_DIFFERENTIALS__
- dIdx, dIdy, &dRdx, &dRdy, &dTdx, &dTdy,
+ dIdx,
+ dIdy,
+ &dRdx,
+ &dRdy,
+ &dTdx,
+ &dTdy,
#endif
- &inside);
+ &inside);
- if(!inside && fresnel != 1.0f) {
+ if (!inside && fresnel != 1.0f) {
- *omega_in = T;
+ *omega_in = T;
#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = dTdx;
- *domega_in_dy = dTdy;
+ *domega_in_dx = dTdx;
+ *domega_in_dy = dTdy;
#endif
- if(alpha_x*alpha_y <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) {
- /* some high number for MIS */
- *pdf = 1e6f;
- *eval = make_float3(1e6f, 1e6f, 1e6f);
- label = LABEL_TRANSMIT | LABEL_SINGULAR;
- }
- else {
- /* eq. 33 */
- float alpha2 = alpha_x * alpha_y;
- float cosThetaM2 = cosThetaM * cosThetaM;
- float cosThetaM4 = cosThetaM2 * cosThetaM2;
- float tanThetaM2 = 1/(cosThetaM2) - 1;
- float D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
-
- /* eval BRDF*cosNI */
- float cosNI = dot(N, *omega_in);
-
- /* eq. 34: now calculate G1(i,m) */
- float G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
-
- /* eq. 21 */
- float cosHI = dot(m, *omega_in);
- float cosHO = dot(m, I);
- float Ht2 = m_eta * cosHI + cosHO;
- Ht2 *= Ht2;
-
- /* see eval function for derivation */
- float common = (G1o * D) * (m_eta * m_eta) / (cosNO * Ht2);
- float out = G1i * fabsf(cosHI * cosHO) * common;
- *pdf = cosHO * fabsf(cosHI) * common;
-
- *eval = make_float3(out, out, out);
- }
- }
- }
- }
- else {
- label = (m_refractive) ? LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY;
- }
- return label;
+ if (alpha_x * alpha_y <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) {
+ /* some high number for MIS */
+ *pdf = 1e6f;
+ *eval = make_float3(1e6f, 1e6f, 1e6f);
+ label = LABEL_TRANSMIT | LABEL_SINGULAR;
+ }
+ else {
+ /* eq. 33 */
+ float alpha2 = alpha_x * alpha_y;
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
+ float tanThetaM2 = 1 / (cosThetaM2)-1;
+ float D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
+
+ /* eval BRDF*cosNI */
+ float cosNI = dot(N, *omega_in);
+
+ /* eq. 34: now calculate G1(i,m) */
+ float G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
+
+ /* eq. 21 */
+ float cosHI = dot(m, *omega_in);
+ float cosHO = dot(m, I);
+ float Ht2 = m_eta * cosHI + cosHO;
+ Ht2 *= Ht2;
+
+ /* see eval function for derivation */
+ float common = (G1o * D) * (m_eta * m_eta) / (cosNO * Ht2);
+ float out = G1i * fabsf(cosHI * cosHO) * common;
+ *pdf = cosHO * fabsf(cosHI) * common;
+
+ *eval = make_float3(out, out, out);
+ }
+ }
+ }
+ }
+ else {
+ label = (m_refractive) ? LABEL_TRANSMIT | LABEL_GLOSSY : LABEL_REFLECT | LABEL_GLOSSY;
+ }
+ return label;
}
/* Beckmann microfacet with Smith shadow-masking from:
@@ -764,364 +803,392 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
ccl_device int bsdf_microfacet_beckmann_setup(MicrofacetBsdf *bsdf)
{
- bsdf->alpha_x = saturate(bsdf->alpha_x);
- bsdf->alpha_y = bsdf->alpha_x;
+ bsdf->alpha_x = saturate(bsdf->alpha_x);
+ bsdf->alpha_y = bsdf->alpha_x;
- bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ID;
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ID;
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device int bsdf_microfacet_beckmann_aniso_setup(MicrofacetBsdf *bsdf)
{
- bsdf->alpha_x = saturate(bsdf->alpha_x);
- bsdf->alpha_y = saturate(bsdf->alpha_y);
+ bsdf->alpha_x = saturate(bsdf->alpha_x);
+ bsdf->alpha_y = saturate(bsdf->alpha_y);
- bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID;
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID;
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device int bsdf_microfacet_beckmann_refraction_setup(MicrofacetBsdf *bsdf)
{
- bsdf->alpha_x = saturate(bsdf->alpha_x);
- bsdf->alpha_y = bsdf->alpha_x;
+ bsdf->alpha_x = saturate(bsdf->alpha_x);
+ bsdf->alpha_y = bsdf->alpha_x;
- bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device void bsdf_microfacet_beckmann_blur(ShaderClosure *sc, float roughness)
{
- MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc;
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
- bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
- bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
+ bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
+ bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
}
ccl_device_inline float bsdf_beckmann_G1(float alpha, float cos_n)
{
- cos_n *= cos_n;
- float invA = alpha * safe_sqrtf((1.0f - cos_n) / cos_n);
- if(invA < 0.625f) {
- return 1.0f;
- }
-
- float a = 1.0f / invA;
- return ((2.181f*a + 3.535f)*a) / ((2.577f*a + 2.276f)*a + 1.0f);
+ cos_n *= cos_n;
+ float invA = alpha * safe_sqrtf((1.0f - cos_n) / cos_n);
+ if (invA < 0.625f) {
+ return 1.0f;
+ }
+
+ float a = 1.0f / invA;
+ return ((2.181f * a + 3.535f) * a) / ((2.577f * a + 2.276f) * a + 1.0f);
}
-ccl_device_inline float bsdf_beckmann_aniso_G1(float alpha_x, float alpha_y, float cos_n, float cos_phi, float sin_phi)
+ccl_device_inline float bsdf_beckmann_aniso_G1(
+ float alpha_x, float alpha_y, float cos_n, float cos_phi, float sin_phi)
{
- cos_n *= cos_n;
- sin_phi *= sin_phi;
- cos_phi *= cos_phi;
- alpha_x *= alpha_x;
- alpha_y *= alpha_y;
-
- float alphaO2 = (cos_phi*alpha_x + sin_phi*alpha_y) / (cos_phi + sin_phi);
- float invA = safe_sqrtf(alphaO2 * (1 - cos_n) / cos_n);
- if(invA < 0.625f) {
- return 1.0f;
- }
-
- float a = 1.0f / invA;
- return ((2.181f*a + 3.535f)*a) / ((2.577f*a + 2.276f)*a + 1.0f);
+ cos_n *= cos_n;
+ sin_phi *= sin_phi;
+ cos_phi *= cos_phi;
+ alpha_x *= alpha_x;
+ alpha_y *= alpha_y;
+
+ float alphaO2 = (cos_phi * alpha_x + sin_phi * alpha_y) / (cos_phi + sin_phi);
+ float invA = safe_sqrtf(alphaO2 * (1 - cos_n) / cos_n);
+ if (invA < 0.625f) {
+ return 1.0f;
+ }
+
+ float a = 1.0f / invA;
+ return ((2.181f * a + 3.535f) * a) / ((2.577f * a + 2.276f) * a + 1.0f);
}
-ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
- float alpha_x = bsdf->alpha_x;
- float alpha_y = bsdf->alpha_y;
- bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
- float3 N = bsdf->N;
-
- if(m_refractive || alpha_x*alpha_y <= 1e-7f)
- return make_float3(0.0f, 0.0f, 0.0f);
-
- float cosNO = dot(N, I);
- float cosNI = dot(N, omega_in);
-
- if(cosNO > 0 && cosNI > 0) {
- /* get half vector */
- float3 m = normalize(omega_in + I);
-
- float alpha2 = alpha_x * alpha_y;
- float D, G1o, G1i;
-
- if(alpha_x == alpha_y) {
- /* isotropic
- * eq. 20: (F*G*D)/(4*in*on)
- * eq. 25: first we calculate D(m) */
- float cosThetaM = dot(N, m);
- float cosThetaM2 = cosThetaM * cosThetaM;
- float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
- float cosThetaM4 = cosThetaM2 * cosThetaM2;
- D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
-
- /* eq. 26, 27: now calculate G1(i,m) and G1(o,m) */
- G1o = bsdf_beckmann_G1(alpha_x, cosNO);
- G1i = bsdf_beckmann_G1(alpha_x, cosNI);
- }
- else {
- /* anisotropic */
- float3 X, Y, Z = N;
- make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
-
- /* distribution */
- float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
- float slope_x = -local_m.x/(local_m.z*alpha_x);
- float slope_y = -local_m.y/(local_m.z*alpha_y);
-
- float cosThetaM = local_m.z;
- float cosThetaM2 = cosThetaM * cosThetaM;
- float cosThetaM4 = cosThetaM2 * cosThetaM2;
-
- D = expf(-slope_x*slope_x - slope_y*slope_y) / (M_PI_F * alpha2 * cosThetaM4);
-
- /* G1(i,m) and G1(o,m) */
- G1o = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, cosNO, dot(I, X), dot(I, Y));
- G1i = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, cosNI, dot(omega_in, X), dot(omega_in, Y));
- }
-
- float G = G1o * G1i;
-
- /* eq. 20 */
- float common = D * 0.25f / cosNO;
- float out = G * common;
-
- /* eq. 2 in distribution of visible normals sampling
- * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
-
- /* eq. 38 - but see also:
- * eq. 17 in http://www.graphics.cornell.edu/~bjw/wardnotes.pdf
- * pdf = pm * 0.25 / dot(m, I); */
- *pdf = G1o * common;
-
- return make_float3(out, out, out);
- }
-
- return make_float3(0.0f, 0.0f, 0.0f);
+ const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+ float alpha_x = bsdf->alpha_x;
+ float alpha_y = bsdf->alpha_y;
+ bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
+ float3 N = bsdf->N;
+
+ if (m_refractive || alpha_x * alpha_y <= 1e-7f)
+ return make_float3(0.0f, 0.0f, 0.0f);
+
+ float cosNO = dot(N, I);
+ float cosNI = dot(N, omega_in);
+
+ if (cosNO > 0 && cosNI > 0) {
+ /* get half vector */
+ float3 m = normalize(omega_in + I);
+
+ float alpha2 = alpha_x * alpha_y;
+ float D, G1o, G1i;
+
+ if (alpha_x == alpha_y) {
+ /* isotropic
+ * eq. 20: (F*G*D)/(4*in*on)
+ * eq. 25: first we calculate D(m) */
+ float cosThetaM = dot(N, m);
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
+ D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
+
+ /* eq. 26, 27: now calculate G1(i,m) and G1(o,m) */
+ G1o = bsdf_beckmann_G1(alpha_x, cosNO);
+ G1i = bsdf_beckmann_G1(alpha_x, cosNI);
+ }
+ else {
+ /* anisotropic */
+ float3 X, Y, Z = N;
+ make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
+
+ /* distribution */
+ float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
+ float slope_x = -local_m.x / (local_m.z * alpha_x);
+ float slope_y = -local_m.y / (local_m.z * alpha_y);
+
+ float cosThetaM = local_m.z;
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
+
+ D = expf(-slope_x * slope_x - slope_y * slope_y) / (M_PI_F * alpha2 * cosThetaM4);
+
+ /* G1(i,m) and G1(o,m) */
+ G1o = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, cosNO, dot(I, X), dot(I, Y));
+ G1i = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, cosNI, dot(omega_in, X), dot(omega_in, Y));
+ }
+
+ float G = G1o * G1i;
+
+ /* eq. 20 */
+ float common = D * 0.25f / cosNO;
+ float out = G * common;
+
+ /* eq. 2 in distribution of visible normals sampling
+ * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
+
+ /* eq. 38 - but see also:
+ * eq. 17 in http://www.graphics.cornell.edu/~bjw/wardnotes.pdf
+ * pdf = pm * 0.25 / dot(m, I); */
+ *pdf = G1o * common;
+
+ return make_float3(out, out, out);
+ }
+
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
- float alpha_x = bsdf->alpha_x;
- float alpha_y = bsdf->alpha_y;
- float m_eta = bsdf->ior;
- bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
- float3 N = bsdf->N;
-
- if(!m_refractive || alpha_x*alpha_y <= 1e-7f)
- return make_float3(0.0f, 0.0f, 0.0f);
-
- float cosNO = dot(N, I);
- float cosNI = dot(N, omega_in);
-
- if(cosNO <= 0 || cosNI >= 0)
- return make_float3(0.0f, 0.0f, 0.0f);
-
- /* compute half-vector of the refraction (eq. 16) */
- float3 ht = -(m_eta * omega_in + I);
- float3 Ht = normalize(ht);
- float cosHO = dot(Ht, I);
- float cosHI = dot(Ht, omega_in);
-
- /* eq. 25: first we calculate D(m) with m=Ht: */
- float alpha2 = alpha_x * alpha_y;
- float cosThetaM = min(dot(N, Ht), 1.0f);
- float cosThetaM2 = cosThetaM * cosThetaM;
- float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
- float cosThetaM4 = cosThetaM2 * cosThetaM2;
- float D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
-
- /* eq. 26, 27: now calculate G1(i,m) and G1(o,m) */
- float G1o = bsdf_beckmann_G1(alpha_x, cosNO);
- float G1i = bsdf_beckmann_G1(alpha_x, cosNI);
- float G = G1o * G1i;
-
- /* probability */
- float Ht2 = dot(ht, ht);
-
- /* eq. 2 in distribution of visible normals sampling
- * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
-
- /* out = fabsf(cosHI * cosHO) * (m_eta * m_eta) * G * D / (cosNO * Ht2)
- * pdf = pm * (m_eta * m_eta) * fabsf(cosHI) / Ht2 */
- float common = D * (m_eta * m_eta) / (cosNO * Ht2);
- float out = G * fabsf(cosHI * cosHO) * common;
- *pdf = G1o * fabsf(cosHO * cosHI) * common;
-
- return make_float3(out, out, out);
+ const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+ float alpha_x = bsdf->alpha_x;
+ float alpha_y = bsdf->alpha_y;
+ float m_eta = bsdf->ior;
+ bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
+ float3 N = bsdf->N;
+
+ if (!m_refractive || alpha_x * alpha_y <= 1e-7f)
+ return make_float3(0.0f, 0.0f, 0.0f);
+
+ float cosNO = dot(N, I);
+ float cosNI = dot(N, omega_in);
+
+ if (cosNO <= 0 || cosNI >= 0)
+ return make_float3(0.0f, 0.0f, 0.0f);
+
+ /* compute half-vector of the refraction (eq. 16) */
+ float3 ht = -(m_eta * omega_in + I);
+ float3 Ht = normalize(ht);
+ float cosHO = dot(Ht, I);
+ float cosHI = dot(Ht, omega_in);
+
+ /* eq. 25: first we calculate D(m) with m=Ht: */
+ float alpha2 = alpha_x * alpha_y;
+ float cosThetaM = min(dot(N, Ht), 1.0f);
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
+ float D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
+
+ /* eq. 26, 27: now calculate G1(i,m) and G1(o,m) */
+ float G1o = bsdf_beckmann_G1(alpha_x, cosNO);
+ float G1i = bsdf_beckmann_G1(alpha_x, cosNI);
+ float G = G1o * G1i;
+
+ /* probability */
+ float Ht2 = dot(ht, ht);
+
+ /* eq. 2 in distribution of visible normals sampling
+ * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
+
+ /* out = fabsf(cosHI * cosHO) * (m_eta * m_eta) * G * D / (cosNO * Ht2)
+ * pdf = pm * (m_eta * m_eta) * fabsf(cosHI) / Ht2 */
+ float common = D * (m_eta * m_eta) / (cosNO * Ht2);
+ float out = G * fabsf(cosHI * cosHO) * common;
+ *pdf = G1o * fabsf(cosHO * cosHI) * common;
+
+ return make_float3(out, out, out);
}
-ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg,
+ const ShaderClosure *sc,
+ float3 Ng,
+ float3 I,
+ float3 dIdx,
+ float3 dIdy,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ float3 *domega_in_dx,
+ float3 *domega_in_dy,
+ float *pdf)
{
- const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
- float alpha_x = bsdf->alpha_x;
- float alpha_y = bsdf->alpha_y;
- bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
- float3 N = bsdf->N;
- int label;
-
- float cosNO = dot(N, I);
- if(cosNO > 0) {
- float3 X, Y, Z = N;
-
- if(alpha_x == alpha_y)
- make_orthonormals(Z, &X, &Y);
- else
- make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
-
- /* importance sampling with distribution of visible normals. vectors are
- * transformed to local space before and after */
- float3 local_I = make_float3(dot(X, I), dot(Y, I), cosNO);
- float3 local_m;
- float G1o;
-
- local_m = microfacet_sample_stretched(kg, local_I, alpha_x, alpha_x,
- randu, randv, true, &G1o);
-
- float3 m = X*local_m.x + Y*local_m.y + Z*local_m.z;
- float cosThetaM = local_m.z;
-
- /* reflection or refraction? */
- if(!m_refractive) {
- label = LABEL_REFLECT | LABEL_GLOSSY;
- float cosMO = dot(m, I);
-
- if(cosMO > 0) {
- /* eq. 39 - compute actual reflected direction */
- *omega_in = 2 * cosMO * m - I;
-
- if(dot(Ng, *omega_in) > 0) {
- if(alpha_x*alpha_y <= 1e-7f) {
- /* some high number for MIS */
- *pdf = 1e6f;
- *eval = make_float3(1e6f, 1e6f, 1e6f);
- label = LABEL_REFLECT | LABEL_SINGULAR;
- }
- else {
- /* microfacet normal is visible to this ray
- * eq. 25 */
- float alpha2 = alpha_x * alpha_y;
- float D, G1i;
-
- if(alpha_x == alpha_y) {
- /* istropic distribution */
- float cosThetaM2 = cosThetaM * cosThetaM;
- float cosThetaM4 = cosThetaM2 * cosThetaM2;
- float tanThetaM2 = 1/(cosThetaM2) - 1;
- D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
-
- /* eval BRDF*cosNI */
- float cosNI = dot(N, *omega_in);
-
- /* eq. 26, 27: now calculate G1(i,m) */
- G1i = bsdf_beckmann_G1(alpha_x, cosNI);
- }
- else {
- /* anisotropic distribution */
- float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
- float slope_x = -local_m.x/(local_m.z*alpha_x);
- float slope_y = -local_m.y/(local_m.z*alpha_y);
-
- float cosThetaM = local_m.z;
- float cosThetaM2 = cosThetaM * cosThetaM;
- float cosThetaM4 = cosThetaM2 * cosThetaM2;
-
- D = expf(-slope_x*slope_x - slope_y*slope_y) / (M_PI_F * alpha2 * cosThetaM4);
-
- /* G1(i,m) */
- G1i = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, dot(*omega_in, N), dot(*omega_in, X), dot(*omega_in, Y));
- }
-
- float G = G1o * G1i;
-
- /* see eval function for derivation */
- float common = D * 0.25f / cosNO;
- float out = G * common;
- *pdf = G1o * common;
-
- *eval = make_float3(out, out, out);
- }
+ const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+ float alpha_x = bsdf->alpha_x;
+ float alpha_y = bsdf->alpha_y;
+ bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
+ float3 N = bsdf->N;
+ int label;
+
+ float cosNO = dot(N, I);
+ if (cosNO > 0) {
+ float3 X, Y, Z = N;
+
+ if (alpha_x == alpha_y)
+ make_orthonormals(Z, &X, &Y);
+ else
+ make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
+
+ /* importance sampling with distribution of visible normals. vectors are
+ * transformed to local space before and after */
+ float3 local_I = make_float3(dot(X, I), dot(Y, I), cosNO);
+ float3 local_m;
+ float G1o;
+
+ local_m = microfacet_sample_stretched(kg, local_I, alpha_x, alpha_x, randu, randv, true, &G1o);
+
+ float3 m = X * local_m.x + Y * local_m.y + Z * local_m.z;
+ float cosThetaM = local_m.z;
+
+ /* reflection or refraction? */
+ if (!m_refractive) {
+ label = LABEL_REFLECT | LABEL_GLOSSY;
+ float cosMO = dot(m, I);
+
+ if (cosMO > 0) {
+ /* eq. 39 - compute actual reflected direction */
+ *omega_in = 2 * cosMO * m - I;
+
+ if (dot(Ng, *omega_in) > 0) {
+ if (alpha_x * alpha_y <= 1e-7f) {
+ /* some high number for MIS */
+ *pdf = 1e6f;
+ *eval = make_float3(1e6f, 1e6f, 1e6f);
+ label = LABEL_REFLECT | LABEL_SINGULAR;
+ }
+ else {
+ /* microfacet normal is visible to this ray
+ * eq. 25 */
+ float alpha2 = alpha_x * alpha_y;
+ float D, G1i;
+
+ if (alpha_x == alpha_y) {
+ /* istropic distribution */
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
+ float tanThetaM2 = 1 / (cosThetaM2)-1;
+ D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
+
+ /* eval BRDF*cosNI */
+ float cosNI = dot(N, *omega_in);
+
+ /* eq. 26, 27: now calculate G1(i,m) */
+ G1i = bsdf_beckmann_G1(alpha_x, cosNI);
+ }
+ else {
+ /* anisotropic distribution */
+ float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
+ float slope_x = -local_m.x / (local_m.z * alpha_x);
+ float slope_y = -local_m.y / (local_m.z * alpha_y);
+
+ float cosThetaM = local_m.z;
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
+
+ D = expf(-slope_x * slope_x - slope_y * slope_y) / (M_PI_F * alpha2 * cosThetaM4);
+
+ /* G1(i,m) */
+ G1i = bsdf_beckmann_aniso_G1(
+ alpha_x, alpha_y, dot(*omega_in, N), dot(*omega_in, X), dot(*omega_in, Y));
+ }
+
+ float G = G1o * G1i;
+
+ /* see eval function for derivation */
+ float common = D * 0.25f / cosNO;
+ float out = G * common;
+ *pdf = G1o * common;
+
+ *eval = make_float3(out, out, out);
+ }
#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = (2 * dot(m, dIdx)) * m - dIdx;
- *domega_in_dy = (2 * dot(m, dIdy)) * m - dIdy;
+ *domega_in_dx = (2 * dot(m, dIdx)) * m - dIdx;
+ *domega_in_dy = (2 * dot(m, dIdy)) * m - dIdy;
#endif
- }
- }
- }
- else {
- label = LABEL_TRANSMIT | LABEL_GLOSSY;
-
- /* CAUTION: the i and o variables are inverted relative to the paper
- * eq. 39 - compute actual refractive direction */
- float3 R, T;
+ }
+ }
+ }
+ else {
+ label = LABEL_TRANSMIT | LABEL_GLOSSY;
+
+ /* CAUTION: the i and o variables are inverted relative to the paper
+ * eq. 39 - compute actual refractive direction */
+ float3 R, T;
#ifdef __RAY_DIFFERENTIALS__
- float3 dRdx, dRdy, dTdx, dTdy;
+ float3 dRdx, dRdy, dTdx, dTdy;
#endif
- float m_eta = bsdf->ior, fresnel;
- bool inside;
-
- fresnel = fresnel_dielectric(m_eta, m, I, &R, &T,
+ float m_eta = bsdf->ior, fresnel;
+ bool inside;
+
+ fresnel = fresnel_dielectric(m_eta,
+ m,
+ I,
+ &R,
+ &T,
#ifdef __RAY_DIFFERENTIALS__
- dIdx, dIdy, &dRdx, &dRdy, &dTdx, &dTdy,
+ dIdx,
+ dIdy,
+ &dRdx,
+ &dRdy,
+ &dTdx,
+ &dTdy,
#endif
- &inside);
+ &inside);
- if(!inside && fresnel != 1.0f) {
- *omega_in = T;
+ if (!inside && fresnel != 1.0f) {
+ *omega_in = T;
#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = dTdx;
- *domega_in_dy = dTdy;
+ *domega_in_dx = dTdx;
+ *domega_in_dy = dTdy;
#endif
- if(alpha_x*alpha_y <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) {
- /* some high number for MIS */
- *pdf = 1e6f;
- *eval = make_float3(1e6f, 1e6f, 1e6f);
- label = LABEL_TRANSMIT | LABEL_SINGULAR;
- }
- else {
- /* eq. 33 */
- float alpha2 = alpha_x * alpha_y;
- float cosThetaM2 = cosThetaM * cosThetaM;
- float cosThetaM4 = cosThetaM2 * cosThetaM2;
- float tanThetaM2 = 1/(cosThetaM2) - 1;
- float D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
-
- /* eval BRDF*cosNI */
- float cosNI = dot(N, *omega_in);
-
- /* eq. 26, 27: now calculate G1(i,m) */
- float G1i = bsdf_beckmann_G1(alpha_x, cosNI);
- float G = G1o * G1i;
-
- /* eq. 21 */
- float cosHI = dot(m, *omega_in);
- float cosHO = dot(m, I);
- float Ht2 = m_eta * cosHI + cosHO;
- Ht2 *= Ht2;
-
- /* see eval function for derivation */
- float common = D * (m_eta * m_eta) / (cosNO * Ht2);
- float out = G * fabsf(cosHI * cosHO) * common;
- *pdf = G1o * cosHO * fabsf(cosHI) * common;
-
- *eval = make_float3(out, out, out);
- }
- }
- }
- }
- else {
- label = (m_refractive) ? LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY;
- }
- return label;
+ if (alpha_x * alpha_y <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) {
+ /* some high number for MIS */
+ *pdf = 1e6f;
+ *eval = make_float3(1e6f, 1e6f, 1e6f);
+ label = LABEL_TRANSMIT | LABEL_SINGULAR;
+ }
+ else {
+ /* eq. 33 */
+ float alpha2 = alpha_x * alpha_y;
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
+ float tanThetaM2 = 1 / (cosThetaM2)-1;
+ float D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
+
+ /* eval BRDF*cosNI */
+ float cosNI = dot(N, *omega_in);
+
+ /* eq. 26, 27: now calculate G1(i,m) */
+ float G1i = bsdf_beckmann_G1(alpha_x, cosNI);
+ float G = G1o * G1i;
+
+ /* eq. 21 */
+ float cosHI = dot(m, *omega_in);
+ float cosHO = dot(m, I);
+ float Ht2 = m_eta * cosHI + cosHO;
+ Ht2 *= Ht2;
+
+ /* see eval function for derivation */
+ float common = D * (m_eta * m_eta) / (cosNO * Ht2);
+ float out = G * fabsf(cosHI * cosHO) * common;
+ *pdf = G1o * cosHO * fabsf(cosHI) * common;
+
+ *eval = make_float3(out, out, out);
+ }
+ }
+ }
+ }
+ else {
+ label = (m_refractive) ? LABEL_TRANSMIT | LABEL_GLOSSY : LABEL_REFLECT | LABEL_GLOSSY;
+ }
+ return label;
}
CCL_NAMESPACE_END
-#endif /* __BSDF_MICROFACET_H__ */
+#endif /* __BSDF_MICROFACET_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
index 2f2c35d5d1f..2cc1a9c5299 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
@@ -23,149 +23,168 @@ CCL_NAMESPACE_BEGIN
/* Isotropic GGX microfacet distribution */
ccl_device_forceinline float D_ggx(float3 wm, float alpha)
{
- wm.z *= wm.z;
- alpha *= alpha;
- float tmp = (1.0f - wm.z) + alpha * wm.z;
- return alpha / max(M_PI_F * tmp*tmp, 1e-7f);
+ wm.z *= wm.z;
+ alpha *= alpha;
+ float tmp = (1.0f - wm.z) + alpha * wm.z;
+ return alpha / max(M_PI_F * tmp * tmp, 1e-7f);
}
/* Anisotropic GGX microfacet distribution */
ccl_device_forceinline float D_ggx_aniso(const float3 wm, const float2 alpha)
{
- float slope_x = -wm.x/alpha.x;
- float slope_y = -wm.y/alpha.y;
- float tmp = wm.z*wm.z + slope_x*slope_x + slope_y*slope_y;
+ float slope_x = -wm.x / alpha.x;
+ float slope_y = -wm.y / alpha.y;
+ float tmp = wm.z * wm.z + slope_x * slope_x + slope_y * slope_y;
- return 1.0f / max(M_PI_F * tmp*tmp * alpha.x*alpha.y, 1e-7f);
+ return 1.0f / max(M_PI_F * tmp * tmp * alpha.x * alpha.y, 1e-7f);
}
/* Sample slope distribution (based on page 14 of the supplemental implementation). */
-ccl_device_forceinline float2 mf_sampleP22_11(const float cosI, const float randx, const float randy)
-{
- if(cosI > 0.9999f || fabsf(cosI) < 1e-6f) {
- const float r = sqrtf(randx / max(1.0f - randx, 1e-7f));
- const float phi = M_2PI_F * randy;
- return make_float2(r*cosf(phi), r*sinf(phi));
- }
-
- const float sinI = safe_sqrtf(1.0f - cosI*cosI);
- const float tanI = sinI/cosI;
- const float projA = 0.5f * (cosI + 1.0f);
- if(projA < 0.0001f)
- return make_float2(0.0f, 0.0f);
- const float A = 2.0f*randx*projA / cosI - 1.0f;
- float tmp = A*A-1.0f;
- if(fabsf(tmp) < 1e-7f)
- return make_float2(0.0f, 0.0f);
- tmp = 1.0f / tmp;
- const float D = safe_sqrtf(tanI*tanI*tmp*tmp - (A*A-tanI*tanI)*tmp);
-
- const float slopeX2 = tanI*tmp + D;
- const float slopeX = (A < 0.0f || slopeX2 > 1.0f/tanI)? (tanI*tmp - D) : slopeX2;
-
- float U2;
- if(randy >= 0.5f)
- U2 = 2.0f*(randy - 0.5f);
- else
- U2 = 2.0f*(0.5f - randy);
- const float z = (U2*(U2*(U2*0.27385f-0.73369f)+0.46341f)) / (U2*(U2*(U2*0.093073f+0.309420f)-1.0f)+0.597999f);
- const float slopeY = z * sqrtf(1.0f + slopeX*slopeX);
-
- if(randy >= 0.5f)
- return make_float2(slopeX, slopeY);
- else
- return make_float2(slopeX, -slopeY);
+ccl_device_forceinline float2 mf_sampleP22_11(const float cosI,
+ const float randx,
+ const float randy)
+{
+ if (cosI > 0.9999f || fabsf(cosI) < 1e-6f) {
+ const float r = sqrtf(randx / max(1.0f - randx, 1e-7f));
+ const float phi = M_2PI_F * randy;
+ return make_float2(r * cosf(phi), r * sinf(phi));
+ }
+
+ const float sinI = safe_sqrtf(1.0f - cosI * cosI);
+ const float tanI = sinI / cosI;
+ const float projA = 0.5f * (cosI + 1.0f);
+ if (projA < 0.0001f)
+ return make_float2(0.0f, 0.0f);
+ const float A = 2.0f * randx * projA / cosI - 1.0f;
+ float tmp = A * A - 1.0f;
+ if (fabsf(tmp) < 1e-7f)
+ return make_float2(0.0f, 0.0f);
+ tmp = 1.0f / tmp;
+ const float D = safe_sqrtf(tanI * tanI * tmp * tmp - (A * A - tanI * tanI) * tmp);
+
+ const float slopeX2 = tanI * tmp + D;
+ const float slopeX = (A < 0.0f || slopeX2 > 1.0f / tanI) ? (tanI * tmp - D) : slopeX2;
+
+ float U2;
+ if (randy >= 0.5f)
+ U2 = 2.0f * (randy - 0.5f);
+ else
+ U2 = 2.0f * (0.5f - randy);
+ const float z = (U2 * (U2 * (U2 * 0.27385f - 0.73369f) + 0.46341f)) /
+ (U2 * (U2 * (U2 * 0.093073f + 0.309420f) - 1.0f) + 0.597999f);
+ const float slopeY = z * sqrtf(1.0f + slopeX * slopeX);
+
+ if (randy >= 0.5f)
+ return make_float2(slopeX, slopeY);
+ else
+ return make_float2(slopeX, -slopeY);
}
/* Visible normal sampling for the GGX distribution (based on page 7 of the supplemental implementation). */
-ccl_device_forceinline float3 mf_sample_vndf(const float3 wi, const float2 alpha, const float randx, const float randy)
+ccl_device_forceinline float3 mf_sample_vndf(const float3 wi,
+ const float2 alpha,
+ const float randx,
+ const float randy)
{
- const float3 wi_11 = normalize(make_float3(alpha.x*wi.x, alpha.y*wi.y, wi.z));
- const float2 slope_11 = mf_sampleP22_11(wi_11.z, randx, randy);
+ const float3 wi_11 = normalize(make_float3(alpha.x * wi.x, alpha.y * wi.y, wi.z));
+ const float2 slope_11 = mf_sampleP22_11(wi_11.z, randx, randy);
- const float3 cossin_phi = safe_normalize(make_float3(wi_11.x, wi_11.y, 0.0f));
- const float slope_x = alpha.x*(cossin_phi.x * slope_11.x - cossin_phi.y * slope_11.y);
- const float slope_y = alpha.y*(cossin_phi.y * slope_11.x + cossin_phi.x * slope_11.y);
+ const float3 cossin_phi = safe_normalize(make_float3(wi_11.x, wi_11.y, 0.0f));
+ const float slope_x = alpha.x * (cossin_phi.x * slope_11.x - cossin_phi.y * slope_11.y);
+ const float slope_y = alpha.y * (cossin_phi.y * slope_11.x + cossin_phi.x * slope_11.y);
- kernel_assert(isfinite(slope_x));
- return normalize(make_float3(-slope_x, -slope_y, 1.0f));
+ kernel_assert(isfinite(slope_x));
+ return normalize(make_float3(-slope_x, -slope_y, 1.0f));
}
/* === Phase functions: Glossy and Glass === */
/* Phase function for reflective materials. */
-ccl_device_forceinline float3 mf_sample_phase_glossy(const float3 wi, float3 *weight, const float3 wm)
+ccl_device_forceinline float3 mf_sample_phase_glossy(const float3 wi,
+ float3 *weight,
+ const float3 wm)
{
- return -wi + 2.0f * wm * dot(wi, wm);
+ return -wi + 2.0f * wm * dot(wi, wm);
}
-ccl_device_forceinline float3 mf_eval_phase_glossy(const float3 w, const float lambda, const float3 wo, const float2 alpha)
+ccl_device_forceinline float3 mf_eval_phase_glossy(const float3 w,
+ const float lambda,
+ const float3 wo,
+ const float2 alpha)
{
- if(w.z > 0.9999f)
- return make_float3(0.0f, 0.0f, 0.0f);
+ if (w.z > 0.9999f)
+ return make_float3(0.0f, 0.0f, 0.0f);
- const float3 wh = normalize(wo - w);
- if(wh.z < 0.0f)
- return make_float3(0.0f, 0.0f, 0.0f);
+ const float3 wh = normalize(wo - w);
+ if (wh.z < 0.0f)
+ return make_float3(0.0f, 0.0f, 0.0f);
- float pArea = (w.z < -0.9999f)? 1.0f: lambda*w.z;
+ float pArea = (w.z < -0.9999f) ? 1.0f : lambda * w.z;
- const float dotW_WH = dot(-w, wh);
- if(dotW_WH < 0.0f)
- return make_float3(0.0f, 0.0f, 0.0f);
+ const float dotW_WH = dot(-w, wh);
+ if (dotW_WH < 0.0f)
+ return make_float3(0.0f, 0.0f, 0.0f);
- float phase = max(0.0f, dotW_WH) * 0.25f / max(pArea * dotW_WH, 1e-7f);
- if(alpha.x == alpha.y)
- phase *= D_ggx(wh, alpha.x);
- else
- phase *= D_ggx_aniso(wh, alpha);
+ float phase = max(0.0f, dotW_WH) * 0.25f / max(pArea * dotW_WH, 1e-7f);
+ if (alpha.x == alpha.y)
+ phase *= D_ggx(wh, alpha.x);
+ else
+ phase *= D_ggx_aniso(wh, alpha);
- return make_float3(phase, phase, phase);
+ return make_float3(phase, phase, phase);
}
/* Phase function for dielectric transmissive materials, including both reflection and refraction according to the dielectric fresnel term. */
-ccl_device_forceinline float3 mf_sample_phase_glass(const float3 wi, const float eta, const float3 wm, const float randV, bool *outside)
-{
- float cosI = dot(wi, wm);
- float f = fresnel_dielectric_cos(cosI, eta);
- if(randV < f) {
- *outside = true;
- return -wi + 2.0f * wm * cosI;
- }
- *outside = false;
- float inv_eta = 1.0f/eta;
- float cosT = -safe_sqrtf(1.0f - (1.0f - cosI*cosI) * inv_eta*inv_eta);
- return normalize(wm*(cosI*inv_eta + cosT) - wi*inv_eta);
-}
-
-ccl_device_forceinline float3 mf_eval_phase_glass(const float3 w, const float lambda, const float3 wo, const bool wo_outside, const float2 alpha, const float eta)
-{
- if(w.z > 0.9999f)
- return make_float3(0.0f, 0.0f, 0.0f);
-
- float pArea = (w.z < -0.9999f)? 1.0f: lambda*w.z;
- float v;
- if(wo_outside) {
- const float3 wh = normalize(wo - w);
- if(wh.z < 0.0f)
- return make_float3(0.0f, 0.0f, 0.0f);
-
- const float dotW_WH = dot(-w, wh);
- v = fresnel_dielectric_cos(dotW_WH, eta) * max(0.0f, dotW_WH) * D_ggx(wh, alpha.x) * 0.25f / (pArea * dotW_WH);
- }
- else {
- float3 wh = normalize(wo*eta - w);
- if(wh.z < 0.0f)
- wh = -wh;
- const float dotW_WH = dot(-w, wh), dotWO_WH = dot(wo, wh);
- if(dotW_WH < 0.0f)
- return make_float3(0.0f, 0.0f, 0.0f);
-
- float temp = dotW_WH + eta*dotWO_WH;
- v = (1.0f - fresnel_dielectric_cos(dotW_WH, eta)) * max(0.0f, dotW_WH) * max(0.0f, -dotWO_WH) * D_ggx(wh, alpha.x) / (pArea * temp * temp);
- }
-
- return make_float3(v, v, v);
+ccl_device_forceinline float3 mf_sample_phase_glass(
+ const float3 wi, const float eta, const float3 wm, const float randV, bool *outside)
+{
+ float cosI = dot(wi, wm);
+ float f = fresnel_dielectric_cos(cosI, eta);
+ if (randV < f) {
+ *outside = true;
+ return -wi + 2.0f * wm * cosI;
+ }
+ *outside = false;
+ float inv_eta = 1.0f / eta;
+ float cosT = -safe_sqrtf(1.0f - (1.0f - cosI * cosI) * inv_eta * inv_eta);
+ return normalize(wm * (cosI * inv_eta + cosT) - wi * inv_eta);
+}
+
+ccl_device_forceinline float3 mf_eval_phase_glass(const float3 w,
+ const float lambda,
+ const float3 wo,
+ const bool wo_outside,
+ const float2 alpha,
+ const float eta)
+{
+ if (w.z > 0.9999f)
+ return make_float3(0.0f, 0.0f, 0.0f);
+
+ float pArea = (w.z < -0.9999f) ? 1.0f : lambda * w.z;
+ float v;
+ if (wo_outside) {
+ const float3 wh = normalize(wo - w);
+ if (wh.z < 0.0f)
+ return make_float3(0.0f, 0.0f, 0.0f);
+
+ const float dotW_WH = dot(-w, wh);
+ v = fresnel_dielectric_cos(dotW_WH, eta) * max(0.0f, dotW_WH) * D_ggx(wh, alpha.x) * 0.25f /
+ (pArea * dotW_WH);
+ }
+ else {
+ float3 wh = normalize(wo * eta - w);
+ if (wh.z < 0.0f)
+ wh = -wh;
+ const float dotW_WH = dot(-w, wh), dotWO_WH = dot(wo, wh);
+ if (dotW_WH < 0.0f)
+ return make_float3(0.0f, 0.0f, 0.0f);
+
+ float temp = dotW_WH + eta * dotWO_WH;
+ v = (1.0f - fresnel_dielectric_cos(dotW_WH, eta)) * max(0.0f, dotW_WH) * max(0.0f, -dotWO_WH) *
+ D_ggx(wh, alpha.x) / (pArea * temp * temp);
+ }
+
+ return make_float3(v, v, v);
}
/* === Utility functions for the random walks === */
@@ -173,64 +192,65 @@ ccl_device_forceinline float3 mf_eval_phase_glass(const float3 w, const float la
/* Smith Lambda function for GGX (based on page 12 of the supplemental implementation). */
ccl_device_forceinline float mf_lambda(const float3 w, const float2 alpha)
{
- if(w.z > 0.9999f)
- return 0.0f;
- else if(w.z < -0.9999f)
- return -0.9999f;
+ if (w.z > 0.9999f)
+ return 0.0f;
+ else if (w.z < -0.9999f)
+ return -0.9999f;
- const float inv_wz2 = 1.0f / max(w.z*w.z, 1e-7f);
- const float2 wa = make_float2(w.x, w.y)*alpha;
- float v = sqrtf(1.0f + dot(wa, wa) * inv_wz2);
- if(w.z <= 0.0f)
- v = -v;
+ const float inv_wz2 = 1.0f / max(w.z * w.z, 1e-7f);
+ const float2 wa = make_float2(w.x, w.y) * alpha;
+ float v = sqrtf(1.0f + dot(wa, wa) * inv_wz2);
+ if (w.z <= 0.0f)
+ v = -v;
- return 0.5f*(v - 1.0f);
+ return 0.5f * (v - 1.0f);
}
/* Height distribution CDF (based on page 4 of the supplemental implementation). */
ccl_device_forceinline float mf_invC1(const float h)
{
- return 2.0f * saturate(h) - 1.0f;
+ return 2.0f * saturate(h) - 1.0f;
}
ccl_device_forceinline float mf_C1(const float h)
{
- return saturate(0.5f * (h + 1.0f));
+ return saturate(0.5f * (h + 1.0f));
}
/* Masking function (based on page 16 of the supplemental implementation). */
ccl_device_forceinline float mf_G1(const float3 w, const float C1, const float lambda)
{
- if(w.z > 0.9999f)
- return 1.0f;
- if(w.z < 1e-5f)
- return 0.0f;
- return powf(C1, lambda);
+ if (w.z > 0.9999f)
+ return 1.0f;
+ if (w.z < 1e-5f)
+ return 0.0f;
+ return powf(C1, lambda);
}
/* Sampling from the visible height distribution (based on page 17 of the supplemental implementation). */
-ccl_device_forceinline bool mf_sample_height(const float3 w, float *h, float *C1, float *G1, float *lambda, const float U)
-{
- if(w.z > 0.9999f)
- return false;
- if(w.z < -0.9999f) {
- *C1 *= U;
- *h = mf_invC1(*C1);
- *G1 = mf_G1(w, *C1, *lambda);
- }
- else if(fabsf(w.z) >= 0.0001f) {
- if(U > 1.0f - *G1)
- return false;
- if(*lambda >= 0.0f) {
- *C1 = 1.0f;
- }
- else {
- *C1 *= powf(1.0f-U, -1.0f / *lambda);
- }
- *h = mf_invC1(*C1);
- *G1 = mf_G1(w, *C1, *lambda);
- }
- return true;
+ccl_device_forceinline bool mf_sample_height(
+ const float3 w, float *h, float *C1, float *G1, float *lambda, const float U)
+{
+ if (w.z > 0.9999f)
+ return false;
+ if (w.z < -0.9999f) {
+ *C1 *= U;
+ *h = mf_invC1(*C1);
+ *G1 = mf_G1(w, *C1, *lambda);
+ }
+ else if (fabsf(w.z) >= 0.0001f) {
+ if (U > 1.0f - *G1)
+ return false;
+ if (*lambda >= 0.0f) {
+ *C1 = 1.0f;
+ }
+ else {
+ *C1 *= powf(1.0f - U, -1.0f / *lambda);
+ }
+ *h = mf_invC1(*C1);
+ *G1 = mf_G1(w, *C1, *lambda);
+ }
+ return true;
}
/* === PDF approximations for the different phase functions. ===
@@ -240,80 +260,92 @@ ccl_device_forceinline bool mf_sample_height(const float3 w, float *h, float *C1
* the missing energy is then approximated as a diffuse reflection for the PDF. */
ccl_device_forceinline float mf_ggx_albedo(float r)
{
- float albedo = 0.806495f*expf(-1.98712f*r*r) + 0.199531f;
- albedo -= ((((((1.76741f*r - 8.43891f)*r + 15.784f)*r - 14.398f)*r + 6.45221f)*r - 1.19722f)*r + 0.027803f)*r + 0.00568739f;
- return saturate(albedo);
+ float albedo = 0.806495f * expf(-1.98712f * r * r) + 0.199531f;
+ albedo -= ((((((1.76741f * r - 8.43891f) * r + 15.784f) * r - 14.398f) * r + 6.45221f) * r -
+ 1.19722f) *
+ r +
+ 0.027803f) *
+ r +
+ 0.00568739f;
+ return saturate(albedo);
}
ccl_device_inline float mf_ggx_transmission_albedo(float a, float ior)
{
- if(ior < 1.0f) {
- ior = 1.0f/ior;
- }
- a = saturate(a);
- ior = clamp(ior, 1.0f, 3.0f);
- float I_1 = 0.0476898f*expf(-0.978352f*(ior-0.65657f)*(ior-0.65657f)) - 0.033756f*ior + 0.993261f;
- float R_1 = (((0.116991f*a - 0.270369f)*a + 0.0501366f)*a - 0.00411511f)*a + 1.00008f;
- float I_2 = (((-2.08704f*ior + 26.3298f)*ior - 127.906f)*ior + 292.958f)*ior - 287.946f + 199.803f/(ior*ior) - 101.668f/(ior*ior*ior);
- float R_2 = ((((5.3725f*a -24.9307f)*a + 22.7437f)*a - 3.40751f)*a + 0.0986325f)*a + 0.00493504f;
-
- return saturate(1.0f + I_2*R_2*0.0019127f - (1.0f - I_1)*(1.0f - R_1)*9.3205f);
+ if (ior < 1.0f) {
+ ior = 1.0f / ior;
+ }
+ a = saturate(a);
+ ior = clamp(ior, 1.0f, 3.0f);
+ float I_1 = 0.0476898f * expf(-0.978352f * (ior - 0.65657f) * (ior - 0.65657f)) -
+ 0.033756f * ior + 0.993261f;
+ float R_1 = (((0.116991f * a - 0.270369f) * a + 0.0501366f) * a - 0.00411511f) * a + 1.00008f;
+ float I_2 = (((-2.08704f * ior + 26.3298f) * ior - 127.906f) * ior + 292.958f) * ior - 287.946f +
+ 199.803f / (ior * ior) - 101.668f / (ior * ior * ior);
+ float R_2 = ((((5.3725f * a - 24.9307f) * a + 22.7437f) * a - 3.40751f) * a + 0.0986325f) * a +
+ 0.00493504f;
+
+ return saturate(1.0f + I_2 * R_2 * 0.0019127f - (1.0f - I_1) * (1.0f - R_1) * 9.3205f);
}
ccl_device_forceinline float mf_ggx_pdf(const float3 wi, const float3 wo, const float alpha)
{
- float D = D_ggx(normalize(wi+wo), alpha);
- float lambda = mf_lambda(wi, make_float2(alpha, alpha));
- float singlescatter = 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f);
+ float D = D_ggx(normalize(wi + wo), alpha);
+ float lambda = mf_lambda(wi, make_float2(alpha, alpha));
+ float singlescatter = 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f);
- float multiscatter = wo.z * M_1_PI_F;
+ float multiscatter = wo.z * M_1_PI_F;
- float albedo = mf_ggx_albedo(alpha);
- return albedo*singlescatter + (1.0f - albedo)*multiscatter;
+ float albedo = mf_ggx_albedo(alpha);
+ return albedo * singlescatter + (1.0f - albedo) * multiscatter;
}
ccl_device_forceinline float mf_ggx_aniso_pdf(const float3 wi, const float3 wo, const float2 alpha)
{
- float D = D_ggx_aniso(normalize(wi+wo), alpha);
- float lambda = mf_lambda(wi, alpha);
- float singlescatter = 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f);
+ float D = D_ggx_aniso(normalize(wi + wo), alpha);
+ float lambda = mf_lambda(wi, alpha);
+ float singlescatter = 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f);
- float multiscatter = wo.z * M_1_PI_F;
+ float multiscatter = wo.z * M_1_PI_F;
- float albedo = mf_ggx_albedo(sqrtf(alpha.x*alpha.y));
- return albedo*singlescatter + (1.0f - albedo)*multiscatter;
+ float albedo = mf_ggx_albedo(sqrtf(alpha.x * alpha.y));
+ return albedo * singlescatter + (1.0f - albedo) * multiscatter;
}
-ccl_device_forceinline float mf_glass_pdf(const float3 wi, const float3 wo, const float alpha, const float eta)
+ccl_device_forceinline float mf_glass_pdf(const float3 wi,
+ const float3 wo,
+ const float alpha,
+ const float eta)
{
- bool reflective = (wi.z*wo.z > 0.0f);
-
- float wh_len;
- float3 wh = normalize_len(wi + (reflective? wo : (wo*eta)), &wh_len);
- if(wh.z < 0.0f)
- wh = -wh;
- float3 r_wi = (wi.z < 0.0f)? -wi: wi;
- float lambda = mf_lambda(r_wi, make_float2(alpha, alpha));
- float D = D_ggx(wh, alpha);
- float fresnel = fresnel_dielectric_cos(dot(r_wi, wh), eta);
-
- float multiscatter = fabsf(wo.z * M_1_PI_F);
- if(reflective) {
- float singlescatter = 0.25f * D / max((1.0f + lambda) * r_wi.z, 1e-7f);
- float albedo = mf_ggx_albedo(alpha);
- return fresnel * (albedo*singlescatter + (1.0f - albedo)*multiscatter);
- }
- else {
- float singlescatter = fabsf(dot(r_wi, wh)*dot(wo, wh) * D * eta*eta / max((1.0f + lambda) * r_wi.z * wh_len*wh_len, 1e-7f));
- float albedo = mf_ggx_transmission_albedo(alpha, eta);
- return (1.0f - fresnel) * (albedo*singlescatter + (1.0f - albedo)*multiscatter);
- }
+ bool reflective = (wi.z * wo.z > 0.0f);
+
+ float wh_len;
+ float3 wh = normalize_len(wi + (reflective ? wo : (wo * eta)), &wh_len);
+ if (wh.z < 0.0f)
+ wh = -wh;
+ float3 r_wi = (wi.z < 0.0f) ? -wi : wi;
+ float lambda = mf_lambda(r_wi, make_float2(alpha, alpha));
+ float D = D_ggx(wh, alpha);
+ float fresnel = fresnel_dielectric_cos(dot(r_wi, wh), eta);
+
+ float multiscatter = fabsf(wo.z * M_1_PI_F);
+ if (reflective) {
+ float singlescatter = 0.25f * D / max((1.0f + lambda) * r_wi.z, 1e-7f);
+ float albedo = mf_ggx_albedo(alpha);
+ return fresnel * (albedo * singlescatter + (1.0f - albedo) * multiscatter);
+ }
+ else {
+ float singlescatter = fabsf(dot(r_wi, wh) * dot(wo, wh) * D * eta * eta /
+ max((1.0f + lambda) * r_wi.z * wh_len * wh_len, 1e-7f));
+ float albedo = mf_ggx_transmission_albedo(alpha, eta);
+ return (1.0f - fresnel) * (albedo * singlescatter + (1.0f - albedo) * multiscatter);
+ }
}
/* === Actual random walk implementations, one version of mf_eval and mf_sample per phase function. === */
-#define MF_NAME_JOIN(x,y) x ## _ ## y
-#define MF_NAME_EVAL(x,y) MF_NAME_JOIN(x,y)
+#define MF_NAME_JOIN(x, y) x##_##y
+#define MF_NAME_EVAL(x, y) MF_NAME_JOIN(x, y)
#define MF_FUNCTION_FULL_NAME(prefix) MF_NAME_EVAL(prefix, MF_PHASE_FUNCTION)
#define MF_PHASE_FUNCTION glass
@@ -326,10 +358,10 @@ ccl_device_forceinline float mf_glass_pdf(const float3 wi, const float3 wo, cons
ccl_device void bsdf_microfacet_multi_ggx_blur(ShaderClosure *sc, float roughness)
{
- MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc;
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
- bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
- bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
+ bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
+ bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
}
/* === Closure implementations === */
@@ -338,293 +370,395 @@ ccl_device void bsdf_microfacet_multi_ggx_blur(ShaderClosure *sc, float roughnes
ccl_device int bsdf_microfacet_multi_ggx_common_setup(MicrofacetBsdf *bsdf)
{
- bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
- bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f);
- bsdf->extra->color.x = saturate(bsdf->extra->color.x);
- bsdf->extra->color.y = saturate(bsdf->extra->color.y);
- bsdf->extra->color.z = saturate(bsdf->extra->color.z);
- bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
- bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
- bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+ bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
+ bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f);
+ bsdf->extra->color.x = saturate(bsdf->extra->color.x);
+ bsdf->extra->color.y = saturate(bsdf->extra->color.y);
+ bsdf->extra->color.z = saturate(bsdf->extra->color.z);
+ bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
+ bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
+ bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
- return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG;
+ return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
}
ccl_device int bsdf_microfacet_multi_ggx_aniso_setup(MicrofacetBsdf *bsdf)
{
- if(is_zero(bsdf->T))
- bsdf->T = make_float3(1.0f, 0.0f, 0.0f);
+ if (is_zero(bsdf->T))
+ bsdf->T = make_float3(1.0f, 0.0f, 0.0f);
- bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID;
+ bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID;
- return bsdf_microfacet_multi_ggx_common_setup(bsdf);
+ return bsdf_microfacet_multi_ggx_common_setup(bsdf);
}
-ccl_device int bsdf_microfacet_multi_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
+ccl_device int bsdf_microfacet_multi_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf,
+ const ShaderData *sd)
{
- if(is_zero(bsdf->T))
- bsdf->T = make_float3(1.0f, 0.0f, 0.0f);
+ if (is_zero(bsdf->T))
+ bsdf->T = make_float3(1.0f, 0.0f, 0.0f);
- bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID;
+ bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID;
- float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
- float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
- bsdf->sample_weight *= F;
+ float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+ float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+ bsdf->sample_weight *= F;
- return bsdf_microfacet_multi_ggx_common_setup(bsdf);
+ return bsdf_microfacet_multi_ggx_common_setup(bsdf);
}
ccl_device int bsdf_microfacet_multi_ggx_setup(MicrofacetBsdf *bsdf)
{
- bsdf->alpha_y = bsdf->alpha_x;
+ bsdf->alpha_y = bsdf->alpha_x;
- bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID;
+ bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID;
- return bsdf_microfacet_multi_ggx_common_setup(bsdf);
+ return bsdf_microfacet_multi_ggx_common_setup(bsdf);
}
ccl_device int bsdf_microfacet_multi_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
{
- bsdf->alpha_y = bsdf->alpha_x;
+ bsdf->alpha_y = bsdf->alpha_x;
- bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID;
+ bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID;
- float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
- float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
- bsdf->sample_weight *= F;
+ float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+ float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+ bsdf->sample_weight *= F;
- return bsdf_microfacet_multi_ggx_common_setup(bsdf);
+ return bsdf_microfacet_multi_ggx_common_setup(bsdf);
}
ccl_device int bsdf_microfacet_multi_ggx_refraction_setup(MicrofacetBsdf *bsdf)
{
- bsdf->alpha_y = bsdf->alpha_x;
-
- bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID;
+ bsdf->alpha_y = bsdf->alpha_x;
- return bsdf_microfacet_multi_ggx_common_setup(bsdf);
-}
+ bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID;
-ccl_device float3 bsdf_microfacet_multi_ggx_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) {
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return bsdf_microfacet_multi_ggx_common_setup(bsdf);
}
-ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) {
- const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
-
- if(bsdf->alpha_x*bsdf->alpha_y < 1e-7f) {
- return make_float3(0.0f, 0.0f, 0.0f);
- }
-
- bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID);
-
- bool is_aniso = (bsdf->alpha_x != bsdf->alpha_y);
- float3 X, Y, Z;
- Z = bsdf->N;
- if(is_aniso)
- make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
- else
- make_orthonormals(Z, &X, &Y);
-
- float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
- float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
-
- if(is_aniso)
- *pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y));
- else
- *pdf = mf_ggx_pdf(localI, localO, bsdf->alpha_x);
- return mf_eval_glossy(localI, localO, true, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior, use_fresnel, bsdf->extra->cspec0);
+ccl_device float3 bsdf_microfacet_multi_ggx_eval_transmit(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf,
+ ccl_addr_space uint *lcg_state)
+{
+ *pdf = 0.0f;
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf, ccl_addr_space uint *lcg_state)
+ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf,
+ ccl_addr_space uint *lcg_state)
{
- const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
+ const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+
+ if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
+
+ bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID);
+
+ bool is_aniso = (bsdf->alpha_x != bsdf->alpha_y);
+ float3 X, Y, Z;
+ Z = bsdf->N;
+ if (is_aniso)
+ make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
+ else
+ make_orthonormals(Z, &X, &Y);
+
+ float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
+ float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
+
+ if (is_aniso)
+ *pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y));
+ else
+ *pdf = mf_ggx_pdf(localI, localO, bsdf->alpha_x);
+ return mf_eval_glossy(localI,
+ localO,
+ true,
+ bsdf->extra->color,
+ bsdf->alpha_x,
+ bsdf->alpha_y,
+ lcg_state,
+ bsdf->ior,
+ use_fresnel,
+ bsdf->extra->cspec0);
+}
+
+ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals *kg,
+ const ShaderClosure *sc,
+ float3 Ng,
+ float3 I,
+ float3 dIdx,
+ float3 dIdy,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ float3 *domega_in_dx,
+ float3 *domega_in_dy,
+ float *pdf,
+ ccl_addr_space uint *lcg_state)
+{
+ const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
- float3 X, Y, Z;
- Z = bsdf->N;
+ float3 X, Y, Z;
+ Z = bsdf->N;
- if(bsdf->alpha_x*bsdf->alpha_y < 1e-7f) {
- *omega_in = 2*dot(Z, I)*Z - I;
- *pdf = 1e6f;
- *eval = make_float3(1e6f, 1e6f, 1e6f);
+ if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
+ *omega_in = 2 * dot(Z, I) * Z - I;
+ *pdf = 1e6f;
+ *eval = make_float3(1e6f, 1e6f, 1e6f);
#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx;
- *domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy;
+ *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx;
+ *domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy;
#endif
- return LABEL_REFLECT|LABEL_SINGULAR;
- }
-
- bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID);
-
- bool is_aniso = (bsdf->alpha_x != bsdf->alpha_y);
- if(is_aniso)
- make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
- else
- make_orthonormals(Z, &X, &Y);
-
- float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
- float3 localO;
-
- *eval = mf_sample_glossy(localI, &localO, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior, use_fresnel, bsdf->extra->cspec0);
- if(is_aniso)
- *pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y));
- else
- *pdf = mf_ggx_pdf(localI, localO, bsdf->alpha_x);
- *eval *= *pdf;
-
- *omega_in = X*localO.x + Y*localO.y + Z*localO.z;
+ return LABEL_REFLECT | LABEL_SINGULAR;
+ }
+
+ bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID);
+
+ bool is_aniso = (bsdf->alpha_x != bsdf->alpha_y);
+ if (is_aniso)
+ make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
+ else
+ make_orthonormals(Z, &X, &Y);
+
+ float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
+ float3 localO;
+
+ *eval = mf_sample_glossy(localI,
+ &localO,
+ bsdf->extra->color,
+ bsdf->alpha_x,
+ bsdf->alpha_y,
+ lcg_state,
+ bsdf->ior,
+ use_fresnel,
+ bsdf->extra->cspec0);
+ if (is_aniso)
+ *pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y));
+ else
+ *pdf = mf_ggx_pdf(localI, localO, bsdf->alpha_x);
+ *eval *= *pdf;
+
+ *omega_in = X * localO.x + Y * localO.y + Z * localO.z;
#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx;
- *domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy;
+ *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx;
+ *domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy;
#endif
- return LABEL_REFLECT|LABEL_GLOSSY;
+ return LABEL_REFLECT | LABEL_GLOSSY;
}
/* Multiscattering GGX Glass closure */
ccl_device int bsdf_microfacet_multi_ggx_glass_setup(MicrofacetBsdf *bsdf)
{
- bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
- bsdf->alpha_y = bsdf->alpha_x;
- bsdf->ior = max(0.0f, bsdf->ior);
- bsdf->extra->color.x = saturate(bsdf->extra->color.x);
- bsdf->extra->color.y = saturate(bsdf->extra->color.y);
- bsdf->extra->color.z = saturate(bsdf->extra->color.z);
+ bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
+ bsdf->alpha_y = bsdf->alpha_x;
+ bsdf->ior = max(0.0f, bsdf->ior);
+ bsdf->extra->color.x = saturate(bsdf->extra->color.x);
+ bsdf->extra->color.y = saturate(bsdf->extra->color.y);
+ bsdf->extra->color.z = saturate(bsdf->extra->color.z);
- bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID;
+ bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID;
- return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG;
+ return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
}
-ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
+ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsdf,
+ const ShaderData *sd)
{
- bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
- bsdf->alpha_y = bsdf->alpha_x;
- bsdf->ior = max(0.0f, bsdf->ior);
- bsdf->extra->color.x = saturate(bsdf->extra->color.x);
- bsdf->extra->color.y = saturate(bsdf->extra->color.y);
- bsdf->extra->color.z = saturate(bsdf->extra->color.z);
- bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
- bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
- bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
-
- bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID;
-
- float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
- float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
- bsdf->sample_weight *= F;
-
- return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG;
-}
-
-ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) {
- const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
-
- if(bsdf->alpha_x*bsdf->alpha_y < 1e-7f) {
- return make_float3(0.0f, 0.0f, 0.0f);
- }
-
- float3 X, Y, Z;
- Z = bsdf->N;
- make_orthonormals(Z, &X, &Y);
-
- float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
- float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
-
- *pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior);
- return mf_eval_glass(localI, localO, false, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior, false, bsdf->extra->color);
-}
-
-ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) {
- const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
-
- if(bsdf->alpha_x*bsdf->alpha_y < 1e-7f) {
- return make_float3(0.0f, 0.0f, 0.0f);
- }
-
- bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID);
-
- float3 X, Y, Z;
- Z = bsdf->N;
- make_orthonormals(Z, &X, &Y);
-
- float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
- float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
-
- *pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior);
- return mf_eval_glass(localI, localO, true, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior, use_fresnel, bsdf->extra->cspec0);
-}
-
-ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf, ccl_addr_space uint *lcg_state)
+ bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
+ bsdf->alpha_y = bsdf->alpha_x;
+ bsdf->ior = max(0.0f, bsdf->ior);
+ bsdf->extra->color.x = saturate(bsdf->extra->color.x);
+ bsdf->extra->color.y = saturate(bsdf->extra->color.y);
+ bsdf->extra->color.z = saturate(bsdf->extra->color.z);
+ bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
+ bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
+ bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+
+ bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID;
+
+ float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+ float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+ bsdf->sample_weight *= F;
+
+ return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
+}
+
+ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_transmit(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf,
+ ccl_addr_space uint *lcg_state)
+{
+ const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+
+ if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
+
+ float3 X, Y, Z;
+ Z = bsdf->N;
+ make_orthonormals(Z, &X, &Y);
+
+ float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
+ float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
+
+ *pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior);
+ return mf_eval_glass(localI,
+ localO,
+ false,
+ bsdf->extra->color,
+ bsdf->alpha_x,
+ bsdf->alpha_y,
+ lcg_state,
+ bsdf->ior,
+ false,
+ bsdf->extra->color);
+}
+
+ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf,
+ ccl_addr_space uint *lcg_state)
+{
+ const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+
+ if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
+
+ bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID);
+
+ float3 X, Y, Z;
+ Z = bsdf->N;
+ make_orthonormals(Z, &X, &Y);
+
+ float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
+ float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
+
+ *pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior);
+ return mf_eval_glass(localI,
+ localO,
+ true,
+ bsdf->extra->color,
+ bsdf->alpha_x,
+ bsdf->alpha_y,
+ lcg_state,
+ bsdf->ior,
+ use_fresnel,
+ bsdf->extra->cspec0);
+}
+
+ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals *kg,
+ const ShaderClosure *sc,
+ float3 Ng,
+ float3 I,
+ float3 dIdx,
+ float3 dIdy,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ float3 *domega_in_dx,
+ float3 *domega_in_dy,
+ float *pdf,
+ ccl_addr_space uint *lcg_state)
{
- const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
+ const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
- float3 X, Y, Z;
- Z = bsdf->N;
+ float3 X, Y, Z;
+ Z = bsdf->N;
- if(bsdf->alpha_x*bsdf->alpha_y < 1e-7f) {
- float3 R, T;
+ if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
+ float3 R, T;
#ifdef __RAY_DIFFERENTIALS__
- float3 dRdx, dRdy, dTdx, dTdy;
+ float3 dRdx, dRdy, dTdx, dTdy;
#endif
- bool inside;
- float fresnel = fresnel_dielectric(bsdf->ior, Z, I, &R, &T,
+ bool inside;
+ float fresnel = fresnel_dielectric(bsdf->ior,
+ Z,
+ I,
+ &R,
+ &T,
#ifdef __RAY_DIFFERENTIALS__
- dIdx, dIdy, &dRdx, &dRdy, &dTdx, &dTdy,
+ dIdx,
+ dIdy,
+ &dRdx,
+ &dRdy,
+ &dTdx,
+ &dTdy,
#endif
- &inside);
+ &inside);
- *pdf = 1e6f;
- *eval = make_float3(1e6f, 1e6f, 1e6f);
- if(randu < fresnel) {
- *omega_in = R;
+ *pdf = 1e6f;
+ *eval = make_float3(1e6f, 1e6f, 1e6f);
+ if (randu < fresnel) {
+ *omega_in = R;
#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = dRdx;
- *domega_in_dy = dRdy;
+ *domega_in_dx = dRdx;
+ *domega_in_dy = dRdy;
#endif
- return LABEL_REFLECT|LABEL_SINGULAR;
- }
- else {
- *omega_in = T;
+ return LABEL_REFLECT | LABEL_SINGULAR;
+ }
+ else {
+ *omega_in = T;
#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = dTdx;
- *domega_in_dy = dTdy;
+ *domega_in_dx = dTdx;
+ *domega_in_dy = dTdy;
#endif
- return LABEL_TRANSMIT|LABEL_SINGULAR;
- }
- }
-
- bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID);
-
- make_orthonormals(Z, &X, &Y);
-
- float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
- float3 localO;
-
- *eval = mf_sample_glass(localI, &localO, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior, use_fresnel, bsdf->extra->cspec0);
- *pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior);
- *eval *= *pdf;
-
- *omega_in = X*localO.x + Y*localO.y + Z*localO.z;
- if(localO.z*localI.z > 0.0f) {
+ return LABEL_TRANSMIT | LABEL_SINGULAR;
+ }
+ }
+
+ bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID);
+
+ make_orthonormals(Z, &X, &Y);
+
+ float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
+ float3 localO;
+
+ *eval = mf_sample_glass(localI,
+ &localO,
+ bsdf->extra->color,
+ bsdf->alpha_x,
+ bsdf->alpha_y,
+ lcg_state,
+ bsdf->ior,
+ use_fresnel,
+ bsdf->extra->cspec0);
+ *pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior);
+ *eval *= *pdf;
+
+ *omega_in = X * localO.x + Y * localO.y + Z * localO.z;
+ if (localO.z * localI.z > 0.0f) {
#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx;
- *domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy;
+ *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx;
+ *domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy;
#endif
- return LABEL_REFLECT|LABEL_GLOSSY;
- }
- else {
+ return LABEL_REFLECT | LABEL_GLOSSY;
+ }
+ else {
#ifdef __RAY_DIFFERENTIALS__
- float cosI = dot(Z, I);
- float dnp = max(sqrtf(1.0f - (bsdf->ior * bsdf->ior * (1.0f - cosI*cosI))), 1e-7f);
- *domega_in_dx = -(bsdf->ior * dIdx) + ((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdx, Z)) * Z;
- *domega_in_dy = -(bsdf->ior * dIdy) + ((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdy, Z)) * Z;
+ float cosI = dot(Z, I);
+ float dnp = max(sqrtf(1.0f - (bsdf->ior * bsdf->ior * (1.0f - cosI * cosI))), 1e-7f);
+ *domega_in_dx = -(bsdf->ior * dIdx) +
+ ((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdx, Z)) * Z;
+ *domega_in_dy = -(bsdf->ior * dIdy) +
+ ((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdy, Z)) * Z;
#endif
- return LABEL_TRANSMIT|LABEL_GLOSSY;
- }
+ return LABEL_TRANSMIT | LABEL_GLOSSY;
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
index 5d300ef6db5..79247ee8057 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
@@ -25,247 +25,251 @@
* energy is used. In combination with MIS, that is enough to produce an unbiased result, although
* the balance heuristic isn't necessarily optimal anymore.
*/
-ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(
- float3 wi,
- float3 wo,
- const bool wo_outside,
- const float3 color,
- const float alpha_x,
- const float alpha_y,
- ccl_addr_space uint *lcg_state,
- const float eta,
- bool use_fresnel,
- const float3 cspec0)
+ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi,
+ float3 wo,
+ const bool wo_outside,
+ const float3 color,
+ const float alpha_x,
+ const float alpha_y,
+ ccl_addr_space uint *lcg_state,
+ const float eta,
+ bool use_fresnel,
+ const float3 cspec0)
{
- /* Evaluating for a shallower incoming direction produces less noise, and the properties of the BSDF guarantee reciprocity. */
- bool swapped = false;
+ /* Evaluating for a shallower incoming direction produces less noise, and the properties of the BSDF guarantee reciprocity. */
+ bool swapped = false;
#ifdef MF_MULTI_GLASS
- if(wi.z*wo.z < 0.0f) {
- /* Glass transmission is a special case and requires the directions to change hemisphere. */
- if(-wo.z < wi.z) {
- swapped = true;
- float3 tmp = -wo;
- wo = -wi;
- wi = tmp;
- }
- }
- else
+ if (wi.z * wo.z < 0.0f) {
+ /* Glass transmission is a special case and requires the directions to change hemisphere. */
+ if (-wo.z < wi.z) {
+ swapped = true;
+ float3 tmp = -wo;
+ wo = -wi;
+ wi = tmp;
+ }
+ }
+ else
#endif
- if(wo.z < wi.z) {
- swapped = true;
- float3 tmp = wo;
- wo = wi;
- wi = tmp;
- }
+ if (wo.z < wi.z) {
+ swapped = true;
+ float3 tmp = wo;
+ wo = wi;
+ wi = tmp;
+ }
- if(wi.z < 1e-5f || (wo.z < 1e-5f && wo_outside) || (wo.z > -1e-5f && !wo_outside))
- return make_float3(0.0f, 0.0f, 0.0f);
+ if (wi.z < 1e-5f || (wo.z < 1e-5f && wo_outside) || (wo.z > -1e-5f && !wo_outside))
+ return make_float3(0.0f, 0.0f, 0.0f);
- const float2 alpha = make_float2(alpha_x, alpha_y);
+ const float2 alpha = make_float2(alpha_x, alpha_y);
- float lambda_r = mf_lambda(-wi, alpha);
- float shadowing_lambda = mf_lambda(wo_outside? wo: -wo, alpha);
+ float lambda_r = mf_lambda(-wi, alpha);
+ float shadowing_lambda = mf_lambda(wo_outside ? wo : -wo, alpha);
- /* Analytically compute single scattering for lower noise. */
- float3 eval;
- float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
- const float3 wh = normalize(wi+wo);
+ /* Analytically compute single scattering for lower noise. */
+ float3 eval;
+ float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+ const float3 wh = normalize(wi + wo);
#ifdef MF_MULTI_GLASS
- eval = mf_eval_phase_glass(-wi, lambda_r, wo, wo_outside, alpha, eta);
- if(wo_outside)
- eval *= -lambda_r / (shadowing_lambda - lambda_r);
- else
- eval *= -lambda_r * beta(-lambda_r, shadowing_lambda+1.0f);
-#else /* MF_MULTI_GLOSSY */
- const float G2 = 1.0f / (1.0f - (lambda_r + 1.0f) + shadowing_lambda);
- float val = G2 * 0.25f / wi.z;
- if(alpha.x == alpha.y)
- val *= D_ggx(wh, alpha.x);
- else
- val *= D_ggx_aniso(wh, alpha);
- eval = make_float3(val, val, val);
+ eval = mf_eval_phase_glass(-wi, lambda_r, wo, wo_outside, alpha, eta);
+ if (wo_outside)
+ eval *= -lambda_r / (shadowing_lambda - lambda_r);
+ else
+ eval *= -lambda_r * beta(-lambda_r, shadowing_lambda + 1.0f);
+#else /* MF_MULTI_GLOSSY */
+ const float G2 = 1.0f / (1.0f - (lambda_r + 1.0f) + shadowing_lambda);
+ float val = G2 * 0.25f / wi.z;
+ if (alpha.x == alpha.y)
+ val *= D_ggx(wh, alpha.x);
+ else
+ val *= D_ggx_aniso(wh, alpha);
+ eval = make_float3(val, val, val);
#endif
- float F0 = fresnel_dielectric_cos(1.0f, eta);
- if(use_fresnel) {
- throughput = interpolate_fresnel_color(wi, wh, eta, F0, cspec0);
+ float F0 = fresnel_dielectric_cos(1.0f, eta);
+ if (use_fresnel) {
+ throughput = interpolate_fresnel_color(wi, wh, eta, F0, cspec0);
- eval *= throughput;
- }
+ eval *= throughput;
+ }
- float3 wr = -wi;
- float hr = 1.0f;
- float C1_r = 1.0f;
- float G1_r = 0.0f;
- bool outside = true;
+ float3 wr = -wi;
+ float hr = 1.0f;
+ float C1_r = 1.0f;
+ float G1_r = 0.0f;
+ bool outside = true;
- for(int order = 0; order < 10; order++) {
- /* Sample microfacet height. */
- float height_rand = lcg_step_float_addrspace(lcg_state);
- if(!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, height_rand))
- break;
- /* Sample microfacet normal. */
- float vndf_rand_y = lcg_step_float_addrspace(lcg_state);
- float vndf_rand_x = lcg_step_float_addrspace(lcg_state);
- float3 wm = mf_sample_vndf(-wr, alpha, vndf_rand_x, vndf_rand_y);
+ for (int order = 0; order < 10; order++) {
+ /* Sample microfacet height. */
+ float height_rand = lcg_step_float_addrspace(lcg_state);
+ if (!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, height_rand))
+ break;
+ /* Sample microfacet normal. */
+ float vndf_rand_y = lcg_step_float_addrspace(lcg_state);
+ float vndf_rand_x = lcg_step_float_addrspace(lcg_state);
+ float3 wm = mf_sample_vndf(-wr, alpha, vndf_rand_x, vndf_rand_y);
#ifdef MF_MULTI_GLASS
- if(order == 0 && use_fresnel) {
- /* Evaluate amount of scattering towards wo on this microfacet. */
- float3 phase;
- if(outside)
- phase = mf_eval_phase_glass(wr, lambda_r, wo, wo_outside, alpha, eta);
- else
- phase = mf_eval_phase_glass(wr, lambda_r, -wo, !wo_outside, alpha, 1.0f / eta);
+ if (order == 0 && use_fresnel) {
+ /* Evaluate amount of scattering towards wo on this microfacet. */
+ float3 phase;
+ if (outside)
+ phase = mf_eval_phase_glass(wr, lambda_r, wo, wo_outside, alpha, eta);
+ else
+ phase = mf_eval_phase_glass(wr, lambda_r, -wo, !wo_outside, alpha, 1.0f / eta);
- eval = throughput * phase * mf_G1(wo_outside ? wo : -wo, mf_C1((outside == wo_outside) ? hr : -hr), shadowing_lambda);
- }
+ eval = throughput * phase *
+ mf_G1(wo_outside ? wo : -wo,
+ mf_C1((outside == wo_outside) ? hr : -hr),
+ shadowing_lambda);
+ }
#endif
- if(order > 0) {
- /* Evaluate amount of scattering towards wo on this microfacet. */
- float3 phase;
+ if (order > 0) {
+ /* Evaluate amount of scattering towards wo on this microfacet. */
+ float3 phase;
#ifdef MF_MULTI_GLASS
- if(outside)
- phase = mf_eval_phase_glass(wr, lambda_r, wo, wo_outside, alpha, eta);
- else
- phase = mf_eval_phase_glass(wr, lambda_r, -wo, !wo_outside, alpha, 1.0f/eta);
-#else /* MF_MULTI_GLOSSY */
- phase = mf_eval_phase_glossy(wr, lambda_r, wo, alpha) * throughput;
+ if (outside)
+ phase = mf_eval_phase_glass(wr, lambda_r, wo, wo_outside, alpha, eta);
+ else
+ phase = mf_eval_phase_glass(wr, lambda_r, -wo, !wo_outside, alpha, 1.0f / eta);
+#else /* MF_MULTI_GLOSSY */
+ phase = mf_eval_phase_glossy(wr, lambda_r, wo, alpha) * throughput;
#endif
- eval += throughput * phase * mf_G1(wo_outside? wo: -wo, mf_C1((outside == wo_outside)? hr: -hr), shadowing_lambda);
- }
- if(order+1 < 10) {
- /* Bounce from the microfacet. */
+ eval += throughput * phase *
+ mf_G1(wo_outside ? wo : -wo,
+ mf_C1((outside == wo_outside) ? hr : -hr),
+ shadowing_lambda);
+ }
+ if (order + 1 < 10) {
+ /* Bounce from the microfacet. */
#ifdef MF_MULTI_GLASS
- bool next_outside;
- float3 wi_prev = -wr;
- float phase_rand = lcg_step_float_addrspace(lcg_state);
- wr = mf_sample_phase_glass(-wr, outside? eta: 1.0f/eta, wm, phase_rand, &next_outside);
- if(!next_outside) {
- outside = !outside;
- wr = -wr;
- hr = -hr;
- }
+ bool next_outside;
+ float3 wi_prev = -wr;
+ float phase_rand = lcg_step_float_addrspace(lcg_state);
+ wr = mf_sample_phase_glass(-wr, outside ? eta : 1.0f / eta, wm, phase_rand, &next_outside);
+ if (!next_outside) {
+ outside = !outside;
+ wr = -wr;
+ hr = -hr;
+ }
- if(use_fresnel && !next_outside) {
- throughput *= color;
- }
- else if(use_fresnel && order > 0) {
- throughput *= interpolate_fresnel_color(wi_prev, wm, eta, F0, cspec0);
- }
-#else /* MF_MULTI_GLOSSY */
- if(use_fresnel && order > 0) {
- throughput *= interpolate_fresnel_color(-wr, wm, eta, F0, cspec0);
- }
- wr = mf_sample_phase_glossy(-wr, &throughput, wm);
+ if (use_fresnel && !next_outside) {
+ throughput *= color;
+ }
+ else if (use_fresnel && order > 0) {
+ throughput *= interpolate_fresnel_color(wi_prev, wm, eta, F0, cspec0);
+ }
+#else /* MF_MULTI_GLOSSY */
+ if (use_fresnel && order > 0) {
+ throughput *= interpolate_fresnel_color(-wr, wm, eta, F0, cspec0);
+ }
+ wr = mf_sample_phase_glossy(-wr, &throughput, wm);
#endif
- lambda_r = mf_lambda(wr, alpha);
+ lambda_r = mf_lambda(wr, alpha);
- if(!use_fresnel)
- throughput *= color;
+ if (!use_fresnel)
+ throughput *= color;
- C1_r = mf_C1(hr);
- G1_r = mf_G1(wr, C1_r, lambda_r);
- }
- }
+ C1_r = mf_C1(hr);
+ G1_r = mf_G1(wr, C1_r, lambda_r);
+ }
+ }
- if(swapped)
- eval *= fabsf(wi.z / wo.z);
- return eval;
+ if (swapped)
+ eval *= fabsf(wi.z / wo.z);
+ return eval;
}
/* Perform a random walk on the microsurface starting from wi, returning the direction in which the walk
* escaped the surface in wo. The function returns the throughput between wi and wo.
* Without reflection losses due to coloring or fresnel absorption in conductors, the sampling is optimal.
*/
-ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(
- float3 wi,
- float3 *wo,
- const float3 color,
- const float alpha_x,
- const float alpha_y,
- ccl_addr_space uint *lcg_state,
- const float eta,
- bool use_fresnel,
- const float3 cspec0)
+ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi,
+ float3 *wo,
+ const float3 color,
+ const float alpha_x,
+ const float alpha_y,
+ ccl_addr_space uint *lcg_state,
+ const float eta,
+ bool use_fresnel,
+ const float3 cspec0)
{
- const float2 alpha = make_float2(alpha_x, alpha_y);
+ const float2 alpha = make_float2(alpha_x, alpha_y);
- float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
- float3 wr = -wi;
- float lambda_r = mf_lambda(wr, alpha);
- float hr = 1.0f;
- float C1_r = 1.0f;
- float G1_r = 0.0f;
- bool outside = true;
+ float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+ float3 wr = -wi;
+ float lambda_r = mf_lambda(wr, alpha);
+ float hr = 1.0f;
+ float C1_r = 1.0f;
+ float G1_r = 0.0f;
+ bool outside = true;
- float F0 = fresnel_dielectric_cos(1.0f, eta);
- if(use_fresnel) {
- throughput = interpolate_fresnel_color(wi, normalize(wi + wr), eta, F0, cspec0);
- }
+ float F0 = fresnel_dielectric_cos(1.0f, eta);
+ if (use_fresnel) {
+ throughput = interpolate_fresnel_color(wi, normalize(wi + wr), eta, F0, cspec0);
+ }
- int order;
- for(order = 0; order < 10; order++) {
- /* Sample microfacet height. */
- float height_rand = lcg_step_float_addrspace(lcg_state);
- if(!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, height_rand)) {
- /* The random walk has left the surface. */
- *wo = outside? wr: -wr;
- return throughput;
- }
- /* Sample microfacet normal. */
- float vndf_rand_y = lcg_step_float_addrspace(lcg_state);
- float vndf_rand_x = lcg_step_float_addrspace(lcg_state);
- float3 wm = mf_sample_vndf(-wr, alpha, vndf_rand_x, vndf_rand_y);
+ int order;
+ for (order = 0; order < 10; order++) {
+ /* Sample microfacet height. */
+ float height_rand = lcg_step_float_addrspace(lcg_state);
+ if (!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, height_rand)) {
+ /* The random walk has left the surface. */
+ *wo = outside ? wr : -wr;
+ return throughput;
+ }
+ /* Sample microfacet normal. */
+ float vndf_rand_y = lcg_step_float_addrspace(lcg_state);
+ float vndf_rand_x = lcg_step_float_addrspace(lcg_state);
+ float3 wm = mf_sample_vndf(-wr, alpha, vndf_rand_x, vndf_rand_y);
- /* First-bounce color is already accounted for in mix weight. */
- if(!use_fresnel && order > 0)
- throughput *= color;
+ /* First-bounce color is already accounted for in mix weight. */
+ if (!use_fresnel && order > 0)
+ throughput *= color;
- /* Bounce from the microfacet. */
+ /* Bounce from the microfacet. */
#ifdef MF_MULTI_GLASS
- bool next_outside;
- float3 wi_prev = -wr;
- float phase_rand = lcg_step_float_addrspace(lcg_state);
- wr = mf_sample_phase_glass(-wr, outside? eta: 1.0f/eta, wm, phase_rand, &next_outside);
- if(!next_outside) {
- hr = -hr;
- wr = -wr;
- outside = !outside;
- }
+ bool next_outside;
+ float3 wi_prev = -wr;
+ float phase_rand = lcg_step_float_addrspace(lcg_state);
+ wr = mf_sample_phase_glass(-wr, outside ? eta : 1.0f / eta, wm, phase_rand, &next_outside);
+ if (!next_outside) {
+ hr = -hr;
+ wr = -wr;
+ outside = !outside;
+ }
- if(use_fresnel) {
- if(!next_outside) {
- throughput *= color;
- }
- else {
- float3 t_color = interpolate_fresnel_color(wi_prev, wm, eta, F0, cspec0);
+ if (use_fresnel) {
+ if (!next_outside) {
+ throughput *= color;
+ }
+ else {
+ float3 t_color = interpolate_fresnel_color(wi_prev, wm, eta, F0, cspec0);
- if(order == 0)
- throughput = t_color;
- else
- throughput *= t_color;
- }
- }
-#else /* MF_MULTI_GLOSSY */
- if(use_fresnel) {
- float3 t_color = interpolate_fresnel_color(-wr, wm, eta, F0, cspec0);
+ if (order == 0)
+ throughput = t_color;
+ else
+ throughput *= t_color;
+ }
+ }
+#else /* MF_MULTI_GLOSSY */
+ if (use_fresnel) {
+ float3 t_color = interpolate_fresnel_color(-wr, wm, eta, F0, cspec0);
- if(order == 0)
- throughput = t_color;
- else
- throughput *= t_color;
- }
- wr = mf_sample_phase_glossy(-wr, &throughput, wm);
+ if (order == 0)
+ throughput = t_color;
+ else
+ throughput *= t_color;
+ }
+ wr = mf_sample_phase_glossy(-wr, &throughput, wm);
#endif
- /* Update random walk parameters. */
- lambda_r = mf_lambda(wr, alpha);
- G1_r = mf_G1(wr, C1_r, lambda_r);
- }
- *wo = make_float3(0.0f, 0.0f, 1.0f);
- return make_float3(0.0f, 0.0f, 0.0f);
+ /* Update random walk parameters. */
+ lambda_r = mf_lambda(wr, alpha);
+ G1_r = mf_G1(wr, C1_r, lambda_r);
+ }
+ *wo = make_float3(0.0f, 0.0f, 1.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
#undef MF_MULTI_GLASS
diff --git a/intern/cycles/kernel/closure/bsdf_oren_nayar.h b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
index 3446d1609d9..104ed5b2818 100644
--- a/intern/cycles/kernel/closure/bsdf_oren_nayar.h
+++ b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
@@ -20,92 +20,110 @@
CCL_NAMESPACE_BEGIN
typedef ccl_addr_space struct OrenNayarBsdf {
- SHADER_CLOSURE_BASE;
+ SHADER_CLOSURE_BASE;
- float roughness;
- float a;
- float b;
+ float roughness;
+ float a;
+ float b;
} OrenNayarBsdf;
-ccl_device float3 bsdf_oren_nayar_get_intensity(const ShaderClosure *sc, float3 n, float3 v, float3 l)
+ccl_device float3 bsdf_oren_nayar_get_intensity(const ShaderClosure *sc,
+ float3 n,
+ float3 v,
+ float3 l)
{
- const OrenNayarBsdf *bsdf = (const OrenNayarBsdf*)sc;
- float nl = max(dot(n, l), 0.0f);
- float nv = max(dot(n, v), 0.0f);
- float t = dot(l, v) - nl * nv;
-
- if(t > 0.0f)
- t /= max(nl, nv) + FLT_MIN;
- float is = nl * (bsdf->a + bsdf->b * t);
- return make_float3(is, is, is);
+ const OrenNayarBsdf *bsdf = (const OrenNayarBsdf *)sc;
+ float nl = max(dot(n, l), 0.0f);
+ float nv = max(dot(n, v), 0.0f);
+ float t = dot(l, v) - nl * nv;
+
+ if (t > 0.0f)
+ t /= max(nl, nv) + FLT_MIN;
+ float is = nl * (bsdf->a + bsdf->b * t);
+ return make_float3(is, is, is);
}
ccl_device int bsdf_oren_nayar_setup(OrenNayarBsdf *bsdf)
{
- float sigma = bsdf->roughness;
+ float sigma = bsdf->roughness;
- bsdf->type = CLOSURE_BSDF_OREN_NAYAR_ID;
+ bsdf->type = CLOSURE_BSDF_OREN_NAYAR_ID;
- sigma = saturate(sigma);
+ sigma = saturate(sigma);
- float div = 1.0f / (M_PI_F + ((3.0f * M_PI_F - 4.0f) / 6.0f) * sigma);
+ float div = 1.0f / (M_PI_F + ((3.0f * M_PI_F - 4.0f) / 6.0f) * sigma);
- bsdf->a = 1.0f * div;
- bsdf->b = sigma * div;
+ bsdf->a = 1.0f * div;
+ bsdf->b = sigma * div;
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device bool bsdf_oren_nayar_merge(const ShaderClosure *a, const ShaderClosure *b)
{
- const OrenNayarBsdf *bsdf_a = (const OrenNayarBsdf*)a;
- const OrenNayarBsdf *bsdf_b = (const OrenNayarBsdf*)b;
+ const OrenNayarBsdf *bsdf_a = (const OrenNayarBsdf *)a;
+ const OrenNayarBsdf *bsdf_b = (const OrenNayarBsdf *)b;
- return (isequal_float3(bsdf_a->N, bsdf_b->N)) &&
- (bsdf_a->roughness == bsdf_b->roughness);
+ return (isequal_float3(bsdf_a->N, bsdf_b->N)) && (bsdf_a->roughness == bsdf_b->roughness);
}
-ccl_device float3 bsdf_oren_nayar_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_oren_nayar_eval_reflect(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- const OrenNayarBsdf *bsdf = (const OrenNayarBsdf*)sc;
- if(dot(bsdf->N, omega_in) > 0.0f) {
- *pdf = 0.5f * M_1_PI_F;
- return bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, omega_in);
- }
- else {
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
- }
+ const OrenNayarBsdf *bsdf = (const OrenNayarBsdf *)sc;
+ if (dot(bsdf->N, omega_in) > 0.0f) {
+ *pdf = 0.5f * M_1_PI_F;
+ return bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, omega_in);
+ }
+ else {
+ *pdf = 0.0f;
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
}
-ccl_device float3 bsdf_oren_nayar_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_oren_nayar_eval_transmit(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device int bsdf_oren_nayar_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_oren_nayar_sample(const ShaderClosure *sc,
+ float3 Ng,
+ float3 I,
+ float3 dIdx,
+ float3 dIdy,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ float3 *domega_in_dx,
+ float3 *domega_in_dy,
+ float *pdf)
{
- const OrenNayarBsdf *bsdf = (const OrenNayarBsdf*)sc;
- sample_uniform_hemisphere(bsdf->N, randu, randv, omega_in, pdf);
+ const OrenNayarBsdf *bsdf = (const OrenNayarBsdf *)sc;
+ sample_uniform_hemisphere(bsdf->N, randu, randv, omega_in, pdf);
- if(dot(Ng, *omega_in) > 0.0f) {
- *eval = bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, *omega_in);
+ if (dot(Ng, *omega_in) > 0.0f) {
+ *eval = bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, *omega_in);
#ifdef __RAY_DIFFERENTIALS__
- // TODO: find a better approximation for the bounce
- *domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
- *domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
+ // TODO: find a better approximation for the bounce
+ *domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
+ *domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
#endif
- }
- else {
- *pdf = 0.0f;
- *eval = make_float3(0.0f, 0.0f, 0.0f);
- }
+ }
+ else {
+ *pdf = 0.0f;
+ *eval = make_float3(0.0f, 0.0f, 0.0f);
+ }
- return LABEL_REFLECT|LABEL_DIFFUSE;
+ return LABEL_REFLECT | LABEL_DIFFUSE;
}
-
CCL_NAMESPACE_END
-#endif /* __BSDF_OREN_NAYAR_H__ */
+#endif /* __BSDF_OREN_NAYAR_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_phong_ramp.h b/intern/cycles/kernel/closure/bsdf_phong_ramp.h
index 83da05ac435..b6fd0e68681 100644
--- a/intern/cycles/kernel/closure/bsdf_phong_ramp.h
+++ b/intern/cycles/kernel/closure/bsdf_phong_ramp.h
@@ -38,105 +38,118 @@ CCL_NAMESPACE_BEGIN
#ifdef __OSL__
typedef ccl_addr_space struct PhongRampBsdf {
- SHADER_CLOSURE_BASE;
+ SHADER_CLOSURE_BASE;
- float exponent;
- float3 *colors;
+ float exponent;
+ float3 *colors;
} PhongRampBsdf;
ccl_device float3 bsdf_phong_ramp_get_color(const float3 colors[8], float pos)
{
- int MAXCOLORS = 8;
-
- float npos = pos * (float)(MAXCOLORS - 1);
- int ipos = float_to_int(npos);
- if(ipos < 0)
- return colors[0];
- if(ipos >= (MAXCOLORS - 1))
- return colors[MAXCOLORS - 1];
- float offset = npos - (float)ipos;
- return colors[ipos] * (1.0f - offset) + colors[ipos+1] * offset;
+ int MAXCOLORS = 8;
+
+ float npos = pos * (float)(MAXCOLORS - 1);
+ int ipos = float_to_int(npos);
+ if (ipos < 0)
+ return colors[0];
+ if (ipos >= (MAXCOLORS - 1))
+ return colors[MAXCOLORS - 1];
+ float offset = npos - (float)ipos;
+ return colors[ipos] * (1.0f - offset) + colors[ipos + 1] * offset;
}
ccl_device int bsdf_phong_ramp_setup(PhongRampBsdf *bsdf)
{
- bsdf->type = CLOSURE_BSDF_PHONG_RAMP_ID;
- bsdf->exponent = max(bsdf->exponent, 0.0f);
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ bsdf->type = CLOSURE_BSDF_PHONG_RAMP_ID;
+ bsdf->exponent = max(bsdf->exponent, 0.0f);
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
-ccl_device float3 bsdf_phong_ramp_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_phong_ramp_eval_reflect(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- const PhongRampBsdf *bsdf = (const PhongRampBsdf*)sc;
- float m_exponent = bsdf->exponent;
- float cosNI = dot(bsdf->N, omega_in);
- float cosNO = dot(bsdf->N, I);
-
- if(cosNI > 0 && cosNO > 0) {
- // reflect the view vector
- float3 R = (2 * cosNO) * bsdf->N - I;
- float cosRI = dot(R, omega_in);
- if(cosRI > 0) {
- float cosp = powf(cosRI, m_exponent);
- float common = 0.5f * M_1_PI_F * cosp;
- float out = cosNI * (m_exponent + 2) * common;
- *pdf = (m_exponent + 1) * common;
- return bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out;
- }
- }
-
- return make_float3(0.0f, 0.0f, 0.0f);
+ const PhongRampBsdf *bsdf = (const PhongRampBsdf *)sc;
+ float m_exponent = bsdf->exponent;
+ float cosNI = dot(bsdf->N, omega_in);
+ float cosNO = dot(bsdf->N, I);
+
+ if (cosNI > 0 && cosNO > 0) {
+ // reflect the view vector
+ float3 R = (2 * cosNO) * bsdf->N - I;
+ float cosRI = dot(R, omega_in);
+ if (cosRI > 0) {
+ float cosp = powf(cosRI, m_exponent);
+ float common = 0.5f * M_1_PI_F * cosp;
+ float out = cosNI * (m_exponent + 2) * common;
+ *pdf = (m_exponent + 1) * common;
+ return bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out;
+ }
+ }
+
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device float3 bsdf_phong_ramp_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_phong_ramp_eval_transmit(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc,
+ float3 Ng,
+ float3 I,
+ float3 dIdx,
+ float3 dIdy,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ float3 *domega_in_dx,
+ float3 *domega_in_dy,
+ float *pdf)
{
- const PhongRampBsdf *bsdf = (const PhongRampBsdf*)sc;
- float cosNO = dot(bsdf->N, I);
- float m_exponent = bsdf->exponent;
-
- if(cosNO > 0) {
- // reflect the view vector
- float3 R = (2 * cosNO) * bsdf->N - I;
-
-#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
- *domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
-#endif
-
- float3 T, B;
- make_orthonormals (R, &T, &B);
- float phi = M_2PI_F * randu;
- float cosTheta = powf(randv, 1 / (m_exponent + 1));
- float sinTheta2 = 1 - cosTheta * cosTheta;
- float sinTheta = sinTheta2 > 0 ? sqrtf(sinTheta2) : 0;
- *omega_in = (cosf(phi) * sinTheta) * T +
- (sinf(phi) * sinTheta) * B +
- ( cosTheta) * R;
- if(dot(Ng, *omega_in) > 0.0f)
- {
- // common terms for pdf and eval
- float cosNI = dot(bsdf->N, *omega_in);
- // make sure the direction we chose is still in the right hemisphere
- if(cosNI > 0)
- {
- float cosp = powf(cosTheta, m_exponent);
- float common = 0.5f * M_1_PI_F * cosp;
- *pdf = (m_exponent + 1) * common;
- float out = cosNI * (m_exponent + 2) * common;
- *eval = bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out;
- }
- }
- }
- return LABEL_REFLECT|LABEL_GLOSSY;
+ const PhongRampBsdf *bsdf = (const PhongRampBsdf *)sc;
+ float cosNO = dot(bsdf->N, I);
+ float m_exponent = bsdf->exponent;
+
+ if (cosNO > 0) {
+ // reflect the view vector
+ float3 R = (2 * cosNO) * bsdf->N - I;
+
+# ifdef __RAY_DIFFERENTIALS__
+ *domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
+ *domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
+# endif
+
+ float3 T, B;
+ make_orthonormals(R, &T, &B);
+ float phi = M_2PI_F * randu;
+ float cosTheta = powf(randv, 1 / (m_exponent + 1));
+ float sinTheta2 = 1 - cosTheta * cosTheta;
+ float sinTheta = sinTheta2 > 0 ? sqrtf(sinTheta2) : 0;
+ *omega_in = (cosf(phi) * sinTheta) * T + (sinf(phi) * sinTheta) * B + (cosTheta)*R;
+ if (dot(Ng, *omega_in) > 0.0f) {
+ // common terms for pdf and eval
+ float cosNI = dot(bsdf->N, *omega_in);
+ // make sure the direction we chose is still in the right hemisphere
+ if (cosNI > 0) {
+ float cosp = powf(cosTheta, m_exponent);
+ float common = 0.5f * M_1_PI_F * cosp;
+ *pdf = (m_exponent + 1) * common;
+ float out = cosNI * (m_exponent + 2) * common;
+ *eval = bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out;
+ }
+ }
+ }
+ return LABEL_REFLECT | LABEL_GLOSSY;
}
-#endif /* __OSL__ */
+#endif /* __OSL__ */
CCL_NAMESPACE_END
-#endif /* __BSDF_PHONG_RAMP_H__ */
+#endif /* __BSDF_PHONG_RAMP_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
index 2f65fd54be2..d7795974ef5 100644
--- a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
@@ -25,101 +25,113 @@
CCL_NAMESPACE_BEGIN
typedef ccl_addr_space struct PrincipledDiffuseBsdf {
- SHADER_CLOSURE_BASE;
+ SHADER_CLOSURE_BASE;
- float roughness;
+ float roughness;
} PrincipledDiffuseBsdf;
-ccl_device float3 calculate_principled_diffuse_brdf(const PrincipledDiffuseBsdf *bsdf,
- float3 N, float3 V, float3 L, float3 H, float *pdf)
+ccl_device float3 calculate_principled_diffuse_brdf(
+ const PrincipledDiffuseBsdf *bsdf, float3 N, float3 V, float3 L, float3 H, float *pdf)
{
- float NdotL = max(dot(N, L), 0.0f);
- float NdotV = max(dot(N, V), 0.0f);
+ float NdotL = max(dot(N, L), 0.0f);
+ float NdotV = max(dot(N, V), 0.0f);
- if(NdotL < 0 || NdotV < 0) {
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
- }
+ if (NdotL < 0 || NdotV < 0) {
+ *pdf = 0.0f;
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
- float LdotH = dot(L, H);
+ float LdotH = dot(L, H);
- float FL = schlick_fresnel(NdotL), FV = schlick_fresnel(NdotV);
- const float Fd90 = 0.5f + 2.0f * LdotH*LdotH * bsdf->roughness;
- float Fd = (1.0f * (1.0f - FL) + Fd90 * FL) * (1.0f * (1.0f - FV) + Fd90 * FV);
+ float FL = schlick_fresnel(NdotL), FV = schlick_fresnel(NdotV);
+ const float Fd90 = 0.5f + 2.0f * LdotH * LdotH * bsdf->roughness;
+ float Fd = (1.0f * (1.0f - FL) + Fd90 * FL) * (1.0f * (1.0f - FV) + Fd90 * FV);
- float value = M_1_PI_F * NdotL * Fd;
+ float value = M_1_PI_F * NdotL * Fd;
- return make_float3(value, value, value);
+ return make_float3(value, value, value);
}
ccl_device int bsdf_principled_diffuse_setup(PrincipledDiffuseBsdf *bsdf)
{
- bsdf->type = CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID;
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ bsdf->type = CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID;
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device bool bsdf_principled_diffuse_merge(const ShaderClosure *a, const ShaderClosure *b)
{
- const PrincipledDiffuseBsdf *bsdf_a = (const PrincipledDiffuseBsdf*)a;
- const PrincipledDiffuseBsdf *bsdf_b = (const PrincipledDiffuseBsdf*)b;
+ const PrincipledDiffuseBsdf *bsdf_a = (const PrincipledDiffuseBsdf *)a;
+ const PrincipledDiffuseBsdf *bsdf_b = (const PrincipledDiffuseBsdf *)b;
- return (isequal_float3(bsdf_a->N, bsdf_b->N) && bsdf_a->roughness == bsdf_b->roughness);
+ return (isequal_float3(bsdf_a->N, bsdf_b->N) && bsdf_a->roughness == bsdf_b->roughness);
}
-ccl_device float3 bsdf_principled_diffuse_eval_reflect(const ShaderClosure *sc, const float3 I,
- const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_principled_diffuse_eval_reflect(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- const PrincipledDiffuseBsdf *bsdf = (const PrincipledDiffuseBsdf *)sc;
-
- float3 N = bsdf->N;
- float3 V = I; // outgoing
- float3 L = omega_in; // incoming
- float3 H = normalize(L + V);
-
- if(dot(N, omega_in) > 0.0f) {
- *pdf = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
- return calculate_principled_diffuse_brdf(bsdf, N, V, L, H, pdf);
- }
- else {
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
- }
+ const PrincipledDiffuseBsdf *bsdf = (const PrincipledDiffuseBsdf *)sc;
+
+ float3 N = bsdf->N;
+ float3 V = I; // outgoing
+ float3 L = omega_in; // incoming
+ float3 H = normalize(L + V);
+
+ if (dot(N, omega_in) > 0.0f) {
+ *pdf = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
+ return calculate_principled_diffuse_brdf(bsdf, N, V, L, H, pdf);
+ }
+ else {
+ *pdf = 0.0f;
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
}
-ccl_device float3 bsdf_principled_diffuse_eval_transmit(const ShaderClosure *sc, const float3 I,
- const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_principled_diffuse_eval_transmit(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
ccl_device int bsdf_principled_diffuse_sample(const ShaderClosure *sc,
- float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv,
- float3 *eval, float3 *omega_in, float3 *domega_in_dx,
- float3 *domega_in_dy, float *pdf)
+ float3 Ng,
+ float3 I,
+ float3 dIdx,
+ float3 dIdy,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ float3 *domega_in_dx,
+ float3 *domega_in_dy,
+ float *pdf)
{
- const PrincipledDiffuseBsdf *bsdf = (const PrincipledDiffuseBsdf *)sc;
+ const PrincipledDiffuseBsdf *bsdf = (const PrincipledDiffuseBsdf *)sc;
- float3 N = bsdf->N;
+ float3 N = bsdf->N;
- sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
+ sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
- if(dot(Ng, *omega_in) > 0) {
- float3 H = normalize(I + *omega_in);
+ if (dot(Ng, *omega_in) > 0) {
+ float3 H = normalize(I + *omega_in);
- *eval = calculate_principled_diffuse_brdf(bsdf, N, I, *omega_in, H, pdf);
+ *eval = calculate_principled_diffuse_brdf(bsdf, N, I, *omega_in, H, pdf);
#ifdef __RAY_DIFFERENTIALS__
- // TODO: find a better approximation for the diffuse bounce
- *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx);
- *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy);
+ // TODO: find a better approximation for the diffuse bounce
+ *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx);
+ *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy);
#endif
- }
- else {
- *pdf = 0.0f;
- }
- return LABEL_REFLECT|LABEL_DIFFUSE;
+ }
+ else {
+ *pdf = 0.0f;
+ }
+ return LABEL_REFLECT | LABEL_DIFFUSE;
}
CCL_NAMESPACE_END
-#endif /* __BSDF_PRINCIPLED_DIFFUSE_H__ */
+#endif /* __BSDF_PRINCIPLED_DIFFUSE_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_principled_sheen.h b/intern/cycles/kernel/closure/bsdf_principled_sheen.h
index ccdcb1babd2..bc522095b3b 100644
--- a/intern/cycles/kernel/closure/bsdf_principled_sheen.h
+++ b/intern/cycles/kernel/closure/bsdf_principled_sheen.h
@@ -25,87 +25,99 @@
CCL_NAMESPACE_BEGIN
typedef ccl_addr_space struct PrincipledSheenBsdf {
- SHADER_CLOSURE_BASE;
+ SHADER_CLOSURE_BASE;
} PrincipledSheenBsdf;
-ccl_device float3 calculate_principled_sheen_brdf(const PrincipledSheenBsdf *bsdf,
- float3 N, float3 V, float3 L, float3 H, float *pdf)
+ccl_device float3 calculate_principled_sheen_brdf(
+ const PrincipledSheenBsdf *bsdf, float3 N, float3 V, float3 L, float3 H, float *pdf)
{
- float NdotL = dot(N, L);
- float NdotV = dot(N, V);
+ float NdotL = dot(N, L);
+ float NdotV = dot(N, V);
- if(NdotL < 0 || NdotV < 0) {
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
- }
+ if (NdotL < 0 || NdotV < 0) {
+ *pdf = 0.0f;
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
- float LdotH = dot(L, H);
+ float LdotH = dot(L, H);
- float value = schlick_fresnel(LdotH) * NdotL;
+ float value = schlick_fresnel(LdotH) * NdotL;
- return make_float3(value, value, value);
+ return make_float3(value, value, value);
}
ccl_device int bsdf_principled_sheen_setup(PrincipledSheenBsdf *bsdf)
{
- bsdf->type = CLOSURE_BSDF_PRINCIPLED_SHEEN_ID;
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ bsdf->type = CLOSURE_BSDF_PRINCIPLED_SHEEN_ID;
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
-ccl_device float3 bsdf_principled_sheen_eval_reflect(const ShaderClosure *sc, const float3 I,
- const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_principled_sheen_eval_reflect(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- const PrincipledSheenBsdf *bsdf = (const PrincipledSheenBsdf *)sc;
-
- float3 N = bsdf->N;
- float3 V = I; // outgoing
- float3 L = omega_in; // incoming
- float3 H = normalize(L + V);
-
- if(dot(N, omega_in) > 0.0f) {
- *pdf = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
- return calculate_principled_sheen_brdf(bsdf, N, V, L, H, pdf);
- }
- else {
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
- }
+ const PrincipledSheenBsdf *bsdf = (const PrincipledSheenBsdf *)sc;
+
+ float3 N = bsdf->N;
+ float3 V = I; // outgoing
+ float3 L = omega_in; // incoming
+ float3 H = normalize(L + V);
+
+ if (dot(N, omega_in) > 0.0f) {
+ *pdf = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
+ return calculate_principled_sheen_brdf(bsdf, N, V, L, H, pdf);
+ }
+ else {
+ *pdf = 0.0f;
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
}
-ccl_device float3 bsdf_principled_sheen_eval_transmit(const ShaderClosure *sc, const float3 I,
- const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_principled_sheen_eval_transmit(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
ccl_device int bsdf_principled_sheen_sample(const ShaderClosure *sc,
- float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv,
- float3 *eval, float3 *omega_in, float3 *domega_in_dx,
- float3 *domega_in_dy, float *pdf)
+ float3 Ng,
+ float3 I,
+ float3 dIdx,
+ float3 dIdy,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ float3 *domega_in_dx,
+ float3 *domega_in_dy,
+ float *pdf)
{
- const PrincipledSheenBsdf *bsdf = (const PrincipledSheenBsdf *)sc;
+ const PrincipledSheenBsdf *bsdf = (const PrincipledSheenBsdf *)sc;
- float3 N = bsdf->N;
+ float3 N = bsdf->N;
- sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
+ sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
- if(dot(Ng, *omega_in) > 0) {
- float3 H = normalize(I + *omega_in);
+ if (dot(Ng, *omega_in) > 0) {
+ float3 H = normalize(I + *omega_in);
- *eval = calculate_principled_sheen_brdf(bsdf, N, I, *omega_in, H, pdf);
+ *eval = calculate_principled_sheen_brdf(bsdf, N, I, *omega_in, H, pdf);
#ifdef __RAY_DIFFERENTIALS__
- // TODO: find a better approximation for the diffuse bounce
- *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx);
- *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy);
+ // TODO: find a better approximation for the diffuse bounce
+ *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx);
+ *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy);
#endif
- }
- else {
- *pdf = 0.0f;
- }
- return LABEL_REFLECT|LABEL_DIFFUSE;
+ }
+ else {
+ *pdf = 0.0f;
+ }
+ return LABEL_REFLECT | LABEL_DIFFUSE;
}
CCL_NAMESPACE_END
-#endif /* __BSDF_PRINCIPLED_SHEEN_H__ */
+#endif /* __BSDF_PRINCIPLED_SHEEN_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_reflection.h b/intern/cycles/kernel/closure/bsdf_reflection.h
index 94f1c283af7..c24ba170915 100644
--- a/intern/cycles/kernel/closure/bsdf_reflection.h
+++ b/intern/cycles/kernel/closure/bsdf_reflection.h
@@ -39,42 +39,59 @@ CCL_NAMESPACE_BEGIN
ccl_device int bsdf_reflection_setup(MicrofacetBsdf *bsdf)
{
- bsdf->type = CLOSURE_BSDF_REFLECTION_ID;
- return SD_BSDF;
+ bsdf->type = CLOSURE_BSDF_REFLECTION_ID;
+ return SD_BSDF;
}
-ccl_device float3 bsdf_reflection_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_reflection_eval_reflect(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device float3 bsdf_reflection_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_reflection_eval_transmit(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device int bsdf_reflection_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_reflection_sample(const ShaderClosure *sc,
+ float3 Ng,
+ float3 I,
+ float3 dIdx,
+ float3 dIdy,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ float3 *domega_in_dx,
+ float3 *domega_in_dy,
+ float *pdf)
{
- const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
- float3 N = bsdf->N;
+ const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+ float3 N = bsdf->N;
- // only one direction is possible
- float cosNO = dot(N, I);
- if(cosNO > 0) {
- *omega_in = (2 * cosNO) * N - I;
- if(dot(Ng, *omega_in) > 0) {
+ // only one direction is possible
+ float cosNO = dot(N, I);
+ if (cosNO > 0) {
+ *omega_in = (2 * cosNO) * N - I;
+ if (dot(Ng, *omega_in) > 0) {
#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = 2 * dot(N, dIdx) * N - dIdx;
- *domega_in_dy = 2 * dot(N, dIdy) * N - dIdy;
+ *domega_in_dx = 2 * dot(N, dIdx) * N - dIdx;
+ *domega_in_dy = 2 * dot(N, dIdy) * N - dIdy;
#endif
- /* Some high number for MIS. */
- *pdf = 1e6f;
- *eval = make_float3(1e6f, 1e6f, 1e6f);
- }
- }
- return LABEL_REFLECT|LABEL_SINGULAR;
+ /* Some high number for MIS. */
+ *pdf = 1e6f;
+ *eval = make_float3(1e6f, 1e6f, 1e6f);
+ }
+ }
+ return LABEL_REFLECT | LABEL_SINGULAR;
}
CCL_NAMESPACE_END
-#endif /* __BSDF_REFLECTION_H__ */
+#endif /* __BSDF_REFLECTION_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_refraction.h b/intern/cycles/kernel/closure/bsdf_refraction.h
index abdd01c7a1d..d4fbe86dac0 100644
--- a/intern/cycles/kernel/closure/bsdf_refraction.h
+++ b/intern/cycles/kernel/closure/bsdf_refraction.h
@@ -39,51 +39,77 @@ CCL_NAMESPACE_BEGIN
ccl_device int bsdf_refraction_setup(MicrofacetBsdf *bsdf)
{
- bsdf->type = CLOSURE_BSDF_REFRACTION_ID;
- return SD_BSDF;
+ bsdf->type = CLOSURE_BSDF_REFRACTION_ID;
+ return SD_BSDF;
}
-ccl_device float3 bsdf_refraction_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_refraction_eval_reflect(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device float3 bsdf_refraction_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_refraction_eval_transmit(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device int bsdf_refraction_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_refraction_sample(const ShaderClosure *sc,
+ float3 Ng,
+ float3 I,
+ float3 dIdx,
+ float3 dIdy,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ float3 *domega_in_dx,
+ float3 *domega_in_dy,
+ float *pdf)
{
- const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
- float m_eta = bsdf->ior;
- float3 N = bsdf->N;
+ const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+ float m_eta = bsdf->ior;
+ float3 N = bsdf->N;
- float3 R, T;
+ float3 R, T;
#ifdef __RAY_DIFFERENTIALS__
- float3 dRdx, dRdy, dTdx, dTdy;
+ float3 dRdx, dRdy, dTdx, dTdy;
#endif
- bool inside;
- float fresnel;
- fresnel = fresnel_dielectric(m_eta, N, I, &R, &T,
+ bool inside;
+ float fresnel;
+ fresnel = fresnel_dielectric(m_eta,
+ N,
+ I,
+ &R,
+ &T,
#ifdef __RAY_DIFFERENTIALS__
- dIdx, dIdy, &dRdx, &dRdy, &dTdx, &dTdy,
+ dIdx,
+ dIdy,
+ &dRdx,
+ &dRdy,
+ &dTdx,
+ &dTdy,
#endif
- &inside);
+ &inside);
- if(!inside && fresnel != 1.0f) {
- /* Some high number for MIS. */
- *pdf = 1e6f;
- *eval = make_float3(1e6f, 1e6f, 1e6f);
- *omega_in = T;
+ if (!inside && fresnel != 1.0f) {
+ /* Some high number for MIS. */
+ *pdf = 1e6f;
+ *eval = make_float3(1e6f, 1e6f, 1e6f);
+ *omega_in = T;
#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = dTdx;
- *domega_in_dy = dTdy;
+ *domega_in_dx = dTdx;
+ *domega_in_dy = dTdy;
#endif
- }
- return LABEL_TRANSMIT|LABEL_SINGULAR;
+ }
+ return LABEL_TRANSMIT | LABEL_SINGULAR;
}
CCL_NAMESPACE_END
-#endif /* __BSDF_REFRACTION_H__ */
+#endif /* __BSDF_REFRACTION_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_toon.h b/intern/cycles/kernel/closure/bsdf_toon.h
index 097a56f22eb..f37fd228087 100644
--- a/intern/cycles/kernel/closure/bsdf_toon.h
+++ b/intern/cycles/kernel/closure/bsdf_toon.h
@@ -36,183 +36,215 @@
CCL_NAMESPACE_BEGIN
typedef ccl_addr_space struct ToonBsdf {
- SHADER_CLOSURE_BASE;
+ SHADER_CLOSURE_BASE;
- float size;
- float smooth;
+ float size;
+ float smooth;
} ToonBsdf;
/* DIFFUSE TOON */
ccl_device int bsdf_diffuse_toon_setup(ToonBsdf *bsdf)
{
- bsdf->type = CLOSURE_BSDF_DIFFUSE_TOON_ID;
- bsdf->size = saturate(bsdf->size);
- bsdf->smooth = saturate(bsdf->smooth);
+ bsdf->type = CLOSURE_BSDF_DIFFUSE_TOON_ID;
+ bsdf->size = saturate(bsdf->size);
+ bsdf->smooth = saturate(bsdf->smooth);
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device bool bsdf_toon_merge(const ShaderClosure *a, const ShaderClosure *b)
{
- const ToonBsdf *bsdf_a = (const ToonBsdf*)a;
- const ToonBsdf *bsdf_b = (const ToonBsdf*)b;
+ const ToonBsdf *bsdf_a = (const ToonBsdf *)a;
+ const ToonBsdf *bsdf_b = (const ToonBsdf *)b;
- return (isequal_float3(bsdf_a->N, bsdf_b->N)) &&
- (bsdf_a->size == bsdf_b->size) &&
- (bsdf_a->smooth == bsdf_b->smooth);
+ return (isequal_float3(bsdf_a->N, bsdf_b->N)) && (bsdf_a->size == bsdf_b->size) &&
+ (bsdf_a->smooth == bsdf_b->smooth);
}
ccl_device float3 bsdf_toon_get_intensity(float max_angle, float smooth, float angle)
{
- float is;
+ float is;
- if(angle < max_angle)
- is = 1.0f;
- else if(angle < (max_angle + smooth) && smooth != 0.0f)
- is = (1.0f - (angle - max_angle)/smooth);
- else
- is = 0.0f;
+ if (angle < max_angle)
+ is = 1.0f;
+ else if (angle < (max_angle + smooth) && smooth != 0.0f)
+ is = (1.0f - (angle - max_angle) / smooth);
+ else
+ is = 0.0f;
- return make_float3(is, is, is);
+ return make_float3(is, is, is);
}
ccl_device float bsdf_toon_get_sample_angle(float max_angle, float smooth)
{
- return fminf(max_angle + smooth, M_PI_2_F);
+ return fminf(max_angle + smooth, M_PI_2_F);
}
-ccl_device float3 bsdf_diffuse_toon_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_diffuse_toon_eval_reflect(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- const ToonBsdf *bsdf = (const ToonBsdf*)sc;
- float max_angle = bsdf->size*M_PI_2_F;
- float smooth = bsdf->smooth*M_PI_2_F;
- float angle = safe_acosf(fmaxf(dot(bsdf->N, omega_in), 0.0f));
+ const ToonBsdf *bsdf = (const ToonBsdf *)sc;
+ float max_angle = bsdf->size * M_PI_2_F;
+ float smooth = bsdf->smooth * M_PI_2_F;
+ float angle = safe_acosf(fmaxf(dot(bsdf->N, omega_in), 0.0f));
- float3 eval = bsdf_toon_get_intensity(max_angle, smooth, angle);
+ float3 eval = bsdf_toon_get_intensity(max_angle, smooth, angle);
- if(eval.x > 0.0f) {
- float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
+ if (eval.x > 0.0f) {
+ float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
- *pdf = 0.5f * M_1_PI_F / (1.0f - cosf(sample_angle));
- return *pdf * eval;
- }
+ *pdf = 0.5f * M_1_PI_F / (1.0f - cosf(sample_angle));
+ return *pdf * eval;
+ }
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device float3 bsdf_diffuse_toon_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_diffuse_toon_eval_transmit(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device int bsdf_diffuse_toon_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_diffuse_toon_sample(const ShaderClosure *sc,
+ float3 Ng,
+ float3 I,
+ float3 dIdx,
+ float3 dIdy,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ float3 *domega_in_dx,
+ float3 *domega_in_dy,
+ float *pdf)
{
- const ToonBsdf *bsdf = (const ToonBsdf*)sc;
- float max_angle = bsdf->size*M_PI_2_F;
- float smooth = bsdf->smooth*M_PI_2_F;
- float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
- float angle = sample_angle*randu;
+ const ToonBsdf *bsdf = (const ToonBsdf *)sc;
+ float max_angle = bsdf->size * M_PI_2_F;
+ float smooth = bsdf->smooth * M_PI_2_F;
+ float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
+ float angle = sample_angle * randu;
- if(sample_angle > 0.0f) {
- sample_uniform_cone(bsdf->N, sample_angle, randu, randv, omega_in, pdf);
+ if (sample_angle > 0.0f) {
+ sample_uniform_cone(bsdf->N, sample_angle, randu, randv, omega_in, pdf);
- if(dot(Ng, *omega_in) > 0.0f) {
- *eval = *pdf * bsdf_toon_get_intensity(max_angle, smooth, angle);
+ if (dot(Ng, *omega_in) > 0.0f) {
+ *eval = *pdf * bsdf_toon_get_intensity(max_angle, smooth, angle);
#ifdef __RAY_DIFFERENTIALS__
- // TODO: find a better approximation for the bounce
- *domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
- *domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
+ // TODO: find a better approximation for the bounce
+ *domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
+ *domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
#endif
- }
- else
- *pdf = 0.0f;
- }
-
- return LABEL_REFLECT | LABEL_DIFFUSE;
+ }
+ else
+ *pdf = 0.0f;
+ }
+ return LABEL_REFLECT | LABEL_DIFFUSE;
}
/* GLOSSY TOON */
ccl_device int bsdf_glossy_toon_setup(ToonBsdf *bsdf)
{
- bsdf->type = CLOSURE_BSDF_GLOSSY_TOON_ID;
- bsdf->size = saturate(bsdf->size);
- bsdf->smooth = saturate(bsdf->smooth);
+ bsdf->type = CLOSURE_BSDF_GLOSSY_TOON_ID;
+ bsdf->size = saturate(bsdf->size);
+ bsdf->smooth = saturate(bsdf->smooth);
- return SD_BSDF|SD_BSDF_HAS_EVAL;
+ return SD_BSDF | SD_BSDF_HAS_EVAL;
}
-ccl_device float3 bsdf_glossy_toon_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_glossy_toon_eval_reflect(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- const ToonBsdf *bsdf = (const ToonBsdf*)sc;
- float max_angle = bsdf->size*M_PI_2_F;
- float smooth = bsdf->smooth*M_PI_2_F;
- float cosNI = dot(bsdf->N, omega_in);
- float cosNO = dot(bsdf->N, I);
+ const ToonBsdf *bsdf = (const ToonBsdf *)sc;
+ float max_angle = bsdf->size * M_PI_2_F;
+ float smooth = bsdf->smooth * M_PI_2_F;
+ float cosNI = dot(bsdf->N, omega_in);
+ float cosNO = dot(bsdf->N, I);
- if(cosNI > 0 && cosNO > 0) {
- /* reflect the view vector */
- float3 R = (2 * cosNO) * bsdf->N - I;
- float cosRI = dot(R, omega_in);
+ if (cosNI > 0 && cosNO > 0) {
+ /* reflect the view vector */
+ float3 R = (2 * cosNO) * bsdf->N - I;
+ float cosRI = dot(R, omega_in);
- float angle = safe_acosf(fmaxf(cosRI, 0.0f));
+ float angle = safe_acosf(fmaxf(cosRI, 0.0f));
- float3 eval = bsdf_toon_get_intensity(max_angle, smooth, angle);
- float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
+ float3 eval = bsdf_toon_get_intensity(max_angle, smooth, angle);
+ float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
- *pdf = 0.5f * M_1_PI_F / (1.0f - cosf(sample_angle));
- return *pdf * eval;
- }
+ *pdf = 0.5f * M_1_PI_F / (1.0f - cosf(sample_angle));
+ return *pdf * eval;
+ }
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device float3 bsdf_glossy_toon_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_glossy_toon_eval_transmit(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device int bsdf_glossy_toon_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_glossy_toon_sample(const ShaderClosure *sc,
+ float3 Ng,
+ float3 I,
+ float3 dIdx,
+ float3 dIdy,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ float3 *domega_in_dx,
+ float3 *domega_in_dy,
+ float *pdf)
{
- const ToonBsdf *bsdf = (const ToonBsdf*)sc;
- float max_angle = bsdf->size*M_PI_2_F;
- float smooth = bsdf->smooth*M_PI_2_F;
- float cosNO = dot(bsdf->N, I);
+ const ToonBsdf *bsdf = (const ToonBsdf *)sc;
+ float max_angle = bsdf->size * M_PI_2_F;
+ float smooth = bsdf->smooth * M_PI_2_F;
+ float cosNO = dot(bsdf->N, I);
- if(cosNO > 0) {
- /* reflect the view vector */
- float3 R = (2 * cosNO) * bsdf->N - I;
+ if (cosNO > 0) {
+ /* reflect the view vector */
+ float3 R = (2 * cosNO) * bsdf->N - I;
- float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
- float angle = sample_angle*randu;
+ float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
+ float angle = sample_angle * randu;
- sample_uniform_cone(R, sample_angle, randu, randv, omega_in, pdf);
+ sample_uniform_cone(R, sample_angle, randu, randv, omega_in, pdf);
- if(dot(Ng, *omega_in) > 0.0f) {
- float cosNI = dot(bsdf->N, *omega_in);
+ if (dot(Ng, *omega_in) > 0.0f) {
+ float cosNI = dot(bsdf->N, *omega_in);
- /* make sure the direction we chose is still in the right hemisphere */
- if(cosNI > 0) {
- *eval = *pdf * bsdf_toon_get_intensity(max_angle, smooth, angle);
+ /* make sure the direction we chose is still in the right hemisphere */
+ if (cosNI > 0) {
+ *eval = *pdf * bsdf_toon_get_intensity(max_angle, smooth, angle);
#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
- *domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
+ *domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
+ *domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
#endif
- }
- else
- *pdf = 0.0f;
- }
- else
- *pdf = 0.0f;
- }
-
- return LABEL_GLOSSY | LABEL_REFLECT;
+ }
+ else
+ *pdf = 0.0f;
+ }
+ else
+ *pdf = 0.0f;
+ }
+
+ return LABEL_GLOSSY | LABEL_REFLECT;
}
CCL_NAMESPACE_END
-#endif /* __BSDF_TOON_H__ */
+#endif /* __BSDF_TOON_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_transparent.h b/intern/cycles/kernel/closure/bsdf_transparent.h
index 060dff69f52..4e5513499e8 100644
--- a/intern/cycles/kernel/closure/bsdf_transparent.h
+++ b/intern/cycles/kernel/closure/bsdf_transparent.h
@@ -37,73 +37,91 @@ CCL_NAMESPACE_BEGIN
ccl_device void bsdf_transparent_setup(ShaderData *sd, const float3 weight, int path_flag)
{
- /* Check cutoff weight. */
- float sample_weight = fabsf(average(weight));
- if(!(sample_weight >= CLOSURE_WEIGHT_CUTOFF)) {
- return;
- }
+ /* Check cutoff weight. */
+ float sample_weight = fabsf(average(weight));
+ if (!(sample_weight >= CLOSURE_WEIGHT_CUTOFF)) {
+ return;
+ }
- if(sd->flag & SD_TRANSPARENT) {
- sd->closure_transparent_extinction += weight;
+ if (sd->flag & SD_TRANSPARENT) {
+ sd->closure_transparent_extinction += weight;
- /* Add weight to existing transparent BSDF. */
- for(int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
+ /* Add weight to existing transparent BSDF. */
+ for (int i = 0; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
- if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) {
- sc->weight += weight;
- sc->sample_weight += sample_weight;
- break;
- }
- }
- }
- else {
- sd->flag |= SD_BSDF|SD_TRANSPARENT;
- sd->closure_transparent_extinction = weight;
+ if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) {
+ sc->weight += weight;
+ sc->sample_weight += sample_weight;
+ break;
+ }
+ }
+ }
+ else {
+ sd->flag |= SD_BSDF | SD_TRANSPARENT;
+ sd->closure_transparent_extinction = weight;
- if(path_flag & PATH_RAY_TERMINATE) {
- /* In this case the number of closures is set to zero to disable
- * all others, but we still want to get transparency so increase
- * the number just for this. */
- sd->num_closure_left = 1;
- }
+ if (path_flag & PATH_RAY_TERMINATE) {
+ /* In this case the number of closures is set to zero to disable
+ * all others, but we still want to get transparency so increase
+ * the number just for this. */
+ sd->num_closure_left = 1;
+ }
- /* Create new transparent BSDF. */
- ShaderClosure *bsdf = closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_BSDF_TRANSPARENT_ID, weight);
+ /* Create new transparent BSDF. */
+ ShaderClosure *bsdf = closure_alloc(
+ sd, sizeof(ShaderClosure), CLOSURE_BSDF_TRANSPARENT_ID, weight);
- if(bsdf) {
- bsdf->sample_weight = sample_weight;
- bsdf->N = sd->N;
- }
- else if(path_flag & PATH_RAY_TERMINATE) {
- sd->num_closure_left = 0;
- }
- }
+ if (bsdf) {
+ bsdf->sample_weight = sample_weight;
+ bsdf->N = sd->N;
+ }
+ else if (path_flag & PATH_RAY_TERMINATE) {
+ sd->num_closure_left = 0;
+ }
+ }
}
-ccl_device float3 bsdf_transparent_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_transparent_eval_reflect(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device float3 bsdf_transparent_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_transparent_eval_transmit(const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ float *pdf)
{
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device int bsdf_transparent_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_transparent_sample(const ShaderClosure *sc,
+ float3 Ng,
+ float3 I,
+ float3 dIdx,
+ float3 dIdy,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ float3 *domega_in_dx,
+ float3 *domega_in_dy,
+ float *pdf)
{
- // only one direction is possible
- *omega_in = -I;
+ // only one direction is possible
+ *omega_in = -I;
#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = -dIdx;
- *domega_in_dy = -dIdy;
+ *domega_in_dx = -dIdx;
+ *domega_in_dy = -dIdy;
#endif
- *pdf = 1;
- *eval = make_float3(1, 1, 1);
- return LABEL_TRANSMIT|LABEL_TRANSPARENT;
+ *pdf = 1;
+ *eval = make_float3(1, 1, 1);
+ return LABEL_TRANSMIT | LABEL_TRANSPARENT;
}
CCL_NAMESPACE_END
-#endif /* __BSDF_TRANSPARENT_H__ */
+#endif /* __BSDF_TRANSPARENT_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_util.h b/intern/cycles/kernel/closure/bsdf_util.h
index 4f3453675c7..a9a27edd7de 100644
--- a/intern/cycles/kernel/closure/bsdf_util.h
+++ b/intern/cycles/kernel/closure/bsdf_util.h
@@ -35,127 +35,134 @@
CCL_NAMESPACE_BEGIN
-ccl_device float fresnel_dielectric(
- float eta, const float3 N,
- const float3 I, float3 *R, float3 *T,
+ccl_device float fresnel_dielectric(float eta,
+ const float3 N,
+ const float3 I,
+ float3 *R,
+ float3 *T,
#ifdef __RAY_DIFFERENTIALS__
- const float3 dIdx, const float3 dIdy,
- float3 *dRdx, float3 *dRdy,
- float3 *dTdx, float3 *dTdy,
+ const float3 dIdx,
+ const float3 dIdy,
+ float3 *dRdx,
+ float3 *dRdy,
+ float3 *dTdx,
+ float3 *dTdy,
#endif
- bool *is_inside)
+ bool *is_inside)
{
- float cos = dot(N, I), neta;
- float3 Nn;
+ float cos = dot(N, I), neta;
+ float3 Nn;
- // check which side of the surface we are on
- if(cos > 0) {
- // we are on the outside of the surface, going in
- neta = 1 / eta;
- Nn = N;
- *is_inside = false;
- }
- else {
- // we are inside the surface
- cos = -cos;
- neta = eta;
- Nn = -N;
- *is_inside = true;
- }
+ // check which side of the surface we are on
+ if (cos > 0) {
+ // we are on the outside of the surface, going in
+ neta = 1 / eta;
+ Nn = N;
+ *is_inside = false;
+ }
+ else {
+ // we are inside the surface
+ cos = -cos;
+ neta = eta;
+ Nn = -N;
+ *is_inside = true;
+ }
- // compute reflection
- *R = (2 * cos)* Nn - I;
+ // compute reflection
+ *R = (2 * cos) * Nn - I;
#ifdef __RAY_DIFFERENTIALS__
- *dRdx = (2 * dot(Nn, dIdx)) * Nn - dIdx;
- *dRdy = (2 * dot(Nn, dIdy)) * Nn - dIdy;
+ *dRdx = (2 * dot(Nn, dIdx)) * Nn - dIdx;
+ *dRdy = (2 * dot(Nn, dIdy)) * Nn - dIdy;
#endif
- float arg = 1 -(neta * neta *(1 -(cos * cos)));
- if(arg < 0) {
- *T = make_float3(0.0f, 0.0f, 0.0f);
+ float arg = 1 - (neta * neta * (1 - (cos * cos)));
+ if (arg < 0) {
+ *T = make_float3(0.0f, 0.0f, 0.0f);
#ifdef __RAY_DIFFERENTIALS__
- *dTdx = make_float3(0.0f, 0.0f, 0.0f);
- *dTdy = make_float3(0.0f, 0.0f, 0.0f);
+ *dTdx = make_float3(0.0f, 0.0f, 0.0f);
+ *dTdy = make_float3(0.0f, 0.0f, 0.0f);
#endif
- return 1; // total internal reflection
- }
- else {
- float dnp = max(sqrtf(arg), 1e-7f);
- float nK = (neta * cos)- dnp;
- *T = -(neta * I)+(nK * Nn);
+ return 1; // total internal reflection
+ }
+ else {
+ float dnp = max(sqrtf(arg), 1e-7f);
+ float nK = (neta * cos) - dnp;
+ *T = -(neta * I) + (nK * Nn);
#ifdef __RAY_DIFFERENTIALS__
- *dTdx = -(neta * dIdx) + ((neta - neta * neta * cos / dnp) * dot(dIdx, Nn)) * Nn;
- *dTdy = -(neta * dIdy) + ((neta - neta * neta * cos / dnp) * dot(dIdy, Nn)) * Nn;
+ *dTdx = -(neta * dIdx) + ((neta - neta * neta * cos / dnp) * dot(dIdx, Nn)) * Nn;
+ *dTdy = -(neta * dIdy) + ((neta - neta * neta * cos / dnp) * dot(dIdy, Nn)) * Nn;
#endif
- // compute Fresnel terms
- float cosTheta1 = cos; // N.R
- float cosTheta2 = -dot(Nn, *T);
- float pPara = (cosTheta1 - eta * cosTheta2)/(cosTheta1 + eta * cosTheta2);
- float pPerp = (eta * cosTheta1 - cosTheta2)/(eta * cosTheta1 + cosTheta2);
- return 0.5f * (pPara * pPara + pPerp * pPerp);
- }
+ // compute Fresnel terms
+ float cosTheta1 = cos; // N.R
+ float cosTheta2 = -dot(Nn, *T);
+ float pPara = (cosTheta1 - eta * cosTheta2) / (cosTheta1 + eta * cosTheta2);
+ float pPerp = (eta * cosTheta1 - cosTheta2) / (eta * cosTheta1 + cosTheta2);
+ return 0.5f * (pPara * pPara + pPerp * pPerp);
+ }
}
ccl_device float fresnel_dielectric_cos(float cosi, float eta)
{
- // compute fresnel reflectance without explicitly computing
- // the refracted direction
- float c = fabsf(cosi);
- float g = eta * eta - 1 + c * c;
- if(g > 0) {
- g = sqrtf(g);
- float A = (g - c)/(g + c);
- float B = (c *(g + c)- 1)/(c *(g - c)+ 1);
- return 0.5f * A * A *(1 + B * B);
- }
- return 1.0f; // TIR(no refracted component)
+ // compute fresnel reflectance without explicitly computing
+ // the refracted direction
+ float c = fabsf(cosi);
+ float g = eta * eta - 1 + c * c;
+ if (g > 0) {
+ g = sqrtf(g);
+ float A = (g - c) / (g + c);
+ float B = (c * (g + c) - 1) / (c * (g - c) + 1);
+ return 0.5f * A * A * (1 + B * B);
+ }
+ return 1.0f; // TIR(no refracted component)
}
ccl_device float3 fresnel_conductor(float cosi, const float3 eta, const float3 k)
{
- float3 cosi2 = make_float3(cosi*cosi, cosi*cosi, cosi*cosi);
- float3 one = make_float3(1.0f, 1.0f, 1.0f);
- float3 tmp_f = eta * eta + k * k;
- float3 tmp = tmp_f * cosi2;
- float3 Rparl2 = (tmp - (2.0f * eta * cosi) + one) /
- (tmp + (2.0f * eta * cosi) + one);
- float3 Rperp2 = (tmp_f - (2.0f * eta * cosi) + cosi2) /
- (tmp_f + (2.0f * eta * cosi) + cosi2);
- return(Rparl2 + Rperp2) * 0.5f;
+ float3 cosi2 = make_float3(cosi * cosi, cosi * cosi, cosi * cosi);
+ float3 one = make_float3(1.0f, 1.0f, 1.0f);
+ float3 tmp_f = eta * eta + k * k;
+ float3 tmp = tmp_f * cosi2;
+ float3 Rparl2 = (tmp - (2.0f * eta * cosi) + one) / (tmp + (2.0f * eta * cosi) + one);
+ float3 Rperp2 = (tmp_f - (2.0f * eta * cosi) + cosi2) / (tmp_f + (2.0f * eta * cosi) + cosi2);
+ return (Rparl2 + Rperp2) * 0.5f;
}
ccl_device float schlick_fresnel(float u)
{
- float m = clamp(1.0f - u, 0.0f, 1.0f);
- float m2 = m * m;
- return m2 * m2 * m; // pow(m, 5)
+ float m = clamp(1.0f - u, 0.0f, 1.0f);
+ float m2 = m * m;
+ return m2 * m2 * m; // pow(m, 5)
}
ccl_device float smooth_step(float edge0, float edge1, float x)
{
- float result;
- if(x < edge0) result = 0.0f;
- else if(x >= edge1) result = 1.0f;
- else {
- float t = (x - edge0)/(edge1 - edge0);
- result = (3.0f-2.0f*t)*(t*t);
- }
- return result;
+ float result;
+ if (x < edge0)
+ result = 0.0f;
+ else if (x >= edge1)
+ result = 1.0f;
+ else {
+ float t = (x - edge0) / (edge1 - edge0);
+ result = (3.0f - 2.0f * t) * (t * t);
+ }
+ return result;
}
/* Calculate the fresnel color which is a blend between white and the F0 color (cspec0) */
-ccl_device_forceinline float3 interpolate_fresnel_color(float3 L, float3 H, float ior, float F0, float3 cspec0) {
- /* Calculate the fresnel interpolation factor
- * The value from fresnel_dielectric_cos(...) has to be normalized because
- * the cspec0 keeps the F0 color
- */
- float F0_norm = 1.0f / (1.0f - F0);
- float FH = (fresnel_dielectric_cos(dot(L, H), ior) - F0) * F0_norm;
+ccl_device_forceinline float3
+interpolate_fresnel_color(float3 L, float3 H, float ior, float F0, float3 cspec0)
+{
+ /* Calculate the fresnel interpolation factor
+ * The value from fresnel_dielectric_cos(...) has to be normalized because
+ * the cspec0 keeps the F0 color
+ */
+ float F0_norm = 1.0f / (1.0f - F0);
+ float FH = (fresnel_dielectric_cos(dot(L, H), ior) - F0) * F0_norm;
- /* Blend between white and a specular color with respect to the fresnel */
- return cspec0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH;
+ /* Blend between white and a specular color with respect to the fresnel */
+ return cspec0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH;
}
CCL_NAMESPACE_END
-#endif /* __BSDF_UTIL_H__ */
+#endif /* __BSDF_UTIL_H__ */
diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h
index 98c7f23c288..57804eca269 100644
--- a/intern/cycles/kernel/closure/bssrdf.h
+++ b/intern/cycles/kernel/closure/bssrdf.h
@@ -20,14 +20,14 @@
CCL_NAMESPACE_BEGIN
typedef ccl_addr_space struct Bssrdf {
- SHADER_CLOSURE_BASE;
-
- float3 radius;
- float3 albedo;
- float sharpness;
- float texture_blur;
- float roughness;
- float channels;
+ SHADER_CLOSURE_BASE;
+
+ float3 radius;
+ float3 albedo;
+ float sharpness;
+ float texture_blur;
+ float roughness;
+ float channels;
} Bssrdf;
/* Planar Truncated Gaussian
@@ -41,41 +41,41 @@ typedef ccl_addr_space struct Bssrdf {
ccl_device float bssrdf_gaussian_eval(const float radius, float r)
{
- /* integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) from 0 to Rm
- * = 1 - exp(-Rm*Rm/(2*v)) */
- const float v = radius*radius*(0.25f*0.25f);
- const float Rm = sqrtf(v*GAUSS_TRUNCATE);
+ /* integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) from 0 to Rm
+ * = 1 - exp(-Rm*Rm/(2*v)) */
+ const float v = radius * radius * (0.25f * 0.25f);
+ const float Rm = sqrtf(v * GAUSS_TRUNCATE);
- if(r >= Rm)
- return 0.0f;
+ if (r >= Rm)
+ return 0.0f;
- return expf(-r*r/(2.0f*v))/(2.0f*M_PI_F*v);
+ return expf(-r * r / (2.0f * v)) / (2.0f * M_PI_F * v);
}
ccl_device float bssrdf_gaussian_pdf(const float radius, float r)
{
- /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
- const float area_truncated = 1.0f - expf(-0.5f*GAUSS_TRUNCATE);
+ /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
+ const float area_truncated = 1.0f - expf(-0.5f * GAUSS_TRUNCATE);
- return bssrdf_gaussian_eval(radius, r) * (1.0f/(area_truncated));
+ return bssrdf_gaussian_eval(radius, r) * (1.0f / (area_truncated));
}
ccl_device void bssrdf_gaussian_sample(const float radius, float xi, float *r, float *h)
{
- /* xi = integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) = -exp(-r^2/(2*v))
- * r = sqrt(-2*v*logf(xi)) */
- const float v = radius*radius*(0.25f*0.25f);
- const float Rm = sqrtf(v*GAUSS_TRUNCATE);
+ /* xi = integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) = -exp(-r^2/(2*v))
+ * r = sqrt(-2*v*logf(xi)) */
+ const float v = radius * radius * (0.25f * 0.25f);
+ const float Rm = sqrtf(v * GAUSS_TRUNCATE);
- /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
- const float area_truncated = 1.0f - expf(-0.5f*GAUSS_TRUNCATE);
+ /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
+ const float area_truncated = 1.0f - expf(-0.5f * GAUSS_TRUNCATE);
- /* r(xi) */
- const float r_squared = -2.0f*v*logf(1.0f - xi*area_truncated);
- *r = sqrtf(r_squared);
+ /* r(xi) */
+ const float r_squared = -2.0f * v * logf(1.0f - xi * area_truncated);
+ *r = sqrtf(r_squared);
- /* h^2 + r^2 = Rm^2 */
- *h = safe_sqrtf(Rm*Rm - r_squared);
+ /* h^2 + r^2 = Rm^2 */
+ *h = safe_sqrtf(Rm * Rm - r_squared);
}
/* Planar Cubic BSSRDF falloff
@@ -87,97 +87,97 @@ ccl_device void bssrdf_gaussian_sample(const float radius, float xi, float *r, f
ccl_device float bssrdf_cubic_eval(const float radius, const float sharpness, float r)
{
- if(sharpness == 0.0f) {
- const float Rm = radius;
-
- if(r >= Rm)
- return 0.0f;
-
- /* integrate (2*pi*r * 10*(R - r)^3)/(pi * R^5) from 0 to R = 1 */
- const float Rm5 = (Rm*Rm) * (Rm*Rm) * Rm;
- const float f = Rm - r;
- const float num = f*f*f;
-
- return (10.0f * num) / (Rm5 * M_PI_F);
-
- }
- else {
- float Rm = radius*(1.0f + sharpness);
-
- if(r >= Rm)
- return 0.0f;
-
- /* custom variation with extra sharpness, to match the previous code */
- const float y = 1.0f/(1.0f + sharpness);
- float Rmy, ry, ryinv;
-
- if(sharpness == 1.0f) {
- Rmy = sqrtf(Rm);
- ry = sqrtf(r);
- ryinv = (ry > 0.0f)? 1.0f/ry: 0.0f;
- }
- else {
- Rmy = powf(Rm, y);
- ry = powf(r, y);
- ryinv = (r > 0.0f)? powf(r, y - 1.0f): 0.0f;
- }
-
- const float Rmy5 = (Rmy*Rmy) * (Rmy*Rmy) * Rmy;
- const float f = Rmy - ry;
- const float num = f*(f*f)*(y*ryinv);
-
- return (10.0f * num) / (Rmy5 * M_PI_F);
- }
+ if (sharpness == 0.0f) {
+ const float Rm = radius;
+
+ if (r >= Rm)
+ return 0.0f;
+
+ /* integrate (2*pi*r * 10*(R - r)^3)/(pi * R^5) from 0 to R = 1 */
+ const float Rm5 = (Rm * Rm) * (Rm * Rm) * Rm;
+ const float f = Rm - r;
+ const float num = f * f * f;
+
+ return (10.0f * num) / (Rm5 * M_PI_F);
+ }
+ else {
+ float Rm = radius * (1.0f + sharpness);
+
+ if (r >= Rm)
+ return 0.0f;
+
+ /* custom variation with extra sharpness, to match the previous code */
+ const float y = 1.0f / (1.0f + sharpness);
+ float Rmy, ry, ryinv;
+
+ if (sharpness == 1.0f) {
+ Rmy = sqrtf(Rm);
+ ry = sqrtf(r);
+ ryinv = (ry > 0.0f) ? 1.0f / ry : 0.0f;
+ }
+ else {
+ Rmy = powf(Rm, y);
+ ry = powf(r, y);
+ ryinv = (r > 0.0f) ? powf(r, y - 1.0f) : 0.0f;
+ }
+
+ const float Rmy5 = (Rmy * Rmy) * (Rmy * Rmy) * Rmy;
+ const float f = Rmy - ry;
+ const float num = f * (f * f) * (y * ryinv);
+
+ return (10.0f * num) / (Rmy5 * M_PI_F);
+ }
}
ccl_device float bssrdf_cubic_pdf(const float radius, const float sharpness, float r)
{
- return bssrdf_cubic_eval(radius, sharpness, r);
+ return bssrdf_cubic_eval(radius, sharpness, r);
}
/* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */
ccl_device_forceinline float bssrdf_cubic_quintic_root_find(float xi)
{
- /* newton-raphson iteration, usually succeeds in 2-4 iterations, except
- * outside 0.02 ... 0.98 where it can go up to 10, so overall performance
- * should not be too bad */
- const float tolerance = 1e-6f;
- const int max_iteration_count = 10;
- float x = 0.25f;
- int i;
+ /* newton-raphson iteration, usually succeeds in 2-4 iterations, except
+ * outside 0.02 ... 0.98 where it can go up to 10, so overall performance
+ * should not be too bad */
+ const float tolerance = 1e-6f;
+ const int max_iteration_count = 10;
+ float x = 0.25f;
+ int i;
- for(i = 0; i < max_iteration_count; i++) {
- float x2 = x*x;
- float x3 = x2*x;
- float nx = (1.0f - x);
+ for (i = 0; i < max_iteration_count; i++) {
+ float x2 = x * x;
+ float x3 = x2 * x;
+ float nx = (1.0f - x);
- float f = 10.0f*x2 - 20.0f*x3 + 15.0f*x2*x2 - 4.0f*x2*x3 - xi;
- float f_ = 20.0f*(x*nx)*(nx*nx);
+ float f = 10.0f * x2 - 20.0f * x3 + 15.0f * x2 * x2 - 4.0f * x2 * x3 - xi;
+ float f_ = 20.0f * (x * nx) * (nx * nx);
- if(fabsf(f) < tolerance || f_ == 0.0f)
- break;
+ if (fabsf(f) < tolerance || f_ == 0.0f)
+ break;
- x = saturate(x - f/f_);
- }
+ x = saturate(x - f / f_);
+ }
- return x;
+ return x;
}
-ccl_device void bssrdf_cubic_sample(const float radius, const float sharpness, float xi, float *r, float *h)
+ccl_device void bssrdf_cubic_sample(
+ const float radius, const float sharpness, float xi, float *r, float *h)
{
- float Rm = radius;
- float r_ = bssrdf_cubic_quintic_root_find(xi);
+ float Rm = radius;
+ float r_ = bssrdf_cubic_quintic_root_find(xi);
- if(sharpness != 0.0f) {
- r_ = powf(r_, 1.0f + sharpness);
- Rm *= (1.0f + sharpness);
- }
+ if (sharpness != 0.0f) {
+ r_ = powf(r_, 1.0f + sharpness);
+ Rm *= (1.0f + sharpness);
+ }
- r_ *= Rm;
- *r = r_;
+ r_ *= Rm;
+ *r = r_;
- /* h^2 + r^2 = Rm^2 */
- *h = safe_sqrtf(Rm*Rm - r_*r_);
+ /* h^2 + r^2 = Rm^2 */
+ *h = safe_sqrtf(Rm * Rm - r_ * r_);
}
/* Approximate Reflectance Profiles
@@ -188,13 +188,13 @@ ccl_device void bssrdf_cubic_sample(const float radius, const float sharpness, f
* the mean free length, but still not too big so sampling is still
* effective. Might need some further tweaks.
*/
-#define BURLEY_TRUNCATE 16.0f
-#define BURLEY_TRUNCATE_CDF 0.9963790093708328f // cdf(BURLEY_TRUNCATE)
+#define BURLEY_TRUNCATE 16.0f
+#define BURLEY_TRUNCATE_CDF 0.9963790093708328f // cdf(BURLEY_TRUNCATE)
ccl_device_inline float bssrdf_burley_fitting(float A)
{
- /* Diffuse surface transmission, equation (6). */
- return 1.9f - A + 3.5f * (A - 0.8f) * (A - 0.8f);
+ /* Diffuse surface transmission, equation (6). */
+ return 1.9f - A + 3.5f * (A - 0.8f) * (A - 0.8f);
}
/* Scale mean free path length so it gives similar looking result
@@ -202,45 +202,44 @@ ccl_device_inline float bssrdf_burley_fitting(float A)
*/
ccl_device_inline float3 bssrdf_burley_compatible_mfp(float3 r)
{
- return 0.25f * M_1_PI_F * r;
+ return 0.25f * M_1_PI_F * r;
}
ccl_device void bssrdf_burley_setup(Bssrdf *bssrdf)
{
- /* Mean free path length. */
- const float3 l = bssrdf_burley_compatible_mfp(bssrdf->radius);
- /* Surface albedo. */
- const float3 A = bssrdf->albedo;
- const float3 s = make_float3(bssrdf_burley_fitting(A.x),
- bssrdf_burley_fitting(A.y),
- bssrdf_burley_fitting(A.z));
-
- bssrdf->radius = l / s;
+ /* Mean free path length. */
+ const float3 l = bssrdf_burley_compatible_mfp(bssrdf->radius);
+ /* Surface albedo. */
+ const float3 A = bssrdf->albedo;
+ const float3 s = make_float3(
+ bssrdf_burley_fitting(A.x), bssrdf_burley_fitting(A.y), bssrdf_burley_fitting(A.z));
+
+ bssrdf->radius = l / s;
}
ccl_device float bssrdf_burley_eval(const float d, float r)
{
- const float Rm = BURLEY_TRUNCATE * d;
-
- if(r >= Rm)
- return 0.0f;
-
- /* Burley refletance profile, equation (3).
- *
- * NOTES:
- * - Surface albedo is already included into sc->weight, no need to
- * multiply by this term here.
- * - This is normalized diffuse model, so the equation is mutliplied
- * by 2*pi, which also matches cdf().
- */
- float exp_r_3_d = expf(-r / (3.0f * d));
- float exp_r_d = exp_r_3_d * exp_r_3_d * exp_r_3_d;
- return (exp_r_d + exp_r_3_d) / (4.0f*d);
+ const float Rm = BURLEY_TRUNCATE * d;
+
+ if (r >= Rm)
+ return 0.0f;
+
+ /* Burley reflectance profile, equation (3).
+ *
+ * NOTES:
+ * - Surface albedo is already included into sc->weight, no need to
+ * multiply by this term here.
+ * - This is normalized diffuse model, so the equation is multiplied
+ * by 2*pi, which also matches cdf().
+ */
+ float exp_r_3_d = expf(-r / (3.0f * d));
+ float exp_r_d = exp_r_3_d * exp_r_3_d * exp_r_3_d;
+ return (exp_r_d + exp_r_3_d) / (4.0f * d);
}
ccl_device float bssrdf_burley_pdf(const float d, float r)
{
- return bssrdf_burley_eval(d, r) * (1.0f/BURLEY_TRUNCATE_CDF);
+ return bssrdf_burley_eval(d, r) * (1.0f / BURLEY_TRUNCATE_CDF);
}
/* Find the radius for desired CDF value.
@@ -249,52 +248,49 @@ ccl_device float bssrdf_burley_pdf(const float d, float r)
*/
ccl_device_forceinline float bssrdf_burley_root_find(float xi)
{
- const float tolerance = 1e-6f;
- const int max_iteration_count = 10;
- /* Do initial guess based on manual curve fitting, this allows us to reduce
- * number of iterations to maximum 4 across the [0..1] range. We keep maximum
- * number of iteration higher just to be sure we didn't miss root in some
- * corner case.
- */
- float r;
- if(xi <= 0.9f) {
- r = expf(xi * xi * 2.4f) - 1.0f;
- }
- else {
- /* TODO(sergey): Some nicer curve fit is possible here. */
- r = 15.0f;
- }
- /* Solve against scaled radius. */
- for(int i = 0; i < max_iteration_count; i++) {
- float exp_r_3 = expf(-r / 3.0f);
- float exp_r = exp_r_3 * exp_r_3 * exp_r_3;
- float f = 1.0f - 0.25f * exp_r - 0.75f * exp_r_3 - xi;
- float f_ = 0.25f * exp_r + 0.25f * exp_r_3;
-
- if(fabsf(f) < tolerance || f_ == 0.0f) {
- break;
- }
-
- r = r - f/f_;
- if(r < 0.0f) {
- r = 0.0f;
- }
- }
- return r;
+ const float tolerance = 1e-6f;
+ const int max_iteration_count = 10;
+ /* Do initial guess based on manual curve fitting, this allows us to reduce
+ * number of iterations to maximum 4 across the [0..1] range. We keep maximum
+ * number of iteration higher just to be sure we didn't miss root in some
+ * corner case.
+ */
+ float r;
+ if (xi <= 0.9f) {
+ r = expf(xi * xi * 2.4f) - 1.0f;
+ }
+ else {
+ /* TODO(sergey): Some nicer curve fit is possible here. */
+ r = 15.0f;
+ }
+ /* Solve against scaled radius. */
+ for (int i = 0; i < max_iteration_count; i++) {
+ float exp_r_3 = expf(-r / 3.0f);
+ float exp_r = exp_r_3 * exp_r_3 * exp_r_3;
+ float f = 1.0f - 0.25f * exp_r - 0.75f * exp_r_3 - xi;
+ float f_ = 0.25f * exp_r + 0.25f * exp_r_3;
+
+ if (fabsf(f) < tolerance || f_ == 0.0f) {
+ break;
+ }
+
+ r = r - f / f_;
+ if (r < 0.0f) {
+ r = 0.0f;
+ }
+ }
+ return r;
}
-ccl_device void bssrdf_burley_sample(const float d,
- float xi,
- float *r,
- float *h)
+ccl_device void bssrdf_burley_sample(const float d, float xi, float *r, float *h)
{
- const float Rm = BURLEY_TRUNCATE * d;
- const float r_ = bssrdf_burley_root_find(xi * BURLEY_TRUNCATE_CDF) * d;
+ const float Rm = BURLEY_TRUNCATE * d;
+ const float r_ = bssrdf_burley_root_find(xi * BURLEY_TRUNCATE_CDF) * d;
- *r = r_;
+ *r = r_;
- /* h^2 + r^2 = Rm^2 */
- *h = safe_sqrtf(Rm*Rm - r_*r_);
+ /* h^2 + r^2 = Rm^2 */
+ *h = safe_sqrtf(Rm * Rm - r_ * r_);
}
/* None BSSRDF falloff
@@ -303,200 +299,195 @@ ccl_device void bssrdf_burley_sample(const float d,
ccl_device float bssrdf_none_eval(const float radius, float r)
{
- const float Rm = radius;
- return (r < Rm)? 1.0f: 0.0f;
+ const float Rm = radius;
+ return (r < Rm) ? 1.0f : 0.0f;
}
ccl_device float bssrdf_none_pdf(const float radius, float r)
{
- /* integrate (2*pi*r)/(pi*Rm*Rm) from 0 to Rm = 1 */
- const float Rm = radius;
- const float area = (M_PI_F*Rm*Rm);
+ /* integrate (2*pi*r)/(pi*Rm*Rm) from 0 to Rm = 1 */
+ const float Rm = radius;
+ const float area = (M_PI_F * Rm * Rm);
- return bssrdf_none_eval(radius, r) / area;
+ return bssrdf_none_eval(radius, r) / area;
}
ccl_device void bssrdf_none_sample(const float radius, float xi, float *r, float *h)
{
- /* xi = integrate (2*pi*r)/(pi*Rm*Rm) = r^2/Rm^2
- * r = sqrt(xi)*Rm */
- const float Rm = radius;
- const float r_ = sqrtf(xi)*Rm;
+ /* xi = integrate (2*pi*r)/(pi*Rm*Rm) = r^2/Rm^2
+ * r = sqrt(xi)*Rm */
+ const float Rm = radius;
+ const float r_ = sqrtf(xi) * Rm;
- *r = r_;
+ *r = r_;
- /* h^2 + r^2 = Rm^2 */
- *h = safe_sqrtf(Rm*Rm - r_*r_);
+ /* h^2 + r^2 = Rm^2 */
+ *h = safe_sqrtf(Rm * Rm - r_ * r_);
}
/* Generic */
ccl_device_inline Bssrdf *bssrdf_alloc(ShaderData *sd, float3 weight)
{
- Bssrdf *bssrdf = (Bssrdf*)closure_alloc(sd, sizeof(Bssrdf), CLOSURE_NONE_ID, weight);
+ Bssrdf *bssrdf = (Bssrdf *)closure_alloc(sd, sizeof(Bssrdf), CLOSURE_NONE_ID, weight);
- if(bssrdf == NULL) {
- return NULL;
- }
+ if (bssrdf == NULL) {
+ return NULL;
+ }
- float sample_weight = fabsf(average(weight));
- bssrdf->sample_weight = sample_weight;
- return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? bssrdf : NULL;
+ float sample_weight = fabsf(average(weight));
+ bssrdf->sample_weight = sample_weight;
+ return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? bssrdf : NULL;
}
ccl_device int bssrdf_setup(ShaderData *sd, Bssrdf *bssrdf, ClosureType type)
{
- int flag = 0;
- int bssrdf_channels = 3;
- float3 diffuse_weight = make_float3(0.0f, 0.0f, 0.0f);
-
- /* Verify if the radii are large enough to sample without precision issues. */
- if(bssrdf->radius.x < BSSRDF_MIN_RADIUS) {
- diffuse_weight.x = bssrdf->weight.x;
- bssrdf->weight.x = 0.0f;
- bssrdf->radius.x = 0.0f;
- bssrdf_channels--;
- }
- if(bssrdf->radius.y < BSSRDF_MIN_RADIUS) {
- diffuse_weight.y = bssrdf->weight.y;
- bssrdf->weight.y = 0.0f;
- bssrdf->radius.y = 0.0f;
- bssrdf_channels--;
- }
- if(bssrdf->radius.z < BSSRDF_MIN_RADIUS) {
- diffuse_weight.z = bssrdf->weight.z;
- bssrdf->weight.z = 0.0f;
- bssrdf->radius.z = 0.0f;
- bssrdf_channels--;
- }
-
- if(bssrdf_channels < 3) {
- /* Add diffuse BSDF if any radius too small. */
+ int flag = 0;
+ int bssrdf_channels = 3;
+ float3 diffuse_weight = make_float3(0.0f, 0.0f, 0.0f);
+
+ /* Verify if the radii are large enough to sample without precision issues. */
+ if (bssrdf->radius.x < BSSRDF_MIN_RADIUS) {
+ diffuse_weight.x = bssrdf->weight.x;
+ bssrdf->weight.x = 0.0f;
+ bssrdf->radius.x = 0.0f;
+ bssrdf_channels--;
+ }
+ if (bssrdf->radius.y < BSSRDF_MIN_RADIUS) {
+ diffuse_weight.y = bssrdf->weight.y;
+ bssrdf->weight.y = 0.0f;
+ bssrdf->radius.y = 0.0f;
+ bssrdf_channels--;
+ }
+ if (bssrdf->radius.z < BSSRDF_MIN_RADIUS) {
+ diffuse_weight.z = bssrdf->weight.z;
+ bssrdf->weight.z = 0.0f;
+ bssrdf->radius.z = 0.0f;
+ bssrdf_channels--;
+ }
+
+ if (bssrdf_channels < 3) {
+ /* Add diffuse BSDF if any radius too small. */
#ifdef __PRINCIPLED__
- if(type == CLOSURE_BSSRDF_PRINCIPLED_ID ||
- type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
- {
- float roughness = bssrdf->roughness;
- float3 N = bssrdf->N;
-
- PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), diffuse_weight);
-
- if(bsdf) {
- bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
- bsdf->N = N;
- bsdf->roughness = roughness;
- flag |= bsdf_principled_diffuse_setup(bsdf);
- }
- }
- else
-#endif /* __PRINCIPLED__ */
- {
- DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), diffuse_weight);
-
- if(bsdf) {
- bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
- bsdf->N = bssrdf->N;
- flag |= bsdf_diffuse_setup(bsdf);
- }
- }
- }
-
- /* Setup BSSRDF if radius is large enough. */
- if(bssrdf_channels > 0) {
- bssrdf->type = type;
- bssrdf->channels = bssrdf_channels;
- bssrdf->sample_weight = fabsf(average(bssrdf->weight)) * bssrdf->channels;
- bssrdf->texture_blur = saturate(bssrdf->texture_blur);
- bssrdf->sharpness = saturate(bssrdf->sharpness);
-
- if(type == CLOSURE_BSSRDF_BURLEY_ID ||
- type == CLOSURE_BSSRDF_PRINCIPLED_ID ||
- type == CLOSURE_BSSRDF_RANDOM_WALK_ID ||
- type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
- {
- bssrdf_burley_setup(bssrdf);
- }
-
- flag |= SD_BSSRDF;
- }
- else {
- bssrdf->type = type;
- bssrdf->sample_weight = 0.0f;
- }
-
- return flag;
+ if (type == CLOSURE_BSSRDF_PRINCIPLED_ID || type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) {
+ float roughness = bssrdf->roughness;
+ float3 N = bssrdf->N;
+
+ PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
+ sd, sizeof(PrincipledDiffuseBsdf), diffuse_weight);
+
+ if (bsdf) {
+ bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
+ bsdf->N = N;
+ bsdf->roughness = roughness;
+ flag |= bsdf_principled_diffuse_setup(bsdf);
+ }
+ }
+ else
+#endif /* __PRINCIPLED__ */
+ {
+ DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), diffuse_weight);
+
+ if (bsdf) {
+ bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
+ bsdf->N = bssrdf->N;
+ flag |= bsdf_diffuse_setup(bsdf);
+ }
+ }
+ }
+
+ /* Setup BSSRDF if radius is large enough. */
+ if (bssrdf_channels > 0) {
+ bssrdf->type = type;
+ bssrdf->channels = bssrdf_channels;
+ bssrdf->sample_weight = fabsf(average(bssrdf->weight)) * bssrdf->channels;
+ bssrdf->texture_blur = saturate(bssrdf->texture_blur);
+ bssrdf->sharpness = saturate(bssrdf->sharpness);
+
+ if (type == CLOSURE_BSSRDF_BURLEY_ID || type == CLOSURE_BSSRDF_PRINCIPLED_ID ||
+ type == CLOSURE_BSSRDF_RANDOM_WALK_ID ||
+ type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) {
+ bssrdf_burley_setup(bssrdf);
+ }
+
+ flag |= SD_BSSRDF;
+ }
+ else {
+ bssrdf->type = type;
+ bssrdf->sample_weight = 0.0f;
+ }
+
+ return flag;
}
ccl_device void bssrdf_sample(const ShaderClosure *sc, float xi, float *r, float *h)
{
- const Bssrdf *bssrdf = (const Bssrdf*)sc;
- float radius;
-
- /* Sample color channel and reuse random number. Only a subset of channels
- * may be used if their radius was too small to handle as BSSRDF. */
- xi *= bssrdf->channels;
-
- if(xi < 1.0f) {
- radius = (bssrdf->radius.x > 0.0f)? bssrdf->radius.x:
- (bssrdf->radius.y > 0.0f)? bssrdf->radius.y:
- bssrdf->radius.z;
- }
- else if(xi < 2.0f) {
- xi -= 1.0f;
- radius = (bssrdf->radius.x > 0.0f)? bssrdf->radius.y:
- bssrdf->radius.z;
- }
- else {
- xi -= 2.0f;
- radius = bssrdf->radius.z;
- }
-
- /* Sample BSSRDF. */
- if(bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) {
- bssrdf_cubic_sample(radius, bssrdf->sharpness, xi, r, h);
- }
- else if(bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID){
- bssrdf_gaussian_sample(radius, xi, r, h);
- }
- else { /*if(bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/
- bssrdf_burley_sample(radius, xi, r, h);
- }
+ const Bssrdf *bssrdf = (const Bssrdf *)sc;
+ float radius;
+
+ /* Sample color channel and reuse random number. Only a subset of channels
+ * may be used if their radius was too small to handle as BSSRDF. */
+ xi *= bssrdf->channels;
+
+ if (xi < 1.0f) {
+ radius = (bssrdf->radius.x > 0.0f) ?
+ bssrdf->radius.x :
+ (bssrdf->radius.y > 0.0f) ? bssrdf->radius.y : bssrdf->radius.z;
+ }
+ else if (xi < 2.0f) {
+ xi -= 1.0f;
+ radius = (bssrdf->radius.x > 0.0f) ? bssrdf->radius.y : bssrdf->radius.z;
+ }
+ else {
+ xi -= 2.0f;
+ radius = bssrdf->radius.z;
+ }
+
+ /* Sample BSSRDF. */
+ if (bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) {
+ bssrdf_cubic_sample(radius, bssrdf->sharpness, xi, r, h);
+ }
+ else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) {
+ bssrdf_gaussian_sample(radius, xi, r, h);
+ }
+ else { /*if(bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/
+ bssrdf_burley_sample(radius, xi, r, h);
+ }
}
ccl_device float bssrdf_channel_pdf(const Bssrdf *bssrdf, float radius, float r)
{
- if(radius == 0.0f) {
- return 0.0f;
- }
- else if(bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) {
- return bssrdf_cubic_pdf(radius, bssrdf->sharpness, r);
- }
- else if(bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) {
- return bssrdf_gaussian_pdf(radius, r);
- }
- else { /*if(bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/
- return bssrdf_burley_pdf(radius, r);
- }
+ if (radius == 0.0f) {
+ return 0.0f;
+ }
+ else if (bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) {
+ return bssrdf_cubic_pdf(radius, bssrdf->sharpness, r);
+ }
+ else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) {
+ return bssrdf_gaussian_pdf(radius, r);
+ }
+ else { /*if(bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/
+ return bssrdf_burley_pdf(radius, r);
+ }
}
ccl_device_forceinline float3 bssrdf_eval(const ShaderClosure *sc, float r)
{
- const Bssrdf *bssrdf = (const Bssrdf*)sc;
+ const Bssrdf *bssrdf = (const Bssrdf *)sc;
- return make_float3(
- bssrdf_channel_pdf(bssrdf, bssrdf->radius.x, r),
- bssrdf_channel_pdf(bssrdf, bssrdf->radius.y, r),
- bssrdf_channel_pdf(bssrdf, bssrdf->radius.z, r));
+ return make_float3(bssrdf_channel_pdf(bssrdf, bssrdf->radius.x, r),
+ bssrdf_channel_pdf(bssrdf, bssrdf->radius.y, r),
+ bssrdf_channel_pdf(bssrdf, bssrdf->radius.z, r));
}
ccl_device_forceinline float bssrdf_pdf(const ShaderClosure *sc, float r)
{
- const Bssrdf *bssrdf = (const Bssrdf*)sc;
- float3 pdf = bssrdf_eval(sc, r);
+ const Bssrdf *bssrdf = (const Bssrdf *)sc;
+ float3 pdf = bssrdf_eval(sc, r);
- return (pdf.x + pdf.y + pdf.z) / bssrdf->channels;
+ return (pdf.x + pdf.y + pdf.z) / bssrdf->channels;
}
CCL_NAMESPACE_END
-#endif /* __KERNEL_BSSRDF_H__ */
+#endif /* __KERNEL_BSSRDF_H__ */
diff --git a/intern/cycles/kernel/closure/emissive.h b/intern/cycles/kernel/closure/emissive.h
index a7f4a2a7327..911382e6865 100644
--- a/intern/cycles/kernel/closure/emissive.h
+++ b/intern/cycles/kernel/closure/emissive.h
@@ -36,26 +36,26 @@ CCL_NAMESPACE_BEGIN
ccl_device void background_setup(ShaderData *sd, const float3 weight)
{
- if(sd->flag & SD_EMISSION) {
- sd->closure_emission_background += weight;
- }
- else {
- sd->flag |= SD_EMISSION;
- sd->closure_emission_background = weight;
- }
+ if (sd->flag & SD_EMISSION) {
+ sd->closure_emission_background += weight;
+ }
+ else {
+ sd->flag |= SD_EMISSION;
+ sd->closure_emission_background = weight;
+ }
}
/* EMISSION CLOSURE */
ccl_device void emission_setup(ShaderData *sd, const float3 weight)
{
- if(sd->flag & SD_EMISSION) {
- sd->closure_emission_background += weight;
- }
- else {
- sd->flag |= SD_EMISSION;
- sd->closure_emission_background = weight;
- }
+ if (sd->flag & SD_EMISSION) {
+ sd->closure_emission_background += weight;
+ }
+ else {
+ sd->flag |= SD_EMISSION;
+ sd->closure_emission_background = weight;
+ }
}
/* return the probability distribution function in the direction I,
@@ -63,21 +63,21 @@ ccl_device void emission_setup(ShaderData *sd, const float3 weight)
* the PDF computed by sample(). */
ccl_device float emissive_pdf(const float3 Ng, const float3 I)
{
- float cosNO = fabsf(dot(Ng, I));
- return (cosNO > 0.0f)? 1.0f: 0.0f;
+ float cosNO = fabsf(dot(Ng, I));
+ return (cosNO > 0.0f) ? 1.0f : 0.0f;
}
-ccl_device void emissive_sample(const float3 Ng, float randu, float randv,
- float3 *omega_out, float *pdf)
+ccl_device void emissive_sample(
+ const float3 Ng, float randu, float randv, float3 *omega_out, float *pdf)
{
- /* todo: not implemented and used yet */
+ /* todo: not implemented and used yet */
}
ccl_device float3 emissive_simple_eval(const float3 Ng, const float3 I)
{
- float res = emissive_pdf(Ng, I);
+ float res = emissive_pdf(Ng, I);
- return make_float3(res, res, res);
+ return make_float3(res, res, res);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/closure/volume.h b/intern/cycles/kernel/closure/volume.h
index 872d06c936a..473bc0e8a82 100644
--- a/intern/cycles/kernel/closure/volume.h
+++ b/intern/cycles/kernel/closure/volume.h
@@ -23,21 +23,21 @@ CCL_NAMESPACE_BEGIN
ccl_device void volume_extinction_setup(ShaderData *sd, float3 weight)
{
- if(sd->flag & SD_EXTINCTION) {
- sd->closure_transparent_extinction += weight;
- }
- else {
- sd->flag |= SD_EXTINCTION;
- sd->closure_transparent_extinction = weight;
- }
+ if (sd->flag & SD_EXTINCTION) {
+ sd->closure_transparent_extinction += weight;
+ }
+ else {
+ sd->flag |= SD_EXTINCTION;
+ sd->closure_transparent_extinction = weight;
+ }
}
/* HENYEY-GREENSTEIN CLOSURE */
typedef ccl_addr_space struct HenyeyGreensteinVolume {
- SHADER_CLOSURE_BASE;
+ SHADER_CLOSURE_BASE;
- float g;
+ float g;
} HenyeyGreensteinVolume;
/* Given cosine between rays, return probability density that a photon bounces
@@ -45,119 +45,152 @@ typedef ccl_addr_space struct HenyeyGreensteinVolume {
* uniform sphere. g=0 uniform diffuse-like, g=1 close to sharp single ray. */
ccl_device float single_peaked_henyey_greenstein(float cos_theta, float g)
{
- return ((1.0f - g * g) / safe_powf(1.0f + g * g - 2.0f * g * cos_theta, 1.5f)) * (M_1_PI_F * 0.25f);
+ return ((1.0f - g * g) / safe_powf(1.0f + g * g - 2.0f * g * cos_theta, 1.5f)) *
+ (M_1_PI_F * 0.25f);
};
ccl_device int volume_henyey_greenstein_setup(HenyeyGreensteinVolume *volume)
{
- volume->type = CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID;
+ volume->type = CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID;
- /* clamp anisotropy to avoid delta function */
- volume->g = signf(volume->g) * min(fabsf(volume->g), 1.0f - 1e-3f);
+ /* clamp anisotropy to avoid delta function */
+ volume->g = signf(volume->g) * min(fabsf(volume->g), 1.0f - 1e-3f);
- return SD_SCATTER;
+ return SD_SCATTER;
}
ccl_device bool volume_henyey_greenstein_merge(const ShaderClosure *a, const ShaderClosure *b)
{
- const HenyeyGreensteinVolume *volume_a = (const HenyeyGreensteinVolume*)a;
- const HenyeyGreensteinVolume *volume_b = (const HenyeyGreensteinVolume*)b;
+ const HenyeyGreensteinVolume *volume_a = (const HenyeyGreensteinVolume *)a;
+ const HenyeyGreensteinVolume *volume_b = (const HenyeyGreensteinVolume *)b;
- return (volume_a->g == volume_b->g);
+ return (volume_a->g == volume_b->g);
}
-ccl_device float3 volume_henyey_greenstein_eval_phase(const ShaderClosure *sc, const float3 I, float3 omega_in, float *pdf)
+ccl_device float3 volume_henyey_greenstein_eval_phase(const ShaderClosure *sc,
+ const float3 I,
+ float3 omega_in,
+ float *pdf)
{
- const HenyeyGreensteinVolume *volume = (const HenyeyGreensteinVolume*)sc;
- float g = volume->g;
-
- /* note that I points towards the viewer */
- if(fabsf(g) < 1e-3f) {
- *pdf = M_1_PI_F * 0.25f;
- }
- else {
- float cos_theta = dot(-I, omega_in);
- *pdf = single_peaked_henyey_greenstein(cos_theta, g);
- }
-
- return make_float3(*pdf, *pdf, *pdf);
+ const HenyeyGreensteinVolume *volume = (const HenyeyGreensteinVolume *)sc;
+ float g = volume->g;
+
+ /* note that I points towards the viewer */
+ if (fabsf(g) < 1e-3f) {
+ *pdf = M_1_PI_F * 0.25f;
+ }
+ else {
+ float cos_theta = dot(-I, omega_in);
+ *pdf = single_peaked_henyey_greenstein(cos_theta, g);
+ }
+
+ return make_float3(*pdf, *pdf, *pdf);
}
-ccl_device float3 henyey_greenstrein_sample(float3 D, float g, float randu, float randv, float *pdf)
+ccl_device float3
+henyey_greenstrein_sample(float3 D, float g, float randu, float randv, float *pdf)
{
- /* match pdf for small g */
- float cos_theta;
- bool isotropic = fabsf(g) < 1e-3f;
-
- if(isotropic) {
- cos_theta = (1.0f - 2.0f * randu);
- if(pdf) {
- *pdf = M_1_PI_F * 0.25f;
- }
- }
- else {
- float k = (1.0f - g * g) / (1.0f - g + 2.0f * g * randu);
- cos_theta = (1.0f + g * g - k * k) / (2.0f * g);
- if(pdf) {
- *pdf = single_peaked_henyey_greenstein(cos_theta, g);
- }
- }
-
- float sin_theta = safe_sqrtf(1.0f - cos_theta * cos_theta);
- float phi = M_2PI_F * randv;
- float3 dir = make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cos_theta);
-
- float3 T, B;
- make_orthonormals(D, &T, &B);
- dir = dir.x * T + dir.y * B + dir.z * D;
-
- return dir;
+ /* match pdf for small g */
+ float cos_theta;
+ bool isotropic = fabsf(g) < 1e-3f;
+
+ if (isotropic) {
+ cos_theta = (1.0f - 2.0f * randu);
+ if (pdf) {
+ *pdf = M_1_PI_F * 0.25f;
+ }
+ }
+ else {
+ float k = (1.0f - g * g) / (1.0f - g + 2.0f * g * randu);
+ cos_theta = (1.0f + g * g - k * k) / (2.0f * g);
+ if (pdf) {
+ *pdf = single_peaked_henyey_greenstein(cos_theta, g);
+ }
+ }
+
+ float sin_theta = safe_sqrtf(1.0f - cos_theta * cos_theta);
+ float phi = M_2PI_F * randv;
+ float3 dir = make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cos_theta);
+
+ float3 T, B;
+ make_orthonormals(D, &T, &B);
+ dir = dir.x * T + dir.y * B + dir.z * D;
+
+ return dir;
}
-ccl_device int volume_henyey_greenstein_sample(const ShaderClosure *sc, float3 I, float3 dIdx, float3 dIdy, float randu, float randv,
- float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int volume_henyey_greenstein_sample(const ShaderClosure *sc,
+ float3 I,
+ float3 dIdx,
+ float3 dIdy,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ float3 *domega_in_dx,
+ float3 *domega_in_dy,
+ float *pdf)
{
- const HenyeyGreensteinVolume *volume = (const HenyeyGreensteinVolume*)sc;
- float g = volume->g;
+ const HenyeyGreensteinVolume *volume = (const HenyeyGreensteinVolume *)sc;
+ float g = volume->g;
- /* note that I points towards the viewer and so is used negated */
- *omega_in = henyey_greenstrein_sample(-I, g, randu, randv, pdf);
- *eval = make_float3(*pdf, *pdf, *pdf); /* perfect importance sampling */
+ /* note that I points towards the viewer and so is used negated */
+ *omega_in = henyey_greenstrein_sample(-I, g, randu, randv, pdf);
+ *eval = make_float3(*pdf, *pdf, *pdf); /* perfect importance sampling */
#ifdef __RAY_DIFFERENTIALS__
- /* todo: implement ray differential estimation */
- *domega_in_dx = make_float3(0.0f, 0.0f, 0.0f);
- *domega_in_dy = make_float3(0.0f, 0.0f, 0.0f);
+ /* todo: implement ray differential estimation */
+ *domega_in_dx = make_float3(0.0f, 0.0f, 0.0f);
+ *domega_in_dy = make_float3(0.0f, 0.0f, 0.0f);
#endif
- return LABEL_VOLUME_SCATTER;
+ return LABEL_VOLUME_SCATTER;
}
/* VOLUME CLOSURE */
-ccl_device float3 volume_phase_eval(const ShaderData *sd, const ShaderClosure *sc, float3 omega_in, float *pdf)
+ccl_device float3 volume_phase_eval(const ShaderData *sd,
+ const ShaderClosure *sc,
+ float3 omega_in,
+ float *pdf)
{
- kernel_assert(sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID);
+ kernel_assert(sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID);
- return volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf);
+ return volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf);
}
-ccl_device int volume_phase_sample(const ShaderData *sd, const ShaderClosure *sc, float randu,
- float randv, float3 *eval, float3 *omega_in, differential3 *domega_in, float *pdf)
+ccl_device int volume_phase_sample(const ShaderData *sd,
+ const ShaderClosure *sc,
+ float randu,
+ float randv,
+ float3 *eval,
+ float3 *omega_in,
+ differential3 *domega_in,
+ float *pdf)
{
- int label;
-
- switch(sc->type) {
- case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
- label = volume_henyey_greenstein_sample(sc, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
- default:
- *eval = make_float3(0.0f, 0.0f, 0.0f);
- label = LABEL_NONE;
- break;
- }
-
- return label;
+ int label;
+
+ switch (sc->type) {
+ case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
+ label = volume_henyey_greenstein_sample(sc,
+ sd->I,
+ sd->dI.dx,
+ sd->dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in->dx,
+ &domega_in->dy,
+ pdf);
+ break;
+ default:
+ *eval = make_float3(0.0f, 0.0f, 0.0f);
+ label = LABEL_NONE;
+ break;
+ }
+
+ return label;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter.h b/intern/cycles/kernel/filter/filter.h
index 4209d69ee73..b067e53a8bf 100644
--- a/intern/cycles/kernel/filter/filter.h
+++ b/intern/cycles/kernel/filter/filter.h
@@ -25,8 +25,8 @@
CCL_NAMESPACE_BEGIN
-#define KERNEL_NAME_JOIN(x, y, z) x ## _ ## y ## _ ## z
-#define KERNEL_NAME_EVAL(arch, name) KERNEL_NAME_JOIN(kernel, arch, name)
+#define KERNEL_NAME_JOIN(x, y, z) x##_##y##_##z
+#define KERNEL_NAME_EVAL(arch, name) KERNEL_NAME_JOIN(kernel, arch, name)
#define KERNEL_FUNCTION_FULL_NAME(name) KERNEL_NAME_EVAL(KERNEL_ARCH, name)
#define KERNEL_ARCH cpu
@@ -49,4 +49,4 @@ CCL_NAMESPACE_BEGIN
CCL_NAMESPACE_END
-#endif /* __FILTER_H__ */
+#endif /* __FILTER_H__ */
diff --git a/intern/cycles/kernel/filter/filter_defines.h b/intern/cycles/kernel/filter/filter_defines.h
index cb04aac35f4..0e51eeef92f 100644
--- a/intern/cycles/kernel/filter/filter_defines.h
+++ b/intern/cycles/kernel/filter/filter_defines.h
@@ -18,59 +18,56 @@
#define __FILTER_DEFINES_H__
#define DENOISE_FEATURES 11
-#define TRANSFORM_SIZE (DENOISE_FEATURES*DENOISE_FEATURES)
-#define XTWX_SIZE (((DENOISE_FEATURES+1)*(DENOISE_FEATURES+2))/2)
-#define XTWY_SIZE (DENOISE_FEATURES+1)
+#define TRANSFORM_SIZE (DENOISE_FEATURES * DENOISE_FEATURES)
+#define XTWX_SIZE (((DENOISE_FEATURES + 1) * (DENOISE_FEATURES + 2)) / 2)
+#define XTWY_SIZE (DENOISE_FEATURES + 1)
#define DENOISE_MAX_FRAMES 16
typedef struct TileInfo {
- int offsets[9];
- int strides[9];
- int x[4];
- int y[4];
- int from_render;
- int frames[DENOISE_MAX_FRAMES];
- int num_frames;
- /* TODO(lukas): CUDA doesn't have uint64_t... */
+ int offsets[9];
+ int strides[9];
+ int x[4];
+ int y[4];
+ int from_render;
+ int frames[DENOISE_MAX_FRAMES];
+ int num_frames;
+ /* TODO(lukas): CUDA doesn't have uint64_t... */
#ifdef __KERNEL_OPENCL__
- ccl_global float *buffers[9];
+ ccl_global float *buffers[9];
#else
- long long int buffers[9];
+ long long int buffers[9];
#endif
} TileInfo;
#ifdef __KERNEL_OPENCL__
-# define CCL_FILTER_TILE_INFO ccl_global TileInfo* tile_info, \
- ccl_global float *tile_buffer_1, \
- ccl_global float *tile_buffer_2, \
- ccl_global float *tile_buffer_3, \
- ccl_global float *tile_buffer_4, \
- ccl_global float *tile_buffer_5, \
- ccl_global float *tile_buffer_6, \
- ccl_global float *tile_buffer_7, \
- ccl_global float *tile_buffer_8, \
- ccl_global float *tile_buffer_9
-# define CCL_FILTER_TILE_INFO_ARG tile_info, \
- tile_buffer_1, tile_buffer_2, tile_buffer_3, \
- tile_buffer_4, tile_buffer_5, tile_buffer_6, \
- tile_buffer_7, tile_buffer_8, tile_buffer_9
-# define ccl_get_tile_buffer(id) (id == 0 ? tile_buffer_1 \
- : id == 1 ? tile_buffer_2 \
- : id == 2 ? tile_buffer_3 \
- : id == 3 ? tile_buffer_4 \
- : id == 4 ? tile_buffer_5 \
- : id == 5 ? tile_buffer_6 \
- : id == 6 ? tile_buffer_7 \
- : id == 7 ? tile_buffer_8 \
- : tile_buffer_9)
+# define CCL_FILTER_TILE_INFO \
+ ccl_global TileInfo *tile_info, ccl_global float *tile_buffer_1, \
+ ccl_global float *tile_buffer_2, ccl_global float *tile_buffer_3, \
+ ccl_global float *tile_buffer_4, ccl_global float *tile_buffer_5, \
+ ccl_global float *tile_buffer_6, ccl_global float *tile_buffer_7, \
+ ccl_global float *tile_buffer_8, ccl_global float *tile_buffer_9
+# define CCL_FILTER_TILE_INFO_ARG \
+ tile_info, tile_buffer_1, tile_buffer_2, tile_buffer_3, tile_buffer_4, tile_buffer_5, \
+ tile_buffer_6, tile_buffer_7, tile_buffer_8, tile_buffer_9
+# define ccl_get_tile_buffer(id) \
+ (id == 0 ? tile_buffer_1 : \
+ id == 1 ? \
+ tile_buffer_2 : \
+ id == 2 ? \
+ tile_buffer_3 : \
+ id == 3 ? tile_buffer_4 : \
+ id == 4 ? tile_buffer_5 : \
+ id == 5 ? tile_buffer_6 : \
+ id == 6 ? tile_buffer_7 : \
+ id == 7 ? tile_buffer_8 : tile_buffer_9)
#else
# ifdef __KERNEL_CUDA__
-# define CCL_FILTER_TILE_INFO ccl_global TileInfo* tile_info
+# define CCL_FILTER_TILE_INFO ccl_global TileInfo *tile_info
# else
-# define CCL_FILTER_TILE_INFO TileInfo* tile_info
+# define CCL_FILTER_TILE_INFO TileInfo *tile_info
# endif
# define ccl_get_tile_buffer(id) (tile_info->buffers[id])
#endif
-#endif /* __FILTER_DEFINES_H__*/
+#endif /* __FILTER_DEFINES_H__*/
diff --git a/intern/cycles/kernel/filter/filter_features.h b/intern/cycles/kernel/filter/filter_features.h
index e1ea6487aa9..809ccfe8be6 100644
--- a/intern/cycles/kernel/filter/filter_features.h
+++ b/intern/cycles/kernel/filter/filter_features.h
@@ -14,22 +14,25 @@
* limitations under the License.
*/
- CCL_NAMESPACE_BEGIN
+CCL_NAMESPACE_BEGIN
#define ccl_get_feature(buffer, pass) (buffer)[(pass)*pass_stride]
/* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y).
* pixel_buffer always points to the current pixel in the first pass.
* Repeat the loop for every secondary frame if there are any. */
-#define FOR_PIXEL_WINDOW for(int frame = 0; frame < tile_info->num_frames; frame++) { \
- pixel.z = tile_info->frames[frame]; \
- pixel_buffer = buffer + (low.y - rect.y)*buffer_w + (low.x - rect.x) + frame*frame_stride; \
- for(pixel.y = low.y; pixel.y < high.y; pixel.y++) { \
- for(pixel.x = low.x; pixel.x < high.x; pixel.x++, pixel_buffer++) {
+#define FOR_PIXEL_WINDOW \
+ for (int frame = 0; frame < tile_info->num_frames; frame++) { \
+ pixel.z = tile_info->frames[frame]; \
+ pixel_buffer = buffer + (low.y - rect.y) * buffer_w + (low.x - rect.x) + \
+ frame * frame_stride; \
+ for (pixel.y = low.y; pixel.y < high.y; pixel.y++) { \
+ for (pixel.x = low.x; pixel.x < high.x; pixel.x++, pixel_buffer++) {
-#define END_FOR_PIXEL_WINDOW } \
- pixel_buffer += buffer_w - (high.x - low.x); \
- } \
- }
+#define END_FOR_PIXEL_WINDOW \
+ } \
+ pixel_buffer += buffer_w - (high.x - low.x); \
+ } \
+ }
ccl_device_inline void filter_get_features(int3 pixel,
const ccl_global float *ccl_restrict buffer,
@@ -38,24 +41,24 @@ ccl_device_inline void filter_get_features(int3 pixel,
const float *ccl_restrict mean,
int pass_stride)
{
- features[0] = pixel.x;
- features[1] = pixel.y;
- features[2] = fabsf(ccl_get_feature(buffer, 0));
- features[3] = ccl_get_feature(buffer, 1);
- features[4] = ccl_get_feature(buffer, 2);
- features[5] = ccl_get_feature(buffer, 3);
- features[6] = ccl_get_feature(buffer, 4);
- features[7] = ccl_get_feature(buffer, 5);
- features[8] = ccl_get_feature(buffer, 6);
- features[9] = ccl_get_feature(buffer, 7);
- if(use_time) {
- features[10] = pixel.z;
- }
- if(mean) {
- for(int i = 0; i < (use_time? 11 : 10); i++) {
- features[i] -= mean[i];
- }
- }
+ features[0] = pixel.x;
+ features[1] = pixel.y;
+ features[2] = fabsf(ccl_get_feature(buffer, 0));
+ features[3] = ccl_get_feature(buffer, 1);
+ features[4] = ccl_get_feature(buffer, 2);
+ features[5] = ccl_get_feature(buffer, 3);
+ features[6] = ccl_get_feature(buffer, 4);
+ features[7] = ccl_get_feature(buffer, 5);
+ features[8] = ccl_get_feature(buffer, 6);
+ features[9] = ccl_get_feature(buffer, 7);
+ if (use_time) {
+ features[10] = pixel.z;
+ }
+ if (mean) {
+ for (int i = 0; i < (use_time ? 11 : 10); i++) {
+ features[i] -= mean[i];
+ }
+ }
}
ccl_device_inline void filter_get_feature_scales(int3 pixel,
@@ -65,38 +68,39 @@ ccl_device_inline void filter_get_feature_scales(int3 pixel,
const float *ccl_restrict mean,
int pass_stride)
{
- scales[0] = fabsf(pixel.x - mean[0]);
- scales[1] = fabsf(pixel.y - mean[1]);
- scales[2] = fabsf(fabsf(ccl_get_feature(buffer, 0)) - mean[2]);
- scales[3] = len_squared(make_float3(ccl_get_feature(buffer, 1) - mean[3],
- ccl_get_feature(buffer, 2) - mean[4],
- ccl_get_feature(buffer, 3) - mean[5]));
- scales[4] = fabsf(ccl_get_feature(buffer, 4) - mean[6]);
- scales[5] = len_squared(make_float3(ccl_get_feature(buffer, 5) - mean[7],
- ccl_get_feature(buffer, 6) - mean[8],
- ccl_get_feature(buffer, 7) - mean[9]));
- if(use_time) {
- scales[6] = fabsf(pixel.z - mean[10]);
- }
+ scales[0] = fabsf(pixel.x - mean[0]);
+ scales[1] = fabsf(pixel.y - mean[1]);
+ scales[2] = fabsf(fabsf(ccl_get_feature(buffer, 0)) - mean[2]);
+ scales[3] = len_squared(make_float3(ccl_get_feature(buffer, 1) - mean[3],
+ ccl_get_feature(buffer, 2) - mean[4],
+ ccl_get_feature(buffer, 3) - mean[5]));
+ scales[4] = fabsf(ccl_get_feature(buffer, 4) - mean[6]);
+ scales[5] = len_squared(make_float3(ccl_get_feature(buffer, 5) - mean[7],
+ ccl_get_feature(buffer, 6) - mean[8],
+ ccl_get_feature(buffer, 7) - mean[9]));
+ if (use_time) {
+ scales[6] = fabsf(pixel.z - mean[10]);
+ }
}
ccl_device_inline void filter_calculate_scale(float *scale, bool use_time)
{
- scale[0] = 1.0f/max(scale[0], 0.01f);
- scale[1] = 1.0f/max(scale[1], 0.01f);
- scale[2] = 1.0f/max(scale[2], 0.01f);
- if(use_time) {
- scale[10] = 1.0f/max(scale[6], 0.01f);
- }
- scale[6] = 1.0f/max(scale[4], 0.01f);
- scale[7] = scale[8] = scale[9] = 1.0f/max(sqrtf(scale[5]), 0.01f);
- scale[3] = scale[4] = scale[5] = 1.0f/max(sqrtf(scale[3]), 0.01f);
+ scale[0] = 1.0f / max(scale[0], 0.01f);
+ scale[1] = 1.0f / max(scale[1], 0.01f);
+ scale[2] = 1.0f / max(scale[2], 0.01f);
+ if (use_time) {
+ scale[10] = 1.0f / max(scale[6], 0.01f);
+ }
+ scale[6] = 1.0f / max(scale[4], 0.01f);
+ scale[7] = scale[8] = scale[9] = 1.0f / max(sqrtf(scale[5]), 0.01f);
+ scale[3] = scale[4] = scale[5] = 1.0f / max(sqrtf(scale[3]), 0.01f);
}
ccl_device_inline float3 filter_get_color(const ccl_global float *ccl_restrict buffer,
int pass_stride)
{
- return make_float3(ccl_get_feature(buffer, 8), ccl_get_feature(buffer, 9), ccl_get_feature(buffer, 10));
+ return make_float3(
+ ccl_get_feature(buffer, 8), ccl_get_feature(buffer, 9), ccl_get_feature(buffer, 10));
}
ccl_device_inline void design_row_add(float *design_row,
@@ -107,42 +111,44 @@ ccl_device_inline void design_row_add(float *design_row,
float feature,
int transform_row_stride)
{
- for(int i = 0; i < rank; i++) {
- design_row[1+i] += transform[(row*transform_row_stride + i)*stride]*feature;
- }
+ for (int i = 0; i < rank; i++) {
+ design_row[1 + i] += transform[(row * transform_row_stride + i) * stride] * feature;
+ }
}
/* Fill the design row. */
-ccl_device_inline void filter_get_design_row_transform(int3 p_pixel,
- const ccl_global float *ccl_restrict p_buffer,
- int3 q_pixel,
- const ccl_global float *ccl_restrict q_buffer,
- int pass_stride,
- int rank,
- float *design_row,
- const ccl_global float *ccl_restrict transform,
- int stride,
- bool use_time)
+ccl_device_inline void filter_get_design_row_transform(
+ int3 p_pixel,
+ const ccl_global float *ccl_restrict p_buffer,
+ int3 q_pixel,
+ const ccl_global float *ccl_restrict q_buffer,
+ int pass_stride,
+ int rank,
+ float *design_row,
+ const ccl_global float *ccl_restrict transform,
+ int stride,
+ bool use_time)
{
- int num_features = use_time? 11 : 10;
+ int num_features = use_time ? 11 : 10;
- design_row[0] = 1.0f;
- math_vector_zero(design_row+1, rank);
+ design_row[0] = 1.0f;
+ math_vector_zero(design_row + 1, rank);
-#define DESIGN_ROW_ADD(I, F) design_row_add(design_row, rank, transform, stride, I, F, num_features);
- DESIGN_ROW_ADD(0, q_pixel.x - p_pixel.x);
- DESIGN_ROW_ADD(1, q_pixel.y - p_pixel.y);
- DESIGN_ROW_ADD(2, fabsf(ccl_get_feature(q_buffer, 0)) - fabsf(ccl_get_feature(p_buffer, 0)));
- DESIGN_ROW_ADD(3, ccl_get_feature(q_buffer, 1) - ccl_get_feature(p_buffer, 1));
- DESIGN_ROW_ADD(4, ccl_get_feature(q_buffer, 2) - ccl_get_feature(p_buffer, 2));
- DESIGN_ROW_ADD(5, ccl_get_feature(q_buffer, 3) - ccl_get_feature(p_buffer, 3));
- DESIGN_ROW_ADD(6, ccl_get_feature(q_buffer, 4) - ccl_get_feature(p_buffer, 4));
- DESIGN_ROW_ADD(7, ccl_get_feature(q_buffer, 5) - ccl_get_feature(p_buffer, 5));
- DESIGN_ROW_ADD(8, ccl_get_feature(q_buffer, 6) - ccl_get_feature(p_buffer, 6));
- DESIGN_ROW_ADD(9, ccl_get_feature(q_buffer, 7) - ccl_get_feature(p_buffer, 7));
- if(use_time) {
- DESIGN_ROW_ADD(10, q_pixel.z - p_pixel.z)
- }
+#define DESIGN_ROW_ADD(I, F) \
+ design_row_add(design_row, rank, transform, stride, I, F, num_features);
+ DESIGN_ROW_ADD(0, q_pixel.x - p_pixel.x);
+ DESIGN_ROW_ADD(1, q_pixel.y - p_pixel.y);
+ DESIGN_ROW_ADD(2, fabsf(ccl_get_feature(q_buffer, 0)) - fabsf(ccl_get_feature(p_buffer, 0)));
+ DESIGN_ROW_ADD(3, ccl_get_feature(q_buffer, 1) - ccl_get_feature(p_buffer, 1));
+ DESIGN_ROW_ADD(4, ccl_get_feature(q_buffer, 2) - ccl_get_feature(p_buffer, 2));
+ DESIGN_ROW_ADD(5, ccl_get_feature(q_buffer, 3) - ccl_get_feature(p_buffer, 3));
+ DESIGN_ROW_ADD(6, ccl_get_feature(q_buffer, 4) - ccl_get_feature(p_buffer, 4));
+ DESIGN_ROW_ADD(7, ccl_get_feature(q_buffer, 5) - ccl_get_feature(p_buffer, 5));
+ DESIGN_ROW_ADD(8, ccl_get_feature(q_buffer, 6) - ccl_get_feature(p_buffer, 6));
+ DESIGN_ROW_ADD(9, ccl_get_feature(q_buffer, 7) - ccl_get_feature(p_buffer, 7));
+ if (use_time) {
+ DESIGN_ROW_ADD(10, q_pixel.z - p_pixel.z)
+ }
#undef DESIGN_ROW_ADD
}
diff --git a/intern/cycles/kernel/filter/filter_features_sse.h b/intern/cycles/kernel/filter/filter_features_sse.h
index 5dd001ffb93..1e0d6e93453 100644
--- a/intern/cycles/kernel/filter/filter_features_sse.h
+++ b/intern/cycles/kernel/filter/filter_features_sse.h
@@ -22,22 +22,27 @@ CCL_NAMESPACE_BEGIN
* pixel_buffer always points to the first of the 4 current pixels in the first pass.
* x4 and y4 contain the coordinates of the four pixels; active_pixels contains a mask that's set for all pixels within the window.
* Repeat the loop for every secondary frame if there are any. */
-#define FOR_PIXEL_WINDOW_SSE for(int frame = 0; frame < tile_info->num_frames; frame++) { \
- pixel.z = tile_info->frames[frame]; \
- pixel_buffer = buffer + (low.y - rect.y)*buffer_w + (low.x - rect.x) + frame*frame_stride; \
- float4 t4 = make_float4(pixel.z); \
- for(pixel.y = low.y; pixel.y < high.y; pixel.y++) { \
- float4 y4 = make_float4(pixel.y); \
- for(pixel.x = low.x; pixel.x < high.x; pixel.x += 4, pixel_buffer += 4) { \
- float4 x4 = make_float4(pixel.x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f); \
- int4 active_pixels = x4 < make_float4(high.x);
+#define FOR_PIXEL_WINDOW_SSE \
+ for (int frame = 0; frame < tile_info->num_frames; frame++) { \
+ pixel.z = tile_info->frames[frame]; \
+ pixel_buffer = buffer + (low.y - rect.y) * buffer_w + (low.x - rect.x) + \
+ frame * frame_stride; \
+ float4 t4 = make_float4(pixel.z); \
+ for (pixel.y = low.y; pixel.y < high.y; pixel.y++) { \
+ float4 y4 = make_float4(pixel.y); \
+ for (pixel.x = low.x; pixel.x < high.x; pixel.x += 4, pixel_buffer += 4) { \
+ float4 x4 = make_float4(pixel.x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f); \
+ int4 active_pixels = x4 < make_float4(high.x);
-#define END_FOR_PIXEL_WINDOW_SSE } \
- pixel_buffer += buffer_w - (high.x - low.x); \
- } \
- }
+#define END_FOR_PIXEL_WINDOW_SSE \
+ } \
+ pixel_buffer += buffer_w - (high.x - low.x); \
+ } \
+ }
-ccl_device_inline void filter_get_features_sse(float4 x, float4 y, float4 t,
+ccl_device_inline void filter_get_features_sse(float4 x,
+ float4 y,
+ float4 t,
int4 active_pixels,
const float *ccl_restrict buffer,
float4 *features,
@@ -45,33 +50,35 @@ ccl_device_inline void filter_get_features_sse(float4 x, float4 y, float4 t,
const float4 *ccl_restrict mean,
int pass_stride)
{
- int num_features = use_time? 11 : 10;
+ int num_features = use_time ? 11 : 10;
- features[0] = x;
- features[1] = y;
- features[2] = fabs(ccl_get_feature_sse(0));
- features[3] = ccl_get_feature_sse(1);
- features[4] = ccl_get_feature_sse(2);
- features[5] = ccl_get_feature_sse(3);
- features[6] = ccl_get_feature_sse(4);
- features[7] = ccl_get_feature_sse(5);
- features[8] = ccl_get_feature_sse(6);
- features[9] = ccl_get_feature_sse(7);
- if(use_time) {
- features[10] = t;
- }
+ features[0] = x;
+ features[1] = y;
+ features[2] = fabs(ccl_get_feature_sse(0));
+ features[3] = ccl_get_feature_sse(1);
+ features[4] = ccl_get_feature_sse(2);
+ features[5] = ccl_get_feature_sse(3);
+ features[6] = ccl_get_feature_sse(4);
+ features[7] = ccl_get_feature_sse(5);
+ features[8] = ccl_get_feature_sse(6);
+ features[9] = ccl_get_feature_sse(7);
+ if (use_time) {
+ features[10] = t;
+ }
- if(mean) {
- for(int i = 0; i < num_features; i++) {
- features[i] = features[i] - mean[i];
- }
- }
- for(int i = 0; i < num_features; i++) {
- features[i] = mask(active_pixels, features[i]);
- }
+ if (mean) {
+ for (int i = 0; i < num_features; i++) {
+ features[i] = features[i] - mean[i];
+ }
+ }
+ for (int i = 0; i < num_features; i++) {
+ features[i] = mask(active_pixels, features[i]);
+ }
}
-ccl_device_inline void filter_get_feature_scales_sse(float4 x, float4 y, float4 t,
+ccl_device_inline void filter_get_feature_scales_sse(float4 x,
+ float4 y,
+ float4 t,
int4 active_pixels,
const float *ccl_restrict buffer,
float4 *scales,
@@ -79,36 +86,34 @@ ccl_device_inline void filter_get_feature_scales_sse(float4 x, float4 y, float4
const float4 *ccl_restrict mean,
int pass_stride)
{
- scales[0] = fabs(x - mean[0]);
- scales[1] = fabs(y - mean[1]);
- scales[2] = fabs(fabs(ccl_get_feature_sse(0)) - mean[2]);
- scales[3] = sqr(ccl_get_feature_sse(1) - mean[3]) +
- sqr(ccl_get_feature_sse(2) - mean[4]) +
- sqr(ccl_get_feature_sse(3) - mean[5]);
- scales[4] = fabs(ccl_get_feature_sse(4) - mean[6]);
- scales[5] = sqr(ccl_get_feature_sse(5) - mean[7]) +
- sqr(ccl_get_feature_sse(6) - mean[8]) +
- sqr(ccl_get_feature_sse(7) - mean[9]);
- if(use_time) {
- scales[6] = fabs(t - mean[10]);
- }
+ scales[0] = fabs(x - mean[0]);
+ scales[1] = fabs(y - mean[1]);
+ scales[2] = fabs(fabs(ccl_get_feature_sse(0)) - mean[2]);
+ scales[3] = sqr(ccl_get_feature_sse(1) - mean[3]) + sqr(ccl_get_feature_sse(2) - mean[4]) +
+ sqr(ccl_get_feature_sse(3) - mean[5]);
+ scales[4] = fabs(ccl_get_feature_sse(4) - mean[6]);
+ scales[5] = sqr(ccl_get_feature_sse(5) - mean[7]) + sqr(ccl_get_feature_sse(6) - mean[8]) +
+ sqr(ccl_get_feature_sse(7) - mean[9]);
+ if (use_time) {
+ scales[6] = fabs(t - mean[10]);
+ }
- for(int i = 0; i < (use_time? 7 : 6); i++)
- scales[i] = mask(active_pixels, scales[i]);
+ for (int i = 0; i < (use_time ? 7 : 6); i++)
+ scales[i] = mask(active_pixels, scales[i]);
}
ccl_device_inline void filter_calculate_scale_sse(float4 *scale, bool use_time)
{
- scale[0] = rcp(max(reduce_max(scale[0]), make_float4(0.01f)));
- scale[1] = rcp(max(reduce_max(scale[1]), make_float4(0.01f)));
- scale[2] = rcp(max(reduce_max(scale[2]), make_float4(0.01f)));
- if(use_time) {
- scale[10] = rcp(max(reduce_max(scale[6]), make_float4(0.01f)));;
- }
- scale[6] = rcp(max(reduce_max(scale[4]), make_float4(0.01f)));
- scale[7] = scale[8] = scale[9] = rcp(max(reduce_max(sqrt(scale[5])), make_float4(0.01f)));
- scale[3] = scale[4] = scale[5] = rcp(max(reduce_max(sqrt(scale[3])), make_float4(0.01f)));
+ scale[0] = rcp(max(reduce_max(scale[0]), make_float4(0.01f)));
+ scale[1] = rcp(max(reduce_max(scale[1]), make_float4(0.01f)));
+ scale[2] = rcp(max(reduce_max(scale[2]), make_float4(0.01f)));
+ if (use_time) {
+ scale[10] = rcp(max(reduce_max(scale[6]), make_float4(0.01f)));
+ ;
+ }
+ scale[6] = rcp(max(reduce_max(scale[4]), make_float4(0.01f)));
+ scale[7] = scale[8] = scale[9] = rcp(max(reduce_max(sqrt(scale[5])), make_float4(0.01f)));
+ scale[3] = scale[4] = scale[5] = rcp(max(reduce_max(sqrt(scale[3])), make_float4(0.01f)));
}
-
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_nlm_cpu.h b/intern/cycles/kernel/filter/filter_nlm_cpu.h
index 9eb3c603a4a..a94266a8786 100644
--- a/intern/cycles/kernel/filter/filter_nlm_cpu.h
+++ b/intern/cycles/kernel/filter/filter_nlm_cpu.h
@@ -16,10 +16,11 @@
CCL_NAMESPACE_BEGIN
-#define load4_a(buf, ofs) (*((float4*) ((buf) + (ofs))))
-#define load4_u(buf, ofs) load_float4((buf)+(ofs))
+#define load4_a(buf, ofs) (*((float4 *)((buf) + (ofs))))
+#define load4_u(buf, ofs) load_float4((buf) + (ofs))
-ccl_device_inline void kernel_filter_nlm_calc_difference(int dx, int dy,
+ccl_device_inline void kernel_filter_nlm_calc_difference(int dx,
+ int dy,
const float *ccl_restrict weight_image,
const float *ccl_restrict variance_image,
const float *ccl_restrict scale_image,
@@ -31,122 +32,117 @@ ccl_device_inline void kernel_filter_nlm_calc_difference(int dx, int dy,
float a,
float k_2)
{
- /* Strides need to be aligned to 16 bytes. */
- kernel_assert((stride % 4) == 0 && (channel_offset % 4) == 0);
-
- int aligned_lowx = rect.x & (~3);
- const int numChannels = (channel_offset > 0)? 3 : 1;
- const float4 channel_fac = make_float4(1.0f / numChannels);
-
- for(int y = rect.y; y < rect.w; y++) {
- int idx_p = y*stride + aligned_lowx;
- int idx_q = (y+dy)*stride + aligned_lowx + dx + frame_offset;
- for(int x = aligned_lowx; x < rect.z; x += 4, idx_p += 4, idx_q += 4) {
- float4 diff = make_float4(0.0f);
- float4 scale_fac;
- if(scale_image) {
- scale_fac = clamp(load4_a(scale_image, idx_p) / load4_u(scale_image, idx_q),
- make_float4(0.25f), make_float4(4.0f));
- }
- else {
- scale_fac = make_float4(1.0f);
- }
- for(int c = 0, chan_ofs = 0; c < numChannels; c++, chan_ofs += channel_offset) {
- /* idx_p is guaranteed to be aligned, but idx_q isn't. */
- float4 color_p = load4_a(weight_image, idx_p + chan_ofs);
- float4 color_q = scale_fac*load4_u(weight_image, idx_q + chan_ofs);
- float4 cdiff = color_p - color_q;
- float4 var_p = load4_a(variance_image, idx_p + chan_ofs);
- float4 var_q = sqr(scale_fac)*load4_u(variance_image, idx_q + chan_ofs);
- diff += (cdiff*cdiff - a*(var_p + min(var_p, var_q))) / (make_float4(1e-8f) + k_2*(var_p+var_q));
- }
- load4_a(difference_image, idx_p) = diff*channel_fac;
- }
- }
+ /* Strides need to be aligned to 16 bytes. */
+ kernel_assert((stride % 4) == 0 && (channel_offset % 4) == 0);
+
+ int aligned_lowx = rect.x & (~3);
+ const int numChannels = (channel_offset > 0) ? 3 : 1;
+ const float4 channel_fac = make_float4(1.0f / numChannels);
+
+ for (int y = rect.y; y < rect.w; y++) {
+ int idx_p = y * stride + aligned_lowx;
+ int idx_q = (y + dy) * stride + aligned_lowx + dx + frame_offset;
+ for (int x = aligned_lowx; x < rect.z; x += 4, idx_p += 4, idx_q += 4) {
+ float4 diff = make_float4(0.0f);
+ float4 scale_fac;
+ if (scale_image) {
+ scale_fac = clamp(load4_a(scale_image, idx_p) / load4_u(scale_image, idx_q),
+ make_float4(0.25f),
+ make_float4(4.0f));
+ }
+ else {
+ scale_fac = make_float4(1.0f);
+ }
+ for (int c = 0, chan_ofs = 0; c < numChannels; c++, chan_ofs += channel_offset) {
+ /* idx_p is guaranteed to be aligned, but idx_q isn't. */
+ float4 color_p = load4_a(weight_image, idx_p + chan_ofs);
+ float4 color_q = scale_fac * load4_u(weight_image, idx_q + chan_ofs);
+ float4 cdiff = color_p - color_q;
+ float4 var_p = load4_a(variance_image, idx_p + chan_ofs);
+ float4 var_q = sqr(scale_fac) * load4_u(variance_image, idx_q + chan_ofs);
+ diff += (cdiff * cdiff - a * (var_p + min(var_p, var_q))) /
+ (make_float4(1e-8f) + k_2 * (var_p + var_q));
+ }
+ load4_a(difference_image, idx_p) = diff * channel_fac;
+ }
+ }
}
-ccl_device_inline void kernel_filter_nlm_blur(const float *ccl_restrict difference_image,
- float *out_image,
- int4 rect,
- int stride,
- int f)
+ccl_device_inline void kernel_filter_nlm_blur(
+ const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
{
- int aligned_lowx = round_down(rect.x, 4);
- for(int y = rect.y; y < rect.w; y++) {
- const int low = max(rect.y, y-f);
- const int high = min(rect.w, y+f+1);
- for(int x = aligned_lowx; x < rect.z; x += 4) {
- load4_a(out_image, y*stride + x) = make_float4(0.0f);
- }
- for(int y1 = low; y1 < high; y1++) {
- for(int x = aligned_lowx; x < rect.z; x += 4) {
- load4_a(out_image, y*stride + x) += load4_a(difference_image, y1*stride + x);
- }
- }
- float fac = 1.0f/(high - low);
- for(int x = aligned_lowx; x < rect.z; x += 4) {
- load4_a(out_image, y*stride + x) *= fac;
- }
- }
+ int aligned_lowx = round_down(rect.x, 4);
+ for (int y = rect.y; y < rect.w; y++) {
+ const int low = max(rect.y, y - f);
+ const int high = min(rect.w, y + f + 1);
+ for (int x = aligned_lowx; x < rect.z; x += 4) {
+ load4_a(out_image, y * stride + x) = make_float4(0.0f);
+ }
+ for (int y1 = low; y1 < high; y1++) {
+ for (int x = aligned_lowx; x < rect.z; x += 4) {
+ load4_a(out_image, y * stride + x) += load4_a(difference_image, y1 * stride + x);
+ }
+ }
+ float fac = 1.0f / (high - low);
+ for (int x = aligned_lowx; x < rect.z; x += 4) {
+ load4_a(out_image, y * stride + x) *= fac;
+ }
+ }
}
-ccl_device_inline void nlm_blur_horizontal(const float *ccl_restrict difference_image,
- float *out_image,
- int4 rect,
- int stride,
- int f)
+ccl_device_inline void nlm_blur_horizontal(
+ const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
{
- int aligned_lowx = round_down(rect.x, 4);
- for(int y = rect.y; y < rect.w; y++) {
- for(int x = aligned_lowx; x < rect.z; x += 4) {
- load4_a(out_image, y*stride + x) = make_float4(0.0f);
- }
- }
-
- for(int dx = -f; dx <= f; dx++) {
- aligned_lowx = round_down(rect.x - min(0, dx), 4);
- int highx = rect.z - max(0, dx);
- int4 lowx4 = make_int4(rect.x - min(0, dx));
- int4 highx4 = make_int4(rect.z - max(0, dx));
- for(int y = rect.y; y < rect.w; y++) {
- for(int x = aligned_lowx; x < highx; x += 4) {
- int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
- int4 active = (x4 >= lowx4) & (x4 < highx4);
-
- float4 diff = load4_u(difference_image, y*stride + x + dx);
- load4_a(out_image, y*stride + x) += mask(active, diff);
- }
- }
- }
-
- aligned_lowx = round_down(rect.x, 4);
- for(int y = rect.y; y < rect.w; y++) {
- for(int x = aligned_lowx; x < rect.z; x += 4) {
- float4 x4 = make_float4(x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f);
- float4 low = max(make_float4(rect.x), x4 - make_float4(f));
- float4 high = min(make_float4(rect.z), x4 + make_float4(f+1));
- load4_a(out_image, y*stride + x) *= rcp(high - low);
- }
- }
+ int aligned_lowx = round_down(rect.x, 4);
+ for (int y = rect.y; y < rect.w; y++) {
+ for (int x = aligned_lowx; x < rect.z; x += 4) {
+ load4_a(out_image, y * stride + x) = make_float4(0.0f);
+ }
+ }
+
+ for (int dx = -f; dx <= f; dx++) {
+ aligned_lowx = round_down(rect.x - min(0, dx), 4);
+ int highx = rect.z - max(0, dx);
+ int4 lowx4 = make_int4(rect.x - min(0, dx));
+ int4 highx4 = make_int4(rect.z - max(0, dx));
+ for (int y = rect.y; y < rect.w; y++) {
+ for (int x = aligned_lowx; x < highx; x += 4) {
+ int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
+ int4 active = (x4 >= lowx4) & (x4 < highx4);
+
+ float4 diff = load4_u(difference_image, y * stride + x + dx);
+ load4_a(out_image, y * stride + x) += mask(active, diff);
+ }
+ }
+ }
+
+ aligned_lowx = round_down(rect.x, 4);
+ for (int y = rect.y; y < rect.w; y++) {
+ for (int x = aligned_lowx; x < rect.z; x += 4) {
+ float4 x4 = make_float4(x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f);
+ float4 low = max(make_float4(rect.x), x4 - make_float4(f));
+ float4 high = min(make_float4(rect.z), x4 + make_float4(f + 1));
+ load4_a(out_image, y * stride + x) *= rcp(high - low);
+ }
+ }
}
-ccl_device_inline void kernel_filter_nlm_calc_weight(const float *ccl_restrict difference_image,
- float *out_image,
- int4 rect,
- int stride,
- int f)
+ccl_device_inline void kernel_filter_nlm_calc_weight(
+ const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
{
- nlm_blur_horizontal(difference_image, out_image, rect, stride, f);
-
- int aligned_lowx = round_down(rect.x, 4);
- for(int y = rect.y; y < rect.w; y++) {
- for(int x = aligned_lowx; x < rect.z; x += 4) {
- load4_a(out_image, y*stride + x) = fast_expf4(-max(load4_a(out_image, y*stride + x), make_float4(0.0f)));
- }
- }
+ nlm_blur_horizontal(difference_image, out_image, rect, stride, f);
+
+ int aligned_lowx = round_down(rect.x, 4);
+ for (int y = rect.y; y < rect.w; y++) {
+ for (int x = aligned_lowx; x < rect.z; x += 4) {
+ load4_a(out_image, y * stride + x) = fast_expf4(
+ -max(load4_a(out_image, y * stride + x), make_float4(0.0f)));
+ }
+ }
}
-ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy,
+ccl_device_inline void kernel_filter_nlm_update_output(int dx,
+ int dy,
const float *ccl_restrict difference_image,
const float *ccl_restrict image,
float *temp_image,
@@ -157,33 +153,36 @@ ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy,
int stride,
int f)
{
- nlm_blur_horizontal(difference_image, temp_image, rect, stride, f);
+ nlm_blur_horizontal(difference_image, temp_image, rect, stride, f);
- int aligned_lowx = round_down(rect.x, 4);
- for(int y = rect.y; y < rect.w; y++) {
- for(int x = aligned_lowx; x < rect.z; x += 4) {
- int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
- int4 active = (x4 >= make_int4(rect.x)) & (x4 < make_int4(rect.z));
+ int aligned_lowx = round_down(rect.x, 4);
+ for (int y = rect.y; y < rect.w; y++) {
+ for (int x = aligned_lowx; x < rect.z; x += 4) {
+ int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
+ int4 active = (x4 >= make_int4(rect.x)) & (x4 < make_int4(rect.z));
- int idx_p = y*stride + x, idx_q = (y+dy)*stride + (x+dx);
+ int idx_p = y * stride + x, idx_q = (y + dy) * stride + (x + dx);
- float4 weight = load4_a(temp_image, idx_p);
- load4_a(accum_image, idx_p) += mask(active, weight);
+ float4 weight = load4_a(temp_image, idx_p);
+ load4_a(accum_image, idx_p) += mask(active, weight);
- float4 val = load4_u(image, idx_q);
- if(channel_offset) {
- val += load4_u(image, idx_q + channel_offset);
- val += load4_u(image, idx_q + 2*channel_offset);
- val *= 1.0f/3.0f;
- }
+ float4 val = load4_u(image, idx_q);
+ if (channel_offset) {
+ val += load4_u(image, idx_q + channel_offset);
+ val += load4_u(image, idx_q + 2 * channel_offset);
+ val *= 1.0f / 3.0f;
+ }
- load4_a(out_image, idx_p) += mask(active, weight*val);
- }
- }
+ load4_a(out_image, idx_p) += mask(active, weight * val);
+ }
+ }
}
-ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy, int t,
- const float *ccl_restrict difference_image,
+ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx,
+ int dy,
+ int t,
+ const float *ccl_restrict
+ difference_image,
const float *ccl_restrict buffer,
float *transform,
int *rank,
@@ -191,40 +190,49 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy, int t
float3 *XtWY,
int4 rect,
int4 filter_window,
- int stride, int f,
+ int stride,
+ int f,
int pass_stride,
int frame_offset,
bool use_time)
{
- int4 clip_area = rect_clip(rect, filter_window);
- /* fy and fy are in filter-window-relative coordinates, while x and y are in feature-window-relative coordinates. */
- for(int y = clip_area.y; y < clip_area.w; y++) {
- for(int x = clip_area.x; x < clip_area.z; x++) {
- const int low = max(rect.x, x-f);
- const int high = min(rect.z, x+f+1);
- float sum = 0.0f;
- for(int x1 = low; x1 < high; x1++) {
- sum += difference_image[y*stride + x1];
- }
- float weight = sum * (1.0f/(high - low));
-
- int storage_ofs = coord_to_local_index(filter_window, x, y);
- float *l_transform = transform + storage_ofs*TRANSFORM_SIZE;
- float *l_XtWX = XtWX + storage_ofs*XTWX_SIZE;
- float3 *l_XtWY = XtWY + storage_ofs*XTWY_SIZE;
- int *l_rank = rank + storage_ofs;
-
- kernel_filter_construct_gramian(x, y, 1,
- dx, dy, t,
- stride,
- pass_stride,
- frame_offset,
- use_time,
- buffer,
- l_transform, l_rank,
- weight, l_XtWX, l_XtWY, 0);
- }
- }
+ int4 clip_area = rect_clip(rect, filter_window);
+ /* fy and fy are in filter-window-relative coordinates, while x and y are in feature-window-relative coordinates. */
+ for (int y = clip_area.y; y < clip_area.w; y++) {
+ for (int x = clip_area.x; x < clip_area.z; x++) {
+ const int low = max(rect.x, x - f);
+ const int high = min(rect.z, x + f + 1);
+ float sum = 0.0f;
+ for (int x1 = low; x1 < high; x1++) {
+ sum += difference_image[y * stride + x1];
+ }
+ float weight = sum * (1.0f / (high - low));
+
+ int storage_ofs = coord_to_local_index(filter_window, x, y);
+ float *l_transform = transform + storage_ofs * TRANSFORM_SIZE;
+ float *l_XtWX = XtWX + storage_ofs * XTWX_SIZE;
+ float3 *l_XtWY = XtWY + storage_ofs * XTWY_SIZE;
+ int *l_rank = rank + storage_ofs;
+
+ kernel_filter_construct_gramian(x,
+ y,
+ 1,
+ dx,
+ dy,
+ t,
+ stride,
+ pass_stride,
+ frame_offset,
+ use_time,
+ buffer,
+ l_transform,
+ l_rank,
+ weight,
+ l_XtWX,
+ l_XtWY,
+ 0);
+ }
+ }
}
ccl_device_inline void kernel_filter_nlm_normalize(float *out_image,
@@ -232,11 +240,11 @@ ccl_device_inline void kernel_filter_nlm_normalize(float *out_image,
int4 rect,
int w)
{
- for(int y = rect.y; y < rect.w; y++) {
- for(int x = rect.x; x < rect.z; x++) {
- out_image[y*w+x] /= accum_image[y*w+x];
- }
- }
+ for (int y = rect.y; y < rect.w; y++) {
+ for (int x = rect.x; x < rect.z; x++) {
+ out_image[y * w + x] /= accum_image[y * w + x];
+ }
+ }
}
#undef load4_a
diff --git a/intern/cycles/kernel/filter/filter_nlm_gpu.h b/intern/cycles/kernel/filter/filter_nlm_gpu.h
index 12636393243..650c743f34f 100644
--- a/intern/cycles/kernel/filter/filter_nlm_gpu.h
+++ b/intern/cycles/kernel/filter/filter_nlm_gpu.h
@@ -24,203 +24,232 @@ CCL_NAMESPACE_BEGIN
* Window is the rect that should be processed.
* co is filled with (x, y, dx, dy).
*/
-ccl_device_inline bool get_nlm_coords_window(int w, int h, int r, int stride,
- int4 *rect, int4 *co, int *ofs,
- int4 window)
+ccl_device_inline bool get_nlm_coords_window(
+ int w, int h, int r, int stride, int4 *rect, int4 *co, int *ofs, int4 window)
{
- /* Determine the pixel offset that this thread should apply. */
- int s = 2*r+1;
- int si = ccl_global_id(1);
- int sx = si % s;
- int sy = si / s;
- if(sy >= s) {
- return false;
- }
-
- /* Pixels still need to lie inside the denoising buffer after applying the offset,
- * so determine the area for which this is the case. */
- int dx = sx - r;
- int dy = sy - r;
-
- *rect = make_int4(max(0, -dx), max(0, -dy),
- w - max(0, dx), h - max(0, dy));
-
- /* Find the intersection of the area that we want to process (window) and the area
- * that can be processed (rect) to get the final area for this offset. */
- int4 clip_area = rect_clip(window, *rect);
-
- /* If the radius is larger than one of the sides of the window,
- * there will be shifts for which there is no usable pixel at all. */
- if(!rect_is_valid(clip_area)) {
- return false;
- }
-
- /* Map the linear thread index to pixels inside the clip area. */
- int x, y;
- if(!local_index_to_coord(clip_area, ccl_global_id(0), &x, &y)) {
- return false;
- }
-
- *co = make_int4(x, y, dx, dy);
-
- *ofs = (sy*s + sx) * stride;
-
- return true;
+ /* Determine the pixel offset that this thread should apply. */
+ int s = 2 * r + 1;
+ int si = ccl_global_id(1);
+ int sx = si % s;
+ int sy = si / s;
+ if (sy >= s) {
+ return false;
+ }
+
+ /* Pixels still need to lie inside the denoising buffer after applying the offset,
+ * so determine the area for which this is the case. */
+ int dx = sx - r;
+ int dy = sy - r;
+
+ *rect = make_int4(max(0, -dx), max(0, -dy), w - max(0, dx), h - max(0, dy));
+
+ /* Find the intersection of the area that we want to process (window) and the area
+ * that can be processed (rect) to get the final area for this offset. */
+ int4 clip_area = rect_clip(window, *rect);
+
+ /* If the radius is larger than one of the sides of the window,
+ * there will be shifts for which there is no usable pixel at all. */
+ if (!rect_is_valid(clip_area)) {
+ return false;
+ }
+
+ /* Map the linear thread index to pixels inside the clip area. */
+ int x, y;
+ if (!local_index_to_coord(clip_area, ccl_global_id(0), &x, &y)) {
+ return false;
+ }
+
+ *co = make_int4(x, y, dx, dy);
+
+ *ofs = (sy * s + sx) * stride;
+
+ return true;
}
-ccl_device_inline bool get_nlm_coords(int w, int h, int r, int stride,
- int4 *rect, int4 *co, int *ofs)
+ccl_device_inline bool get_nlm_coords(
+ int w, int h, int r, int stride, int4 *rect, int4 *co, int *ofs)
{
- return get_nlm_coords_window(w, h, r, stride, rect, co, ofs, make_int4(0, 0, w, h));
+ return get_nlm_coords_window(w, h, r, stride, rect, co, ofs, make_int4(0, 0, w, h));
}
-ccl_device_inline void kernel_filter_nlm_calc_difference(int x, int y,
- int dx, int dy,
- const ccl_global float *ccl_restrict weight_image,
- const ccl_global float *ccl_restrict variance_image,
- const ccl_global float *ccl_restrict scale_image,
- ccl_global float *difference_image,
- int4 rect, int stride,
- int channel_offset,
- int frame_offset,
- float a, float k_2)
+ccl_device_inline void kernel_filter_nlm_calc_difference(
+ int x,
+ int y,
+ int dx,
+ int dy,
+ const ccl_global float *ccl_restrict weight_image,
+ const ccl_global float *ccl_restrict variance_image,
+ const ccl_global float *ccl_restrict scale_image,
+ ccl_global float *difference_image,
+ int4 rect,
+ int stride,
+ int channel_offset,
+ int frame_offset,
+ float a,
+ float k_2)
{
- int idx_p = y*stride + x, idx_q = (y+dy)*stride + (x+dx) + frame_offset;
- int numChannels = channel_offset? 3 : 1;
-
- float diff = 0.0f;
- float scale_fac = 1.0f;
- if(scale_image) {
- scale_fac = clamp(scale_image[idx_p] / scale_image[idx_q], 0.25f, 4.0f);
- }
-
- for(int c = 0; c < numChannels; c++, idx_p += channel_offset, idx_q += channel_offset) {
- float cdiff = weight_image[idx_p] - scale_fac*weight_image[idx_q];
- float pvar = variance_image[idx_p];
- float qvar = sqr(scale_fac)*variance_image[idx_q];
- diff += (cdiff*cdiff - a*(pvar + min(pvar, qvar))) / (1e-8f + k_2*(pvar+qvar));
- }
- if(numChannels > 1) {
- diff *= 1.0f/numChannels;
- }
- difference_image[y*stride + x] = diff;
+ int idx_p = y * stride + x, idx_q = (y + dy) * stride + (x + dx) + frame_offset;
+ int numChannels = channel_offset ? 3 : 1;
+
+ float diff = 0.0f;
+ float scale_fac = 1.0f;
+ if (scale_image) {
+ scale_fac = clamp(scale_image[idx_p] / scale_image[idx_q], 0.25f, 4.0f);
+ }
+
+ for (int c = 0; c < numChannels; c++, idx_p += channel_offset, idx_q += channel_offset) {
+ float cdiff = weight_image[idx_p] - scale_fac * weight_image[idx_q];
+ float pvar = variance_image[idx_p];
+ float qvar = sqr(scale_fac) * variance_image[idx_q];
+ diff += (cdiff * cdiff - a * (pvar + min(pvar, qvar))) / (1e-8f + k_2 * (pvar + qvar));
+ }
+ if (numChannels > 1) {
+ diff *= 1.0f / numChannels;
+ }
+ difference_image[y * stride + x] = diff;
}
-ccl_device_inline void kernel_filter_nlm_blur(int x, int y,
- const ccl_global float *ccl_restrict difference_image,
+ccl_device_inline void kernel_filter_nlm_blur(int x,
+ int y,
+ const ccl_global float *ccl_restrict
+ difference_image,
ccl_global float *out_image,
- int4 rect, int stride, int f)
+ int4 rect,
+ int stride,
+ int f)
{
- float sum = 0.0f;
- const int low = max(rect.y, y-f);
- const int high = min(rect.w, y+f+1);
- for(int y1 = low; y1 < high; y1++) {
- sum += difference_image[y1*stride + x];
- }
- sum *= 1.0f/(high-low);
- out_image[y*stride + x] = sum;
+ float sum = 0.0f;
+ const int low = max(rect.y, y - f);
+ const int high = min(rect.w, y + f + 1);
+ for (int y1 = low; y1 < high; y1++) {
+ sum += difference_image[y1 * stride + x];
+ }
+ sum *= 1.0f / (high - low);
+ out_image[y * stride + x] = sum;
}
-ccl_device_inline void kernel_filter_nlm_calc_weight(int x, int y,
- const ccl_global float *ccl_restrict difference_image,
+ccl_device_inline void kernel_filter_nlm_calc_weight(int x,
+ int y,
+ const ccl_global float *ccl_restrict
+ difference_image,
ccl_global float *out_image,
- int4 rect, int stride, int f)
+ int4 rect,
+ int stride,
+ int f)
{
- float sum = 0.0f;
- const int low = max(rect.x, x-f);
- const int high = min(rect.z, x+f+1);
- for(int x1 = low; x1 < high; x1++) {
- sum += difference_image[y*stride + x1];
- }
- sum *= 1.0f/(high-low);
- out_image[y*stride + x] = fast_expf(-max(sum, 0.0f));
+ float sum = 0.0f;
+ const int low = max(rect.x, x - f);
+ const int high = min(rect.z, x + f + 1);
+ for (int x1 = low; x1 < high; x1++) {
+ sum += difference_image[y * stride + x1];
+ }
+ sum *= 1.0f / (high - low);
+ out_image[y * stride + x] = fast_expf(-max(sum, 0.0f));
}
-ccl_device_inline void kernel_filter_nlm_update_output(int x, int y,
- int dx, int dy,
- const ccl_global float *ccl_restrict difference_image,
+ccl_device_inline void kernel_filter_nlm_update_output(int x,
+ int y,
+ int dx,
+ int dy,
+ const ccl_global float *ccl_restrict
+ difference_image,
const ccl_global float *ccl_restrict image,
ccl_global float *out_image,
ccl_global float *accum_image,
- int4 rect, int channel_offset,
- int stride, int f)
+ int4 rect,
+ int channel_offset,
+ int stride,
+ int f)
{
- float sum = 0.0f;
- const int low = max(rect.x, x-f);
- const int high = min(rect.z, x+f+1);
- for(int x1 = low; x1 < high; x1++) {
- sum += difference_image[y*stride + x1];
- }
- sum *= 1.0f/(high-low);
-
- int idx_p = y*stride + x, idx_q = (y+dy)*stride + (x+dx);
- if(out_image) {
- atomic_add_and_fetch_float(accum_image + idx_p, sum);
-
- float val = image[idx_q];
- if(channel_offset) {
- val += image[idx_q + channel_offset];
- val += image[idx_q + 2*channel_offset];
- val *= 1.0f/3.0f;
- }
- atomic_add_and_fetch_float(out_image + idx_p, sum*val);
- }
- else {
- accum_image[idx_p] = sum;
- }
+ float sum = 0.0f;
+ const int low = max(rect.x, x - f);
+ const int high = min(rect.z, x + f + 1);
+ for (int x1 = low; x1 < high; x1++) {
+ sum += difference_image[y * stride + x1];
+ }
+ sum *= 1.0f / (high - low);
+
+ int idx_p = y * stride + x, idx_q = (y + dy) * stride + (x + dx);
+ if (out_image) {
+ atomic_add_and_fetch_float(accum_image + idx_p, sum);
+
+ float val = image[idx_q];
+ if (channel_offset) {
+ val += image[idx_q + channel_offset];
+ val += image[idx_q + 2 * channel_offset];
+ val *= 1.0f / 3.0f;
+ }
+ atomic_add_and_fetch_float(out_image + idx_p, sum * val);
+ }
+ else {
+ accum_image[idx_p] = sum;
+ }
}
-ccl_device_inline void kernel_filter_nlm_construct_gramian(int x, int y,
- int dx, int dy, int t,
- const ccl_global float *ccl_restrict difference_image,
- const ccl_global float *ccl_restrict buffer,
- const ccl_global float *ccl_restrict transform,
- ccl_global int *rank,
- ccl_global float *XtWX,
- ccl_global float3 *XtWY,
- int4 rect,
- int4 filter_window,
- int stride, int f,
- int pass_stride,
- int frame_offset,
- bool use_time,
- int localIdx)
+ccl_device_inline void kernel_filter_nlm_construct_gramian(
+ int x,
+ int y,
+ int dx,
+ int dy,
+ int t,
+ const ccl_global float *ccl_restrict difference_image,
+ const ccl_global float *ccl_restrict buffer,
+ const ccl_global float *ccl_restrict transform,
+ ccl_global int *rank,
+ ccl_global float *XtWX,
+ ccl_global float3 *XtWY,
+ int4 rect,
+ int4 filter_window,
+ int stride,
+ int f,
+ int pass_stride,
+ int frame_offset,
+ bool use_time,
+ int localIdx)
{
- const int low = max(rect.x, x-f);
- const int high = min(rect.z, x+f+1);
- float sum = 0.0f;
- for(int x1 = low; x1 < high; x1++) {
- sum += difference_image[y*stride + x1];
- }
- float weight = sum * (1.0f/(high - low));
-
- /* Reconstruction data is only stored for pixels inside the filter window,
- * so compute the pixels's index in there. */
- int storage_ofs = coord_to_local_index(filter_window, x, y);
- transform += storage_ofs;
- rank += storage_ofs;
- XtWX += storage_ofs;
- XtWY += storage_ofs;
-
- kernel_filter_construct_gramian(x, y,
- rect_size(filter_window),
- dx, dy, t,
- stride,
- pass_stride,
- frame_offset,
- use_time,
- buffer,
- transform, rank,
- weight, XtWX, XtWY,
- localIdx);
+ const int low = max(rect.x, x - f);
+ const int high = min(rect.z, x + f + 1);
+ float sum = 0.0f;
+ for (int x1 = low; x1 < high; x1++) {
+ sum += difference_image[y * stride + x1];
+ }
+ float weight = sum * (1.0f / (high - low));
+
+ /* Reconstruction data is only stored for pixels inside the filter window,
+ * so compute the pixel's index in there. */
+ int storage_ofs = coord_to_local_index(filter_window, x, y);
+ transform += storage_ofs;
+ rank += storage_ofs;
+ XtWX += storage_ofs;
+ XtWY += storage_ofs;
+
+ kernel_filter_construct_gramian(x,
+ y,
+ rect_size(filter_window),
+ dx,
+ dy,
+ t,
+ stride,
+ pass_stride,
+ frame_offset,
+ use_time,
+ buffer,
+ transform,
+ rank,
+ weight,
+ XtWX,
+ XtWY,
+ localIdx);
}
-ccl_device_inline void kernel_filter_nlm_normalize(int x, int y,
+ccl_device_inline void kernel_filter_nlm_normalize(int x,
+ int y,
ccl_global float *out_image,
- const ccl_global float *ccl_restrict accum_image,
+ const ccl_global float *ccl_restrict
+ accum_image,
int stride)
{
- out_image[y*stride + x] /= accum_image[y*stride + x];
+ out_image[y * stride + x] /= accum_image[y * stride + x];
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_prefilter.h b/intern/cycles/kernel/filter/filter_prefilter.h
index e24f4feb28d..8211311313d 100644
--- a/intern/cycles/kernel/filter/filter_prefilter.h
+++ b/intern/cycles/kernel/filter/filter_prefilter.h
@@ -27,7 +27,8 @@ CCL_NAMESPACE_BEGIN
*/
ccl_device void kernel_filter_divide_shadow(int sample,
CCL_FILTER_TILE_INFO,
- int x, int y,
+ int x,
+ int y,
ccl_global float *unfilteredA,
ccl_global float *unfilteredB,
ccl_global float *sampleVariance,
@@ -37,37 +38,39 @@ ccl_device void kernel_filter_divide_shadow(int sample,
int buffer_pass_stride,
int buffer_denoising_offset)
{
- int xtile = (x < tile_info->x[1])? 0: ((x < tile_info->x[2])? 1: 2);
- int ytile = (y < tile_info->y[1])? 0: ((y < tile_info->y[2])? 1: 2);
- int tile = ytile*3+xtile;
+ int xtile = (x < tile_info->x[1]) ? 0 : ((x < tile_info->x[2]) ? 1 : 2);
+ int ytile = (y < tile_info->y[1]) ? 0 : ((y < tile_info->y[2]) ? 1 : 2);
+ int tile = ytile * 3 + xtile;
- int offset = tile_info->offsets[tile];
- int stride = tile_info->strides[tile];
- const ccl_global float *ccl_restrict center_buffer = (ccl_global float*) ccl_get_tile_buffer(tile);
- center_buffer += (y*stride + x + offset)*buffer_pass_stride;
- center_buffer += buffer_denoising_offset + 14;
+ int offset = tile_info->offsets[tile];
+ int stride = tile_info->strides[tile];
+ const ccl_global float *ccl_restrict center_buffer = (ccl_global float *)ccl_get_tile_buffer(
+ tile);
+ center_buffer += (y * stride + x + offset) * buffer_pass_stride;
+ center_buffer += buffer_denoising_offset + 14;
- int buffer_w = align_up(rect.z - rect.x, 4);
- int idx = (y-rect.y)*buffer_w + (x - rect.x);
- unfilteredA[idx] = center_buffer[1] / max(center_buffer[0], 1e-7f);
- unfilteredB[idx] = center_buffer[4] / max(center_buffer[3], 1e-7f);
+ int buffer_w = align_up(rect.z - rect.x, 4);
+ int idx = (y - rect.y) * buffer_w + (x - rect.x);
+ unfilteredA[idx] = center_buffer[1] / max(center_buffer[0], 1e-7f);
+ unfilteredB[idx] = center_buffer[4] / max(center_buffer[3], 1e-7f);
- float varA = center_buffer[2];
- float varB = center_buffer[5];
- int odd_sample = (sample+1)/2;
- int even_sample = sample/2;
+ float varA = center_buffer[2];
+ float varB = center_buffer[5];
+ int odd_sample = (sample + 1) / 2;
+ int even_sample = sample / 2;
- /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
- * update does not work efficiently with atomics in the kernel. */
- varA = max(0.0f, varA - unfilteredA[idx]*unfilteredA[idx]*odd_sample);
- varB = max(0.0f, varB - unfilteredB[idx]*unfilteredB[idx]*even_sample);
+ /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
+ * update does not work efficiently with atomics in the kernel. */
+ varA = max(0.0f, varA - unfilteredA[idx] * unfilteredA[idx] * odd_sample);
+ varB = max(0.0f, varB - unfilteredB[idx] * unfilteredB[idx] * even_sample);
- varA /= max(odd_sample - 1, 1);
- varB /= max(even_sample - 1, 1);
+ varA /= max(odd_sample - 1, 1);
+ varB /= max(even_sample - 1, 1);
- sampleVariance[idx] = 0.5f*(varA + varB) / sample;
- sampleVarianceV[idx] = 0.5f * (varA - varB) * (varA - varB) / (sample*sample);
- bufferVariance[idx] = 0.5f * (unfilteredA[idx] - unfilteredB[idx]) * (unfilteredA[idx] - unfilteredB[idx]);
+ sampleVariance[idx] = 0.5f * (varA + varB) / sample;
+ sampleVarianceV[idx] = 0.5f * (varA - varB) * (varA - varB) / (sample * sample);
+ bufferVariance[idx] = 0.5f * (unfilteredA[idx] - unfilteredB[idx]) *
+ (unfilteredA[idx] - unfilteredB[idx]);
}
/* Load a regular feature from the render buffers into the denoise buffer.
@@ -80,55 +83,65 @@ ccl_device void kernel_filter_divide_shadow(int sample,
*/
ccl_device void kernel_filter_get_feature(int sample,
CCL_FILTER_TILE_INFO,
- int m_offset, int v_offset,
- int x, int y,
+ int m_offset,
+ int v_offset,
+ int x,
+ int y,
ccl_global float *mean,
ccl_global float *variance,
float scale,
- int4 rect, int buffer_pass_stride,
+ int4 rect,
+ int buffer_pass_stride,
int buffer_denoising_offset)
{
- int xtile = (x < tile_info->x[1])? 0: ((x < tile_info->x[2])? 1: 2);
- int ytile = (y < tile_info->y[1])? 0: ((y < tile_info->y[2])? 1: 2);
- int tile = ytile*3+xtile;
- ccl_global float *center_buffer = ((ccl_global float*) ccl_get_tile_buffer(tile)) + (tile_info->offsets[tile] + y*tile_info->strides[tile] + x)*buffer_pass_stride + buffer_denoising_offset;
+ int xtile = (x < tile_info->x[1]) ? 0 : ((x < tile_info->x[2]) ? 1 : 2);
+ int ytile = (y < tile_info->y[1]) ? 0 : ((y < tile_info->y[2]) ? 1 : 2);
+ int tile = ytile * 3 + xtile;
+ ccl_global float *center_buffer = ((ccl_global float *)ccl_get_tile_buffer(tile)) +
+ (tile_info->offsets[tile] + y * tile_info->strides[tile] + x) *
+ buffer_pass_stride +
+ buffer_denoising_offset;
- int buffer_w = align_up(rect.z - rect.x, 4);
- int idx = (y-rect.y)*buffer_w + (x - rect.x);
+ int buffer_w = align_up(rect.z - rect.x, 4);
+ int idx = (y - rect.y) * buffer_w + (x - rect.x);
- float val = scale * center_buffer[m_offset];
- mean[idx] = val;
+ float val = scale * center_buffer[m_offset];
+ mean[idx] = val;
- if(v_offset >= 0) {
- if(sample > 1) {
- /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
- * update does not work efficiently with atomics in the kernel. */
- variance[idx] = max(0.0f, (center_buffer[v_offset] - val*val*sample) / (sample * (sample-1)));
- }
- else {
- /* Can't compute variance with single sample, just set it very high. */
- variance[idx] = 1e10f;
- }
- }
+ if (v_offset >= 0) {
+ if (sample > 1) {
+ /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
+ * update does not work efficiently with atomics in the kernel. */
+ variance[idx] = max(
+ 0.0f, (center_buffer[v_offset] - val * val * sample) / (sample * (sample - 1)));
+ }
+ else {
+ /* Can't compute variance with a single sample, just set it very high. */
+ variance[idx] = 1e10f;
+ }
+ }
}
ccl_device void kernel_filter_write_feature(int sample,
- int x, int y,
+ int x,
+ int y,
int4 buffer_params,
ccl_global float *from,
ccl_global float *buffer,
int out_offset,
int4 rect)
{
- ccl_global float *combined_buffer = buffer + (y*buffer_params.y + x + buffer_params.x)*buffer_params.z;
+ ccl_global float *combined_buffer = buffer + (y * buffer_params.y + x + buffer_params.x) *
+ buffer_params.z;
- int buffer_w = align_up(rect.z - rect.x, 4);
- int idx = (y-rect.y)*buffer_w + (x - rect.x);
+ int buffer_w = align_up(rect.z - rect.x, 4);
+ int idx = (y - rect.y) * buffer_w + (x - rect.x);
- combined_buffer[out_offset] = from[idx];
+ combined_buffer[out_offset] = from[idx];
}
-ccl_device void kernel_filter_detect_outliers(int x, int y,
+ccl_device void kernel_filter_detect_outliers(int x,
+ int y,
ccl_global float *image,
ccl_global float *variance,
ccl_global float *depth,
@@ -136,123 +149,131 @@ ccl_device void kernel_filter_detect_outliers(int x, int y,
int4 rect,
int pass_stride)
{
- int buffer_w = align_up(rect.z - rect.x, 4);
+ int buffer_w = align_up(rect.z - rect.x, 4);
- int n = 0;
- float values[25];
- float pixel_variance, max_variance = 0.0f;
- for(int y1 = max(y-2, rect.y); y1 < min(y+3, rect.w); y1++) {
- for(int x1 = max(x-2, rect.x); x1 < min(x+3, rect.z); x1++) {
- int idx = (y1-rect.y)*buffer_w + (x1-rect.x);
- float3 color = make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]);
- color = max(color, make_float3(0.0f, 0.0f, 0.0f));
- float L = average(color);
+ int n = 0;
+ float values[25];
+ float pixel_variance, max_variance = 0.0f;
+ for (int y1 = max(y - 2, rect.y); y1 < min(y + 3, rect.w); y1++) {
+ for (int x1 = max(x - 2, rect.x); x1 < min(x + 3, rect.z); x1++) {
+ int idx = (y1 - rect.y) * buffer_w + (x1 - rect.x);
+ float3 color = make_float3(
+ image[idx], image[idx + pass_stride], image[idx + 2 * pass_stride]);
+ color = max(color, make_float3(0.0f, 0.0f, 0.0f));
+ float L = average(color);
- /* Find the position of L. */
- int i;
- for(i = 0; i < n; i++) {
- if(values[i] > L) break;
- }
- /* Make space for L by shifting all following values to the right. */
- for(int j = n; j > i; j--) {
- values[j] = values[j-1];
- }
- /* Insert L. */
- values[i] = L;
- n++;
+ /* Find the position of L. */
+ int i;
+ for (i = 0; i < n; i++) {
+ if (values[i] > L)
+ break;
+ }
+ /* Make space for L by shifting all following values to the right. */
+ for (int j = n; j > i; j--) {
+ values[j] = values[j - 1];
+ }
+ /* Insert L. */
+ values[i] = L;
+ n++;
- float3 pixel_var = make_float3(variance[idx], variance[idx+pass_stride], variance[idx+2*pass_stride]);
- float var = average(pixel_var);
- if((x1 == x) && (y1 == y)) {
- pixel_variance = (pixel_var.x < 0.0f || pixel_var.y < 0.0f || pixel_var.z < 0.0f)? -1.0f : var;
- }
- else {
- max_variance = max(max_variance, var);
- }
- }
- }
+ float3 pixel_var = make_float3(
+ variance[idx], variance[idx + pass_stride], variance[idx + 2 * pass_stride]);
+ float var = average(pixel_var);
+ if ((x1 == x) && (y1 == y)) {
+ pixel_variance = (pixel_var.x < 0.0f || pixel_var.y < 0.0f || pixel_var.z < 0.0f) ? -1.0f :
+ var;
+ }
+ else {
+ max_variance = max(max_variance, var);
+ }
+ }
+ }
- max_variance += 1e-4f;
+ max_variance += 1e-4f;
- int idx = (y-rect.y)*buffer_w + (x-rect.x);
- float3 color = make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]);
- color = max(color, make_float3(0.0f, 0.0f, 0.0f));
- float L = average(color);
+ int idx = (y - rect.y) * buffer_w + (x - rect.x);
+ float3 color = make_float3(image[idx], image[idx + pass_stride], image[idx + 2 * pass_stride]);
+ color = max(color, make_float3(0.0f, 0.0f, 0.0f));
+ float L = average(color);
- float ref = 2.0f*values[(int)(n*0.75f)];
+ float ref = 2.0f * values[(int)(n * 0.75f)];
- /* Slightly offset values to avoid false positives in (almost) black areas. */
- max_variance += 1e-5f;
- ref -= 1e-5f;
+ /* Slightly offset values to avoid false positives in (almost) black areas. */
+ max_variance += 1e-5f;
+ ref -= 1e-5f;
- if(L > ref) {
- /* The pixel appears to be an outlier.
- * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is that the pixel
- * should actually be at the reference value:
- * If the reference is within the 3-sigma interval, the pixel is assumed to be a statistical outlier.
- * Otherwise, it is very unlikely that the pixel should be darker, which indicates a legitimate highlight.
- */
+ if (L > ref) {
+ /* The pixel appears to be an outlier.
+ * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is that the pixel
+ * should actually be at the reference value:
+ * If the reference is within the 3-sigma interval, the pixel is assumed to be a statistical outlier.
+ * Otherwise, it is very unlikely that the pixel should be darker, which indicates a legitimate highlight.
+ */
- if(pixel_variance < 0.0f || pixel_variance > 9.0f * max_variance) {
- depth[idx] = -depth[idx];
- color *= ref/L;
- variance[idx] = variance[idx + pass_stride] = variance[idx + 2*pass_stride] = max_variance;
- }
- else {
- float stddev = sqrtf(pixel_variance);
- if(L - 3*stddev < ref) {
- /* The pixel is an outlier, so negate the depth value to mark it as one.
- * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */
- depth[idx] = -depth[idx];
- float fac = ref/L;
- color *= fac;
- variance[idx ] *= fac*fac;
- variance[idx + pass_stride] *= fac*fac;
- variance[idx+2*pass_stride] *= fac*fac;
- }
- }
- }
- out[idx ] = color.x;
- out[idx + pass_stride] = color.y;
- out[idx+2*pass_stride] = color.z;
+ if (pixel_variance < 0.0f || pixel_variance > 9.0f * max_variance) {
+ depth[idx] = -depth[idx];
+ color *= ref / L;
+ variance[idx] = variance[idx + pass_stride] = variance[idx + 2 * pass_stride] = max_variance;
+ }
+ else {
+ float stddev = sqrtf(pixel_variance);
+ if (L - 3 * stddev < ref) {
+ /* The pixel is an outlier, so negate the depth value to mark it as one.
+ * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */
+ depth[idx] = -depth[idx];
+ float fac = ref / L;
+ color *= fac;
+ variance[idx] *= fac * fac;
+ variance[idx + pass_stride] *= fac * fac;
+ variance[idx + 2 * pass_stride] *= fac * fac;
+ }
+ }
+ }
+ out[idx] = color.x;
+ out[idx + pass_stride] = color.y;
+ out[idx + 2 * pass_stride] = color.z;
}
/* Combine A/B buffers.
* Calculates the combined mean and the buffer variance. */
-ccl_device void kernel_filter_combine_halves(int x, int y,
+ccl_device void kernel_filter_combine_halves(int x,
+ int y,
ccl_global float *mean,
ccl_global float *variance,
ccl_global float *a,
ccl_global float *b,
- int4 rect, int r)
+ int4 rect,
+ int r)
{
- int buffer_w = align_up(rect.z - rect.x, 4);
- int idx = (y-rect.y)*buffer_w + (x - rect.x);
+ int buffer_w = align_up(rect.z - rect.x, 4);
+ int idx = (y - rect.y) * buffer_w + (x - rect.x);
- if(mean) mean[idx] = 0.5f * (a[idx]+b[idx]);
- if(variance) {
- if(r == 0) variance[idx] = 0.25f * (a[idx]-b[idx])*(a[idx]-b[idx]);
- else {
- variance[idx] = 0.0f;
- float values[25];
- int numValues = 0;
- for(int py = max(y-r, rect.y); py < min(y+r+1, rect.w); py++) {
- for(int px = max(x-r, rect.x); px < min(x+r+1, rect.z); px++) {
- int pidx = (py-rect.y)*buffer_w + (px-rect.x);
- values[numValues++] = 0.25f * (a[pidx]-b[pidx])*(a[pidx]-b[pidx]);
- }
- }
- /* Insertion-sort the variances (fast enough for 25 elements). */
- for(int i = 1; i < numValues; i++) {
- float v = values[i];
- int j;
- for(j = i-1; j >= 0 && values[j] > v; j--)
- values[j+1] = values[j];
- values[j+1] = v;
- }
- variance[idx] = values[(7*numValues)/8];
- }
- }
+ if (mean)
+ mean[idx] = 0.5f * (a[idx] + b[idx]);
+ if (variance) {
+ if (r == 0)
+ variance[idx] = 0.25f * (a[idx] - b[idx]) * (a[idx] - b[idx]);
+ else {
+ variance[idx] = 0.0f;
+ float values[25];
+ int numValues = 0;
+ for (int py = max(y - r, rect.y); py < min(y + r + 1, rect.w); py++) {
+ for (int px = max(x - r, rect.x); px < min(x + r + 1, rect.z); px++) {
+ int pidx = (py - rect.y) * buffer_w + (px - rect.x);
+ values[numValues++] = 0.25f * (a[pidx] - b[pidx]) * (a[pidx] - b[pidx]);
+ }
+ }
+ /* Insertion-sort the variances (fast enough for 25 elements). */
+ for (int i = 1; i < numValues; i++) {
+ float v = values[i];
+ int j;
+ for (j = i - 1; j >= 0 && values[j] > v; j--)
+ values[j + 1] = values[j];
+ values[j + 1] = v;
+ }
+ variance[idx] = values[(7 * numValues) / 8];
+ }
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_reconstruction.h b/intern/cycles/kernel/filter/filter_reconstruction.h
index ceda8f71f98..850f20584da 100644
--- a/intern/cycles/kernel/filter/filter_reconstruction.h
+++ b/intern/cycles/kernel/filter/filter_reconstruction.h
@@ -16,63 +16,75 @@
CCL_NAMESPACE_BEGIN
-ccl_device_inline void kernel_filter_construct_gramian(int x, int y,
+ccl_device_inline void kernel_filter_construct_gramian(int x,
+ int y,
int storage_stride,
- int dx, int dy, int t,
+ int dx,
+ int dy,
+ int t,
int buffer_stride,
int pass_stride,
int frame_offset,
bool use_time,
const ccl_global float *ccl_restrict buffer,
- const ccl_global float *ccl_restrict transform,
+ const ccl_global float *ccl_restrict
+ transform,
ccl_global int *rank,
float weight,
ccl_global float *XtWX,
ccl_global float3 *XtWY,
int localIdx)
{
- if(weight < 1e-3f) {
- return;
- }
+ if (weight < 1e-3f) {
+ return;
+ }
- int p_offset = y * buffer_stride + x;
- int q_offset = (y+dy) * buffer_stride + (x+dx) + frame_offset;
+ int p_offset = y * buffer_stride + x;
+ int q_offset = (y + dy) * buffer_stride + (x + dx) + frame_offset;
#ifdef __KERNEL_GPU__
- const int stride = storage_stride;
+ const int stride = storage_stride;
#else
- const int stride = 1;
- (void) storage_stride;
+ const int stride = 1;
+ (void)storage_stride;
#endif
#ifdef __KERNEL_CUDA__
- ccl_local float shared_design_row[(DENOISE_FEATURES+1)*CCL_MAX_LOCAL_SIZE];
- ccl_local_param float *design_row = shared_design_row + localIdx*(DENOISE_FEATURES+1);
+ ccl_local float shared_design_row[(DENOISE_FEATURES + 1) * CCL_MAX_LOCAL_SIZE];
+ ccl_local_param float *design_row = shared_design_row + localIdx * (DENOISE_FEATURES + 1);
#else
- float design_row[DENOISE_FEATURES+1];
+ float design_row[DENOISE_FEATURES + 1];
#endif
- float3 q_color = filter_get_color(buffer + q_offset, pass_stride);
+ float3 q_color = filter_get_color(buffer + q_offset, pass_stride);
- /* If the pixel was flagged as an outlier during prefiltering, skip it. */
- if(ccl_get_feature(buffer + q_offset, 0) < 0.0f) {
- return;
- }
+ /* If the pixel was flagged as an outlier during prefiltering, skip it. */
+ if (ccl_get_feature(buffer + q_offset, 0) < 0.0f) {
+ return;
+ }
- filter_get_design_row_transform(make_int3(x, y, t), buffer + p_offset,
- make_int3(x+dx, y+dy, t), buffer + q_offset,
- pass_stride, *rank, design_row, transform, stride, use_time);
+ filter_get_design_row_transform(make_int3(x, y, t),
+ buffer + p_offset,
+ make_int3(x + dx, y + dy, t),
+ buffer + q_offset,
+ pass_stride,
+ *rank,
+ design_row,
+ transform,
+ stride,
+ use_time);
#ifdef __KERNEL_GPU__
- math_trimatrix_add_gramian_strided(XtWX, (*rank)+1, design_row, weight, stride);
- math_vec3_add_strided(XtWY, (*rank)+1, design_row, weight * q_color, stride);
+ math_trimatrix_add_gramian_strided(XtWX, (*rank) + 1, design_row, weight, stride);
+ math_vec3_add_strided(XtWY, (*rank) + 1, design_row, weight * q_color, stride);
#else
- math_trimatrix_add_gramian(XtWX, (*rank)+1, design_row, weight);
- math_vec3_add(XtWY, (*rank)+1, design_row, weight * q_color);
+ math_trimatrix_add_gramian(XtWX, (*rank) + 1, design_row, weight);
+ math_vec3_add(XtWY, (*rank) + 1, design_row, weight * q_color);
#endif
}
-ccl_device_inline void kernel_filter_finalize(int x, int y,
+ccl_device_inline void kernel_filter_finalize(int x,
+ int y,
ccl_global float *buffer,
ccl_global int *rank,
int storage_stride,
@@ -82,47 +94,47 @@ ccl_device_inline void kernel_filter_finalize(int x, int y,
int sample)
{
#ifdef __KERNEL_GPU__
- const int stride = storage_stride;
+ const int stride = storage_stride;
#else
- const int stride = 1;
- (void) storage_stride;
+ const int stride = 1;
+ (void)storage_stride;
#endif
- if(XtWX[0] < 1e-3f) {
- /* There is not enough information to determine a denoised result.
- * As a fallback, keep the original value of the pixel. */
- return;
- }
-
- /* The weighted average of pixel colors (essentially, the NLM-filtered image).
- * In case the solution of the linear model fails due to numerical issues or
- * returns non-sensical negative values, fall back to this value. */
- float3 mean_color = XtWY[0]/XtWX[0];
-
- math_trimatrix_vec3_solve(XtWX, XtWY, (*rank)+1, stride);
-
- float3 final_color = XtWY[0];
- if(!isfinite3_safe(final_color) ||
- (final_color.x < -0.01f || final_color.y < -0.01f || final_color.z < -0.01f))
- {
- final_color = mean_color;
- }
-
- /* Clamp pixel value to positive values. */
- final_color = max(final_color, make_float3(0.0f, 0.0f, 0.0f));
-
- ccl_global float *combined_buffer = buffer + (y*buffer_params.y + x + buffer_params.x)*buffer_params.z;
- if(buffer_params.w >= 0) {
- final_color *= sample;
- if(buffer_params.w > 0) {
- final_color.x += combined_buffer[buffer_params.w+0];
- final_color.y += combined_buffer[buffer_params.w+1];
- final_color.z += combined_buffer[buffer_params.w+2];
- }
- }
- combined_buffer[0] = final_color.x;
- combined_buffer[1] = final_color.y;
- combined_buffer[2] = final_color.z;
+ if (XtWX[0] < 1e-3f) {
+ /* There is not enough information to determine a denoised result.
+ * As a fallback, keep the original value of the pixel. */
+ return;
+ }
+
+ /* The weighted average of pixel colors (essentially, the NLM-filtered image).
+ * In case the solution of the linear model fails due to numerical issues or
+ * returns nonsensical negative values, fall back to this value. */
+ float3 mean_color = XtWY[0] / XtWX[0];
+
+ math_trimatrix_vec3_solve(XtWX, XtWY, (*rank) + 1, stride);
+
+ float3 final_color = XtWY[0];
+ if (!isfinite3_safe(final_color) ||
+ (final_color.x < -0.01f || final_color.y < -0.01f || final_color.z < -0.01f)) {
+ final_color = mean_color;
+ }
+
+ /* Clamp pixel value to positive values. */
+ final_color = max(final_color, make_float3(0.0f, 0.0f, 0.0f));
+
+ ccl_global float *combined_buffer = buffer + (y * buffer_params.y + x + buffer_params.x) *
+ buffer_params.z;
+ if (buffer_params.w >= 0) {
+ final_color *= sample;
+ if (buffer_params.w > 0) {
+ final_color.x += combined_buffer[buffer_params.w + 0];
+ final_color.y += combined_buffer[buffer_params.w + 1];
+ final_color.z += combined_buffer[buffer_params.w + 2];
+ }
+ }
+ combined_buffer[0] = final_color.x;
+ combined_buffer[1] = final_color.y;
+ combined_buffer[2] = final_color.z;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_transform.h b/intern/cycles/kernel/filter/filter_transform.h
index 94e27bb02fd..69e3c7c458d 100644
--- a/intern/cycles/kernel/filter/filter_transform.h
+++ b/intern/cycles/kernel/filter/filter_transform.h
@@ -18,92 +18,101 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buffer,
CCL_FILTER_TILE_INFO,
- int x, int y, int4 rect,
- int pass_stride, int frame_stride,
+ int x,
+ int y,
+ int4 rect,
+ int pass_stride,
+ int frame_stride,
bool use_time,
- float *transform, int *rank,
- int radius, float pca_threshold)
+ float *transform,
+ int *rank,
+ int radius,
+ float pca_threshold)
{
- int buffer_w = align_up(rect.z - rect.x, 4);
-
- float features[DENOISE_FEATURES];
-
- const float *ccl_restrict pixel_buffer;
- int3 pixel;
-
- int num_features = use_time? 11 : 10;
-
- /* === Calculate denoising window. === */
- int2 low = make_int2(max(rect.x, x - radius),
- max(rect.y, y - radius));
- int2 high = make_int2(min(rect.z, x + radius + 1),
- min(rect.w, y + radius + 1));
- int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames;
-
- /* === Shift feature passes to have mean 0. === */
- float feature_means[DENOISE_FEATURES];
- math_vector_zero(feature_means, num_features);
- FOR_PIXEL_WINDOW {
- filter_get_features(pixel, pixel_buffer, features, use_time, NULL, pass_stride);
- math_vector_add(feature_means, features, num_features);
- } END_FOR_PIXEL_WINDOW
-
- math_vector_scale(feature_means, 1.0f / num_pixels, num_features);
-
- /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */
- float feature_scale[DENOISE_FEATURES];
- math_vector_zero(feature_scale, num_features);
-
- FOR_PIXEL_WINDOW {
- filter_get_feature_scales(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
- math_vector_max(feature_scale, features, num_features);
- } END_FOR_PIXEL_WINDOW
-
- filter_calculate_scale(feature_scale, use_time);
-
- /* === Generate the feature transformation. ===
- * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space
- * which generally has fewer dimensions. This mainly helps to prevent overfitting. */
- float feature_matrix[DENOISE_FEATURES*DENOISE_FEATURES];
- math_matrix_zero(feature_matrix, num_features);
- FOR_PIXEL_WINDOW {
- filter_get_features(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
- math_vector_mul(features, feature_scale, num_features);
- math_matrix_add_gramian(feature_matrix, num_features, features, 1.0f);
- } END_FOR_PIXEL_WINDOW
-
- math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, 1);
- *rank = 0;
- /* Prevent overfitting when a small window is used. */
- int max_rank = min(num_features, num_pixels/3);
- if(pca_threshold < 0.0f) {
- float threshold_energy = 0.0f;
- for(int i = 0; i < num_features; i++) {
- threshold_energy += feature_matrix[i*num_features+i];
- }
- threshold_energy *= 1.0f - (-pca_threshold);
-
- float reduced_energy = 0.0f;
- for(int i = 0; i < max_rank; i++, (*rank)++) {
- if(i >= 2 && reduced_energy >= threshold_energy)
- break;
- float s = feature_matrix[i*num_features+i];
- reduced_energy += s;
- }
- }
- else {
- for(int i = 0; i < max_rank; i++, (*rank)++) {
- float s = feature_matrix[i*num_features+i];
- if(i >= 2 && sqrtf(s) < pca_threshold)
- break;
- }
- }
-
- /* Bake the feature scaling into the transformation matrix. */
- for(int i = 0; i < (*rank); i++) {
- math_vector_mul(transform + i*num_features, feature_scale, num_features);
- }
- math_matrix_transpose(transform, num_features, 1);
+ int buffer_w = align_up(rect.z - rect.x, 4);
+
+ float features[DENOISE_FEATURES];
+
+ const float *ccl_restrict pixel_buffer;
+ int3 pixel;
+
+ int num_features = use_time ? 11 : 10;
+
+ /* === Calculate denoising window. === */
+ int2 low = make_int2(max(rect.x, x - radius), max(rect.y, y - radius));
+ int2 high = make_int2(min(rect.z, x + radius + 1), min(rect.w, y + radius + 1));
+ int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames;
+
+ /* === Shift feature passes to have mean 0. === */
+ float feature_means[DENOISE_FEATURES];
+ math_vector_zero(feature_means, num_features);
+ FOR_PIXEL_WINDOW
+ {
+ filter_get_features(pixel, pixel_buffer, features, use_time, NULL, pass_stride);
+ math_vector_add(feature_means, features, num_features);
+ }
+ END_FOR_PIXEL_WINDOW
+
+ math_vector_scale(feature_means, 1.0f / num_pixels, num_features);
+
+ /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */
+ float feature_scale[DENOISE_FEATURES];
+ math_vector_zero(feature_scale, num_features);
+
+ FOR_PIXEL_WINDOW
+ {
+ filter_get_feature_scales(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
+ math_vector_max(feature_scale, features, num_features);
+ }
+ END_FOR_PIXEL_WINDOW
+
+ filter_calculate_scale(feature_scale, use_time);
+
+ /* === Generate the feature transformation. ===
+ * This transformation maps the num_features-dimensional feature space to a reduced feature (r-feature) space
+ * which generally has fewer dimensions. This mainly helps to prevent overfitting. */
+ float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES];
+ math_matrix_zero(feature_matrix, num_features);
+ FOR_PIXEL_WINDOW
+ {
+ filter_get_features(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
+ math_vector_mul(features, feature_scale, num_features);
+ math_matrix_add_gramian(feature_matrix, num_features, features, 1.0f);
+ }
+ END_FOR_PIXEL_WINDOW
+
+ math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, 1);
+ *rank = 0;
+ /* Prevent overfitting when a small window is used. */
+ int max_rank = min(num_features, num_pixels / 3);
+ if (pca_threshold < 0.0f) {
+ float threshold_energy = 0.0f;
+ for (int i = 0; i < num_features; i++) {
+ threshold_energy += feature_matrix[i * num_features + i];
+ }
+ threshold_energy *= 1.0f - (-pca_threshold);
+
+ float reduced_energy = 0.0f;
+ for (int i = 0; i < max_rank; i++, (*rank)++) {
+ if (i >= 2 && reduced_energy >= threshold_energy)
+ break;
+ float s = feature_matrix[i * num_features + i];
+ reduced_energy += s;
+ }
+ }
+ else {
+ for (int i = 0; i < max_rank; i++, (*rank)++) {
+ float s = feature_matrix[i * num_features + i];
+ if (i >= 2 && sqrtf(s) < pca_threshold)
+ break;
+ }
+ }
+
+ /* Bake the feature scaling into the transformation matrix. */
+ for (int i = 0; i < (*rank); i++) {
+ math_vector_mul(transform + i * num_features, feature_scale, num_features);
+ }
+ math_matrix_transpose(transform, num_features, 1);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_transform_gpu.h b/intern/cycles/kernel/filter/filter_transform_gpu.h
index ed8ddcb49b1..89cddfd927f 100644
--- a/intern/cycles/kernel/filter/filter_transform_gpu.h
+++ b/intern/cycles/kernel/filter/filter_transform_gpu.h
@@ -18,106 +18,110 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_filter_construct_transform(const ccl_global float *ccl_restrict buffer,
CCL_FILTER_TILE_INFO,
- int x, int y, int4 rect,
- int pass_stride, int frame_stride,
+ int x,
+ int y,
+ int4 rect,
+ int pass_stride,
+ int frame_stride,
bool use_time,
ccl_global float *transform,
ccl_global int *rank,
- int radius, float pca_threshold,
- int transform_stride, int localIdx)
+ int radius,
+ float pca_threshold,
+ int transform_stride,
+ int localIdx)
{
- int buffer_w = align_up(rect.z - rect.x, 4);
+ int buffer_w = align_up(rect.z - rect.x, 4);
#ifdef __KERNEL_CUDA__
- ccl_local float shared_features[DENOISE_FEATURES*CCL_MAX_LOCAL_SIZE];
- ccl_local_param float *features = shared_features + localIdx*DENOISE_FEATURES;
+ ccl_local float shared_features[DENOISE_FEATURES * CCL_MAX_LOCAL_SIZE];
+ ccl_local_param float *features = shared_features + localIdx * DENOISE_FEATURES;
#else
- float features[DENOISE_FEATURES];
+ float features[DENOISE_FEATURES];
#endif
- int num_features = use_time? 11 : 10;
-
- /* === Calculate denoising window. === */
- int2 low = make_int2(max(rect.x, x - radius),
- max(rect.y, y - radius));
- int2 high = make_int2(min(rect.z, x + radius + 1),
- min(rect.w, y + radius + 1));
- int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames;
- const ccl_global float *ccl_restrict pixel_buffer;
- int3 pixel;
-
-
-
-
- /* === Shift feature passes to have mean 0. === */
- float feature_means[DENOISE_FEATURES];
- math_vector_zero(feature_means, num_features);
- FOR_PIXEL_WINDOW {
- filter_get_features(pixel, pixel_buffer, features, use_time, NULL, pass_stride);
- math_vector_add(feature_means, features, num_features);
- } END_FOR_PIXEL_WINDOW
-
- math_vector_scale(feature_means, 1.0f / num_pixels, num_features);
-
- /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */
- float feature_scale[DENOISE_FEATURES];
- math_vector_zero(feature_scale, num_features);
-
- FOR_PIXEL_WINDOW {
- filter_get_feature_scales(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
- math_vector_max(feature_scale, features, num_features);
- } END_FOR_PIXEL_WINDOW
-
- filter_calculate_scale(feature_scale, use_time);
-
-
-
- /* === Generate the feature transformation. ===
- * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space
- * which generally has fewer dimensions. This mainly helps to prevent overfitting. */
- float feature_matrix[DENOISE_FEATURES*DENOISE_FEATURES];
- math_matrix_zero(feature_matrix, num_features);
- FOR_PIXEL_WINDOW {
- filter_get_features(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
- math_vector_mul(features, feature_scale, num_features);
- math_matrix_add_gramian(feature_matrix, num_features, features, 1.0f);
- } END_FOR_PIXEL_WINDOW
-
- math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, transform_stride);
- *rank = 0;
- /* Prevent overfitting when a small window is used. */
- int max_rank = min(num_features, num_pixels/3);
- if(pca_threshold < 0.0f) {
- float threshold_energy = 0.0f;
- for(int i = 0; i < num_features; i++) {
- threshold_energy += feature_matrix[i*num_features+i];
- }
- threshold_energy *= 1.0f - (-pca_threshold);
-
- float reduced_energy = 0.0f;
- for(int i = 0; i < max_rank; i++, (*rank)++) {
- if(i >= 2 && reduced_energy >= threshold_energy)
- break;
- float s = feature_matrix[i*num_features+i];
- reduced_energy += s;
- }
- }
- else {
- for(int i = 0; i < max_rank; i++, (*rank)++) {
- float s = feature_matrix[i*num_features+i];
- if(i >= 2 && sqrtf(s) < pca_threshold)
- break;
- }
- }
-
- math_matrix_transpose(transform, num_features, transform_stride);
-
- /* Bake the feature scaling into the transformation matrix. */
- for(int i = 0; i < num_features; i++) {
- for(int j = 0; j < (*rank); j++) {
- transform[(i*num_features + j)*transform_stride] *= feature_scale[i];
- }
- }
+ int num_features = use_time ? 11 : 10;
+
+ /* === Calculate denoising window. === */
+ int2 low = make_int2(max(rect.x, x - radius), max(rect.y, y - radius));
+ int2 high = make_int2(min(rect.z, x + radius + 1), min(rect.w, y + radius + 1));
+ int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames;
+ const ccl_global float *ccl_restrict pixel_buffer;
+ int3 pixel;
+
+ /* === Shift feature passes to have mean 0. === */
+ float feature_means[DENOISE_FEATURES];
+ math_vector_zero(feature_means, num_features);
+ FOR_PIXEL_WINDOW
+ {
+ filter_get_features(pixel, pixel_buffer, features, use_time, NULL, pass_stride);
+ math_vector_add(feature_means, features, num_features);
+ }
+ END_FOR_PIXEL_WINDOW
+
+ math_vector_scale(feature_means, 1.0f / num_pixels, num_features);
+
+ /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */
+ float feature_scale[DENOISE_FEATURES];
+ math_vector_zero(feature_scale, num_features);
+
+ FOR_PIXEL_WINDOW
+ {
+ filter_get_feature_scales(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
+ math_vector_max(feature_scale, features, num_features);
+ }
+ END_FOR_PIXEL_WINDOW
+
+ filter_calculate_scale(feature_scale, use_time);
+
+ /* === Generate the feature transformation. ===
+ * This transformation maps the num_features-dimensional feature space to a reduced feature (r-feature) space
+ * which generally has fewer dimensions. This mainly helps to prevent overfitting. */
+ float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES];
+ math_matrix_zero(feature_matrix, num_features);
+ FOR_PIXEL_WINDOW
+ {
+ filter_get_features(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
+ math_vector_mul(features, feature_scale, num_features);
+ math_matrix_add_gramian(feature_matrix, num_features, features, 1.0f);
+ }
+ END_FOR_PIXEL_WINDOW
+
+ math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, transform_stride);
+ *rank = 0;
+ /* Prevent overfitting when a small window is used. */
+ int max_rank = min(num_features, num_pixels / 3);
+ if (pca_threshold < 0.0f) {
+ float threshold_energy = 0.0f;
+ for (int i = 0; i < num_features; i++) {
+ threshold_energy += feature_matrix[i * num_features + i];
+ }
+ threshold_energy *= 1.0f - (-pca_threshold);
+
+ float reduced_energy = 0.0f;
+ for (int i = 0; i < max_rank; i++, (*rank)++) {
+ if (i >= 2 && reduced_energy >= threshold_energy)
+ break;
+ float s = feature_matrix[i * num_features + i];
+ reduced_energy += s;
+ }
+ }
+ else {
+ for (int i = 0; i < max_rank; i++, (*rank)++) {
+ float s = feature_matrix[i * num_features + i];
+ if (i >= 2 && sqrtf(s) < pca_threshold)
+ break;
+ }
+ }
+
+ math_matrix_transpose(transform, num_features, transform_stride);
+
+ /* Bake the feature scaling into the transformation matrix. */
+ for (int i = 0; i < num_features; i++) {
+ for (int j = 0; j < (*rank); j++) {
+ transform[(i * num_features + j) * transform_stride] *= feature_scale[i];
+ }
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_transform_sse.h b/intern/cycles/kernel/filter/filter_transform_sse.h
index 10bd3e477e9..22397b292db 100644
--- a/intern/cycles/kernel/filter/filter_transform_sse.h
+++ b/intern/cycles/kernel/filter/filter_transform_sse.h
@@ -18,98 +18,110 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buffer,
CCL_FILTER_TILE_INFO,
- int x, int y, int4 rect,
- int pass_stride, int frame_stride,
+ int x,
+ int y,
+ int4 rect,
+ int pass_stride,
+ int frame_stride,
bool use_time,
- float *transform, int *rank,
- int radius, float pca_threshold)
+ float *transform,
+ int *rank,
+ int radius,
+ float pca_threshold)
{
- int buffer_w = align_up(rect.z - rect.x, 4);
-
- float4 features[DENOISE_FEATURES];
- const float *ccl_restrict pixel_buffer;
- int3 pixel;
-
- int num_features = use_time? 11 : 10;
-
- /* === Calculate denoising window. === */
- int2 low = make_int2(max(rect.x, x - radius),
- max(rect.y, y - radius));
- int2 high = make_int2(min(rect.z, x + radius + 1),
- min(rect.w, y + radius + 1));
- int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames;
-
- /* === Shift feature passes to have mean 0. === */
- float4 feature_means[DENOISE_FEATURES];
- math_vector_zero_sse(feature_means, num_features);
- FOR_PIXEL_WINDOW_SSE {
- filter_get_features_sse(x4, y4, t4, active_pixels, pixel_buffer, features, use_time, NULL, pass_stride);
- math_vector_add_sse(feature_means, num_features, features);
- } END_FOR_PIXEL_WINDOW_SSE
-
- float4 pixel_scale = make_float4(1.0f / num_pixels);
- for(int i = 0; i < num_features; i++) {
- feature_means[i] = reduce_add(feature_means[i]) * pixel_scale;
- }
-
- /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */
- float4 feature_scale[DENOISE_FEATURES];
- math_vector_zero_sse(feature_scale, num_features);
- FOR_PIXEL_WINDOW_SSE {
- filter_get_feature_scales_sse(x4, y4, t4, active_pixels, pixel_buffer, features, use_time, feature_means, pass_stride);
- math_vector_max_sse(feature_scale, features, num_features);
- } END_FOR_PIXEL_WINDOW_SSE
-
- filter_calculate_scale_sse(feature_scale, use_time);
-
- /* === Generate the feature transformation. ===
- * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space
- * which generally has fewer dimensions. This mainly helps to prevent overfitting. */
- float4 feature_matrix_sse[DENOISE_FEATURES*DENOISE_FEATURES];
- math_matrix_zero_sse(feature_matrix_sse, num_features);
- FOR_PIXEL_WINDOW_SSE {
- filter_get_features_sse(x4, y4, t4, active_pixels, pixel_buffer, features, use_time, feature_means, pass_stride);
- math_vector_mul_sse(features, num_features, feature_scale);
- math_matrix_add_gramian_sse(feature_matrix_sse, num_features, features, make_float4(1.0f));
- } END_FOR_PIXEL_WINDOW_SSE
-
- float feature_matrix[DENOISE_FEATURES*DENOISE_FEATURES];
- math_matrix_hsum(feature_matrix, num_features, feature_matrix_sse);
-
- math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, 1);
-
- *rank = 0;
- /* Prevent overfitting when a small window is used. */
- int max_rank = min(num_features, num_pixels/3);
- if(pca_threshold < 0.0f) {
- float threshold_energy = 0.0f;
- for(int i = 0; i < num_features; i++) {
- threshold_energy += feature_matrix[i*num_features+i];
- }
- threshold_energy *= 1.0f - (-pca_threshold);
-
- float reduced_energy = 0.0f;
- for(int i = 0; i < max_rank; i++, (*rank)++) {
- if(i >= 2 && reduced_energy >= threshold_energy)
- break;
- float s = feature_matrix[i*num_features+i];
- reduced_energy += s;
- }
- }
- else {
- for(int i = 0; i < max_rank; i++, (*rank)++) {
- float s = feature_matrix[i*num_features+i];
- if(i >= 2 && sqrtf(s) < pca_threshold)
- break;
- }
- }
-
- math_matrix_transpose(transform, num_features, 1);
-
- /* Bake the feature scaling into the transformation matrix. */
- for(int i = 0; i < num_features; i++) {
- math_vector_scale(transform + i*num_features, feature_scale[i][0], *rank);
- }
+ int buffer_w = align_up(rect.z - rect.x, 4);
+
+ float4 features[DENOISE_FEATURES];
+ const float *ccl_restrict pixel_buffer;
+ int3 pixel;
+
+ int num_features = use_time ? 11 : 10;
+
+ /* === Calculate denoising window. === */
+ int2 low = make_int2(max(rect.x, x - radius), max(rect.y, y - radius));
+ int2 high = make_int2(min(rect.z, x + radius + 1), min(rect.w, y + radius + 1));
+ int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames;
+
+ /* === Shift feature passes to have mean 0. === */
+ float4 feature_means[DENOISE_FEATURES];
+ math_vector_zero_sse(feature_means, num_features);
+ FOR_PIXEL_WINDOW_SSE
+ {
+ filter_get_features_sse(
+ x4, y4, t4, active_pixels, pixel_buffer, features, use_time, NULL, pass_stride);
+ math_vector_add_sse(feature_means, num_features, features);
+ }
+ END_FOR_PIXEL_WINDOW_SSE
+
+ float4 pixel_scale = make_float4(1.0f / num_pixels);
+ for (int i = 0; i < num_features; i++) {
+ feature_means[i] = reduce_add(feature_means[i]) * pixel_scale;
+ }
+
+ /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */
+ float4 feature_scale[DENOISE_FEATURES];
+ math_vector_zero_sse(feature_scale, num_features);
+ FOR_PIXEL_WINDOW_SSE
+ {
+ filter_get_feature_scales_sse(
+ x4, y4, t4, active_pixels, pixel_buffer, features, use_time, feature_means, pass_stride);
+ math_vector_max_sse(feature_scale, features, num_features);
+ }
+ END_FOR_PIXEL_WINDOW_SSE
+
+ filter_calculate_scale_sse(feature_scale, use_time);
+
+ /* === Generate the feature transformation. ===
+ * This transformation maps the num_features-dimensional feature space to a reduced feature (r-feature) space
+ * which generally has fewer dimensions. This mainly helps to prevent overfitting. */
+ float4 feature_matrix_sse[DENOISE_FEATURES * DENOISE_FEATURES];
+ math_matrix_zero_sse(feature_matrix_sse, num_features);
+ FOR_PIXEL_WINDOW_SSE
+ {
+ filter_get_features_sse(
+ x4, y4, t4, active_pixels, pixel_buffer, features, use_time, feature_means, pass_stride);
+ math_vector_mul_sse(features, num_features, feature_scale);
+ math_matrix_add_gramian_sse(feature_matrix_sse, num_features, features, make_float4(1.0f));
+ }
+ END_FOR_PIXEL_WINDOW_SSE
+
+ float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES];
+ math_matrix_hsum(feature_matrix, num_features, feature_matrix_sse);
+
+ math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, 1);
+
+ *rank = 0;
+ /* Prevent overfitting when a small window is used. */
+ int max_rank = min(num_features, num_pixels / 3);
+ if (pca_threshold < 0.0f) {
+ float threshold_energy = 0.0f;
+ for (int i = 0; i < num_features; i++) {
+ threshold_energy += feature_matrix[i * num_features + i];
+ }
+ threshold_energy *= 1.0f - (-pca_threshold);
+
+ float reduced_energy = 0.0f;
+ for (int i = 0; i < max_rank; i++, (*rank)++) {
+ if (i >= 2 && reduced_energy >= threshold_energy)
+ break;
+ float s = feature_matrix[i * num_features + i];
+ reduced_energy += s;
+ }
+ }
+ else {
+ for (int i = 0; i < max_rank; i++, (*rank)++) {
+ float s = feature_matrix[i * num_features + i];
+ if (i >= 2 && sqrtf(s) < pca_threshold)
+ break;
+ }
+ }
+
+ math_matrix_transpose(transform, num_features, 1);
+
+ /* Bake the feature scaling into the transformation matrix. */
+ for (int i = 0; i < num_features; i++) {
+ math_vector_scale(transform + i * num_features, feature_scale[i][0], *rank);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h
index e991f3d685a..456608bfa22 100644
--- a/intern/cycles/kernel/geom/geom_attribute.h
+++ b/intern/cycles/kernel/geom/geom_attribute.h
@@ -30,81 +30,83 @@ ccl_device_inline uint subd_triangle_patch(KernelGlobals *kg, const ShaderData *
ccl_device_inline uint attribute_primitive_type(KernelGlobals *kg, const ShaderData *sd)
{
#ifdef __HAIR__
- if(sd->type & PRIMITIVE_ALL_CURVE) {
- return ATTR_PRIM_CURVE;
- }
- else
+ if (sd->type & PRIMITIVE_ALL_CURVE) {
+ return ATTR_PRIM_CURVE;
+ }
+ else
#endif
- if(subd_triangle_patch(kg, sd) != ~0) {
- return ATTR_PRIM_SUBD;
- }
- else {
- return ATTR_PRIM_TRIANGLE;
- }
+ if (subd_triangle_patch(kg, sd) != ~0) {
+ return ATTR_PRIM_SUBD;
+ }
+ else {
+ return ATTR_PRIM_TRIANGLE;
+ }
}
ccl_device_inline AttributeDescriptor attribute_not_found()
{
- const AttributeDescriptor desc = {ATTR_ELEMENT_NONE, (NodeAttributeType)0, 0, ATTR_STD_NOT_FOUND};
- return desc;
+ const AttributeDescriptor desc = {
+ ATTR_ELEMENT_NONE, (NodeAttributeType)0, 0, ATTR_STD_NOT_FOUND};
+ return desc;
}
/* Find attribute based on ID */
ccl_device_inline uint object_attribute_map_offset(KernelGlobals *kg, int object)
{
- return kernel_tex_fetch(__objects, object).attribute_map_offset;
+ return kernel_tex_fetch(__objects, object).attribute_map_offset;
}
-ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id)
+ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals *kg,
+ const ShaderData *sd,
+ uint id)
{
- if(sd->object == OBJECT_NONE) {
- return attribute_not_found();
- }
-
- /* for SVM, find attribute by unique id */
- uint attr_offset = object_attribute_map_offset(kg, sd->object);
- attr_offset += attribute_primitive_type(kg, sd);
- uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
-
- while(attr_map.x != id) {
- if(UNLIKELY(attr_map.x == ATTR_STD_NONE)) {
- return attribute_not_found();
- }
- attr_offset += ATTR_PRIM_TYPES;
- attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
- }
-
- AttributeDescriptor desc;
- desc.element = (AttributeElement)attr_map.y;
-
- if(sd->prim == PRIM_NONE &&
- desc.element != ATTR_ELEMENT_MESH &&
- desc.element != ATTR_ELEMENT_VOXEL &&
- desc.element != ATTR_ELEMENT_OBJECT)
- {
- return attribute_not_found();
- }
-
- /* return result */
- desc.offset = (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
- desc.type = (NodeAttributeType)(attr_map.w & 0xff);
- desc.flags = (AttributeFlag)(attr_map.w >> 8);
-
- return desc;
+ if (sd->object == OBJECT_NONE) {
+ return attribute_not_found();
+ }
+
+ /* for SVM, find attribute by unique id */
+ uint attr_offset = object_attribute_map_offset(kg, sd->object);
+ attr_offset += attribute_primitive_type(kg, sd);
+ uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
+
+ while (attr_map.x != id) {
+ if (UNLIKELY(attr_map.x == ATTR_STD_NONE)) {
+ return attribute_not_found();
+ }
+ attr_offset += ATTR_PRIM_TYPES;
+ attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
+ }
+
+ AttributeDescriptor desc;
+ desc.element = (AttributeElement)attr_map.y;
+
+ if (sd->prim == PRIM_NONE && desc.element != ATTR_ELEMENT_MESH &&
+ desc.element != ATTR_ELEMENT_VOXEL && desc.element != ATTR_ELEMENT_OBJECT) {
+ return attribute_not_found();
+ }
+
+ /* return result */
+ desc.offset = (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
+ desc.type = (NodeAttributeType)(attr_map.w & 0xff);
+ desc.flags = (AttributeFlag)(attr_map.w >> 8);
+
+ return desc;
}
/* Transform matrix attribute on meshes */
-ccl_device Transform primitive_attribute_matrix(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc)
+ccl_device Transform primitive_attribute_matrix(KernelGlobals *kg,
+ const ShaderData *sd,
+ const AttributeDescriptor desc)
{
- Transform tfm;
+ Transform tfm;
- tfm.x = kernel_tex_fetch(__attributes_float3, desc.offset + 0);
- tfm.y = kernel_tex_fetch(__attributes_float3, desc.offset + 1);
- tfm.z = kernel_tex_fetch(__attributes_float3, desc.offset + 2);
+ tfm.x = kernel_tex_fetch(__attributes_float3, desc.offset + 0);
+ tfm.y = kernel_tex_fetch(__attributes_float3, desc.offset + 1);
+ tfm.z = kernel_tex_fetch(__attributes_float3, desc.offset + 2);
- return tfm;
+ return tfm;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h
index 9b60cf6d56b..e0aacb434eb 100644
--- a/intern/cycles/kernel/geom/geom_curve.h
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -27,169 +27,199 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline float3 curvetangent(float t, float3 p0, float3 p1, float3 p2, float3 p3)
{
- float fc = 0.71f;
- float data[4];
- float t2 = t * t;
- data[0] = -3.0f * fc * t2 + 4.0f * fc * t - fc;
- data[1] = 3.0f * (2.0f - fc) * t2 + 2.0f * (fc - 3.0f) * t;
- data[2] = 3.0f * (fc - 2.0f) * t2 + 2.0f * (3.0f - 2.0f * fc) * t + fc;
- data[3] = 3.0f * fc * t2 - 2.0f * fc * t;
- return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3;
+ float fc = 0.71f;
+ float data[4];
+ float t2 = t * t;
+ data[0] = -3.0f * fc * t2 + 4.0f * fc * t - fc;
+ data[1] = 3.0f * (2.0f - fc) * t2 + 2.0f * (fc - 3.0f) * t;
+ data[2] = 3.0f * (fc - 2.0f) * t2 + 2.0f * (3.0f - 2.0f * fc) * t + fc;
+ data[3] = 3.0f * fc * t2 - 2.0f * fc * t;
+ return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3;
}
ccl_device_inline float3 curvepoint(float t, float3 p0, float3 p1, float3 p2, float3 p3)
{
- float data[4];
- float fc = 0.71f;
- float t2 = t * t;
- float t3 = t2 * t;
- data[0] = -fc * t3 + 2.0f * fc * t2 - fc * t;
- data[1] = (2.0f - fc) * t3 + (fc - 3.0f) * t2 + 1.0f;
- data[2] = (fc - 2.0f) * t3 + (3.0f - 2.0f * fc) * t2 + fc * t;
- data[3] = fc * t3 - fc * t2;
- return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3;
+ float data[4];
+ float fc = 0.71f;
+ float t2 = t * t;
+ float t3 = t2 * t;
+ data[0] = -fc * t3 + 2.0f * fc * t2 - fc * t;
+ data[1] = (2.0f - fc) * t3 + (fc - 3.0f) * t2 + 1.0f;
+ data[2] = (fc - 2.0f) * t3 + (3.0f - 2.0f * fc) * t2 + fc * t;
+ data[3] = fc * t3 - fc * t2;
+ return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3;
}
/* Reading attributes on various curve elements */
-ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
+ccl_device float curve_attribute_float(
+ KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
{
- if(desc.element == ATTR_ELEMENT_CURVE) {
-#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = 0.0f;
- if(dy) *dy = 0.0f;
-#endif
-
- return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim);
- }
- else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) {
- float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
- int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
- int k1 = k0 + 1;
-
- float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0);
- float f1 = kernel_tex_fetch(__attributes_float, desc.offset + k1);
-
-#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*(f1 - f0);
- if(dy) *dy = 0.0f;
-#endif
-
- return (1.0f - sd->u)*f0 + sd->u*f1;
- }
- else {
-#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = 0.0f;
- if(dy) *dy = 0.0f;
-#endif
-
- return 0.0f;
- }
+ if (desc.element == ATTR_ELEMENT_CURVE) {
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = 0.0f;
+ if (dy)
+ *dy = 0.0f;
+# endif
+
+ return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim);
+ }
+ else if (desc.element == ATTR_ELEMENT_CURVE_KEY ||
+ desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) {
+ float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
+ int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+ int k1 = k0 + 1;
+
+ float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0);
+ float f1 = kernel_tex_fetch(__attributes_float, desc.offset + k1);
+
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = sd->du.dx * (f1 - f0);
+ if (dy)
+ *dy = 0.0f;
+# endif
+
+ return (1.0f - sd->u) * f0 + sd->u * f1;
+ }
+ else {
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = 0.0f;
+ if (dy)
+ *dy = 0.0f;
+# endif
+
+ return 0.0f;
+ }
}
-ccl_device float2 curve_attribute_float2(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float2 *dx, float2 *dy)
+ccl_device float2 curve_attribute_float2(KernelGlobals *kg,
+ const ShaderData *sd,
+ const AttributeDescriptor desc,
+ float2 *dx,
+ float2 *dy)
{
- if(desc.element == ATTR_ELEMENT_CURVE) {
- /* idea: we can't derive any useful differentials here, but for tiled
- * mipmap image caching it would be useful to avoid reading the highest
- * detail level always. maybe a derivative based on the hair density
- * could be computed somehow? */
-#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = make_float2(0.0f, 0.0f);
- if(dy) *dy = make_float2(0.0f, 0.0f);
-#endif
-
- return kernel_tex_fetch(__attributes_float2, desc.offset + sd->prim);
- }
- else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) {
- float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
- int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
- int k1 = k0 + 1;
-
- float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + k0);
- float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + k1);
-
-#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*(f1 - f0);
- if(dy) *dy = make_float2(0.0f, 0.0f);
-#endif
-
- return (1.0f - sd->u)*f0 + sd->u*f1;
- }
- else {
-#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = make_float2(0.0f, 0.0f);
- if(dy) *dy = make_float2(0.0f, 0.0f);
-#endif
-
- return make_float2(0.0f, 0.0f);
- }
+ if (desc.element == ATTR_ELEMENT_CURVE) {
+ /* idea: we can't derive any useful differentials here, but for tiled
+ * mipmap image caching it would be useful to avoid reading the highest
+ * detail level always. maybe a derivative based on the hair density
+ * could be computed somehow? */
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = make_float2(0.0f, 0.0f);
+ if (dy)
+ *dy = make_float2(0.0f, 0.0f);
+# endif
+
+ return kernel_tex_fetch(__attributes_float2, desc.offset + sd->prim);
+ }
+ else if (desc.element == ATTR_ELEMENT_CURVE_KEY ||
+ desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) {
+ float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
+ int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+ int k1 = k0 + 1;
+
+ float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + k0);
+ float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + k1);
+
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = sd->du.dx * (f1 - f0);
+ if (dy)
+ *dy = make_float2(0.0f, 0.0f);
+# endif
+
+ return (1.0f - sd->u) * f0 + sd->u * f1;
+ }
+ else {
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = make_float2(0.0f, 0.0f);
+ if (dy)
+ *dy = make_float2(0.0f, 0.0f);
+# endif
+
+ return make_float2(0.0f, 0.0f);
+ }
}
-ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy)
+ccl_device float3 curve_attribute_float3(KernelGlobals *kg,
+ const ShaderData *sd,
+ const AttributeDescriptor desc,
+ float3 *dx,
+ float3 *dy)
{
- if(desc.element == ATTR_ELEMENT_CURVE) {
- /* idea: we can't derive any useful differentials here, but for tiled
- * mipmap image caching it would be useful to avoid reading the highest
- * detail level always. maybe a derivative based on the hair density
- * could be computed somehow? */
-#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
- if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
-#endif
-
- return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim));
- }
- else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) {
- float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
- int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
- int k1 = k0 + 1;
-
- float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0));
- float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k1));
-
-#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*(f1 - f0);
- if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
-#endif
-
- return (1.0f - sd->u)*f0 + sd->u*f1;
- }
- else {
-#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
- if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
-#endif
-
- return make_float3(0.0f, 0.0f, 0.0f);
- }
+ if (desc.element == ATTR_ELEMENT_CURVE) {
+ /* idea: we can't derive any useful differentials here, but for tiled
+ * mipmap image caching it would be useful to avoid reading the highest
+ * detail level always. maybe a derivative based on the hair density
+ * could be computed somehow? */
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = make_float3(0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float3(0.0f, 0.0f, 0.0f);
+# endif
+
+ return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim));
+ }
+ else if (desc.element == ATTR_ELEMENT_CURVE_KEY ||
+ desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) {
+ float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
+ int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+ int k1 = k0 + 1;
+
+ float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0));
+ float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k1));
+
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = sd->du.dx * (f1 - f0);
+ if (dy)
+ *dy = make_float3(0.0f, 0.0f, 0.0f);
+# endif
+
+ return (1.0f - sd->u) * f0 + sd->u * f1;
+ }
+ else {
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = make_float3(0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float3(0.0f, 0.0f, 0.0f);
+# endif
+
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
}
/* Curve thickness */
ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd)
{
- float r = 0.0f;
+ float r = 0.0f;
- if(sd->type & PRIMITIVE_ALL_CURVE) {
- float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
- int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
- int k1 = k0 + 1;
+ if (sd->type & PRIMITIVE_ALL_CURVE) {
+ float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
+ int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+ int k1 = k0 + 1;
- float4 P_curve[2];
+ float4 P_curve[2];
- if(sd->type & PRIMITIVE_CURVE) {
- P_curve[0]= kernel_tex_fetch(__curve_keys, k0);
- P_curve[1]= kernel_tex_fetch(__curve_keys, k1);
- }
- else {
- motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
- }
+ if (sd->type & PRIMITIVE_CURVE) {
+ P_curve[0] = kernel_tex_fetch(__curve_keys, k0);
+ P_curve[1] = kernel_tex_fetch(__curve_keys, k1);
+ }
+ else {
+ motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
+ }
- r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w;
- }
+ r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w;
+ }
- return r*2.0f;
+ return r * 2.0f;
}
/* Curve location for motion pass, linear interpolation between keys and
@@ -197,89 +227,98 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd)
ccl_device float3 curve_motion_center_location(KernelGlobals *kg, ShaderData *sd)
{
- float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
- int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
- int k1 = k0 + 1;
+ float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
+ int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+ int k1 = k0 + 1;
- float4 P_curve[2];
+ float4 P_curve[2];
- P_curve[0]= kernel_tex_fetch(__curve_keys, k0);
- P_curve[1]= kernel_tex_fetch(__curve_keys, k1);
+ P_curve[0] = kernel_tex_fetch(__curve_keys, k0);
+ P_curve[1] = kernel_tex_fetch(__curve_keys, k1);
- return float4_to_float3(P_curve[1]) * sd->u + float4_to_float3(P_curve[0]) * (1.0f - sd->u);
+ return float4_to_float3(P_curve[1]) * sd->u + float4_to_float3(P_curve[0]) * (1.0f - sd->u);
}
/* Curve tangent normal */
ccl_device float3 curve_tangent_normal(KernelGlobals *kg, ShaderData *sd)
{
- float3 tgN = make_float3(0.0f,0.0f,0.0f);
+ float3 tgN = make_float3(0.0f, 0.0f, 0.0f);
- if(sd->type & PRIMITIVE_ALL_CURVE) {
+ if (sd->type & PRIMITIVE_ALL_CURVE) {
- tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu,-sd->I) / len_squared(sd->dPdu)));
- tgN = normalize(tgN);
+ tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu, -sd->I) / len_squared(sd->dPdu)));
+ tgN = normalize(tgN);
- /* need to find suitable scaled gd for corrected normal */
-#if 0
- tgN = normalize(tgN - gd * sd->dPdu);
-#endif
- }
+ /* need to find suitable scaled gd for corrected normal */
+# if 0
+ tgN = normalize(tgN - gd * sd->dPdu);
+# endif
+ }
- return tgN;
+ return tgN;
}
/* Curve bounds utility function */
-ccl_device_inline void curvebounds(float *lower, float *upper, float *extremta, float *extrema, float *extremtb, float *extremb, float p0, float p1, float p2, float p3)
+ccl_device_inline void curvebounds(float *lower,
+ float *upper,
+ float *extremta,
+ float *extrema,
+ float *extremtb,
+ float *extremb,
+ float p0,
+ float p1,
+ float p2,
+ float p3)
{
- float halfdiscroot = (p2 * p2 - 3 * p3 * p1);
- float ta = -1.0f;
- float tb = -1.0f;
-
- *extremta = -1.0f;
- *extremtb = -1.0f;
- *upper = p0;
- *lower = (p0 + p1) + (p2 + p3);
- *extrema = *upper;
- *extremb = *lower;
-
- if(*lower >= *upper) {
- *upper = *lower;
- *lower = p0;
- }
-
- if(halfdiscroot >= 0) {
- float inv3p3 = (1.0f/3.0f)/p3;
- halfdiscroot = sqrtf(halfdiscroot);
- ta = (-p2 - halfdiscroot) * inv3p3;
- tb = (-p2 + halfdiscroot) * inv3p3;
- }
-
- float t2;
- float t3;
-
- if(ta > 0.0f && ta < 1.0f) {
- t2 = ta * ta;
- t3 = t2 * ta;
- *extremta = ta;
- *extrema = p3 * t3 + p2 * t2 + p1 * ta + p0;
-
- *upper = fmaxf(*extrema, *upper);
- *lower = fminf(*extrema, *lower);
- }
-
- if(tb > 0.0f && tb < 1.0f) {
- t2 = tb * tb;
- t3 = t2 * tb;
- *extremtb = tb;
- *extremb = p3 * t3 + p2 * t2 + p1 * tb + p0;
-
- *upper = fmaxf(*extremb, *upper);
- *lower = fminf(*extremb, *lower);
- }
+ float halfdiscroot = (p2 * p2 - 3 * p3 * p1);
+ float ta = -1.0f;
+ float tb = -1.0f;
+
+ *extremta = -1.0f;
+ *extremtb = -1.0f;
+ *upper = p0;
+ *lower = (p0 + p1) + (p2 + p3);
+ *extrema = *upper;
+ *extremb = *lower;
+
+ if (*lower >= *upper) {
+ *upper = *lower;
+ *lower = p0;
+ }
+
+ if (halfdiscroot >= 0) {
+ float inv3p3 = (1.0f / 3.0f) / p3;
+ halfdiscroot = sqrtf(halfdiscroot);
+ ta = (-p2 - halfdiscroot) * inv3p3;
+ tb = (-p2 + halfdiscroot) * inv3p3;
+ }
+
+ float t2;
+ float t3;
+
+ if (ta > 0.0f && ta < 1.0f) {
+ t2 = ta * ta;
+ t3 = t2 * ta;
+ *extremta = ta;
+ *extrema = p3 * t3 + p2 * t2 + p1 * ta + p0;
+
+ *upper = fmaxf(*extrema, *upper);
+ *lower = fminf(*extrema, *lower);
+ }
+
+ if (tb > 0.0f && tb < 1.0f) {
+ t2 = tb * tb;
+ t3 = t2 * tb;
+ *extremtb = tb;
+ *extremb = p3 * t3 + p2 * t2 + p1 * tb + p0;
+
+ *upper = fmaxf(*extremb, *upper);
+ *lower = fminf(*extremb, *lower);
+ }
}
-#endif /* __HAIR__ */
+#endif /* __HAIR__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h
index 5cf8713e3a8..5fd277c2f99 100644
--- a/intern/cycles/kernel/geom/geom_curve_intersect.h
+++ b/intern/cycles/kernel/geom/geom_curve_intersect.h
@@ -18,484 +18,534 @@ CCL_NAMESPACE_BEGIN
#ifdef __HAIR__
-#ifdef __KERNEL_SSE2__
+# ifdef __KERNEL_SSE2__
ccl_device_inline ssef transform_point_T3(const ssef t[3], const ssef &a)
{
- return madd(shuffle<0>(a), t[0], madd(shuffle<1>(a), t[1], shuffle<2>(a) * t[2]));
+ return madd(shuffle<0>(a), t[0], madd(shuffle<1>(a), t[1], shuffle<2>(a) * t[2]));
}
-#endif
+# endif
/* On CPU pass P and dir by reference to aligned vector. */
-ccl_device_forceinline bool cardinal_curve_intersect(
- KernelGlobals *kg,
- Intersection *isect,
- const float3 ccl_ref P,
- const float3 ccl_ref dir,
- uint visibility,
- int object,
- int curveAddr,
- float time,
- int type,
- uint *lcg_state,
- float difl,
- float extmax)
+ccl_device_forceinline bool cardinal_curve_intersect(KernelGlobals *kg,
+ Intersection *isect,
+ const float3 ccl_ref P,
+ const float3 ccl_ref dir,
+ uint visibility,
+ int object,
+ int curveAddr,
+ float time,
+ int type,
+ uint *lcg_state,
+ float difl,
+ float extmax)
{
- const bool is_curve_primitive = (type & PRIMITIVE_CURVE);
-
- if(!is_curve_primitive && kernel_data.bvh.use_bvh_steps) {
- const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr);
- if(time < prim_time.x || time > prim_time.y) {
- return false;
- }
- }
-
- int segment = PRIMITIVE_UNPACK_SEGMENT(type);
- float epsilon = 0.0f;
- float r_st, r_en;
-
- int depth = kernel_data.curve.subdivisions;
- int flags = kernel_data.curve.curveflags;
- int prim = kernel_tex_fetch(__prim_index, curveAddr);
-
-#ifdef __KERNEL_SSE2__
- ssef vdir = load4f(dir);
- ssef vcurve_coef[4];
- const float3 *curve_coef = (float3 *)vcurve_coef;
-
- {
- ssef dtmp = vdir * vdir;
- ssef d_ss = mm_sqrt(dtmp + shuffle<2>(dtmp));
- ssef rd_ss = load1f_first(1.0f) / d_ss;
-
- ssei v00vec = load4i((ssei *)&kg->__curves.data[prim]);
- int2 &v00 = (int2 &)v00vec;
-
- int k0 = v00.x + segment;
- int k1 = k0 + 1;
- int ka = max(k0 - 1, v00.x);
- int kb = min(k1 + 1, v00.x + v00.y - 1);
-
-#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && (!defined(_MSC_VER) || _MSC_VER > 1800)
- avxf P_curve_0_1, P_curve_2_3;
- if(is_curve_primitive) {
- P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x);
- P_curve_2_3 = _mm256_loadu2_m128(&kg->__curve_keys.data[kb].x, &kg->__curve_keys.data[k1].x);
- }
- else {
- int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
- motion_cardinal_curve_keys_avx(kg, fobject, prim, time, ka, k0, k1, kb, &P_curve_0_1,&P_curve_2_3);
- }
-#else /* __KERNEL_AVX2__ */
- ssef P_curve[4];
-
- if(is_curve_primitive) {
- P_curve[0] = load4f(&kg->__curve_keys.data[ka].x);
- P_curve[1] = load4f(&kg->__curve_keys.data[k0].x);
- P_curve[2] = load4f(&kg->__curve_keys.data[k1].x);
- P_curve[3] = load4f(&kg->__curve_keys.data[kb].x);
- }
- else {
- int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object;
- motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4*)&P_curve);
- }
-#endif /* __KERNEL_AVX2__ */
-
- ssef rd_sgn = set_sign_bit<0, 1, 1, 1>(shuffle<0>(rd_ss));
- ssef mul_zxxy = shuffle<2, 0, 0, 1>(vdir) * rd_sgn;
- ssef mul_yz = shuffle<1, 2, 1, 2>(vdir) * mul_zxxy;
- ssef mul_shuf = shuffle<0, 1, 2, 3>(mul_zxxy, mul_yz);
- ssef vdir0 = vdir & cast(ssei(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0));
-
- ssef htfm0 = shuffle<0, 2, 0, 3>(mul_shuf, vdir0);
- ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0);
- ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0);
-
-#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && (!defined(_MSC_VER) || _MSC_VER > 1800)
- const avxf vPP = _mm256_broadcast_ps(&P.m128);
- const avxf htfm00 = avxf(htfm0.m128, htfm0.m128);
- const avxf htfm11 = avxf(htfm1.m128, htfm1.m128);
- const avxf htfm22 = avxf(htfm2.m128, htfm2.m128);
-
- const avxf p01 = madd(shuffle<0>(P_curve_0_1 - vPP),
- htfm00,
- madd(shuffle<1>(P_curve_0_1 - vPP),
- htfm11,
- shuffle<2>(P_curve_0_1 - vPP) * htfm22));
- const avxf p23 = madd(shuffle<0>(P_curve_2_3 - vPP),
- htfm00,
- madd(shuffle<1>(P_curve_2_3 - vPP),
- htfm11,
- shuffle<2>(P_curve_2_3 - vPP)*htfm22));
-
- const ssef p0 = _mm256_castps256_ps128(p01);
- const ssef p1 = _mm256_extractf128_ps(p01, 1);
- const ssef p2 = _mm256_castps256_ps128(p23);
- const ssef p3 = _mm256_extractf128_ps(p23, 1);
-
- const ssef P_curve_1 = _mm256_extractf128_ps(P_curve_0_1, 1);
- r_st = ((float4 &)P_curve_1).w;
- const ssef P_curve_2 = _mm256_castps256_ps128(P_curve_2_3);
- r_en = ((float4 &)P_curve_2).w;
-#else /* __KERNEL_AVX2__ */
- ssef htfm[] = { htfm0, htfm1, htfm2 };
- ssef vP = load4f(P);
- ssef p0 = transform_point_T3(htfm, P_curve[0] - vP);
- ssef p1 = transform_point_T3(htfm, P_curve[1] - vP);
- ssef p2 = transform_point_T3(htfm, P_curve[2] - vP);
- ssef p3 = transform_point_T3(htfm, P_curve[3] - vP);
-
- r_st = ((float4 &)P_curve[1]).w;
- r_en = ((float4 &)P_curve[2]).w;
-#endif /* __KERNEL_AVX2__ */
-
- float fc = 0.71f;
- ssef vfc = ssef(fc);
- ssef vfcxp3 = vfc * p3;
-
- vcurve_coef[0] = p1;
- vcurve_coef[1] = vfc * (p2 - p0);
- vcurve_coef[2] = madd(ssef(fc * 2.0f), p0, madd(ssef(fc - 3.0f), p1, msub(ssef(3.0f - 2.0f * fc), p2, vfcxp3)));
- vcurve_coef[3] = msub(ssef(fc - 2.0f), p2 - p1, msub(vfc, p0, vfcxp3));
-
- }
-#else
- float3 curve_coef[4];
-
- /* curve Intersection check */
- /* obtain curve parameters */
- {
- /* ray transform created - this should be created at beginning of intersection loop */
- Transform htfm;
- float d = sqrtf(dir.x * dir.x + dir.z * dir.z);
- htfm = make_transform(
- dir.z / d, 0, -dir.x /d, 0,
- -dir.x * dir.y /d, d, -dir.y * dir.z /d, 0,
- dir.x, dir.y, dir.z, 0);
-
- float4 v00 = kernel_tex_fetch(__curves, prim);
-
- int k0 = __float_as_int(v00.x) + segment;
- int k1 = k0 + 1;
-
- int ka = max(k0 - 1,__float_as_int(v00.x));
- int kb = min(k1 + 1,__float_as_int(v00.x) + __float_as_int(v00.y) - 1);
-
- float4 P_curve[4];
-
- if(is_curve_primitive) {
- P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
- P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
- P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
- P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
- }
- else {
- int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object;
- motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, P_curve);
- }
-
- float3 p0 = transform_point(&htfm, float4_to_float3(P_curve[0]) - P);
- float3 p1 = transform_point(&htfm, float4_to_float3(P_curve[1]) - P);
- float3 p2 = transform_point(&htfm, float4_to_float3(P_curve[2]) - P);
- float3 p3 = transform_point(&htfm, float4_to_float3(P_curve[3]) - P);
-
- float fc = 0.71f;
- curve_coef[0] = p1;
- curve_coef[1] = -fc*p0 + fc*p2;
- curve_coef[2] = 2.0f * fc * p0 + (fc - 3.0f) * p1 + (3.0f - 2.0f * fc) * p2 - fc * p3;
- curve_coef[3] = -fc * p0 + (2.0f - fc) * p1 + (fc - 2.0f) * p2 + fc * p3;
- r_st = P_curve[1].w;
- r_en = P_curve[2].w;
- }
-#endif
-
- float r_curr = max(r_st, r_en);
-
- if((flags & CURVE_KN_RIBBONS) || !(flags & CURVE_KN_BACKFACING))
- epsilon = 2 * r_curr;
-
- /* find bounds - this is slow for cubic curves */
- float upper, lower;
-
- float zextrem[4];
- curvebounds(&lower, &upper, &zextrem[0], &zextrem[1], &zextrem[2], &zextrem[3], curve_coef[0].z, curve_coef[1].z, curve_coef[2].z, curve_coef[3].z);
- if(lower - r_curr > isect->t || upper + r_curr < epsilon)
- return false;
-
- /* minimum width extension */
- float mw_extension = min(difl * fabsf(upper), extmax);
- float r_ext = mw_extension + r_curr;
-
- float xextrem[4];
- curvebounds(&lower, &upper, &xextrem[0], &xextrem[1], &xextrem[2], &xextrem[3], curve_coef[0].x, curve_coef[1].x, curve_coef[2].x, curve_coef[3].x);
- if(lower > r_ext || upper < -r_ext)
- return false;
-
- float yextrem[4];
- curvebounds(&lower, &upper, &yextrem[0], &yextrem[1], &yextrem[2], &yextrem[3], curve_coef[0].y, curve_coef[1].y, curve_coef[2].y, curve_coef[3].y);
- if(lower > r_ext || upper < -r_ext)
- return false;
-
- /* setup recurrent loop */
- int level = 1 << depth;
- int tree = 0;
- float resol = 1.0f / (float)level;
- bool hit = false;
-
- /* begin loop */
- while(!(tree >> (depth))) {
- const float i_st = tree * resol;
- const float i_en = i_st + (level * resol);
-
-#ifdef __KERNEL_SSE2__
- ssef vi_st = ssef(i_st), vi_en = ssef(i_en);
- ssef vp_st = madd(madd(madd(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), vi_st, vcurve_coef[0]);
- ssef vp_en = madd(madd(madd(vcurve_coef[3], vi_en, vcurve_coef[2]), vi_en, vcurve_coef[1]), vi_en, vcurve_coef[0]);
-
- ssef vbmin = min(vp_st, vp_en);
- ssef vbmax = max(vp_st, vp_en);
-
- float3 &bmin = (float3 &)vbmin, &bmax = (float3 &)vbmax;
- float &bminx = bmin.x, &bminy = bmin.y, &bminz = bmin.z;
- float &bmaxx = bmax.x, &bmaxy = bmax.y, &bmaxz = bmax.z;
- float3 &p_st = (float3 &)vp_st, &p_en = (float3 &)vp_en;
-#else
- float3 p_st = ((curve_coef[3] * i_st + curve_coef[2]) * i_st + curve_coef[1]) * i_st + curve_coef[0];
- float3 p_en = ((curve_coef[3] * i_en + curve_coef[2]) * i_en + curve_coef[1]) * i_en + curve_coef[0];
-
- float bminx = min(p_st.x, p_en.x);
- float bmaxx = max(p_st.x, p_en.x);
- float bminy = min(p_st.y, p_en.y);
- float bmaxy = max(p_st.y, p_en.y);
- float bminz = min(p_st.z, p_en.z);
- float bmaxz = max(p_st.z, p_en.z);
-#endif
-
- if(xextrem[0] >= i_st && xextrem[0] <= i_en) {
- bminx = min(bminx,xextrem[1]);
- bmaxx = max(bmaxx,xextrem[1]);
- }
- if(xextrem[2] >= i_st && xextrem[2] <= i_en) {
- bminx = min(bminx,xextrem[3]);
- bmaxx = max(bmaxx,xextrem[3]);
- }
- if(yextrem[0] >= i_st && yextrem[0] <= i_en) {
- bminy = min(bminy,yextrem[1]);
- bmaxy = max(bmaxy,yextrem[1]);
- }
- if(yextrem[2] >= i_st && yextrem[2] <= i_en) {
- bminy = min(bminy,yextrem[3]);
- bmaxy = max(bmaxy,yextrem[3]);
- }
- if(zextrem[0] >= i_st && zextrem[0] <= i_en) {
- bminz = min(bminz,zextrem[1]);
- bmaxz = max(bmaxz,zextrem[1]);
- }
- if(zextrem[2] >= i_st && zextrem[2] <= i_en) {
- bminz = min(bminz,zextrem[3]);
- bmaxz = max(bmaxz,zextrem[3]);
- }
-
- float r1 = r_st + (r_en - r_st) * i_st;
- float r2 = r_st + (r_en - r_st) * i_en;
- r_curr = max(r1, r2);
-
- mw_extension = min(difl * fabsf(bmaxz), extmax);
- float r_ext = mw_extension + r_curr;
- float coverage = 1.0f;
-
- if(bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext|| bmaxx < -r_ext|| bminy > r_ext|| bmaxy < -r_ext) {
- /* the bounding box does not overlap the square centered at O */
- tree += level;
- level = tree & -tree;
- }
- else if(level == 1) {
-
- /* the maximum recursion depth is reached.
- * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0.
- * dP* is reversed if necessary.*/
- float t = isect->t;
- float u = 0.0f;
- float gd = 0.0f;
-
- if(flags & CURVE_KN_RIBBONS) {
- float3 tg = (p_en - p_st);
-#ifdef __KERNEL_SSE__
- const float3 tg_sq = tg * tg;
- float w = tg_sq.x + tg_sq.y;
-#else
- float w = tg.x * tg.x + tg.y * tg.y;
-#endif
- if(w == 0) {
- tree++;
- level = tree & -tree;
- continue;
- }
-#ifdef __KERNEL_SSE__
- const float3 p_sttg = p_st * tg;
- w = -(p_sttg.x + p_sttg.y) / w;
-#else
- w = -(p_st.x * tg.x + p_st.y * tg.y) / w;
-#endif
- w = saturate(w);
-
- /* compute u on the curve segment */
- u = i_st * (1 - w) + i_en * w;
- r_curr = r_st + (r_en - r_st) * u;
- /* compare x-y distances */
- float3 p_curr = ((curve_coef[3] * u + curve_coef[2]) * u + curve_coef[1]) * u + curve_coef[0];
-
- float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
- if(dot(tg, dp_st)< 0)
- dp_st *= -1;
- if(dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) {
- tree++;
- level = tree & -tree;
- continue;
- }
- float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1];
- if(dot(tg, dp_en) < 0)
- dp_en *= -1;
- if(dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) {
- tree++;
- level = tree & -tree;
- continue;
- }
-
- /* compute coverage */
- float r_ext = r_curr;
- coverage = 1.0f;
- if(difl != 0.0f) {
- mw_extension = min(difl * fabsf(bmaxz), extmax);
- r_ext = mw_extension + r_curr;
-#ifdef __KERNEL_SSE__
- const float3 p_curr_sq = p_curr * p_curr;
- const float3 dxxx(_mm_sqrt_ss(_mm_hadd_ps(p_curr_sq.m128, p_curr_sq.m128)));
- float d = dxxx.x;
-#else
- float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y);
-#endif
- float d0 = d - r_curr;
- float d1 = d + r_curr;
- float inv_mw_extension = 1.0f/mw_extension;
- if(d0 >= 0)
- coverage = (min(d1 * inv_mw_extension, 1.0f) - min(d0 * inv_mw_extension, 1.0f)) * 0.5f;
- else // inside
- coverage = (min(d1 * inv_mw_extension, 1.0f) + min(-d0 * inv_mw_extension, 1.0f)) * 0.5f;
- }
-
- if(p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) {
- tree++;
- level = tree & -tree;
- continue;
- }
-
- t = p_curr.z;
-
- /* stochastic fade from minimum width */
- if(difl != 0.0f && lcg_state) {
- if(coverage != 1.0f && (lcg_step_float(lcg_state) > coverage))
- return hit;
- }
- }
- else {
- float l = len(p_en - p_st);
- /* minimum width extension */
- float or1 = r1;
- float or2 = r2;
-
- if(difl != 0.0f) {
- mw_extension = min(len(p_st - P) * difl, extmax);
- or1 = r1 < mw_extension ? mw_extension : r1;
- mw_extension = min(len(p_en - P) * difl, extmax);
- or2 = r2 < mw_extension ? mw_extension : r2;
- }
- /* --- */
- float invl = 1.0f/l;
- float3 tg = (p_en - p_st) * invl;
- gd = (or2 - or1) * invl;
- float difz = -dot(p_st,tg);
- float cyla = 1.0f - (tg.z * tg.z * (1 + gd*gd));
- float invcyla = 1.0f/cyla;
- float halfb = (-p_st.z - tg.z*(difz + gd*(difz*gd + or1)));
- float tcentre = -halfb*invcyla;
- float zcentre = difz + (tg.z * tcentre);
- float3 tdif = - p_st;
- tdif.z += tcentre;
- float tdifz = dot(tdif,tg);
- float tb = 2*(tdif.z - tg.z*(tdifz + gd*(tdifz*gd + or1)));
- float tc = dot(tdif,tdif) - tdifz * tdifz * (1 + gd*gd) - or1*or1 - 2*or1*tdifz*gd;
- float td = tb*tb - 4*cyla*tc;
- if(td < 0.0f) {
- tree++;
- level = tree & -tree;
- continue;
- }
-
- float rootd = sqrtf(td);
- float correction = (-tb - rootd) * 0.5f * invcyla;
- t = tcentre + correction;
-
- float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
- if(dot(tg, dp_st)< 0)
- dp_st *= -1;
- float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1];
- if(dot(tg, dp_en) < 0)
- dp_en *= -1;
-
- if(flags & CURVE_KN_BACKFACING && (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f)) {
- correction = (-tb + rootd) * 0.5f * invcyla;
- t = tcentre + correction;
- }
-
- if(dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f) {
- tree++;
- level = tree & -tree;
- continue;
- }
-
- float w = (zcentre + (tg.z * correction)) * invl;
- w = saturate(w);
- /* compute u on the curve segment */
- u = i_st * (1 - w) + i_en * w;
-
- /* stochastic fade from minimum width */
- if(difl != 0.0f && lcg_state) {
- r_curr = r1 + (r2 - r1) * w;
- r_ext = or1 + (or2 - or1) * w;
- coverage = r_curr/r_ext;
-
- if(coverage != 1.0f && (lcg_step_float(lcg_state) > coverage))
- return hit;
- }
- }
- /* we found a new intersection */
-
-#ifdef __VISIBILITY_FLAG__
- /* visibility flag test. we do it here under the assumption
- * that most triangles are culled by node flags */
- if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)
-#endif
- {
- /* record intersection */
- isect->t = t;
- isect->u = u;
- isect->v = gd;
- isect->prim = curveAddr;
- isect->object = object;
- isect->type = type;
- hit = true;
- }
-
- tree++;
- level = tree & -tree;
- }
- else {
- /* split the curve into two curves and process */
- level = level >> 1;
- }
- }
-
- return hit;
+ const bool is_curve_primitive = (type & PRIMITIVE_CURVE);
+
+ if (!is_curve_primitive && kernel_data.bvh.use_bvh_steps) {
+ const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr);
+ if (time < prim_time.x || time > prim_time.y) {
+ return false;
+ }
+ }
+
+ int segment = PRIMITIVE_UNPACK_SEGMENT(type);
+ float epsilon = 0.0f;
+ float r_st, r_en;
+
+ int depth = kernel_data.curve.subdivisions;
+ int flags = kernel_data.curve.curveflags;
+ int prim = kernel_tex_fetch(__prim_index, curveAddr);
+
+# ifdef __KERNEL_SSE2__
+ ssef vdir = load4f(dir);
+ ssef vcurve_coef[4];
+ const float3 *curve_coef = (float3 *)vcurve_coef;
+
+ {
+ ssef dtmp = vdir * vdir;
+ ssef d_ss = mm_sqrt(dtmp + shuffle<2>(dtmp));
+ ssef rd_ss = load1f_first(1.0f) / d_ss;
+
+ ssei v00vec = load4i((ssei *)&kg->__curves.data[prim]);
+ int2 &v00 = (int2 &)v00vec;
+
+ int k0 = v00.x + segment;
+ int k1 = k0 + 1;
+ int ka = max(k0 - 1, v00.x);
+ int kb = min(k1 + 1, v00.x + v00.y - 1);
+
+# if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && \
+ (!defined(_MSC_VER) || _MSC_VER > 1800)
+ avxf P_curve_0_1, P_curve_2_3;
+ if (is_curve_primitive) {
+ P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x);
+ P_curve_2_3 = _mm256_loadu2_m128(&kg->__curve_keys.data[kb].x, &kg->__curve_keys.data[k1].x);
+ }
+ else {
+ int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
+ motion_cardinal_curve_keys_avx(
+ kg, fobject, prim, time, ka, k0, k1, kb, &P_curve_0_1, &P_curve_2_3);
+ }
+# else /* __KERNEL_AVX2__ */
+ ssef P_curve[4];
+
+ if (is_curve_primitive) {
+ P_curve[0] = load4f(&kg->__curve_keys.data[ka].x);
+ P_curve[1] = load4f(&kg->__curve_keys.data[k0].x);
+ P_curve[2] = load4f(&kg->__curve_keys.data[k1].x);
+ P_curve[3] = load4f(&kg->__curve_keys.data[kb].x);
+ }
+ else {
+ int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
+ motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4 *)&P_curve);
+ }
+# endif /* __KERNEL_AVX2__ */
+
+ ssef rd_sgn = set_sign_bit<0, 1, 1, 1>(shuffle<0>(rd_ss));
+ ssef mul_zxxy = shuffle<2, 0, 0, 1>(vdir) * rd_sgn;
+ ssef mul_yz = shuffle<1, 2, 1, 2>(vdir) * mul_zxxy;
+ ssef mul_shuf = shuffle<0, 1, 2, 3>(mul_zxxy, mul_yz);
+ ssef vdir0 = vdir & cast(ssei(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0));
+
+ ssef htfm0 = shuffle<0, 2, 0, 3>(mul_shuf, vdir0);
+ ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0);
+ ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0);
+
+# if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && \
+ (!defined(_MSC_VER) || _MSC_VER > 1800)
+ const avxf vPP = _mm256_broadcast_ps(&P.m128);
+ const avxf htfm00 = avxf(htfm0.m128, htfm0.m128);
+ const avxf htfm11 = avxf(htfm1.m128, htfm1.m128);
+ const avxf htfm22 = avxf(htfm2.m128, htfm2.m128);
+
+ const avxf p01 = madd(
+ shuffle<0>(P_curve_0_1 - vPP),
+ htfm00,
+ madd(shuffle<1>(P_curve_0_1 - vPP), htfm11, shuffle<2>(P_curve_0_1 - vPP) * htfm22));
+ const avxf p23 = madd(
+ shuffle<0>(P_curve_2_3 - vPP),
+ htfm00,
+ madd(shuffle<1>(P_curve_2_3 - vPP), htfm11, shuffle<2>(P_curve_2_3 - vPP) * htfm22));
+
+ const ssef p0 = _mm256_castps256_ps128(p01);
+ const ssef p1 = _mm256_extractf128_ps(p01, 1);
+ const ssef p2 = _mm256_castps256_ps128(p23);
+ const ssef p3 = _mm256_extractf128_ps(p23, 1);
+
+ const ssef P_curve_1 = _mm256_extractf128_ps(P_curve_0_1, 1);
+ r_st = ((float4 &)P_curve_1).w;
+ const ssef P_curve_2 = _mm256_castps256_ps128(P_curve_2_3);
+ r_en = ((float4 &)P_curve_2).w;
+# else /* __KERNEL_AVX2__ */
+ ssef htfm[] = {htfm0, htfm1, htfm2};
+ ssef vP = load4f(P);
+ ssef p0 = transform_point_T3(htfm, P_curve[0] - vP);
+ ssef p1 = transform_point_T3(htfm, P_curve[1] - vP);
+ ssef p2 = transform_point_T3(htfm, P_curve[2] - vP);
+ ssef p3 = transform_point_T3(htfm, P_curve[3] - vP);
+
+ r_st = ((float4 &)P_curve[1]).w;
+ r_en = ((float4 &)P_curve[2]).w;
+# endif /* __KERNEL_AVX2__ */
+
+ float fc = 0.71f;
+ ssef vfc = ssef(fc);
+ ssef vfcxp3 = vfc * p3;
+
+ vcurve_coef[0] = p1;
+ vcurve_coef[1] = vfc * (p2 - p0);
+ vcurve_coef[2] = madd(
+ ssef(fc * 2.0f), p0, madd(ssef(fc - 3.0f), p1, msub(ssef(3.0f - 2.0f * fc), p2, vfcxp3)));
+ vcurve_coef[3] = msub(ssef(fc - 2.0f), p2 - p1, msub(vfc, p0, vfcxp3));
+ }
+# else
+ float3 curve_coef[4];
+
+ /* curve Intersection check */
+ /* obtain curve parameters */
+ {
+ /* ray transform created - this should be created at beginning of intersection loop */
+ Transform htfm;
+ float d = sqrtf(dir.x * dir.x + dir.z * dir.z);
+ htfm = make_transform(dir.z / d,
+ 0,
+ -dir.x / d,
+ 0,
+ -dir.x * dir.y / d,
+ d,
+ -dir.y * dir.z / d,
+ 0,
+ dir.x,
+ dir.y,
+ dir.z,
+ 0);
+
+ float4 v00 = kernel_tex_fetch(__curves, prim);
+
+ int k0 = __float_as_int(v00.x) + segment;
+ int k1 = k0 + 1;
+
+ int ka = max(k0 - 1, __float_as_int(v00.x));
+ int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1);
+
+ float4 P_curve[4];
+
+ if (is_curve_primitive) {
+ P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
+ P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
+ P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
+ P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
+ }
+ else {
+ int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
+ motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, P_curve);
+ }
+
+ float3 p0 = transform_point(&htfm, float4_to_float3(P_curve[0]) - P);
+ float3 p1 = transform_point(&htfm, float4_to_float3(P_curve[1]) - P);
+ float3 p2 = transform_point(&htfm, float4_to_float3(P_curve[2]) - P);
+ float3 p3 = transform_point(&htfm, float4_to_float3(P_curve[3]) - P);
+
+ float fc = 0.71f;
+ curve_coef[0] = p1;
+ curve_coef[1] = -fc * p0 + fc * p2;
+ curve_coef[2] = 2.0f * fc * p0 + (fc - 3.0f) * p1 + (3.0f - 2.0f * fc) * p2 - fc * p3;
+ curve_coef[3] = -fc * p0 + (2.0f - fc) * p1 + (fc - 2.0f) * p2 + fc * p3;
+ r_st = P_curve[1].w;
+ r_en = P_curve[2].w;
+ }
+# endif
+
+ float r_curr = max(r_st, r_en);
+
+ if ((flags & CURVE_KN_RIBBONS) || !(flags & CURVE_KN_BACKFACING))
+ epsilon = 2 * r_curr;
+
+ /* find bounds - this is slow for cubic curves */
+ float upper, lower;
+
+ float zextrem[4];
+ curvebounds(&lower,
+ &upper,
+ &zextrem[0],
+ &zextrem[1],
+ &zextrem[2],
+ &zextrem[3],
+ curve_coef[0].z,
+ curve_coef[1].z,
+ curve_coef[2].z,
+ curve_coef[3].z);
+ if (lower - r_curr > isect->t || upper + r_curr < epsilon)
+ return false;
+
+ /* minimum width extension */
+ float mw_extension = min(difl * fabsf(upper), extmax);
+ float r_ext = mw_extension + r_curr;
+
+ float xextrem[4];
+ curvebounds(&lower,
+ &upper,
+ &xextrem[0],
+ &xextrem[1],
+ &xextrem[2],
+ &xextrem[3],
+ curve_coef[0].x,
+ curve_coef[1].x,
+ curve_coef[2].x,
+ curve_coef[3].x);
+ if (lower > r_ext || upper < -r_ext)
+ return false;
+
+ float yextrem[4];
+ curvebounds(&lower,
+ &upper,
+ &yextrem[0],
+ &yextrem[1],
+ &yextrem[2],
+ &yextrem[3],
+ curve_coef[0].y,
+ curve_coef[1].y,
+ curve_coef[2].y,
+ curve_coef[3].y);
+ if (lower > r_ext || upper < -r_ext)
+ return false;
+
+ /* setup recurrent loop */
+ int level = 1 << depth;
+ int tree = 0;
+ float resol = 1.0f / (float)level;
+ bool hit = false;
+
+ /* begin loop */
+ while (!(tree >> (depth))) {
+ const float i_st = tree * resol;
+ const float i_en = i_st + (level * resol);
+
+# ifdef __KERNEL_SSE2__
+ ssef vi_st = ssef(i_st), vi_en = ssef(i_en);
+ ssef vp_st = madd(madd(madd(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]),
+ vi_st,
+ vcurve_coef[0]);
+ ssef vp_en = madd(madd(madd(vcurve_coef[3], vi_en, vcurve_coef[2]), vi_en, vcurve_coef[1]),
+ vi_en,
+ vcurve_coef[0]);
+
+ ssef vbmin = min(vp_st, vp_en);
+ ssef vbmax = max(vp_st, vp_en);
+
+ float3 &bmin = (float3 &)vbmin, &bmax = (float3 &)vbmax;
+ float &bminx = bmin.x, &bminy = bmin.y, &bminz = bmin.z;
+ float &bmaxx = bmax.x, &bmaxy = bmax.y, &bmaxz = bmax.z;
+ float3 &p_st = (float3 &)vp_st, &p_en = (float3 &)vp_en;
+# else
+ float3 p_st = ((curve_coef[3] * i_st + curve_coef[2]) * i_st + curve_coef[1]) * i_st +
+ curve_coef[0];
+ float3 p_en = ((curve_coef[3] * i_en + curve_coef[2]) * i_en + curve_coef[1]) * i_en +
+ curve_coef[0];
+
+ float bminx = min(p_st.x, p_en.x);
+ float bmaxx = max(p_st.x, p_en.x);
+ float bminy = min(p_st.y, p_en.y);
+ float bmaxy = max(p_st.y, p_en.y);
+ float bminz = min(p_st.z, p_en.z);
+ float bmaxz = max(p_st.z, p_en.z);
+# endif
+
+ if (xextrem[0] >= i_st && xextrem[0] <= i_en) {
+ bminx = min(bminx, xextrem[1]);
+ bmaxx = max(bmaxx, xextrem[1]);
+ }
+ if (xextrem[2] >= i_st && xextrem[2] <= i_en) {
+ bminx = min(bminx, xextrem[3]);
+ bmaxx = max(bmaxx, xextrem[3]);
+ }
+ if (yextrem[0] >= i_st && yextrem[0] <= i_en) {
+ bminy = min(bminy, yextrem[1]);
+ bmaxy = max(bmaxy, yextrem[1]);
+ }
+ if (yextrem[2] >= i_st && yextrem[2] <= i_en) {
+ bminy = min(bminy, yextrem[3]);
+ bmaxy = max(bmaxy, yextrem[3]);
+ }
+ if (zextrem[0] >= i_st && zextrem[0] <= i_en) {
+ bminz = min(bminz, zextrem[1]);
+ bmaxz = max(bmaxz, zextrem[1]);
+ }
+ if (zextrem[2] >= i_st && zextrem[2] <= i_en) {
+ bminz = min(bminz, zextrem[3]);
+ bmaxz = max(bmaxz, zextrem[3]);
+ }
+
+ float r1 = r_st + (r_en - r_st) * i_st;
+ float r2 = r_st + (r_en - r_st) * i_en;
+ r_curr = max(r1, r2);
+
+ mw_extension = min(difl * fabsf(bmaxz), extmax);
+ float r_ext = mw_extension + r_curr;
+ float coverage = 1.0f;
+
+ if (bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext || bmaxx < -r_ext ||
+ bminy > r_ext || bmaxy < -r_ext) {
+ /* the bounding box does not overlap the square centered at O */
+ tree += level;
+ level = tree & -tree;
+ }
+ else if (level == 1) {
+
+ /* the maximum recursion depth is reached.
+ * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0.
+ * dP* is reversed if necessary.*/
+ float t = isect->t;
+ float u = 0.0f;
+ float gd = 0.0f;
+
+ if (flags & CURVE_KN_RIBBONS) {
+ float3 tg = (p_en - p_st);
+# ifdef __KERNEL_SSE__
+ const float3 tg_sq = tg * tg;
+ float w = tg_sq.x + tg_sq.y;
+# else
+ float w = tg.x * tg.x + tg.y * tg.y;
+# endif
+ if (w == 0) {
+ tree++;
+ level = tree & -tree;
+ continue;
+ }
+# ifdef __KERNEL_SSE__
+ const float3 p_sttg = p_st * tg;
+ w = -(p_sttg.x + p_sttg.y) / w;
+# else
+ w = -(p_st.x * tg.x + p_st.y * tg.y) / w;
+# endif
+ w = saturate(w);
+
+ /* compute u on the curve segment */
+ u = i_st * (1 - w) + i_en * w;
+ r_curr = r_st + (r_en - r_st) * u;
+ /* compare x-y distances */
+ float3 p_curr = ((curve_coef[3] * u + curve_coef[2]) * u + curve_coef[1]) * u +
+ curve_coef[0];
+
+ float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
+ if (dot(tg, dp_st) < 0)
+ dp_st *= -1;
+ if (dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) {
+ tree++;
+ level = tree & -tree;
+ continue;
+ }
+ float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1];
+ if (dot(tg, dp_en) < 0)
+ dp_en *= -1;
+ if (dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) {
+ tree++;
+ level = tree & -tree;
+ continue;
+ }
+
+ /* compute coverage */
+ float r_ext = r_curr;
+ coverage = 1.0f;
+ if (difl != 0.0f) {
+ mw_extension = min(difl * fabsf(bmaxz), extmax);
+ r_ext = mw_extension + r_curr;
+# ifdef __KERNEL_SSE__
+ const float3 p_curr_sq = p_curr * p_curr;
+ const float3 dxxx(_mm_sqrt_ss(_mm_hadd_ps(p_curr_sq.m128, p_curr_sq.m128)));
+ float d = dxxx.x;
+# else
+ float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y);
+# endif
+ float d0 = d - r_curr;
+ float d1 = d + r_curr;
+ float inv_mw_extension = 1.0f / mw_extension;
+ if (d0 >= 0)
+ coverage = (min(d1 * inv_mw_extension, 1.0f) - min(d0 * inv_mw_extension, 1.0f)) *
+ 0.5f;
+ else // inside
+ coverage = (min(d1 * inv_mw_extension, 1.0f) + min(-d0 * inv_mw_extension, 1.0f)) *
+ 0.5f;
+ }
+
+ if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon ||
+ isect->t < p_curr.z) {
+ tree++;
+ level = tree & -tree;
+ continue;
+ }
+
+ t = p_curr.z;
+
+ /* stochastic fade from minimum width */
+ if (difl != 0.0f && lcg_state) {
+ if (coverage != 1.0f && (lcg_step_float(lcg_state) > coverage))
+ return hit;
+ }
+ }
+ else {
+ float l = len(p_en - p_st);
+ /* minimum width extension */
+ float or1 = r1;
+ float or2 = r2;
+
+ if (difl != 0.0f) {
+ mw_extension = min(len(p_st - P) * difl, extmax);
+ or1 = r1 < mw_extension ? mw_extension : r1;
+ mw_extension = min(len(p_en - P) * difl, extmax);
+ or2 = r2 < mw_extension ? mw_extension : r2;
+ }
+ /* --- */
+ float invl = 1.0f / l;
+ float3 tg = (p_en - p_st) * invl;
+ gd = (or2 - or1) * invl;
+ float difz = -dot(p_st, tg);
+ float cyla = 1.0f - (tg.z * tg.z * (1 + gd * gd));
+ float invcyla = 1.0f / cyla;
+ float halfb = (-p_st.z - tg.z * (difz + gd * (difz * gd + or1)));
+ float tcentre = -halfb * invcyla;
+ float zcentre = difz + (tg.z * tcentre);
+ float3 tdif = -p_st;
+ tdif.z += tcentre;
+ float tdifz = dot(tdif, tg);
+ float tb = 2 * (tdif.z - tg.z * (tdifz + gd * (tdifz * gd + or1)));
+ float tc = dot(tdif, tdif) - tdifz * tdifz * (1 + gd * gd) - or1 * or1 -
+ 2 * or1 * tdifz * gd;
+ float td = tb * tb - 4 * cyla * tc;
+ if (td < 0.0f) {
+ tree++;
+ level = tree & -tree;
+ continue;
+ }
+
+ float rootd = sqrtf(td);
+ float correction = (-tb - rootd) * 0.5f * invcyla;
+ t = tcentre + correction;
+
+ float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
+ if (dot(tg, dp_st) < 0)
+ dp_st *= -1;
+ float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1];
+ if (dot(tg, dp_en) < 0)
+ dp_en *= -1;
+
+ if (flags & CURVE_KN_BACKFACING &&
+ (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 ||
+ isect->t < t || t <= 0.0f)) {
+ correction = (-tb + rootd) * 0.5f * invcyla;
+ t = tcentre + correction;
+ }
+
+ if (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 ||
+ isect->t < t || t <= 0.0f) {
+ tree++;
+ level = tree & -tree;
+ continue;
+ }
+
+ float w = (zcentre + (tg.z * correction)) * invl;
+ w = saturate(w);
+ /* compute u on the curve segment */
+ u = i_st * (1 - w) + i_en * w;
+
+ /* stochastic fade from minimum width */
+ if (difl != 0.0f && lcg_state) {
+ r_curr = r1 + (r2 - r1) * w;
+ r_ext = or1 + (or2 - or1) * w;
+ coverage = r_curr / r_ext;
+
+ if (coverage != 1.0f && (lcg_step_float(lcg_state) > coverage))
+ return hit;
+ }
+ }
+ /* we found a new intersection */
+
+# ifdef __VISIBILITY_FLAG__
+ /* visibility flag test. we do it here under the assumption
+ * that most triangles are culled by node flags */
+ if (kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)
+# endif
+ {
+ /* record intersection */
+ isect->t = t;
+ isect->u = u;
+ isect->v = gd;
+ isect->prim = curveAddr;
+ isect->object = object;
+ isect->type = type;
+ hit = true;
+ }
+
+ tree++;
+ level = tree & -tree;
+ }
+ else {
+ /* split the curve into two curves and process */
+ level = level >> 1;
+ }
+ }
+
+ return hit;
}
ccl_device_forceinline bool curve_intersect(KernelGlobals *kg,
@@ -511,245 +561,247 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals *kg,
float difl,
float extmax)
{
- /* define few macros to minimize code duplication for SSE */
-#ifndef __KERNEL_SSE2__
-# define len3_squared(x) len_squared(x)
-# define len3(x) len(x)
-# define dot3(x, y) dot(x, y)
-#endif
-
- const bool is_curve_primitive = (type & PRIMITIVE_CURVE);
-
- if(!is_curve_primitive && kernel_data.bvh.use_bvh_steps) {
- const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr);
- if(time < prim_time.x || time > prim_time.y) {
- return false;
- }
- }
-
- int segment = PRIMITIVE_UNPACK_SEGMENT(type);
- /* curve Intersection check */
- int flags = kernel_data.curve.curveflags;
-
- int prim = kernel_tex_fetch(__prim_index, curveAddr);
- float4 v00 = kernel_tex_fetch(__curves, prim);
-
- int cnum = __float_as_int(v00.x);
- int k0 = cnum + segment;
- int k1 = k0 + 1;
-
-#ifndef __KERNEL_SSE2__
- float4 P_curve[2];
-
- if(is_curve_primitive) {
- P_curve[0] = kernel_tex_fetch(__curve_keys, k0);
- P_curve[1] = kernel_tex_fetch(__curve_keys, k1);
- }
- else {
- int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object;
- motion_curve_keys(kg, fobject, prim, time, k0, k1, P_curve);
- }
-
- float or1 = P_curve[0].w;
- float or2 = P_curve[1].w;
- float3 p1 = float4_to_float3(P_curve[0]);
- float3 p2 = float4_to_float3(P_curve[1]);
-
- /* minimum width extension */
- float r1 = or1;
- float r2 = or2;
- float3 dif = P - p1;
- float3 dif_second = P - p2;
- if(difl != 0.0f) {
- float pixelsize = min(len3(dif) * difl, extmax);
- r1 = or1 < pixelsize ? pixelsize : or1;
- pixelsize = min(len3(dif_second) * difl, extmax);
- r2 = or2 < pixelsize ? pixelsize : or2;
- }
- /* --- */
-
- float3 p21_diff = p2 - p1;
- float3 sphere_dif1 = (dif + dif_second) * 0.5f;
- float3 dir = direction;
- float sphere_b_tmp = dot3(dir, sphere_dif1);
- float3 sphere_dif2 = sphere_dif1 - sphere_b_tmp * dir;
-#else
- ssef P_curve[2];
-
- if(is_curve_primitive) {
- P_curve[0] = load4f(&kg->__curve_keys.data[k0].x);
- P_curve[1] = load4f(&kg->__curve_keys.data[k1].x);
- }
- else {
- int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object;
- motion_curve_keys(kg, fobject, prim, time, k0, k1, (float4*)&P_curve);
- }
-
- const ssef or12 = shuffle<3, 3, 3, 3>(P_curve[0], P_curve[1]);
-
- ssef r12 = or12;
- const ssef vP = load4f(P);
- const ssef dif = vP - P_curve[0];
- const ssef dif_second = vP - P_curve[1];
- if(difl != 0.0f) {
- const ssef len1_sq = len3_squared_splat(dif);
- const ssef len2_sq = len3_squared_splat(dif_second);
- const ssef len12 = mm_sqrt(shuffle<0, 0, 0, 0>(len1_sq, len2_sq));
- const ssef pixelsize12 = min(len12 * difl, ssef(extmax));
- r12 = max(or12, pixelsize12);
- }
- float or1 = extract<0>(or12), or2 = extract<0>(shuffle<2>(or12));
- float r1 = extract<0>(r12), r2 = extract<0>(shuffle<2>(r12));
-
- const ssef p21_diff = P_curve[1] - P_curve[0];
- const ssef sphere_dif1 = (dif + dif_second) * 0.5f;
- const ssef dir = load4f(direction);
- const ssef sphere_b_tmp = dot3_splat(dir, sphere_dif1);
- const ssef sphere_dif2 = nmadd(sphere_b_tmp, dir, sphere_dif1);
-#endif
-
- float mr = max(r1, r2);
- float l = len3(p21_diff);
- float invl = 1.0f / l;
- float sp_r = mr + 0.5f * l;
-
- float sphere_b = dot3(dir, sphere_dif2);
- float sdisc = sphere_b * sphere_b - len3_squared(sphere_dif2) + sp_r * sp_r;
-
- if(sdisc < 0.0f)
- return false;
-
- /* obtain parameters and test midpoint distance for suitable modes */
-#ifndef __KERNEL_SSE2__
- float3 tg = p21_diff * invl;
-#else
- const ssef tg = p21_diff * invl;
-#endif
- float gd = (r2 - r1) * invl;
-
- float dirz = dot3(dir, tg);
- float difz = dot3(dif, tg);
-
- float a = 1.0f - (dirz*dirz*(1 + gd*gd));
-
- float halfb = dot3(dir, dif) - dirz*(difz + gd*(difz*gd + r1));
-
- float tcentre = -halfb/a;
- float zcentre = difz + (dirz * tcentre);
-
- if((tcentre > isect->t) && !(flags & CURVE_KN_ACCURATE))
- return false;
- if((zcentre < 0 || zcentre > l) && !(flags & CURVE_KN_ACCURATE) && !(flags & CURVE_KN_INTERSECTCORRECTION))
- return false;
-
- /* test minimum separation */
-#ifndef __KERNEL_SSE2__
- float3 cprod = cross(tg, dir);
- float cprod2sq = len3_squared(cross(tg, dif));
-#else
- const ssef cprod = cross(tg, dir);
- float cprod2sq = len3_squared(cross_zxy(tg, dif));
-#endif
- float cprodsq = len3_squared(cprod);
- float distscaled = dot3(cprod, dif);
-
- if(cprodsq == 0)
- distscaled = cprod2sq;
- else
- distscaled = (distscaled*distscaled)/cprodsq;
-
- if(distscaled > mr*mr)
- return false;
-
- /* calculate true intersection */
-#ifndef __KERNEL_SSE2__
- float3 tdif = dif + tcentre * dir;
-#else
- const ssef tdif = madd(ssef(tcentre), dir, dif);
-#endif
- float tdifz = dot3(tdif, tg);
- float tdifma = tdifz*gd + r1;
- float tb = 2*(dot3(dir, tdif) - dirz*(tdifz + gd*tdifma));
- float tc = dot3(tdif, tdif) - tdifz*tdifz - tdifma*tdifma;
- float td = tb*tb - 4*a*tc;
-
- if(td < 0.0f)
- return false;
-
- float rootd = 0.0f;
- float correction = 0.0f;
- if(flags & CURVE_KN_ACCURATE) {
- rootd = sqrtf(td);
- correction = ((-tb - rootd)/(2*a));
- }
-
- float t = tcentre + correction;
-
- if(t < isect->t) {
-
- if(flags & CURVE_KN_INTERSECTCORRECTION) {
- rootd = sqrtf(td);
- correction = ((-tb - rootd)/(2*a));
- t = tcentre + correction;
- }
-
- float z = zcentre + (dirz * correction);
- // bool backface = false;
-
- if(flags & CURVE_KN_BACKFACING && (t < 0.0f || z < 0 || z > l)) {
- // backface = true;
- correction = ((-tb + rootd)/(2*a));
- t = tcentre + correction;
- z = zcentre + (dirz * correction);
- }
-
- /* stochastic fade from minimum width */
- float adjradius = or1 + z * (or2 - or1) * invl;
- adjradius = adjradius / (r1 + z * gd);
- if(lcg_state && adjradius != 1.0f) {
- if(lcg_step_float(lcg_state) > adjradius)
- return false;
- }
- /* --- */
-
- if(t > 0.0f && t < isect->t && z >= 0 && z <= l) {
-
- if(flags & CURVE_KN_ENCLOSEFILTER) {
- float enc_ratio = 1.01f;
- if((difz > -r1 * enc_ratio) && (dot3(dif_second, tg) < r2 * enc_ratio)) {
- float a2 = 1.0f - (dirz*dirz*(1 + gd*gd*enc_ratio*enc_ratio));
- float c2 = dot3(dif, dif) - difz * difz * (1 + gd*gd*enc_ratio*enc_ratio) - r1*r1*enc_ratio*enc_ratio - 2*r1*difz*gd*enc_ratio;
- if(a2*c2 < 0.0f)
- return false;
- }
- }
-
-#ifdef __VISIBILITY_FLAG__
- /* visibility flag test. we do it here under the assumption
- * that most triangles are culled by node flags */
- if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)
-#endif
- {
- /* record intersection */
- isect->t = t;
- isect->u = z*invl;
- isect->v = gd;
- isect->prim = curveAddr;
- isect->object = object;
- isect->type = type;
-
- return true;
- }
- }
- }
-
- return false;
-
-#ifndef __KERNEL_SSE2__
-# undef len3_squared
-# undef len3
-# undef dot3
-#endif
+ /* define few macros to minimize code duplication for SSE */
+# ifndef __KERNEL_SSE2__
+# define len3_squared(x) len_squared(x)
+# define len3(x) len(x)
+# define dot3(x, y) dot(x, y)
+# endif
+
+ const bool is_curve_primitive = (type & PRIMITIVE_CURVE);
+
+ if (!is_curve_primitive && kernel_data.bvh.use_bvh_steps) {
+ const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr);
+ if (time < prim_time.x || time > prim_time.y) {
+ return false;
+ }
+ }
+
+ int segment = PRIMITIVE_UNPACK_SEGMENT(type);
+ /* curve Intersection check */
+ int flags = kernel_data.curve.curveflags;
+
+ int prim = kernel_tex_fetch(__prim_index, curveAddr);
+ float4 v00 = kernel_tex_fetch(__curves, prim);
+
+ int cnum = __float_as_int(v00.x);
+ int k0 = cnum + segment;
+ int k1 = k0 + 1;
+
+# ifndef __KERNEL_SSE2__
+ float4 P_curve[2];
+
+ if (is_curve_primitive) {
+ P_curve[0] = kernel_tex_fetch(__curve_keys, k0);
+ P_curve[1] = kernel_tex_fetch(__curve_keys, k1);
+ }
+ else {
+ int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
+ motion_curve_keys(kg, fobject, prim, time, k0, k1, P_curve);
+ }
+
+ float or1 = P_curve[0].w;
+ float or2 = P_curve[1].w;
+ float3 p1 = float4_to_float3(P_curve[0]);
+ float3 p2 = float4_to_float3(P_curve[1]);
+
+ /* minimum width extension */
+ float r1 = or1;
+ float r2 = or2;
+ float3 dif = P - p1;
+ float3 dif_second = P - p2;
+ if (difl != 0.0f) {
+ float pixelsize = min(len3(dif) * difl, extmax);
+ r1 = or1 < pixelsize ? pixelsize : or1;
+ pixelsize = min(len3(dif_second) * difl, extmax);
+ r2 = or2 < pixelsize ? pixelsize : or2;
+ }
+ /* --- */
+
+ float3 p21_diff = p2 - p1;
+ float3 sphere_dif1 = (dif + dif_second) * 0.5f;
+ float3 dir = direction;
+ float sphere_b_tmp = dot3(dir, sphere_dif1);
+ float3 sphere_dif2 = sphere_dif1 - sphere_b_tmp * dir;
+# else
+ ssef P_curve[2];
+
+ if (is_curve_primitive) {
+ P_curve[0] = load4f(&kg->__curve_keys.data[k0].x);
+ P_curve[1] = load4f(&kg->__curve_keys.data[k1].x);
+ }
+ else {
+ int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
+ motion_curve_keys(kg, fobject, prim, time, k0, k1, (float4 *)&P_curve);
+ }
+
+ const ssef or12 = shuffle<3, 3, 3, 3>(P_curve[0], P_curve[1]);
+
+ ssef r12 = or12;
+ const ssef vP = load4f(P);
+ const ssef dif = vP - P_curve[0];
+ const ssef dif_second = vP - P_curve[1];
+ if (difl != 0.0f) {
+ const ssef len1_sq = len3_squared_splat(dif);
+ const ssef len2_sq = len3_squared_splat(dif_second);
+ const ssef len12 = mm_sqrt(shuffle<0, 0, 0, 0>(len1_sq, len2_sq));
+ const ssef pixelsize12 = min(len12 * difl, ssef(extmax));
+ r12 = max(or12, pixelsize12);
+ }
+ float or1 = extract<0>(or12), or2 = extract<0>(shuffle<2>(or12));
+ float r1 = extract<0>(r12), r2 = extract<0>(shuffle<2>(r12));
+
+ const ssef p21_diff = P_curve[1] - P_curve[0];
+ const ssef sphere_dif1 = (dif + dif_second) * 0.5f;
+ const ssef dir = load4f(direction);
+ const ssef sphere_b_tmp = dot3_splat(dir, sphere_dif1);
+ const ssef sphere_dif2 = nmadd(sphere_b_tmp, dir, sphere_dif1);
+# endif
+
+ float mr = max(r1, r2);
+ float l = len3(p21_diff);
+ float invl = 1.0f / l;
+ float sp_r = mr + 0.5f * l;
+
+ float sphere_b = dot3(dir, sphere_dif2);
+ float sdisc = sphere_b * sphere_b - len3_squared(sphere_dif2) + sp_r * sp_r;
+
+ if (sdisc < 0.0f)
+ return false;
+
+ /* obtain parameters and test midpoint distance for suitable modes */
+# ifndef __KERNEL_SSE2__
+ float3 tg = p21_diff * invl;
+# else
+ const ssef tg = p21_diff * invl;
+# endif
+ float gd = (r2 - r1) * invl;
+
+ float dirz = dot3(dir, tg);
+ float difz = dot3(dif, tg);
+
+ float a = 1.0f - (dirz * dirz * (1 + gd * gd));
+
+ float halfb = dot3(dir, dif) - dirz * (difz + gd * (difz * gd + r1));
+
+ float tcentre = -halfb / a;
+ float zcentre = difz + (dirz * tcentre);
+
+ if ((tcentre > isect->t) && !(flags & CURVE_KN_ACCURATE))
+ return false;
+ if ((zcentre < 0 || zcentre > l) && !(flags & CURVE_KN_ACCURATE) &&
+ !(flags & CURVE_KN_INTERSECTCORRECTION))
+ return false;
+
+ /* test minimum separation */
+# ifndef __KERNEL_SSE2__
+ float3 cprod = cross(tg, dir);
+ float cprod2sq = len3_squared(cross(tg, dif));
+# else
+ const ssef cprod = cross(tg, dir);
+ float cprod2sq = len3_squared(cross_zxy(tg, dif));
+# endif
+ float cprodsq = len3_squared(cprod);
+ float distscaled = dot3(cprod, dif);
+
+ if (cprodsq == 0)
+ distscaled = cprod2sq;
+ else
+ distscaled = (distscaled * distscaled) / cprodsq;
+
+ if (distscaled > mr * mr)
+ return false;
+
+ /* calculate true intersection */
+# ifndef __KERNEL_SSE2__
+ float3 tdif = dif + tcentre * dir;
+# else
+ const ssef tdif = madd(ssef(tcentre), dir, dif);
+# endif
+ float tdifz = dot3(tdif, tg);
+ float tdifma = tdifz * gd + r1;
+ float tb = 2 * (dot3(dir, tdif) - dirz * (tdifz + gd * tdifma));
+ float tc = dot3(tdif, tdif) - tdifz * tdifz - tdifma * tdifma;
+ float td = tb * tb - 4 * a * tc;
+
+ if (td < 0.0f)
+ return false;
+
+ float rootd = 0.0f;
+ float correction = 0.0f;
+ if (flags & CURVE_KN_ACCURATE) {
+ rootd = sqrtf(td);
+ correction = ((-tb - rootd) / (2 * a));
+ }
+
+ float t = tcentre + correction;
+
+ if (t < isect->t) {
+
+ if (flags & CURVE_KN_INTERSECTCORRECTION) {
+ rootd = sqrtf(td);
+ correction = ((-tb - rootd) / (2 * a));
+ t = tcentre + correction;
+ }
+
+ float z = zcentre + (dirz * correction);
+ // bool backface = false;
+
+ if (flags & CURVE_KN_BACKFACING && (t < 0.0f || z < 0 || z > l)) {
+ // backface = true;
+ correction = ((-tb + rootd) / (2 * a));
+ t = tcentre + correction;
+ z = zcentre + (dirz * correction);
+ }
+
+ /* stochastic fade from minimum width */
+ float adjradius = or1 + z * (or2 - or1) * invl;
+ adjradius = adjradius / (r1 + z * gd);
+ if (lcg_state && adjradius != 1.0f) {
+ if (lcg_step_float(lcg_state) > adjradius)
+ return false;
+ }
+ /* --- */
+
+ if (t > 0.0f && t < isect->t && z >= 0 && z <= l) {
+
+ if (flags & CURVE_KN_ENCLOSEFILTER) {
+ float enc_ratio = 1.01f;
+ if ((difz > -r1 * enc_ratio) && (dot3(dif_second, tg) < r2 * enc_ratio)) {
+ float a2 = 1.0f - (dirz * dirz * (1 + gd * gd * enc_ratio * enc_ratio));
+ float c2 = dot3(dif, dif) - difz * difz * (1 + gd * gd * enc_ratio * enc_ratio) -
+ r1 * r1 * enc_ratio * enc_ratio - 2 * r1 * difz * gd * enc_ratio;
+ if (a2 * c2 < 0.0f)
+ return false;
+ }
+ }
+
+# ifdef __VISIBILITY_FLAG__
+ /* visibility flag test. we do it here under the assumption
+ * that most triangles are culled by node flags */
+ if (kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)
+# endif
+ {
+ /* record intersection */
+ isect->t = t;
+ isect->u = z * invl;
+ isect->v = gd;
+ isect->prim = curveAddr;
+ isect->object = object;
+ isect->type = type;
+
+ return true;
+ }
+ }
+ }
+
+ return false;
+
+# ifndef __KERNEL_SSE2__
+# undef len3_squared
+# undef len3
+# undef dot3
+# endif
}
ccl_device_inline float3 curve_refine(KernelGlobals *kg,
@@ -757,154 +809,154 @@ ccl_device_inline float3 curve_refine(KernelGlobals *kg,
const Intersection *isect,
const Ray *ray)
{
- int flag = kernel_data.curve.curveflags;
- float t = isect->t;
- float3 P = ray->P;
- float3 D = ray->D;
-
- if(isect->object != OBJECT_NONE) {
-#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_itfm;
-#else
- Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
-#endif
-
- P = transform_point(&tfm, P);
- D = transform_direction(&tfm, D*t);
- D = normalize_len(D, &t);
- }
-
- int prim = kernel_tex_fetch(__prim_index, isect->prim);
- float4 v00 = kernel_tex_fetch(__curves, prim);
-
- int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
- int k1 = k0 + 1;
-
- float3 tg;
-
- if(flag & CURVE_KN_INTERPOLATE) {
- int ka = max(k0 - 1,__float_as_int(v00.x));
- int kb = min(k1 + 1,__float_as_int(v00.x) + __float_as_int(v00.y) - 1);
-
- float4 P_curve[4];
-
- if(sd->type & PRIMITIVE_CURVE) {
- P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
- P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
- P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
- P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
- }
- else {
- motion_cardinal_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve);
- }
-
- float3 p[4];
- p[0] = float4_to_float3(P_curve[0]);
- p[1] = float4_to_float3(P_curve[1]);
- p[2] = float4_to_float3(P_curve[2]);
- p[3] = float4_to_float3(P_curve[3]);
-
- P = P + D*t;
-
-#ifdef __UV__
- sd->u = isect->u;
- sd->v = 0.0f;
-#endif
-
- tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3]));
-
- if(kernel_data.curve.curveflags & CURVE_KN_RIBBONS) {
- sd->Ng = normalize(-(D - tg * (dot(tg, D))));
- }
- else {
-#ifdef __EMBREE__
- if(kernel_data.bvh.scene) {
- sd->Ng = normalize(isect->Ng);
- }
- else
-#endif
- {
- /* direction from inside to surface of curve */
- float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]);
- sd->Ng = normalize(P - p_curr);
-
- /* adjustment for changing radius */
- float gd = isect->v;
-
- if(gd != 0.0f) {
- sd->Ng = sd->Ng - gd * tg;
- sd->Ng = normalize(sd->Ng);
- }
- }
- }
-
- /* todo: sometimes the normal is still so that this is detected as
- * backfacing even if cull backfaces is enabled */
-
- sd->N = sd->Ng;
- }
- else {
- float4 P_curve[2];
-
- if(sd->type & PRIMITIVE_CURVE) {
- P_curve[0]= kernel_tex_fetch(__curve_keys, k0);
- P_curve[1]= kernel_tex_fetch(__curve_keys, k1);
- }
- else {
- motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
- }
-
- float l = 1.0f;
- tg = normalize_len(float4_to_float3(P_curve[1] - P_curve[0]), &l);
-
- P = P + D*t;
-
- float3 dif = P - float4_to_float3(P_curve[0]);
-
-#ifdef __UV__
- sd->u = dot(dif,tg)/l;
- sd->v = 0.0f;
-#endif
-
- if(flag & CURVE_KN_TRUETANGENTGNORMAL) {
- sd->Ng = -(D - tg * dot(tg, D));
- sd->Ng = normalize(sd->Ng);
- }
- else {
- float gd = isect->v;
-
- /* direction from inside to surface of curve */
- float denom = fmaxf(P_curve[0].w + sd->u * l * gd, 1e-8f);
- sd->Ng = (dif - tg * sd->u * l) / denom;
-
- /* adjustment for changing radius */
- if(gd != 0.0f) {
- sd->Ng = sd->Ng - gd * tg;
- }
-
- sd->Ng = normalize(sd->Ng);
- }
-
- sd->N = sd->Ng;
- }
-
-#ifdef __DPDU__
- /* dPdu/dPdv */
- sd->dPdu = tg;
- sd->dPdv = cross(tg, sd->Ng);
-#endif
-
- if(isect->object != OBJECT_NONE) {
-#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_tfm;
-#else
- Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
-#endif
-
- P = transform_point(&tfm, P);
- }
-
- return P;
+ int flag = kernel_data.curve.curveflags;
+ float t = isect->t;
+ float3 P = ray->P;
+ float3 D = ray->D;
+
+ if (isect->object != OBJECT_NONE) {
+# ifdef __OBJECT_MOTION__
+ Transform tfm = sd->ob_itfm;
+# else
+ Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
+# endif
+
+ P = transform_point(&tfm, P);
+ D = transform_direction(&tfm, D * t);
+ D = normalize_len(D, &t);
+ }
+
+ int prim = kernel_tex_fetch(__prim_index, isect->prim);
+ float4 v00 = kernel_tex_fetch(__curves, prim);
+
+ int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+ int k1 = k0 + 1;
+
+ float3 tg;
+
+ if (flag & CURVE_KN_INTERPOLATE) {
+ int ka = max(k0 - 1, __float_as_int(v00.x));
+ int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1);
+
+ float4 P_curve[4];
+
+ if (sd->type & PRIMITIVE_CURVE) {
+ P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
+ P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
+ P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
+ P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
+ }
+ else {
+ motion_cardinal_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve);
+ }
+
+ float3 p[4];
+ p[0] = float4_to_float3(P_curve[0]);
+ p[1] = float4_to_float3(P_curve[1]);
+ p[2] = float4_to_float3(P_curve[2]);
+ p[3] = float4_to_float3(P_curve[3]);
+
+ P = P + D * t;
+
+# ifdef __UV__
+ sd->u = isect->u;
+ sd->v = 0.0f;
+# endif
+
+ tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3]));
+
+ if (kernel_data.curve.curveflags & CURVE_KN_RIBBONS) {
+ sd->Ng = normalize(-(D - tg * (dot(tg, D))));
+ }
+ else {
+# ifdef __EMBREE__
+ if (kernel_data.bvh.scene) {
+ sd->Ng = normalize(isect->Ng);
+ }
+ else
+# endif
+ {
+ /* direction from inside to surface of curve */
+ float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]);
+ sd->Ng = normalize(P - p_curr);
+
+ /* adjustment for changing radius */
+ float gd = isect->v;
+
+ if (gd != 0.0f) {
+ sd->Ng = sd->Ng - gd * tg;
+ sd->Ng = normalize(sd->Ng);
+ }
+ }
+ }
+
+ /* todo: sometimes the normal is still so that this is detected as
+ * backfacing even if cull backfaces is enabled */
+
+ sd->N = sd->Ng;
+ }
+ else {
+ float4 P_curve[2];
+
+ if (sd->type & PRIMITIVE_CURVE) {
+ P_curve[0] = kernel_tex_fetch(__curve_keys, k0);
+ P_curve[1] = kernel_tex_fetch(__curve_keys, k1);
+ }
+ else {
+ motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
+ }
+
+ float l = 1.0f;
+ tg = normalize_len(float4_to_float3(P_curve[1] - P_curve[0]), &l);
+
+ P = P + D * t;
+
+ float3 dif = P - float4_to_float3(P_curve[0]);
+
+# ifdef __UV__
+ sd->u = dot(dif, tg) / l;
+ sd->v = 0.0f;
+# endif
+
+ if (flag & CURVE_KN_TRUETANGENTGNORMAL) {
+ sd->Ng = -(D - tg * dot(tg, D));
+ sd->Ng = normalize(sd->Ng);
+ }
+ else {
+ float gd = isect->v;
+
+ /* direction from inside to surface of curve */
+ float denom = fmaxf(P_curve[0].w + sd->u * l * gd, 1e-8f);
+ sd->Ng = (dif - tg * sd->u * l) / denom;
+
+ /* adjustment for changing radius */
+ if (gd != 0.0f) {
+ sd->Ng = sd->Ng - gd * tg;
+ }
+
+ sd->Ng = normalize(sd->Ng);
+ }
+
+ sd->N = sd->Ng;
+ }
+
+# ifdef __DPDU__
+ /* dPdu/dPdv */
+ sd->dPdu = tg;
+ sd->dPdv = cross(tg, sd->Ng);
+# endif
+
+ if (isect->object != OBJECT_NONE) {
+# ifdef __OBJECT_MOTION__
+ Transform tfm = sd->ob_tfm;
+# else
+ Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
+# endif
+
+ P = transform_point(&tfm, P);
+ }
+
+ return P;
}
#endif
diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h
index 5cc22ae2155..7380c506bf4 100644
--- a/intern/cycles/kernel/geom/geom_motion_curve.h
+++ b/intern/cycles/kernel/geom/geom_motion_curve.h
@@ -25,96 +25,116 @@ CCL_NAMESPACE_BEGIN
#ifdef __HAIR__
-ccl_device_inline int find_attribute_curve_motion(KernelGlobals *kg, int object, uint id, AttributeElement *elem)
+ccl_device_inline int find_attribute_curve_motion(KernelGlobals *kg,
+ int object,
+ uint id,
+ AttributeElement *elem)
{
- /* todo: find a better (faster) solution for this, maybe store offset per object.
- *
- * NOTE: currently it's not a bottleneck because in test scenes the loop below runs
- * zero iterations and rendering is really slow with motion curves. For until other
- * areas are speed up it's probably not so crucial to optimize this out.
- */
- uint attr_offset = object_attribute_map_offset(kg, object) + ATTR_PRIM_CURVE;
- uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
-
- while(attr_map.x != id) {
- attr_offset += ATTR_PRIM_TYPES;
- attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
- }
-
- *elem = (AttributeElement)attr_map.y;
-
- /* return result */
- return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
+ /* todo: find a better (faster) solution for this, maybe store offset per object.
+ *
+ * NOTE: currently it's not a bottleneck because in test scenes the loop below runs
+ * zero iterations and rendering is really slow with motion curves. For until other
+ * areas are speed up it's probably not so crucial to optimize this out.
+ */
+ uint attr_offset = object_attribute_map_offset(kg, object) + ATTR_PRIM_CURVE;
+ uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
+
+ while (attr_map.x != id) {
+ attr_offset += ATTR_PRIM_TYPES;
+ attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
+ }
+
+ *elem = (AttributeElement)attr_map.y;
+
+ /* return result */
+ return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
}
-ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg, int offset, int numkeys, int numsteps, int step, int k0, int k1, float4 keys[2])
+ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg,
+ int offset,
+ int numkeys,
+ int numsteps,
+ int step,
+ int k0,
+ int k1,
+ float4 keys[2])
{
- if(step == numsteps) {
- /* center step: regular key location */
- keys[0] = kernel_tex_fetch(__curve_keys, k0);
- keys[1] = kernel_tex_fetch(__curve_keys, k1);
- }
- else {
- /* center step is not stored in this array */
- if(step > numsteps)
- step--;
-
- offset += step*numkeys;
-
- keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0);
- keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1);
- }
+ if (step == numsteps) {
+ /* center step: regular key location */
+ keys[0] = kernel_tex_fetch(__curve_keys, k0);
+ keys[1] = kernel_tex_fetch(__curve_keys, k1);
+ }
+ else {
+ /* center step is not stored in this array */
+ if (step > numsteps)
+ step--;
+
+ offset += step * numkeys;
+
+ keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0);
+ keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1);
+ }
}
/* return 2 curve key locations */
-ccl_device_inline void motion_curve_keys(KernelGlobals *kg, int object, int prim, float time, int k0, int k1, float4 keys[2])
+ccl_device_inline void motion_curve_keys(
+ KernelGlobals *kg, int object, int prim, float time, int k0, int k1, float4 keys[2])
{
- /* get motion info */
- int numsteps, numkeys;
- object_motion_info(kg, object, &numsteps, NULL, &numkeys);
+ /* get motion info */
+ int numsteps, numkeys;
+ object_motion_info(kg, object, &numsteps, NULL, &numkeys);
- /* figure out which steps we need to fetch and their interpolation factor */
- int maxstep = numsteps*2;
- int step = min((int)(time*maxstep), maxstep-1);
- float t = time*maxstep - step;
+ /* figure out which steps we need to fetch and their interpolation factor */
+ int maxstep = numsteps * 2;
+ int step = min((int)(time * maxstep), maxstep - 1);
+ float t = time * maxstep - step;
- /* find attribute */
- AttributeElement elem;
- int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
- kernel_assert(offset != ATTR_STD_NOT_FOUND);
+ /* find attribute */
+ AttributeElement elem;
+ int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
+ kernel_assert(offset != ATTR_STD_NOT_FOUND);
- /* fetch key coordinates */
- float4 next_keys[2];
+ /* fetch key coordinates */
+ float4 next_keys[2];
- motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, keys);
- motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step+1, k0, k1, next_keys);
+ motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, keys);
+ motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step + 1, k0, k1, next_keys);
- /* interpolate between steps */
- keys[0] = (1.0f - t)*keys[0] + t*next_keys[0];
- keys[1] = (1.0f - t)*keys[1] + t*next_keys[1];
+ /* interpolate between steps */
+ keys[0] = (1.0f - t) * keys[0] + t * next_keys[0];
+ keys[1] = (1.0f - t) * keys[1] + t * next_keys[1];
}
-ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg, int offset, int numkeys, int numsteps, int step, int k0, int k1, int k2, int k3, float4 keys[4])
+ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg,
+ int offset,
+ int numkeys,
+ int numsteps,
+ int step,
+ int k0,
+ int k1,
+ int k2,
+ int k3,
+ float4 keys[4])
{
- if(step == numsteps) {
- /* center step: regular key location */
- keys[0] = kernel_tex_fetch(__curve_keys, k0);
- keys[1] = kernel_tex_fetch(__curve_keys, k1);
- keys[2] = kernel_tex_fetch(__curve_keys, k2);
- keys[3] = kernel_tex_fetch(__curve_keys, k3);
- }
- else {
- /* center step is not stored in this array */
- if(step > numsteps)
- step--;
-
- offset += step*numkeys;
-
- keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0);
- keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1);
- keys[2] = kernel_tex_fetch(__attributes_float3, offset + k2);
- keys[3] = kernel_tex_fetch(__attributes_float3, offset + k3);
- }
+ if (step == numsteps) {
+ /* center step: regular key location */
+ keys[0] = kernel_tex_fetch(__curve_keys, k0);
+ keys[1] = kernel_tex_fetch(__curve_keys, k1);
+ keys[2] = kernel_tex_fetch(__curve_keys, k2);
+ keys[3] = kernel_tex_fetch(__curve_keys, k3);
+ }
+ else {
+ /* center step is not stored in this array */
+ if (step > numsteps)
+ step--;
+
+ offset += step * numkeys;
+
+ keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0);
+ keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1);
+ keys[2] = kernel_tex_fetch(__attributes_float3, offset + k2);
+ keys[3] = kernel_tex_fetch(__attributes_float3, offset + k3);
+ }
}
/* return 2 curve key locations */
@@ -122,37 +142,41 @@ ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg,
int object,
int prim,
float time,
- int k0, int k1, int k2, int k3,
+ int k0,
+ int k1,
+ int k2,
+ int k3,
float4 keys[4])
{
- /* get motion info */
- int numsteps, numkeys;
- object_motion_info(kg, object, &numsteps, NULL, &numkeys);
-
- /* figure out which steps we need to fetch and their interpolation factor */
- int maxstep = numsteps*2;
- int step = min((int)(time*maxstep), maxstep-1);
- float t = time*maxstep - step;
-
- /* find attribute */
- AttributeElement elem;
- int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
- kernel_assert(offset != ATTR_STD_NOT_FOUND);
-
- /* fetch key coordinates */
- float4 next_keys[4];
-
- motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys);
- motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step+1, k0, k1, k2, k3, next_keys);
-
- /* interpolate between steps */
- keys[0] = (1.0f - t)*keys[0] + t*next_keys[0];
- keys[1] = (1.0f - t)*keys[1] + t*next_keys[1];
- keys[2] = (1.0f - t)*keys[2] + t*next_keys[2];
- keys[3] = (1.0f - t)*keys[3] + t*next_keys[3];
+ /* get motion info */
+ int numsteps, numkeys;
+ object_motion_info(kg, object, &numsteps, NULL, &numkeys);
+
+ /* figure out which steps we need to fetch and their interpolation factor */
+ int maxstep = numsteps * 2;
+ int step = min((int)(time * maxstep), maxstep - 1);
+ float t = time * maxstep - step;
+
+ /* find attribute */
+ AttributeElement elem;
+ int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
+ kernel_assert(offset != ATTR_STD_NOT_FOUND);
+
+ /* fetch key coordinates */
+ float4 next_keys[4];
+
+ motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys);
+ motion_cardinal_curve_keys_for_step(
+ kg, offset, numkeys, numsteps, step + 1, k0, k1, k2, k3, next_keys);
+
+ /* interpolate between steps */
+ keys[0] = (1.0f - t) * keys[0] + t * next_keys[0];
+ keys[1] = (1.0f - t) * keys[1] + t * next_keys[1];
+ keys[2] = (1.0f - t) * keys[2] + t * next_keys[2];
+ keys[3] = (1.0f - t) * keys[3] + t * next_keys[3];
}
-#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+# if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
/* Similar to above, but returns keys as pair of two AVX registers with each
* holding two float4.
*/
@@ -160,56 +184,44 @@ ccl_device_inline void motion_cardinal_curve_keys_avx(KernelGlobals *kg,
int object,
int prim,
float time,
- int k0, int k1,
- int k2, int k3,
+ int k0,
+ int k1,
+ int k2,
+ int k3,
avxf *out_keys_0_1,
avxf *out_keys_2_3)
{
- /* Get motion info. */
- int numsteps, numkeys;
- object_motion_info(kg, object, &numsteps, NULL, &numkeys);
-
- /* Figure out which steps we need to fetch and their interpolation factor. */
- int maxstep = numsteps * 2;
- int step = min((int)(time*maxstep), maxstep - 1);
- float t = time*maxstep - step;
-
- /* Find attribute. */
- AttributeElement elem;
- int offset = find_attribute_curve_motion(kg,
- object,
- ATTR_STD_MOTION_VERTEX_POSITION,
- &elem);
- kernel_assert(offset != ATTR_STD_NOT_FOUND);
-
- /* Fetch key coordinates. */
- float4 next_keys[4];
- float4 keys[4];
- motion_cardinal_curve_keys_for_step(kg,
- offset,
- numkeys,
- numsteps,
- step,
- k0, k1, k2, k3,
- keys);
- motion_cardinal_curve_keys_for_step(kg,
- offset,
- numkeys,
- numsteps,
- step + 1,
- k0, k1, k2, k3,
- next_keys);
-
- const avxf keys_0_1 = avxf(keys[0].m128, keys[1].m128);
- const avxf keys_2_3 = avxf(keys[2].m128, keys[3].m128);
- const avxf next_keys_0_1 = avxf(next_keys[0].m128, next_keys[1].m128);
- const avxf next_keys_2_3 = avxf(next_keys[2].m128, next_keys[3].m128);
-
- /* Interpolate between steps. */
- *out_keys_0_1 = (1.0f - t) * keys_0_1 + t*next_keys_0_1;
- *out_keys_2_3 = (1.0f - t) * keys_2_3 + t*next_keys_2_3;
+ /* Get motion info. */
+ int numsteps, numkeys;
+ object_motion_info(kg, object, &numsteps, NULL, &numkeys);
+
+ /* Figure out which steps we need to fetch and their interpolation factor. */
+ int maxstep = numsteps * 2;
+ int step = min((int)(time * maxstep), maxstep - 1);
+ float t = time * maxstep - step;
+
+ /* Find attribute. */
+ AttributeElement elem;
+ int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
+ kernel_assert(offset != ATTR_STD_NOT_FOUND);
+
+ /* Fetch key coordinates. */
+ float4 next_keys[4];
+ float4 keys[4];
+ motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys);
+ motion_cardinal_curve_keys_for_step(
+ kg, offset, numkeys, numsteps, step + 1, k0, k1, k2, k3, next_keys);
+
+ const avxf keys_0_1 = avxf(keys[0].m128, keys[1].m128);
+ const avxf keys_2_3 = avxf(keys[2].m128, keys[3].m128);
+ const avxf next_keys_0_1 = avxf(next_keys[0].m128, next_keys[1].m128);
+ const avxf next_keys_2_3 = avxf(next_keys[2].m128, next_keys[3].m128);
+
+ /* Interpolate between steps. */
+ *out_keys_0_1 = (1.0f - t) * keys_0_1 + t * next_keys_0_1;
+ *out_keys_2_3 = (1.0f - t) * keys_2_3 + t * next_keys_2_3;
}
-#endif
+# endif
#endif
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h
index 64f6d027b99..53d6b92dd7e 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle.h
@@ -29,127 +29,145 @@ CCL_NAMESPACE_BEGIN
/* Time interpolation of vertex positions and normals */
-ccl_device_inline int find_attribute_motion(KernelGlobals *kg, int object, uint id, AttributeElement *elem)
+ccl_device_inline int find_attribute_motion(KernelGlobals *kg,
+ int object,
+ uint id,
+ AttributeElement *elem)
{
- /* todo: find a better (faster) solution for this, maybe store offset per object */
- uint attr_offset = object_attribute_map_offset(kg, object);
- uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
+ /* todo: find a better (faster) solution for this, maybe store offset per object */
+ uint attr_offset = object_attribute_map_offset(kg, object);
+ uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
- while(attr_map.x != id) {
- attr_offset += ATTR_PRIM_TYPES;
- attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
- }
+ while (attr_map.x != id) {
+ attr_offset += ATTR_PRIM_TYPES;
+ attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
+ }
- *elem = (AttributeElement)attr_map.y;
+ *elem = (AttributeElement)attr_map.y;
- /* return result */
- return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
+ /* return result */
+ return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
}
-ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals *kg, uint4 tri_vindex, int offset, int numverts, int numsteps, int step, float3 verts[3])
+ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals *kg,
+ uint4 tri_vindex,
+ int offset,
+ int numverts,
+ int numsteps,
+ int step,
+ float3 verts[3])
{
- if(step == numsteps) {
- /* center step: regular vertex location */
- verts[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0));
- verts[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1));
- verts[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2));
- }
- else {
- /* center step not store in this array */
- if(step > numsteps)
- step--;
-
- offset += step*numverts;
-
- verts[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x));
- verts[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y));
- verts[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z));
- }
+ if (step == numsteps) {
+ /* center step: regular vertex location */
+ verts[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0));
+ verts[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1));
+ verts[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2));
+ }
+ else {
+ /* center step not store in this array */
+ if (step > numsteps)
+ step--;
+
+ offset += step * numverts;
+
+ verts[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x));
+ verts[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y));
+ verts[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z));
+ }
}
-ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals *kg, uint4 tri_vindex, int offset, int numverts, int numsteps, int step, float3 normals[3])
+ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals *kg,
+ uint4 tri_vindex,
+ int offset,
+ int numverts,
+ int numsteps,
+ int step,
+ float3 normals[3])
{
- if(step == numsteps) {
- /* center step: regular vertex location */
- normals[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x));
- normals[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y));
- normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z));
- }
- else {
- /* center step is not stored in this array */
- if(step > numsteps)
- step--;
-
- offset += step*numverts;
-
- normals[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x));
- normals[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y));
- normals[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z));
- }
+ if (step == numsteps) {
+ /* center step: regular vertex location */
+ normals[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x));
+ normals[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y));
+ normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z));
+ }
+ else {
+ /* center step is not stored in this array */
+ if (step > numsteps)
+ step--;
+
+ offset += step * numverts;
+
+ normals[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x));
+ normals[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y));
+ normals[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z));
+ }
}
-ccl_device_inline void motion_triangle_vertices(KernelGlobals *kg, int object, int prim, float time, float3 verts[3])
+ccl_device_inline void motion_triangle_vertices(
+ KernelGlobals *kg, int object, int prim, float time, float3 verts[3])
{
- /* get motion info */
- int numsteps, numverts;
- object_motion_info(kg, object, &numsteps, &numverts, NULL);
-
- /* figure out which steps we need to fetch and their interpolation factor */
- int maxstep = numsteps*2;
- int step = min((int)(time*maxstep), maxstep-1);
- float t = time*maxstep - step;
-
- /* find attribute */
- AttributeElement elem;
- int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
- kernel_assert(offset != ATTR_STD_NOT_FOUND);
-
- /* fetch vertex coordinates */
- float3 next_verts[3];
- uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
-
- motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
- motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts);
-
- /* interpolate between steps */
- verts[0] = (1.0f - t)*verts[0] + t*next_verts[0];
- verts[1] = (1.0f - t)*verts[1] + t*next_verts[1];
- verts[2] = (1.0f - t)*verts[2] + t*next_verts[2];
+ /* get motion info */
+ int numsteps, numverts;
+ object_motion_info(kg, object, &numsteps, &numverts, NULL);
+
+ /* figure out which steps we need to fetch and their interpolation factor */
+ int maxstep = numsteps * 2;
+ int step = min((int)(time * maxstep), maxstep - 1);
+ float t = time * maxstep - step;
+
+ /* find attribute */
+ AttributeElement elem;
+ int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
+ kernel_assert(offset != ATTR_STD_NOT_FOUND);
+
+ /* fetch vertex coordinates */
+ float3 next_verts[3];
+ uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
+
+ motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
+ motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts);
+
+ /* interpolate between steps */
+ verts[0] = (1.0f - t) * verts[0] + t * next_verts[0];
+ verts[1] = (1.0f - t) * verts[1] + t * next_verts[1];
+ verts[2] = (1.0f - t) * verts[2] + t * next_verts[2];
}
-ccl_device_inline float3 motion_triangle_smooth_normal(KernelGlobals *kg, float3 Ng, int object, int prim, float u, float v, float time)
+ccl_device_inline float3 motion_triangle_smooth_normal(
+ KernelGlobals *kg, float3 Ng, int object, int prim, float u, float v, float time)
{
- /* get motion info */
- int numsteps, numverts;
- object_motion_info(kg, object, &numsteps, &numverts, NULL);
-
- /* figure out which steps we need to fetch and their interpolation factor */
- int maxstep = numsteps*2;
- int step = min((int)(time*maxstep), maxstep-1);
- float t = time*maxstep - step;
-
- /* find attribute */
- AttributeElement elem;
- int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem);
- kernel_assert(offset != ATTR_STD_NOT_FOUND);
-
- /* fetch normals */
- float3 normals[3], next_normals[3];
- uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
-
- motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
- motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_normals);
-
- /* interpolate between steps */
- normals[0] = (1.0f - t)*normals[0] + t*next_normals[0];
- normals[1] = (1.0f - t)*normals[1] + t*next_normals[1];
- normals[2] = (1.0f - t)*normals[2] + t*next_normals[2];
-
- /* interpolate between vertices */
- float w = 1.0f - u - v;
- float3 N = safe_normalize(u*normals[0] + v*normals[1] + w*normals[2]);
-
- return is_zero(N)? Ng: N;
+ /* get motion info */
+ int numsteps, numverts;
+ object_motion_info(kg, object, &numsteps, &numverts, NULL);
+
+ /* figure out which steps we need to fetch and their interpolation factor */
+ int maxstep = numsteps * 2;
+ int step = min((int)(time * maxstep), maxstep - 1);
+ float t = time * maxstep - step;
+
+ /* find attribute */
+ AttributeElement elem;
+ int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem);
+ kernel_assert(offset != ATTR_STD_NOT_FOUND);
+
+ /* fetch normals */
+ float3 normals[3], next_normals[3];
+ uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
+
+ motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
+ motion_triangle_normals_for_step(
+ kg, tri_vindex, offset, numverts, numsteps, step + 1, next_normals);
+
+ /* interpolate between steps */
+ normals[0] = (1.0f - t) * normals[0] + t * next_normals[0];
+ normals[1] = (1.0f - t) * normals[1] + t * next_normals[1];
+ normals[2] = (1.0f - t) * normals[2] + t * next_normals[2];
+
+ /* interpolate between vertices */
+ float w = 1.0f - u - v;
+ float3 N = safe_normalize(u * normals[0] + v * normals[1] + w * normals[2]);
+
+ return is_zero(N) ? Ng : N;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
index ec7bfad7349..49d4829af38 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
@@ -32,64 +32,57 @@ CCL_NAMESPACE_BEGIN
* a closer distance.
*/
-ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg,
- ShaderData *sd,
- const Intersection *isect,
- const Ray *ray,
- float3 verts[3])
+ccl_device_inline float3 motion_triangle_refine(
+ KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, float3 verts[3])
{
- float3 P = ray->P;
- float3 D = ray->D;
- float t = isect->t;
+ float3 P = ray->P;
+ float3 D = ray->D;
+ float t = isect->t;
#ifdef __INTERSECTION_REFINE__
- if(isect->object != OBJECT_NONE) {
- if(UNLIKELY(t == 0.0f)) {
- return P;
- }
+ if (isect->object != OBJECT_NONE) {
+ if (UNLIKELY(t == 0.0f)) {
+ return P;
+ }
# ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_itfm;
+ Transform tfm = sd->ob_itfm;
# else
- Transform tfm = object_fetch_transform(kg,
- isect->object,
- OBJECT_INVERSE_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
# endif
- P = transform_point(&tfm, P);
- D = transform_direction(&tfm, D*t);
- D = normalize_len(D, &t);
- }
+ P = transform_point(&tfm, P);
+ D = transform_direction(&tfm, D * t);
+ D = normalize_len(D, &t);
+ }
- P = P + D*t;
+ P = P + D * t;
- /* Compute refined intersection distance. */
- const float3 e1 = verts[0] - verts[2];
- const float3 e2 = verts[1] - verts[2];
- const float3 s1 = cross(D, e2);
+ /* Compute refined intersection distance. */
+ const float3 e1 = verts[0] - verts[2];
+ const float3 e2 = verts[1] - verts[2];
+ const float3 s1 = cross(D, e2);
- const float invdivisor = 1.0f/dot(s1, e1);
- const float3 d = P - verts[2];
- const float3 s2 = cross(d, e1);
- float rt = dot(e2, s2)*invdivisor;
+ const float invdivisor = 1.0f / dot(s1, e1);
+ const float3 d = P - verts[2];
+ const float3 s2 = cross(d, e1);
+ float rt = dot(e2, s2) * invdivisor;
- /* Compute refined position. */
- P = P + D*rt;
+ /* Compute refined position. */
+ P = P + D * rt;
- if(isect->object != OBJECT_NONE) {
+ if (isect->object != OBJECT_NONE) {
# ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_tfm;
+ Transform tfm = sd->ob_tfm;
# else
- Transform tfm = object_fetch_transform(kg,
- isect->object,
- OBJECT_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
# endif
- P = transform_point(&tfm, P);
- }
+ P = transform_point(&tfm, P);
+ }
- return P;
+ return P;
#else
- return P + D*t;
+ return P + D * t;
#endif
}
@@ -103,116 +96,112 @@ ccl_device_noinline
# else
ccl_device_inline
# endif
-float3 motion_triangle_refine_local(KernelGlobals *kg,
- ShaderData *sd,
- const Intersection *isect,
- const Ray *ray,
- float3 verts[3])
+ float3
+ motion_triangle_refine_local(KernelGlobals *kg,
+ ShaderData *sd,
+ const Intersection *isect,
+ const Ray *ray,
+ float3 verts[3])
{
- float3 P = ray->P;
- float3 D = ray->D;
- float t = isect->t;
+ float3 P = ray->P;
+ float3 D = ray->D;
+ float t = isect->t;
# ifdef __INTERSECTION_REFINE__
- if(isect->object != OBJECT_NONE) {
+ if (isect->object != OBJECT_NONE) {
# ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_itfm;
+ Transform tfm = sd->ob_itfm;
# else
- Transform tfm = object_fetch_transform(kg,
- isect->object,
- OBJECT_INVERSE_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
# endif
- P = transform_point(&tfm, P);
- D = transform_direction(&tfm, D);
- D = normalize(D);
- }
+ P = transform_point(&tfm, P);
+ D = transform_direction(&tfm, D);
+ D = normalize(D);
+ }
- P = P + D*t;
+ P = P + D * t;
- /* compute refined intersection distance */
- const float3 e1 = verts[0] - verts[2];
- const float3 e2 = verts[1] - verts[2];
- const float3 s1 = cross(D, e2);
+ /* compute refined intersection distance */
+ const float3 e1 = verts[0] - verts[2];
+ const float3 e2 = verts[1] - verts[2];
+ const float3 s1 = cross(D, e2);
- const float invdivisor = 1.0f/dot(s1, e1);
- const float3 d = P - verts[2];
- const float3 s2 = cross(d, e1);
- float rt = dot(e2, s2)*invdivisor;
+ const float invdivisor = 1.0f / dot(s1, e1);
+ const float3 d = P - verts[2];
+ const float3 s2 = cross(d, e1);
+ float rt = dot(e2, s2) * invdivisor;
- P = P + D*rt;
+ P = P + D * rt;
- if(isect->object != OBJECT_NONE) {
+ if (isect->object != OBJECT_NONE) {
# ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_tfm;
+ Transform tfm = sd->ob_tfm;
# else
- Transform tfm = object_fetch_transform(kg,
- isect->object,
- OBJECT_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
# endif
- P = transform_point(&tfm, P);
- }
+ P = transform_point(&tfm, P);
+ }
- return P;
+ return P;
# else /* __INTERSECTION_REFINE__ */
- return P + D*t;
-# endif /* __INTERSECTION_REFINE__ */
+ return P + D * t;
+# endif /* __INTERSECTION_REFINE__ */
}
-#endif /* __BVH_LOCAL__ */
-
+#endif /* __BVH_LOCAL__ */
/* Ray intersection. We simply compute the vertex positions at the given ray
* time and do a ray intersection with the resulting triangle.
*/
-ccl_device_inline bool motion_triangle_intersect(
- KernelGlobals *kg,
- Intersection *isect,
- float3 P,
- float3 dir,
- float time,
- uint visibility,
- int object,
- int prim_addr)
+ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg,
+ Intersection *isect,
+ float3 P,
+ float3 dir,
+ float time,
+ uint visibility,
+ int object,
+ int prim_addr)
{
- /* Primitive index for vertex location lookup. */
- int prim = kernel_tex_fetch(__prim_index, prim_addr);
- int fobject = (object == OBJECT_NONE)
- ? kernel_tex_fetch(__prim_object, prim_addr)
- : object;
- /* Get vertex locations for intersection. */
- float3 verts[3];
- motion_triangle_vertices(kg, fobject, prim, time, verts);
- /* Ray-triangle intersection, unoptimized. */
- float t, u, v;
- if(ray_triangle_intersect(P,
- dir,
- isect->t,
+ /* Primitive index for vertex location lookup. */
+ int prim = kernel_tex_fetch(__prim_index, prim_addr);
+ int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, prim_addr) : object;
+ /* Get vertex locations for intersection. */
+ float3 verts[3];
+ motion_triangle_vertices(kg, fobject, prim, time, verts);
+ /* Ray-triangle intersection, unoptimized. */
+ float t, u, v;
+ if (ray_triangle_intersect(P,
+ dir,
+ isect->t,
#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
- (ssef*)verts,
+ (ssef *)verts,
#else
- verts[0], verts[1], verts[2],
+ verts[0],
+ verts[1],
+ verts[2],
#endif
- &u, &v, &t))
- {
+ &u,
+ &v,
+ &t)) {
#ifdef __VISIBILITY_FLAG__
- /* Visibility flag test. we do it here under the assumption
- * that most triangles are culled by node flags.
- */
- if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
+ /* Visibility flag test. we do it here under the assumption
+ * that most triangles are culled by node flags.
+ */
+ if (kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
#endif
- {
- isect->t = t;
- isect->u = u;
- isect->v = v;
- isect->prim = prim_addr;
- isect->object = object;
- isect->type = PRIMITIVE_MOTION_TRIANGLE;
- return true;
- }
- }
- return false;
+ {
+ isect->t = t;
+ isect->u = u;
+ isect->v = v;
+ isect->prim = prim_addr;
+ isect->object = object;
+ isect->type = PRIMITIVE_MOTION_TRIANGLE;
+ return true;
+ }
+ }
+ return false;
}
/* Special ray intersection routines for local intersections. In that case we
@@ -221,101 +210,102 @@ ccl_device_inline bool motion_triangle_intersect(
* Returns whether traversal should be stopped.
*/
#ifdef __BVH_LOCAL__
-ccl_device_inline bool motion_triangle_intersect_local(
- KernelGlobals *kg,
- LocalIntersection *local_isect,
- float3 P,
- float3 dir,
- float time,
- int object,
- int local_object,
- int prim_addr,
- float tmax,
- uint *lcg_state,
- int max_hits)
+ccl_device_inline bool motion_triangle_intersect_local(KernelGlobals *kg,
+ LocalIntersection *local_isect,
+ float3 P,
+ float3 dir,
+ float time,
+ int object,
+ int local_object,
+ int prim_addr,
+ float tmax,
+ uint *lcg_state,
+ int max_hits)
{
- /* Only intersect with matching object, for instanced objects we
- * already know we are only intersecting the right object. */
- if(object == OBJECT_NONE) {
- if(kernel_tex_fetch(__prim_object, prim_addr) != local_object) {
- return false;
- }
- }
-
- /* Primitive index for vertex location lookup. */
- int prim = kernel_tex_fetch(__prim_index, prim_addr);
- /* Get vertex locations for intersection. */
- float3 verts[3];
- motion_triangle_vertices(kg, local_object, prim, time, verts);
- /* Ray-triangle intersection, unoptimized. */
- float t, u, v;
- if(!ray_triangle_intersect(P,
- dir,
- tmax,
-#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
- (ssef*)verts,
-#else
- verts[0], verts[1], verts[2],
-#endif
- &u, &v, &t))
- {
- return false;
- }
-
- /* If no actual hit information is requested, just return here. */
- if(max_hits == 0) {
- return true;
- }
-
- int hit;
- if(lcg_state) {
- /* Record up to max_hits intersections. */
- for(int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
- if(local_isect->hits[i].t == t) {
- return false;
- }
- }
-
- local_isect->num_hits++;
-
- if(local_isect->num_hits <= max_hits) {
- hit = local_isect->num_hits - 1;
- }
- else {
- /* Reservoir sampling: if we are at the maximum number of
- * hits, randomly replace element or skip it.
- */
- hit = lcg_step_uint(lcg_state) % local_isect->num_hits;
-
- if(hit >= max_hits)
- return false;
- }
- }
- else {
- /* Record closest intersection only. */
- if(local_isect->num_hits && t > local_isect->hits[0].t) {
- return false;
- }
-
- hit = 0;
- local_isect->num_hits = 1;
- }
-
- /* Record intersection. */
- Intersection *isect = &local_isect->hits[hit];
- isect->t = t;
- isect->u = u;
- isect->v = v;
- isect->prim = prim_addr;
- isect->object = object;
- isect->type = PRIMITIVE_MOTION_TRIANGLE;
-
- /* Record geometric normal. */
- local_isect->Ng[hit] = normalize(cross(verts[1] - verts[0],
- verts[2] - verts[0]));
-
- return false;
+ /* Only intersect with matching object, for instanced objects we
+ * already know we are only intersecting the right object. */
+ if (object == OBJECT_NONE) {
+ if (kernel_tex_fetch(__prim_object, prim_addr) != local_object) {
+ return false;
+ }
+ }
+
+ /* Primitive index for vertex location lookup. */
+ int prim = kernel_tex_fetch(__prim_index, prim_addr);
+ /* Get vertex locations for intersection. */
+ float3 verts[3];
+ motion_triangle_vertices(kg, local_object, prim, time, verts);
+ /* Ray-triangle intersection, unoptimized. */
+ float t, u, v;
+ if (!ray_triangle_intersect(P,
+ dir,
+ tmax,
+# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
+ (ssef *)verts,
+# else
+ verts[0],
+ verts[1],
+ verts[2],
+# endif
+ &u,
+ &v,
+ &t)) {
+ return false;
+ }
+
+ /* If no actual hit information is requested, just return here. */
+ if (max_hits == 0) {
+ return true;
+ }
+
+ int hit;
+ if (lcg_state) {
+ /* Record up to max_hits intersections. */
+ for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
+ if (local_isect->hits[i].t == t) {
+ return false;
+ }
+ }
+
+ local_isect->num_hits++;
+
+ if (local_isect->num_hits <= max_hits) {
+ hit = local_isect->num_hits - 1;
+ }
+ else {
+ /* Reservoir sampling: if we are at the maximum number of
+ * hits, randomly replace element or skip it.
+ */
+ hit = lcg_step_uint(lcg_state) % local_isect->num_hits;
+
+ if (hit >= max_hits)
+ return false;
+ }
+ }
+ else {
+ /* Record closest intersection only. */
+ if (local_isect->num_hits && t > local_isect->hits[0].t) {
+ return false;
+ }
+
+ hit = 0;
+ local_isect->num_hits = 1;
+ }
+
+ /* Record intersection. */
+ Intersection *isect = &local_isect->hits[hit];
+ isect->t = t;
+ isect->u = u;
+ isect->v = v;
+ isect->prim = prim_addr;
+ isect->object = object;
+ isect->type = PRIMITIVE_MOTION_TRIANGLE;
+
+ /* Record geometric normal. */
+ local_isect->Ng[hit] = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
+
+ return false;
}
-#endif /* __BVH_LOCAL__ */
+#endif /* __BVH_LOCAL__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h
index e91a4be96ba..5333e82b346 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h
@@ -32,91 +32,80 @@ CCL_NAMESPACE_BEGIN
* normals */
/* return 3 triangle vertex normals */
-ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg,
- ShaderData *sd, const
- Intersection *isect,
- const Ray *ray,
- bool is_local)
+ccl_device_noinline void motion_triangle_shader_setup(
+ KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, bool is_local)
{
- /* Get shader. */
- sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
- /* Get motion info. */
- /* TODO(sergey): This logic is really similar to motion_triangle_vertices(),
- * can we de-duplicate something here?
- */
- int numsteps, numverts;
- object_motion_info(kg, sd->object, &numsteps, &numverts, NULL);
- /* Figure out which steps we need to fetch and their interpolation factor. */
- int maxstep = numsteps*2;
- int step = min((int)(sd->time*maxstep), maxstep-1);
- float t = sd->time*maxstep - step;
- /* Find attribute. */
- AttributeElement elem;
- int offset = find_attribute_motion(kg, sd->object,
- ATTR_STD_MOTION_VERTEX_POSITION,
- &elem);
- kernel_assert(offset != ATTR_STD_NOT_FOUND);
- /* Fetch vertex coordinates. */
- float3 verts[3], next_verts[3];
- uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
- motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
- motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts);
- /* Interpolate between steps. */
- verts[0] = (1.0f - t)*verts[0] + t*next_verts[0];
- verts[1] = (1.0f - t)*verts[1] + t*next_verts[1];
- verts[2] = (1.0f - t)*verts[2] + t*next_verts[2];
- /* Compute refined position. */
+ /* Get shader. */
+ sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
+ /* Get motion info. */
+ /* TODO(sergey): This logic is really similar to motion_triangle_vertices(),
+ * can we de-duplicate something here?
+ */
+ int numsteps, numverts;
+ object_motion_info(kg, sd->object, &numsteps, &numverts, NULL);
+ /* Figure out which steps we need to fetch and their interpolation factor. */
+ int maxstep = numsteps * 2;
+ int step = min((int)(sd->time * maxstep), maxstep - 1);
+ float t = sd->time * maxstep - step;
+ /* Find attribute. */
+ AttributeElement elem;
+ int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
+ kernel_assert(offset != ATTR_STD_NOT_FOUND);
+ /* Fetch vertex coordinates. */
+ float3 verts[3], next_verts[3];
+ uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
+ motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
+ motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts);
+ /* Interpolate between steps. */
+ verts[0] = (1.0f - t) * verts[0] + t * next_verts[0];
+ verts[1] = (1.0f - t) * verts[1] + t * next_verts[1];
+ verts[2] = (1.0f - t) * verts[2] + t * next_verts[2];
+ /* Compute refined position. */
#ifdef __BVH_LOCAL__
- if(is_local) {
- sd->P = motion_triangle_refine_local(kg,
- sd,
- isect,
- ray,
- verts);
- }
- else
-#endif /* __BVH_LOCAL__*/
- {
- sd->P = motion_triangle_refine(kg, sd, isect, ray, verts);
- }
- /* Compute face normal. */
- float3 Ng;
- if(sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
- Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0]));
- }
- else {
- Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
- }
- sd->Ng = Ng;
- sd->N = Ng;
- /* Compute derivatives of P w.r.t. uv. */
+ if (is_local) {
+ sd->P = motion_triangle_refine_local(kg, sd, isect, ray, verts);
+ }
+ else
+#endif /* __BVH_LOCAL__*/
+ {
+ sd->P = motion_triangle_refine(kg, sd, isect, ray, verts);
+ }
+ /* Compute face normal. */
+ float3 Ng;
+ if (sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
+ Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0]));
+ }
+ else {
+ Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
+ }
+ sd->Ng = Ng;
+ sd->N = Ng;
+ /* Compute derivatives of P w.r.t. uv. */
#ifdef __DPDU__
- sd->dPdu = (verts[0] - verts[2]);
- sd->dPdv = (verts[1] - verts[2]);
+ sd->dPdu = (verts[0] - verts[2]);
+ sd->dPdv = (verts[1] - verts[2]);
#endif
- /* Compute smooth normal. */
- if(sd->shader & SHADER_SMOOTH_NORMAL) {
- /* Find attribute. */
- AttributeElement elem;
- int offset = find_attribute_motion(kg,
- sd->object,
- ATTR_STD_MOTION_VERTEX_NORMAL,
- &elem);
- kernel_assert(offset != ATTR_STD_NOT_FOUND);
- /* Fetch vertex coordinates. */
- float3 normals[3], next_normals[3];
- motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
- motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_normals);
- /* Interpolate between steps. */
- normals[0] = (1.0f - t)*normals[0] + t*next_normals[0];
- normals[1] = (1.0f - t)*normals[1] + t*next_normals[1];
- normals[2] = (1.0f - t)*normals[2] + t*next_normals[2];
- /* Interpolate between vertices. */
- float u = sd->u;
- float v = sd->v;
- float w = 1.0f - u - v;
- sd->N = (u*normals[0] + v*normals[1] + w*normals[2]);
- }
+ /* Compute smooth normal. */
+ if (sd->shader & SHADER_SMOOTH_NORMAL) {
+ /* Find attribute. */
+ AttributeElement elem;
+ int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem);
+ kernel_assert(offset != ATTR_STD_NOT_FOUND);
+ /* Fetch vertex coordinates. */
+ float3 normals[3], next_normals[3];
+ motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
+ motion_triangle_normals_for_step(
+ kg, tri_vindex, offset, numverts, numsteps, step + 1, next_normals);
+ /* Interpolate between steps. */
+ normals[0] = (1.0f - t) * normals[0] + t * next_normals[0];
+ normals[1] = (1.0f - t) * normals[1] + t * next_normals[1];
+ normals[2] = (1.0f - t) * normals[2] + t * next_normals[2];
+ /* Interpolate between vertices. */
+ float u = sd->u;
+ float v = sd->v;
+ float w = 1.0f - u - v;
+ sd->N = (u * normals[0] + v * normals[1] + w * normals[2]);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h
index 669c932d720..2792fd64c61 100644
--- a/intern/cycles/kernel/geom/geom_object.h
+++ b/intern/cycles/kernel/geom/geom_object.h
@@ -27,131 +27,143 @@ CCL_NAMESPACE_BEGIN
/* Object attributes, for now a fixed size and contents */
enum ObjectTransform {
- OBJECT_TRANSFORM = 0,
- OBJECT_INVERSE_TRANSFORM = 1,
+ OBJECT_TRANSFORM = 0,
+ OBJECT_INVERSE_TRANSFORM = 1,
};
-enum ObjectVectorTransform {
- OBJECT_PASS_MOTION_PRE = 0,
- OBJECT_PASS_MOTION_POST = 1
-};
+enum ObjectVectorTransform { OBJECT_PASS_MOTION_PRE = 0, OBJECT_PASS_MOTION_POST = 1 };
/* Object to world space transformation */
-ccl_device_inline Transform object_fetch_transform(KernelGlobals *kg, int object, enum ObjectTransform type)
+ccl_device_inline Transform object_fetch_transform(KernelGlobals *kg,
+ int object,
+ enum ObjectTransform type)
{
- if(type == OBJECT_INVERSE_TRANSFORM) {
- return kernel_tex_fetch(__objects, object).itfm;
- }
- else {
- return kernel_tex_fetch(__objects, object).tfm;
- }
+ if (type == OBJECT_INVERSE_TRANSFORM) {
+ return kernel_tex_fetch(__objects, object).itfm;
+ }
+ else {
+ return kernel_tex_fetch(__objects, object).tfm;
+ }
}
/* Lamp to world space transformation */
ccl_device_inline Transform lamp_fetch_transform(KernelGlobals *kg, int lamp, bool inverse)
{
- if(inverse) {
- return kernel_tex_fetch(__lights, lamp).itfm;
- }
- else {
- return kernel_tex_fetch(__lights, lamp).tfm;
- }
+ if (inverse) {
+ return kernel_tex_fetch(__lights, lamp).itfm;
+ }
+ else {
+ return kernel_tex_fetch(__lights, lamp).tfm;
+ }
}
/* Object to world space transformation for motion vectors */
-ccl_device_inline Transform object_fetch_motion_pass_transform(KernelGlobals *kg, int object, enum ObjectVectorTransform type)
+ccl_device_inline Transform object_fetch_motion_pass_transform(KernelGlobals *kg,
+ int object,
+ enum ObjectVectorTransform type)
{
- int offset = object*OBJECT_MOTION_PASS_SIZE + (int)type;
- return kernel_tex_fetch(__object_motion_pass, offset);
+ int offset = object * OBJECT_MOTION_PASS_SIZE + (int)type;
+ return kernel_tex_fetch(__object_motion_pass, offset);
}
/* Motion blurred object transformations */
#ifdef __OBJECT_MOTION__
-ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, int object, float time)
-{
- const uint motion_offset = kernel_tex_fetch(__objects, object).motion_offset;
- const ccl_global DecomposedTransform *motion = &kernel_tex_fetch(__object_motion, motion_offset);
- const uint num_steps = kernel_tex_fetch(__objects, object).numsteps * 2 + 1;
-
- Transform tfm;
-#ifdef __EMBREE__
- if(kernel_data.bvh.scene) {
- transform_motion_array_interpolate_straight(&tfm, motion, num_steps, time);
- }
- else
-#endif
- transform_motion_array_interpolate(&tfm, motion, num_steps, time);
+ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals *kg,
+ int object,
+ float time)
+{
+ const uint motion_offset = kernel_tex_fetch(__objects, object).motion_offset;
+ const ccl_global DecomposedTransform *motion = &kernel_tex_fetch(__object_motion, motion_offset);
+ const uint num_steps = kernel_tex_fetch(__objects, object).numsteps * 2 + 1;
+
+ Transform tfm;
+# ifdef __EMBREE__
+ if (kernel_data.bvh.scene) {
+ transform_motion_array_interpolate_straight(&tfm, motion, num_steps, time);
+ }
+ else
+# endif
+ transform_motion_array_interpolate(&tfm, motion, num_steps, time);
- return tfm;
+ return tfm;
}
-ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals *kg, int object, float time, Transform *itfm)
+ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals *kg,
+ int object,
+ float time,
+ Transform *itfm)
{
- int object_flag = kernel_tex_fetch(__object_flag, object);
- if(object_flag & SD_OBJECT_MOTION) {
- /* if we do motion blur */
- Transform tfm = object_fetch_transform_motion(kg, object, time);
+ int object_flag = kernel_tex_fetch(__object_flag, object);
+ if (object_flag & SD_OBJECT_MOTION) {
+ /* if we do motion blur */
+ Transform tfm = object_fetch_transform_motion(kg, object, time);
- if(itfm)
- *itfm = transform_quick_inverse(tfm);
+ if (itfm)
+ *itfm = transform_quick_inverse(tfm);
- return tfm;
- }
- else {
- Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
- if(itfm)
- *itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+ return tfm;
+ }
+ else {
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+ if (itfm)
+ *itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- return tfm;
- }
+ return tfm;
+ }
}
#endif
/* Transform position from object to world space */
-ccl_device_inline void object_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P)
+ccl_device_inline void object_position_transform(KernelGlobals *kg,
+ const ShaderData *sd,
+ float3 *P)
{
#ifdef __OBJECT_MOTION__
- *P = transform_point_auto(&sd->ob_tfm, *P);
+ *P = transform_point_auto(&sd->ob_tfm, *P);
#else
- Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
- *P = transform_point(&tfm, *P);
+ Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
+ *P = transform_point(&tfm, *P);
#endif
}
/* Transform position from world to object space */
-ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P)
+ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg,
+ const ShaderData *sd,
+ float3 *P)
{
#ifdef __OBJECT_MOTION__
- *P = transform_point_auto(&sd->ob_itfm, *P);
+ *P = transform_point_auto(&sd->ob_itfm, *P);
#else
- Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
- *P = transform_point(&tfm, *P);
+ Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
+ *P = transform_point(&tfm, *P);
#endif
}
/* Transform normal from world to object space */
-ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N)
+ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg,
+ const ShaderData *sd,
+ float3 *N)
{
#ifdef __OBJECT_MOTION__
- if((sd->object != OBJECT_NONE) || (sd->type == PRIMITIVE_LAMP)) {
- *N = normalize(transform_direction_transposed_auto(&sd->ob_tfm, *N));
- }
+ if ((sd->object != OBJECT_NONE) || (sd->type == PRIMITIVE_LAMP)) {
+ *N = normalize(transform_direction_transposed_auto(&sd->ob_tfm, *N));
+ }
#else
- if(sd->object != OBJECT_NONE) {
- Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
- *N = normalize(transform_direction_transposed(&tfm, *N));
- }
- else if(sd->type == PRIMITIVE_LAMP) {
- Transform tfm = lamp_fetch_transform(kg, sd->lamp, false);
- *N = normalize(transform_direction_transposed(&tfm, *N));
- }
+ if (sd->object != OBJECT_NONE) {
+ Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
+ *N = normalize(transform_direction_transposed(&tfm, *N));
+ }
+ else if (sd->type == PRIMITIVE_LAMP) {
+ Transform tfm = lamp_fetch_transform(kg, sd->lamp, false);
+ *N = normalize(transform_direction_transposed(&tfm, *N));
+ }
#endif
}
@@ -160,10 +172,10 @@ ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const
ccl_device_inline void object_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N)
{
#ifdef __OBJECT_MOTION__
- *N = normalize(transform_direction_transposed_auto(&sd->ob_itfm, *N));
+ *N = normalize(transform_direction_transposed_auto(&sd->ob_itfm, *N));
#else
- Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
- *N = normalize(transform_direction_transposed(&tfm, *N));
+ Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
+ *N = normalize(transform_direction_transposed(&tfm, *N));
#endif
}
@@ -172,22 +184,24 @@ ccl_device_inline void object_normal_transform(KernelGlobals *kg, const ShaderDa
ccl_device_inline void object_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D)
{
#ifdef __OBJECT_MOTION__
- *D = transform_direction_auto(&sd->ob_tfm, *D);
+ *D = transform_direction_auto(&sd->ob_tfm, *D);
#else
- Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
- *D = transform_direction(&tfm, *D);
+ Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
+ *D = transform_direction(&tfm, *D);
#endif
}
/* Transform direction vector from world to object space */
-ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D)
+ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg,
+ const ShaderData *sd,
+ float3 *D)
{
#ifdef __OBJECT_MOTION__
- *D = transform_direction_auto(&sd->ob_itfm, *D);
+ *D = transform_direction_auto(&sd->ob_itfm, *D);
#else
- Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
- *D = transform_direction(&tfm, *D);
+ Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
+ *D = transform_direction(&tfm, *D);
#endif
}
@@ -195,14 +209,14 @@ ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, const Sha
ccl_device_inline float3 object_location(KernelGlobals *kg, const ShaderData *sd)
{
- if(sd->object == OBJECT_NONE)
- return make_float3(0.0f, 0.0f, 0.0f);
+ if (sd->object == OBJECT_NONE)
+ return make_float3(0.0f, 0.0f, 0.0f);
#ifdef __OBJECT_MOTION__
- return make_float3(sd->ob_tfm.x.w, sd->ob_tfm.y.w, sd->ob_tfm.z.w);
+ return make_float3(sd->ob_tfm.x.w, sd->ob_tfm.y.w, sd->ob_tfm.z.w);
#else
- Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
- return make_float3(tfm.x.w, tfm.y.w, tfm.z.w);
+ Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
+ return make_float3(tfm.x.w, tfm.y.w, tfm.z.w);
#endif
}
@@ -210,218 +224,211 @@ ccl_device_inline float3 object_location(KernelGlobals *kg, const ShaderData *sd
ccl_device_inline float object_surface_area(KernelGlobals *kg, int object)
{
- return kernel_tex_fetch(__objects, object).surface_area;
+ return kernel_tex_fetch(__objects, object).surface_area;
}
/* Pass ID number of object */
ccl_device_inline float object_pass_id(KernelGlobals *kg, int object)
{
- if(object == OBJECT_NONE)
- return 0.0f;
+ if (object == OBJECT_NONE)
+ return 0.0f;
- return kernel_tex_fetch(__objects, object).pass_id;
+ return kernel_tex_fetch(__objects, object).pass_id;
}
/* Per lamp random number for shader variation */
ccl_device_inline float lamp_random_number(KernelGlobals *kg, int lamp)
{
- if(lamp == LAMP_NONE)
- return 0.0f;
+ if (lamp == LAMP_NONE)
+ return 0.0f;
- return kernel_tex_fetch(__lights, lamp).random;
+ return kernel_tex_fetch(__lights, lamp).random;
}
/* Per object random number for shader variation */
ccl_device_inline float object_random_number(KernelGlobals *kg, int object)
{
- if(object == OBJECT_NONE)
- return 0.0f;
+ if (object == OBJECT_NONE)
+ return 0.0f;
- return kernel_tex_fetch(__objects, object).random_number;
+ return kernel_tex_fetch(__objects, object).random_number;
}
/* Particle ID from which this object was generated */
ccl_device_inline int object_particle_id(KernelGlobals *kg, int object)
{
- if(object == OBJECT_NONE)
- return 0;
+ if (object == OBJECT_NONE)
+ return 0;
- return kernel_tex_fetch(__objects, object).particle_index;
+ return kernel_tex_fetch(__objects, object).particle_index;
}
/* Generated texture coordinate on surface from where object was instanced */
ccl_device_inline float3 object_dupli_generated(KernelGlobals *kg, int object)
{
- if(object == OBJECT_NONE)
- return make_float3(0.0f, 0.0f, 0.0f);
+ if (object == OBJECT_NONE)
+ return make_float3(0.0f, 0.0f, 0.0f);
- const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object);
- return make_float3(kobject->dupli_generated[0],
- kobject->dupli_generated[1],
- kobject->dupli_generated[2]);
+ const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object);
+ return make_float3(
+ kobject->dupli_generated[0], kobject->dupli_generated[1], kobject->dupli_generated[2]);
}
/* UV texture coordinate on surface from where object was instanced */
ccl_device_inline float3 object_dupli_uv(KernelGlobals *kg, int object)
{
- if(object == OBJECT_NONE)
- return make_float3(0.0f, 0.0f, 0.0f);
+ if (object == OBJECT_NONE)
+ return make_float3(0.0f, 0.0f, 0.0f);
- const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object);
- return make_float3(kobject->dupli_uv[0],
- kobject->dupli_uv[1],
- 0.0f);
+ const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object);
+ return make_float3(kobject->dupli_uv[0], kobject->dupli_uv[1], 0.0f);
}
/* Information about mesh for motion blurred triangles and curves */
-ccl_device_inline void object_motion_info(KernelGlobals *kg, int object, int *numsteps, int *numverts, int *numkeys)
+ccl_device_inline void object_motion_info(
+ KernelGlobals *kg, int object, int *numsteps, int *numverts, int *numkeys)
{
- if(numkeys) {
- *numkeys = kernel_tex_fetch(__objects, object).numkeys;
- }
+ if (numkeys) {
+ *numkeys = kernel_tex_fetch(__objects, object).numkeys;
+ }
- if(numsteps)
- *numsteps = kernel_tex_fetch(__objects, object).numsteps;
- if(numverts)
- *numverts = kernel_tex_fetch(__objects, object).numverts;
+ if (numsteps)
+ *numsteps = kernel_tex_fetch(__objects, object).numsteps;
+ if (numverts)
+ *numverts = kernel_tex_fetch(__objects, object).numverts;
}
/* Offset to an objects patch map */
ccl_device_inline uint object_patch_map_offset(KernelGlobals *kg, int object)
{
- if(object == OBJECT_NONE)
- return 0;
+ if (object == OBJECT_NONE)
+ return 0;
- return kernel_tex_fetch(__objects, object).patch_map_offset;
+ return kernel_tex_fetch(__objects, object).patch_map_offset;
}
/* Pass ID for shader */
ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd)
{
- return kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).pass_id;
+ return kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).pass_id;
}
/* Cryptomatte ID */
ccl_device_inline float object_cryptomatte_id(KernelGlobals *kg, int object)
{
- if(object == OBJECT_NONE)
- return 0.0f;
+ if (object == OBJECT_NONE)
+ return 0.0f;
- return kernel_tex_fetch(__objects, object).cryptomatte_object;
+ return kernel_tex_fetch(__objects, object).cryptomatte_object;
}
ccl_device_inline float object_cryptomatte_asset_id(KernelGlobals *kg, int object)
{
- if(object == OBJECT_NONE)
- return 0;
+ if (object == OBJECT_NONE)
+ return 0;
- return kernel_tex_fetch(__objects, object).cryptomatte_asset;
+ return kernel_tex_fetch(__objects, object).cryptomatte_asset;
}
/* Particle data from which object was instanced */
ccl_device_inline uint particle_index(KernelGlobals *kg, int particle)
{
- return kernel_tex_fetch(__particles, particle).index;
+ return kernel_tex_fetch(__particles, particle).index;
}
ccl_device float particle_age(KernelGlobals *kg, int particle)
{
- return kernel_tex_fetch(__particles, particle).age;
+ return kernel_tex_fetch(__particles, particle).age;
}
ccl_device float particle_lifetime(KernelGlobals *kg, int particle)
{
- return kernel_tex_fetch(__particles, particle).lifetime;
+ return kernel_tex_fetch(__particles, particle).lifetime;
}
ccl_device float particle_size(KernelGlobals *kg, int particle)
{
- return kernel_tex_fetch(__particles, particle).size;
+ return kernel_tex_fetch(__particles, particle).size;
}
ccl_device float4 particle_rotation(KernelGlobals *kg, int particle)
{
- return kernel_tex_fetch(__particles, particle).rotation;
+ return kernel_tex_fetch(__particles, particle).rotation;
}
ccl_device float3 particle_location(KernelGlobals *kg, int particle)
{
- return float4_to_float3(kernel_tex_fetch(__particles, particle).location);
+ return float4_to_float3(kernel_tex_fetch(__particles, particle).location);
}
ccl_device float3 particle_velocity(KernelGlobals *kg, int particle)
{
- return float4_to_float3(kernel_tex_fetch(__particles, particle).velocity);
+ return float4_to_float3(kernel_tex_fetch(__particles, particle).velocity);
}
ccl_device float3 particle_angular_velocity(KernelGlobals *kg, int particle)
{
- return float4_to_float3(kernel_tex_fetch(__particles, particle).angular_velocity);
+ return float4_to_float3(kernel_tex_fetch(__particles, particle).angular_velocity);
}
/* Object intersection in BVH */
ccl_device_inline float3 bvh_clamp_direction(float3 dir)
{
- /* clamp absolute values by exp2f(-80.0f) to avoid division by zero when calculating inverse direction */
+ /* clamp absolute values by exp2f(-80.0f) to avoid division by zero when calculating inverse direction */
#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
- const ssef oopes(8.271806E-25f,8.271806E-25f,8.271806E-25f,0.0f);
- const ssef mask = _mm_cmpgt_ps(fabs(dir), oopes);
- const ssef signdir = signmsk(dir.m128) | oopes;
+ const ssef oopes(8.271806E-25f, 8.271806E-25f, 8.271806E-25f, 0.0f);
+ const ssef mask = _mm_cmpgt_ps(fabs(dir), oopes);
+ const ssef signdir = signmsk(dir.m128) | oopes;
# ifndef __KERNEL_AVX__
- ssef res = mask & ssef(dir);
- res = _mm_or_ps(res,_mm_andnot_ps(mask, signdir));
+ ssef res = mask & ssef(dir);
+ res = _mm_or_ps(res, _mm_andnot_ps(mask, signdir));
# else
- ssef res = _mm_blendv_ps(signdir, dir, mask);
+ ssef res = _mm_blendv_ps(signdir, dir, mask);
# endif
- return float3(res);
+ return float3(res);
#else /* __KERNEL_SSE__ && __KERNEL_SSE2__ */
- const float ooeps = 8.271806E-25f;
- return make_float3((fabsf(dir.x) > ooeps)? dir.x: copysignf(ooeps, dir.x),
- (fabsf(dir.y) > ooeps)? dir.y: copysignf(ooeps, dir.y),
- (fabsf(dir.z) > ooeps)? dir.z: copysignf(ooeps, dir.z));
-#endif /* __KERNEL_SSE__ && __KERNEL_SSE2__ */
+ const float ooeps = 8.271806E-25f;
+ return make_float3((fabsf(dir.x) > ooeps) ? dir.x : copysignf(ooeps, dir.x),
+ (fabsf(dir.y) > ooeps) ? dir.y : copysignf(ooeps, dir.y),
+ (fabsf(dir.z) > ooeps) ? dir.z : copysignf(ooeps, dir.z));
+#endif /* __KERNEL_SSE__ && __KERNEL_SSE2__ */
}
ccl_device_inline float3 bvh_inverse_direction(float3 dir)
{
- return rcp(dir);
+ return rcp(dir);
}
/* Transform ray into object space to enter static object in BVH */
-ccl_device_inline float bvh_instance_push(KernelGlobals *kg,
- int object,
- const Ray *ray,
- float3 *P,
- float3 *dir,
- float3 *idir,
- float t)
+ccl_device_inline float bvh_instance_push(
+ KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float t)
{
- Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- *P = transform_point(&tfm, ray->P);
+ *P = transform_point(&tfm, ray->P);
- float len;
- *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len));
- *idir = bvh_inverse_direction(*dir);
+ float len;
+ *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len));
+ *idir = bvh_inverse_direction(*dir);
- if(t != FLT_MAX) {
- t *= len;
- }
+ if (t != FLT_MAX) {
+ t *= len;
+ }
- return t;
+ return t;
}
#ifdef __QBVH__
@@ -440,85 +447,85 @@ ccl_device_inline void qbvh_instance_push(KernelGlobals *kg,
float *t,
float *t1)
{
- Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- *P = transform_point(&tfm, ray->P);
+ *P = transform_point(&tfm, ray->P);
- float len;
- *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len));
- *idir = bvh_inverse_direction(*dir);
+ float len;
+ *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len));
+ *idir = bvh_inverse_direction(*dir);
- if(*t != FLT_MAX)
- *t *= len;
+ if (*t != FLT_MAX)
+ *t *= len;
- if(*t1 != -FLT_MAX)
- *t1 *= len;
+ if (*t1 != -FLT_MAX)
+ *t1 *= len;
}
#endif
/* Transorm ray to exit static object in BVH */
-ccl_device_inline float bvh_instance_pop(KernelGlobals *kg,
- int object,
- const Ray *ray,
- float3 *P,
- float3 *dir,
- float3 *idir,
- float t)
+ccl_device_inline float bvh_instance_pop(
+ KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float t)
{
- if(t != FLT_MAX) {
- Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- t /= len(transform_direction(&tfm, ray->D));
- }
+ if (t != FLT_MAX) {
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+ t /= len(transform_direction(&tfm, ray->D));
+ }
- *P = ray->P;
- *dir = bvh_clamp_direction(ray->D);
- *idir = bvh_inverse_direction(*dir);
+ *P = ray->P;
+ *dir = bvh_clamp_direction(ray->D);
+ *idir = bvh_inverse_direction(*dir);
- return t;
+ return t;
}
/* Same as above, but returns scale factor to apply to multiple intersection distances */
-ccl_device_inline void bvh_instance_pop_factor(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t_fac)
+ccl_device_inline void bvh_instance_pop_factor(KernelGlobals *kg,
+ int object,
+ const Ray *ray,
+ float3 *P,
+ float3 *dir,
+ float3 *idir,
+ float *t_fac)
{
- Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- *t_fac = 1.0f / len(transform_direction(&tfm, ray->D));
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+ *t_fac = 1.0f / len(transform_direction(&tfm, ray->D));
- *P = ray->P;
- *dir = bvh_clamp_direction(ray->D);
- *idir = bvh_inverse_direction(*dir);
+ *P = ray->P;
+ *dir = bvh_clamp_direction(ray->D);
+ *idir = bvh_inverse_direction(*dir);
}
-
#ifdef __OBJECT_MOTION__
/* Transform ray into object space to enter motion blurred object in BVH */
ccl_device_inline float bvh_instance_motion_push(KernelGlobals *kg,
- int object,
- const Ray *ray,
- float3 *P,
- float3 *dir,
- float3 *idir,
- float t,
- Transform *itfm)
+ int object,
+ const Ray *ray,
+ float3 *P,
+ float3 *dir,
+ float3 *idir,
+ float t,
+ Transform *itfm)
{
- object_fetch_transform_motion_test(kg, object, ray->time, itfm);
+ object_fetch_transform_motion_test(kg, object, ray->time, itfm);
- *P = transform_point(itfm, ray->P);
+ *P = transform_point(itfm, ray->P);
- float len;
- *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len));
- *idir = bvh_inverse_direction(*dir);
+ float len;
+ *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len));
+ *idir = bvh_inverse_direction(*dir);
- if(t != FLT_MAX) {
- t *= len;
- }
+ if (t != FLT_MAX) {
+ t *= len;
+ }
- return t;
+ return t;
}
-#ifdef __QBVH__
+# ifdef __QBVH__
/* Same as above, but optimized for QBVH scene intersection,
* which needs to modify two max distances.
*
@@ -535,21 +542,21 @@ ccl_device_inline void qbvh_instance_motion_push(KernelGlobals *kg,
float *t1,
Transform *itfm)
{
- object_fetch_transform_motion_test(kg, object, ray->time, itfm);
+ object_fetch_transform_motion_test(kg, object, ray->time, itfm);
- *P = transform_point(itfm, ray->P);
+ *P = transform_point(itfm, ray->P);
- float len;
- *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len));
- *idir = bvh_inverse_direction(*dir);
+ float len;
+ *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len));
+ *idir = bvh_inverse_direction(*dir);
- if(*t != FLT_MAX)
- *t *= len;
+ if (*t != FLT_MAX)
+ *t *= len;
- if(*t1 != -FLT_MAX)
- *t1 *= len;
+ if (*t1 != -FLT_MAX)
+ *t1 *= len;
}
-#endif
+# endif
/* Transorm ray to exit motion blurred object in BVH */
@@ -562,15 +569,15 @@ ccl_device_inline float bvh_instance_motion_pop(KernelGlobals *kg,
float t,
Transform *itfm)
{
- if(t != FLT_MAX) {
- t /= len(transform_direction(itfm, ray->D));
- }
+ if (t != FLT_MAX) {
+ t /= len(transform_direction(itfm, ray->D));
+ }
- *P = ray->P;
- *dir = bvh_clamp_direction(ray->D);
- *idir = bvh_inverse_direction(*dir);
+ *P = ray->P;
+ *dir = bvh_clamp_direction(ray->D);
+ *idir = bvh_inverse_direction(*dir);
- return t;
+ return t;
}
/* Same as above, but returns scale factor to apply to multiple intersection distances */
@@ -584,10 +591,10 @@ ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals *kg,
float *t_fac,
Transform *itfm)
{
- *t_fac = 1.0f / len(transform_direction(itfm, ray->D));
- *P = ray->P;
- *dir = bvh_clamp_direction(ray->D);
- *idir = bvh_inverse_direction(*dir);
+ *t_fac = 1.0f / len(transform_direction(itfm, ray->D));
+ *P = ray->P;
+ *dir = bvh_clamp_direction(ray->D);
+ *idir = bvh_inverse_direction(*dir);
}
#endif
@@ -599,30 +606,30 @@ ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals *kg,
#ifdef __KERNEL_OPENCL__
ccl_device_inline void object_position_transform_addrspace(KernelGlobals *kg,
- const ShaderData *sd,
- ccl_addr_space float3 *P)
+ const ShaderData *sd,
+ ccl_addr_space float3 *P)
{
- float3 private_P = *P;
- object_position_transform(kg, sd, &private_P);
- *P = private_P;
+ float3 private_P = *P;
+ object_position_transform(kg, sd, &private_P);
+ *P = private_P;
}
ccl_device_inline void object_dir_transform_addrspace(KernelGlobals *kg,
const ShaderData *sd,
ccl_addr_space float3 *D)
{
- float3 private_D = *D;
- object_dir_transform(kg, sd, &private_D);
- *D = private_D;
+ float3 private_D = *D;
+ object_dir_transform(kg, sd, &private_D);
+ *D = private_D;
}
ccl_device_inline void object_normal_transform_addrspace(KernelGlobals *kg,
const ShaderData *sd,
ccl_addr_space float3 *N)
{
- float3 private_N = *N;
- object_normal_transform(kg, sd, &private_N);
- *N = private_N;
+ float3 private_N = *N;
+ object_normal_transform(kg, sd, &private_N);
+ *N = private_N;
}
#endif
diff --git a/intern/cycles/kernel/geom/geom_patch.h b/intern/cycles/kernel/geom/geom_patch.h
index edb82172959..df19199f68e 100644
--- a/intern/cycles/kernel/geom/geom_patch.h
+++ b/intern/cycles/kernel/geom/geom_patch.h
@@ -27,342 +27,394 @@
CCL_NAMESPACE_BEGIN
typedef struct PatchHandle {
- int array_index, patch_index, vert_index;
+ int array_index, patch_index, vert_index;
} PatchHandle;
ccl_device_inline int patch_map_resolve_quadrant(float median, float *u, float *v)
{
- int quadrant = -1;
-
- if(*u < median) {
- if(*v < median) {
- quadrant = 0;
- }
- else {
- quadrant = 1;
- *v -= median;
- }
- }
- else {
- if(*v < median) {
- quadrant = 3;
- }
- else {
- quadrant = 2;
- *v -= median;
- }
- *u -= median;
- }
-
- return quadrant;
+ int quadrant = -1;
+
+ if (*u < median) {
+ if (*v < median) {
+ quadrant = 0;
+ }
+ else {
+ quadrant = 1;
+ *v -= median;
+ }
+ }
+ else {
+ if (*v < median) {
+ quadrant = 3;
+ }
+ else {
+ quadrant = 2;
+ *v -= median;
+ }
+ *u -= median;
+ }
+
+ return quadrant;
}
/* retrieve PatchHandle from patch coords */
-ccl_device_inline PatchHandle patch_map_find_patch(KernelGlobals *kg, int object, int patch, float u, float v)
+ccl_device_inline PatchHandle
+patch_map_find_patch(KernelGlobals *kg, int object, int patch, float u, float v)
{
- PatchHandle handle;
+ PatchHandle handle;
- kernel_assert((u >= 0.0f) && (u <= 1.0f) && (v >= 0.0f) && (v <= 1.0f));
+ kernel_assert((u >= 0.0f) && (u <= 1.0f) && (v >= 0.0f) && (v <= 1.0f));
- int node = (object_patch_map_offset(kg, object) + patch)/2;
- float median = 0.5f;
+ int node = (object_patch_map_offset(kg, object) + patch) / 2;
+ float median = 0.5f;
- for(int depth = 0; depth < 0xff; depth++) {
- float delta = median * 0.5f;
+ for (int depth = 0; depth < 0xff; depth++) {
+ float delta = median * 0.5f;
- int quadrant = patch_map_resolve_quadrant(median, &u, &v);
- kernel_assert(quadrant >= 0);
+ int quadrant = patch_map_resolve_quadrant(median, &u, &v);
+ kernel_assert(quadrant >= 0);
- uint child = kernel_tex_fetch(__patches, node + quadrant);
+ uint child = kernel_tex_fetch(__patches, node + quadrant);
- /* is the quadrant a hole? */
- if(!(child & PATCH_MAP_NODE_IS_SET)) {
- handle.array_index = -1;
- return handle;
- }
+ /* is the quadrant a hole? */
+ if (!(child & PATCH_MAP_NODE_IS_SET)) {
+ handle.array_index = -1;
+ return handle;
+ }
- uint index = child & PATCH_MAP_NODE_INDEX_MASK;
+ uint index = child & PATCH_MAP_NODE_INDEX_MASK;
- if(child & PATCH_MAP_NODE_IS_LEAF) {
- handle.array_index = kernel_tex_fetch(__patches, index + 0);
- handle.patch_index = kernel_tex_fetch(__patches, index + 1);
- handle.vert_index = kernel_tex_fetch(__patches, index + 2);
+ if (child & PATCH_MAP_NODE_IS_LEAF) {
+ handle.array_index = kernel_tex_fetch(__patches, index + 0);
+ handle.patch_index = kernel_tex_fetch(__patches, index + 1);
+ handle.vert_index = kernel_tex_fetch(__patches, index + 2);
- return handle;
- } else {
- node = index;
- }
+ return handle;
+ }
+ else {
+ node = index;
+ }
- median = delta;
- }
+ median = delta;
+ }
- /* no leaf found */
- kernel_assert(0);
+ /* no leaf found */
+ kernel_assert(0);
- handle.array_index = -1;
- return handle;
+ handle.array_index = -1;
+ return handle;
}
ccl_device_inline void patch_eval_bspline_weights(float t, float *point, float *deriv)
{
- /* The four uniform cubic B-Spline basis functions evaluated at t */
- float inv_6 = 1.0f / 6.0f;
-
- float t2 = t * t;
- float t3 = t * t2;
-
- point[0] = inv_6 * (1.0f - 3.0f*(t - t2) - t3);
- point[1] = inv_6 * (4.0f - 6.0f*t2 + 3.0f*t3);
- point[2] = inv_6 * (1.0f + 3.0f*(t + t2 - t3));
- point[3] = inv_6 * t3;
-
- /* Derivatives of the above four basis functions at t */
- deriv[0] = -0.5f*t2 + t - 0.5f;
- deriv[1] = 1.5f*t2 - 2.0f*t;
- deriv[2] = -1.5f*t2 + t + 0.5f;
- deriv[3] = 0.5f*t2;
+ /* The four uniform cubic B-Spline basis functions evaluated at t */
+ float inv_6 = 1.0f / 6.0f;
+
+ float t2 = t * t;
+ float t3 = t * t2;
+
+ point[0] = inv_6 * (1.0f - 3.0f * (t - t2) - t3);
+ point[1] = inv_6 * (4.0f - 6.0f * t2 + 3.0f * t3);
+ point[2] = inv_6 * (1.0f + 3.0f * (t + t2 - t3));
+ point[3] = inv_6 * t3;
+
+ /* Derivatives of the above four basis functions at t */
+ deriv[0] = -0.5f * t2 + t - 0.5f;
+ deriv[1] = 1.5f * t2 - 2.0f * t;
+ deriv[2] = -1.5f * t2 + t + 0.5f;
+ deriv[3] = 0.5f * t2;
}
ccl_device_inline void patch_eval_adjust_boundary_weights(uint bits, float *s, float *t)
{
- int boundary = ((bits >> 8) & 0xf);
-
- if(boundary & 1) {
- t[2] -= t[0];
- t[1] += 2*t[0];
- t[0] = 0;
- }
-
- if(boundary & 2) {
- s[1] -= s[3];
- s[2] += 2*s[3];
- s[3] = 0;
- }
-
- if(boundary & 4) {
- t[1] -= t[3];
- t[2] += 2*t[3];
- t[3] = 0;
- }
-
- if(boundary & 8) {
- s[2] -= s[0];
- s[1] += 2*s[0];
- s[0] = 0;
- }
+ int boundary = ((bits >> 8) & 0xf);
+
+ if (boundary & 1) {
+ t[2] -= t[0];
+ t[1] += 2 * t[0];
+ t[0] = 0;
+ }
+
+ if (boundary & 2) {
+ s[1] -= s[3];
+ s[2] += 2 * s[3];
+ s[3] = 0;
+ }
+
+ if (boundary & 4) {
+ t[1] -= t[3];
+ t[2] += 2 * t[3];
+ t[3] = 0;
+ }
+
+ if (boundary & 8) {
+ s[2] -= s[0];
+ s[1] += 2 * s[0];
+ s[0] = 0;
+ }
}
ccl_device_inline int patch_eval_depth(uint patch_bits)
{
- return (patch_bits & 0xf);
+ return (patch_bits & 0xf);
}
ccl_device_inline float patch_eval_param_fraction(uint patch_bits)
{
- bool non_quad_root = (patch_bits >> 4) & 0x1;
- int depth = patch_eval_depth(patch_bits);
-
- if(non_quad_root) {
- return 1.0f / (float)(1 << (depth-1));
- }
- else {
- return 1.0f / (float)(1 << depth);
- }
+ bool non_quad_root = (patch_bits >> 4) & 0x1;
+ int depth = patch_eval_depth(patch_bits);
+
+ if (non_quad_root) {
+ return 1.0f / (float)(1 << (depth - 1));
+ }
+ else {
+ return 1.0f / (float)(1 << depth);
+ }
}
ccl_device_inline void patch_eval_normalize_coords(uint patch_bits, float *u, float *v)
{
- float frac = patch_eval_param_fraction(patch_bits);
+ float frac = patch_eval_param_fraction(patch_bits);
- int iu = (patch_bits >> 22) & 0x3ff;
- int iv = (patch_bits >> 12) & 0x3ff;
+ int iu = (patch_bits >> 22) & 0x3ff;
+ int iv = (patch_bits >> 12) & 0x3ff;
- /* top left corner */
- float pu = (float)iu*frac;
- float pv = (float)iv*frac;
+ /* top left corner */
+ float pu = (float)iu * frac;
+ float pv = (float)iv * frac;
- /* normalize uv coordinates */
- *u = (*u - pu) / frac;
- *v = (*v - pv) / frac;
+ /* normalize uv coordinates */
+ *u = (*u - pu) / frac;
+ *v = (*v - pv) / frac;
}
/* retrieve patch control indices */
-ccl_device_inline int patch_eval_indices(KernelGlobals *kg, const PatchHandle *handle, int channel,
+ccl_device_inline int patch_eval_indices(KernelGlobals *kg,
+ const PatchHandle *handle,
+ int channel,
int indices[PATCH_MAX_CONTROL_VERTS])
{
- int index_base = kernel_tex_fetch(__patches, handle->array_index + 2) + handle->vert_index;
+ int index_base = kernel_tex_fetch(__patches, handle->array_index + 2) + handle->vert_index;
- /* XXX: regular patches only */
- for(int i = 0; i < 16; i++) {
- indices[i] = kernel_tex_fetch(__patches, index_base + i);
- }
+ /* XXX: regular patches only */
+ for (int i = 0; i < 16; i++) {
+ indices[i] = kernel_tex_fetch(__patches, index_base + i);
+ }
- return 16;
+ return 16;
}
/* evaluate patch basis functions */
-ccl_device_inline void patch_eval_basis(KernelGlobals *kg, const PatchHandle *handle, float u, float v,
- float weights[PATCH_MAX_CONTROL_VERTS],
- float weights_du[PATCH_MAX_CONTROL_VERTS],
- float weights_dv[PATCH_MAX_CONTROL_VERTS])
+ccl_device_inline void patch_eval_basis(KernelGlobals *kg,
+ const PatchHandle *handle,
+ float u,
+ float v,
+ float weights[PATCH_MAX_CONTROL_VERTS],
+ float weights_du[PATCH_MAX_CONTROL_VERTS],
+ float weights_dv[PATCH_MAX_CONTROL_VERTS])
{
- uint patch_bits = kernel_tex_fetch(__patches, handle->patch_index + 1); /* read patch param */
- float d_scale = 1 << patch_eval_depth(patch_bits);
+ uint patch_bits = kernel_tex_fetch(__patches, handle->patch_index + 1); /* read patch param */
+ float d_scale = 1 << patch_eval_depth(patch_bits);
- bool non_quad_root = (patch_bits >> 4) & 0x1;
- if(non_quad_root) {
- d_scale *= 0.5f;
- }
+ bool non_quad_root = (patch_bits >> 4) & 0x1;
+ if (non_quad_root) {
+ d_scale *= 0.5f;
+ }
- patch_eval_normalize_coords(patch_bits, &u, &v);
+ patch_eval_normalize_coords(patch_bits, &u, &v);
- /* XXX: regular patches only for now. */
+ /* XXX: regular patches only for now. */
- float s[4], t[4], ds[4], dt[4];
+ float s[4], t[4], ds[4], dt[4];
- patch_eval_bspline_weights(u, s, ds);
- patch_eval_bspline_weights(v, t, dt);
+ patch_eval_bspline_weights(u, s, ds);
+ patch_eval_bspline_weights(v, t, dt);
- patch_eval_adjust_boundary_weights(patch_bits, s, t);
- patch_eval_adjust_boundary_weights(patch_bits, ds, dt);
+ patch_eval_adjust_boundary_weights(patch_bits, s, t);
+ patch_eval_adjust_boundary_weights(patch_bits, ds, dt);
- for(int k = 0; k < 4; k++) {
- for(int l = 0; l < 4; l++) {
- weights[4*k+l] = s[l] * t[k];
- weights_du[4*k+l] = ds[l] * t[k] * d_scale;
- weights_dv[4*k+l] = s[l] * dt[k] * d_scale;
- }
- }
+ for (int k = 0; k < 4; k++) {
+ for (int l = 0; l < 4; l++) {
+ weights[4 * k + l] = s[l] * t[k];
+ weights_du[4 * k + l] = ds[l] * t[k] * d_scale;
+ weights_dv[4 * k + l] = s[l] * dt[k] * d_scale;
+ }
+ }
}
/* generic function for evaluating indices and weights from patch coords */
-ccl_device_inline int patch_eval_control_verts(KernelGlobals *kg, int object, int patch, float u, float v, int channel,
- int indices[PATCH_MAX_CONTROL_VERTS],
- float weights[PATCH_MAX_CONTROL_VERTS],
- float weights_du[PATCH_MAX_CONTROL_VERTS],
- float weights_dv[PATCH_MAX_CONTROL_VERTS])
+ccl_device_inline int patch_eval_control_verts(KernelGlobals *kg,
+ int object,
+ int patch,
+ float u,
+ float v,
+ int channel,
+ int indices[PATCH_MAX_CONTROL_VERTS],
+ float weights[PATCH_MAX_CONTROL_VERTS],
+ float weights_du[PATCH_MAX_CONTROL_VERTS],
+ float weights_dv[PATCH_MAX_CONTROL_VERTS])
{
- PatchHandle handle = patch_map_find_patch(kg, object, patch, u, v);
- kernel_assert(handle.array_index >= 0);
+ PatchHandle handle = patch_map_find_patch(kg, object, patch, u, v);
+ kernel_assert(handle.array_index >= 0);
- int num_control = patch_eval_indices(kg, &handle, channel, indices);
- patch_eval_basis(kg, &handle, u, v, weights, weights_du, weights_dv);
+ int num_control = patch_eval_indices(kg, &handle, channel, indices);
+ patch_eval_basis(kg, &handle, u, v, weights, weights_du, weights_dv);
- return num_control;
+ return num_control;
}
/* functions for evaluating attributes on patches */
-ccl_device float patch_eval_float(KernelGlobals *kg, const ShaderData *sd, int offset,
- int patch, float u, float v, int channel,
- float *du, float* dv)
+ccl_device float patch_eval_float(KernelGlobals *kg,
+ const ShaderData *sd,
+ int offset,
+ int patch,
+ float u,
+ float v,
+ int channel,
+ float *du,
+ float *dv)
{
- int indices[PATCH_MAX_CONTROL_VERTS];
- float weights[PATCH_MAX_CONTROL_VERTS];
- float weights_du[PATCH_MAX_CONTROL_VERTS];
- float weights_dv[PATCH_MAX_CONTROL_VERTS];
-
- int num_control = patch_eval_control_verts(kg, sd->object, patch, u, v, channel,
- indices, weights, weights_du, weights_dv);
-
- float val = 0.0f;
- if(du) *du = 0.0f;
- if(dv) *dv = 0.0f;
-
- for(int i = 0; i < num_control; i++) {
- float v = kernel_tex_fetch(__attributes_float, offset + indices[i]);
-
- val += v * weights[i];
- if(du) *du += v * weights_du[i];
- if(dv) *dv += v * weights_dv[i];
- }
-
- return val;
+ int indices[PATCH_MAX_CONTROL_VERTS];
+ float weights[PATCH_MAX_CONTROL_VERTS];
+ float weights_du[PATCH_MAX_CONTROL_VERTS];
+ float weights_dv[PATCH_MAX_CONTROL_VERTS];
+
+ int num_control = patch_eval_control_verts(
+ kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv);
+
+ float val = 0.0f;
+ if (du)
+ *du = 0.0f;
+ if (dv)
+ *dv = 0.0f;
+
+ for (int i = 0; i < num_control; i++) {
+ float v = kernel_tex_fetch(__attributes_float, offset + indices[i]);
+
+ val += v * weights[i];
+ if (du)
+ *du += v * weights_du[i];
+ if (dv)
+ *dv += v * weights_dv[i];
+ }
+
+ return val;
}
-ccl_device float2 patch_eval_float2(KernelGlobals *kg, const ShaderData *sd, int offset,
- int patch, float u, float v, int channel,
- float2 *du, float2 *dv)
+ccl_device float2 patch_eval_float2(KernelGlobals *kg,
+ const ShaderData *sd,
+ int offset,
+ int patch,
+ float u,
+ float v,
+ int channel,
+ float2 *du,
+ float2 *dv)
{
- int indices[PATCH_MAX_CONTROL_VERTS];
- float weights[PATCH_MAX_CONTROL_VERTS];
- float weights_du[PATCH_MAX_CONTROL_VERTS];
- float weights_dv[PATCH_MAX_CONTROL_VERTS];
-
- int num_control = patch_eval_control_verts(kg, sd->object, patch, u, v, channel,
- indices, weights, weights_du, weights_dv);
-
- float2 val = make_float2(0.0f, 0.0f);
- if(du) *du = make_float2(0.0f, 0.0f);
- if(dv) *dv = make_float2(0.0f, 0.0f);
-
- for(int i = 0; i < num_control; i++) {
- float2 v = kernel_tex_fetch(__attributes_float2, offset + indices[i]);
-
- val += v * weights[i];
- if(du) *du += v * weights_du[i];
- if(dv) *dv += v * weights_dv[i];
- }
-
- return val;
+ int indices[PATCH_MAX_CONTROL_VERTS];
+ float weights[PATCH_MAX_CONTROL_VERTS];
+ float weights_du[PATCH_MAX_CONTROL_VERTS];
+ float weights_dv[PATCH_MAX_CONTROL_VERTS];
+
+ int num_control = patch_eval_control_verts(
+ kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv);
+
+ float2 val = make_float2(0.0f, 0.0f);
+ if (du)
+ *du = make_float2(0.0f, 0.0f);
+ if (dv)
+ *dv = make_float2(0.0f, 0.0f);
+
+ for (int i = 0; i < num_control; i++) {
+ float2 v = kernel_tex_fetch(__attributes_float2, offset + indices[i]);
+
+ val += v * weights[i];
+ if (du)
+ *du += v * weights_du[i];
+ if (dv)
+ *dv += v * weights_dv[i];
+ }
+
+ return val;
}
-ccl_device float3 patch_eval_float3(KernelGlobals *kg, const ShaderData *sd, int offset,
- int patch, float u, float v, int channel,
- float3 *du, float3 *dv)
+ccl_device float3 patch_eval_float3(KernelGlobals *kg,
+ const ShaderData *sd,
+ int offset,
+ int patch,
+ float u,
+ float v,
+ int channel,
+ float3 *du,
+ float3 *dv)
{
- int indices[PATCH_MAX_CONTROL_VERTS];
- float weights[PATCH_MAX_CONTROL_VERTS];
- float weights_du[PATCH_MAX_CONTROL_VERTS];
- float weights_dv[PATCH_MAX_CONTROL_VERTS];
-
- int num_control = patch_eval_control_verts(kg, sd->object, patch, u, v, channel,
- indices, weights, weights_du, weights_dv);
-
- float3 val = make_float3(0.0f, 0.0f, 0.0f);
- if(du) *du = make_float3(0.0f, 0.0f, 0.0f);
- if(dv) *dv = make_float3(0.0f, 0.0f, 0.0f);
-
- for(int i = 0; i < num_control; i++) {
- float3 v = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + indices[i]));
-
- val += v * weights[i];
- if(du) *du += v * weights_du[i];
- if(dv) *dv += v * weights_dv[i];
- }
-
- return val;
+ int indices[PATCH_MAX_CONTROL_VERTS];
+ float weights[PATCH_MAX_CONTROL_VERTS];
+ float weights_du[PATCH_MAX_CONTROL_VERTS];
+ float weights_dv[PATCH_MAX_CONTROL_VERTS];
+
+ int num_control = patch_eval_control_verts(
+ kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv);
+
+ float3 val = make_float3(0.0f, 0.0f, 0.0f);
+ if (du)
+ *du = make_float3(0.0f, 0.0f, 0.0f);
+ if (dv)
+ *dv = make_float3(0.0f, 0.0f, 0.0f);
+
+ for (int i = 0; i < num_control; i++) {
+ float3 v = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + indices[i]));
+
+ val += v * weights[i];
+ if (du)
+ *du += v * weights_du[i];
+ if (dv)
+ *dv += v * weights_dv[i];
+ }
+
+ return val;
}
-ccl_device float3 patch_eval_uchar4(KernelGlobals *kg, const ShaderData *sd, int offset,
- int patch, float u, float v, int channel,
- float3 *du, float3 *dv)
+ccl_device float3 patch_eval_uchar4(KernelGlobals *kg,
+ const ShaderData *sd,
+ int offset,
+ int patch,
+ float u,
+ float v,
+ int channel,
+ float3 *du,
+ float3 *dv)
{
- int indices[PATCH_MAX_CONTROL_VERTS];
- float weights[PATCH_MAX_CONTROL_VERTS];
- float weights_du[PATCH_MAX_CONTROL_VERTS];
- float weights_dv[PATCH_MAX_CONTROL_VERTS];
-
- int num_control = patch_eval_control_verts(kg, sd->object, patch, u, v, channel,
- indices, weights, weights_du, weights_dv);
-
- float3 val = make_float3(0.0f, 0.0f, 0.0f);
- if(du) *du = make_float3(0.0f, 0.0f, 0.0f);
- if(dv) *dv = make_float3(0.0f, 0.0f, 0.0f);
-
- for(int i = 0; i < num_control; i++) {
- float3 v = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, offset + indices[i]));
-
- val += v * weights[i];
- if(du) *du += v * weights_du[i];
- if(dv) *dv += v * weights_dv[i];
- }
-
- return val;
+ int indices[PATCH_MAX_CONTROL_VERTS];
+ float weights[PATCH_MAX_CONTROL_VERTS];
+ float weights_du[PATCH_MAX_CONTROL_VERTS];
+ float weights_dv[PATCH_MAX_CONTROL_VERTS];
+
+ int num_control = patch_eval_control_verts(
+ kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv);
+
+ float3 val = make_float3(0.0f, 0.0f, 0.0f);
+ if (du)
+ *du = make_float3(0.0f, 0.0f, 0.0f);
+ if (dv)
+ *dv = make_float3(0.0f, 0.0f, 0.0f);
+
+ for (int i = 0; i < num_control; i++) {
+ float3 v = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, offset + indices[i]));
+
+ val += v * weights[i];
+ if (du)
+ *du += v * weights_du[i];
+ if (dv)
+ *dv += v * weights_dv[i];
+ }
+
+ return val;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h
index 95d9d1050fb..7f2b52a24c4 100644
--- a/intern/cycles/kernel/geom/geom_primitive.h
+++ b/intern/cycles/kernel/geom/geom_primitive.h
@@ -22,57 +22,59 @@
CCL_NAMESPACE_BEGIN
/* Generic primitive attribute reading functions */
-ccl_device_inline float primitive_attribute_float(KernelGlobals *kg,
- const ShaderData *sd,
- const AttributeDescriptor desc,
- float *dx, float *dy)
+ccl_device_inline float primitive_attribute_float(
+ KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
{
- if(sd->type & PRIMITIVE_ALL_TRIANGLE) {
- if(subd_triangle_patch(kg, sd) == ~0)
- return triangle_attribute_float(kg, sd, desc, dx, dy);
- else
- return subd_triangle_attribute_float(kg, sd, desc, dx, dy);
- }
+ if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
+ if (subd_triangle_patch(kg, sd) == ~0)
+ return triangle_attribute_float(kg, sd, desc, dx, dy);
+ else
+ return subd_triangle_attribute_float(kg, sd, desc, dx, dy);
+ }
#ifdef __HAIR__
- else if(sd->type & PRIMITIVE_ALL_CURVE) {
- return curve_attribute_float(kg, sd, desc, dx, dy);
- }
+ else if (sd->type & PRIMITIVE_ALL_CURVE) {
+ return curve_attribute_float(kg, sd, desc, dx, dy);
+ }
#endif
#ifdef __VOLUME__
- else if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
- if(dx) *dx = 0.0f;
- if(dy) *dy = 0.0f;
- return volume_attribute_float(kg, sd, desc);
- }
+ else if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
+ if (dx)
+ *dx = 0.0f;
+ if (dy)
+ *dy = 0.0f;
+ return volume_attribute_float(kg, sd, desc);
+ }
#endif
- else {
- if(dx) *dx = 0.0f;
- if(dy) *dy = 0.0f;
- return 0.0f;
- }
+ else {
+ if (dx)
+ *dx = 0.0f;
+ if (dy)
+ *dy = 0.0f;
+ return 0.0f;
+ }
}
-ccl_device_inline float primitive_surface_attribute_float(KernelGlobals *kg,
- const ShaderData *sd,
- const AttributeDescriptor desc,
- float *dx, float *dy)
+ccl_device_inline float primitive_surface_attribute_float(
+ KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
{
- if(sd->type & PRIMITIVE_ALL_TRIANGLE) {
- if(subd_triangle_patch(kg, sd) == ~0)
- return triangle_attribute_float(kg, sd, desc, dx, dy);
- else
- return subd_triangle_attribute_float(kg, sd, desc, dx, dy);
- }
+ if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
+ if (subd_triangle_patch(kg, sd) == ~0)
+ return triangle_attribute_float(kg, sd, desc, dx, dy);
+ else
+ return subd_triangle_attribute_float(kg, sd, desc, dx, dy);
+ }
#ifdef __HAIR__
- else if(sd->type & PRIMITIVE_ALL_CURVE) {
- return curve_attribute_float(kg, sd, desc, dx, dy);
- }
+ else if (sd->type & PRIMITIVE_ALL_CURVE) {
+ return curve_attribute_float(kg, sd, desc, dx, dy);
+ }
#endif
- else {
- if(dx) *dx = 0.0f;
- if(dy) *dy = 0.0f;
- return 0.0f;
- }
+ else {
+ if (dx)
+ *dx = 0.0f;
+ if (dy)
+ *dy = 0.0f;
+ return 0.0f;
+ }
}
#ifdef __VOLUME__
@@ -80,120 +82,136 @@ ccl_device_inline float primitive_volume_attribute_float(KernelGlobals *kg,
const ShaderData *sd,
const AttributeDescriptor desc)
{
- if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
- return volume_attribute_float(kg, sd, desc);
- }
- else {
- return 0.0f;
- }
+ if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
+ return volume_attribute_float(kg, sd, desc);
+ }
+ else {
+ return 0.0f;
+ }
}
#endif
ccl_device_inline float2 primitive_attribute_float2(KernelGlobals *kg,
const ShaderData *sd,
const AttributeDescriptor desc,
- float2 *dx, float2 *dy)
+ float2 *dx,
+ float2 *dy)
{
- if(sd->type & PRIMITIVE_ALL_TRIANGLE) {
- if(subd_triangle_patch(kg, sd) == ~0)
- return triangle_attribute_float2(kg, sd, desc, dx, dy);
- else
- return subd_triangle_attribute_float2(kg, sd, desc, dx, dy);
- }
+ if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
+ if (subd_triangle_patch(kg, sd) == ~0)
+ return triangle_attribute_float2(kg, sd, desc, dx, dy);
+ else
+ return subd_triangle_attribute_float2(kg, sd, desc, dx, dy);
+ }
#ifdef __HAIR__
- else if(sd->type & PRIMITIVE_ALL_CURVE) {
- return curve_attribute_float2(kg, sd, desc, dx, dy);
- }
+ else if (sd->type & PRIMITIVE_ALL_CURVE) {
+ return curve_attribute_float2(kg, sd, desc, dx, dy);
+ }
#endif
#ifdef __VOLUME__
- else if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
- kernel_assert(0);
- if(dx) *dx = make_float2(0.0f, 0.0f);
- if(dy) *dy = make_float2(0.0f, 0.0f);
- return make_float2(0.0f, 0.0f);
- }
+ else if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
+ kernel_assert(0);
+ if (dx)
+ *dx = make_float2(0.0f, 0.0f);
+ if (dy)
+ *dy = make_float2(0.0f, 0.0f);
+ return make_float2(0.0f, 0.0f);
+ }
#endif
- else {
- if(dx) *dx = make_float2(0.0f, 0.0f);
- if(dy) *dy = make_float2(0.0f, 0.0f);
- return make_float2(0.0f, 0.0f);
- }
+ else {
+ if (dx)
+ *dx = make_float2(0.0f, 0.0f);
+ if (dy)
+ *dy = make_float2(0.0f, 0.0f);
+ return make_float2(0.0f, 0.0f);
+ }
}
ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg,
const ShaderData *sd,
const AttributeDescriptor desc,
- float3 *dx, float3 *dy)
+ float3 *dx,
+ float3 *dy)
{
- if(sd->type & PRIMITIVE_ALL_TRIANGLE) {
- if(subd_triangle_patch(kg, sd) == ~0)
- return triangle_attribute_float3(kg, sd, desc, dx, dy);
- else
- return subd_triangle_attribute_float3(kg, sd, desc, dx, dy);
- }
+ if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
+ if (subd_triangle_patch(kg, sd) == ~0)
+ return triangle_attribute_float3(kg, sd, desc, dx, dy);
+ else
+ return subd_triangle_attribute_float3(kg, sd, desc, dx, dy);
+ }
#ifdef __HAIR__
- else if(sd->type & PRIMITIVE_ALL_CURVE) {
- return curve_attribute_float3(kg, sd, desc, dx, dy);
- }
+ else if (sd->type & PRIMITIVE_ALL_CURVE) {
+ return curve_attribute_float3(kg, sd, desc, dx, dy);
+ }
#endif
#ifdef __VOLUME__
- else if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
- if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
- if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
- return volume_attribute_float3(kg, sd, desc);
- }
+ else if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
+ if (dx)
+ *dx = make_float3(0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float3(0.0f, 0.0f, 0.0f);
+ return volume_attribute_float3(kg, sd, desc);
+ }
#endif
- else {
- if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
- if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
- return make_float3(0.0f, 0.0f, 0.0f);
- }
+ else {
+ if (dx)
+ *dx = make_float3(0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
}
ccl_device_inline float2 primitive_surface_attribute_float2(KernelGlobals *kg,
const ShaderData *sd,
const AttributeDescriptor desc,
- float2 *dx, float2 *dy)
+ float2 *dx,
+ float2 *dy)
{
- if(sd->type & PRIMITIVE_ALL_TRIANGLE) {
- if(subd_triangle_patch(kg, sd) == ~0)
- return triangle_attribute_float2(kg, sd, desc, dx, dy);
- else
- return subd_triangle_attribute_float2(kg, sd, desc, dx, dy);
- }
+ if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
+ if (subd_triangle_patch(kg, sd) == ~0)
+ return triangle_attribute_float2(kg, sd, desc, dx, dy);
+ else
+ return subd_triangle_attribute_float2(kg, sd, desc, dx, dy);
+ }
#ifdef __HAIR__
- else if(sd->type & PRIMITIVE_ALL_CURVE) {
- return curve_attribute_float2(kg, sd, desc, dx, dy);
- }
+ else if (sd->type & PRIMITIVE_ALL_CURVE) {
+ return curve_attribute_float2(kg, sd, desc, dx, dy);
+ }
#endif
- else {
- if(dx) *dx = make_float2(0.0f, 0.0f);
- if(dy) *dy = make_float2(0.0f, 0.0f);
- return make_float2(0.0f, 0.0f);
- }
+ else {
+ if (dx)
+ *dx = make_float2(0.0f, 0.0f);
+ if (dy)
+ *dy = make_float2(0.0f, 0.0f);
+ return make_float2(0.0f, 0.0f);
+ }
}
ccl_device_inline float3 primitive_surface_attribute_float3(KernelGlobals *kg,
const ShaderData *sd,
const AttributeDescriptor desc,
- float3 *dx, float3 *dy)
+ float3 *dx,
+ float3 *dy)
{
- if(sd->type & PRIMITIVE_ALL_TRIANGLE) {
- if(subd_triangle_patch(kg, sd) == ~0)
- return triangle_attribute_float3(kg, sd, desc, dx, dy);
- else
- return subd_triangle_attribute_float3(kg, sd, desc, dx, dy);
- }
+ if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
+ if (subd_triangle_patch(kg, sd) == ~0)
+ return triangle_attribute_float3(kg, sd, desc, dx, dy);
+ else
+ return subd_triangle_attribute_float3(kg, sd, desc, dx, dy);
+ }
#ifdef __HAIR__
- else if(sd->type & PRIMITIVE_ALL_CURVE) {
- return curve_attribute_float3(kg, sd, desc, dx, dy);
- }
+ else if (sd->type & PRIMITIVE_ALL_CURVE) {
+ return curve_attribute_float3(kg, sd, desc, dx, dy);
+ }
#endif
- else {
- if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
- if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
- return make_float3(0.0f, 0.0f, 0.0f);
- }
+ else {
+ if (dx)
+ *dx = make_float3(0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
}
#ifdef __VOLUME__
@@ -201,12 +219,12 @@ ccl_device_inline float3 primitive_volume_attribute_float3(KernelGlobals *kg,
const ShaderData *sd,
const AttributeDescriptor desc)
{
- if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
- return volume_attribute_float3(kg, sd, desc);
- }
- else {
- return make_float3(0.0f, 0.0f, 0.0f);
- }
+ if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
+ return volume_attribute_float3(kg, sd, desc);
+ }
+ else {
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
}
#endif
@@ -214,33 +232,33 @@ ccl_device_inline float3 primitive_volume_attribute_float3(KernelGlobals *kg,
ccl_device_inline float3 primitive_uv(KernelGlobals *kg, ShaderData *sd)
{
- const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_UV);
+ const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_UV);
- if(desc.offset == ATTR_STD_NOT_FOUND)
- return make_float3(0.0f, 0.0f, 0.0f);
+ if (desc.offset == ATTR_STD_NOT_FOUND)
+ return make_float3(0.0f, 0.0f, 0.0f);
- float2 uv = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL);
- return make_float3(uv.x, uv.y, 1.0f);
+ float2 uv = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL);
+ return make_float3(uv.x, uv.y, 1.0f);
}
/* Ptex coordinates */
ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, int *face_id)
{
- /* storing ptex data as attributes is not memory efficient but simple for tests */
- const AttributeDescriptor desc_face_id = find_attribute(kg, sd, ATTR_STD_PTEX_FACE_ID);
- const AttributeDescriptor desc_uv = find_attribute(kg, sd, ATTR_STD_PTEX_UV);
+ /* storing ptex data as attributes is not memory efficient but simple for tests */
+ const AttributeDescriptor desc_face_id = find_attribute(kg, sd, ATTR_STD_PTEX_FACE_ID);
+ const AttributeDescriptor desc_uv = find_attribute(kg, sd, ATTR_STD_PTEX_UV);
- if(desc_face_id.offset == ATTR_STD_NOT_FOUND || desc_uv.offset == ATTR_STD_NOT_FOUND)
- return false;
+ if (desc_face_id.offset == ATTR_STD_NOT_FOUND || desc_uv.offset == ATTR_STD_NOT_FOUND)
+ return false;
- float3 uv3 = primitive_surface_attribute_float3(kg, sd, desc_uv, NULL, NULL);
- float face_id_f = primitive_surface_attribute_float(kg, sd, desc_face_id, NULL, NULL);
+ float3 uv3 = primitive_surface_attribute_float3(kg, sd, desc_uv, NULL, NULL);
+ float face_id_f = primitive_surface_attribute_float(kg, sd, desc_face_id, NULL, NULL);
- *uv = make_float2(uv3.x, uv3.y);
- *face_id = (int)face_id_f;
+ *uv = make_float2(uv3.x, uv3.y);
+ *face_id = (int)face_id_f;
- return true;
+ return true;
}
/* Surface tangent */
@@ -248,125 +266,125 @@ ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, in
ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd)
{
#ifdef __HAIR__
- if(sd->type & PRIMITIVE_ALL_CURVE)
+ if (sd->type & PRIMITIVE_ALL_CURVE)
# ifdef __DPDU__
- return normalize(sd->dPdu);
+ return normalize(sd->dPdu);
# else
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
# endif
#endif
- /* try to create spherical tangent from generated coordinates */
- const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED);
-
- if(desc.offset != ATTR_STD_NOT_FOUND) {
- float3 data = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
- data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f);
- object_normal_transform(kg, sd, &data);
- return cross(sd->N, normalize(cross(data, sd->N)));
- }
- else {
- /* otherwise use surface derivatives */
+ /* try to create spherical tangent from generated coordinates */
+ const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED);
+
+ if (desc.offset != ATTR_STD_NOT_FOUND) {
+ float3 data = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
+ data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f);
+ object_normal_transform(kg, sd, &data);
+ return cross(sd->N, normalize(cross(data, sd->N)));
+ }
+ else {
+ /* otherwise use surface derivatives */
#ifdef __DPDU__
- return normalize(sd->dPdu);
+ return normalize(sd->dPdu);
#else
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
#endif
- }
+ }
}
/* Motion vector for motion pass */
ccl_device_inline float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd)
{
- /* center position */
- float3 center;
+ /* center position */
+ float3 center;
#ifdef __HAIR__
- bool is_curve_primitive = sd->type & PRIMITIVE_ALL_CURVE;
- if(is_curve_primitive) {
- center = curve_motion_center_location(kg, sd);
-
- if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
- object_position_transform(kg, sd, &center);
- }
- }
- else
+ bool is_curve_primitive = sd->type & PRIMITIVE_ALL_CURVE;
+ if (is_curve_primitive) {
+ center = curve_motion_center_location(kg, sd);
+
+ if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+ object_position_transform(kg, sd, &center);
+ }
+ }
+ else
#endif
- center = sd->P;
+ center = sd->P;
- float3 motion_pre = center, motion_post = center;
+ float3 motion_pre = center, motion_post = center;
- /* deformation motion */
- AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_MOTION_VERTEX_POSITION);
+ /* deformation motion */
+ AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_MOTION_VERTEX_POSITION);
- if(desc.offset != ATTR_STD_NOT_FOUND) {
- /* get motion info */
- int numverts, numkeys;
- object_motion_info(kg, sd->object, NULL, &numverts, &numkeys);
+ if (desc.offset != ATTR_STD_NOT_FOUND) {
+ /* get motion info */
+ int numverts, numkeys;
+ object_motion_info(kg, sd->object, NULL, &numverts, &numkeys);
- /* lookup attributes */
- motion_pre = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
+ /* lookup attributes */
+ motion_pre = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
- desc.offset += (sd->type & PRIMITIVE_ALL_TRIANGLE)? numverts: numkeys;
- motion_post = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
+ desc.offset += (sd->type & PRIMITIVE_ALL_TRIANGLE) ? numverts : numkeys;
+ motion_post = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
#ifdef __HAIR__
- if(is_curve_primitive && (sd->object_flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) {
- object_position_transform(kg, sd, &motion_pre);
- object_position_transform(kg, sd, &motion_post);
- }
+ if (is_curve_primitive && (sd->object_flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) {
+ object_position_transform(kg, sd, &motion_pre);
+ object_position_transform(kg, sd, &motion_post);
+ }
#endif
- }
-
- /* object motion. note that depending on the mesh having motion vectors, this
- * transformation was set match the world/object space of motion_pre/post */
- Transform tfm;
-
- tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_PRE);
- motion_pre = transform_point(&tfm, motion_pre);
-
- tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_POST);
- motion_post = transform_point(&tfm, motion_post);
-
- float3 motion_center;
-
- /* camera motion, for perspective/orthographic motion.pre/post will be a
- * world-to-raster matrix, for panorama it's world-to-camera */
- if(kernel_data.cam.type != CAMERA_PANORAMA) {
- ProjectionTransform projection = kernel_data.cam.worldtoraster;
- motion_center = transform_perspective(&projection, center);
-
- projection = kernel_data.cam.perspective_pre;
- motion_pre = transform_perspective(&projection, motion_pre);
-
- projection = kernel_data.cam.perspective_post;
- motion_post = transform_perspective(&projection, motion_post);
- }
- else {
- tfm = kernel_data.cam.worldtocamera;
- motion_center = normalize(transform_point(&tfm, center));
- motion_center = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_center));
- motion_center.x *= kernel_data.cam.width;
- motion_center.y *= kernel_data.cam.height;
-
- tfm = kernel_data.cam.motion_pass_pre;
- motion_pre = normalize(transform_point(&tfm, motion_pre));
- motion_pre = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_pre));
- motion_pre.x *= kernel_data.cam.width;
- motion_pre.y *= kernel_data.cam.height;
-
- tfm = kernel_data.cam.motion_pass_post;
- motion_post = normalize(transform_point(&tfm, motion_post));
- motion_post = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_post));
- motion_post.x *= kernel_data.cam.width;
- motion_post.y *= kernel_data.cam.height;
- }
-
- motion_pre = motion_pre - motion_center;
- motion_post = motion_center - motion_post;
-
- return make_float4(motion_pre.x, motion_pre.y, motion_post.x, motion_post.y);
+ }
+
+ /* object motion. note that depending on the mesh having motion vectors, this
+ * transformation was set match the world/object space of motion_pre/post */
+ Transform tfm;
+
+ tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_PRE);
+ motion_pre = transform_point(&tfm, motion_pre);
+
+ tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_POST);
+ motion_post = transform_point(&tfm, motion_post);
+
+ float3 motion_center;
+
+ /* camera motion, for perspective/orthographic motion.pre/post will be a
+ * world-to-raster matrix, for panorama it's world-to-camera */
+ if (kernel_data.cam.type != CAMERA_PANORAMA) {
+ ProjectionTransform projection = kernel_data.cam.worldtoraster;
+ motion_center = transform_perspective(&projection, center);
+
+ projection = kernel_data.cam.perspective_pre;
+ motion_pre = transform_perspective(&projection, motion_pre);
+
+ projection = kernel_data.cam.perspective_post;
+ motion_post = transform_perspective(&projection, motion_post);
+ }
+ else {
+ tfm = kernel_data.cam.worldtocamera;
+ motion_center = normalize(transform_point(&tfm, center));
+ motion_center = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_center));
+ motion_center.x *= kernel_data.cam.width;
+ motion_center.y *= kernel_data.cam.height;
+
+ tfm = kernel_data.cam.motion_pass_pre;
+ motion_pre = normalize(transform_point(&tfm, motion_pre));
+ motion_pre = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_pre));
+ motion_pre.x *= kernel_data.cam.width;
+ motion_pre.y *= kernel_data.cam.height;
+
+ tfm = kernel_data.cam.motion_pass_post;
+ motion_post = normalize(transform_point(&tfm, motion_post));
+ motion_post = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_post));
+ motion_post.x *= kernel_data.cam.width;
+ motion_post.y *= kernel_data.cam.height;
+ }
+
+ motion_pre = motion_pre - motion_center;
+ motion_post = motion_center - motion_post;
+
+ return make_float4(motion_pre.x, motion_pre.y, motion_post.x, motion_post.y);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_subd_triangle.h b/intern/cycles/kernel/geom/geom_subd_triangle.h
index 251e070c21f..8d5b3c12833 100644
--- a/intern/cycles/kernel/geom/geom_subd_triangle.h
+++ b/intern/cycles/kernel/geom/geom_subd_triangle.h
@@ -22,455 +22,492 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline uint subd_triangle_patch(KernelGlobals *kg, const ShaderData *sd)
{
- return (sd->prim != PRIM_NONE) ? kernel_tex_fetch(__tri_patch, sd->prim) : ~0;
+ return (sd->prim != PRIM_NONE) ? kernel_tex_fetch(__tri_patch, sd->prim) : ~0;
}
/* UV coords of triangle within patch */
-ccl_device_inline void subd_triangle_patch_uv(KernelGlobals *kg, const ShaderData *sd, float2 uv[3])
+ccl_device_inline void subd_triangle_patch_uv(KernelGlobals *kg,
+ const ShaderData *sd,
+ float2 uv[3])
{
- uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
+ uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
- uv[0] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.x);
- uv[1] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.y);
- uv[2] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.z);
+ uv[0] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.x);
+ uv[1] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.y);
+ uv[2] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.z);
}
/* Vertex indices of patch */
ccl_device_inline uint4 subd_triangle_patch_indices(KernelGlobals *kg, int patch)
{
- uint4 indices;
+ uint4 indices;
- indices.x = kernel_tex_fetch(__patches, patch+0);
- indices.y = kernel_tex_fetch(__patches, patch+1);
- indices.z = kernel_tex_fetch(__patches, patch+2);
- indices.w = kernel_tex_fetch(__patches, patch+3);
+ indices.x = kernel_tex_fetch(__patches, patch + 0);
+ indices.y = kernel_tex_fetch(__patches, patch + 1);
+ indices.z = kernel_tex_fetch(__patches, patch + 2);
+ indices.w = kernel_tex_fetch(__patches, patch + 3);
- return indices;
+ return indices;
}
/* Originating face for patch */
ccl_device_inline uint subd_triangle_patch_face(KernelGlobals *kg, int patch)
{
- return kernel_tex_fetch(__patches, patch+4);
+ return kernel_tex_fetch(__patches, patch + 4);
}
/* Number of corners on originating face */
ccl_device_inline uint subd_triangle_patch_num_corners(KernelGlobals *kg, int patch)
{
- return kernel_tex_fetch(__patches, patch+5) & 0xffff;
+ return kernel_tex_fetch(__patches, patch + 5) & 0xffff;
}
/* Indices of the four corners that are used by the patch */
ccl_device_inline void subd_triangle_patch_corners(KernelGlobals *kg, int patch, int corners[4])
{
- uint4 data;
-
- data.x = kernel_tex_fetch(__patches, patch+4);
- data.y = kernel_tex_fetch(__patches, patch+5);
- data.z = kernel_tex_fetch(__patches, patch+6);
- data.w = kernel_tex_fetch(__patches, patch+7);
-
- int num_corners = data.y & 0xffff;
-
- if(num_corners == 4) {
- /* quad */
- corners[0] = data.z;
- corners[1] = data.z+1;
- corners[2] = data.z+2;
- corners[3] = data.z+3;
- }
- else {
- /* ngon */
- int c = data.y >> 16;
-
- corners[0] = data.z + c;
- corners[1] = data.z + mod(c+1, num_corners);
- corners[2] = data.w;
- corners[3] = data.z + mod(c-1, num_corners);
- }
+ uint4 data;
+
+ data.x = kernel_tex_fetch(__patches, patch + 4);
+ data.y = kernel_tex_fetch(__patches, patch + 5);
+ data.z = kernel_tex_fetch(__patches, patch + 6);
+ data.w = kernel_tex_fetch(__patches, patch + 7);
+
+ int num_corners = data.y & 0xffff;
+
+ if (num_corners == 4) {
+ /* quad */
+ corners[0] = data.z;
+ corners[1] = data.z + 1;
+ corners[2] = data.z + 2;
+ corners[3] = data.z + 3;
+ }
+ else {
+ /* ngon */
+ int c = data.y >> 16;
+
+ corners[0] = data.z + c;
+ corners[1] = data.z + mod(c + 1, num_corners);
+ corners[2] = data.w;
+ corners[3] = data.z + mod(c - 1, num_corners);
+ }
}
/* Reading attributes on various subdivision triangle elements */
-ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
+ccl_device_noinline float subd_triangle_attribute_float(
+ KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
{
- int patch = subd_triangle_patch(kg, sd);
+ int patch = subd_triangle_patch(kg, sd);
#ifdef __PATCH_EVAL__
- if(desc.flags & ATTR_SUBDIVIDED) {
- float2 uv[3];
- subd_triangle_patch_uv(kg, sd, uv);
-
- float2 dpdu = uv[0] - uv[2];
- float2 dpdv = uv[1] - uv[2];
-
- /* p is [s, t] */
- float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
-
- float a, dads, dadt;
- a = patch_eval_float(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
+ if (desc.flags & ATTR_SUBDIVIDED) {
+ float2 uv[3];
+ subd_triangle_patch_uv(kg, sd, uv);
+
+ float2 dpdu = uv[0] - uv[2];
+ float2 dpdv = uv[1] - uv[2];
+
+ /* p is [s, t] */
+ float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
+
+ float a, dads, dadt;
+ a = patch_eval_float(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
+
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx || dy) {
+ float dsdu = dpdu.x;
+ float dtdu = dpdu.y;
+ float dsdv = dpdv.x;
+ float dtdv = dpdv.y;
+
+ if (dx) {
+ float dudx = sd->du.dx;
+ float dvdx = sd->dv.dx;
+
+ float dsdx = dsdu * dudx + dsdv * dvdx;
+ float dtdx = dtdu * dudx + dtdv * dvdx;
+
+ *dx = dads * dsdx + dadt * dtdx;
+ }
+ if (dy) {
+ float dudy = sd->du.dy;
+ float dvdy = sd->dv.dy;
+
+ float dsdy = dsdu * dudy + dsdv * dvdy;
+ float dtdy = dtdu * dudy + dtdv * dvdy;
+
+ *dy = dads * dsdy + dadt * dtdy;
+ }
+ }
+# endif
+
+ return a;
+ }
+ else
+#endif /* __PATCH_EVAL__ */
+ if (desc.element == ATTR_ELEMENT_FACE) {
+ if (dx)
+ *dx = 0.0f;
+ if (dy)
+ *dy = 0.0f;
+
+ return kernel_tex_fetch(__attributes_float, desc.offset + subd_triangle_patch_face(kg, patch));
+ }
+ else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
+ float2 uv[3];
+ subd_triangle_patch_uv(kg, sd, uv);
+
+ uint4 v = subd_triangle_patch_indices(kg, patch);
+
+ float f0 = kernel_tex_fetch(__attributes_float, desc.offset + v.x);
+ float f1 = kernel_tex_fetch(__attributes_float, desc.offset + v.y);
+ float f2 = kernel_tex_fetch(__attributes_float, desc.offset + v.z);
+ float f3 = kernel_tex_fetch(__attributes_float, desc.offset + v.w);
+
+ if (subd_triangle_patch_num_corners(kg, patch) != 4) {
+ f1 = (f1 + f0) * 0.5f;
+ f3 = (f3 + f0) * 0.5f;
+ }
+
+ float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
+ float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
+ float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
#ifdef __RAY_DIFFERENTIALS__
- if(dx || dy) {
- float dsdu = dpdu.x;
- float dtdu = dpdu.y;
- float dsdv = dpdv.x;
- float dtdv = dpdv.y;
-
- if(dx) {
- float dudx = sd->du.dx;
- float dvdx = sd->dv.dx;
-
- float dsdx = dsdu*dudx + dsdv*dvdx;
- float dtdx = dtdu*dudx + dtdv*dvdx;
-
- *dx = dads*dsdx + dadt*dtdx;
- }
- if(dy) {
- float dudy = sd->du.dy;
- float dvdy = sd->dv.dy;
-
- float dsdy = dsdu*dudy + dsdv*dvdy;
- float dtdy = dtdu*dudy + dtdv*dvdy;
-
- *dy = dads*dsdy + dadt*dtdy;
- }
- }
+ if (dx)
+ *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
+ if (dy)
+ *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
#endif
- return a;
- }
- else
-#endif /* __PATCH_EVAL__ */
- if(desc.element == ATTR_ELEMENT_FACE) {
- if(dx) *dx = 0.0f;
- if(dy) *dy = 0.0f;
+ return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
+ }
+ else if (desc.element == ATTR_ELEMENT_CORNER) {
+ float2 uv[3];
+ subd_triangle_patch_uv(kg, sd, uv);
- return kernel_tex_fetch(__attributes_float, desc.offset + subd_triangle_patch_face(kg, patch));
- }
- else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
- float2 uv[3];
- subd_triangle_patch_uv(kg, sd, uv);
+ int corners[4];
+ subd_triangle_patch_corners(kg, patch, corners);
- uint4 v = subd_triangle_patch_indices(kg, patch);
+ float f0 = kernel_tex_fetch(__attributes_float, corners[0] + desc.offset);
+ float f1 = kernel_tex_fetch(__attributes_float, corners[1] + desc.offset);
+ float f2 = kernel_tex_fetch(__attributes_float, corners[2] + desc.offset);
+ float f3 = kernel_tex_fetch(__attributes_float, corners[3] + desc.offset);
- float f0 = kernel_tex_fetch(__attributes_float, desc.offset + v.x);
- float f1 = kernel_tex_fetch(__attributes_float, desc.offset + v.y);
- float f2 = kernel_tex_fetch(__attributes_float, desc.offset + v.z);
- float f3 = kernel_tex_fetch(__attributes_float, desc.offset + v.w);
+ if (subd_triangle_patch_num_corners(kg, patch) != 4) {
+ f1 = (f1 + f0) * 0.5f;
+ f3 = (f3 + f0) * 0.5f;
+ }
- if(subd_triangle_patch_num_corners(kg, patch) != 4) {
- f1 = (f1+f0)*0.5f;
- f3 = (f3+f0)*0.5f;
- }
-
- float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
- float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
- float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
+ float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
+ float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
+ float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c;
- if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c;
+ if (dx)
+ *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
+ if (dy)
+ *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
#endif
- return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c;
- }
- else if(desc.element == ATTR_ELEMENT_CORNER) {
- float2 uv[3];
- subd_triangle_patch_uv(kg, sd, uv);
-
- int corners[4];
- subd_triangle_patch_corners(kg, patch, corners);
+ return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
+ }
+ else {
+ if (dx)
+ *dx = 0.0f;
+ if (dy)
+ *dy = 0.0f;
- float f0 = kernel_tex_fetch(__attributes_float, corners[0] + desc.offset);
- float f1 = kernel_tex_fetch(__attributes_float, corners[1] + desc.offset);
- float f2 = kernel_tex_fetch(__attributes_float, corners[2] + desc.offset);
- float f3 = kernel_tex_fetch(__attributes_float, corners[3] + desc.offset);
-
- if(subd_triangle_patch_num_corners(kg, patch) != 4) {
- f1 = (f1+f0)*0.5f;
- f3 = (f3+f0)*0.5f;
- }
-
- float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
- float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
- float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
-
-#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c;
- if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c;
-#endif
-
- return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c;
- }
- else {
- if(dx) *dx = 0.0f;
- if(dy) *dy = 0.0f;
-
- return 0.0f;
- }
+ return 0.0f;
+ }
}
-ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float2 *dx, float2 *dy)
+ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals *kg,
+ const ShaderData *sd,
+ const AttributeDescriptor desc,
+ float2 *dx,
+ float2 *dy)
{
- int patch = subd_triangle_patch(kg, sd);
+ int patch = subd_triangle_patch(kg, sd);
#ifdef __PATCH_EVAL__
- if(desc.flags & ATTR_SUBDIVIDED) {
- float2 uv[3];
- subd_triangle_patch_uv(kg, sd, uv);
-
- float2 dpdu = uv[0] - uv[2];
- float2 dpdv = uv[1] - uv[2];
-
- /* p is [s, t] */
- float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
-
- float2 a, dads, dadt;
-
- a = patch_eval_float2(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
-
-#ifdef __RAY_DIFFERENTIALS__
- if(dx || dy) {
- float dsdu = dpdu.x;
- float dtdu = dpdu.y;
- float dsdv = dpdv.x;
- float dtdv = dpdv.y;
-
- if(dx) {
- float dudx = sd->du.dx;
- float dvdx = sd->dv.dx;
-
- float dsdx = dsdu*dudx + dsdv*dvdx;
- float dtdx = dtdu*dudx + dtdv*dvdx;
-
- *dx = dads*dsdx + dadt*dtdx;
- }
- if(dy) {
- float dudy = sd->du.dy;
- float dvdy = sd->dv.dy;
-
- float dsdy = dsdu*dudy + dsdv*dvdy;
- float dtdy = dtdu*dudy + dtdv*dvdy;
-
- *dy = dads*dsdy + dadt*dtdy;
- }
- }
-#endif
-
- return a;
- }
- else
-#endif /* __PATCH_EVAL__ */
- if(desc.element == ATTR_ELEMENT_FACE) {
- if(dx) *dx = make_float2(0.0f, 0.0f);
- if(dy) *dy = make_float2(0.0f, 0.0f);
-
- return kernel_tex_fetch(__attributes_float2, desc.offset + subd_triangle_patch_face(kg, patch));
- }
- else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
- float2 uv[3];
- subd_triangle_patch_uv(kg, sd, uv);
-
- uint4 v = subd_triangle_patch_indices(kg, patch);
-
- float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + v.x);
- float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + v.y);
- float2 f2 = kernel_tex_fetch(__attributes_float2, desc.offset + v.z);
- float2 f3 = kernel_tex_fetch(__attributes_float2, desc.offset + v.w);
-
- if(subd_triangle_patch_num_corners(kg, patch) != 4) {
- f1 = (f1+f0)*0.5f;
- f3 = (f3+f0)*0.5f;
- }
-
- float2 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
- float2 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
- float2 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
+ if (desc.flags & ATTR_SUBDIVIDED) {
+ float2 uv[3];
+ subd_triangle_patch_uv(kg, sd, uv);
+
+ float2 dpdu = uv[0] - uv[2];
+ float2 dpdv = uv[1] - uv[2];
+
+ /* p is [s, t] */
+ float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
+
+ float2 a, dads, dadt;
+
+ a = patch_eval_float2(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
+
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx || dy) {
+ float dsdu = dpdu.x;
+ float dtdu = dpdu.y;
+ float dsdv = dpdv.x;
+ float dtdv = dpdv.y;
+
+ if (dx) {
+ float dudx = sd->du.dx;
+ float dvdx = sd->dv.dx;
+
+ float dsdx = dsdu * dudx + dsdv * dvdx;
+ float dtdx = dtdu * dudx + dtdv * dvdx;
+
+ *dx = dads * dsdx + dadt * dtdx;
+ }
+ if (dy) {
+ float dudy = sd->du.dy;
+ float dvdy = sd->dv.dy;
+
+ float dsdy = dsdu * dudy + dsdv * dvdy;
+ float dtdy = dtdu * dudy + dtdv * dvdy;
+
+ *dy = dads * dsdy + dadt * dtdy;
+ }
+ }
+# endif
+
+ return a;
+ }
+ else
+#endif /* __PATCH_EVAL__ */
+ if (desc.element == ATTR_ELEMENT_FACE) {
+ if (dx)
+ *dx = make_float2(0.0f, 0.0f);
+ if (dy)
+ *dy = make_float2(0.0f, 0.0f);
+
+ return kernel_tex_fetch(__attributes_float2,
+ desc.offset + subd_triangle_patch_face(kg, patch));
+ }
+ else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
+ float2 uv[3];
+ subd_triangle_patch_uv(kg, sd, uv);
+
+ uint4 v = subd_triangle_patch_indices(kg, patch);
+
+ float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + v.x);
+ float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + v.y);
+ float2 f2 = kernel_tex_fetch(__attributes_float2, desc.offset + v.z);
+ float2 f3 = kernel_tex_fetch(__attributes_float2, desc.offset + v.w);
+
+ if (subd_triangle_patch_num_corners(kg, patch) != 4) {
+ f1 = (f1 + f0) * 0.5f;
+ f3 = (f3 + f0) * 0.5f;
+ }
+
+ float2 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
+ float2 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
+ float2 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c;
- if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c;
+ if (dx)
+ *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
+ if (dy)
+ *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
#endif
- return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c;
- }
- else if(desc.element == ATTR_ELEMENT_CORNER) {
- float2 uv[3];
- subd_triangle_patch_uv(kg, sd, uv);
+ return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
+ }
+ else if (desc.element == ATTR_ELEMENT_CORNER) {
+ float2 uv[3];
+ subd_triangle_patch_uv(kg, sd, uv);
- int corners[4];
- subd_triangle_patch_corners(kg, patch, corners);
+ int corners[4];
+ subd_triangle_patch_corners(kg, patch, corners);
- float2 f0, f1, f2, f3;
+ float2 f0, f1, f2, f3;
- f0 = kernel_tex_fetch(__attributes_float2, corners[0] + desc.offset);
- f1 = kernel_tex_fetch(__attributes_float2, corners[1] + desc.offset);
- f2 = kernel_tex_fetch(__attributes_float2, corners[2] + desc.offset);
- f3 = kernel_tex_fetch(__attributes_float2, corners[3] + desc.offset);
+ f0 = kernel_tex_fetch(__attributes_float2, corners[0] + desc.offset);
+ f1 = kernel_tex_fetch(__attributes_float2, corners[1] + desc.offset);
+ f2 = kernel_tex_fetch(__attributes_float2, corners[2] + desc.offset);
+ f3 = kernel_tex_fetch(__attributes_float2, corners[3] + desc.offset);
- if(subd_triangle_patch_num_corners(kg, patch) != 4) {
- f1 = (f1+f0)*0.5f;
- f3 = (f3+f0)*0.5f;
- }
+ if (subd_triangle_patch_num_corners(kg, patch) != 4) {
+ f1 = (f1 + f0) * 0.5f;
+ f3 = (f3 + f0) * 0.5f;
+ }
- float2 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
- float2 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
- float2 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
+ float2 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
+ float2 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
+ float2 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c;
- if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c;
+ if (dx)
+ *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
+ if (dy)
+ *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
#endif
- return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c;
- }
- else {
- if(dx) *dx = make_float2(0.0f, 0.0f);
- if(dy) *dy = make_float2(0.0f, 0.0f);
+ return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
+ }
+ else {
+ if (dx)
+ *dx = make_float2(0.0f, 0.0f);
+ if (dy)
+ *dy = make_float2(0.0f, 0.0f);
- return make_float2(0.0f, 0.0f);
- }
+ return make_float2(0.0f, 0.0f);
+ }
}
-ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy)
+ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg,
+ const ShaderData *sd,
+ const AttributeDescriptor desc,
+ float3 *dx,
+ float3 *dy)
{
- int patch = subd_triangle_patch(kg, sd);
+ int patch = subd_triangle_patch(kg, sd);
#ifdef __PATCH_EVAL__
- if(desc.flags & ATTR_SUBDIVIDED) {
- float2 uv[3];
- subd_triangle_patch_uv(kg, sd, uv);
-
- float2 dpdu = uv[0] - uv[2];
- float2 dpdv = uv[1] - uv[2];
-
- /* p is [s, t] */
- float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
-
- float3 a, dads, dadt;
-
- if(desc.element == ATTR_ELEMENT_CORNER_BYTE) {
- a = patch_eval_uchar4(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
- }
- else {
- a = patch_eval_float3(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
- }
-
-#ifdef __RAY_DIFFERENTIALS__
- if(dx || dy) {
- float dsdu = dpdu.x;
- float dtdu = dpdu.y;
- float dsdv = dpdv.x;
- float dtdv = dpdv.y;
-
- if(dx) {
- float dudx = sd->du.dx;
- float dvdx = sd->dv.dx;
-
- float dsdx = dsdu*dudx + dsdv*dvdx;
- float dtdx = dtdu*dudx + dtdv*dvdx;
-
- *dx = dads*dsdx + dadt*dtdx;
- }
- if(dy) {
- float dudy = sd->du.dy;
- float dvdy = sd->dv.dy;
-
- float dsdy = dsdu*dudy + dsdv*dvdy;
- float dtdy = dtdu*dudy + dtdv*dvdy;
-
- *dy = dads*dsdy + dadt*dtdy;
- }
- }
-#endif
-
- return a;
- }
- else
-#endif /* __PATCH_EVAL__ */
- if(desc.element == ATTR_ELEMENT_FACE) {
- if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
- if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
-
- return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + subd_triangle_patch_face(kg, patch)));
- }
- else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
- float2 uv[3];
- subd_triangle_patch_uv(kg, sd, uv);
-
- uint4 v = subd_triangle_patch_indices(kg, patch);
-
- float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.x));
- float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.y));
- float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.z));
- float3 f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.w));
-
- if(subd_triangle_patch_num_corners(kg, patch) != 4) {
- f1 = (f1+f0)*0.5f;
- f3 = (f3+f0)*0.5f;
- }
-
- float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
- float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
- float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
+ if (desc.flags & ATTR_SUBDIVIDED) {
+ float2 uv[3];
+ subd_triangle_patch_uv(kg, sd, uv);
+
+ float2 dpdu = uv[0] - uv[2];
+ float2 dpdv = uv[1] - uv[2];
+
+ /* p is [s, t] */
+ float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
+
+ float3 a, dads, dadt;
+
+ if (desc.element == ATTR_ELEMENT_CORNER_BYTE) {
+ a = patch_eval_uchar4(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
+ }
+ else {
+ a = patch_eval_float3(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
+ }
+
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx || dy) {
+ float dsdu = dpdu.x;
+ float dtdu = dpdu.y;
+ float dsdv = dpdv.x;
+ float dtdv = dpdv.y;
+
+ if (dx) {
+ float dudx = sd->du.dx;
+ float dvdx = sd->dv.dx;
+
+ float dsdx = dsdu * dudx + dsdv * dvdx;
+ float dtdx = dtdu * dudx + dtdv * dvdx;
+
+ *dx = dads * dsdx + dadt * dtdx;
+ }
+ if (dy) {
+ float dudy = sd->du.dy;
+ float dvdy = sd->dv.dy;
+
+ float dsdy = dsdu * dudy + dsdv * dvdy;
+ float dtdy = dtdu * dudy + dtdv * dvdy;
+
+ *dy = dads * dsdy + dadt * dtdy;
+ }
+ }
+# endif
+
+ return a;
+ }
+ else
+#endif /* __PATCH_EVAL__ */
+ if (desc.element == ATTR_ELEMENT_FACE) {
+ if (dx)
+ *dx = make_float3(0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float3(0.0f, 0.0f, 0.0f);
+
+ return float4_to_float3(
+ kernel_tex_fetch(__attributes_float3, desc.offset + subd_triangle_patch_face(kg, patch)));
+ }
+ else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
+ float2 uv[3];
+ subd_triangle_patch_uv(kg, sd, uv);
+
+ uint4 v = subd_triangle_patch_indices(kg, patch);
+
+ float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.x));
+ float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.y));
+ float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.z));
+ float3 f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.w));
+
+ if (subd_triangle_patch_num_corners(kg, patch) != 4) {
+ f1 = (f1 + f0) * 0.5f;
+ f3 = (f3 + f0) * 0.5f;
+ }
+
+ float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
+ float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
+ float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c;
- if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c;
+ if (dx)
+ *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
+ if (dy)
+ *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
#endif
- return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c;
- }
- else if(desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) {
- float2 uv[3];
- subd_triangle_patch_uv(kg, sd, uv);
-
- int corners[4];
- subd_triangle_patch_corners(kg, patch, corners);
-
- float3 f0, f1, f2, f3;
-
- if(desc.element == ATTR_ELEMENT_CORNER) {
- f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset));
- f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset));
- f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset));
- f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset));
- }
- else {
- f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[0] + desc.offset));
- f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[1] + desc.offset));
- f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[2] + desc.offset));
- f3 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[3] + desc.offset));
- }
-
- if(subd_triangle_patch_num_corners(kg, patch) != 4) {
- f1 = (f1+f0)*0.5f;
- f3 = (f3+f0)*0.5f;
- }
-
- float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
- float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
- float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
+ return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
+ }
+ else if (desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) {
+ float2 uv[3];
+ subd_triangle_patch_uv(kg, sd, uv);
+
+ int corners[4];
+ subd_triangle_patch_corners(kg, patch, corners);
+
+ float3 f0, f1, f2, f3;
+
+ if (desc.element == ATTR_ELEMENT_CORNER) {
+ f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset));
+ f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset));
+ f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset));
+ f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset));
+ }
+ else {
+ f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[0] + desc.offset));
+ f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[1] + desc.offset));
+ f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[2] + desc.offset));
+ f3 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[3] + desc.offset));
+ }
+
+ if (subd_triangle_patch_num_corners(kg, patch) != 4) {
+ f1 = (f1 + f0) * 0.5f;
+ f3 = (f3 + f0) * 0.5f;
+ }
+
+ float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
+ float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
+ float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c;
- if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c;
+ if (dx)
+ *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
+ if (dy)
+ *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
#endif
- return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c;
- }
- else {
- if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
- if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
+ return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
+ }
+ else {
+ if (dx)
+ *dx = make_float3(0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float3(0.0f, 0.0f, 0.0f);
- return make_float3(0.0f, 0.0f, 0.0f);
- }
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h
index 300227c38e6..9938c0ba2c3 100644
--- a/intern/cycles/kernel/geom/geom_triangle.h
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@ -25,227 +25,268 @@ CCL_NAMESPACE_BEGIN
/* normal on triangle */
ccl_device_inline float3 triangle_normal(KernelGlobals *kg, ShaderData *sd)
{
- /* load triangle vertices */
- const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
- const float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0));
- const float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1));
- const float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2));
-
- /* return normal */
- if(sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
- return normalize(cross(v2 - v0, v1 - v0));
- }
- else {
- return normalize(cross(v1 - v0, v2 - v0));
- }
+ /* load triangle vertices */
+ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
+ const float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0));
+ const float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1));
+ const float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2));
+
+ /* return normal */
+ if (sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
+ return normalize(cross(v2 - v0, v1 - v0));
+ }
+ else {
+ return normalize(cross(v1 - v0, v2 - v0));
+ }
}
/* point and normal on triangle */
-ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int object, int prim, float u, float v, float3 *P, float3 *Ng, int *shader)
+ccl_device_inline void triangle_point_normal(
+ KernelGlobals *kg, int object, int prim, float u, float v, float3 *P, float3 *Ng, int *shader)
{
- /* load triangle vertices */
- const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
- float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0));
- float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1));
- float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2));
- /* compute point */
- float t = 1.0f - u - v;
- *P = (u*v0 + v*v1 + t*v2);
- /* get object flags */
- int object_flag = kernel_tex_fetch(__object_flag, object);
- /* compute normal */
- if(object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
- *Ng = normalize(cross(v2 - v0, v1 - v0));
- }
- else {
- *Ng = normalize(cross(v1 - v0, v2 - v0));
- }
- /* shader`*/
- *shader = kernel_tex_fetch(__tri_shader, prim);
+ /* load triangle vertices */
+ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
+ float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0));
+ float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1));
+ float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2));
+ /* compute point */
+ float t = 1.0f - u - v;
+ *P = (u * v0 + v * v1 + t * v2);
+ /* get object flags */
+ int object_flag = kernel_tex_fetch(__object_flag, object);
+ /* compute normal */
+ if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
+ *Ng = normalize(cross(v2 - v0, v1 - v0));
+ }
+ else {
+ *Ng = normalize(cross(v1 - v0, v2 - v0));
+ }
+ /* shader`*/
+ *shader = kernel_tex_fetch(__tri_shader, prim);
}
/* Triangle vertex locations */
ccl_device_inline void triangle_vertices(KernelGlobals *kg, int prim, float3 P[3])
{
- const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
- P[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0));
- P[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1));
- P[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2));
+ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
+ P[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0));
+ P[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1));
+ P[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2));
}
/* Interpolate smooth vertex normal from vertices */
-ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, float3 Ng, int prim, float u, float v)
+ccl_device_inline float3
+triangle_smooth_normal(KernelGlobals *kg, float3 Ng, int prim, float u, float v)
{
- /* load triangle vertices */
- const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
- float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x));
- float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y));
- float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z));
+ /* load triangle vertices */
+ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
+ float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x));
+ float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y));
+ float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z));
- float3 N = safe_normalize((1.0f - u - v)*n2 + u*n0 + v*n1);
+ float3 N = safe_normalize((1.0f - u - v) * n2 + u * n0 + v * n1);
- return is_zero(N)? Ng: N;
+ return is_zero(N) ? Ng : N;
}
/* Ray differentials on triangle */
-ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, int prim, ccl_addr_space float3 *dPdu, ccl_addr_space float3 *dPdv)
+ccl_device_inline void triangle_dPdudv(KernelGlobals *kg,
+ int prim,
+ ccl_addr_space float3 *dPdu,
+ ccl_addr_space float3 *dPdv)
{
- /* fetch triangle vertex coordinates */
- const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
- const float3 p0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0));
- const float3 p1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1));
- const float3 p2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2));
-
- /* compute derivatives of P w.r.t. uv */
- *dPdu = (p0 - p2);
- *dPdv = (p1 - p2);
+ /* fetch triangle vertex coordinates */
+ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
+ const float3 p0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0));
+ const float3 p1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1));
+ const float3 p2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2));
+
+ /* compute derivatives of P w.r.t. uv */
+ *dPdu = (p0 - p2);
+ *dPdv = (p1 - p2);
}
/* Reading attributes on various triangle elements */
-ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
+ccl_device float triangle_attribute_float(
+ KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
{
- if(desc.element == ATTR_ELEMENT_FACE) {
- if(dx) *dx = 0.0f;
- if(dy) *dy = 0.0f;
+ if (desc.element == ATTR_ELEMENT_FACE) {
+ if (dx)
+ *dx = 0.0f;
+ if (dy)
+ *dy = 0.0f;
- return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim);
- }
- else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
- uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
+ return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim);
+ }
+ else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
+ uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
- float f0 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.x);
- float f1 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.y);
- float f2 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.z);
+ float f0 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.x);
+ float f1 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.y);
+ float f2 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.z);
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
- if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+ if (dx)
+ *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2;
+ if (dy)
+ *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2;
#endif
- return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
- }
- else if(desc.element == ATTR_ELEMENT_CORNER) {
- int tri = desc.offset + sd->prim*3;
- float f0 = kernel_tex_fetch(__attributes_float, tri + 0);
- float f1 = kernel_tex_fetch(__attributes_float, tri + 1);
- float f2 = kernel_tex_fetch(__attributes_float, tri + 2);
+ return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
+ }
+ else if (desc.element == ATTR_ELEMENT_CORNER) {
+ int tri = desc.offset + sd->prim * 3;
+ float f0 = kernel_tex_fetch(__attributes_float, tri + 0);
+ float f1 = kernel_tex_fetch(__attributes_float, tri + 1);
+ float f2 = kernel_tex_fetch(__attributes_float, tri + 2);
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
- if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+ if (dx)
+ *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2;
+ if (dy)
+ *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2;
#endif
- return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
- }
- else {
- if(dx) *dx = 0.0f;
- if(dy) *dy = 0.0f;
+ return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
+ }
+ else {
+ if (dx)
+ *dx = 0.0f;
+ if (dy)
+ *dy = 0.0f;
- return 0.0f;
- }
+ return 0.0f;
+ }
}
-ccl_device float2 triangle_attribute_float2(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float2 *dx, float2 *dy)
+ccl_device float2 triangle_attribute_float2(KernelGlobals *kg,
+ const ShaderData *sd,
+ const AttributeDescriptor desc,
+ float2 *dx,
+ float2 *dy)
{
- if(desc.element == ATTR_ELEMENT_FACE) {
- if(dx) *dx = make_float2(0.0f, 0.0f);
- if(dy) *dy = make_float2(0.0f, 0.0f);
+ if (desc.element == ATTR_ELEMENT_FACE) {
+ if (dx)
+ *dx = make_float2(0.0f, 0.0f);
+ if (dy)
+ *dy = make_float2(0.0f, 0.0f);
- return kernel_tex_fetch(__attributes_float2, desc.offset + sd->prim);
- }
- else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
- uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
+ return kernel_tex_fetch(__attributes_float2, desc.offset + sd->prim);
+ }
+ else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
+ uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
- float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.x);
- float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.y);
- float2 f2 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.z);
+ float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.x);
+ float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.y);
+ float2 f2 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.z);
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
- if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+ if (dx)
+ *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2;
+ if (dy)
+ *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2;
#endif
- return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
- }
- else if(desc.element == ATTR_ELEMENT_CORNER) {
- int tri = desc.offset + sd->prim*3;
- float2 f0, f1, f2;
+ return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
+ }
+ else if (desc.element == ATTR_ELEMENT_CORNER) {
+ int tri = desc.offset + sd->prim * 3;
+ float2 f0, f1, f2;
- if(desc.element == ATTR_ELEMENT_CORNER) {
- f0 = kernel_tex_fetch(__attributes_float2, tri + 0);
- f1 = kernel_tex_fetch(__attributes_float2, tri + 1);
- f2 = kernel_tex_fetch(__attributes_float2, tri + 2);
- }
+ if (desc.element == ATTR_ELEMENT_CORNER) {
+ f0 = kernel_tex_fetch(__attributes_float2, tri + 0);
+ f1 = kernel_tex_fetch(__attributes_float2, tri + 1);
+ f2 = kernel_tex_fetch(__attributes_float2, tri + 2);
+ }
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
- if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+ if (dx)
+ *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2;
+ if (dy)
+ *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2;
#endif
- return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
- }
- else {
- if(dx) *dx = make_float2(0.0f, 0.0f);
- if(dy) *dy = make_float2(0.0f, 0.0f);
+ return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
+ }
+ else {
+ if (dx)
+ *dx = make_float2(0.0f, 0.0f);
+ if (dy)
+ *dy = make_float2(0.0f, 0.0f);
- return make_float2(0.0f, 0.0f);
- }
+ return make_float2(0.0f, 0.0f);
+ }
}
-ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy)
+ccl_device float3 triangle_attribute_float3(KernelGlobals *kg,
+ const ShaderData *sd,
+ const AttributeDescriptor desc,
+ float3 *dx,
+ float3 *dy)
{
- if(desc.element == ATTR_ELEMENT_FACE) {
- if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
- if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
-
- return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim));
- }
- else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
- uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
-
- float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x));
- float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y));
- float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z));
+ if (desc.element == ATTR_ELEMENT_FACE) {
+ if (dx)
+ *dx = make_float3(0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float3(0.0f, 0.0f, 0.0f);
+
+ return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim));
+ }
+ else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
+ uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
+
+ float3 f0 = float4_to_float3(
+ kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x));
+ float3 f1 = float4_to_float3(
+ kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y));
+ float3 f2 = float4_to_float3(
+ kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z));
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
- if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+ if (dx)
+ *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2;
+ if (dy)
+ *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2;
#endif
- return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
- }
- else if(desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) {
- int tri = desc.offset + sd->prim*3;
- float3 f0, f1, f2;
-
- if(desc.element == ATTR_ELEMENT_CORNER) {
- f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0));
- f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1));
- f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2));
- }
- else {
- f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 0));
- f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 1));
- f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 2));
- }
+ return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
+ }
+ else if (desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) {
+ int tri = desc.offset + sd->prim * 3;
+ float3 f0, f1, f2;
+
+ if (desc.element == ATTR_ELEMENT_CORNER) {
+ f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0));
+ f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1));
+ f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2));
+ }
+ else {
+ f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 0));
+ f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 1));
+ f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 2));
+ }
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
- if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+ if (dx)
+ *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2;
+ if (dy)
+ *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2;
#endif
- return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
- }
- else {
- if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
- if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
+ return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
+ }
+ else {
+ if (dx)
+ *dx = make_float3(0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float3(0.0f, 0.0f, 0.0f);
- return make_float3(0.0f, 0.0f, 0.0f);
- }
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index 56dbc4473fa..bcad03102d2 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -1,4 +1,4 @@
- /*
+/*
* Copyright 2014, Blender Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -30,447 +30,464 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
int object,
int prim_addr)
{
- const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
+ const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
- const ssef *ssef_verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex];
+ const ssef *ssef_verts = (ssef *)&kg->__prim_tri_verts.data[tri_vindex];
#else
- const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
- tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1),
- tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
+ const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0),
+ tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1),
+ tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2);
#endif
- float t, u, v;
- if(ray_triangle_intersect(P,
- dir,
- isect->t,
+ float t, u, v;
+ if (ray_triangle_intersect(P,
+ dir,
+ isect->t,
#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
- ssef_verts,
+ ssef_verts,
#else
- float4_to_float3(tri_a),
- float4_to_float3(tri_b),
- float4_to_float3(tri_c),
+ float4_to_float3(tri_a),
+ float4_to_float3(tri_b),
+ float4_to_float3(tri_c),
#endif
- &u, &v, &t))
- {
+ &u,
+ &v,
+ &t)) {
#ifdef __VISIBILITY_FLAG__
- /* Visibility flag test. we do it here under the assumption
- * that most triangles are culled by node flags.
- */
- if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
+ /* Visibility flag test. we do it here under the assumption
+ * that most triangles are culled by node flags.
+ */
+ if (kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
#endif
- {
- isect->prim = prim_addr;
- isect->object = object;
- isect->type = PRIMITIVE_TRIANGLE;
- isect->u = u;
- isect->v = v;
- isect->t = t;
- return true;
- }
- }
- return false;
+ {
+ isect->prim = prim_addr;
+ isect->object = object;
+ isect->type = PRIMITIVE_TRIANGLE;
+ isect->u = u;
+ isect->v = v;
+ isect->t = t;
+ return true;
+ }
+ }
+ return false;
}
#ifdef __KERNEL_AVX2__
-#define cross256(A,B, C,D) _mm256_fmsub_ps(A,B, _mm256_mul_ps(C,D))
-ccl_device_inline int ray_triangle_intersect8(
- KernelGlobals *kg,
- float3 ray_P,
- float3 ray_dir,
- Intersection **isect,
- uint visibility,
- int object,
- __m256 *triA,
- __m256 *triB,
- __m256 *triC,
- int prim_addr,
- int prim_num,
- uint *num_hits,
- uint max_hits,
- int *num_hits_in_instance,
- float isect_t)
+# define cross256(A, B, C, D) _mm256_fmsub_ps(A, B, _mm256_mul_ps(C, D))
+ccl_device_inline int ray_triangle_intersect8(KernelGlobals *kg,
+ float3 ray_P,
+ float3 ray_dir,
+ Intersection **isect,
+ uint visibility,
+ int object,
+ __m256 *triA,
+ __m256 *triB,
+ __m256 *triC,
+ int prim_addr,
+ int prim_num,
+ uint *num_hits,
+ uint max_hits,
+ int *num_hits_in_instance,
+ float isect_t)
{
- const unsigned char prim_num_mask = (1 << prim_num) - 1;
-
- const __m256i zero256 = _mm256_setzero_si256();
-
- const __m256 Px256 = _mm256_set1_ps(ray_P.x);
- const __m256 Py256 = _mm256_set1_ps(ray_P.y);
- const __m256 Pz256 = _mm256_set1_ps(ray_P.z);
-
- const __m256 dirx256 = _mm256_set1_ps(ray_dir.x);
- const __m256 diry256 = _mm256_set1_ps(ray_dir.y);
- const __m256 dirz256 = _mm256_set1_ps(ray_dir.z);
-
- /* Calculate vertices relative to ray origin. */
- __m256 v0_x_256 = _mm256_sub_ps(triC[0], Px256);
- __m256 v0_y_256 = _mm256_sub_ps(triC[1], Py256);
- __m256 v0_z_256 = _mm256_sub_ps(triC[2], Pz256);
-
- __m256 v1_x_256 = _mm256_sub_ps(triA[0], Px256);
- __m256 v1_y_256 = _mm256_sub_ps(triA[1], Py256);
- __m256 v1_z_256 = _mm256_sub_ps(triA[2], Pz256);
-
- __m256 v2_x_256 = _mm256_sub_ps(triB[0], Px256);
- __m256 v2_y_256 = _mm256_sub_ps(triB[1], Py256);
- __m256 v2_z_256 = _mm256_sub_ps(triB[2], Pz256);
-
- __m256 v0_v1_x_256 = _mm256_add_ps(v0_x_256, v1_x_256);
- __m256 v0_v1_y_256 = _mm256_add_ps(v0_y_256, v1_y_256);
- __m256 v0_v1_z_256 = _mm256_add_ps(v0_z_256, v1_z_256);
-
- __m256 v0_v2_x_256 = _mm256_add_ps(v0_x_256, v2_x_256);
- __m256 v0_v2_y_256 = _mm256_add_ps(v0_y_256, v2_y_256);
- __m256 v0_v2_z_256 = _mm256_add_ps(v0_z_256, v2_z_256);
-
- __m256 v1_v2_x_256 = _mm256_add_ps(v1_x_256, v2_x_256);
- __m256 v1_v2_y_256 = _mm256_add_ps(v1_y_256, v2_y_256);
- __m256 v1_v2_z_256 = _mm256_add_ps(v1_z_256, v2_z_256);
-
- /* Calculate triangle edges. */
- __m256 e0_x_256 = _mm256_sub_ps(v2_x_256, v0_x_256);
- __m256 e0_y_256 = _mm256_sub_ps(v2_y_256, v0_y_256);
- __m256 e0_z_256 = _mm256_sub_ps(v2_z_256, v0_z_256);
-
- __m256 e1_x_256 = _mm256_sub_ps(v0_x_256, v1_x_256);
- __m256 e1_y_256 = _mm256_sub_ps(v0_y_256, v1_y_256);
- __m256 e1_z_256 = _mm256_sub_ps(v0_z_256, v1_z_256);
-
- __m256 e2_x_256 = _mm256_sub_ps(v1_x_256, v2_x_256);
- __m256 e2_y_256 = _mm256_sub_ps(v1_y_256, v2_y_256);
- __m256 e2_z_256 = _mm256_sub_ps(v1_z_256, v2_z_256);
-
- /* Perform edge tests. */
- /* cross (AyBz - AzBy, AzBx -AxBz, AxBy - AyBx) */
- __m256 U_x_256 = cross256(v0_v2_y_256, e0_z_256, v0_v2_z_256, e0_y_256);
- __m256 U_y_256 = cross256(v0_v2_z_256, e0_x_256, v0_v2_x_256, e0_z_256);
- __m256 U_z_256 = cross256(v0_v2_x_256, e0_y_256, v0_v2_y_256, e0_x_256);
- /* vertical dot */
- __m256 U_256 = _mm256_mul_ps(U_x_256, dirx256);
- U_256 = _mm256_fmadd_ps(U_y_256, diry256, U_256);
- U_256 = _mm256_fmadd_ps(U_z_256, dirz256, U_256);
-
- __m256 V_x_256 = cross256(v0_v1_y_256, e1_z_256, v0_v1_z_256, e1_y_256);
- __m256 V_y_256 = cross256(v0_v1_z_256, e1_x_256, v0_v1_x_256, e1_z_256);
- __m256 V_z_256 = cross256(v0_v1_x_256, e1_y_256, v0_v1_y_256, e1_x_256);
- /* vertical dot */
- __m256 V_256 = _mm256_mul_ps(V_x_256, dirx256);
- V_256 = _mm256_fmadd_ps(V_y_256, diry256, V_256);
- V_256 = _mm256_fmadd_ps(V_z_256, dirz256, V_256);
-
- __m256 W_x_256 = cross256(v1_v2_y_256, e2_z_256, v1_v2_z_256, e2_y_256);
- __m256 W_y_256 = cross256(v1_v2_z_256, e2_x_256, v1_v2_x_256, e2_z_256);
- __m256 W_z_256 = cross256(v1_v2_x_256, e2_y_256, v1_v2_y_256, e2_x_256);
- /* vertical dot */
- __m256 W_256 = _mm256_mul_ps(W_x_256, dirx256);
- W_256 = _mm256_fmadd_ps(W_y_256, diry256,W_256);
- W_256 = _mm256_fmadd_ps(W_z_256, dirz256,W_256);
-
- __m256i U_256_1 = _mm256_srli_epi32(_mm256_castps_si256(U_256), 31);
- __m256i V_256_1 = _mm256_srli_epi32(_mm256_castps_si256(V_256), 31);
- __m256i W_256_1 = _mm256_srli_epi32(_mm256_castps_si256(W_256), 31);
- __m256i UVW_256_1 = _mm256_add_epi32(_mm256_add_epi32(U_256_1, V_256_1), W_256_1);
-
- const __m256i one256 = _mm256_set1_epi32(1);
- const __m256i two256 = _mm256_set1_epi32(2);
-
- __m256i mask_minmaxUVW_256 = _mm256_or_si256(
- _mm256_cmpeq_epi32(one256, UVW_256_1),
- _mm256_cmpeq_epi32(two256, UVW_256_1));
-
- unsigned char mask_minmaxUVW_pos = _mm256_movemask_ps(_mm256_castsi256_ps(mask_minmaxUVW_256));
- if((mask_minmaxUVW_pos & prim_num_mask) == prim_num_mask) { //all bits set
- return false;
- }
-
- /* Calculate geometry normal and denominator. */
- __m256 Ng1_x_256 = cross256(e1_y_256, e0_z_256, e1_z_256, e0_y_256);
- __m256 Ng1_y_256 = cross256(e1_z_256, e0_x_256, e1_x_256, e0_z_256);
- __m256 Ng1_z_256 = cross256(e1_x_256, e0_y_256, e1_y_256, e0_x_256);
-
- Ng1_x_256 = _mm256_add_ps(Ng1_x_256, Ng1_x_256);
- Ng1_y_256 = _mm256_add_ps(Ng1_y_256, Ng1_y_256);
- Ng1_z_256 = _mm256_add_ps(Ng1_z_256, Ng1_z_256);
-
- /* vertical dot */
- __m256 den_256 = _mm256_mul_ps(Ng1_x_256, dirx256);
- den_256 = _mm256_fmadd_ps(Ng1_y_256, diry256,den_256);
- den_256 = _mm256_fmadd_ps(Ng1_z_256, dirz256,den_256);
-
- /* Perform depth test. */
- __m256 T_256 = _mm256_mul_ps(Ng1_x_256, v0_x_256);
- T_256 = _mm256_fmadd_ps(Ng1_y_256, v0_y_256,T_256);
- T_256 = _mm256_fmadd_ps(Ng1_z_256, v0_z_256,T_256);
-
- const __m256i c0x80000000 = _mm256_set1_epi32(0x80000000);
- __m256i sign_den_256 = _mm256_and_si256(_mm256_castps_si256(den_256), c0x80000000);
-
- __m256 sign_T_256 = _mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(T_256), sign_den_256));
-
- unsigned char mask_sign_T = _mm256_movemask_ps(sign_T_256);
- if(((mask_minmaxUVW_pos | mask_sign_T) & prim_num_mask) == prim_num_mask) {
- return false;
- }
-
- __m256 xor_signmask_256 = _mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256));
-
- ccl_align(32) float den8[8], U8[8], V8[8], T8[8], sign_T8[8], xor_signmask8[8];
- ccl_align(32) unsigned int mask_minmaxUVW8[8];
-
- if(visibility == PATH_RAY_SHADOW_OPAQUE) {
- __m256i mask_final_256 = _mm256_cmpeq_epi32(mask_minmaxUVW_256, zero256);
- __m256i maskden256 = _mm256_cmpeq_epi32(_mm256_castps_si256(den_256), zero256);
- __m256i mask0 = _mm256_cmpgt_epi32(zero256, _mm256_castps_si256(sign_T_256));
- __m256 rayt_256 = _mm256_set1_ps((*isect)->t);
- __m256i mask1 = _mm256_cmpgt_epi32(_mm256_castps_si256(sign_T_256),
- _mm256_castps_si256(
- _mm256_mul_ps(_mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)), rayt_256)
- )
- );
- mask0 = _mm256_or_si256(mask1, mask0);
- mask_final_256 = _mm256_andnot_si256(mask0, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask)
- mask_final_256 = _mm256_andnot_si256(maskden256, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask) & (~maskden)
- unsigned char mask_final = _mm256_movemask_ps(_mm256_castsi256_ps(mask_final_256));
- if((mask_final & prim_num_mask) == 0) {
- return false;
- }
- const int i = __bsf(mask_final);
- __m256 inv_den_256 = _mm256_rcp_ps(den_256);
- U_256 = _mm256_mul_ps(U_256, inv_den_256);
- V_256 = _mm256_mul_ps(V_256, inv_den_256);
- T_256 = _mm256_mul_ps(T_256, inv_den_256);
- _mm256_store_ps(U8, U_256);
- _mm256_store_ps(V8, V_256);
- _mm256_store_ps(T8, T_256);
- /* NOTE: Here we assume visibility for all triangles in the node is
- * the same. */
- (*isect)->u = U8[i];
- (*isect)->v = V8[i];
- (*isect)->t = T8[i];
- (*isect)->prim = (prim_addr + i);
- (*isect)->object = object;
- (*isect)->type = PRIMITIVE_TRIANGLE;
- return true;
- }
- else {
- _mm256_store_ps(den8, den_256);
- _mm256_store_ps(U8, U_256);
- _mm256_store_ps(V8, V_256);
- _mm256_store_ps(T8, T_256);
-
- _mm256_store_ps(sign_T8, sign_T_256);
- _mm256_store_ps(xor_signmask8, xor_signmask_256);
- _mm256_store_si256((__m256i*)mask_minmaxUVW8, mask_minmaxUVW_256);
-
- int ret = false;
-
- if(visibility == PATH_RAY_SHADOW) {
- for(int i = 0; i < prim_num; i++) {
- if(mask_minmaxUVW8[i]) {
- continue;
- }
-#ifdef __VISIBILITY_FLAG__
- if((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) {
- continue;
- }
-#endif
- if((sign_T8[i] < 0.0f) ||
- (sign_T8[i] > (*isect)->t * xor_signmask8[i]))
- {
- continue;
- }
- if(!den8[i]) {
- continue;
- }
- const float inv_den = 1.0f / den8[i];
- (*isect)->u = U8[i] * inv_den;
- (*isect)->v = V8[i] * inv_den;
- (*isect)->t = T8[i] * inv_den;
- (*isect)->prim = (prim_addr + i);
- (*isect)->object = object;
- (*isect)->type = PRIMITIVE_TRIANGLE;
- const int prim = kernel_tex_fetch(__prim_index, (*isect)->prim);
- int shader = 0;
-#ifdef __HAIR__
- if(kernel_tex_fetch(__prim_type, (*isect)->prim) & PRIMITIVE_ALL_TRIANGLE)
-#endif
- {
- shader = kernel_tex_fetch(__tri_shader, prim);
- }
-#ifdef __HAIR__
- else {
- float4 str = kernel_tex_fetch(__curves, prim);
- shader = __float_as_int(str.z);
- }
-#endif
- const int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
- /* If no transparent shadows, all light is blocked. */
- if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
- return 2;
- }
- /* If maximum number of hits reached, block all light. */
- else if(num_hits == NULL || *num_hits == max_hits) {
- return 2;
- }
- /* Move on to next entry in intersections array. */
- ret = true;
- (*isect)++;
- (*num_hits)++;
- (*num_hits_in_instance)++;
- (*isect)->t = isect_t;
- }
- }
- else {
- for(int i = 0; i < prim_num; i++) {
- if(mask_minmaxUVW8[i]) {
- continue;
- }
-#ifdef __VISIBILITY_FLAG__
- if((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) {
- continue;
- }
-#endif
- if((sign_T8[i] < 0.0f) ||
- (sign_T8[i] > (*isect)->t * xor_signmask8[i]))
- {
- continue;
- }
- if(!den8[i]) {
- continue;
- }
- const float inv_den = 1.0f / den8[i];
- (*isect)->u = U8[i] * inv_den;
- (*isect)->v = V8[i] * inv_den;
- (*isect)->t = T8[i] * inv_den;
- (*isect)->prim = (prim_addr + i);
- (*isect)->object = object;
- (*isect)->type = PRIMITIVE_TRIANGLE;
- ret = true;
- }
- }
- return ret;
- }
+ const unsigned char prim_num_mask = (1 << prim_num) - 1;
+
+ const __m256i zero256 = _mm256_setzero_si256();
+
+ const __m256 Px256 = _mm256_set1_ps(ray_P.x);
+ const __m256 Py256 = _mm256_set1_ps(ray_P.y);
+ const __m256 Pz256 = _mm256_set1_ps(ray_P.z);
+
+ const __m256 dirx256 = _mm256_set1_ps(ray_dir.x);
+ const __m256 diry256 = _mm256_set1_ps(ray_dir.y);
+ const __m256 dirz256 = _mm256_set1_ps(ray_dir.z);
+
+ /* Calculate vertices relative to ray origin. */
+ __m256 v0_x_256 = _mm256_sub_ps(triC[0], Px256);
+ __m256 v0_y_256 = _mm256_sub_ps(triC[1], Py256);
+ __m256 v0_z_256 = _mm256_sub_ps(triC[2], Pz256);
+
+ __m256 v1_x_256 = _mm256_sub_ps(triA[0], Px256);
+ __m256 v1_y_256 = _mm256_sub_ps(triA[1], Py256);
+ __m256 v1_z_256 = _mm256_sub_ps(triA[2], Pz256);
+
+ __m256 v2_x_256 = _mm256_sub_ps(triB[0], Px256);
+ __m256 v2_y_256 = _mm256_sub_ps(triB[1], Py256);
+ __m256 v2_z_256 = _mm256_sub_ps(triB[2], Pz256);
+
+ __m256 v0_v1_x_256 = _mm256_add_ps(v0_x_256, v1_x_256);
+ __m256 v0_v1_y_256 = _mm256_add_ps(v0_y_256, v1_y_256);
+ __m256 v0_v1_z_256 = _mm256_add_ps(v0_z_256, v1_z_256);
+
+ __m256 v0_v2_x_256 = _mm256_add_ps(v0_x_256, v2_x_256);
+ __m256 v0_v2_y_256 = _mm256_add_ps(v0_y_256, v2_y_256);
+ __m256 v0_v2_z_256 = _mm256_add_ps(v0_z_256, v2_z_256);
+
+ __m256 v1_v2_x_256 = _mm256_add_ps(v1_x_256, v2_x_256);
+ __m256 v1_v2_y_256 = _mm256_add_ps(v1_y_256, v2_y_256);
+ __m256 v1_v2_z_256 = _mm256_add_ps(v1_z_256, v2_z_256);
+
+ /* Calculate triangle edges. */
+ __m256 e0_x_256 = _mm256_sub_ps(v2_x_256, v0_x_256);
+ __m256 e0_y_256 = _mm256_sub_ps(v2_y_256, v0_y_256);
+ __m256 e0_z_256 = _mm256_sub_ps(v2_z_256, v0_z_256);
+
+ __m256 e1_x_256 = _mm256_sub_ps(v0_x_256, v1_x_256);
+ __m256 e1_y_256 = _mm256_sub_ps(v0_y_256, v1_y_256);
+ __m256 e1_z_256 = _mm256_sub_ps(v0_z_256, v1_z_256);
+
+ __m256 e2_x_256 = _mm256_sub_ps(v1_x_256, v2_x_256);
+ __m256 e2_y_256 = _mm256_sub_ps(v1_y_256, v2_y_256);
+ __m256 e2_z_256 = _mm256_sub_ps(v1_z_256, v2_z_256);
+
+ /* Perform edge tests. */
+ /* cross (AyBz - AzBy, AzBx -AxBz, AxBy - AyBx) */
+ __m256 U_x_256 = cross256(v0_v2_y_256, e0_z_256, v0_v2_z_256, e0_y_256);
+ __m256 U_y_256 = cross256(v0_v2_z_256, e0_x_256, v0_v2_x_256, e0_z_256);
+ __m256 U_z_256 = cross256(v0_v2_x_256, e0_y_256, v0_v2_y_256, e0_x_256);
+ /* vertical dot */
+ __m256 U_256 = _mm256_mul_ps(U_x_256, dirx256);
+ U_256 = _mm256_fmadd_ps(U_y_256, diry256, U_256);
+ U_256 = _mm256_fmadd_ps(U_z_256, dirz256, U_256);
+
+ __m256 V_x_256 = cross256(v0_v1_y_256, e1_z_256, v0_v1_z_256, e1_y_256);
+ __m256 V_y_256 = cross256(v0_v1_z_256, e1_x_256, v0_v1_x_256, e1_z_256);
+ __m256 V_z_256 = cross256(v0_v1_x_256, e1_y_256, v0_v1_y_256, e1_x_256);
+ /* vertical dot */
+ __m256 V_256 = _mm256_mul_ps(V_x_256, dirx256);
+ V_256 = _mm256_fmadd_ps(V_y_256, diry256, V_256);
+ V_256 = _mm256_fmadd_ps(V_z_256, dirz256, V_256);
+
+ __m256 W_x_256 = cross256(v1_v2_y_256, e2_z_256, v1_v2_z_256, e2_y_256);
+ __m256 W_y_256 = cross256(v1_v2_z_256, e2_x_256, v1_v2_x_256, e2_z_256);
+ __m256 W_z_256 = cross256(v1_v2_x_256, e2_y_256, v1_v2_y_256, e2_x_256);
+ /* vertical dot */
+ __m256 W_256 = _mm256_mul_ps(W_x_256, dirx256);
+ W_256 = _mm256_fmadd_ps(W_y_256, diry256, W_256);
+ W_256 = _mm256_fmadd_ps(W_z_256, dirz256, W_256);
+
+ __m256i U_256_1 = _mm256_srli_epi32(_mm256_castps_si256(U_256), 31);
+ __m256i V_256_1 = _mm256_srli_epi32(_mm256_castps_si256(V_256), 31);
+ __m256i W_256_1 = _mm256_srli_epi32(_mm256_castps_si256(W_256), 31);
+ __m256i UVW_256_1 = _mm256_add_epi32(_mm256_add_epi32(U_256_1, V_256_1), W_256_1);
+
+ const __m256i one256 = _mm256_set1_epi32(1);
+ const __m256i two256 = _mm256_set1_epi32(2);
+
+ __m256i mask_minmaxUVW_256 = _mm256_or_si256(_mm256_cmpeq_epi32(one256, UVW_256_1),
+ _mm256_cmpeq_epi32(two256, UVW_256_1));
+
+ unsigned char mask_minmaxUVW_pos = _mm256_movemask_ps(_mm256_castsi256_ps(mask_minmaxUVW_256));
+ if ((mask_minmaxUVW_pos & prim_num_mask) == prim_num_mask) { //all bits set
+ return false;
+ }
+
+ /* Calculate geometry normal and denominator. */
+ __m256 Ng1_x_256 = cross256(e1_y_256, e0_z_256, e1_z_256, e0_y_256);
+ __m256 Ng1_y_256 = cross256(e1_z_256, e0_x_256, e1_x_256, e0_z_256);
+ __m256 Ng1_z_256 = cross256(e1_x_256, e0_y_256, e1_y_256, e0_x_256);
+
+ Ng1_x_256 = _mm256_add_ps(Ng1_x_256, Ng1_x_256);
+ Ng1_y_256 = _mm256_add_ps(Ng1_y_256, Ng1_y_256);
+ Ng1_z_256 = _mm256_add_ps(Ng1_z_256, Ng1_z_256);
+
+ /* vertical dot */
+ __m256 den_256 = _mm256_mul_ps(Ng1_x_256, dirx256);
+ den_256 = _mm256_fmadd_ps(Ng1_y_256, diry256, den_256);
+ den_256 = _mm256_fmadd_ps(Ng1_z_256, dirz256, den_256);
+
+ /* Perform depth test. */
+ __m256 T_256 = _mm256_mul_ps(Ng1_x_256, v0_x_256);
+ T_256 = _mm256_fmadd_ps(Ng1_y_256, v0_y_256, T_256);
+ T_256 = _mm256_fmadd_ps(Ng1_z_256, v0_z_256, T_256);
+
+ const __m256i c0x80000000 = _mm256_set1_epi32(0x80000000);
+ __m256i sign_den_256 = _mm256_and_si256(_mm256_castps_si256(den_256), c0x80000000);
+
+ __m256 sign_T_256 = _mm256_castsi256_ps(
+ _mm256_xor_si256(_mm256_castps_si256(T_256), sign_den_256));
+
+ unsigned char mask_sign_T = _mm256_movemask_ps(sign_T_256);
+ if (((mask_minmaxUVW_pos | mask_sign_T) & prim_num_mask) == prim_num_mask) {
+ return false;
+ }
+
+ __m256 xor_signmask_256 = _mm256_castsi256_ps(
+ _mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256));
+
+ ccl_align(32) float den8[8], U8[8], V8[8], T8[8], sign_T8[8], xor_signmask8[8];
+ ccl_align(32) unsigned int mask_minmaxUVW8[8];
+
+ if (visibility == PATH_RAY_SHADOW_OPAQUE) {
+ __m256i mask_final_256 = _mm256_cmpeq_epi32(mask_minmaxUVW_256, zero256);
+ __m256i maskden256 = _mm256_cmpeq_epi32(_mm256_castps_si256(den_256), zero256);
+ __m256i mask0 = _mm256_cmpgt_epi32(zero256, _mm256_castps_si256(sign_T_256));
+ __m256 rayt_256 = _mm256_set1_ps((*isect)->t);
+ __m256i mask1 = _mm256_cmpgt_epi32(
+ _mm256_castps_si256(sign_T_256),
+ _mm256_castps_si256(_mm256_mul_ps(
+ _mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)),
+ rayt_256)));
+ mask0 = _mm256_or_si256(mask1, mask0);
+ mask_final_256 = _mm256_andnot_si256(mask0, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask)
+ mask_final_256 = _mm256_andnot_si256(
+ maskden256, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask) & (~maskden)
+ unsigned char mask_final = _mm256_movemask_ps(_mm256_castsi256_ps(mask_final_256));
+ if ((mask_final & prim_num_mask) == 0) {
+ return false;
+ }
+ const int i = __bsf(mask_final);
+ __m256 inv_den_256 = _mm256_rcp_ps(den_256);
+ U_256 = _mm256_mul_ps(U_256, inv_den_256);
+ V_256 = _mm256_mul_ps(V_256, inv_den_256);
+ T_256 = _mm256_mul_ps(T_256, inv_den_256);
+ _mm256_store_ps(U8, U_256);
+ _mm256_store_ps(V8, V_256);
+ _mm256_store_ps(T8, T_256);
+ /* NOTE: Here we assume visibility for all triangles in the node is
+ * the same. */
+ (*isect)->u = U8[i];
+ (*isect)->v = V8[i];
+ (*isect)->t = T8[i];
+ (*isect)->prim = (prim_addr + i);
+ (*isect)->object = object;
+ (*isect)->type = PRIMITIVE_TRIANGLE;
+ return true;
+ }
+ else {
+ _mm256_store_ps(den8, den_256);
+ _mm256_store_ps(U8, U_256);
+ _mm256_store_ps(V8, V_256);
+ _mm256_store_ps(T8, T_256);
+
+ _mm256_store_ps(sign_T8, sign_T_256);
+ _mm256_store_ps(xor_signmask8, xor_signmask_256);
+ _mm256_store_si256((__m256i *)mask_minmaxUVW8, mask_minmaxUVW_256);
+
+ int ret = false;
+
+ if (visibility == PATH_RAY_SHADOW) {
+ for (int i = 0; i < prim_num; i++) {
+ if (mask_minmaxUVW8[i]) {
+ continue;
+ }
+# ifdef __VISIBILITY_FLAG__
+ if ((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) {
+ continue;
+ }
+# endif
+ if ((sign_T8[i] < 0.0f) || (sign_T8[i] > (*isect)->t * xor_signmask8[i])) {
+ continue;
+ }
+ if (!den8[i]) {
+ continue;
+ }
+ const float inv_den = 1.0f / den8[i];
+ (*isect)->u = U8[i] * inv_den;
+ (*isect)->v = V8[i] * inv_den;
+ (*isect)->t = T8[i] * inv_den;
+ (*isect)->prim = (prim_addr + i);
+ (*isect)->object = object;
+ (*isect)->type = PRIMITIVE_TRIANGLE;
+ const int prim = kernel_tex_fetch(__prim_index, (*isect)->prim);
+ int shader = 0;
+# ifdef __HAIR__
+ if (kernel_tex_fetch(__prim_type, (*isect)->prim) & PRIMITIVE_ALL_TRIANGLE)
+# endif
+ {
+ shader = kernel_tex_fetch(__tri_shader, prim);
+ }
+# ifdef __HAIR__
+ else {
+ float4 str = kernel_tex_fetch(__curves, prim);
+ shader = __float_as_int(str.z);
+ }
+# endif
+ const int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
+ /* If no transparent shadows, all light is blocked. */
+ if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
+ return 2;
+ }
+ /* If maximum number of hits reached, block all light. */
+ else if (num_hits == NULL || *num_hits == max_hits) {
+ return 2;
+ }
+ /* Move on to next entry in intersections array. */
+ ret = true;
+ (*isect)++;
+ (*num_hits)++;
+ (*num_hits_in_instance)++;
+ (*isect)->t = isect_t;
+ }
+ }
+ else {
+ for (int i = 0; i < prim_num; i++) {
+ if (mask_minmaxUVW8[i]) {
+ continue;
+ }
+# ifdef __VISIBILITY_FLAG__
+ if ((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) {
+ continue;
+ }
+# endif
+ if ((sign_T8[i] < 0.0f) || (sign_T8[i] > (*isect)->t * xor_signmask8[i])) {
+ continue;
+ }
+ if (!den8[i]) {
+ continue;
+ }
+ const float inv_den = 1.0f / den8[i];
+ (*isect)->u = U8[i] * inv_den;
+ (*isect)->v = V8[i] * inv_den;
+ (*isect)->t = T8[i] * inv_den;
+ (*isect)->prim = (prim_addr + i);
+ (*isect)->object = object;
+ (*isect)->type = PRIMITIVE_TRIANGLE;
+ ret = true;
+ }
+ }
+ return ret;
+ }
}
-ccl_device_inline int triangle_intersect8(
- KernelGlobals *kg,
- Intersection **isect,
- float3 P,
- float3 dir,
- uint visibility,
- int object,
- int prim_addr,
- int prim_num,
- uint *num_hits,
- uint max_hits,
- int *num_hits_in_instance,
- float isect_t)
- {
- __m128 tri_a[8], tri_b[8], tri_c[8];
- __m256 tritmp[12], tri[12];
- __m256 triA[3], triB[3], triC[3];
-
- int i, r;
-
- uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
- for(i = 0; i < prim_num; i++) {
- tri_a[i] = *(__m128*)&kg->__prim_tri_verts.data[tri_vindex++];
- tri_b[i] = *(__m128*)&kg->__prim_tri_verts.data[tri_vindex++];
- tri_c[i] = *(__m128*)&kg->__prim_tri_verts.data[tri_vindex++];
- }
- //create 9 or 12 placeholders
- tri[0] = _mm256_castps128_ps256(tri_a[0]); //_mm256_zextps128_ps256
- tri[1] = _mm256_castps128_ps256(tri_b[0]);//_mm256_zextps128_ps256
- tri[2] = _mm256_castps128_ps256(tri_c[0]);//_mm256_zextps128_ps256
-
- tri[3] = _mm256_castps128_ps256(tri_a[1]); //_mm256_zextps128_ps256
- tri[4] = _mm256_castps128_ps256(tri_b[1]);//_mm256_zextps128_ps256
- tri[5] = _mm256_castps128_ps256(tri_c[1]);//_mm256_zextps128_ps256
-
- tri[6] = _mm256_castps128_ps256(tri_a[2]); //_mm256_zextps128_ps256
- tri[7] = _mm256_castps128_ps256(tri_b[2]);//_mm256_zextps128_ps256
- tri[8] = _mm256_castps128_ps256(tri_c[2]);//_mm256_zextps128_ps256
-
- if(prim_num > 3) {
- tri[9] = _mm256_castps128_ps256(tri_a[3]); //_mm256_zextps128_ps256
- tri[10] = _mm256_castps128_ps256(tri_b[3]);//_mm256_zextps128_ps256
- tri[11] = _mm256_castps128_ps256(tri_c[3]);//_mm256_zextps128_ps256
- }
-
- for(i = 4, r = 0; i < prim_num; i ++, r += 3) {
- tri[r] = _mm256_insertf128_ps(tri[r] , tri_a[i], 1);
- tri[r + 1] = _mm256_insertf128_ps(tri[r + 1], tri_b[i], 1);
- tri[r + 2] = _mm256_insertf128_ps(tri[r + 2], tri_c[i], 1);
- }
-
- //------------------------------------------------
- //0! Xa0 Ya0 Za0 1 Xa4 Ya4 Za4 1
- //1! Xb0 Yb0 Zb0 1 Xb4 Yb4 Zb4 1
- //2! Xc0 Yc0 Zc0 1 Xc4 Yc4 Zc4 1
-
- //3! Xa1 Ya1 Za1 1 Xa5 Ya5 Za5 1
- //4! Xb1 Yb1 Zb1 1 Xb5 Yb5 Zb5 1
- //5! Xc1 Yc1 Zc1 1 Xc5 Yc5 Zc5 1
-
- //6! Xa2 Ya2 Za2 1 Xa6 Ya6 Za6 1
- //7! Xb2 Yb2 Zb2 1 Xb6 Yb6 Zb6 1
- //8! Xc2 Yc2 Zc2 1 Xc6 Yc6 Zc6 1
-
- //9! Xa3 Ya3 Za3 1 Xa7 Ya7 Za7 1
- //10! Xb3 Yb3 Zb3 1 Xb7 Yb7 Zb7 1
- //11! Xc3 Yc3 Zc3 1 Xc7 Yc7 Zc7 1
-
- //"transpose"
- tritmp[0] = _mm256_unpacklo_ps(tri[0], tri[3]); //0! Xa0 Xa1 Ya0 Ya1 Xa4 Xa5 Ya4 Ya5
- tritmp[1] = _mm256_unpackhi_ps(tri[0], tri[3]); //1! Za0 Za1 1 1 Za4 Za5 1 1
-
- tritmp[2] = _mm256_unpacklo_ps(tri[6], tri[9]); //2! Xa2 Xa3 Ya2 Ya3 Xa6 Xa7 Ya6 Ya7
- tritmp[3] = _mm256_unpackhi_ps(tri[6], tri[9]); //3! Za2 Za3 1 1 Za6 Za7 1 1
-
- tritmp[4] = _mm256_unpacklo_ps(tri[1], tri[4]); //4! Xb0 Xb1 Yb0 Yb1 Xb4 Xb5 Yb4 Yb5
- tritmp[5] = _mm256_unpackhi_ps(tri[1], tri[4]); //5! Zb0 Zb1 1 1 Zb4 Zb5 1 1
-
- tritmp[6] = _mm256_unpacklo_ps(tri[7], tri[10]); //6! Xb2 Xb3 Yb2 Yb3 Xb6 Xb7 Yb6 Yb7
- tritmp[7] = _mm256_unpackhi_ps(tri[7], tri[10]); //7! Zb2 Zb3 1 1 Zb6 Zb7 1 1
-
- tritmp[8] = _mm256_unpacklo_ps(tri[2], tri[5]); //8! Xc0 Xc1 Yc0 Yc1 Xc4 Xc5 Yc4 Yc5
- tritmp[9] = _mm256_unpackhi_ps(tri[2], tri[5]); //9! Zc0 Zc1 1 1 Zc4 Zc5 1 1
-
- tritmp[10] = _mm256_unpacklo_ps(tri[8], tri[11]); //10! Xc2 Xc3 Yc2 Yc3 Xc6 Xc7 Yc6 Yc7
- tritmp[11] = _mm256_unpackhi_ps(tri[8], tri[11]); //11! Zc2 Zc3 1 1 Zc6 Zc7 1 1
-
- /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
- triA[0] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[0]), _mm256_castps_pd(tritmp[2]))); // Xa0 Xa1 Xa2 Xa3 Xa4 Xa5 Xa6 Xa7
- triA[1] = _mm256_castpd_ps(_mm256_unpackhi_pd(_mm256_castps_pd(tritmp[0]), _mm256_castps_pd(tritmp[2]))); // Ya0 Ya1 Ya2 Ya3 Ya4 Ya5 Ya6 Ya7
- triA[2] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[1]), _mm256_castps_pd(tritmp[3]))); // Za0 Za1 Za2 Za3 Za4 Za5 Za6 Za7
-
- triB[0] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[4]), _mm256_castps_pd(tritmp[6]))); // Xb0 Xb1 Xb2 Xb3 Xb4 Xb5 Xb5 Xb7
- triB[1] = _mm256_castpd_ps(_mm256_unpackhi_pd(_mm256_castps_pd(tritmp[4]), _mm256_castps_pd(tritmp[6]))); // Yb0 Yb1 Yb2 Yb3 Yb4 Yb5 Yb5 Yb7
- triB[2] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[5]), _mm256_castps_pd(tritmp[7]))); // Zb0 Zb1 Zb2 Zb3 Zb4 Zb5 Zb5 Zb7
-
- triC[0] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[8]), _mm256_castps_pd(tritmp[10]))); //Xc0 Xc1 Xc2 Xc3 Xc4 Xc5 Xc6 Xc7
- triC[1] = _mm256_castpd_ps(_mm256_unpackhi_pd(_mm256_castps_pd(tritmp[8]), _mm256_castps_pd(tritmp[10]))); //Yc0 Yc1 Yc2 Yc3 Yc4 Yc5 Yc6 Yc7
- triC[2] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[9]), _mm256_castps_pd(tritmp[11]))); //Zc0 Zc1 Zc2 Zc3 Zc4 Zc5 Zc6 Zc7
-
- /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
-
- int result = ray_triangle_intersect8(kg, P,
- dir,
- isect,
- visibility, object,
- triA,
- triB,
- triC,
- prim_addr,
- prim_num,
- num_hits,
- max_hits,
- num_hits_in_instance,
- isect_t);
- return result;
+ccl_device_inline int triangle_intersect8(KernelGlobals *kg,
+ Intersection **isect,
+ float3 P,
+ float3 dir,
+ uint visibility,
+ int object,
+ int prim_addr,
+ int prim_num,
+ uint *num_hits,
+ uint max_hits,
+ int *num_hits_in_instance,
+ float isect_t)
+{
+ __m128 tri_a[8], tri_b[8], tri_c[8];
+ __m256 tritmp[12], tri[12];
+ __m256 triA[3], triB[3], triC[3];
+
+ int i, r;
+
+ uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
+ for (i = 0; i < prim_num; i++) {
+ tri_a[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++];
+ tri_b[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++];
+ tri_c[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++];
+ }
+ //create 9 or 12 placeholders
+ tri[0] = _mm256_castps128_ps256(tri_a[0]); //_mm256_zextps128_ps256
+ tri[1] = _mm256_castps128_ps256(tri_b[0]); //_mm256_zextps128_ps256
+ tri[2] = _mm256_castps128_ps256(tri_c[0]); //_mm256_zextps128_ps256
+
+ tri[3] = _mm256_castps128_ps256(tri_a[1]); //_mm256_zextps128_ps256
+ tri[4] = _mm256_castps128_ps256(tri_b[1]); //_mm256_zextps128_ps256
+ tri[5] = _mm256_castps128_ps256(tri_c[1]); //_mm256_zextps128_ps256
+
+ tri[6] = _mm256_castps128_ps256(tri_a[2]); //_mm256_zextps128_ps256
+ tri[7] = _mm256_castps128_ps256(tri_b[2]); //_mm256_zextps128_ps256
+ tri[8] = _mm256_castps128_ps256(tri_c[2]); //_mm256_zextps128_ps256
+
+ if (prim_num > 3) {
+ tri[9] = _mm256_castps128_ps256(tri_a[3]); //_mm256_zextps128_ps256
+ tri[10] = _mm256_castps128_ps256(tri_b[3]); //_mm256_zextps128_ps256
+ tri[11] = _mm256_castps128_ps256(tri_c[3]); //_mm256_zextps128_ps256
+ }
+
+ for (i = 4, r = 0; i < prim_num; i++, r += 3) {
+ tri[r] = _mm256_insertf128_ps(tri[r], tri_a[i], 1);
+ tri[r + 1] = _mm256_insertf128_ps(tri[r + 1], tri_b[i], 1);
+ tri[r + 2] = _mm256_insertf128_ps(tri[r + 2], tri_c[i], 1);
+ }
+
+ //------------------------------------------------
+ //0! Xa0 Ya0 Za0 1 Xa4 Ya4 Za4 1
+ //1! Xb0 Yb0 Zb0 1 Xb4 Yb4 Zb4 1
+ //2! Xc0 Yc0 Zc0 1 Xc4 Yc4 Zc4 1
+
+ //3! Xa1 Ya1 Za1 1 Xa5 Ya5 Za5 1
+ //4! Xb1 Yb1 Zb1 1 Xb5 Yb5 Zb5 1
+ //5! Xc1 Yc1 Zc1 1 Xc5 Yc5 Zc5 1
+
+ //6! Xa2 Ya2 Za2 1 Xa6 Ya6 Za6 1
+ //7! Xb2 Yb2 Zb2 1 Xb6 Yb6 Zb6 1
+ //8! Xc2 Yc2 Zc2 1 Xc6 Yc6 Zc6 1
+
+ //9! Xa3 Ya3 Za3 1 Xa7 Ya7 Za7 1
+ //10! Xb3 Yb3 Zb3 1 Xb7 Yb7 Zb7 1
+ //11! Xc3 Yc3 Zc3 1 Xc7 Yc7 Zc7 1
+
+ //"transpose"
+ tritmp[0] = _mm256_unpacklo_ps(tri[0], tri[3]); //0! Xa0 Xa1 Ya0 Ya1 Xa4 Xa5 Ya4 Ya5
+ tritmp[1] = _mm256_unpackhi_ps(tri[0], tri[3]); //1! Za0 Za1 1 1 Za4 Za5 1 1
+
+ tritmp[2] = _mm256_unpacklo_ps(tri[6], tri[9]); //2! Xa2 Xa3 Ya2 Ya3 Xa6 Xa7 Ya6 Ya7
+ tritmp[3] = _mm256_unpackhi_ps(tri[6], tri[9]); //3! Za2 Za3 1 1 Za6 Za7 1 1
+
+ tritmp[4] = _mm256_unpacklo_ps(tri[1], tri[4]); //4! Xb0 Xb1 Yb0 Yb1 Xb4 Xb5 Yb4 Yb5
+ tritmp[5] = _mm256_unpackhi_ps(tri[1], tri[4]); //5! Zb0 Zb1 1 1 Zb4 Zb5 1 1
+
+ tritmp[6] = _mm256_unpacklo_ps(tri[7], tri[10]); //6! Xb2 Xb3 Yb2 Yb3 Xb6 Xb7 Yb6 Yb7
+ tritmp[7] = _mm256_unpackhi_ps(tri[7], tri[10]); //7! Zb2 Zb3 1 1 Zb6 Zb7 1 1
+
+ tritmp[8] = _mm256_unpacklo_ps(tri[2], tri[5]); //8! Xc0 Xc1 Yc0 Yc1 Xc4 Xc5 Yc4 Yc5
+ tritmp[9] = _mm256_unpackhi_ps(tri[2], tri[5]); //9! Zc0 Zc1 1 1 Zc4 Zc5 1 1
+
+ tritmp[10] = _mm256_unpacklo_ps(tri[8], tri[11]); //10! Xc2 Xc3 Yc2 Yc3 Xc6 Xc7 Yc6 Yc7
+ tritmp[11] = _mm256_unpackhi_ps(tri[8], tri[11]); //11! Zc2 Zc3 1 1 Zc6 Zc7 1 1
+
+ /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+ triA[0] = _mm256_castpd_ps(
+ _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[0]),
+ _mm256_castps_pd(tritmp[2]))); // Xa0 Xa1 Xa2 Xa3 Xa4 Xa5 Xa6 Xa7
+ triA[1] = _mm256_castpd_ps(
+ _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[0]),
+ _mm256_castps_pd(tritmp[2]))); // Ya0 Ya1 Ya2 Ya3 Ya4 Ya5 Ya6 Ya7
+ triA[2] = _mm256_castpd_ps(
+ _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[1]),
+ _mm256_castps_pd(tritmp[3]))); // Za0 Za1 Za2 Za3 Za4 Za5 Za6 Za7
+
+ triB[0] = _mm256_castpd_ps(
+ _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[4]),
+ _mm256_castps_pd(tritmp[6]))); // Xb0 Xb1 Xb2 Xb3 Xb4 Xb5 Xb5 Xb7
+ triB[1] = _mm256_castpd_ps(
+ _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[4]),
+ _mm256_castps_pd(tritmp[6]))); // Yb0 Yb1 Yb2 Yb3 Yb4 Yb5 Yb5 Yb7
+ triB[2] = _mm256_castpd_ps(
+ _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[5]),
+ _mm256_castps_pd(tritmp[7]))); // Zb0 Zb1 Zb2 Zb3 Zb4 Zb5 Zb5 Zb7
+
+ triC[0] = _mm256_castpd_ps(
+ _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[8]),
+ _mm256_castps_pd(tritmp[10]))); //Xc0 Xc1 Xc2 Xc3 Xc4 Xc5 Xc6 Xc7
+ triC[1] = _mm256_castpd_ps(
+ _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[8]),
+ _mm256_castps_pd(tritmp[10]))); //Yc0 Yc1 Yc2 Yc3 Yc4 Yc5 Yc6 Yc7
+ triC[2] = _mm256_castpd_ps(
+ _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[9]),
+ _mm256_castps_pd(tritmp[11]))); //Zc0 Zc1 Zc2 Zc3 Zc4 Zc5 Zc6 Zc7
+
+ /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+
+ int result = ray_triangle_intersect8(kg,
+ P,
+ dir,
+ isect,
+ visibility,
+ object,
+ triA,
+ triB,
+ triC,
+ prim_addr,
+ prim_num,
+ num_hits,
+ max_hits,
+ num_hits_in_instance,
+ isect_t);
+ return result;
}
-#endif /* __KERNEL_AVX2__ */
+#endif /* __KERNEL_AVX2__ */
/* Special ray intersection routines for subsurface scattering. In that case we
* only want to intersect with primitives in the same object, and if case of
@@ -479,106 +496,108 @@ ccl_device_inline int triangle_intersect8(
*/
#ifdef __BVH_LOCAL__
-ccl_device_inline bool triangle_intersect_local(
- KernelGlobals *kg,
- LocalIntersection *local_isect,
- float3 P,
- float3 dir,
- int object,
- int local_object,
- int prim_addr,
- float tmax,
- uint *lcg_state,
- int max_hits)
+ccl_device_inline bool triangle_intersect_local(KernelGlobals *kg,
+ LocalIntersection *local_isect,
+ float3 P,
+ float3 dir,
+ int object,
+ int local_object,
+ int prim_addr,
+ float tmax,
+ uint *lcg_state,
+ int max_hits)
{
- /* Only intersect with matching object, for instanced objects we
- * already know we are only intersecting the right object. */
- if(object == OBJECT_NONE) {
- if(kernel_tex_fetch(__prim_object, prim_addr) != local_object) {
- return false;
- }
- }
-
- const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
-#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
- const ssef *ssef_verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex];
-#else
- const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+0)),
- tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+1)),
- tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+2));
-#endif
- float t, u, v;
- if(!ray_triangle_intersect(P,
- dir,
- tmax,
-#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
- ssef_verts,
-#else
- tri_a, tri_b, tri_c,
-#endif
- &u, &v, &t))
- {
- return false;
- }
-
- /* If no actual hit information is requested, just return here. */
- if(max_hits == 0) {
- return true;
- }
-
- int hit;
- if(lcg_state) {
- /* Record up to max_hits intersections. */
- for(int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
- if(local_isect->hits[i].t == t) {
- return false;
- }
- }
-
- local_isect->num_hits++;
-
- if(local_isect->num_hits <= max_hits) {
- hit = local_isect->num_hits - 1;
- }
- else {
- /* reservoir sampling: if we are at the maximum number of
- * hits, randomly replace element or skip it */
- hit = lcg_step_uint(lcg_state) % local_isect->num_hits;
-
- if(hit >= max_hits)
- return false;
- }
- }
- else {
- /* Record closest intersection only. */
- if(local_isect->num_hits && t > local_isect->hits[0].t) {
- return false;
- }
-
- hit = 0;
- local_isect->num_hits = 1;
- }
-
- /* Record intersection. */
- Intersection *isect = &local_isect->hits[hit];
- isect->prim = prim_addr;
- isect->object = object;
- isect->type = PRIMITIVE_TRIANGLE;
- isect->u = u;
- isect->v = v;
- isect->t = t;
-
- /* Record geometric normal. */
-#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
- const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+0)),
- tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+1)),
- tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+2));
-#endif
- local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
+ /* Only intersect with matching object, for instanced objects we
+ * already know we are only intersecting the right object. */
+ if (object == OBJECT_NONE) {
+ if (kernel_tex_fetch(__prim_object, prim_addr) != local_object) {
+ return false;
+ }
+ }
+
+ const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
+# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
+ const ssef *ssef_verts = (ssef *)&kg->__prim_tri_verts.data[tri_vindex];
+# else
+ const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0)),
+ tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1)),
+ tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2));
+# endif
+ float t, u, v;
+ if (!ray_triangle_intersect(P,
+ dir,
+ tmax,
+# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
+ ssef_verts,
+# else
+ tri_a,
+ tri_b,
+ tri_c,
+# endif
+ &u,
+ &v,
+ &t)) {
+ return false;
+ }
+
+ /* If no actual hit information is requested, just return here. */
+ if (max_hits == 0) {
+ return true;
+ }
+
+ int hit;
+ if (lcg_state) {
+ /* Record up to max_hits intersections. */
+ for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
+ if (local_isect->hits[i].t == t) {
+ return false;
+ }
+ }
+
+ local_isect->num_hits++;
+
+ if (local_isect->num_hits <= max_hits) {
+ hit = local_isect->num_hits - 1;
+ }
+ else {
+ /* reservoir sampling: if we are at the maximum number of
+ * hits, randomly replace element or skip it */
+ hit = lcg_step_uint(lcg_state) % local_isect->num_hits;
+
+ if (hit >= max_hits)
+ return false;
+ }
+ }
+ else {
+ /* Record closest intersection only. */
+ if (local_isect->num_hits && t > local_isect->hits[0].t) {
+ return false;
+ }
+
+ hit = 0;
+ local_isect->num_hits = 1;
+ }
+
+ /* Record intersection. */
+ Intersection *isect = &local_isect->hits[hit];
+ isect->prim = prim_addr;
+ isect->object = object;
+ isect->type = PRIMITIVE_TRIANGLE;
+ isect->u = u;
+ isect->v = v;
+ isect->t = t;
+
+ /* Record geometric normal. */
+# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
+ const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0)),
+ tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1)),
+ tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2));
+# endif
+ local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
- return false;
+ return false;
}
-#endif /* __BVH_LOCAL__ */
+#endif /* __BVH_LOCAL__ */
/* Refine triangle intersection to more precise hit point. For rays that travel
* far the precision is often not so good, this reintersects the primitive from
@@ -596,61 +615,61 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg,
const Intersection *isect,
const Ray *ray)
{
- float3 P = ray->P;
- float3 D = ray->D;
- float t = isect->t;
+ float3 P = ray->P;
+ float3 D = ray->D;
+ float t = isect->t;
#ifdef __INTERSECTION_REFINE__
- if(isect->object != OBJECT_NONE) {
- if(UNLIKELY(t == 0.0f)) {
- return P;
- }
+ if (isect->object != OBJECT_NONE) {
+ if (UNLIKELY(t == 0.0f)) {
+ return P;
+ }
# ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_itfm;
+ Transform tfm = sd->ob_itfm;
# else
- Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
# endif
- P = transform_point(&tfm, P);
- D = transform_direction(&tfm, D*t);
- D = normalize_len(D, &t);
- }
-
- P = P + D*t;
-
- const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
- const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
- tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1),
- tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
- float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
- float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
- float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
- float3 qvec = cross(tvec, edge1);
- float3 pvec = cross(D, edge2);
- float det = dot(edge1, pvec);
- if(det != 0.0f) {
- /* If determinant is zero it means ray lies in the plane of
- * the triangle. It is possible in theory due to watertight
- * nature of triangle intersection. For such cases we simply
- * don't refine intersection hoping it'll go all fine.
- */
- float rt = dot(edge2, qvec) / det;
- P = P + D*rt;
- }
-
- if(isect->object != OBJECT_NONE) {
+ P = transform_point(&tfm, P);
+ D = transform_direction(&tfm, D * t);
+ D = normalize_len(D, &t);
+ }
+
+ P = P + D * t;
+
+ const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
+ const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0),
+ tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1),
+ tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2);
+ float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
+ float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
+ float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
+ float3 qvec = cross(tvec, edge1);
+ float3 pvec = cross(D, edge2);
+ float det = dot(edge1, pvec);
+ if (det != 0.0f) {
+ /* If determinant is zero it means ray lies in the plane of
+ * the triangle. It is possible in theory due to watertight
+ * nature of triangle intersection. For such cases we simply
+ * don't refine intersection hoping it'll go all fine.
+ */
+ float rt = dot(edge2, qvec) / det;
+ P = P + D * rt;
+ }
+
+ if (isect->object != OBJECT_NONE) {
# ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_tfm;
+ Transform tfm = sd->ob_tfm;
# else
- Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
# endif
- P = transform_point(&tfm, P);
- }
+ P = transform_point(&tfm, P);
+ }
- return P;
+ return P;
#else
- return P + D*t;
+ return P + D * t;
#endif
}
@@ -662,61 +681,57 @@ ccl_device_inline float3 triangle_refine_local(KernelGlobals *kg,
const Intersection *isect,
const Ray *ray)
{
- float3 P = ray->P;
- float3 D = ray->D;
- float t = isect->t;
+ float3 P = ray->P;
+ float3 D = ray->D;
+ float t = isect->t;
- if(isect->object != OBJECT_NONE) {
+ if (isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_itfm;
+ Transform tfm = sd->ob_itfm;
#else
- Transform tfm = object_fetch_transform(kg,
- isect->object,
- OBJECT_INVERSE_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
#endif
- P = transform_point(&tfm, P);
- D = transform_direction(&tfm, D);
- D = normalize(D);
- }
+ P = transform_point(&tfm, P);
+ D = transform_direction(&tfm, D);
+ D = normalize(D);
+ }
- P = P + D*t;
+ P = P + D * t;
#ifdef __INTERSECTION_REFINE__
- const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
- const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
- tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1),
- tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
- float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
- float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
- float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
- float3 qvec = cross(tvec, edge1);
- float3 pvec = cross(D, edge2);
- float det = dot(edge1, pvec);
- if(det != 0.0f) {
- /* If determinant is zero it means ray lies in the plane of
- * the triangle. It is possible in theory due to watertight
- * nature of triangle intersection. For such cases we simply
- * don't refine intersection hoping it'll go all fine.
- */
- float rt = dot(edge2, qvec) / det;
- P = P + D*rt;
- }
-#endif /* __INTERSECTION_REFINE__ */
-
- if(isect->object != OBJECT_NONE) {
+ const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
+ const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0),
+ tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1),
+ tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2);
+ float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
+ float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
+ float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
+ float3 qvec = cross(tvec, edge1);
+ float3 pvec = cross(D, edge2);
+ float det = dot(edge1, pvec);
+ if (det != 0.0f) {
+ /* If determinant is zero it means ray lies in the plane of
+ * the triangle. It is possible in theory due to watertight
+ * nature of triangle intersection. For such cases we simply
+ * don't refine intersection hoping it'll go all fine.
+ */
+ float rt = dot(edge2, qvec) / det;
+ P = P + D * rt;
+ }
+#endif /* __INTERSECTION_REFINE__ */
+
+ if (isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_tfm;
+ Transform tfm = sd->ob_tfm;
#else
- Transform tfm = object_fetch_transform(kg,
- isect->object,
- OBJECT_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
#endif
- P = transform_point(&tfm, P);
- }
+ P = transform_point(&tfm, P);
+ }
- return P;
+ return P;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h
index 1977d263ece..96cf35a40dc 100644
--- a/intern/cycles/kernel/geom/geom_volume.h
+++ b/intern/cycles/kernel/geom/geom_volume.h
@@ -33,41 +33,47 @@ ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg,
const ShaderData *sd,
float3 P)
{
- /* todo: optimize this so it's just a single matrix multiplication when
- * possible (not motion blur), or perhaps even just translation + scale */
- const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM);
+ /* todo: optimize this so it's just a single matrix multiplication when
+ * possible (not motion blur), or perhaps even just translation + scale */
+ const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM);
- object_inverse_position_transform(kg, sd, &P);
+ object_inverse_position_transform(kg, sd, &P);
- if(desc.offset != ATTR_STD_NOT_FOUND) {
- Transform tfm = primitive_attribute_matrix(kg, sd, desc);
- P = transform_point(&tfm, P);
- }
+ if (desc.offset != ATTR_STD_NOT_FOUND) {
+ Transform tfm = primitive_attribute_matrix(kg, sd, desc);
+ P = transform_point(&tfm, P);
+ }
- return P;
+ return P;
}
-ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc)
+ccl_device float volume_attribute_float(KernelGlobals *kg,
+ const ShaderData *sd,
+ const AttributeDescriptor desc)
{
- float3 P = volume_normalized_position(kg, sd, sd->P);
- InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? INTERPOLATION_CUBIC: INTERPOLATION_NONE;
- float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp);
- return average(float4_to_float3(r));
+ float3 P = volume_normalized_position(kg, sd, sd->P);
+ InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC) ? INTERPOLATION_CUBIC :
+ INTERPOLATION_NONE;
+ float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp);
+ return average(float4_to_float3(r));
}
-ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc)
+ccl_device float3 volume_attribute_float3(KernelGlobals *kg,
+ const ShaderData *sd,
+ const AttributeDescriptor desc)
{
- float3 P = volume_normalized_position(kg, sd, sd->P);
- InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? INTERPOLATION_CUBIC: INTERPOLATION_NONE;
- float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp);
+ float3 P = volume_normalized_position(kg, sd, sd->P);
+ InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC) ? INTERPOLATION_CUBIC :
+ INTERPOLATION_NONE;
+ float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp);
- if(r.w > 1e-6f && r.w != 1.0f) {
- /* For RGBA colors, unpremultiply after interpolation. */
- return float4_to_float3(r) / r.w;
- }
- else {
- return float4_to_float3(r);
- }
+ if (r.w > 1e-6f && r.w != 1.0f) {
+ /* For RGBA colors, unpremultiply after interpolation. */
+ return float4_to_float3(r) / r.w;
+ }
+ else {
+ return float4_to_float3(r);
+ }
}
#endif
diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h
index 1c8c91d15e6..dfdd8843f29 100644
--- a/intern/cycles/kernel/kernel.h
+++ b/intern/cycles/kernel/kernel.h
@@ -24,8 +24,8 @@
CCL_NAMESPACE_BEGIN
-#define KERNEL_NAME_JOIN(x, y, z) x ## _ ## y ## _ ## z
-#define KERNEL_NAME_EVAL(arch, name) KERNEL_NAME_JOIN(kernel, arch, name)
+#define KERNEL_NAME_JOIN(x, y, z) x##_##y##_##z
+#define KERNEL_NAME_EVAL(arch, name) KERNEL_NAME_JOIN(kernel, arch, name)
#define KERNEL_FUNCTION_FULL_NAME(name) KERNEL_NAME_EVAL(KERNEL_ARCH, name)
struct KernelGlobals;
@@ -38,10 +38,7 @@ void *kernel_osl_memory(KernelGlobals *kg);
bool kernel_osl_use(KernelGlobals *kg);
void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size);
-void kernel_tex_copy(KernelGlobals *kg,
- const char *name,
- void *mem,
- size_t size);
+void kernel_tex_copy(KernelGlobals *kg, const char *name, void *mem, size_t size);
#define KERNEL_ARCH cpu
#include "kernel/kernels/cpu/kernel_cpu.h"
@@ -63,4 +60,4 @@ void kernel_tex_copy(KernelGlobals *kg,
CCL_NAMESPACE_END
-#endif /* __KERNEL_H__ */
+#endif /* __KERNEL_H__ */
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index 86ad6e1a061..b9d723222a1 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -21,149 +21,150 @@ CCL_NAMESPACE_BEGIN
* BSDF evaluation result, split per BSDF type. This is used to accumulate
* render passes separately. */
-ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg,
- const ShaderData *sd);
+ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, const ShaderData *sd);
-ccl_device_inline void bsdf_eval_init(BsdfEval *eval, ClosureType type, float3 value, int use_light_pass)
+ccl_device_inline void bsdf_eval_init(BsdfEval *eval,
+ ClosureType type,
+ float3 value,
+ int use_light_pass)
{
#ifdef __PASSES__
- eval->use_light_pass = use_light_pass;
-
- if(eval->use_light_pass) {
- eval->diffuse = make_float3(0.0f, 0.0f, 0.0f);
- eval->glossy = make_float3(0.0f, 0.0f, 0.0f);
- eval->transmission = make_float3(0.0f, 0.0f, 0.0f);
- eval->transparent = make_float3(0.0f, 0.0f, 0.0f);
- eval->subsurface = make_float3(0.0f, 0.0f, 0.0f);
- eval->scatter = make_float3(0.0f, 0.0f, 0.0f);
-
- if(type == CLOSURE_BSDF_TRANSPARENT_ID)
- eval->transparent = value;
- else if(CLOSURE_IS_BSDF_DIFFUSE(type))
- eval->diffuse = value;
- else if(CLOSURE_IS_BSDF_GLOSSY(type))
- eval->glossy = value;
- else if(CLOSURE_IS_BSDF_TRANSMISSION(type))
- eval->transmission = value;
- else if(CLOSURE_IS_BSDF_BSSRDF(type))
- eval->subsurface = value;
- else if(CLOSURE_IS_PHASE(type))
- eval->scatter = value;
- }
- else
-#endif
- {
- eval->diffuse = value;
- }
+ eval->use_light_pass = use_light_pass;
+
+ if (eval->use_light_pass) {
+ eval->diffuse = make_float3(0.0f, 0.0f, 0.0f);
+ eval->glossy = make_float3(0.0f, 0.0f, 0.0f);
+ eval->transmission = make_float3(0.0f, 0.0f, 0.0f);
+ eval->transparent = make_float3(0.0f, 0.0f, 0.0f);
+ eval->subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ eval->scatter = make_float3(0.0f, 0.0f, 0.0f);
+
+ if (type == CLOSURE_BSDF_TRANSPARENT_ID)
+ eval->transparent = value;
+ else if (CLOSURE_IS_BSDF_DIFFUSE(type))
+ eval->diffuse = value;
+ else if (CLOSURE_IS_BSDF_GLOSSY(type))
+ eval->glossy = value;
+ else if (CLOSURE_IS_BSDF_TRANSMISSION(type))
+ eval->transmission = value;
+ else if (CLOSURE_IS_BSDF_BSSRDF(type))
+ eval->subsurface = value;
+ else if (CLOSURE_IS_PHASE(type))
+ eval->scatter = value;
+ }
+ else
+#endif
+ {
+ eval->diffuse = value;
+ }
#ifdef __SHADOW_TRICKS__
- eval->sum_no_mis = make_float3(0.0f, 0.0f, 0.0f);
+ eval->sum_no_mis = make_float3(0.0f, 0.0f, 0.0f);
#endif
}
-ccl_device_inline void bsdf_eval_accum(BsdfEval *eval, ClosureType type, float3 value, float mis_weight)
+ccl_device_inline void bsdf_eval_accum(BsdfEval *eval,
+ ClosureType type,
+ float3 value,
+ float mis_weight)
{
#ifdef __SHADOW_TRICKS__
- eval->sum_no_mis += value;
+ eval->sum_no_mis += value;
#endif
- value *= mis_weight;
+ value *= mis_weight;
#ifdef __PASSES__
- if(eval->use_light_pass) {
- if(CLOSURE_IS_BSDF_DIFFUSE(type))
- eval->diffuse += value;
- else if(CLOSURE_IS_BSDF_GLOSSY(type))
- eval->glossy += value;
- else if(CLOSURE_IS_BSDF_TRANSMISSION(type))
- eval->transmission += value;
- else if(CLOSURE_IS_BSDF_BSSRDF(type))
- eval->subsurface += value;
- else if(CLOSURE_IS_PHASE(type))
- eval->scatter += value;
-
- /* skipping transparent, this function is used by for eval(), will be zero then */
- }
- else
-#endif
- {
- eval->diffuse += value;
- }
+ if (eval->use_light_pass) {
+ if (CLOSURE_IS_BSDF_DIFFUSE(type))
+ eval->diffuse += value;
+ else if (CLOSURE_IS_BSDF_GLOSSY(type))
+ eval->glossy += value;
+ else if (CLOSURE_IS_BSDF_TRANSMISSION(type))
+ eval->transmission += value;
+ else if (CLOSURE_IS_BSDF_BSSRDF(type))
+ eval->subsurface += value;
+ else if (CLOSURE_IS_PHASE(type))
+ eval->scatter += value;
+
+ /* skipping transparent, this function is used by for eval(), will be zero then */
+ }
+ else
+#endif
+ {
+ eval->diffuse += value;
+ }
}
ccl_device_inline bool bsdf_eval_is_zero(BsdfEval *eval)
{
#ifdef __PASSES__
- if(eval->use_light_pass) {
- return is_zero(eval->diffuse)
- && is_zero(eval->glossy)
- && is_zero(eval->transmission)
- && is_zero(eval->transparent)
- && is_zero(eval->subsurface)
- && is_zero(eval->scatter);
- }
- else
-#endif
- {
- return is_zero(eval->diffuse);
- }
+ if (eval->use_light_pass) {
+ return is_zero(eval->diffuse) && is_zero(eval->glossy) && is_zero(eval->transmission) &&
+ is_zero(eval->transparent) && is_zero(eval->subsurface) && is_zero(eval->scatter);
+ }
+ else
+#endif
+ {
+ return is_zero(eval->diffuse);
+ }
}
ccl_device_inline void bsdf_eval_mis(BsdfEval *eval, float value)
{
#ifdef __PASSES__
- if(eval->use_light_pass) {
- eval->diffuse *= value;
- eval->glossy *= value;
- eval->transmission *= value;
- eval->subsurface *= value;
- eval->scatter *= value;
-
- /* skipping transparent, this function is used by for eval(), will be zero then */
- }
- else
-#endif
- {
- eval->diffuse *= value;
- }
+ if (eval->use_light_pass) {
+ eval->diffuse *= value;
+ eval->glossy *= value;
+ eval->transmission *= value;
+ eval->subsurface *= value;
+ eval->scatter *= value;
+
+ /* skipping transparent, this function is used by for eval(), will be zero then */
+ }
+ else
+#endif
+ {
+ eval->diffuse *= value;
+ }
}
ccl_device_inline void bsdf_eval_mul(BsdfEval *eval, float value)
{
#ifdef __SHADOW_TRICKS__
- eval->sum_no_mis *= value;
+ eval->sum_no_mis *= value;
#endif
- bsdf_eval_mis(eval, value);
+ bsdf_eval_mis(eval, value);
}
ccl_device_inline void bsdf_eval_mul3(BsdfEval *eval, float3 value)
{
#ifdef __SHADOW_TRICKS__
- eval->sum_no_mis *= value;
+ eval->sum_no_mis *= value;
#endif
#ifdef __PASSES__
- if(eval->use_light_pass) {
- eval->diffuse *= value;
- eval->glossy *= value;
- eval->transmission *= value;
- eval->subsurface *= value;
- eval->scatter *= value;
-
- /* skipping transparent, this function is used by for eval(), will be zero then */
- }
- else
- eval->diffuse *= value;
+ if (eval->use_light_pass) {
+ eval->diffuse *= value;
+ eval->glossy *= value;
+ eval->transmission *= value;
+ eval->subsurface *= value;
+ eval->scatter *= value;
+
+ /* skipping transparent, this function is used by for eval(), will be zero then */
+ }
+ else
+ eval->diffuse *= value;
#else
- eval->diffuse *= value;
+ eval->diffuse *= value;
#endif
}
ccl_device_inline float3 bsdf_eval_sum(const BsdfEval *eval)
{
#ifdef __PASSES__
- if(eval->use_light_pass) {
- return eval->diffuse + eval->glossy + eval->transmission + eval->subsurface + eval->scatter;
- }
- else
+ if (eval->use_light_pass) {
+ return eval->diffuse + eval->glossy + eval->transmission + eval->subsurface + eval->scatter;
+ }
+ else
#endif
- return eval->diffuse;
+ return eval->diffuse;
}
/* Path Radiance
@@ -175,115 +176,113 @@ ccl_device_inline float3 bsdf_eval_sum(const BsdfEval *eval)
ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass)
{
- /* clear all */
+ /* clear all */
#ifdef __PASSES__
- L->use_light_pass = use_light_pass;
-
- if(use_light_pass) {
- L->indirect = make_float3(0.0f, 0.0f, 0.0f);
- L->direct_emission = make_float3(0.0f, 0.0f, 0.0f);
-
- L->color_diffuse = make_float3(0.0f, 0.0f, 0.0f);
- L->color_glossy = make_float3(0.0f, 0.0f, 0.0f);
- L->color_transmission = make_float3(0.0f, 0.0f, 0.0f);
- L->color_subsurface = make_float3(0.0f, 0.0f, 0.0f);
-
- L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f);
- L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f);
- L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f);
- L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f);
- L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f);
-
- L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f);
- L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f);
- L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f);
- L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f);
- L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f);
-
- L->transparent = 0.0f;
- L->emission = make_float3(0.0f, 0.0f, 0.0f);
- L->background = make_float3(0.0f, 0.0f, 0.0f);
- L->ao = make_float3(0.0f, 0.0f, 0.0f);
- L->shadow = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- L->mist = 0.0f;
-
- L->state.diffuse = make_float3(0.0f, 0.0f, 0.0f);
- L->state.glossy = make_float3(0.0f, 0.0f, 0.0f);
- L->state.transmission = make_float3(0.0f, 0.0f, 0.0f);
- L->state.subsurface = make_float3(0.0f, 0.0f, 0.0f);
- L->state.scatter = make_float3(0.0f, 0.0f, 0.0f);
- L->state.direct = make_float3(0.0f, 0.0f, 0.0f);
- }
- else
-#endif
- {
- L->transparent = 0.0f;
- L->emission = make_float3(0.0f, 0.0f, 0.0f);
- }
+ L->use_light_pass = use_light_pass;
+
+ if (use_light_pass) {
+ L->indirect = make_float3(0.0f, 0.0f, 0.0f);
+ L->direct_emission = make_float3(0.0f, 0.0f, 0.0f);
+
+ L->color_diffuse = make_float3(0.0f, 0.0f, 0.0f);
+ L->color_glossy = make_float3(0.0f, 0.0f, 0.0f);
+ L->color_transmission = make_float3(0.0f, 0.0f, 0.0f);
+ L->color_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+
+ L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f);
+ L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f);
+ L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f);
+ L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f);
+
+ L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f);
+ L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f);
+ L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f);
+ L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f);
+
+ L->transparent = 0.0f;
+ L->emission = make_float3(0.0f, 0.0f, 0.0f);
+ L->background = make_float3(0.0f, 0.0f, 0.0f);
+ L->ao = make_float3(0.0f, 0.0f, 0.0f);
+ L->shadow = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ L->mist = 0.0f;
+
+ L->state.diffuse = make_float3(0.0f, 0.0f, 0.0f);
+ L->state.glossy = make_float3(0.0f, 0.0f, 0.0f);
+ L->state.transmission = make_float3(0.0f, 0.0f, 0.0f);
+ L->state.subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ L->state.scatter = make_float3(0.0f, 0.0f, 0.0f);
+ L->state.direct = make_float3(0.0f, 0.0f, 0.0f);
+ }
+ else
+#endif
+ {
+ L->transparent = 0.0f;
+ L->emission = make_float3(0.0f, 0.0f, 0.0f);
+ }
#ifdef __SHADOW_TRICKS__
- L->path_total = make_float3(0.0f, 0.0f, 0.0f);
- L->path_total_shaded = make_float3(0.0f, 0.0f, 0.0f);
- L->shadow_background_color = make_float3(0.0f, 0.0f, 0.0f);
- L->shadow_throughput = 0.0f;
- L->shadow_transparency = 1.0f;
- L->has_shadow_catcher = 0;
+ L->path_total = make_float3(0.0f, 0.0f, 0.0f);
+ L->path_total_shaded = make_float3(0.0f, 0.0f, 0.0f);
+ L->shadow_background_color = make_float3(0.0f, 0.0f, 0.0f);
+ L->shadow_throughput = 0.0f;
+ L->shadow_transparency = 1.0f;
+ L->has_shadow_catcher = 0;
#endif
#ifdef __DENOISING_FEATURES__
- L->denoising_normal = make_float3(0.0f, 0.0f, 0.0f);
- L->denoising_albedo = make_float3(0.0f, 0.0f, 0.0f);
- L->denoising_depth = 0.0f;
+ L->denoising_normal = make_float3(0.0f, 0.0f, 0.0f);
+ L->denoising_albedo = make_float3(0.0f, 0.0f, 0.0f);
+ L->denoising_depth = 0.0f;
#endif
#ifdef __KERNEL_DEBUG__
- L->debug_data.num_bvh_traversed_nodes = 0;
- L->debug_data.num_bvh_traversed_instances = 0;
- L->debug_data.num_bvh_intersections = 0;
- L->debug_data.num_ray_bounces = 0;
+ L->debug_data.num_bvh_traversed_nodes = 0;
+ L->debug_data.num_bvh_traversed_instances = 0;
+ L->debug_data.num_bvh_intersections = 0;
+ L->debug_data.num_ray_bounces = 0;
#endif
}
-ccl_device_inline void path_radiance_bsdf_bounce(
- KernelGlobals *kg,
- PathRadianceState *L_state,
- ccl_addr_space float3 *throughput,
- BsdfEval *bsdf_eval,
- float bsdf_pdf, int bounce, int bsdf_label)
+ccl_device_inline void path_radiance_bsdf_bounce(KernelGlobals *kg,
+ PathRadianceState *L_state,
+ ccl_addr_space float3 *throughput,
+ BsdfEval *bsdf_eval,
+ float bsdf_pdf,
+ int bounce,
+ int bsdf_label)
{
- float inverse_pdf = 1.0f/bsdf_pdf;
+ float inverse_pdf = 1.0f / bsdf_pdf;
#ifdef __PASSES__
- if(kernel_data.film.use_light_pass) {
- if(bounce == 0 && !(bsdf_label & LABEL_TRANSPARENT)) {
- /* first on directly visible surface */
- float3 value = *throughput*inverse_pdf;
-
- L_state->diffuse = bsdf_eval->diffuse*value;
- L_state->glossy = bsdf_eval->glossy*value;
- L_state->transmission = bsdf_eval->transmission*value;
- L_state->subsurface = bsdf_eval->subsurface*value;
- L_state->scatter = bsdf_eval->scatter*value;
-
- *throughput = L_state->diffuse +
- L_state->glossy +
- L_state->transmission +
- L_state->subsurface +
- L_state->scatter;
-
- L_state->direct = *throughput;
- }
- else {
- /* transparent bounce before first hit, or indirectly visible through BSDF */
- float3 sum = (bsdf_eval_sum(bsdf_eval) + bsdf_eval->transparent) * inverse_pdf;
- *throughput *= sum;
- }
- }
- else
-#endif
- {
- *throughput *= bsdf_eval->diffuse*inverse_pdf;
- }
+ if (kernel_data.film.use_light_pass) {
+ if (bounce == 0 && !(bsdf_label & LABEL_TRANSPARENT)) {
+ /* first on directly visible surface */
+ float3 value = *throughput * inverse_pdf;
+
+ L_state->diffuse = bsdf_eval->diffuse * value;
+ L_state->glossy = bsdf_eval->glossy * value;
+ L_state->transmission = bsdf_eval->transmission * value;
+ L_state->subsurface = bsdf_eval->subsurface * value;
+ L_state->scatter = bsdf_eval->scatter * value;
+
+ *throughput = L_state->diffuse + L_state->glossy + L_state->transmission +
+ L_state->subsurface + L_state->scatter;
+
+ L_state->direct = *throughput;
+ }
+ else {
+ /* transparent bounce before first hit, or indirectly visible through BSDF */
+ float3 sum = (bsdf_eval_sum(bsdf_eval) + bsdf_eval->transparent) * inverse_pdf;
+ *throughput *= sum;
+ }
+ }
+ else
+#endif
+ {
+ *throughput *= bsdf_eval->diffuse * inverse_pdf;
+ }
}
ccl_device_inline void path_radiance_accum_emission(PathRadiance *L,
@@ -292,25 +291,25 @@ ccl_device_inline void path_radiance_accum_emission(PathRadiance *L,
float3 value)
{
#ifdef __SHADOW_TRICKS__
- if(state->flag & PATH_RAY_SHADOW_CATCHER) {
- return;
- }
+ if (state->flag & PATH_RAY_SHADOW_CATCHER) {
+ return;
+ }
#endif
#ifdef __PASSES__
- if(L->use_light_pass) {
- if(state->bounce == 0)
- L->emission += throughput*value;
- else if(state->bounce == 1)
- L->direct_emission += throughput*value;
- else
- L->indirect += throughput*value;
- }
- else
-#endif
- {
- L->emission += throughput*value;
- }
+ if (L->use_light_pass) {
+ if (state->bounce == 0)
+ L->emission += throughput * value;
+ else if (state->bounce == 1)
+ L->direct_emission += throughput * value;
+ else
+ L->indirect += throughput * value;
+ }
+ else
+#endif
+ {
+ L->emission += throughput * value;
+ }
}
ccl_device_inline void path_radiance_accum_ao(PathRadiance *L,
@@ -320,57 +319,56 @@ ccl_device_inline void path_radiance_accum_ao(PathRadiance *L,
float3 bsdf,
float3 ao)
{
- /* Store AO pass. */
- if(L->use_light_pass && state->bounce == 0) {
- L->ao += alpha*throughput*ao;
- }
+ /* Store AO pass. */
+ if (L->use_light_pass && state->bounce == 0) {
+ L->ao += alpha * throughput * ao;
+ }
#ifdef __SHADOW_TRICKS__
- /* For shadow catcher, accumulate ratio. */
- if(state->flag & PATH_RAY_STORE_SHADOW_INFO) {
- float3 light = throughput * bsdf;
- L->path_total += light;
- L->path_total_shaded += ao * light;
+ /* For shadow catcher, accumulate ratio. */
+ if (state->flag & PATH_RAY_STORE_SHADOW_INFO) {
+ float3 light = throughput * bsdf;
+ L->path_total += light;
+ L->path_total_shaded += ao * light;
- if(state->flag & PATH_RAY_SHADOW_CATCHER) {
- return;
- }
- }
+ if (state->flag & PATH_RAY_SHADOW_CATCHER) {
+ return;
+ }
+ }
#endif
#ifdef __PASSES__
- if(L->use_light_pass) {
- if(state->bounce == 0) {
- /* Directly visible lighting. */
- L->direct_diffuse += throughput*bsdf*ao;
- }
- else {
- /* Indirectly visible lighting after BSDF bounce. */
- L->indirect += throughput*bsdf*ao;
- }
- }
- else
-#endif
- {
- L->emission += throughput*bsdf*ao;
- }
+ if (L->use_light_pass) {
+ if (state->bounce == 0) {
+ /* Directly visible lighting. */
+ L->direct_diffuse += throughput * bsdf * ao;
+ }
+ else {
+ /* Indirectly visible lighting after BSDF bounce. */
+ L->indirect += throughput * bsdf * ao;
+ }
+ }
+ else
+#endif
+ {
+ L->emission += throughput * bsdf * ao;
+ }
}
-ccl_device_inline void path_radiance_accum_total_ao(
- PathRadiance *L,
- ccl_addr_space PathState *state,
- float3 throughput,
- float3 bsdf)
+ccl_device_inline void path_radiance_accum_total_ao(PathRadiance *L,
+ ccl_addr_space PathState *state,
+ float3 throughput,
+ float3 bsdf)
{
#ifdef __SHADOW_TRICKS__
- if(state->flag & PATH_RAY_STORE_SHADOW_INFO) {
- L->path_total += throughput * bsdf;
- }
+ if (state->flag & PATH_RAY_STORE_SHADOW_INFO) {
+ L->path_total += throughput * bsdf;
+ }
#else
- (void) L;
- (void) state;
- (void) throughput;
- (void) bsdf;
+ (void)L;
+ (void)state;
+ (void)throughput;
+ (void)bsdf;
#endif
}
@@ -383,171 +381,166 @@ ccl_device_inline void path_radiance_accum_light(PathRadiance *L,
bool is_lamp)
{
#ifdef __SHADOW_TRICKS__
- if(state->flag & PATH_RAY_STORE_SHADOW_INFO) {
- float3 light = throughput * bsdf_eval->sum_no_mis;
- L->path_total += light;
- L->path_total_shaded += shadow * light;
+ if (state->flag & PATH_RAY_STORE_SHADOW_INFO) {
+ float3 light = throughput * bsdf_eval->sum_no_mis;
+ L->path_total += light;
+ L->path_total_shaded += shadow * light;
- if(state->flag & PATH_RAY_SHADOW_CATCHER) {
- return;
- }
- }
+ if (state->flag & PATH_RAY_SHADOW_CATCHER) {
+ return;
+ }
+ }
#endif
#ifdef __PASSES__
- if(L->use_light_pass) {
- if(state->bounce == 0) {
- /* directly visible lighting */
- L->direct_diffuse += throughput*bsdf_eval->diffuse*shadow;
- L->direct_glossy += throughput*bsdf_eval->glossy*shadow;
- L->direct_transmission += throughput*bsdf_eval->transmission*shadow;
- L->direct_subsurface += throughput*bsdf_eval->subsurface*shadow;
- L->direct_scatter += throughput*bsdf_eval->scatter*shadow;
-
- if(is_lamp) {
- L->shadow.x += shadow.x*shadow_fac;
- L->shadow.y += shadow.y*shadow_fac;
- L->shadow.z += shadow.z*shadow_fac;
- }
- }
- else {
- /* indirectly visible lighting after BSDF bounce */
- L->indirect += throughput*bsdf_eval_sum(bsdf_eval)*shadow;
- }
- }
- else
-#endif
- {
- L->emission += throughput*bsdf_eval->diffuse*shadow;
- }
+ if (L->use_light_pass) {
+ if (state->bounce == 0) {
+ /* directly visible lighting */
+ L->direct_diffuse += throughput * bsdf_eval->diffuse * shadow;
+ L->direct_glossy += throughput * bsdf_eval->glossy * shadow;
+ L->direct_transmission += throughput * bsdf_eval->transmission * shadow;
+ L->direct_subsurface += throughput * bsdf_eval->subsurface * shadow;
+ L->direct_scatter += throughput * bsdf_eval->scatter * shadow;
+
+ if (is_lamp) {
+ L->shadow.x += shadow.x * shadow_fac;
+ L->shadow.y += shadow.y * shadow_fac;
+ L->shadow.z += shadow.z * shadow_fac;
+ }
+ }
+ else {
+ /* indirectly visible lighting after BSDF bounce */
+ L->indirect += throughput * bsdf_eval_sum(bsdf_eval) * shadow;
+ }
+ }
+ else
+#endif
+ {
+ L->emission += throughput * bsdf_eval->diffuse * shadow;
+ }
}
-ccl_device_inline void path_radiance_accum_total_light(
- PathRadiance *L,
- ccl_addr_space PathState *state,
- float3 throughput,
- const BsdfEval *bsdf_eval)
+ccl_device_inline void path_radiance_accum_total_light(PathRadiance *L,
+ ccl_addr_space PathState *state,
+ float3 throughput,
+ const BsdfEval *bsdf_eval)
{
#ifdef __SHADOW_TRICKS__
- if(state->flag & PATH_RAY_STORE_SHADOW_INFO) {
- L->path_total += throughput * bsdf_eval->sum_no_mis;
- }
+ if (state->flag & PATH_RAY_STORE_SHADOW_INFO) {
+ L->path_total += throughput * bsdf_eval->sum_no_mis;
+ }
#else
- (void) L;
- (void) state;
- (void) throughput;
- (void) bsdf_eval;
+ (void)L;
+ (void)state;
+ (void)throughput;
+ (void)bsdf_eval;
#endif
}
-ccl_device_inline void path_radiance_accum_background(
- PathRadiance *L,
- ccl_addr_space PathState *state,
- float3 throughput,
- float3 value)
+ccl_device_inline void path_radiance_accum_background(PathRadiance *L,
+ ccl_addr_space PathState *state,
+ float3 throughput,
+ float3 value)
{
#ifdef __SHADOW_TRICKS__
- if(state->flag & PATH_RAY_STORE_SHADOW_INFO) {
- L->path_total += throughput * value;
- L->path_total_shaded += throughput * value * L->shadow_transparency;
+ if (state->flag & PATH_RAY_STORE_SHADOW_INFO) {
+ L->path_total += throughput * value;
+ L->path_total_shaded += throughput * value * L->shadow_transparency;
- if(state->flag & PATH_RAY_SHADOW_CATCHER) {
- return;
- }
- }
+ if (state->flag & PATH_RAY_SHADOW_CATCHER) {
+ return;
+ }
+ }
#endif
#ifdef __PASSES__
- if(L->use_light_pass) {
- if(state->flag & PATH_RAY_TRANSPARENT_BACKGROUND)
- L->background += throughput*value;
- else if(state->bounce == 1)
- L->direct_emission += throughput*value;
- else
- L->indirect += throughput*value;
- }
- else
-#endif
- {
- L->emission += throughput*value;
- }
+ if (L->use_light_pass) {
+ if (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND)
+ L->background += throughput * value;
+ else if (state->bounce == 1)
+ L->direct_emission += throughput * value;
+ else
+ L->indirect += throughput * value;
+ }
+ else
+#endif
+ {
+ L->emission += throughput * value;
+ }
#ifdef __DENOISING_FEATURES__
- L->denoising_albedo += state->denoising_feature_weight * value;
-#endif /* __DENOISING_FEATURES__ */
+ L->denoising_albedo += state->denoising_feature_weight * value;
+#endif /* __DENOISING_FEATURES__ */
}
-ccl_device_inline void path_radiance_accum_transparent(
- PathRadiance *L,
- ccl_addr_space PathState *state,
- float3 throughput)
+ccl_device_inline void path_radiance_accum_transparent(PathRadiance *L,
+ ccl_addr_space PathState *state,
+ float3 throughput)
{
- L->transparent += average(throughput);
+ L->transparent += average(throughput);
}
#ifdef __SHADOW_TRICKS__
-ccl_device_inline void path_radiance_accum_shadowcatcher(
- PathRadiance *L,
- float3 throughput,
- float3 background)
+ccl_device_inline void path_radiance_accum_shadowcatcher(PathRadiance *L,
+ float3 throughput,
+ float3 background)
{
- L->shadow_throughput += average(throughput);
- L->shadow_background_color += throughput * background;
- L->has_shadow_catcher = 1;
+ L->shadow_throughput += average(throughput);
+ L->shadow_background_color += throughput * background;
+ L->has_shadow_catcher = 1;
}
#endif
ccl_device_inline void path_radiance_sum_indirect(PathRadiance *L)
{
#ifdef __PASSES__
- /* this division is a bit ugly, but means we only have to keep track of
- * only a single throughput further along the path, here we recover just
- * the indirect path that is not influenced by any particular BSDF type */
- if(L->use_light_pass) {
- L->direct_emission = safe_divide_color(L->direct_emission, L->state.direct);
- L->direct_diffuse += L->state.diffuse*L->direct_emission;
- L->direct_glossy += L->state.glossy*L->direct_emission;
- L->direct_transmission += L->state.transmission*L->direct_emission;
- L->direct_subsurface += L->state.subsurface*L->direct_emission;
- L->direct_scatter += L->state.scatter*L->direct_emission;
-
- L->indirect = safe_divide_color(L->indirect, L->state.direct);
- L->indirect_diffuse += L->state.diffuse*L->indirect;
- L->indirect_glossy += L->state.glossy*L->indirect;
- L->indirect_transmission += L->state.transmission*L->indirect;
- L->indirect_subsurface += L->state.subsurface*L->indirect;
- L->indirect_scatter += L->state.scatter*L->indirect;
- }
+ /* this division is a bit ugly, but means we only have to keep track of
+ * only a single throughput further along the path, here we recover just
+ * the indirect path that is not influenced by any particular BSDF type */
+ if (L->use_light_pass) {
+ L->direct_emission = safe_divide_color(L->direct_emission, L->state.direct);
+ L->direct_diffuse += L->state.diffuse * L->direct_emission;
+ L->direct_glossy += L->state.glossy * L->direct_emission;
+ L->direct_transmission += L->state.transmission * L->direct_emission;
+ L->direct_subsurface += L->state.subsurface * L->direct_emission;
+ L->direct_scatter += L->state.scatter * L->direct_emission;
+
+ L->indirect = safe_divide_color(L->indirect, L->state.direct);
+ L->indirect_diffuse += L->state.diffuse * L->indirect;
+ L->indirect_glossy += L->state.glossy * L->indirect;
+ L->indirect_transmission += L->state.transmission * L->indirect;
+ L->indirect_subsurface += L->state.subsurface * L->indirect;
+ L->indirect_scatter += L->state.scatter * L->indirect;
+ }
#endif
}
ccl_device_inline void path_radiance_reset_indirect(PathRadiance *L)
{
#ifdef __PASSES__
- if(L->use_light_pass) {
- L->state.diffuse = make_float3(0.0f, 0.0f, 0.0f);
- L->state.glossy = make_float3(0.0f, 0.0f, 0.0f);
- L->state.transmission = make_float3(0.0f, 0.0f, 0.0f);
- L->state.subsurface = make_float3(0.0f, 0.0f, 0.0f);
- L->state.scatter = make_float3(0.0f, 0.0f, 0.0f);
+ if (L->use_light_pass) {
+ L->state.diffuse = make_float3(0.0f, 0.0f, 0.0f);
+ L->state.glossy = make_float3(0.0f, 0.0f, 0.0f);
+ L->state.transmission = make_float3(0.0f, 0.0f, 0.0f);
+ L->state.subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ L->state.scatter = make_float3(0.0f, 0.0f, 0.0f);
- L->direct_emission = make_float3(0.0f, 0.0f, 0.0f);
- L->indirect = make_float3(0.0f, 0.0f, 0.0f);
- }
+ L->direct_emission = make_float3(0.0f, 0.0f, 0.0f);
+ L->indirect = make_float3(0.0f, 0.0f, 0.0f);
+ }
#endif
}
-ccl_device_inline void path_radiance_copy_indirect(PathRadiance *L,
- const PathRadiance *L_src)
+ccl_device_inline void path_radiance_copy_indirect(PathRadiance *L, const PathRadiance *L_src)
{
#ifdef __PASSES__
- if(L->use_light_pass) {
- L->state = L_src->state;
+ if (L->use_light_pass) {
+ L->state = L_src->state;
- L->direct_emission = L_src->direct_emission;
- L->indirect = L_src->indirect;
- }
+ L->direct_emission = L_src->direct_emission;
+ L->indirect = L_src->indirect;
+ }
#endif
}
@@ -557,213 +550,219 @@ ccl_device_inline void path_radiance_sum_shadowcatcher(KernelGlobals *kg,
float3 *L_sum,
float *alpha)
{
- /* Calculate current shadow of the path. */
- float path_total = average(L->path_total);
- float shadow;
-
- if(UNLIKELY(!isfinite_safe(path_total))) {
- kernel_assert(!"Non-finite total radiance along the path");
- shadow = 0.0f;
- }
- else if(path_total == 0.0f) {
- shadow = L->shadow_transparency;
- }
- else {
- float path_total_shaded = average(L->path_total_shaded);
- shadow = path_total_shaded / path_total;
- }
-
- /* Calculate final light sum and transparency for shadow catcher object. */
- if(kernel_data.background.transparent) {
- *alpha -= L->shadow_throughput * shadow;
- }
- else {
- L->shadow_background_color *= shadow;
- *L_sum += L->shadow_background_color;
- }
+ /* Calculate current shadow of the path. */
+ float path_total = average(L->path_total);
+ float shadow;
+
+ if (UNLIKELY(!isfinite_safe(path_total))) {
+ kernel_assert(!"Non-finite total radiance along the path");
+ shadow = 0.0f;
+ }
+ else if (path_total == 0.0f) {
+ shadow = L->shadow_transparency;
+ }
+ else {
+ float path_total_shaded = average(L->path_total_shaded);
+ shadow = path_total_shaded / path_total;
+ }
+
+ /* Calculate final light sum and transparency for shadow catcher object. */
+ if (kernel_data.background.transparent) {
+ *alpha -= L->shadow_throughput * shadow;
+ }
+ else {
+ L->shadow_background_color *= shadow;
+ *L_sum += L->shadow_background_color;
+ }
}
#endif
-ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadiance *L, float *alpha)
+ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg,
+ PathRadiance *L,
+ float *alpha)
{
- float3 L_sum;
- /* Light Passes are used */
+ float3 L_sum;
+ /* Light Passes are used */
#ifdef __PASSES__
- float3 L_direct, L_indirect;
- float clamp_direct = kernel_data.integrator.sample_clamp_direct;
- float clamp_indirect = kernel_data.integrator.sample_clamp_indirect;
- if(L->use_light_pass) {
- path_radiance_sum_indirect(L);
-
- L_direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission + L->direct_subsurface + L->direct_scatter + L->emission;
- L_indirect = L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission + L->indirect_subsurface + L->indirect_scatter;
-
- if(!kernel_data.background.transparent)
- L_direct += L->background;
-
- L_sum = L_direct + L_indirect;
- float sum = fabsf((L_sum).x) + fabsf((L_sum).y) + fabsf((L_sum).z);
-
- /* Reject invalid value */
- if(!isfinite_safe(sum)) {
- kernel_assert(!"Non-finite sum in path_radiance_clamp_and_sum!");
- L_sum = make_float3(0.0f, 0.0f, 0.0f);
-
- L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f);
- L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f);
- L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f);
- L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f);
- L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f);
-
- L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f);
- L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f);
- L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f);
- L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f);
- L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f);
-
- L->emission = make_float3(0.0f, 0.0f, 0.0f);
- }
-
- /* Clamp direct and indirect samples */
-#ifdef __CLAMP_SAMPLE__
- else if(sum > clamp_direct || sum > clamp_indirect) {
- float scale;
-
- /* Direct */
- float sum_direct = fabsf(L_direct.x) + fabsf(L_direct.y) + fabsf(L_direct.z);
- if(sum_direct > clamp_direct) {
- scale = clamp_direct/sum_direct;
- L_direct *= scale;
-
- L->direct_diffuse *= scale;
- L->direct_glossy *= scale;
- L->direct_transmission *= scale;
- L->direct_subsurface *= scale;
- L->direct_scatter *= scale;
- L->emission *= scale;
- L->background *= scale;
- }
-
- /* Indirect */
- float sum_indirect = fabsf(L_indirect.x) + fabsf(L_indirect.y) + fabsf(L_indirect.z);
- if(sum_indirect > clamp_indirect) {
- scale = clamp_indirect/sum_indirect;
- L_indirect *= scale;
-
- L->indirect_diffuse *= scale;
- L->indirect_glossy *= scale;
- L->indirect_transmission *= scale;
- L->indirect_subsurface *= scale;
- L->indirect_scatter *= scale;
- }
-
- /* Sum again, after clamping */
- L_sum = L_direct + L_indirect;
- }
-#endif
- }
-
- /* No Light Passes */
- else
-#endif
- {
- L_sum = L->emission;
-
- /* Reject invalid value */
- float sum = fabsf((L_sum).x) + fabsf((L_sum).y) + fabsf((L_sum).z);
- if(!isfinite_safe(sum)) {
- kernel_assert(!"Non-finite final sum in path_radiance_clamp_and_sum!");
- L_sum = make_float3(0.0f, 0.0f, 0.0f);
- }
- }
-
- /* Compute alpha. */
- *alpha = 1.0f - L->transparent;
-
- /* Add shadow catcher contributions. */
+ float3 L_direct, L_indirect;
+ float clamp_direct = kernel_data.integrator.sample_clamp_direct;
+ float clamp_indirect = kernel_data.integrator.sample_clamp_indirect;
+ if (L->use_light_pass) {
+ path_radiance_sum_indirect(L);
+
+ L_direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission +
+ L->direct_subsurface + L->direct_scatter + L->emission;
+ L_indirect = L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission +
+ L->indirect_subsurface + L->indirect_scatter;
+
+ if (!kernel_data.background.transparent)
+ L_direct += L->background;
+
+ L_sum = L_direct + L_indirect;
+ float sum = fabsf((L_sum).x) + fabsf((L_sum).y) + fabsf((L_sum).z);
+
+ /* Reject invalid value */
+ if (!isfinite_safe(sum)) {
+ kernel_assert(!"Non-finite sum in path_radiance_clamp_and_sum!");
+ L_sum = make_float3(0.0f, 0.0f, 0.0f);
+
+ L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f);
+ L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f);
+ L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f);
+ L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f);
+
+ L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f);
+ L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f);
+ L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f);
+ L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f);
+
+ L->emission = make_float3(0.0f, 0.0f, 0.0f);
+ }
+
+ /* Clamp direct and indirect samples */
+# ifdef __CLAMP_SAMPLE__
+ else if (sum > clamp_direct || sum > clamp_indirect) {
+ float scale;
+
+ /* Direct */
+ float sum_direct = fabsf(L_direct.x) + fabsf(L_direct.y) + fabsf(L_direct.z);
+ if (sum_direct > clamp_direct) {
+ scale = clamp_direct / sum_direct;
+ L_direct *= scale;
+
+ L->direct_diffuse *= scale;
+ L->direct_glossy *= scale;
+ L->direct_transmission *= scale;
+ L->direct_subsurface *= scale;
+ L->direct_scatter *= scale;
+ L->emission *= scale;
+ L->background *= scale;
+ }
+
+ /* Indirect */
+ float sum_indirect = fabsf(L_indirect.x) + fabsf(L_indirect.y) + fabsf(L_indirect.z);
+ if (sum_indirect > clamp_indirect) {
+ scale = clamp_indirect / sum_indirect;
+ L_indirect *= scale;
+
+ L->indirect_diffuse *= scale;
+ L->indirect_glossy *= scale;
+ L->indirect_transmission *= scale;
+ L->indirect_subsurface *= scale;
+ L->indirect_scatter *= scale;
+ }
+
+ /* Sum again, after clamping */
+ L_sum = L_direct + L_indirect;
+ }
+# endif
+ }
+
+ /* No Light Passes */
+ else
+#endif
+ {
+ L_sum = L->emission;
+
+ /* Reject invalid value */
+ float sum = fabsf((L_sum).x) + fabsf((L_sum).y) + fabsf((L_sum).z);
+ if (!isfinite_safe(sum)) {
+ kernel_assert(!"Non-finite final sum in path_radiance_clamp_and_sum!");
+ L_sum = make_float3(0.0f, 0.0f, 0.0f);
+ }
+ }
+
+ /* Compute alpha. */
+ *alpha = 1.0f - L->transparent;
+
+ /* Add shadow catcher contributions. */
#ifdef __SHADOW_TRICKS__
- if(L->has_shadow_catcher) {
- path_radiance_sum_shadowcatcher(kg, L, &L_sum, alpha);
- }
-#endif /* __SHADOW_TRICKS__ */
+ if (L->has_shadow_catcher) {
+ path_radiance_sum_shadowcatcher(kg, L, &L_sum, alpha);
+ }
+#endif /* __SHADOW_TRICKS__ */
- return L_sum;
+ return L_sum;
}
-ccl_device_inline void path_radiance_split_denoising(KernelGlobals *kg, PathRadiance *L, float3 *noisy, float3 *clean)
+ccl_device_inline void path_radiance_split_denoising(KernelGlobals *kg,
+ PathRadiance *L,
+ float3 *noisy,
+ float3 *clean)
{
#ifdef __PASSES__
- kernel_assert(L->use_light_pass);
-
- *clean = L->emission + L->background;
- *noisy = L->direct_scatter + L->indirect_scatter;
-
-# define ADD_COMPONENT(flag, component) \
- if(kernel_data.film.denoising_flags & flag) \
- *clean += component; \
- else \
- *noisy += component;
-
- ADD_COMPONENT(DENOISING_CLEAN_DIFFUSE_DIR, L->direct_diffuse);
- ADD_COMPONENT(DENOISING_CLEAN_DIFFUSE_IND, L->indirect_diffuse);
- ADD_COMPONENT(DENOISING_CLEAN_GLOSSY_DIR, L->direct_glossy);
- ADD_COMPONENT(DENOISING_CLEAN_GLOSSY_IND, L->indirect_glossy);
- ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_DIR, L->direct_transmission);
- ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_IND, L->indirect_transmission);
- ADD_COMPONENT(DENOISING_CLEAN_SUBSURFACE_DIR, L->direct_subsurface);
- ADD_COMPONENT(DENOISING_CLEAN_SUBSURFACE_IND, L->indirect_subsurface);
+ kernel_assert(L->use_light_pass);
+
+ *clean = L->emission + L->background;
+ *noisy = L->direct_scatter + L->indirect_scatter;
+
+# define ADD_COMPONENT(flag, component) \
+ if (kernel_data.film.denoising_flags & flag) \
+ *clean += component; \
+ else \
+ *noisy += component;
+
+ ADD_COMPONENT(DENOISING_CLEAN_DIFFUSE_DIR, L->direct_diffuse);
+ ADD_COMPONENT(DENOISING_CLEAN_DIFFUSE_IND, L->indirect_diffuse);
+ ADD_COMPONENT(DENOISING_CLEAN_GLOSSY_DIR, L->direct_glossy);
+ ADD_COMPONENT(DENOISING_CLEAN_GLOSSY_IND, L->indirect_glossy);
+ ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_DIR, L->direct_transmission);
+ ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_IND, L->indirect_transmission);
+ ADD_COMPONENT(DENOISING_CLEAN_SUBSURFACE_DIR, L->direct_subsurface);
+ ADD_COMPONENT(DENOISING_CLEAN_SUBSURFACE_IND, L->indirect_subsurface);
# undef ADD_COMPONENT
#else
- *noisy = L->emission;
- *clean = make_float3(0.0f, 0.0f, 0.0f);
+ *noisy = L->emission;
+ *clean = make_float3(0.0f, 0.0f, 0.0f);
#endif
#ifdef __SHADOW_TRICKS__
- if(L->has_shadow_catcher) {
- *noisy += L->shadow_background_color;
- }
+ if (L->has_shadow_catcher) {
+ *noisy += L->shadow_background_color;
+ }
#endif
- *noisy = ensure_finite3(*noisy);
- *clean = ensure_finite3(*clean);
+ *noisy = ensure_finite3(*noisy);
+ *clean = ensure_finite3(*clean);
}
ccl_device_inline void path_radiance_accum_sample(PathRadiance *L, PathRadiance *L_sample)
{
#ifdef __SPLIT_KERNEL__
# define safe_float3_add(f, v) \
- do { \
- ccl_global float *p = (ccl_global float*)(&(f)); \
- atomic_add_and_fetch_float(p+0, (v).x); \
- atomic_add_and_fetch_float(p+1, (v).y); \
- atomic_add_and_fetch_float(p+2, (v).z); \
- } while(0)
-# define safe_float_add(f, v) \
- atomic_add_and_fetch_float(&(f), (v))
+ do { \
+ ccl_global float *p = (ccl_global float *)(&(f)); \
+ atomic_add_and_fetch_float(p + 0, (v).x); \
+ atomic_add_and_fetch_float(p + 1, (v).y); \
+ atomic_add_and_fetch_float(p + 2, (v).z); \
+ } while (0)
+# define safe_float_add(f, v) atomic_add_and_fetch_float(&(f), (v))
#else
# define safe_float3_add(f, v) (f) += (v)
# define safe_float_add(f, v) (f) += (v)
-#endif /* __SPLIT_KERNEL__ */
+#endif /* __SPLIT_KERNEL__ */
#ifdef __PASSES__
- safe_float3_add(L->direct_diffuse, L_sample->direct_diffuse);
- safe_float3_add(L->direct_glossy, L_sample->direct_glossy);
- safe_float3_add(L->direct_transmission, L_sample->direct_transmission);
- safe_float3_add(L->direct_subsurface, L_sample->direct_subsurface);
- safe_float3_add(L->direct_scatter, L_sample->direct_scatter);
-
- safe_float3_add(L->indirect_diffuse, L_sample->indirect_diffuse);
- safe_float3_add(L->indirect_glossy, L_sample->indirect_glossy);
- safe_float3_add(L->indirect_transmission, L_sample->indirect_transmission);
- safe_float3_add(L->indirect_subsurface, L_sample->indirect_subsurface);
- safe_float3_add(L->indirect_scatter, L_sample->indirect_scatter);
-
- safe_float3_add(L->background, L_sample->background);
- safe_float3_add(L->ao, L_sample->ao);
- safe_float3_add(L->shadow, L_sample->shadow);
- safe_float_add(L->mist, L_sample->mist);
-#endif /* __PASSES__ */
- safe_float3_add(L->emission, L_sample->emission);
+ safe_float3_add(L->direct_diffuse, L_sample->direct_diffuse);
+ safe_float3_add(L->direct_glossy, L_sample->direct_glossy);
+ safe_float3_add(L->direct_transmission, L_sample->direct_transmission);
+ safe_float3_add(L->direct_subsurface, L_sample->direct_subsurface);
+ safe_float3_add(L->direct_scatter, L_sample->direct_scatter);
+
+ safe_float3_add(L->indirect_diffuse, L_sample->indirect_diffuse);
+ safe_float3_add(L->indirect_glossy, L_sample->indirect_glossy);
+ safe_float3_add(L->indirect_transmission, L_sample->indirect_transmission);
+ safe_float3_add(L->indirect_subsurface, L_sample->indirect_subsurface);
+ safe_float3_add(L->indirect_scatter, L_sample->indirect_scatter);
+
+ safe_float3_add(L->background, L_sample->background);
+ safe_float3_add(L->ao, L_sample->ao);
+ safe_float3_add(L->shadow, L_sample->shadow);
+ safe_float_add(L->mist, L_sample->mist);
+#endif /* __PASSES__ */
+ safe_float3_add(L->emission, L_sample->emission);
#undef safe_float_add
#undef safe_float3_add
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 37c163f2538..10b71bc6bdf 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -18,191 +18,172 @@ CCL_NAMESPACE_BEGIN
#ifdef __BAKING__
-ccl_device_inline void compute_light_pass(KernelGlobals *kg,
- ShaderData *sd,
- PathRadiance *L,
- uint rng_hash,
- int pass_filter,
- int sample)
+ccl_device_inline void compute_light_pass(
+ KernelGlobals *kg, ShaderData *sd, PathRadiance *L, uint rng_hash, int pass_filter, int sample)
{
- kernel_assert(kernel_data.film.use_light_pass);
-
- PathRadiance L_sample;
- PathState state;
- Ray ray;
- float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
-
- /* emission and indirect shader data memory used by various functions */
- ShaderData emission_sd, indirect_sd;
-
- ray.P = sd->P + sd->Ng;
- ray.D = -sd->Ng;
- ray.t = FLT_MAX;
-#ifdef __CAMERA_MOTION__
- ray.time = 0.5f;
-#endif
-
- /* init radiance */
- path_radiance_init(&L_sample, kernel_data.film.use_light_pass);
-
- /* init path state */
- path_state_init(kg, &emission_sd, &state, rng_hash, sample, NULL);
-
- /* evaluate surface shader */
- shader_eval_surface(kg, sd, &state, state.flag);
-
- /* TODO, disable more closures we don't need besides transparent */
- shader_bsdf_disable_transparency(kg, sd);
-
-#ifdef __BRANCHED_PATH__
- if(!kernel_data.integrator.branched) {
- /* regular path tracer */
-#endif
-
- /* sample ambient occlusion */
- if(pass_filter & BAKE_FILTER_AO) {
- kernel_path_ao(kg, sd, &emission_sd, &L_sample, &state, throughput, shader_bsdf_alpha(kg, sd));
- }
-
- /* sample emission */
- if((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) {
- float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
- path_radiance_accum_emission(&L_sample, &state, throughput, emission);
- }
-
- bool is_sss_sample = false;
-
-#ifdef __SUBSURFACE__
- /* sample subsurface scattering */
- if((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
- /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
- SubsurfaceIndirectRays ss_indirect;
- kernel_path_subsurface_init_indirect(&ss_indirect);
- if(kernel_path_subsurface_scatter(kg,
- sd,
- &emission_sd,
- &L_sample,
- &state,
- &ray,
- &throughput,
- &ss_indirect))
- {
- while(ss_indirect.num_rays) {
- kernel_path_subsurface_setup_indirect(kg,
- &ss_indirect,
- &state,
- &ray,
- &L_sample,
- &throughput);
- kernel_path_indirect(kg,
- &indirect_sd,
- &emission_sd,
- &ray,
- throughput,
- &state,
- &L_sample);
- }
- is_sss_sample = true;
- }
- }
-#endif
-
- /* sample light and BSDF */
- if(!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) {
- kernel_path_surface_connect_light(kg, sd, &emission_sd, throughput, &state, &L_sample);
-
- if(kernel_path_surface_bounce(kg, sd, &throughput, &state, &L_sample.state, &ray)) {
-#ifdef __LAMP_MIS__
- state.ray_t = 0.0f;
-#endif
- /* compute indirect light */
- kernel_path_indirect(kg, &indirect_sd, &emission_sd, &ray, throughput, &state, &L_sample);
-
- /* sum and reset indirect light pass variables for the next samples */
- path_radiance_sum_indirect(&L_sample);
- path_radiance_reset_indirect(&L_sample);
- }
- }
-#ifdef __BRANCHED_PATH__
- }
- else {
- /* branched path tracer */
-
- /* sample ambient occlusion */
- if(pass_filter & BAKE_FILTER_AO) {
- kernel_branched_path_ao(kg, sd, &emission_sd, &L_sample, &state, throughput);
- }
-
- /* sample emission */
- if((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) {
- float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
- path_radiance_accum_emission(&L_sample, &state, throughput, emission);
- }
-
-#ifdef __SUBSURFACE__
- /* sample subsurface scattering */
- if((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
- /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
- kernel_branched_path_subsurface_scatter(kg, sd, &indirect_sd,
- &emission_sd, &L_sample, &state, &ray, throughput);
- }
-#endif
-
- /* sample light and BSDF */
- if(pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT)) {
-#if defined(__EMISSION__)
- /* direct light */
- if(kernel_data.integrator.use_direct_light) {
- int all = kernel_data.integrator.sample_all_lights_direct;
- kernel_branched_path_surface_connect_light(kg,
- sd, &emission_sd, &state, throughput, 1.0f, &L_sample, all);
- }
-#endif
-
- /* indirect light */
- kernel_branched_path_surface_indirect_light(kg,
- sd, &indirect_sd, &emission_sd, throughput, 1.0f, &state, &L_sample);
- }
- }
-#endif
-
- /* accumulate into master L */
- path_radiance_accum_sample(L, &L_sample);
+ kernel_assert(kernel_data.film.use_light_pass);
+
+ PathRadiance L_sample;
+ PathState state;
+ Ray ray;
+ float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+
+ /* emission and indirect shader data memory used by various functions */
+ ShaderData emission_sd, indirect_sd;
+
+ ray.P = sd->P + sd->Ng;
+ ray.D = -sd->Ng;
+ ray.t = FLT_MAX;
+# ifdef __CAMERA_MOTION__
+ ray.time = 0.5f;
+# endif
+
+ /* init radiance */
+ path_radiance_init(&L_sample, kernel_data.film.use_light_pass);
+
+ /* init path state */
+ path_state_init(kg, &emission_sd, &state, rng_hash, sample, NULL);
+
+ /* evaluate surface shader */
+ shader_eval_surface(kg, sd, &state, state.flag);
+
+ /* TODO, disable more closures we don't need besides transparent */
+ shader_bsdf_disable_transparency(kg, sd);
+
+# ifdef __BRANCHED_PATH__
+ if (!kernel_data.integrator.branched) {
+ /* regular path tracer */
+# endif
+
+ /* sample ambient occlusion */
+ if (pass_filter & BAKE_FILTER_AO) {
+ kernel_path_ao(
+ kg, sd, &emission_sd, &L_sample, &state, throughput, shader_bsdf_alpha(kg, sd));
+ }
+
+ /* sample emission */
+ if ((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) {
+ float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
+ path_radiance_accum_emission(&L_sample, &state, throughput, emission);
+ }
+
+ bool is_sss_sample = false;
+
+# ifdef __SUBSURFACE__
+ /* sample subsurface scattering */
+ if ((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
+ /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
+ SubsurfaceIndirectRays ss_indirect;
+ kernel_path_subsurface_init_indirect(&ss_indirect);
+ if (kernel_path_subsurface_scatter(
+ kg, sd, &emission_sd, &L_sample, &state, &ray, &throughput, &ss_indirect)) {
+ while (ss_indirect.num_rays) {
+ kernel_path_subsurface_setup_indirect(
+ kg, &ss_indirect, &state, &ray, &L_sample, &throughput);
+ kernel_path_indirect(
+ kg, &indirect_sd, &emission_sd, &ray, throughput, &state, &L_sample);
+ }
+ is_sss_sample = true;
+ }
+ }
+# endif
+
+ /* sample light and BSDF */
+ if (!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) {
+ kernel_path_surface_connect_light(kg, sd, &emission_sd, throughput, &state, &L_sample);
+
+ if (kernel_path_surface_bounce(kg, sd, &throughput, &state, &L_sample.state, &ray)) {
+# ifdef __LAMP_MIS__
+ state.ray_t = 0.0f;
+# endif
+ /* compute indirect light */
+ kernel_path_indirect(kg, &indirect_sd, &emission_sd, &ray, throughput, &state, &L_sample);
+
+ /* sum and reset indirect light pass variables for the next samples */
+ path_radiance_sum_indirect(&L_sample);
+ path_radiance_reset_indirect(&L_sample);
+ }
+ }
+# ifdef __BRANCHED_PATH__
+ }
+ else {
+ /* branched path tracer */
+
+ /* sample ambient occlusion */
+ if (pass_filter & BAKE_FILTER_AO) {
+ kernel_branched_path_ao(kg, sd, &emission_sd, &L_sample, &state, throughput);
+ }
+
+ /* sample emission */
+ if ((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) {
+ float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
+ path_radiance_accum_emission(&L_sample, &state, throughput, emission);
+ }
+
+# ifdef __SUBSURFACE__
+ /* sample subsurface scattering */
+ if ((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
+ /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
+ kernel_branched_path_subsurface_scatter(
+ kg, sd, &indirect_sd, &emission_sd, &L_sample, &state, &ray, throughput);
+ }
+# endif
+
+ /* sample light and BSDF */
+ if (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT)) {
+# if defined(__EMISSION__)
+ /* direct light */
+ if (kernel_data.integrator.use_direct_light) {
+ int all = kernel_data.integrator.sample_all_lights_direct;
+ kernel_branched_path_surface_connect_light(
+ kg, sd, &emission_sd, &state, throughput, 1.0f, &L_sample, all);
+ }
+# endif
+
+ /* indirect light */
+ kernel_branched_path_surface_indirect_light(
+ kg, sd, &indirect_sd, &emission_sd, throughput, 1.0f, &state, &L_sample);
+ }
+ }
+# endif
+
+ /* accumulate into master L */
+ path_radiance_accum_sample(L, &L_sample);
}
/* this helps with AA but it's not the real solution as it does not AA the geometry
* but it's better than nothing, thus committed */
ccl_device_inline float bake_clamp_mirror_repeat(float u, float max)
{
- /* use mirror repeat (like opengl texture) so that if the barycentric
- * coordinate goes past the end of the triangle it is not always clamped
- * to the same value, gives ugly patterns */
- u /= max;
- float fu = floorf(u);
- u = u - fu;
-
- return ((((int)fu) & 1)? 1.0f - u: u) * max;
+ /* use mirror repeat (like opengl texture) so that if the barycentric
+ * coordinate goes past the end of the triangle it is not always clamped
+ * to the same value, gives ugly patterns */
+ u /= max;
+ float fu = floorf(u);
+ u = u - fu;
+
+ return ((((int)fu) & 1) ? 1.0f - u : u) * max;
}
ccl_device_inline float3 kernel_bake_shader_bsdf(KernelGlobals *kg,
ShaderData *sd,
const ShaderEvalType type)
{
- switch(type) {
- case SHADER_EVAL_DIFFUSE:
- return shader_bsdf_diffuse(kg, sd);
- case SHADER_EVAL_GLOSSY:
- return shader_bsdf_glossy(kg, sd);
- case SHADER_EVAL_TRANSMISSION:
- return shader_bsdf_transmission(kg, sd);
-#ifdef __SUBSURFACE__
- case SHADER_EVAL_SUBSURFACE:
- return shader_bsdf_subsurface(kg, sd);
-#endif
- default:
- kernel_assert(!"Unknown bake type passed to BSDF evaluate");
- return make_float3(0.0f, 0.0f, 0.0f);
- }
+ switch (type) {
+ case SHADER_EVAL_DIFFUSE:
+ return shader_bsdf_diffuse(kg, sd);
+ case SHADER_EVAL_GLOSSY:
+ return shader_bsdf_glossy(kg, sd);
+ case SHADER_EVAL_TRANSMISSION:
+ return shader_bsdf_transmission(kg, sd);
+# ifdef __SUBSURFACE__
+ case SHADER_EVAL_SUBSURFACE:
+ return shader_bsdf_subsurface(kg, sd);
+# endif
+ default:
+ kernel_assert(!"Unknown bake type passed to BSDF evaluate");
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
}
ccl_device float3 kernel_bake_evaluate_direct_indirect(KernelGlobals *kg,
@@ -213,316 +194,301 @@ ccl_device float3 kernel_bake_evaluate_direct_indirect(KernelGlobals *kg,
const ShaderEvalType type,
const int pass_filter)
{
- float3 color;
- const bool is_color = (pass_filter & BAKE_FILTER_COLOR) != 0;
- const bool is_direct = (pass_filter & BAKE_FILTER_DIRECT) != 0;
- const bool is_indirect = (pass_filter & BAKE_FILTER_INDIRECT) != 0;
- float3 out = make_float3(0.0f, 0.0f, 0.0f);
-
- if(is_color) {
- if(is_direct || is_indirect) {
- /* Leave direct and diffuse channel colored. */
- color = make_float3(1.0f, 1.0f, 1.0f);
- }
- else {
- /* surface color of the pass only */
- shader_eval_surface(kg, sd, state, 0);
- return kernel_bake_shader_bsdf(kg, sd, type);
- }
- }
- else {
- shader_eval_surface(kg, sd, state, 0);
- color = kernel_bake_shader_bsdf(kg, sd, type);
- }
-
- if(is_direct) {
- out += safe_divide_even_color(direct, color);
- }
-
- if(is_indirect) {
- out += safe_divide_even_color(indirect, color);
- }
-
- return out;
+ float3 color;
+ const bool is_color = (pass_filter & BAKE_FILTER_COLOR) != 0;
+ const bool is_direct = (pass_filter & BAKE_FILTER_DIRECT) != 0;
+ const bool is_indirect = (pass_filter & BAKE_FILTER_INDIRECT) != 0;
+ float3 out = make_float3(0.0f, 0.0f, 0.0f);
+
+ if (is_color) {
+ if (is_direct || is_indirect) {
+ /* Leave direct and diffuse channel colored. */
+ color = make_float3(1.0f, 1.0f, 1.0f);
+ }
+ else {
+ /* surface color of the pass only */
+ shader_eval_surface(kg, sd, state, 0);
+ return kernel_bake_shader_bsdf(kg, sd, type);
+ }
+ }
+ else {
+ shader_eval_surface(kg, sd, state, 0);
+ color = kernel_bake_shader_bsdf(kg, sd, type);
+ }
+
+ if (is_direct) {
+ out += safe_divide_even_color(direct, color);
+ }
+
+ if (is_indirect) {
+ out += safe_divide_even_color(indirect, color);
+ }
+
+ return out;
}
-ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output,
- ShaderEvalType type, int pass_filter, int i, int offset, int sample)
+ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
+ ccl_global uint4 *input,
+ ccl_global float4 *output,
+ ShaderEvalType type,
+ int pass_filter,
+ int i,
+ int offset,
+ int sample)
{
- ShaderData sd;
- PathState state = {0};
- uint4 in = input[i * 2];
- uint4 diff = input[i * 2 + 1];
-
- float3 out = make_float3(0.0f, 0.0f, 0.0f);
-
- int object = in.x;
- int prim = in.y;
-
- if(prim == -1)
- return;
-
- float u = __uint_as_float(in.z);
- float v = __uint_as_float(in.w);
-
- float dudx = __uint_as_float(diff.x);
- float dudy = __uint_as_float(diff.y);
- float dvdx = __uint_as_float(diff.z);
- float dvdy = __uint_as_float(diff.w);
-
- int num_samples = kernel_data.integrator.aa_samples;
-
- /* random number generator */
- uint rng_hash = cmj_hash(offset + i, kernel_data.integrator.seed);
-
- float filter_x, filter_y;
- if(sample == 0) {
- filter_x = filter_y = 0.5f;
- }
- else {
- path_rng_2D(kg, rng_hash, sample, num_samples, PRNG_FILTER_U, &filter_x, &filter_y);
- }
-
- /* subpixel u/v offset */
- if(sample > 0) {
- u = bake_clamp_mirror_repeat(u + dudx*(filter_x - 0.5f) + dudy*(filter_y - 0.5f), 1.0f);
- v = bake_clamp_mirror_repeat(v + dvdx*(filter_x - 0.5f) + dvdy*(filter_y - 0.5f), 1.0f - u);
- }
-
- /* triangle */
- int shader;
- float3 P, Ng;
-
- triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
-
- /* light passes */
- PathRadiance L;
- path_radiance_init(&L, kernel_data.film.use_light_pass);
-
- shader_setup_from_sample(kg, &sd,
- P, Ng, Ng,
- shader, object, prim,
- u, v, 1.0f, 0.5f,
- !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED),
- LAMP_NONE);
- sd.I = sd.N;
-
- /* update differentials */
- sd.dP.dx = sd.dPdu * dudx + sd.dPdv * dvdx;
- sd.dP.dy = sd.dPdu * dudy + sd.dPdv * dvdy;
- sd.du.dx = dudx;
- sd.du.dy = dudy;
- sd.dv.dx = dvdx;
- sd.dv.dy = dvdy;
-
- /* set RNG state for shaders that use sampling */
- state.rng_hash = rng_hash;
- state.rng_offset = 0;
- state.sample = sample;
- state.num_samples = num_samples;
- state.min_ray_pdf = FLT_MAX;
-
- /* light passes if we need more than color */
- if(pass_filter & ~BAKE_FILTER_COLOR)
- compute_light_pass(kg, &sd, &L, rng_hash, pass_filter, sample);
-
- switch(type) {
- /* data passes */
- case SHADER_EVAL_NORMAL:
- case SHADER_EVAL_ROUGHNESS:
- case SHADER_EVAL_EMISSION:
- {
- if(type != SHADER_EVAL_NORMAL || (sd.flag & SD_HAS_BUMP)) {
- int path_flag = (type == SHADER_EVAL_EMISSION) ? PATH_RAY_EMISSION : 0;
- shader_eval_surface(kg, &sd, &state, path_flag);
- }
-
- if(type == SHADER_EVAL_NORMAL) {
- float3 N = sd.N;
- if(sd.flag & SD_HAS_BUMP) {
- N = shader_bsdf_average_normal(kg, &sd);
- }
-
- /* encoding: normal = (2 * color) - 1 */
- out = N * 0.5f + make_float3(0.5f, 0.5f, 0.5f);
- }
- else if(type == SHADER_EVAL_ROUGHNESS) {
- float roughness = shader_bsdf_average_roughness(&sd);
- out = make_float3(roughness, roughness, roughness);
- }
- else {
- out = shader_emissive_eval(&sd);
- }
- break;
- }
- case SHADER_EVAL_UV:
- {
- out = primitive_uv(kg, &sd);
- break;
- }
-#ifdef __PASSES__
- /* light passes */
- case SHADER_EVAL_AO:
- {
- out = L.ao;
- break;
- }
- case SHADER_EVAL_COMBINED:
- {
- if((pass_filter & BAKE_FILTER_COMBINED) == BAKE_FILTER_COMBINED) {
- float alpha;
- out = path_radiance_clamp_and_sum(kg, &L, &alpha);
- break;
- }
-
- if((pass_filter & BAKE_FILTER_DIFFUSE_DIRECT) == BAKE_FILTER_DIFFUSE_DIRECT)
- out += L.direct_diffuse;
- if((pass_filter & BAKE_FILTER_DIFFUSE_INDIRECT) == BAKE_FILTER_DIFFUSE_INDIRECT)
- out += L.indirect_diffuse;
-
- if((pass_filter & BAKE_FILTER_GLOSSY_DIRECT) == BAKE_FILTER_GLOSSY_DIRECT)
- out += L.direct_glossy;
- if((pass_filter & BAKE_FILTER_GLOSSY_INDIRECT) == BAKE_FILTER_GLOSSY_INDIRECT)
- out += L.indirect_glossy;
-
- if((pass_filter & BAKE_FILTER_TRANSMISSION_DIRECT) == BAKE_FILTER_TRANSMISSION_DIRECT)
- out += L.direct_transmission;
- if((pass_filter & BAKE_FILTER_TRANSMISSION_INDIRECT) == BAKE_FILTER_TRANSMISSION_INDIRECT)
- out += L.indirect_transmission;
-
- if((pass_filter & BAKE_FILTER_SUBSURFACE_DIRECT) == BAKE_FILTER_SUBSURFACE_DIRECT)
- out += L.direct_subsurface;
- if((pass_filter & BAKE_FILTER_SUBSURFACE_INDIRECT) == BAKE_FILTER_SUBSURFACE_INDIRECT)
- out += L.indirect_subsurface;
-
- if((pass_filter & BAKE_FILTER_EMISSION) != 0)
- out += L.emission;
-
- break;
- }
- case SHADER_EVAL_SHADOW:
- {
- out = make_float3(L.shadow.x, L.shadow.y, L.shadow.z);
- break;
- }
- case SHADER_EVAL_DIFFUSE:
- {
- out = kernel_bake_evaluate_direct_indirect(kg,
- &sd,
- &state,
- L.direct_diffuse,
- L.indirect_diffuse,
- type,
- pass_filter);
- break;
- }
- case SHADER_EVAL_GLOSSY:
- {
- out = kernel_bake_evaluate_direct_indirect(kg,
- &sd,
- &state,
- L.direct_glossy,
- L.indirect_glossy,
- type,
- pass_filter);
- break;
- }
- case SHADER_EVAL_TRANSMISSION:
- {
- out = kernel_bake_evaluate_direct_indirect(kg,
- &sd,
- &state,
- L.direct_transmission,
- L.indirect_transmission,
- type,
- pass_filter);
- break;
- }
- case SHADER_EVAL_SUBSURFACE:
- {
-#ifdef __SUBSURFACE__
- out = kernel_bake_evaluate_direct_indirect(kg,
- &sd,
- &state,
- L.direct_subsurface,
- L.indirect_subsurface,
- type,
- pass_filter);
-#endif
- break;
- }
-#endif
-
- /* extra */
- case SHADER_EVAL_ENVIRONMENT:
- {
- /* setup ray */
- Ray ray;
-
- ray.P = make_float3(0.0f, 0.0f, 0.0f);
- ray.D = normalize(P);
- ray.t = 0.0f;
-#ifdef __CAMERA_MOTION__
- ray.time = 0.5f;
-#endif
-
-#ifdef __RAY_DIFFERENTIALS__
- ray.dD = differential3_zero();
- ray.dP = differential3_zero();
-#endif
-
- /* setup shader data */
- shader_setup_from_background(kg, &sd, &ray);
-
- /* evaluate */
- int path_flag = 0; /* we can't know which type of BSDF this is for */
- shader_eval_surface(kg, &sd, &state, path_flag | PATH_RAY_EMISSION);
- out = shader_background_eval(&sd);
- break;
- }
- default:
- {
- /* no real shader, returning the position of the verts for debugging */
- out = normalize(P);
- break;
- }
- }
-
- /* write output */
- const float output_fac = 1.0f/num_samples;
- const float4 scaled_result = make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
-
- output[i] = (sample == 0)? scaled_result: output[i] + scaled_result;
+ ShaderData sd;
+ PathState state = {0};
+ uint4 in = input[i * 2];
+ uint4 diff = input[i * 2 + 1];
+
+ float3 out = make_float3(0.0f, 0.0f, 0.0f);
+
+ int object = in.x;
+ int prim = in.y;
+
+ if (prim == -1)
+ return;
+
+ float u = __uint_as_float(in.z);
+ float v = __uint_as_float(in.w);
+
+ float dudx = __uint_as_float(diff.x);
+ float dudy = __uint_as_float(diff.y);
+ float dvdx = __uint_as_float(diff.z);
+ float dvdy = __uint_as_float(diff.w);
+
+ int num_samples = kernel_data.integrator.aa_samples;
+
+ /* random number generator */
+ uint rng_hash = cmj_hash(offset + i, kernel_data.integrator.seed);
+
+ float filter_x, filter_y;
+ if (sample == 0) {
+ filter_x = filter_y = 0.5f;
+ }
+ else {
+ path_rng_2D(kg, rng_hash, sample, num_samples, PRNG_FILTER_U, &filter_x, &filter_y);
+ }
+
+ /* subpixel u/v offset */
+ if (sample > 0) {
+ u = bake_clamp_mirror_repeat(u + dudx * (filter_x - 0.5f) + dudy * (filter_y - 0.5f), 1.0f);
+ v = bake_clamp_mirror_repeat(v + dvdx * (filter_x - 0.5f) + dvdy * (filter_y - 0.5f),
+ 1.0f - u);
+ }
+
+ /* triangle */
+ int shader;
+ float3 P, Ng;
+
+ triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
+
+ /* light passes */
+ PathRadiance L;
+ path_radiance_init(&L, kernel_data.film.use_light_pass);
+
+ shader_setup_from_sample(
+ kg,
+ &sd,
+ P,
+ Ng,
+ Ng,
+ shader,
+ object,
+ prim,
+ u,
+ v,
+ 1.0f,
+ 0.5f,
+ !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED),
+ LAMP_NONE);
+ sd.I = sd.N;
+
+ /* update differentials */
+ sd.dP.dx = sd.dPdu * dudx + sd.dPdv * dvdx;
+ sd.dP.dy = sd.dPdu * dudy + sd.dPdv * dvdy;
+ sd.du.dx = dudx;
+ sd.du.dy = dudy;
+ sd.dv.dx = dvdx;
+ sd.dv.dy = dvdy;
+
+ /* set RNG state for shaders that use sampling */
+ state.rng_hash = rng_hash;
+ state.rng_offset = 0;
+ state.sample = sample;
+ state.num_samples = num_samples;
+ state.min_ray_pdf = FLT_MAX;
+
+ /* light passes if we need more than color */
+ if (pass_filter & ~BAKE_FILTER_COLOR)
+ compute_light_pass(kg, &sd, &L, rng_hash, pass_filter, sample);
+
+ switch (type) {
+ /* data passes */
+ case SHADER_EVAL_NORMAL:
+ case SHADER_EVAL_ROUGHNESS:
+ case SHADER_EVAL_EMISSION: {
+ if (type != SHADER_EVAL_NORMAL || (sd.flag & SD_HAS_BUMP)) {
+ int path_flag = (type == SHADER_EVAL_EMISSION) ? PATH_RAY_EMISSION : 0;
+ shader_eval_surface(kg, &sd, &state, path_flag);
+ }
+
+ if (type == SHADER_EVAL_NORMAL) {
+ float3 N = sd.N;
+ if (sd.flag & SD_HAS_BUMP) {
+ N = shader_bsdf_average_normal(kg, &sd);
+ }
+
+ /* encoding: normal = (2 * color) - 1 */
+ out = N * 0.5f + make_float3(0.5f, 0.5f, 0.5f);
+ }
+ else if (type == SHADER_EVAL_ROUGHNESS) {
+ float roughness = shader_bsdf_average_roughness(&sd);
+ out = make_float3(roughness, roughness, roughness);
+ }
+ else {
+ out = shader_emissive_eval(&sd);
+ }
+ break;
+ }
+ case SHADER_EVAL_UV: {
+ out = primitive_uv(kg, &sd);
+ break;
+ }
+# ifdef __PASSES__
+ /* light passes */
+ case SHADER_EVAL_AO: {
+ out = L.ao;
+ break;
+ }
+ case SHADER_EVAL_COMBINED: {
+ if ((pass_filter & BAKE_FILTER_COMBINED) == BAKE_FILTER_COMBINED) {
+ float alpha;
+ out = path_radiance_clamp_and_sum(kg, &L, &alpha);
+ break;
+ }
+
+ if ((pass_filter & BAKE_FILTER_DIFFUSE_DIRECT) == BAKE_FILTER_DIFFUSE_DIRECT)
+ out += L.direct_diffuse;
+ if ((pass_filter & BAKE_FILTER_DIFFUSE_INDIRECT) == BAKE_FILTER_DIFFUSE_INDIRECT)
+ out += L.indirect_diffuse;
+
+ if ((pass_filter & BAKE_FILTER_GLOSSY_DIRECT) == BAKE_FILTER_GLOSSY_DIRECT)
+ out += L.direct_glossy;
+ if ((pass_filter & BAKE_FILTER_GLOSSY_INDIRECT) == BAKE_FILTER_GLOSSY_INDIRECT)
+ out += L.indirect_glossy;
+
+ if ((pass_filter & BAKE_FILTER_TRANSMISSION_DIRECT) == BAKE_FILTER_TRANSMISSION_DIRECT)
+ out += L.direct_transmission;
+ if ((pass_filter & BAKE_FILTER_TRANSMISSION_INDIRECT) == BAKE_FILTER_TRANSMISSION_INDIRECT)
+ out += L.indirect_transmission;
+
+ if ((pass_filter & BAKE_FILTER_SUBSURFACE_DIRECT) == BAKE_FILTER_SUBSURFACE_DIRECT)
+ out += L.direct_subsurface;
+ if ((pass_filter & BAKE_FILTER_SUBSURFACE_INDIRECT) == BAKE_FILTER_SUBSURFACE_INDIRECT)
+ out += L.indirect_subsurface;
+
+ if ((pass_filter & BAKE_FILTER_EMISSION) != 0)
+ out += L.emission;
+
+ break;
+ }
+ case SHADER_EVAL_SHADOW: {
+ out = make_float3(L.shadow.x, L.shadow.y, L.shadow.z);
+ break;
+ }
+ case SHADER_EVAL_DIFFUSE: {
+ out = kernel_bake_evaluate_direct_indirect(
+ kg, &sd, &state, L.direct_diffuse, L.indirect_diffuse, type, pass_filter);
+ break;
+ }
+ case SHADER_EVAL_GLOSSY: {
+ out = kernel_bake_evaluate_direct_indirect(
+ kg, &sd, &state, L.direct_glossy, L.indirect_glossy, type, pass_filter);
+ break;
+ }
+ case SHADER_EVAL_TRANSMISSION: {
+ out = kernel_bake_evaluate_direct_indirect(
+ kg, &sd, &state, L.direct_transmission, L.indirect_transmission, type, pass_filter);
+ break;
+ }
+ case SHADER_EVAL_SUBSURFACE: {
+# ifdef __SUBSURFACE__
+ out = kernel_bake_evaluate_direct_indirect(
+ kg, &sd, &state, L.direct_subsurface, L.indirect_subsurface, type, pass_filter);
+# endif
+ break;
+ }
+# endif
+
+ /* extra */
+ case SHADER_EVAL_ENVIRONMENT: {
+ /* setup ray */
+ Ray ray;
+
+ ray.P = make_float3(0.0f, 0.0f, 0.0f);
+ ray.D = normalize(P);
+ ray.t = 0.0f;
+# ifdef __CAMERA_MOTION__
+ ray.time = 0.5f;
+# endif
+
+# ifdef __RAY_DIFFERENTIALS__
+ ray.dD = differential3_zero();
+ ray.dP = differential3_zero();
+# endif
+
+ /* setup shader data */
+ shader_setup_from_background(kg, &sd, &ray);
+
+ /* evaluate */
+ int path_flag = 0; /* we can't know which type of BSDF this is for */
+ shader_eval_surface(kg, &sd, &state, path_flag | PATH_RAY_EMISSION);
+ out = shader_background_eval(&sd);
+ break;
+ }
+ default: {
+ /* no real shader, returning the position of the verts for debugging */
+ out = normalize(P);
+ break;
+ }
+ }
+
+ /* write output */
+ const float output_fac = 1.0f / num_samples;
+ const float4 scaled_result = make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
+
+ output[i] = (sample == 0) ? scaled_result : output[i] + scaled_result;
}
-#endif /* __BAKING__ */
+#endif /* __BAKING__ */
ccl_device void kernel_displace_evaluate(KernelGlobals *kg,
ccl_global uint4 *input,
ccl_global float4 *output,
int i)
{
- ShaderData sd;
- PathState state = {0};
- uint4 in = input[i];
+ ShaderData sd;
+ PathState state = {0};
+ uint4 in = input[i];
- /* setup shader data */
- int object = in.x;
- int prim = in.y;
- float u = __uint_as_float(in.z);
- float v = __uint_as_float(in.w);
+ /* setup shader data */
+ int object = in.x;
+ int prim = in.y;
+ float u = __uint_as_float(in.z);
+ float v = __uint_as_float(in.w);
- shader_setup_from_displace(kg, &sd, object, prim, u, v);
+ shader_setup_from_displace(kg, &sd, object, prim, u, v);
- /* evaluate */
- float3 P = sd.P;
- shader_eval_displacement(kg, &sd, &state);
- float3 D = sd.P - P;
+ /* evaluate */
+ float3 P = sd.P;
+ shader_eval_displacement(kg, &sd, &state);
+ float3 D = sd.P - P;
- object_inverse_dir_transform(kg, &sd, &D);
+ object_inverse_dir_transform(kg, &sd, &D);
- /* write output */
- output[i] += make_float4(D.x, D.y, D.z, 0.0f);
+ /* write output */
+ output[i] += make_float4(D.x, D.y, D.z, 0.0f);
}
ccl_device void kernel_background_evaluate(KernelGlobals *kg,
@@ -530,37 +496,37 @@ ccl_device void kernel_background_evaluate(KernelGlobals *kg,
ccl_global float4 *output,
int i)
{
- ShaderData sd;
- PathState state = {0};
- uint4 in = input[i];
-
- /* setup ray */
- Ray ray;
- float u = __uint_as_float(in.x);
- float v = __uint_as_float(in.y);
-
- ray.P = make_float3(0.0f, 0.0f, 0.0f);
- ray.D = equirectangular_to_direction(u, v);
- ray.t = 0.0f;
+ ShaderData sd;
+ PathState state = {0};
+ uint4 in = input[i];
+
+ /* setup ray */
+ Ray ray;
+ float u = __uint_as_float(in.x);
+ float v = __uint_as_float(in.y);
+
+ ray.P = make_float3(0.0f, 0.0f, 0.0f);
+ ray.D = equirectangular_to_direction(u, v);
+ ray.t = 0.0f;
#ifdef __CAMERA_MOTION__
- ray.time = 0.5f;
+ ray.time = 0.5f;
#endif
#ifdef __RAY_DIFFERENTIALS__
- ray.dD = differential3_zero();
- ray.dP = differential3_zero();
+ ray.dD = differential3_zero();
+ ray.dP = differential3_zero();
#endif
- /* setup shader data */
- shader_setup_from_background(kg, &sd, &ray);
+ /* setup shader data */
+ shader_setup_from_background(kg, &sd, &ray);
- /* evaluate */
- int path_flag = 0; /* we can't know which type of BSDF this is for */
- shader_eval_surface(kg, &sd, &state, path_flag | PATH_RAY_EMISSION);
- float3 color = shader_background_eval(&sd);
+ /* evaluate */
+ int path_flag = 0; /* we can't know which type of BSDF this is for */
+ shader_eval_surface(kg, &sd, &state, path_flag | PATH_RAY_EMISSION);
+ float3 color = shader_background_eval(&sd);
- /* write output */
- output[i] += make_float4(color.x, color.y, color.z, 0.0f);
+ /* write output */
+ output[i] += make_float4(color.x, color.y, color.z, 0.0f);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h
index b73ad47dad3..1085930c33a 100644
--- a/intern/cycles/kernel/kernel_camera.h
+++ b/intern/cycles/kernel/kernel_camera.h
@@ -20,209 +20,217 @@ CCL_NAMESPACE_BEGIN
ccl_device float2 camera_sample_aperture(ccl_constant KernelCamera *cam, float u, float v)
{
- float blades = cam->blades;
- float2 bokeh;
-
- if(blades == 0.0f) {
- /* sample disk */
- bokeh = concentric_sample_disk(u, v);
- }
- else {
- /* sample polygon */
- float rotation = cam->bladesrotation;
- bokeh = regular_polygon_sample(blades, rotation, u, v);
- }
-
- /* anamorphic lens bokeh */
- bokeh.x *= cam->inv_aperture_ratio;
-
- return bokeh;
+ float blades = cam->blades;
+ float2 bokeh;
+
+ if (blades == 0.0f) {
+ /* sample disk */
+ bokeh = concentric_sample_disk(u, v);
+ }
+ else {
+ /* sample polygon */
+ float rotation = cam->bladesrotation;
+ bokeh = regular_polygon_sample(blades, rotation, u, v);
+ }
+
+ /* anamorphic lens bokeh */
+ bokeh.x *= cam->inv_aperture_ratio;
+
+ return bokeh;
}
-ccl_device void camera_sample_perspective(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, ccl_addr_space Ray *ray)
+ccl_device void camera_sample_perspective(KernelGlobals *kg,
+ float raster_x,
+ float raster_y,
+ float lens_u,
+ float lens_v,
+ ccl_addr_space Ray *ray)
{
- /* create ray form raster position */
- ProjectionTransform rastertocamera = kernel_data.cam.rastertocamera;
- float3 raster = make_float3(raster_x, raster_y, 0.0f);
- float3 Pcamera = transform_perspective(&rastertocamera, raster);
+ /* create ray form raster position */
+ ProjectionTransform rastertocamera = kernel_data.cam.rastertocamera;
+ float3 raster = make_float3(raster_x, raster_y, 0.0f);
+ float3 Pcamera = transform_perspective(&rastertocamera, raster);
#ifdef __CAMERA_MOTION__
- if(kernel_data.cam.have_perspective_motion) {
- /* TODO(sergey): Currently we interpolate projected coordinate which
- * gives nice looking result and which is simple, but is in fact a bit
- * different comparing to constructing projective matrix from an
- * interpolated field of view.
- */
- if(ray->time < 0.5f) {
- ProjectionTransform rastertocamera_pre = kernel_data.cam.perspective_pre;
- float3 Pcamera_pre =
- transform_perspective(&rastertocamera_pre, raster);
- Pcamera = interp(Pcamera_pre, Pcamera, ray->time * 2.0f);
- }
- else {
- ProjectionTransform rastertocamera_post = kernel_data.cam.perspective_post;
- float3 Pcamera_post =
- transform_perspective(&rastertocamera_post, raster);
- Pcamera = interp(Pcamera, Pcamera_post, (ray->time - 0.5f) * 2.0f);
- }
- }
+ if (kernel_data.cam.have_perspective_motion) {
+ /* TODO(sergey): Currently we interpolate projected coordinate which
+ * gives nice looking result and which is simple, but is in fact a bit
+ * different comparing to constructing projective matrix from an
+ * interpolated field of view.
+ */
+ if (ray->time < 0.5f) {
+ ProjectionTransform rastertocamera_pre = kernel_data.cam.perspective_pre;
+ float3 Pcamera_pre = transform_perspective(&rastertocamera_pre, raster);
+ Pcamera = interp(Pcamera_pre, Pcamera, ray->time * 2.0f);
+ }
+ else {
+ ProjectionTransform rastertocamera_post = kernel_data.cam.perspective_post;
+ float3 Pcamera_post = transform_perspective(&rastertocamera_post, raster);
+ Pcamera = interp(Pcamera, Pcamera_post, (ray->time - 0.5f) * 2.0f);
+ }
+ }
#endif
- float3 P = make_float3(0.0f, 0.0f, 0.0f);
- float3 D = Pcamera;
+ float3 P = make_float3(0.0f, 0.0f, 0.0f);
+ float3 D = Pcamera;
- /* modify ray for depth of field */
- float aperturesize = kernel_data.cam.aperturesize;
+ /* modify ray for depth of field */
+ float aperturesize = kernel_data.cam.aperturesize;
- if(aperturesize > 0.0f) {
- /* sample point on aperture */
- float2 lensuv = camera_sample_aperture(&kernel_data.cam, lens_u, lens_v)*aperturesize;
+ if (aperturesize > 0.0f) {
+ /* sample point on aperture */
+ float2 lensuv = camera_sample_aperture(&kernel_data.cam, lens_u, lens_v) * aperturesize;
- /* compute point on plane of focus */
- float ft = kernel_data.cam.focaldistance/D.z;
- float3 Pfocus = D*ft;
+ /* compute point on plane of focus */
+ float ft = kernel_data.cam.focaldistance / D.z;
+ float3 Pfocus = D * ft;
- /* update ray for effect of lens */
- P = make_float3(lensuv.x, lensuv.y, 0.0f);
- D = normalize(Pfocus - P);
- }
+ /* update ray for effect of lens */
+ P = make_float3(lensuv.x, lensuv.y, 0.0f);
+ D = normalize(Pfocus - P);
+ }
- /* transform ray from camera to world */
- Transform cameratoworld = kernel_data.cam.cameratoworld;
+ /* transform ray from camera to world */
+ Transform cameratoworld = kernel_data.cam.cameratoworld;
#ifdef __CAMERA_MOTION__
- if(kernel_data.cam.num_motion_steps) {
- transform_motion_array_interpolate(
- &cameratoworld,
- kernel_tex_array(__camera_motion),
- kernel_data.cam.num_motion_steps,
- ray->time);
- }
+ if (kernel_data.cam.num_motion_steps) {
+ transform_motion_array_interpolate(&cameratoworld,
+ kernel_tex_array(__camera_motion),
+ kernel_data.cam.num_motion_steps,
+ ray->time);
+ }
#endif
- P = transform_point(&cameratoworld, P);
- D = normalize(transform_direction(&cameratoworld, D));
+ P = transform_point(&cameratoworld, P);
+ D = normalize(transform_direction(&cameratoworld, D));
- bool use_stereo = kernel_data.cam.interocular_offset != 0.0f;
- if(!use_stereo) {
- /* No stereo */
- ray->P = P;
- ray->D = D;
+ bool use_stereo = kernel_data.cam.interocular_offset != 0.0f;
+ if (!use_stereo) {
+ /* No stereo */
+ ray->P = P;
+ ray->D = D;
#ifdef __RAY_DIFFERENTIALS__
- float3 Dcenter = transform_direction(&cameratoworld, Pcamera);
+ float3 Dcenter = transform_direction(&cameratoworld, Pcamera);
- ray->dP = differential3_zero();
- ray->dD.dx = normalize(Dcenter + float4_to_float3(kernel_data.cam.dx)) - normalize(Dcenter);
- ray->dD.dy = normalize(Dcenter + float4_to_float3(kernel_data.cam.dy)) - normalize(Dcenter);
+ ray->dP = differential3_zero();
+ ray->dD.dx = normalize(Dcenter + float4_to_float3(kernel_data.cam.dx)) - normalize(Dcenter);
+ ray->dD.dy = normalize(Dcenter + float4_to_float3(kernel_data.cam.dy)) - normalize(Dcenter);
#endif
- }
- else {
- /* Spherical stereo */
- spherical_stereo_transform(&kernel_data.cam, &P, &D);
- ray->P = P;
- ray->D = D;
+ }
+ else {
+ /* Spherical stereo */
+ spherical_stereo_transform(&kernel_data.cam, &P, &D);
+ ray->P = P;
+ ray->D = D;
#ifdef __RAY_DIFFERENTIALS__
- /* Ray differentials, computed from scratch using the raster coordinates
- * because we don't want to be affected by depth of field. We compute
- * ray origin and direction for the center and two neighbouring pixels
- * and simply take their differences. */
- float3 Pnostereo = transform_point(&cameratoworld, make_float3(0.0f, 0.0f, 0.0f));
-
- float3 Pcenter = Pnostereo;
- float3 Dcenter = Pcamera;
- Dcenter = normalize(transform_direction(&cameratoworld, Dcenter));
- spherical_stereo_transform(&kernel_data.cam, &Pcenter, &Dcenter);
-
- float3 Px = Pnostereo;
- float3 Dx = transform_perspective(&rastertocamera, make_float3(raster_x + 1.0f, raster_y, 0.0f));
- Dx = normalize(transform_direction(&cameratoworld, Dx));
- spherical_stereo_transform(&kernel_data.cam, &Px, &Dx);
-
- ray->dP.dx = Px - Pcenter;
- ray->dD.dx = Dx - Dcenter;
-
- float3 Py = Pnostereo;
- float3 Dy = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y + 1.0f, 0.0f));
- Dy = normalize(transform_direction(&cameratoworld, Dy));
- spherical_stereo_transform(&kernel_data.cam, &Py, &Dy);
-
- ray->dP.dy = Py - Pcenter;
- ray->dD.dy = Dy - Dcenter;
+ /* Ray differentials, computed from scratch using the raster coordinates
+ * because we don't want to be affected by depth of field. We compute
+ * ray origin and direction for the center and two neighbouring pixels
+ * and simply take their differences. */
+ float3 Pnostereo = transform_point(&cameratoworld, make_float3(0.0f, 0.0f, 0.0f));
+
+ float3 Pcenter = Pnostereo;
+ float3 Dcenter = Pcamera;
+ Dcenter = normalize(transform_direction(&cameratoworld, Dcenter));
+ spherical_stereo_transform(&kernel_data.cam, &Pcenter, &Dcenter);
+
+ float3 Px = Pnostereo;
+ float3 Dx = transform_perspective(&rastertocamera,
+ make_float3(raster_x + 1.0f, raster_y, 0.0f));
+ Dx = normalize(transform_direction(&cameratoworld, Dx));
+ spherical_stereo_transform(&kernel_data.cam, &Px, &Dx);
+
+ ray->dP.dx = Px - Pcenter;
+ ray->dD.dx = Dx - Dcenter;
+
+ float3 Py = Pnostereo;
+ float3 Dy = transform_perspective(&rastertocamera,
+ make_float3(raster_x, raster_y + 1.0f, 0.0f));
+ Dy = normalize(transform_direction(&cameratoworld, Dy));
+ spherical_stereo_transform(&kernel_data.cam, &Py, &Dy);
+
+ ray->dP.dy = Py - Pcenter;
+ ray->dD.dy = Dy - Dcenter;
#endif
- }
+ }
#ifdef __CAMERA_CLIPPING__
- /* clipping */
- float z_inv = 1.0f / normalize(Pcamera).z;
- float nearclip = kernel_data.cam.nearclip * z_inv;
- ray->P += nearclip * ray->D;
- ray->dP.dx += nearclip * ray->dD.dx;
- ray->dP.dy += nearclip * ray->dD.dy;
- ray->t = kernel_data.cam.cliplength * z_inv;
+ /* clipping */
+ float z_inv = 1.0f / normalize(Pcamera).z;
+ float nearclip = kernel_data.cam.nearclip * z_inv;
+ ray->P += nearclip * ray->D;
+ ray->dP.dx += nearclip * ray->dD.dx;
+ ray->dP.dy += nearclip * ray->dD.dy;
+ ray->t = kernel_data.cam.cliplength * z_inv;
#else
- ray->t = FLT_MAX;
+ ray->t = FLT_MAX;
#endif
}
/* Orthographic Camera */
-ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, ccl_addr_space Ray *ray)
+ccl_device void camera_sample_orthographic(KernelGlobals *kg,
+ float raster_x,
+ float raster_y,
+ float lens_u,
+ float lens_v,
+ ccl_addr_space Ray *ray)
{
- /* create ray form raster position */
- ProjectionTransform rastertocamera = kernel_data.cam.rastertocamera;
- float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f));
-
- float3 P;
- float3 D = make_float3(0.0f, 0.0f, 1.0f);
-
- /* modify ray for depth of field */
- float aperturesize = kernel_data.cam.aperturesize;
-
- if(aperturesize > 0.0f) {
- /* sample point on aperture */
- float2 lensuv = camera_sample_aperture(&kernel_data.cam, lens_u, lens_v)*aperturesize;
-
- /* compute point on plane of focus */
- float3 Pfocus = D * kernel_data.cam.focaldistance;
-
- /* update ray for effect of lens */
- float3 lensuvw = make_float3(lensuv.x, lensuv.y, 0.0f);
- P = Pcamera + lensuvw;
- D = normalize(Pfocus - lensuvw);
- }
- else {
- P = Pcamera;
- }
- /* transform ray from camera to world */
- Transform cameratoworld = kernel_data.cam.cameratoworld;
+ /* create ray form raster position */
+ ProjectionTransform rastertocamera = kernel_data.cam.rastertocamera;
+ float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f));
+
+ float3 P;
+ float3 D = make_float3(0.0f, 0.0f, 1.0f);
+
+ /* modify ray for depth of field */
+ float aperturesize = kernel_data.cam.aperturesize;
+
+ if (aperturesize > 0.0f) {
+ /* sample point on aperture */
+ float2 lensuv = camera_sample_aperture(&kernel_data.cam, lens_u, lens_v) * aperturesize;
+
+ /* compute point on plane of focus */
+ float3 Pfocus = D * kernel_data.cam.focaldistance;
+
+ /* update ray for effect of lens */
+ float3 lensuvw = make_float3(lensuv.x, lensuv.y, 0.0f);
+ P = Pcamera + lensuvw;
+ D = normalize(Pfocus - lensuvw);
+ }
+ else {
+ P = Pcamera;
+ }
+ /* transform ray from camera to world */
+ Transform cameratoworld = kernel_data.cam.cameratoworld;
#ifdef __CAMERA_MOTION__
- if(kernel_data.cam.num_motion_steps) {
- transform_motion_array_interpolate(
- &cameratoworld,
- kernel_tex_array(__camera_motion),
- kernel_data.cam.num_motion_steps,
- ray->time);
- }
+ if (kernel_data.cam.num_motion_steps) {
+ transform_motion_array_interpolate(&cameratoworld,
+ kernel_tex_array(__camera_motion),
+ kernel_data.cam.num_motion_steps,
+ ray->time);
+ }
#endif
- ray->P = transform_point(&cameratoworld, P);
- ray->D = normalize(transform_direction(&cameratoworld, D));
+ ray->P = transform_point(&cameratoworld, P);
+ ray->D = normalize(transform_direction(&cameratoworld, D));
#ifdef __RAY_DIFFERENTIALS__
- /* ray differential */
- ray->dP.dx = float4_to_float3(kernel_data.cam.dx);
- ray->dP.dy = float4_to_float3(kernel_data.cam.dy);
+ /* ray differential */
+ ray->dP.dx = float4_to_float3(kernel_data.cam.dx);
+ ray->dP.dy = float4_to_float3(kernel_data.cam.dy);
- ray->dD = differential3_zero();
+ ray->dD = differential3_zero();
#endif
#ifdef __CAMERA_CLIPPING__
- /* clipping */
- ray->t = kernel_data.cam.cliplength;
+ /* clipping */
+ ray->t = kernel_data.cam.cliplength;
#else
- ray->t = FLT_MAX;
+ ray->t = FLT_MAX;
#endif
}
@@ -230,242 +238,244 @@ ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, fl
ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
const ccl_global DecomposedTransform *cam_motion,
- float raster_x, float raster_y,
- float lens_u, float lens_v,
+ float raster_x,
+ float raster_y,
+ float lens_u,
+ float lens_v,
ccl_addr_space Ray *ray)
{
- ProjectionTransform rastertocamera = cam->rastertocamera;
- float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f));
+ ProjectionTransform rastertocamera = cam->rastertocamera;
+ float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f));
- /* create ray form raster position */
- float3 P = make_float3(0.0f, 0.0f, 0.0f);
- float3 D = panorama_to_direction(cam, Pcamera.x, Pcamera.y);
+ /* create ray form raster position */
+ float3 P = make_float3(0.0f, 0.0f, 0.0f);
+ float3 D = panorama_to_direction(cam, Pcamera.x, Pcamera.y);
- /* indicates ray should not receive any light, outside of the lens */
- if(is_zero(D)) {
- ray->t = 0.0f;
- return;
- }
+ /* indicates ray should not receive any light, outside of the lens */
+ if (is_zero(D)) {
+ ray->t = 0.0f;
+ return;
+ }
- /* modify ray for depth of field */
- float aperturesize = cam->aperturesize;
+ /* modify ray for depth of field */
+ float aperturesize = cam->aperturesize;
- if(aperturesize > 0.0f) {
- /* sample point on aperture */
- float2 lensuv = camera_sample_aperture(cam, lens_u, lens_v)*aperturesize;
+ if (aperturesize > 0.0f) {
+ /* sample point on aperture */
+ float2 lensuv = camera_sample_aperture(cam, lens_u, lens_v) * aperturesize;
- /* compute point on plane of focus */
- float3 Dfocus = normalize(D);
- float3 Pfocus = Dfocus * cam->focaldistance;
+ /* compute point on plane of focus */
+ float3 Dfocus = normalize(D);
+ float3 Pfocus = Dfocus * cam->focaldistance;
- /* calculate orthonormal coordinates perpendicular to Dfocus */
- float3 U, V;
- U = normalize(make_float3(1.0f, 0.0f, 0.0f) - Dfocus.x * Dfocus);
- V = normalize(cross(Dfocus, U));
+ /* calculate orthonormal coordinates perpendicular to Dfocus */
+ float3 U, V;
+ U = normalize(make_float3(1.0f, 0.0f, 0.0f) - Dfocus.x * Dfocus);
+ V = normalize(cross(Dfocus, U));
- /* update ray for effect of lens */
- P = U * lensuv.x + V * lensuv.y;
- D = normalize(Pfocus - P);
- }
+ /* update ray for effect of lens */
+ P = U * lensuv.x + V * lensuv.y;
+ D = normalize(Pfocus - P);
+ }
- /* transform ray from camera to world */
- Transform cameratoworld = cam->cameratoworld;
+ /* transform ray from camera to world */
+ Transform cameratoworld = cam->cameratoworld;
#ifdef __CAMERA_MOTION__
- if(cam->num_motion_steps) {
- transform_motion_array_interpolate(
- &cameratoworld,
- cam_motion,
- cam->num_motion_steps,
- ray->time);
- }
+ if (cam->num_motion_steps) {
+ transform_motion_array_interpolate(
+ &cameratoworld, cam_motion, cam->num_motion_steps, ray->time);
+ }
#endif
- P = transform_point(&cameratoworld, P);
- D = normalize(transform_direction(&cameratoworld, D));
+ P = transform_point(&cameratoworld, P);
+ D = normalize(transform_direction(&cameratoworld, D));
- /* Stereo transform */
- bool use_stereo = cam->interocular_offset != 0.0f;
- if(use_stereo) {
- spherical_stereo_transform(cam, &P, &D);
- }
+ /* Stereo transform */
+ bool use_stereo = cam->interocular_offset != 0.0f;
+ if (use_stereo) {
+ spherical_stereo_transform(cam, &P, &D);
+ }
- ray->P = P;
- ray->D = D;
+ ray->P = P;
+ ray->D = D;
#ifdef __RAY_DIFFERENTIALS__
- /* Ray differentials, computed from scratch using the raster coordinates
- * because we don't want to be affected by depth of field. We compute
- * ray origin and direction for the center and two neighbouring pixels
- * and simply take their differences. */
- float3 Pcenter = Pcamera;
- float3 Dcenter = panorama_to_direction(cam, Pcenter.x, Pcenter.y);
- Pcenter = transform_point(&cameratoworld, Pcenter);
- Dcenter = normalize(transform_direction(&cameratoworld, Dcenter));
- if(use_stereo) {
- spherical_stereo_transform(cam, &Pcenter, &Dcenter);
- }
-
- float3 Px = transform_perspective(&rastertocamera, make_float3(raster_x + 1.0f, raster_y, 0.0f));
- float3 Dx = panorama_to_direction(cam, Px.x, Px.y);
- Px = transform_point(&cameratoworld, Px);
- Dx = normalize(transform_direction(&cameratoworld, Dx));
- if(use_stereo) {
- spherical_stereo_transform(cam, &Px, &Dx);
- }
-
- ray->dP.dx = Px - Pcenter;
- ray->dD.dx = Dx - Dcenter;
-
- float3 Py = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y + 1.0f, 0.0f));
- float3 Dy = panorama_to_direction(cam, Py.x, Py.y);
- Py = transform_point(&cameratoworld, Py);
- Dy = normalize(transform_direction(&cameratoworld, Dy));
- if(use_stereo) {
- spherical_stereo_transform(cam, &Py, &Dy);
- }
-
- ray->dP.dy = Py - Pcenter;
- ray->dD.dy = Dy - Dcenter;
+ /* Ray differentials, computed from scratch using the raster coordinates
+ * because we don't want to be affected by depth of field. We compute
+ * ray origin and direction for the center and two neighbouring pixels
+ * and simply take their differences. */
+ float3 Pcenter = Pcamera;
+ float3 Dcenter = panorama_to_direction(cam, Pcenter.x, Pcenter.y);
+ Pcenter = transform_point(&cameratoworld, Pcenter);
+ Dcenter = normalize(transform_direction(&cameratoworld, Dcenter));
+ if (use_stereo) {
+ spherical_stereo_transform(cam, &Pcenter, &Dcenter);
+ }
+
+ float3 Px = transform_perspective(&rastertocamera, make_float3(raster_x + 1.0f, raster_y, 0.0f));
+ float3 Dx = panorama_to_direction(cam, Px.x, Px.y);
+ Px = transform_point(&cameratoworld, Px);
+ Dx = normalize(transform_direction(&cameratoworld, Dx));
+ if (use_stereo) {
+ spherical_stereo_transform(cam, &Px, &Dx);
+ }
+
+ ray->dP.dx = Px - Pcenter;
+ ray->dD.dx = Dx - Dcenter;
+
+ float3 Py = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y + 1.0f, 0.0f));
+ float3 Dy = panorama_to_direction(cam, Py.x, Py.y);
+ Py = transform_point(&cameratoworld, Py);
+ Dy = normalize(transform_direction(&cameratoworld, Dy));
+ if (use_stereo) {
+ spherical_stereo_transform(cam, &Py, &Dy);
+ }
+
+ ray->dP.dy = Py - Pcenter;
+ ray->dD.dy = Dy - Dcenter;
#endif
#ifdef __CAMERA_CLIPPING__
- /* clipping */
- float nearclip = cam->nearclip;
- ray->P += nearclip * ray->D;
- ray->dP.dx += nearclip * ray->dD.dx;
- ray->dP.dy += nearclip * ray->dD.dy;
- ray->t = cam->cliplength;
+ /* clipping */
+ float nearclip = cam->nearclip;
+ ray->P += nearclip * ray->D;
+ ray->dP.dx += nearclip * ray->dD.dx;
+ ray->dP.dy += nearclip * ray->dD.dy;
+ ray->t = cam->cliplength;
#else
- ray->t = FLT_MAX;
+ ray->t = FLT_MAX;
#endif
}
/* Common */
ccl_device_inline void camera_sample(KernelGlobals *kg,
- int x, int y,
- float filter_u, float filter_v,
- float lens_u, float lens_v,
+ int x,
+ int y,
+ float filter_u,
+ float filter_v,
+ float lens_u,
+ float lens_v,
float time,
ccl_addr_space Ray *ray)
{
- /* pixel filter */
- int filter_table_offset = kernel_data.film.filter_table_offset;
- float raster_x = x + lookup_table_read(kg, filter_u, filter_table_offset, FILTER_TABLE_SIZE);
- float raster_y = y + lookup_table_read(kg, filter_v, filter_table_offset, FILTER_TABLE_SIZE);
+ /* pixel filter */
+ int filter_table_offset = kernel_data.film.filter_table_offset;
+ float raster_x = x + lookup_table_read(kg, filter_u, filter_table_offset, FILTER_TABLE_SIZE);
+ float raster_y = y + lookup_table_read(kg, filter_v, filter_table_offset, FILTER_TABLE_SIZE);
#ifdef __CAMERA_MOTION__
- /* motion blur */
- if(kernel_data.cam.shuttertime == -1.0f) {
- ray->time = 0.5f;
- }
- else {
- /* TODO(sergey): Such lookup is unneeded when there's rolling shutter
- * effect in use but rolling shutter duration is set to 0.0.
- */
- const int shutter_table_offset = kernel_data.cam.shutter_table_offset;
- ray->time = lookup_table_read(kg, time, shutter_table_offset, SHUTTER_TABLE_SIZE);
- /* TODO(sergey): Currently single rolling shutter effect type only
- * where scanlines are acquired from top to bottom and whole scanline
- * is acquired at once (no delay in acquisition happens between pixels
- * of single scanline).
- *
- * Might want to support more models in the future.
- */
- if(kernel_data.cam.rolling_shutter_type) {
- /* Time corresponding to a fully rolling shutter only effect:
- * top of the frame is time 0.0, bottom of the frame is time 1.0.
- */
- const float time = 1.0f - (float)y / kernel_data.cam.height;
- const float duration = kernel_data.cam.rolling_shutter_duration;
- if(duration != 0.0f) {
- /* This isn't fully physical correct, but lets us to have simple
- * controls in the interface. The idea here is basically sort of
- * linear interpolation between how much rolling shutter effect
- * exist on the frame and how much of it is a motion blur effect.
- */
- ray->time = (ray->time - 0.5f) * duration;
- ray->time += (time - 0.5f) * (1.0f - duration) + 0.5f;
- }
- else {
- ray->time = time;
- }
- }
- }
+ /* motion blur */
+ if (kernel_data.cam.shuttertime == -1.0f) {
+ ray->time = 0.5f;
+ }
+ else {
+ /* TODO(sergey): Such lookup is unneeded when there's rolling shutter
+ * effect in use but rolling shutter duration is set to 0.0.
+ */
+ const int shutter_table_offset = kernel_data.cam.shutter_table_offset;
+ ray->time = lookup_table_read(kg, time, shutter_table_offset, SHUTTER_TABLE_SIZE);
+ /* TODO(sergey): Currently single rolling shutter effect type only
+ * where scanlines are acquired from top to bottom and whole scanline
+ * is acquired at once (no delay in acquisition happens between pixels
+ * of single scanline).
+ *
+ * Might want to support more models in the future.
+ */
+ if (kernel_data.cam.rolling_shutter_type) {
+ /* Time corresponding to a fully rolling shutter only effect:
+ * top of the frame is time 0.0, bottom of the frame is time 1.0.
+ */
+ const float time = 1.0f - (float)y / kernel_data.cam.height;
+ const float duration = kernel_data.cam.rolling_shutter_duration;
+ if (duration != 0.0f) {
+ /* This isn't fully physical correct, but lets us to have simple
+ * controls in the interface. The idea here is basically sort of
+ * linear interpolation between how much rolling shutter effect
+ * exist on the frame and how much of it is a motion blur effect.
+ */
+ ray->time = (ray->time - 0.5f) * duration;
+ ray->time += (time - 0.5f) * (1.0f - duration) + 0.5f;
+ }
+ else {
+ ray->time = time;
+ }
+ }
+ }
#endif
- /* sample */
- if(kernel_data.cam.type == CAMERA_PERSPECTIVE) {
- camera_sample_perspective(kg, raster_x, raster_y, lens_u, lens_v, ray);
- }
- else if(kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) {
- camera_sample_orthographic(kg, raster_x, raster_y, lens_u, lens_v, ray);
- }
- else {
- const ccl_global DecomposedTransform *cam_motion = kernel_tex_array(__camera_motion);
- camera_sample_panorama(&kernel_data.cam, cam_motion, raster_x, raster_y, lens_u, lens_v, ray);
- }
+ /* sample */
+ if (kernel_data.cam.type == CAMERA_PERSPECTIVE) {
+ camera_sample_perspective(kg, raster_x, raster_y, lens_u, lens_v, ray);
+ }
+ else if (kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) {
+ camera_sample_orthographic(kg, raster_x, raster_y, lens_u, lens_v, ray);
+ }
+ else {
+ const ccl_global DecomposedTransform *cam_motion = kernel_tex_array(__camera_motion);
+ camera_sample_panorama(&kernel_data.cam, cam_motion, raster_x, raster_y, lens_u, lens_v, ray);
+ }
}
/* Utilities */
ccl_device_inline float3 camera_position(KernelGlobals *kg)
{
- Transform cameratoworld = kernel_data.cam.cameratoworld;
- return make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w);
+ Transform cameratoworld = kernel_data.cam.cameratoworld;
+ return make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w);
}
ccl_device_inline float camera_distance(KernelGlobals *kg, float3 P)
{
- Transform cameratoworld = kernel_data.cam.cameratoworld;
- float3 camP = make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w);
-
- if(kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) {
- float3 camD = make_float3(cameratoworld.x.z, cameratoworld.y.z, cameratoworld.z.z);
- return fabsf(dot((P - camP), camD));
- }
- else
- return len(P - camP);
+ Transform cameratoworld = kernel_data.cam.cameratoworld;
+ float3 camP = make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w);
+
+ if (kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) {
+ float3 camD = make_float3(cameratoworld.x.z, cameratoworld.y.z, cameratoworld.z.z);
+ return fabsf(dot((P - camP), camD));
+ }
+ else
+ return len(P - camP);
}
ccl_device_inline float3 camera_direction_from_point(KernelGlobals *kg, float3 P)
{
- Transform cameratoworld = kernel_data.cam.cameratoworld;
-
- if(kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) {
- float3 camD = make_float3(cameratoworld.x.z, cameratoworld.y.z, cameratoworld.z.z);
- return -camD;
- }
- else {
- float3 camP = make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w);
- return normalize(camP - P);
- }
+ Transform cameratoworld = kernel_data.cam.cameratoworld;
+
+ if (kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) {
+ float3 camD = make_float3(cameratoworld.x.z, cameratoworld.y.z, cameratoworld.z.z);
+ return -camD;
+ }
+ else {
+ float3 camP = make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w);
+ return normalize(camP - P);
+ }
}
ccl_device_inline float3 camera_world_to_ndc(KernelGlobals *kg, ShaderData *sd, float3 P)
{
- if(kernel_data.cam.type != CAMERA_PANORAMA) {
- /* perspective / ortho */
- if(sd->object == PRIM_NONE && kernel_data.cam.type == CAMERA_PERSPECTIVE)
- P += camera_position(kg);
-
- ProjectionTransform tfm = kernel_data.cam.worldtondc;
- return transform_perspective(&tfm, P);
- }
- else {
- /* panorama */
- Transform tfm = kernel_data.cam.worldtocamera;
-
- if(sd->object != OBJECT_NONE)
- P = normalize(transform_point(&tfm, P));
- else
- P = normalize(transform_direction(&tfm, P));
-
- float2 uv = direction_to_panorama(&kernel_data.cam, P);
-
- return make_float3(uv.x, uv.y, 0.0f);
- }
+ if (kernel_data.cam.type != CAMERA_PANORAMA) {
+ /* perspective / ortho */
+ if (sd->object == PRIM_NONE && kernel_data.cam.type == CAMERA_PERSPECTIVE)
+ P += camera_position(kg);
+
+ ProjectionTransform tfm = kernel_data.cam.worldtondc;
+ return transform_perspective(&tfm, P);
+ }
+ else {
+ /* panorama */
+ Transform tfm = kernel_data.cam.worldtocamera;
+
+ if (sd->object != OBJECT_NONE)
+ P = normalize(transform_point(&tfm, P));
+ else
+ P = normalize(transform_direction(&tfm, P));
+
+ float2 uv = direction_to_panorama(&kernel_data.cam, P);
+
+ return make_float3(uv.x, uv.y, 0.0f);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_color.h b/intern/cycles/kernel/kernel_color.h
index ea478a8a5d3..5eb1bdad02e 100644
--- a/intern/cycles/kernel/kernel_color.h
+++ b/intern/cycles/kernel/kernel_color.h
@@ -23,16 +23,16 @@ CCL_NAMESPACE_BEGIN
ccl_device float3 xyz_to_rgb(KernelGlobals *kg, float3 xyz)
{
- return make_float3(dot(float4_to_float3(kernel_data.film.xyz_to_r), xyz),
- dot(float4_to_float3(kernel_data.film.xyz_to_g), xyz),
- dot(float4_to_float3(kernel_data.film.xyz_to_b), xyz));
+ return make_float3(dot(float4_to_float3(kernel_data.film.xyz_to_r), xyz),
+ dot(float4_to_float3(kernel_data.film.xyz_to_g), xyz),
+ dot(float4_to_float3(kernel_data.film.xyz_to_b), xyz));
}
ccl_device float linear_rgb_to_gray(KernelGlobals *kg, float3 c)
{
- return dot(c, float4_to_float3(kernel_data.film.rgb_to_y));
+ return dot(c, float4_to_float3(kernel_data.film.rgb_to_y));
}
CCL_NAMESPACE_END
-#endif /* __KERNEL_COLOR_H__ */
+#endif /* __KERNEL_COLOR_H__ */
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index 4ee80850402..e8fedca4489 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -55,9 +55,9 @@
/* On x86_64, versions of glibc < 2.16 have an issue where expf is
* much slower than the double version. This was fixed in glibc 2.16.
*/
-#if !defined(__KERNEL_GPU__) && defined(__x86_64__) && defined(__x86_64__) && \
- defined(__GNU_LIBRARY__) && defined(__GLIBC__ ) && defined(__GLIBC_MINOR__) && \
- (__GLIBC__ <= 2 && __GLIBC_MINOR__ < 16)
+#if !defined(__KERNEL_GPU__) && defined(__x86_64__) && defined(__x86_64__) && \
+ defined(__GNU_LIBRARY__) && defined(__GLIBC__) && defined(__GLIBC_MINOR__) && \
+ (__GLIBC__ <= 2 && __GLIBC_MINOR__ < 16)
# define expf(x) ((float)exp((double)(x)))
#endif
@@ -71,41 +71,41 @@ CCL_NAMESPACE_BEGIN
/* Texture types to be compatible with CUDA textures. These are really just
* simple arrays and after inlining fetch hopefully revert to being a simple
* pointer lookup. */
-template<typename T> struct texture {
- ccl_always_inline const T& fetch(int index)
- {
- kernel_assert(index >= 0 && index < width);
- return data[index];
- }
+template<typename T> struct texture {
+ ccl_always_inline const T &fetch(int index)
+ {
+ kernel_assert(index >= 0 && index < width);
+ return data[index];
+ }
#if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
- /* Reads 256 bytes but indexes in blocks of 128 bytes to maintain
- * compatibility with existing indicies and data structures.
- */
- ccl_always_inline avxf fetch_avxf(const int index)
- {
- kernel_assert(index >= 0 && (index+1) < width);
- ssef *ssef_data = (ssef*)data;
- ssef *ssef_node_data = &ssef_data[index];
- return _mm256_loadu_ps((float *)ssef_node_data);
- }
+ /* Reads 256 bytes but indexes in blocks of 128 bytes to maintain
+ * compatibility with existing indicies and data structures.
+ */
+ ccl_always_inline avxf fetch_avxf(const int index)
+ {
+ kernel_assert(index >= 0 && (index + 1) < width);
+ ssef *ssef_data = (ssef *)data;
+ ssef *ssef_node_data = &ssef_data[index];
+ return _mm256_loadu_ps((float *)ssef_node_data);
+ }
#endif
#ifdef __KERNEL_SSE2__
- ccl_always_inline ssef fetch_ssef(int index)
- {
- kernel_assert(index >= 0 && index < width);
- return ((ssef*)data)[index];
- }
-
- ccl_always_inline ssei fetch_ssei(int index)
- {
- kernel_assert(index >= 0 && index < width);
- return ((ssei*)data)[index];
- }
+ ccl_always_inline ssef fetch_ssef(int index)
+ {
+ kernel_assert(index >= 0 && index < width);
+ return ((ssef *)data)[index];
+ }
+
+ ccl_always_inline ssei fetch_ssei(int index)
+ {
+ kernel_assert(index >= 0 && index < width);
+ return ((ssei *)data)[index];
+ }
#endif
- T *data;
- int width;
+ T *data;
+ int width;
};
/* Macros to handle different memory storage on different devices */
@@ -124,33 +124,33 @@ typedef vector3<sseb> sse3b;
typedef vector3<ssef> sse3f;
typedef vector3<ssei> sse3i;
-ccl_device_inline void print_sse3b(const char *label, sse3b& a)
+ccl_device_inline void print_sse3b(const char *label, sse3b &a)
{
- print_sseb(label, a.x);
- print_sseb(label, a.y);
- print_sseb(label, a.z);
+ print_sseb(label, a.x);
+ print_sseb(label, a.y);
+ print_sseb(label, a.z);
}
-ccl_device_inline void print_sse3f(const char *label, sse3f& a)
+ccl_device_inline void print_sse3f(const char *label, sse3f &a)
{
- print_ssef(label, a.x);
- print_ssef(label, a.y);
- print_ssef(label, a.z);
+ print_ssef(label, a.x);
+ print_ssef(label, a.y);
+ print_ssef(label, a.z);
}
-ccl_device_inline void print_sse3i(const char *label, sse3i& a)
+ccl_device_inline void print_sse3i(const char *label, sse3i &a)
{
- print_ssei(label, a.x);
- print_ssei(label, a.y);
- print_ssei(label, a.z);
+ print_ssei(label, a.x);
+ print_ssei(label, a.y);
+ print_ssei(label, a.z);
}
-#if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
+# if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
typedef vector3<avxf> avx3f;
-#endif
+# endif
#endif
CCL_NAMESPACE_END
-#endif /* __KERNEL_COMPAT_CPU_H__ */
+#endif /* __KERNEL_COMPAT_CPU_H__ */
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 8ed96bbae64..469b81d120b 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -42,22 +42,22 @@ typedef unsigned long long CUtexObject;
__device__ half __float2half(const float f)
{
- half val;
- asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
- return val;
+ half val;
+ asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
+ return val;
}
/* Qualifier wrappers for different names on different devices */
-#define ccl_device __device__ __inline__
+#define ccl_device __device__ __inline__
#if __CUDA_ARCH__ < 500
-# define ccl_device_inline __device__ __forceinline__
-# define ccl_device_forceinline __device__ __forceinline__
+# define ccl_device_inline __device__ __forceinline__
+# define ccl_device_forceinline __device__ __forceinline__
#else
-# define ccl_device_inline __device__ __inline__
-# define ccl_device_forceinline __device__ __forceinline__
+# define ccl_device_inline __device__ __inline__
+# define ccl_device_forceinline __device__ __forceinline__
#endif
-#define ccl_device_noinline __device__ __noinline__
+#define ccl_device_noinline __device__ __noinline__
#define ccl_global
#define ccl_static_constant __constant__
#define ccl_constant const
@@ -75,8 +75,7 @@ __device__ half __float2half(const float f)
#define ATTR_FALLTHROUGH
-#define CCL_MAX_LOCAL_SIZE (CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH)
-
+#define CCL_MAX_LOCAL_SIZE (CUDA_THREADS_BLOCK_WIDTH * CUDA_THREADS_BLOCK_WIDTH)
/* No assert supported for CUDA */
@@ -91,46 +90,62 @@ __device__ half __float2half(const float f)
ccl_device_inline uint ccl_local_id(uint d)
{
- switch(d) {
- case 0: return threadIdx.x;
- case 1: return threadIdx.y;
- case 2: return threadIdx.z;
- default: return 0;
- }
+ switch (d) {
+ case 0:
+ return threadIdx.x;
+ case 1:
+ return threadIdx.y;
+ case 2:
+ return threadIdx.z;
+ default:
+ return 0;
+ }
}
#define ccl_global_id(d) (ccl_group_id(d) * ccl_local_size(d) + ccl_local_id(d))
ccl_device_inline uint ccl_local_size(uint d)
{
- switch(d) {
- case 0: return blockDim.x;
- case 1: return blockDim.y;
- case 2: return blockDim.z;
- default: return 0;
- }
+ switch (d) {
+ case 0:
+ return blockDim.x;
+ case 1:
+ return blockDim.y;
+ case 2:
+ return blockDim.z;
+ default:
+ return 0;
+ }
}
#define ccl_global_size(d) (ccl_num_groups(d) * ccl_local_size(d))
ccl_device_inline uint ccl_group_id(uint d)
{
- switch(d) {
- case 0: return blockIdx.x;
- case 1: return blockIdx.y;
- case 2: return blockIdx.z;
- default: return 0;
- }
+ switch (d) {
+ case 0:
+ return blockIdx.x;
+ case 1:
+ return blockIdx.y;
+ case 2:
+ return blockIdx.z;
+ default:
+ return 0;
+ }
}
ccl_device_inline uint ccl_num_groups(uint d)
{
- switch(d) {
- case 0: return gridDim.x;
- case 1: return gridDim.y;
- case 2: return gridDim.z;
- default: return 0;
- }
+ switch (d) {
+ case 0:
+ return gridDim.x;
+ case 1:
+ return gridDim.y;
+ case 2:
+ return gridDim.z;
+ default:
+ return 0;
+ }
}
/* Textures */
@@ -150,4 +165,4 @@ ccl_device_inline uint ccl_num_groups(uint d)
#define logf(x) __logf(((float)(x)))
#define expf(x) __expf(((float)(x)))
-#endif /* __KERNEL_COMPAT_CUDA_H__ */
+#endif /* __KERNEL_COMPAT_CUDA_H__ */
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index d3d0934a626..e040ea88d7c 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -134,7 +134,7 @@
# define expf(x) native_exp(((float)(x)))
# define sqrtf(x) native_sqrt(((float)(x)))
# define logf(x) native_log(((float)(x)))
-# define rcp(x) native_recip(x)
+# define rcp(x) native_recip(x)
#else
# define sinf(x) sin(((float)(x)))
# define cosf(x) cos(((float)(x)))
@@ -142,12 +142,13 @@
# define expf(x) exp(((float)(x)))
# define sqrtf(x) sqrt(((float)(x)))
# define logf(x) log(((float)(x)))
-# define rcp(x) recip(x)
+# define rcp(x) recip(x)
#endif
/* data lookup defines */
#define kernel_data (*kg->data)
-#define kernel_tex_array(tex) ((const ccl_global tex##_t*)(kg->buffers[kg->tex.cl_buffer] + kg->tex.data))
+#define kernel_tex_array(tex) \
+ ((const ccl_global tex##_t *)(kg->buffers[kg->tex.cl_buffer] + kg->tex.data))
#define kernel_tex_fetch(tex, index) kernel_tex_array(tex)[(index)]
/* define NULL */
@@ -155,10 +156,10 @@
/* enable extensions */
#ifdef __KERNEL_CL_KHR_FP16__
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+# pragma OPENCL EXTENSION cl_khr_fp16 : enable
#endif
#include "util/util_half.h"
#include "util/util_types.h"
-#endif /* __KERNEL_COMPAT_OPENCL_H__ */
+#endif /* __KERNEL_COMPAT_OPENCL_H__ */
diff --git a/intern/cycles/kernel/kernel_differential.h b/intern/cycles/kernel/kernel_differential.h
index 924d96c72e5..8513de0d843 100644
--- a/intern/cycles/kernel/kernel_differential.h
+++ b/intern/cycles/kernel/kernel_differential.h
@@ -18,88 +18,98 @@ CCL_NAMESPACE_BEGIN
/* See "Tracing Ray Differentials", Homan Igehy, 1999. */
-ccl_device void differential_transfer(ccl_addr_space differential3 *dP_, const differential3 dP, float3 D, const differential3 dD, float3 Ng, float t)
+ccl_device void differential_transfer(ccl_addr_space differential3 *dP_,
+ const differential3 dP,
+ float3 D,
+ const differential3 dD,
+ float3 Ng,
+ float t)
{
- /* ray differential transfer through homogeneous medium, to
- * compute dPdx/dy at a shading point from the incoming ray */
+ /* ray differential transfer through homogeneous medium, to
+ * compute dPdx/dy at a shading point from the incoming ray */
- float3 tmp = D/dot(D, Ng);
- float3 tmpx = dP.dx + t*dD.dx;
- float3 tmpy = dP.dy + t*dD.dy;
+ float3 tmp = D / dot(D, Ng);
+ float3 tmpx = dP.dx + t * dD.dx;
+ float3 tmpy = dP.dy + t * dD.dy;
- dP_->dx = tmpx - dot(tmpx, Ng)*tmp;
- dP_->dy = tmpy - dot(tmpy, Ng)*tmp;
+ dP_->dx = tmpx - dot(tmpx, Ng) * tmp;
+ dP_->dy = tmpy - dot(tmpy, Ng) * tmp;
}
ccl_device void differential_incoming(ccl_addr_space differential3 *dI, const differential3 dD)
{
- /* compute dIdx/dy at a shading point, we just need to negate the
- * differential of the ray direction */
+ /* compute dIdx/dy at a shading point, we just need to negate the
+ * differential of the ray direction */
- dI->dx = -dD.dx;
- dI->dy = -dD.dy;
+ dI->dx = -dD.dx;
+ dI->dy = -dD.dy;
}
-ccl_device void differential_dudv(ccl_addr_space differential *du, ccl_addr_space differential *dv, float3 dPdu, float3 dPdv, differential3 dP, float3 Ng)
+ccl_device void differential_dudv(ccl_addr_space differential *du,
+ ccl_addr_space differential *dv,
+ float3 dPdu,
+ float3 dPdv,
+ differential3 dP,
+ float3 Ng)
{
- /* now we have dPdx/dy from the ray differential transfer, and dPdu/dv
- * from the primitive, we can compute dudx/dy and dvdx/dy. these are
- * mainly used for differentials of arbitrary mesh attributes. */
-
- /* find most stable axis to project to 2D */
- float xn = fabsf(Ng.x);
- float yn = fabsf(Ng.y);
- float zn = fabsf(Ng.z);
-
- if(zn < xn || zn < yn) {
- if(yn < xn || yn < zn) {
- dPdu.x = dPdu.y;
- dPdv.x = dPdv.y;
- dP.dx.x = dP.dx.y;
- dP.dy.x = dP.dy.y;
- }
-
- dPdu.y = dPdu.z;
- dPdv.y = dPdv.z;
- dP.dx.y = dP.dx.z;
- dP.dy.y = dP.dy.z;
- }
-
- /* using Cramer's rule, we solve for dudx and dvdx in a 2x2 linear system,
- * and the same for dudy and dvdy. the denominator is the same for both
- * solutions, so we compute it only once.
- *
- * dP.dx = dPdu * dudx + dPdv * dvdx;
- * dP.dy = dPdu * dudy + dPdv * dvdy; */
-
- float det = (dPdu.x*dPdv.y - dPdv.x*dPdu.y);
-
- if(det != 0.0f)
- det = 1.0f/det;
-
- du->dx = (dP.dx.x*dPdv.y - dP.dx.y*dPdv.x)*det;
- dv->dx = (dP.dx.y*dPdu.x - dP.dx.x*dPdu.y)*det;
-
- du->dy = (dP.dy.x*dPdv.y - dP.dy.y*dPdv.x)*det;
- dv->dy = (dP.dy.y*dPdu.x - dP.dy.x*dPdu.y)*det;
+ /* now we have dPdx/dy from the ray differential transfer, and dPdu/dv
+ * from the primitive, we can compute dudx/dy and dvdx/dy. these are
+ * mainly used for differentials of arbitrary mesh attributes. */
+
+ /* find most stable axis to project to 2D */
+ float xn = fabsf(Ng.x);
+ float yn = fabsf(Ng.y);
+ float zn = fabsf(Ng.z);
+
+ if (zn < xn || zn < yn) {
+ if (yn < xn || yn < zn) {
+ dPdu.x = dPdu.y;
+ dPdv.x = dPdv.y;
+ dP.dx.x = dP.dx.y;
+ dP.dy.x = dP.dy.y;
+ }
+
+ dPdu.y = dPdu.z;
+ dPdv.y = dPdv.z;
+ dP.dx.y = dP.dx.z;
+ dP.dy.y = dP.dy.z;
+ }
+
+ /* using Cramer's rule, we solve for dudx and dvdx in a 2x2 linear system,
+ * and the same for dudy and dvdy. the denominator is the same for both
+ * solutions, so we compute it only once.
+ *
+ * dP.dx = dPdu * dudx + dPdv * dvdx;
+ * dP.dy = dPdu * dudy + dPdv * dvdy; */
+
+ float det = (dPdu.x * dPdv.y - dPdv.x * dPdu.y);
+
+ if (det != 0.0f)
+ det = 1.0f / det;
+
+ du->dx = (dP.dx.x * dPdv.y - dP.dx.y * dPdv.x) * det;
+ dv->dx = (dP.dx.y * dPdu.x - dP.dx.x * dPdu.y) * det;
+
+ du->dy = (dP.dy.x * dPdv.y - dP.dy.y * dPdv.x) * det;
+ dv->dy = (dP.dy.y * dPdu.x - dP.dy.x * dPdu.y) * det;
}
ccl_device differential differential_zero()
{
- differential d;
- d.dx = 0.0f;
- d.dy = 0.0f;
+ differential d;
+ d.dx = 0.0f;
+ d.dy = 0.0f;
- return d;
+ return d;
}
ccl_device differential3 differential3_zero()
{
- differential3 d;
- d.dx = make_float3(0.0f, 0.0f, 0.0f);
- d.dy = make_float3(0.0f, 0.0f, 0.0f);
+ differential3 d;
+ d.dx = make_float3(0.0f, 0.0f, 0.0f);
+ d.dy = make_float3(0.0f, 0.0f, 0.0f);
- return d;
+ return d;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index 80bb8d48caf..f2eaa7b50a5 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -26,61 +26,71 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
float t,
float time)
{
- /* setup shading at emitter */
- float3 eval;
-
- if(shader_constant_emission_eval(kg, ls->shader, &eval)) {
- if((ls->prim != PRIM_NONE) && dot(ls->Ng, I) < 0.0f) {
- ls->Ng = -ls->Ng;
- }
- }
- else {
- /* Setup shader data and call shader_eval_surface once, better
- * for GPU coherence and compile times. */
+ /* setup shading at emitter */
+ float3 eval;
+
+ if (shader_constant_emission_eval(kg, ls->shader, &eval)) {
+ if ((ls->prim != PRIM_NONE) && dot(ls->Ng, I) < 0.0f) {
+ ls->Ng = -ls->Ng;
+ }
+ }
+ else {
+ /* Setup shader data and call shader_eval_surface once, better
+ * for GPU coherence and compile times. */
#ifdef __BACKGROUND_MIS__
- if(ls->type == LIGHT_BACKGROUND) {
- Ray ray;
- ray.D = ls->D;
- ray.P = ls->P;
- ray.t = 1.0f;
- ray.time = time;
- ray.dP = differential3_zero();
- ray.dD = dI;
-
- shader_setup_from_background(kg, emission_sd, &ray);
- }
- else
+ if (ls->type == LIGHT_BACKGROUND) {
+ Ray ray;
+ ray.D = ls->D;
+ ray.P = ls->P;
+ ray.t = 1.0f;
+ ray.time = time;
+ ray.dP = differential3_zero();
+ ray.dD = dI;
+
+ shader_setup_from_background(kg, emission_sd, &ray);
+ }
+ else
#endif
- {
- shader_setup_from_sample(kg, emission_sd,
- ls->P, ls->Ng, I,
- ls->shader, ls->object, ls->prim,
- ls->u, ls->v, t, time, false, ls->lamp);
-
- ls->Ng = emission_sd->Ng;
- }
-
- /* No proper path flag, we're evaluating this for all closures. that's
- * weak but we'd have to do multiple evaluations otherwise. */
- path_state_modify_bounce(state, true);
- shader_eval_surface(kg, emission_sd, state, PATH_RAY_EMISSION);
- path_state_modify_bounce(state, false);
-
- /* Evaluate closures. */
+ {
+ shader_setup_from_sample(kg,
+ emission_sd,
+ ls->P,
+ ls->Ng,
+ I,
+ ls->shader,
+ ls->object,
+ ls->prim,
+ ls->u,
+ ls->v,
+ t,
+ time,
+ false,
+ ls->lamp);
+
+ ls->Ng = emission_sd->Ng;
+ }
+
+ /* No proper path flag, we're evaluating this for all closures. that's
+ * weak but we'd have to do multiple evaluations otherwise. */
+ path_state_modify_bounce(state, true);
+ shader_eval_surface(kg, emission_sd, state, PATH_RAY_EMISSION);
+ path_state_modify_bounce(state, false);
+
+ /* Evaluate closures. */
#ifdef __BACKGROUND_MIS__
- if (ls->type == LIGHT_BACKGROUND) {
- eval = shader_background_eval(emission_sd);
- }
- else
+ if (ls->type == LIGHT_BACKGROUND) {
+ eval = shader_background_eval(emission_sd);
+ }
+ else
#endif
- {
- eval = shader_emissive_eval(emission_sd);
- }
- }
+ {
+ eval = shader_emissive_eval(emission_sd);
+ }
+ }
- eval *= ls->eval_fac;
+ eval *= ls->eval_fac;
- return eval;
+ return eval;
}
ccl_device_noinline bool direct_emission(KernelGlobals *kg,
@@ -93,132 +103,128 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg,
bool *is_lamp,
float rand_terminate)
{
- if(ls->pdf == 0.0f)
- return false;
+ if (ls->pdf == 0.0f)
+ return false;
- /* todo: implement */
- differential3 dD = differential3_zero();
+ /* todo: implement */
+ differential3 dD = differential3_zero();
- /* evaluate closure */
+ /* evaluate closure */
- float3 light_eval = direct_emissive_eval(kg,
- emission_sd,
- ls,
- state,
- -ls->D,
- dD,
- ls->t,
- sd->time);
+ float3 light_eval = direct_emissive_eval(
+ kg, emission_sd, ls, state, -ls->D, dD, ls->t, sd->time);
- if(is_zero(light_eval))
- return false;
+ if (is_zero(light_eval))
+ return false;
- /* evaluate BSDF at shading point */
+ /* evaluate BSDF at shading point */
#ifdef __VOLUME__
- if(sd->prim != PRIM_NONE)
- shader_bsdf_eval(kg, sd, ls->D, eval, ls->pdf, ls->shader & SHADER_USE_MIS);
- else {
- float bsdf_pdf;
- shader_volume_phase_eval(kg, sd, ls->D, eval, &bsdf_pdf);
- if(ls->shader & SHADER_USE_MIS) {
- /* Multiple importance sampling. */
- float mis_weight = power_heuristic(ls->pdf, bsdf_pdf);
- light_eval *= mis_weight;
- }
- }
+ if (sd->prim != PRIM_NONE)
+ shader_bsdf_eval(kg, sd, ls->D, eval, ls->pdf, ls->shader & SHADER_USE_MIS);
+ else {
+ float bsdf_pdf;
+ shader_volume_phase_eval(kg, sd, ls->D, eval, &bsdf_pdf);
+ if (ls->shader & SHADER_USE_MIS) {
+ /* Multiple importance sampling. */
+ float mis_weight = power_heuristic(ls->pdf, bsdf_pdf);
+ light_eval *= mis_weight;
+ }
+ }
#else
- shader_bsdf_eval(kg, sd, ls->D, eval, ls->pdf, ls->shader & SHADER_USE_MIS);
+ shader_bsdf_eval(kg, sd, ls->D, eval, ls->pdf, ls->shader & SHADER_USE_MIS);
#endif
- bsdf_eval_mul3(eval, light_eval/ls->pdf);
+ bsdf_eval_mul3(eval, light_eval / ls->pdf);
#ifdef __PASSES__
- /* use visibility flag to skip lights */
- if(ls->shader & SHADER_EXCLUDE_ANY) {
- if(ls->shader & SHADER_EXCLUDE_DIFFUSE) {
- eval->diffuse = make_float3(0.0f, 0.0f, 0.0f);
- eval->subsurface = make_float3(0.0f, 0.0f, 0.0f);
- }
- if(ls->shader & SHADER_EXCLUDE_GLOSSY)
- eval->glossy = make_float3(0.0f, 0.0f, 0.0f);
- if(ls->shader & SHADER_EXCLUDE_TRANSMIT)
- eval->transmission = make_float3(0.0f, 0.0f, 0.0f);
- if(ls->shader & SHADER_EXCLUDE_SCATTER)
- eval->scatter = make_float3(0.0f, 0.0f, 0.0f);
- }
+ /* use visibility flag to skip lights */
+ if (ls->shader & SHADER_EXCLUDE_ANY) {
+ if (ls->shader & SHADER_EXCLUDE_DIFFUSE) {
+ eval->diffuse = make_float3(0.0f, 0.0f, 0.0f);
+ eval->subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ }
+ if (ls->shader & SHADER_EXCLUDE_GLOSSY)
+ eval->glossy = make_float3(0.0f, 0.0f, 0.0f);
+ if (ls->shader & SHADER_EXCLUDE_TRANSMIT)
+ eval->transmission = make_float3(0.0f, 0.0f, 0.0f);
+ if (ls->shader & SHADER_EXCLUDE_SCATTER)
+ eval->scatter = make_float3(0.0f, 0.0f, 0.0f);
+ }
#endif
- if(bsdf_eval_is_zero(eval))
- return false;
+ if (bsdf_eval_is_zero(eval))
+ return false;
- if(kernel_data.integrator.light_inv_rr_threshold > 0.0f
+ if (kernel_data.integrator.light_inv_rr_threshold > 0.0f
#ifdef __SHADOW_TRICKS__
- && (state->flag & PATH_RAY_SHADOW_CATCHER) == 0
+ && (state->flag & PATH_RAY_SHADOW_CATCHER) == 0
#endif
- )
- {
- float probability = max3(fabs(bsdf_eval_sum(eval))) * kernel_data.integrator.light_inv_rr_threshold;
- if(probability < 1.0f) {
- if(rand_terminate >= probability) {
- return false;
- }
- bsdf_eval_mul(eval, 1.0f / probability);
- }
- }
-
- if(ls->shader & SHADER_CAST_SHADOW) {
- /* setup ray */
- bool transmit = (dot(sd->Ng, ls->D) < 0.0f);
- ray->P = ray_offset(sd->P, (transmit)? -sd->Ng: sd->Ng);
-
- if(ls->t == FLT_MAX) {
- /* distant light */
- ray->D = ls->D;
- ray->t = ls->t;
- }
- else {
- /* other lights, avoid self-intersection */
- ray->D = ray_offset(ls->P, ls->Ng) - ray->P;
- ray->D = normalize_len(ray->D, &ray->t);
- }
-
- ray->dP = sd->dP;
- ray->dD = differential3_zero();
- }
- else {
- /* signal to not cast shadow ray */
- ray->t = 0.0f;
- }
-
- /* return if it's a lamp for shadow pass */
- *is_lamp = (ls->prim == PRIM_NONE && ls->type != LIGHT_BACKGROUND);
-
- return true;
+ ) {
+ float probability = max3(fabs(bsdf_eval_sum(eval))) *
+ kernel_data.integrator.light_inv_rr_threshold;
+ if (probability < 1.0f) {
+ if (rand_terminate >= probability) {
+ return false;
+ }
+ bsdf_eval_mul(eval, 1.0f / probability);
+ }
+ }
+
+ if (ls->shader & SHADER_CAST_SHADOW) {
+ /* setup ray */
+ bool transmit = (dot(sd->Ng, ls->D) < 0.0f);
+ ray->P = ray_offset(sd->P, (transmit) ? -sd->Ng : sd->Ng);
+
+ if (ls->t == FLT_MAX) {
+ /* distant light */
+ ray->D = ls->D;
+ ray->t = ls->t;
+ }
+ else {
+ /* other lights, avoid self-intersection */
+ ray->D = ray_offset(ls->P, ls->Ng) - ray->P;
+ ray->D = normalize_len(ray->D, &ray->t);
+ }
+
+ ray->dP = sd->dP;
+ ray->dD = differential3_zero();
+ }
+ else {
+ /* signal to not cast shadow ray */
+ ray->t = 0.0f;
+ }
+
+ /* return if it's a lamp for shadow pass */
+ *is_lamp = (ls->prim == PRIM_NONE && ls->type != LIGHT_BACKGROUND);
+
+ return true;
}
/* Indirect Primitive Emission */
-ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, ShaderData *sd, float t, int path_flag, float bsdf_pdf)
+ccl_device_noinline float3 indirect_primitive_emission(
+ KernelGlobals *kg, ShaderData *sd, float t, int path_flag, float bsdf_pdf)
{
- /* evaluate emissive closure */
- float3 L = shader_emissive_eval(sd);
+ /* evaluate emissive closure */
+ float3 L = shader_emissive_eval(sd);
#ifdef __HAIR__
- if(!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) && (sd->type & PRIMITIVE_ALL_TRIANGLE))
+ if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) &&
+ (sd->type & PRIMITIVE_ALL_TRIANGLE))
#else
- if(!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS))
+ if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS))
#endif
- {
- /* multiple importance sampling, get triangle light pdf,
- * and compute weight with respect to BSDF pdf */
- float pdf = triangle_light_pdf(kg, sd, t);
- float mis_weight = power_heuristic(bsdf_pdf, pdf);
+ {
+ /* multiple importance sampling, get triangle light pdf,
+ * and compute weight with respect to BSDF pdf */
+ float pdf = triangle_light_pdf(kg, sd, t);
+ float mis_weight = power_heuristic(bsdf_pdf, pdf);
- return L*mis_weight;
- }
+ return L * mis_weight;
+ }
- return L;
+ return L;
}
/* Indirect Lamp Emission */
@@ -229,60 +235,55 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
Ray *ray,
float3 *emission)
{
- bool hit_lamp = false;
+ bool hit_lamp = false;
- *emission = make_float3(0.0f, 0.0f, 0.0f);
+ *emission = make_float3(0.0f, 0.0f, 0.0f);
- for(int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) {
- LightSample ls;
+ for (int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) {
+ LightSample ls;
- if(!lamp_light_eval(kg, lamp, ray->P, ray->D, ray->t, &ls))
- continue;
+ if (!lamp_light_eval(kg, lamp, ray->P, ray->D, ray->t, &ls))
+ continue;
#ifdef __PASSES__
- /* use visibility flag to skip lights */
- if(ls.shader & SHADER_EXCLUDE_ANY) {
- if(((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) ||
- ((ls.shader & SHADER_EXCLUDE_GLOSSY) &&
- ((state->flag & (PATH_RAY_GLOSSY|PATH_RAY_REFLECT)) == (PATH_RAY_GLOSSY|PATH_RAY_REFLECT))) ||
- ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) ||
- ((ls.shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER)))
- continue;
- }
+ /* use visibility flag to skip lights */
+ if (ls.shader & SHADER_EXCLUDE_ANY) {
+ if (((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) ||
+ ((ls.shader & SHADER_EXCLUDE_GLOSSY) &&
+ ((state->flag & (PATH_RAY_GLOSSY | PATH_RAY_REFLECT)) ==
+ (PATH_RAY_GLOSSY | PATH_RAY_REFLECT))) ||
+ ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) ||
+ ((ls.shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER)))
+ continue;
+ }
#endif
- float3 L = direct_emissive_eval(kg,
- emission_sd,
- &ls,
- state,
- -ray->D,
- ray->dD,
- ls.t,
- ray->time);
+ float3 L = direct_emissive_eval(
+ kg, emission_sd, &ls, state, -ray->D, ray->dD, ls.t, ray->time);
#ifdef __VOLUME__
- if(state->volume_stack[0].shader != SHADER_NONE) {
- /* shadow attenuation */
- Ray volume_ray = *ray;
- volume_ray.t = ls.t;
- float3 volume_tp = make_float3(1.0f, 1.0f, 1.0f);
- kernel_volume_shadow(kg, emission_sd, state, &volume_ray, &volume_tp);
- L *= volume_tp;
- }
+ if (state->volume_stack[0].shader != SHADER_NONE) {
+ /* shadow attenuation */
+ Ray volume_ray = *ray;
+ volume_ray.t = ls.t;
+ float3 volume_tp = make_float3(1.0f, 1.0f, 1.0f);
+ kernel_volume_shadow(kg, emission_sd, state, &volume_ray, &volume_tp);
+ L *= volume_tp;
+ }
#endif
- if(!(state->flag & PATH_RAY_MIS_SKIP)) {
- /* multiple importance sampling, get regular light pdf,
- * and compute weight with respect to BSDF pdf */
- float mis_weight = power_heuristic(state->ray_pdf, ls.pdf);
- L *= mis_weight;
- }
+ if (!(state->flag & PATH_RAY_MIS_SKIP)) {
+ /* multiple importance sampling, get regular light pdf,
+ * and compute weight with respect to BSDF pdf */
+ float mis_weight = power_heuristic(state->ray_pdf, ls.pdf);
+ L *= mis_weight;
+ }
- *emission += L;
- hit_lamp = true;
- }
+ *emission += L;
+ hit_lamp = true;
+ }
- return hit_lamp;
+ return hit_lamp;
}
/* Indirect Background */
@@ -293,55 +294,55 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
ccl_addr_space Ray *ray)
{
#ifdef __BACKGROUND__
- int shader = kernel_data.background.surface_shader;
-
- /* Use visibility flag to skip lights. */
- if(shader & SHADER_EXCLUDE_ANY) {
- if(((shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) ||
- ((shader & SHADER_EXCLUDE_GLOSSY) &&
- ((state->flag & (PATH_RAY_GLOSSY|PATH_RAY_REFLECT)) == (PATH_RAY_GLOSSY|PATH_RAY_REFLECT))) ||
- ((shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) ||
- ((shader & SHADER_EXCLUDE_CAMERA) && (state->flag & PATH_RAY_CAMERA)) ||
- ((shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER)))
- return make_float3(0.0f, 0.0f, 0.0f);
- }
-
-
- /* Evaluate background shader. */
- float3 L;
- if(!shader_constant_emission_eval(kg, shader, &L)) {
+ int shader = kernel_data.background.surface_shader;
+
+ /* Use visibility flag to skip lights. */
+ if (shader & SHADER_EXCLUDE_ANY) {
+ if (((shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) ||
+ ((shader & SHADER_EXCLUDE_GLOSSY) &&
+ ((state->flag & (PATH_RAY_GLOSSY | PATH_RAY_REFLECT)) ==
+ (PATH_RAY_GLOSSY | PATH_RAY_REFLECT))) ||
+ ((shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) ||
+ ((shader & SHADER_EXCLUDE_CAMERA) && (state->flag & PATH_RAY_CAMERA)) ||
+ ((shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER)))
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
+
+ /* Evaluate background shader. */
+ float3 L;
+ if (!shader_constant_emission_eval(kg, shader, &L)) {
# ifdef __SPLIT_KERNEL__
- Ray priv_ray = *ray;
- shader_setup_from_background(kg, emission_sd, &priv_ray);
+ Ray priv_ray = *ray;
+ shader_setup_from_background(kg, emission_sd, &priv_ray);
# else
- shader_setup_from_background(kg, emission_sd, ray);
+ shader_setup_from_background(kg, emission_sd, ray);
# endif
- path_state_modify_bounce(state, true);
- shader_eval_surface(kg, emission_sd, state, state->flag | PATH_RAY_EMISSION);
- path_state_modify_bounce(state, false);
+ path_state_modify_bounce(state, true);
+ shader_eval_surface(kg, emission_sd, state, state->flag | PATH_RAY_EMISSION);
+ path_state_modify_bounce(state, false);
- L = shader_background_eval(emission_sd);
- }
+ L = shader_background_eval(emission_sd);
+ }
- /* Background MIS weights. */
-#ifdef __BACKGROUND_MIS__
- /* Check if background light exists or if we should skip pdf. */
- int res_x = kernel_data.integrator.pdf_background_res_x;
+ /* Background MIS weights. */
+# ifdef __BACKGROUND_MIS__
+ /* Check if background light exists or if we should skip pdf. */
+ int res_x = kernel_data.integrator.pdf_background_res_x;
- if(!(state->flag & PATH_RAY_MIS_SKIP) && res_x) {
- /* multiple importance sampling, get background light pdf for ray
- * direction, and compute weight with respect to BSDF pdf */
- float pdf = background_light_pdf(kg, ray->P, ray->D);
- float mis_weight = power_heuristic(state->ray_pdf, pdf);
+ if (!(state->flag & PATH_RAY_MIS_SKIP) && res_x) {
+ /* multiple importance sampling, get background light pdf for ray
+ * direction, and compute weight with respect to BSDF pdf */
+ float pdf = background_light_pdf(kg, ray->P, ray->D);
+ float mis_weight = power_heuristic(state->ray_pdf, pdf);
- return L*mis_weight;
- }
-#endif
+ return L * mis_weight;
+ }
+# endif
- return L;
+ return L;
#else
- return make_float3(0.8f, 0.8f, 0.8f);
+ return make_float3(0.8f, 0.8f, 0.8f);
#endif
}
diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h
index b5f151d8663..d20f1adf663 100644
--- a/intern/cycles/kernel/kernel_film.h
+++ b/intern/cycles/kernel/kernel_film.h
@@ -18,72 +18,82 @@ CCL_NAMESPACE_BEGIN
ccl_device float4 film_map(KernelGlobals *kg, float4 irradiance, float scale)
{
- float exposure = kernel_data.film.exposure;
- float4 result = irradiance*scale;
+ float exposure = kernel_data.film.exposure;
+ float4 result = irradiance * scale;
- /* conversion to srgb */
- result.x = color_linear_to_srgb(result.x*exposure);
- result.y = color_linear_to_srgb(result.y*exposure);
- result.z = color_linear_to_srgb(result.z*exposure);
+ /* conversion to srgb */
+ result.x = color_linear_to_srgb(result.x * exposure);
+ result.y = color_linear_to_srgb(result.y * exposure);
+ result.z = color_linear_to_srgb(result.z * exposure);
- /* clamp since alpha might be > 1.0 due to russian roulette */
- result.w = saturate(result.w);
+ /* clamp since alpha might be > 1.0 due to russian roulette */
+ result.w = saturate(result.w);
- return result;
+ return result;
}
ccl_device uchar4 film_float_to_byte(float4 color)
{
- uchar4 result;
+ uchar4 result;
- /* simple float to byte conversion */
- result.x = (uchar)(saturate(color.x)*255.0f);
- result.y = (uchar)(saturate(color.y)*255.0f);
- result.z = (uchar)(saturate(color.z)*255.0f);
- result.w = (uchar)(saturate(color.w)*255.0f);
+ /* simple float to byte conversion */
+ result.x = (uchar)(saturate(color.x) * 255.0f);
+ result.y = (uchar)(saturate(color.y) * 255.0f);
+ result.z = (uchar)(saturate(color.z) * 255.0f);
+ result.w = (uchar)(saturate(color.w) * 255.0f);
- return result;
+ return result;
}
ccl_device void kernel_film_convert_to_byte(KernelGlobals *kg,
- ccl_global uchar4 *rgba, ccl_global float *buffer,
- float sample_scale, int x, int y, int offset, int stride)
+ ccl_global uchar4 *rgba,
+ ccl_global float *buffer,
+ float sample_scale,
+ int x,
+ int y,
+ int offset,
+ int stride)
{
- /* buffer offset */
- int index = offset + x + y*stride;
+ /* buffer offset */
+ int index = offset + x + y * stride;
- rgba += index;
- buffer += index*kernel_data.film.pass_stride;
+ rgba += index;
+ buffer += index * kernel_data.film.pass_stride;
- /* map colors */
- float4 irradiance = *((ccl_global float4*)buffer);
- float4 float_result = film_map(kg, irradiance, sample_scale);
- uchar4 byte_result = film_float_to_byte(float_result);
+ /* map colors */
+ float4 irradiance = *((ccl_global float4 *)buffer);
+ float4 float_result = film_map(kg, irradiance, sample_scale);
+ uchar4 byte_result = film_float_to_byte(float_result);
- *rgba = byte_result;
+ *rgba = byte_result;
}
ccl_device void kernel_film_convert_to_half_float(KernelGlobals *kg,
- ccl_global uchar4 *rgba, ccl_global float *buffer,
- float sample_scale, int x, int y, int offset, int stride)
+ ccl_global uchar4 *rgba,
+ ccl_global float *buffer,
+ float sample_scale,
+ int x,
+ int y,
+ int offset,
+ int stride)
{
- /* buffer offset */
- int index = offset + x + y*stride;
+ /* buffer offset */
+ int index = offset + x + y * stride;
- ccl_global float4 *in = (ccl_global float4*)(buffer + index*kernel_data.film.pass_stride);
- ccl_global half *out = (ccl_global half*)rgba + index*4;
+ ccl_global float4 *in = (ccl_global float4 *)(buffer + index * kernel_data.film.pass_stride);
+ ccl_global half *out = (ccl_global half *)rgba + index * 4;
- float exposure = kernel_data.film.exposure;
+ float exposure = kernel_data.film.exposure;
- float4 rgba_in = *in;
+ float4 rgba_in = *in;
- if(exposure != 1.0f) {
- rgba_in.x *= exposure;
- rgba_in.y *= exposure;
- rgba_in.z *= exposure;
- }
+ if (exposure != 1.0f) {
+ rgba_in.x *= exposure;
+ rgba_in.y *= exposure;
+ rgba_in.z *= exposure;
+ }
- float4_store_half(out, rgba_in, sample_scale);
+ float4_store_half(out, rgba_in, sample_scale);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index 59f1e252d21..9dbf3b7ea2e 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -54,41 +54,41 @@ typedef struct KernelGlobals {
# define KERNEL_TEX(type, name) texture<type> name;
# include "kernel/kernel_textures.h"
- KernelData __data;
+ KernelData __data;
# ifdef __OSL__
- /* On the CPU, we also have the OSL globals here. Most data structures are shared
- * with SVM, the difference is in the shaders and object/mesh attributes. */
- OSLGlobals *osl;
- OSLShadingSystem *osl_ss;
- OSLThreadData *osl_tdata;
+ /* On the CPU, we also have the OSL globals here. Most data structures are shared
+ * with SVM, the difference is in the shaders and object/mesh attributes. */
+ OSLGlobals *osl;
+ OSLShadingSystem *osl_ss;
+ OSLThreadData *osl_tdata;
# endif
- /* **** Run-time data **** */
+ /* **** Run-time data **** */
- /* Heap-allocated storage for transparent shadows intersections. */
- Intersection *transparent_shadow_intersections;
+ /* Heap-allocated storage for transparent shadows intersections. */
+ Intersection *transparent_shadow_intersections;
- /* Storage for decoupled volume steps. */
- VolumeStep *decoupled_volume_steps[2];
- int decoupled_volume_steps_index;
+ /* Storage for decoupled volume steps. */
+ VolumeStep *decoupled_volume_steps[2];
+ int decoupled_volume_steps_index;
- /* A buffer for storing per-pixel coverage for Cryptomatte. */
- CoverageMap *coverage_object;
- CoverageMap *coverage_material;
- CoverageMap *coverage_asset;
+ /* A buffer for storing per-pixel coverage for Cryptomatte. */
+ CoverageMap *coverage_object;
+ CoverageMap *coverage_material;
+ CoverageMap *coverage_asset;
- /* split kernel */
- SplitData split_data;
- SplitParams split_param_data;
+ /* split kernel */
+ SplitData split_data;
+ SplitParams split_param_data;
- int2 global_size;
- int2 global_id;
+ int2 global_size;
+ int2 global_id;
- ProfilingState profiler;
+ ProfilingState profiler;
} KernelGlobals;
-#endif /* __KERNEL_CPU__ */
+#endif /* __KERNEL_CPU__ */
/* For CUDA, constant memory textures must be globals, so we can't put them
* into a struct. As a result we don't actually use this struct and use actual
@@ -99,124 +99,117 @@ typedef struct KernelGlobals {
__constant__ KernelData __data;
typedef struct KernelGlobals {
- /* NOTE: Keep the size in sync with SHADOW_STACK_MAX_HITS. */
- Intersection hits_stack[64];
+ /* NOTE: Keep the size in sync with SHADOW_STACK_MAX_HITS. */
+ Intersection hits_stack[64];
} KernelGlobals;
# define KERNEL_TEX(type, name) const __constant__ __device__ type *name;
# include "kernel/kernel_textures.h"
-#endif /* __KERNEL_CUDA__ */
+#endif /* __KERNEL_CUDA__ */
/* OpenCL */
#ifdef __KERNEL_OPENCL__
-# define KERNEL_TEX(type, name) \
-typedef type name##_t;
+# define KERNEL_TEX(type, name) typedef type name##_t;
# include "kernel/kernel_textures.h"
typedef ccl_addr_space struct KernelGlobals {
- ccl_constant KernelData *data;
- ccl_global char *buffers[8];
+ ccl_constant KernelData *data;
+ ccl_global char *buffers[8];
-# define KERNEL_TEX(type, name) \
- TextureInfo name;
+# define KERNEL_TEX(type, name) TextureInfo name;
# include "kernel/kernel_textures.h"
# ifdef __SPLIT_KERNEL__
- SplitData split_data;
- SplitParams split_param_data;
+ SplitData split_data;
+ SplitParams split_param_data;
# endif
} KernelGlobals;
-#define KERNEL_BUFFER_PARAMS \
- ccl_global char *buffer0, \
- ccl_global char *buffer1, \
- ccl_global char *buffer2, \
- ccl_global char *buffer3, \
- ccl_global char *buffer4, \
- ccl_global char *buffer5, \
- ccl_global char *buffer6, \
- ccl_global char *buffer7
+# define KERNEL_BUFFER_PARAMS \
+ ccl_global char *buffer0, ccl_global char *buffer1, ccl_global char *buffer2, \
+ ccl_global char *buffer3, ccl_global char *buffer4, ccl_global char *buffer5, \
+ ccl_global char *buffer6, ccl_global char *buffer7
-#define KERNEL_BUFFER_ARGS buffer0, buffer1, buffer2, buffer3, buffer4, buffer5, buffer6, buffer7
+# define KERNEL_BUFFER_ARGS buffer0, buffer1, buffer2, buffer3, buffer4, buffer5, buffer6, buffer7
ccl_device_inline void kernel_set_buffer_pointers(KernelGlobals *kg, KERNEL_BUFFER_PARAMS)
{
-#ifdef __SPLIT_KERNEL__
- if(ccl_local_id(0) + ccl_local_id(1) == 0)
-#endif
- {
- kg->buffers[0] = buffer0;
- kg->buffers[1] = buffer1;
- kg->buffers[2] = buffer2;
- kg->buffers[3] = buffer3;
- kg->buffers[4] = buffer4;
- kg->buffers[5] = buffer5;
- kg->buffers[6] = buffer6;
- kg->buffers[7] = buffer7;
- }
+# ifdef __SPLIT_KERNEL__
+ if (ccl_local_id(0) + ccl_local_id(1) == 0)
+# endif
+ {
+ kg->buffers[0] = buffer0;
+ kg->buffers[1] = buffer1;
+ kg->buffers[2] = buffer2;
+ kg->buffers[3] = buffer3;
+ kg->buffers[4] = buffer4;
+ kg->buffers[5] = buffer5;
+ kg->buffers[6] = buffer6;
+ kg->buffers[7] = buffer7;
+ }
# ifdef __SPLIT_KERNEL__
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
# endif
}
ccl_device_inline void kernel_set_buffer_info(KernelGlobals *kg)
{
# ifdef __SPLIT_KERNEL__
- if(ccl_local_id(0) + ccl_local_id(1) == 0)
+ if (ccl_local_id(0) + ccl_local_id(1) == 0)
# endif
- {
- ccl_global TextureInfo *info = (ccl_global TextureInfo*)kg->buffers[0];
+ {
+ ccl_global TextureInfo *info = (ccl_global TextureInfo *)kg->buffers[0];
-# define KERNEL_TEX(type, name) \
- kg->name = *(info++);
+# define KERNEL_TEX(type, name) kg->name = *(info++);
# include "kernel/kernel_textures.h"
- }
+ }
# ifdef __SPLIT_KERNEL__
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
# endif
}
-#endif /* __KERNEL_OPENCL__ */
+#endif /* __KERNEL_OPENCL__ */
/* Interpolated lookup table access */
ccl_device float lookup_table_read(KernelGlobals *kg, float x, int offset, int size)
{
- x = saturate(x)*(size-1);
+ x = saturate(x) * (size - 1);
- int index = min(float_to_int(x), size-1);
- int nindex = min(index+1, size-1);
- float t = x - index;
+ int index = min(float_to_int(x), size - 1);
+ int nindex = min(index + 1, size - 1);
+ float t = x - index;
- float data0 = kernel_tex_fetch(__lookup_table, index + offset);
- if(t == 0.0f)
- return data0;
+ float data0 = kernel_tex_fetch(__lookup_table, index + offset);
+ if (t == 0.0f)
+ return data0;
- float data1 = kernel_tex_fetch(__lookup_table, nindex + offset);
- return (1.0f - t)*data0 + t*data1;
+ float data1 = kernel_tex_fetch(__lookup_table, nindex + offset);
+ return (1.0f - t) * data0 + t * data1;
}
-ccl_device float lookup_table_read_2D(KernelGlobals *kg, float x, float y, int offset, int xsize, int ysize)
+ccl_device float lookup_table_read_2D(
+ KernelGlobals *kg, float x, float y, int offset, int xsize, int ysize)
{
- y = saturate(y)*(ysize-1);
+ y = saturate(y) * (ysize - 1);
- int index = min(float_to_int(y), ysize-1);
- int nindex = min(index+1, ysize-1);
- float t = y - index;
+ int index = min(float_to_int(y), ysize - 1);
+ int nindex = min(index + 1, ysize - 1);
+ float t = y - index;
- float data0 = lookup_table_read(kg, x, offset + xsize*index, xsize);
- if(t == 0.0f)
- return data0;
+ float data0 = lookup_table_read(kg, x, offset + xsize * index, xsize);
+ if (t == 0.0f)
+ return data0;
- float data1 = lookup_table_read(kg, x, offset + xsize*nindex, xsize);
- return (1.0f - t)*data0 + t*data1;
+ float data1 = lookup_table_read(kg, x, offset + xsize * nindex, xsize);
+ return (1.0f - t) * data0 + t * data1;
}
CCL_NAMESPACE_END
-#endif /* __KERNEL_GLOBALS_H__ */
+#endif /* __KERNEL_GLOBALS_H__ */
diff --git a/intern/cycles/kernel/kernel_id_passes.h b/intern/cycles/kernel/kernel_id_passes.h
index 0cd65b1f2e8..c1f4e39e5e7 100644
--- a/intern/cycles/kernel/kernel_id_passes.h
+++ b/intern/cycles/kernel/kernel_id_passes.h
@@ -16,78 +16,83 @@
CCL_NAMESPACE_BEGIN
-ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer, int num_slots, float id, float weight)
+ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer,
+ int num_slots,
+ float id,
+ float weight)
{
- kernel_assert(id != ID_NONE);
- if(weight == 0.0f) {
- return;
- }
+ kernel_assert(id != ID_NONE);
+ if (weight == 0.0f) {
+ return;
+ }
- for(int slot = 0; slot < num_slots; slot++) {
- ccl_global float2 *id_buffer = (ccl_global float2*)buffer;
+ for (int slot = 0; slot < num_slots; slot++) {
+ ccl_global float2 *id_buffer = (ccl_global float2 *)buffer;
#ifdef __ATOMIC_PASS_WRITE__
- /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
- if(id_buffer[slot].x == ID_NONE) {
- /* Use an atomic to claim this slot.
- * If a different thread got here first, try again from this slot on. */
- float old_id = atomic_compare_and_swap_float(buffer+slot*2, ID_NONE, id);
- if(old_id != ID_NONE && old_id != id) {
- continue;
- }
- atomic_add_and_fetch_float(buffer+slot*2+1, weight);
- break;
- }
- /* If there already is a slot for that ID, add the weight.
- * If no slot was found, add it to the last. */
- else if(id_buffer[slot].x == id || slot == num_slots - 1) {
- atomic_add_and_fetch_float(buffer+slot*2+1, weight);
- break;
- }
+ /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
+ if (id_buffer[slot].x == ID_NONE) {
+ /* Use an atomic to claim this slot.
+ * If a different thread got here first, try again from this slot on. */
+ float old_id = atomic_compare_and_swap_float(buffer + slot * 2, ID_NONE, id);
+ if (old_id != ID_NONE && old_id != id) {
+ continue;
+ }
+ atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight);
+ break;
+ }
+ /* If there already is a slot for that ID, add the weight.
+ * If no slot was found, add it to the last. */
+ else if (id_buffer[slot].x == id || slot == num_slots - 1) {
+ atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight);
+ break;
+ }
#else /* __ATOMIC_PASS_WRITE__ */
- /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
- if(id_buffer[slot].x == ID_NONE) {
- id_buffer[slot].x = id;
- id_buffer[slot].y = weight;
- break;
- }
- /* If there already is a slot for that ID, add the weight.
- * If no slot was found, add it to the last. */
- else if(id_buffer[slot].x == id || slot == num_slots - 1) {
- id_buffer[slot].y += weight;
- break;
- }
-#endif /* __ATOMIC_PASS_WRITE__ */
- }
+ /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
+ if (id_buffer[slot].x == ID_NONE) {
+ id_buffer[slot].x = id;
+ id_buffer[slot].y = weight;
+ break;
+ }
+ /* If there already is a slot for that ID, add the weight.
+ * If no slot was found, add it to the last. */
+ else if (id_buffer[slot].x == id || slot == num_slots - 1) {
+ id_buffer[slot].y += weight;
+ break;
+ }
+#endif /* __ATOMIC_PASS_WRITE__ */
+ }
}
ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_slots)
{
- ccl_global float2 *id_buffer = (ccl_global float2*)buffer;
- for(int slot = 1; slot < num_slots; ++slot) {
- if(id_buffer[slot].x == ID_NONE) {
- return;
- }
- /* Since we're dealing with a tiny number of elements, insertion sort should be fine. */
- int i = slot;
- while(i > 0 && id_buffer[i].y > id_buffer[i - 1].y) {
- float2 swap = id_buffer[i];
- id_buffer[i] = id_buffer[i - 1];
- id_buffer[i - 1] = swap;
- --i;
- }
- }
+ ccl_global float2 *id_buffer = (ccl_global float2 *)buffer;
+ for (int slot = 1; slot < num_slots; ++slot) {
+ if (id_buffer[slot].x == ID_NONE) {
+ return;
+ }
+ /* Since we're dealing with a tiny number of elements, insertion sort should be fine. */
+ int i = slot;
+ while (i > 0 && id_buffer[i].y > id_buffer[i - 1].y) {
+ float2 swap = id_buffer[i];
+ id_buffer[i] = id_buffer[i - 1];
+ id_buffer[i - 1] = swap;
+ --i;
+ }
+ }
}
#ifdef __KERNEL_GPU__
/* post-sorting for Cryptomatte */
-ccl_device void kernel_cryptomatte_post(KernelGlobals *kg, ccl_global float *buffer, uint sample, int x, int y, int offset, int stride)
+ccl_device void kernel_cryptomatte_post(
+ KernelGlobals *kg, ccl_global float *buffer, uint sample, int x, int y, int offset, int stride)
{
- if(sample - 1 == kernel_data.integrator.aa_samples) {
- int index = offset + x + y * stride;
- int pass_stride = kernel_data.film.pass_stride;
- ccl_global float *cryptomatte_buffer = buffer + index * pass_stride + kernel_data.film.pass_cryptomatte;
- kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
- }
+ if (sample - 1 == kernel_data.integrator.aa_samples) {
+ int index = offset + x + y * stride;
+ int pass_stride = kernel_data.film.pass_stride;
+ ccl_global float *cryptomatte_buffer = buffer + index * pass_stride +
+ kernel_data.film.pass_cryptomatte;
+ kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
+ }
}
#endif
diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h
index 3bde96b078c..f7270a14940 100644
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -26,202 +26,202 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline bool cmj_is_pow2(int i)
{
- return (i > 1) && ((i & (i - 1)) == 0);
+ return (i > 1) && ((i & (i - 1)) == 0);
}
ccl_device_inline int cmj_fast_mod_pow2(int a, int b)
{
- return (a & (b - 1));
+ return (a & (b - 1));
}
/* b must be > 1 */
ccl_device_inline int cmj_fast_div_pow2(int a, int b)
{
- kernel_assert(b > 1);
+ kernel_assert(b > 1);
#if defined(__KERNEL_SSE2__)
# ifdef _MSC_VER
- unsigned long ctz;
- _BitScanForward(&ctz, b);
- return a >> ctz;
+ unsigned long ctz;
+ _BitScanForward(&ctz, b);
+ return a >> ctz;
# else
- return a >> __builtin_ctz(b);
+ return a >> __builtin_ctz(b);
# endif
#elif defined(__KERNEL_CUDA__)
- return a >> (__ffs(b) - 1);
+ return a >> (__ffs(b) - 1);
#else
- return a/b;
+ return a / b;
#endif
}
ccl_device_inline uint cmj_w_mask(uint w)
{
- kernel_assert(w > 1);
+ kernel_assert(w > 1);
#if defined(__KERNEL_SSE2__)
# ifdef _MSC_VER
- unsigned long leading_zero;
- _BitScanReverse(&leading_zero, w);
- return ((1 << (1 + leading_zero)) - 1);
+ unsigned long leading_zero;
+ _BitScanReverse(&leading_zero, w);
+ return ((1 << (1 + leading_zero)) - 1);
# else
- return ((1 << (32 - __builtin_clz(w))) - 1);
+ return ((1 << (32 - __builtin_clz(w))) - 1);
# endif
#elif defined(__KERNEL_CUDA__)
- return ((1 << (32 - __clz(w))) - 1);
+ return ((1 << (32 - __clz(w))) - 1);
#else
- w |= w >> 1;
- w |= w >> 2;
- w |= w >> 4;
- w |= w >> 8;
- w |= w >> 16;
+ w |= w >> 1;
+ w |= w >> 2;
+ w |= w >> 4;
+ w |= w >> 8;
+ w |= w >> 16;
- return w;
+ return w;
#endif
}
ccl_device_inline uint cmj_permute(uint i, uint l, uint p)
{
- uint w = l - 1;
-
- if((l & w) == 0) {
- /* l is a power of two (fast) */
- i ^= p;
- i *= 0xe170893d;
- i ^= p >> 16;
- i ^= (i & w) >> 4;
- i ^= p >> 8;
- i *= 0x0929eb3f;
- i ^= p >> 23;
- i ^= (i & w) >> 1;
- i *= 1 | p >> 27;
- i *= 0x6935fa69;
- i ^= (i & w) >> 11;
- i *= 0x74dcb303;
- i ^= (i & w) >> 2;
- i *= 0x9e501cc3;
- i ^= (i & w) >> 2;
- i *= 0xc860a3df;
- i &= w;
- i ^= i >> 5;
-
- return (i + p) & w;
- }
- else {
- /* l is not a power of two (slow) */
- w = cmj_w_mask(w);
-
- do {
- i ^= p;
- i *= 0xe170893d;
- i ^= p >> 16;
- i ^= (i & w) >> 4;
- i ^= p >> 8;
- i *= 0x0929eb3f;
- i ^= p >> 23;
- i ^= (i & w) >> 1;
- i *= 1 | p >> 27;
- i *= 0x6935fa69;
- i ^= (i & w) >> 11;
- i *= 0x74dcb303;
- i ^= (i & w) >> 2;
- i *= 0x9e501cc3;
- i ^= (i & w) >> 2;
- i *= 0xc860a3df;
- i &= w;
- i ^= i >> 5;
- } while(i >= l);
-
- return (i + p) % l;
- }
+ uint w = l - 1;
+
+ if ((l & w) == 0) {
+ /* l is a power of two (fast) */
+ i ^= p;
+ i *= 0xe170893d;
+ i ^= p >> 16;
+ i ^= (i & w) >> 4;
+ i ^= p >> 8;
+ i *= 0x0929eb3f;
+ i ^= p >> 23;
+ i ^= (i & w) >> 1;
+ i *= 1 | p >> 27;
+ i *= 0x6935fa69;
+ i ^= (i & w) >> 11;
+ i *= 0x74dcb303;
+ i ^= (i & w) >> 2;
+ i *= 0x9e501cc3;
+ i ^= (i & w) >> 2;
+ i *= 0xc860a3df;
+ i &= w;
+ i ^= i >> 5;
+
+ return (i + p) & w;
+ }
+ else {
+ /* l is not a power of two (slow) */
+ w = cmj_w_mask(w);
+
+ do {
+ i ^= p;
+ i *= 0xe170893d;
+ i ^= p >> 16;
+ i ^= (i & w) >> 4;
+ i ^= p >> 8;
+ i *= 0x0929eb3f;
+ i ^= p >> 23;
+ i ^= (i & w) >> 1;
+ i *= 1 | p >> 27;
+ i *= 0x6935fa69;
+ i ^= (i & w) >> 11;
+ i *= 0x74dcb303;
+ i ^= (i & w) >> 2;
+ i *= 0x9e501cc3;
+ i ^= (i & w) >> 2;
+ i *= 0xc860a3df;
+ i &= w;
+ i ^= i >> 5;
+ } while (i >= l);
+
+ return (i + p) % l;
+ }
}
ccl_device_inline uint cmj_hash(uint i, uint p)
{
- i ^= p;
- i ^= i >> 17;
- i ^= i >> 10;
- i *= 0xb36534e5;
- i ^= i >> 12;
- i ^= i >> 21;
- i *= 0x93fc4795;
- i ^= 0xdf6e307f;
- i ^= i >> 17;
- i *= 1 | p >> 18;
-
- return i;
+ i ^= p;
+ i ^= i >> 17;
+ i ^= i >> 10;
+ i *= 0xb36534e5;
+ i ^= i >> 12;
+ i ^= i >> 21;
+ i *= 0x93fc4795;
+ i ^= 0xdf6e307f;
+ i ^= i >> 17;
+ i *= 1 | p >> 18;
+
+ return i;
}
ccl_device_inline uint cmj_hash_simple(uint i, uint p)
{
- i = (i ^ 61) ^ p;
- i += i << 3;
- i ^= i >> 4;
- i *= 0x27d4eb2d;
- return i;
+ i = (i ^ 61) ^ p;
+ i += i << 3;
+ i ^= i >> 4;
+ i *= 0x27d4eb2d;
+ return i;
}
ccl_device_inline float cmj_randfloat(uint i, uint p)
{
- return cmj_hash(i, p) * (1.0f / 4294967808.0f);
+ return cmj_hash(i, p) * (1.0f / 4294967808.0f);
}
#ifdef __CMJ__
ccl_device float cmj_sample_1D(int s, int N, int p)
{
- kernel_assert(s < N);
+ kernel_assert(s < N);
- uint x = cmj_permute(s, N, p * 0x68bc21eb);
- float jx = cmj_randfloat(s, p * 0x967a889b);
+ uint x = cmj_permute(s, N, p * 0x68bc21eb);
+ float jx = cmj_randfloat(s, p * 0x967a889b);
- float invN = 1.0f/N;
- return (x + jx)*invN;
+ float invN = 1.0f / N;
+ return (x + jx) * invN;
}
/* TODO(sergey): Do some extra tests and consider moving to util_math.h. */
ccl_device_inline int cmj_isqrt(int value)
{
-#if defined(__KERNEL_CUDA__)
- return float_to_int(__fsqrt_ru(value));
-#elif defined(__KERNEL_GPU__)
- return float_to_int(sqrtf(value));
-#else
- /* This is a work around for fast-math on CPU which might replace sqrtf()
- * with am approximated version.
- */
- return float_to_int(sqrtf(value) + 1e-6f);
-#endif
+# if defined(__KERNEL_CUDA__)
+ return float_to_int(__fsqrt_ru(value));
+# elif defined(__KERNEL_GPU__)
+ return float_to_int(sqrtf(value));
+# else
+ /* This is a work around for fast-math on CPU which might replace sqrtf()
+ * with am approximated version.
+ */
+ return float_to_int(sqrtf(value) + 1e-6f);
+# endif
}
ccl_device void cmj_sample_2D(int s, int N, int p, float *fx, float *fy)
{
- kernel_assert(s < N);
+ kernel_assert(s < N);
- int m = cmj_isqrt(N);
- int n = (N - 1)/m + 1;
- float invN = 1.0f/N;
- float invm = 1.0f/m;
- float invn = 1.0f/n;
+ int m = cmj_isqrt(N);
+ int n = (N - 1) / m + 1;
+ float invN = 1.0f / N;
+ float invm = 1.0f / m;
+ float invn = 1.0f / n;
- s = cmj_permute(s, N, p * 0x51633e2d);
+ s = cmj_permute(s, N, p * 0x51633e2d);
- int sdivm, smodm;
+ int sdivm, smodm;
- if(cmj_is_pow2(m)) {
- sdivm = cmj_fast_div_pow2(s, m);
- smodm = cmj_fast_mod_pow2(s, m);
- }
- else {
- /* Doing s*inmv gives precision issues here. */
- sdivm = s / m;
- smodm = s - sdivm*m;
- }
+ if (cmj_is_pow2(m)) {
+ sdivm = cmj_fast_div_pow2(s, m);
+ smodm = cmj_fast_mod_pow2(s, m);
+ }
+ else {
+ /* Doing s*inmv gives precision issues here. */
+ sdivm = s / m;
+ smodm = s - sdivm * m;
+ }
- uint sx = cmj_permute(smodm, m, p * 0x68bc21eb);
- uint sy = cmj_permute(sdivm, n, p * 0x02e5be93);
+ uint sx = cmj_permute(smodm, m, p * 0x68bc21eb);
+ uint sy = cmj_permute(sdivm, n, p * 0x02e5be93);
- float jx = cmj_randfloat(s, p * 0x967a889b);
- float jy = cmj_randfloat(s, p * 0x368cc8b7);
+ float jx = cmj_randfloat(s, p * 0x967a889b);
+ float jy = cmj_randfloat(s, p * 0x368cc8b7);
- *fx = (sx + (sy + jx)*invn)*invm;
- *fy = (s + jy)*invN;
+ *fx = (sx + (sy + jx) * invn) * invm;
+ *fy = (s + jy) * invN;
}
#endif
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index 262d7df1364..5e24f8dedaf 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -19,18 +19,18 @@ CCL_NAMESPACE_BEGIN
/* Light Sample result */
typedef struct LightSample {
- float3 P; /* position on light, or direction for distant light */
- float3 Ng; /* normal on light */
- float3 D; /* direction from shading point to light */
- float t; /* distance to light (FLT_MAX for distant light) */
- float u, v; /* parametric coordinate on primitive */
- float pdf; /* light sampling probability density function */
- float eval_fac; /* intensity multiplier */
- int object; /* object id for triangle/curve lights */
- int prim; /* primitive id for triangle/curve lights */
- int shader; /* shader id */
- int lamp; /* lamp id */
- LightType type; /* type of light */
+ float3 P; /* position on light, or direction for distant light */
+ float3 Ng; /* normal on light */
+ float3 D; /* direction from shading point to light */
+ float t; /* distance to light (FLT_MAX for distant light) */
+ float u, v; /* parametric coordinate on primitive */
+ float pdf; /* light sampling probability density function */
+ float eval_fac; /* intensity multiplier */
+ int object; /* object id for triangle/curve lights */
+ int prim; /* primitive id for triangle/curve lights */
+ int shader; /* shader id */
+ int lamp; /* lamp id */
+ LightType type; /* type of light */
} LightSample;
/* Area light sampling */
@@ -46,130 +46,136 @@ typedef struct LightSample {
*/
ccl_device_inline float rect_light_sample(float3 P,
float3 *light_p,
- float3 axisu, float3 axisv,
- float randu, float randv,
+ float3 axisu,
+ float3 axisv,
+ float randu,
+ float randv,
bool sample_coord)
{
- /* In our name system we're using P for the center,
- * which is o in the paper.
- */
-
- float3 corner = *light_p - axisu * 0.5f - axisv * 0.5f;
- float axisu_len, axisv_len;
- /* Compute local reference system R. */
- float3 x = normalize_len(axisu, &axisu_len);
- float3 y = normalize_len(axisv, &axisv_len);
- float3 z = cross(x, y);
- /* Compute rectangle coords in local reference system. */
- float3 dir = corner - P;
- float z0 = dot(dir, z);
- /* Flip 'z' to make it point against Q. */
- if(z0 > 0.0f) {
- z *= -1.0f;
- z0 *= -1.0f;
- }
- float x0 = dot(dir, x);
- float y0 = dot(dir, y);
- float x1 = x0 + axisu_len;
- float y1 = y0 + axisv_len;
- /* Compute internal angles (gamma_i). */
- float4 diff = make_float4(x0, y1, x1, y0) - make_float4(x1, y0, x0, y1);
- float4 nz = make_float4(y0, x1, y1, x0) * diff;
- nz = nz / sqrt(z0 * z0 * diff * diff + nz * nz);
- float g0 = safe_acosf(-nz.x * nz.y);
- float g1 = safe_acosf(-nz.y * nz.z);
- float g2 = safe_acosf(-nz.z * nz.w);
- float g3 = safe_acosf(-nz.w * nz.x);
- /* Compute predefined constants. */
- float b0 = nz.x;
- float b1 = nz.z;
- float b0sq = b0 * b0;
- float k = M_2PI_F - g2 - g3;
- /* Compute solid angle from internal angles. */
- float S = g0 + g1 - k;
-
- if(sample_coord) {
- /* Compute cu. */
- float au = randu * S + k;
- float fu = (cosf(au) * b0 - b1) / sinf(au);
- float cu = 1.0f / sqrtf(fu * fu + b0sq) * (fu > 0.0f ? 1.0f : -1.0f);
- cu = clamp(cu, -1.0f, 1.0f);
- /* Compute xu. */
- float xu = -(cu * z0) / max(sqrtf(1.0f - cu * cu), 1e-7f);
- xu = clamp(xu, x0, x1);
- /* Compute yv. */
- float z0sq = z0 * z0;
- float y0sq = y0 * y0;
- float y1sq = y1 * y1;
- float d = sqrtf(xu * xu + z0sq);
- float h0 = y0 / sqrtf(d * d + y0sq);
- float h1 = y1 / sqrtf(d * d + y1sq);
- float hv = h0 + randv * (h1 - h0), hv2 = hv * hv;
- float yv = (hv2 < 1.0f - 1e-6f) ? (hv * d) / sqrtf(1.0f - hv2) : y1;
-
- /* Transform (xu, yv, z0) to world coords. */
- *light_p = P + xu * x + yv * y + z0 * z;
- }
-
- /* return pdf */
- if(S != 0.0f)
- return 1.0f / S;
- else
- return 0.0f;
+ /* In our name system we're using P for the center,
+ * which is o in the paper.
+ */
+
+ float3 corner = *light_p - axisu * 0.5f - axisv * 0.5f;
+ float axisu_len, axisv_len;
+ /* Compute local reference system R. */
+ float3 x = normalize_len(axisu, &axisu_len);
+ float3 y = normalize_len(axisv, &axisv_len);
+ float3 z = cross(x, y);
+ /* Compute rectangle coords in local reference system. */
+ float3 dir = corner - P;
+ float z0 = dot(dir, z);
+ /* Flip 'z' to make it point against Q. */
+ if (z0 > 0.0f) {
+ z *= -1.0f;
+ z0 *= -1.0f;
+ }
+ float x0 = dot(dir, x);
+ float y0 = dot(dir, y);
+ float x1 = x0 + axisu_len;
+ float y1 = y0 + axisv_len;
+ /* Compute internal angles (gamma_i). */
+ float4 diff = make_float4(x0, y1, x1, y0) - make_float4(x1, y0, x0, y1);
+ float4 nz = make_float4(y0, x1, y1, x0) * diff;
+ nz = nz / sqrt(z0 * z0 * diff * diff + nz * nz);
+ float g0 = safe_acosf(-nz.x * nz.y);
+ float g1 = safe_acosf(-nz.y * nz.z);
+ float g2 = safe_acosf(-nz.z * nz.w);
+ float g3 = safe_acosf(-nz.w * nz.x);
+ /* Compute predefined constants. */
+ float b0 = nz.x;
+ float b1 = nz.z;
+ float b0sq = b0 * b0;
+ float k = M_2PI_F - g2 - g3;
+ /* Compute solid angle from internal angles. */
+ float S = g0 + g1 - k;
+
+ if (sample_coord) {
+ /* Compute cu. */
+ float au = randu * S + k;
+ float fu = (cosf(au) * b0 - b1) / sinf(au);
+ float cu = 1.0f / sqrtf(fu * fu + b0sq) * (fu > 0.0f ? 1.0f : -1.0f);
+ cu = clamp(cu, -1.0f, 1.0f);
+ /* Compute xu. */
+ float xu = -(cu * z0) / max(sqrtf(1.0f - cu * cu), 1e-7f);
+ xu = clamp(xu, x0, x1);
+ /* Compute yv. */
+ float z0sq = z0 * z0;
+ float y0sq = y0 * y0;
+ float y1sq = y1 * y1;
+ float d = sqrtf(xu * xu + z0sq);
+ float h0 = y0 / sqrtf(d * d + y0sq);
+ float h1 = y1 / sqrtf(d * d + y1sq);
+ float hv = h0 + randv * (h1 - h0), hv2 = hv * hv;
+ float yv = (hv2 < 1.0f - 1e-6f) ? (hv * d) / sqrtf(1.0f - hv2) : y1;
+
+ /* Transform (xu, yv, z0) to world coords. */
+ *light_p = P + xu * x + yv * y + z0 * z;
+ }
+
+ /* return pdf */
+ if (S != 0.0f)
+ return 1.0f / S;
+ else
+ return 0.0f;
}
ccl_device_inline float3 ellipse_sample(float3 ru, float3 rv, float randu, float randv)
{
- to_unit_disk(&randu, &randv);
- return ru*randu + rv*randv;
+ to_unit_disk(&randu, &randv);
+ return ru * randu + rv * randv;
}
ccl_device float3 disk_light_sample(float3 v, float randu, float randv)
{
- float3 ru, rv;
+ float3 ru, rv;
- make_orthonormals(v, &ru, &rv);
+ make_orthonormals(v, &ru, &rv);
- return ellipse_sample(ru, rv, randu, randv);
+ return ellipse_sample(ru, rv, randu, randv);
}
ccl_device float3 distant_light_sample(float3 D, float radius, float randu, float randv)
{
- return normalize(D + disk_light_sample(D, randu, randv)*radius);
+ return normalize(D + disk_light_sample(D, randu, randv) * radius);
}
-ccl_device float3 sphere_light_sample(float3 P, float3 center, float radius, float randu, float randv)
+ccl_device float3
+sphere_light_sample(float3 P, float3 center, float radius, float randu, float randv)
{
- return disk_light_sample(normalize(P - center), randu, randv)*radius;
+ return disk_light_sample(normalize(P - center), randu, randv) * radius;
}
-ccl_device float spot_light_attenuation(float3 dir, float spot_angle, float spot_smooth, LightSample *ls)
+ccl_device float spot_light_attenuation(float3 dir,
+ float spot_angle,
+ float spot_smooth,
+ LightSample *ls)
{
- float3 I = ls->Ng;
+ float3 I = ls->Ng;
- float attenuation = dot(dir, I);
+ float attenuation = dot(dir, I);
- if(attenuation <= spot_angle) {
- attenuation = 0.0f;
- }
- else {
- float t = attenuation - spot_angle;
+ if (attenuation <= spot_angle) {
+ attenuation = 0.0f;
+ }
+ else {
+ float t = attenuation - spot_angle;
- if(t < spot_smooth && spot_smooth != 0.0f)
- attenuation *= smoothstepf(t/spot_smooth);
- }
+ if (t < spot_smooth && spot_smooth != 0.0f)
+ attenuation *= smoothstepf(t / spot_smooth);
+ }
- return attenuation;
+ return attenuation;
}
ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3 I, float t)
{
- float cos_pi = dot(Ng, I);
+ float cos_pi = dot(Ng, I);
- if(cos_pi <= 0.0f)
- return 0.0f;
+ if (cos_pi <= 0.0f)
+ return 0.0f;
- return t*t/cos_pi;
+ return t * t / cos_pi;
}
/* Background Light */
@@ -180,203 +186,219 @@ ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3
* devices, but we're so close to the release so better not screw things
* up for CPU at least.
*/
-#ifdef __KERNEL_GPU__
+# ifdef __KERNEL_GPU__
ccl_device_noinline
-#else
+# else
ccl_device
-#endif
-float3 background_map_sample(KernelGlobals *kg, float randu, float randv, float *pdf)
+# endif
+ float3
+ background_map_sample(KernelGlobals *kg, float randu, float randv, float *pdf)
{
- /* for the following, the CDF values are actually a pair of floats, with the
- * function value as X and the actual CDF as Y. The last entry's function
- * value is the CDF total. */
- int res_x = kernel_data.integrator.pdf_background_res_x;
- int res_y = kernel_data.integrator.pdf_background_res_y;
- int cdf_width = res_x + 1;
-
- /* this is basically std::lower_bound as used by pbrt */
- int first = 0;
- int count = res_y;
-
- while(count > 0) {
- int step = count >> 1;
- int middle = first + step;
-
- if(kernel_tex_fetch(__light_background_marginal_cdf, middle).y < randv) {
- first = middle + 1;
- count -= step + 1;
- }
- else
- count = step;
- }
-
- int index_v = max(0, first - 1);
- kernel_assert(index_v >= 0 && index_v < res_y);
-
- float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v);
- float2 cdf_next_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v + 1);
- float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y);
-
- /* importance-sampled V direction */
- float dv = inverse_lerp(cdf_v.y, cdf_next_v.y, randv);
- float v = (index_v + dv) / res_y;
-
- /* this is basically std::lower_bound as used by pbrt */
- first = 0;
- count = res_x;
- while(count > 0) {
- int step = count >> 1;
- int middle = first + step;
-
- if(kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + middle).y < randu) {
- first = middle + 1;
- count -= step + 1;
- }
- else
- count = step;
- }
-
- int index_u = max(0, first - 1);
- kernel_assert(index_u >= 0 && index_u < res_x);
-
- float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + index_u);
- float2 cdf_next_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + index_u + 1);
- float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + res_x);
-
- /* importance-sampled U direction */
- float du = inverse_lerp(cdf_u.y, cdf_next_u.y, randu);
- float u = (index_u + du) / res_x;
-
- /* compute pdf */
- float denom = cdf_last_u.x * cdf_last_v.x;
- float sin_theta = sinf(M_PI_F * v);
-
- if(sin_theta == 0.0f || denom == 0.0f)
- *pdf = 0.0f;
- else
- *pdf = (cdf_u.x * cdf_v.x)/(M_2PI_F * M_PI_F * sin_theta * denom);
-
- /* compute direction */
- return equirectangular_to_direction(u, v);
+ /* for the following, the CDF values are actually a pair of floats, with the
+ * function value as X and the actual CDF as Y. The last entry's function
+ * value is the CDF total. */
+ int res_x = kernel_data.integrator.pdf_background_res_x;
+ int res_y = kernel_data.integrator.pdf_background_res_y;
+ int cdf_width = res_x + 1;
+
+ /* this is basically std::lower_bound as used by pbrt */
+ int first = 0;
+ int count = res_y;
+
+ while (count > 0) {
+ int step = count >> 1;
+ int middle = first + step;
+
+ if (kernel_tex_fetch(__light_background_marginal_cdf, middle).y < randv) {
+ first = middle + 1;
+ count -= step + 1;
+ }
+ else
+ count = step;
+ }
+
+ int index_v = max(0, first - 1);
+ kernel_assert(index_v >= 0 && index_v < res_y);
+
+ float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v);
+ float2 cdf_next_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v + 1);
+ float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y);
+
+ /* importance-sampled V direction */
+ float dv = inverse_lerp(cdf_v.y, cdf_next_v.y, randv);
+ float v = (index_v + dv) / res_y;
+
+ /* this is basically std::lower_bound as used by pbrt */
+ first = 0;
+ count = res_x;
+ while (count > 0) {
+ int step = count >> 1;
+ int middle = first + step;
+
+ if (kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + middle).y <
+ randu) {
+ first = middle + 1;
+ count -= step + 1;
+ }
+ else
+ count = step;
+ }
+
+ int index_u = max(0, first - 1);
+ kernel_assert(index_u >= 0 && index_u < res_x);
+
+ float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf,
+ index_v * cdf_width + index_u);
+ float2 cdf_next_u = kernel_tex_fetch(__light_background_conditional_cdf,
+ index_v * cdf_width + index_u + 1);
+ float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf,
+ index_v * cdf_width + res_x);
+
+ /* importance-sampled U direction */
+ float du = inverse_lerp(cdf_u.y, cdf_next_u.y, randu);
+ float u = (index_u + du) / res_x;
+
+ /* compute pdf */
+ float denom = cdf_last_u.x * cdf_last_v.x;
+ float sin_theta = sinf(M_PI_F * v);
+
+ if (sin_theta == 0.0f || denom == 0.0f)
+ *pdf = 0.0f;
+ else
+ *pdf = (cdf_u.x * cdf_v.x) / (M_2PI_F * M_PI_F * sin_theta * denom);
+
+ /* compute direction */
+ return equirectangular_to_direction(u, v);
}
/* TODO(sergey): Same as above, after the release we should consider using
* 'noinline' for all devices.
*/
-#ifdef __KERNEL_GPU__
+# ifdef __KERNEL_GPU__
ccl_device_noinline
-#else
+# else
ccl_device
-#endif
-float background_map_pdf(KernelGlobals *kg, float3 direction)
+# endif
+ float
+ background_map_pdf(KernelGlobals *kg, float3 direction)
{
- float2 uv = direction_to_equirectangular(direction);
- int res_x = kernel_data.integrator.pdf_background_res_x;
- int res_y = kernel_data.integrator.pdf_background_res_y;
- int cdf_width = res_x + 1;
+ float2 uv = direction_to_equirectangular(direction);
+ int res_x = kernel_data.integrator.pdf_background_res_x;
+ int res_y = kernel_data.integrator.pdf_background_res_y;
+ int cdf_width = res_x + 1;
- float sin_theta = sinf(uv.y * M_PI_F);
+ float sin_theta = sinf(uv.y * M_PI_F);
- if(sin_theta == 0.0f)
- return 0.0f;
+ if (sin_theta == 0.0f)
+ return 0.0f;
- int index_u = clamp(float_to_int(uv.x * res_x), 0, res_x - 1);
- int index_v = clamp(float_to_int(uv.y * res_y), 0, res_y - 1);
+ int index_u = clamp(float_to_int(uv.x * res_x), 0, res_x - 1);
+ int index_v = clamp(float_to_int(uv.y * res_y), 0, res_y - 1);
- /* pdfs in V direction */
- float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + res_x);
- float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y);
+ /* pdfs in V direction */
+ float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf,
+ index_v * cdf_width + res_x);
+ float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y);
- float denom = cdf_last_u.x * cdf_last_v.x;
+ float denom = cdf_last_u.x * cdf_last_v.x;
- if(denom == 0.0f)
- return 0.0f;
+ if (denom == 0.0f)
+ return 0.0f;
- /* pdfs in U direction */
- float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + index_u);
- float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v);
+ /* pdfs in U direction */
+ float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf,
+ index_v * cdf_width + index_u);
+ float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v);
- return (cdf_u.x * cdf_v.x)/(M_2PI_F * M_PI_F * sin_theta * denom);
+ return (cdf_u.x * cdf_v.x) / (M_2PI_F * M_PI_F * sin_theta * denom);
}
-ccl_device_inline bool background_portal_data_fetch_and_check_side(KernelGlobals *kg,
- float3 P,
- int index,
- float3 *lightpos,
- float3 *dir)
+ccl_device_inline bool background_portal_data_fetch_and_check_side(
+ KernelGlobals *kg, float3 P, int index, float3 *lightpos, float3 *dir)
{
- int portal = kernel_data.integrator.portal_offset + index;
- const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
+ int portal = kernel_data.integrator.portal_offset + index;
+ const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
- *lightpos = make_float3(klight->co[0], klight->co[1], klight->co[2]);
- *dir = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]);
+ *lightpos = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+ *dir = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]);
- /* Check whether portal is on the right side. */
- if(dot(*dir, P - *lightpos) > 1e-4f)
- return true;
+ /* Check whether portal is on the right side. */
+ if (dot(*dir, P - *lightpos) > 1e-4f)
+ return true;
- return false;
+ return false;
}
-ccl_device_inline float background_portal_pdf(KernelGlobals *kg,
- float3 P,
- float3 direction,
- int ignore_portal,
- bool *is_possible)
+ccl_device_inline float background_portal_pdf(
+ KernelGlobals *kg, float3 P, float3 direction, int ignore_portal, bool *is_possible)
{
- float portal_pdf = 0.0f;
-
- int num_possible = 0;
- for(int p = 0; p < kernel_data.integrator.num_portals; p++) {
- if(p == ignore_portal)
- continue;
-
- float3 lightpos, dir;
- if(!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
- continue;
-
- /* There's a portal that could be sampled from this position. */
- if(is_possible) {
- *is_possible = true;
- }
- num_possible++;
-
- int portal = kernel_data.integrator.portal_offset + p;
- const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
- float3 axisu = make_float3(klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
- float3 axisv = make_float3(klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
- bool is_round = (klight->area.invarea < 0.0f);
-
- if(!ray_quad_intersect(P, direction, 1e-4f, FLT_MAX, lightpos, axisu, axisv, dir, NULL, NULL, NULL, NULL, is_round))
- continue;
-
- if(is_round) {
- float t;
- float3 D = normalize_len(lightpos - P, &t);
- portal_pdf += fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
- }
- else {
- portal_pdf += rect_light_sample(P, &lightpos, axisu, axisv, 0.0f, 0.0f, false);
- }
- }
-
- if(ignore_portal >= 0) {
- /* We have skipped a portal that could be sampled as well. */
- num_possible++;
- }
-
- return (num_possible > 0)? portal_pdf / num_possible: 0.0f;
+ float portal_pdf = 0.0f;
+
+ int num_possible = 0;
+ for (int p = 0; p < kernel_data.integrator.num_portals; p++) {
+ if (p == ignore_portal)
+ continue;
+
+ float3 lightpos, dir;
+ if (!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
+ continue;
+
+ /* There's a portal that could be sampled from this position. */
+ if (is_possible) {
+ *is_possible = true;
+ }
+ num_possible++;
+
+ int portal = kernel_data.integrator.portal_offset + p;
+ const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
+ float3 axisu = make_float3(
+ klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
+ float3 axisv = make_float3(
+ klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
+ bool is_round = (klight->area.invarea < 0.0f);
+
+ if (!ray_quad_intersect(P,
+ direction,
+ 1e-4f,
+ FLT_MAX,
+ lightpos,
+ axisu,
+ axisv,
+ dir,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ is_round))
+ continue;
+
+ if (is_round) {
+ float t;
+ float3 D = normalize_len(lightpos - P, &t);
+ portal_pdf += fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
+ }
+ else {
+ portal_pdf += rect_light_sample(P, &lightpos, axisu, axisv, 0.0f, 0.0f, false);
+ }
+ }
+
+ if (ignore_portal >= 0) {
+ /* We have skipped a portal that could be sampled as well. */
+ num_possible++;
+ }
+
+ return (num_possible > 0) ? portal_pdf / num_possible : 0.0f;
}
ccl_device int background_num_possible_portals(KernelGlobals *kg, float3 P)
{
- int num_possible_portals = 0;
- for(int p = 0; p < kernel_data.integrator.num_portals; p++) {
- float3 lightpos, dir;
- if(background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
- num_possible_portals++;
- }
- return num_possible_portals;
+ int num_possible_portals = 0;
+ for (int p = 0; p < kernel_data.integrator.num_portals; p++) {
+ float3 lightpos, dir;
+ if (background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
+ num_possible_portals++;
+ }
+ return num_possible_portals;
}
ccl_device float3 background_portal_sample(KernelGlobals *kg,
@@ -387,774 +409,754 @@ ccl_device float3 background_portal_sample(KernelGlobals *kg,
int *sampled_portal,
float *pdf)
{
- /* Pick a portal, then re-normalize randv. */
- randv *= num_possible;
- int portal = (int)randv;
- randv -= portal;
-
- /* TODO(sergey): Some smarter way of finding portal to sample
- * is welcome.
- */
- for(int p = 0; p < kernel_data.integrator.num_portals; p++) {
- /* Search for the sampled portal. */
- float3 lightpos, dir;
- if(!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
- continue;
-
- if(portal == 0) {
- /* p is the portal to be sampled. */
- int portal = kernel_data.integrator.portal_offset + p;
- const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
- float3 axisu = make_float3(klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
- float3 axisv = make_float3(klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
- bool is_round = (klight->area.invarea < 0.0f);
-
- float3 D;
- if(is_round) {
- lightpos += ellipse_sample(axisu*0.5f, axisv*0.5f, randu, randv);
- float t;
- D = normalize_len(lightpos - P, &t);
- *pdf = fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
- }
- else {
- *pdf = rect_light_sample(P, &lightpos,
- axisu, axisv,
- randu, randv,
- true);
- D = normalize(lightpos - P);
- }
-
- *pdf /= num_possible;
- *sampled_portal = p;
- return D;
- }
-
- portal--;
- }
-
- return make_float3(0.0f, 0.0f, 0.0f);
+ /* Pick a portal, then re-normalize randv. */
+ randv *= num_possible;
+ int portal = (int)randv;
+ randv -= portal;
+
+ /* TODO(sergey): Some smarter way of finding portal to sample
+ * is welcome.
+ */
+ for (int p = 0; p < kernel_data.integrator.num_portals; p++) {
+ /* Search for the sampled portal. */
+ float3 lightpos, dir;
+ if (!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
+ continue;
+
+ if (portal == 0) {
+ /* p is the portal to be sampled. */
+ int portal = kernel_data.integrator.portal_offset + p;
+ const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
+ float3 axisu = make_float3(
+ klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
+ float3 axisv = make_float3(
+ klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
+ bool is_round = (klight->area.invarea < 0.0f);
+
+ float3 D;
+ if (is_round) {
+ lightpos += ellipse_sample(axisu * 0.5f, axisv * 0.5f, randu, randv);
+ float t;
+ D = normalize_len(lightpos - P, &t);
+ *pdf = fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
+ }
+ else {
+ *pdf = rect_light_sample(P, &lightpos, axisu, axisv, randu, randv, true);
+ D = normalize(lightpos - P);
+ }
+
+ *pdf /= num_possible;
+ *sampled_portal = p;
+ return D;
+ }
+
+ portal--;
+ }
+
+ return make_float3(0.0f, 0.0f, 0.0f);
}
-ccl_device_inline float3 background_light_sample(KernelGlobals *kg,
- float3 P,
- float randu, float randv,
- float *pdf)
+ccl_device_inline float3
+background_light_sample(KernelGlobals *kg, float3 P, float randu, float randv, float *pdf)
{
- /* Probability of sampling portals instead of the map. */
- float portal_sampling_pdf = kernel_data.integrator.portal_pdf;
-
- /* Check if there are portals in the scene which we can sample. */
- if(portal_sampling_pdf > 0.0f) {
- int num_portals = background_num_possible_portals(kg, P);
- if(num_portals > 0) {
- if(portal_sampling_pdf == 1.0f || randu < portal_sampling_pdf) {
- if(portal_sampling_pdf < 1.0f) {
- randu /= portal_sampling_pdf;
- }
- int portal;
- float3 D = background_portal_sample(kg, P, randu, randv, num_portals, &portal, pdf);
- if(num_portals > 1) {
- /* Ignore the chosen portal, its pdf is already included. */
- *pdf += background_portal_pdf(kg, P, D, portal, NULL);
- }
- /* We could also have sampled the map, so combine with MIS. */
- if(portal_sampling_pdf < 1.0f) {
- float cdf_pdf = background_map_pdf(kg, D);
- *pdf = (portal_sampling_pdf * (*pdf)
- + (1.0f - portal_sampling_pdf) * cdf_pdf);
- }
- return D;
- }
- else {
- /* Sample map, but with nonzero portal_sampling_pdf for MIS. */
- randu = (randu - portal_sampling_pdf) / (1.0f - portal_sampling_pdf);
- }
- }
- else {
- /* We can't sample a portal.
- * Check if we can sample the map instead.
- */
- if(portal_sampling_pdf == 1.0f) {
- /* Use uniform as a fallback if we can't sample the map. */
- *pdf = 1.0f / M_4PI_F;
- return sample_uniform_sphere(randu, randv);
- }
- else {
- portal_sampling_pdf = 0.0f;
- }
- }
- }
-
- float3 D = background_map_sample(kg, randu, randv, pdf);
- /* Use MIS if portals could be sampled as well. */
- if(portal_sampling_pdf > 0.0f) {
- float portal_pdf = background_portal_pdf(kg, P, D, -1, NULL);
- *pdf = (portal_sampling_pdf * portal_pdf
- + (1.0f - portal_sampling_pdf) * (*pdf));
- }
- return D;
+ /* Probability of sampling portals instead of the map. */
+ float portal_sampling_pdf = kernel_data.integrator.portal_pdf;
+
+ /* Check if there are portals in the scene which we can sample. */
+ if (portal_sampling_pdf > 0.0f) {
+ int num_portals = background_num_possible_portals(kg, P);
+ if (num_portals > 0) {
+ if (portal_sampling_pdf == 1.0f || randu < portal_sampling_pdf) {
+ if (portal_sampling_pdf < 1.0f) {
+ randu /= portal_sampling_pdf;
+ }
+ int portal;
+ float3 D = background_portal_sample(kg, P, randu, randv, num_portals, &portal, pdf);
+ if (num_portals > 1) {
+ /* Ignore the chosen portal, its pdf is already included. */
+ *pdf += background_portal_pdf(kg, P, D, portal, NULL);
+ }
+ /* We could also have sampled the map, so combine with MIS. */
+ if (portal_sampling_pdf < 1.0f) {
+ float cdf_pdf = background_map_pdf(kg, D);
+ *pdf = (portal_sampling_pdf * (*pdf) + (1.0f - portal_sampling_pdf) * cdf_pdf);
+ }
+ return D;
+ }
+ else {
+ /* Sample map, but with nonzero portal_sampling_pdf for MIS. */
+ randu = (randu - portal_sampling_pdf) / (1.0f - portal_sampling_pdf);
+ }
+ }
+ else {
+ /* We can't sample a portal.
+ * Check if we can sample the map instead.
+ */
+ if (portal_sampling_pdf == 1.0f) {
+ /* Use uniform as a fallback if we can't sample the map. */
+ *pdf = 1.0f / M_4PI_F;
+ return sample_uniform_sphere(randu, randv);
+ }
+ else {
+ portal_sampling_pdf = 0.0f;
+ }
+ }
+ }
+
+ float3 D = background_map_sample(kg, randu, randv, pdf);
+ /* Use MIS if portals could be sampled as well. */
+ if (portal_sampling_pdf > 0.0f) {
+ float portal_pdf = background_portal_pdf(kg, P, D, -1, NULL);
+ *pdf = (portal_sampling_pdf * portal_pdf + (1.0f - portal_sampling_pdf) * (*pdf));
+ }
+ return D;
}
ccl_device float background_light_pdf(KernelGlobals *kg, float3 P, float3 direction)
{
- /* Probability of sampling portals instead of the map. */
- float portal_sampling_pdf = kernel_data.integrator.portal_pdf;
-
- float portal_pdf = 0.0f, map_pdf = 0.0f;
- if(portal_sampling_pdf > 0.0f) {
- /* Evaluate PDF of sampling this direction by portal sampling. */
- bool is_possible = false;
- portal_pdf = background_portal_pdf(kg, P, direction, -1, &is_possible) * portal_sampling_pdf;
- if(!is_possible) {
- /* Portal sampling is not possible here because all portals point to the wrong side.
- * If map sampling is possible, it would be used instead, otherwise fallback sampling is used. */
- if(portal_sampling_pdf == 1.0f) {
- return kernel_data.integrator.pdf_lights / M_4PI_F;
- }
- else {
- /* Force map sampling. */
- portal_sampling_pdf = 0.0f;
- }
- }
- }
- if(portal_sampling_pdf < 1.0f) {
- /* Evaluate PDF of sampling this direction by map sampling. */
- map_pdf = background_map_pdf(kg, direction) * (1.0f - portal_sampling_pdf);
- }
- return (portal_pdf + map_pdf) * kernel_data.integrator.pdf_lights;
+ /* Probability of sampling portals instead of the map. */
+ float portal_sampling_pdf = kernel_data.integrator.portal_pdf;
+
+ float portal_pdf = 0.0f, map_pdf = 0.0f;
+ if (portal_sampling_pdf > 0.0f) {
+ /* Evaluate PDF of sampling this direction by portal sampling. */
+ bool is_possible = false;
+ portal_pdf = background_portal_pdf(kg, P, direction, -1, &is_possible) * portal_sampling_pdf;
+ if (!is_possible) {
+ /* Portal sampling is not possible here because all portals point to the wrong side.
+ * If map sampling is possible, it would be used instead, otherwise fallback sampling is used. */
+ if (portal_sampling_pdf == 1.0f) {
+ return kernel_data.integrator.pdf_lights / M_4PI_F;
+ }
+ else {
+ /* Force map sampling. */
+ portal_sampling_pdf = 0.0f;
+ }
+ }
+ }
+ if (portal_sampling_pdf < 1.0f) {
+ /* Evaluate PDF of sampling this direction by map sampling. */
+ map_pdf = background_map_pdf(kg, direction) * (1.0f - portal_sampling_pdf);
+ }
+ return (portal_pdf + map_pdf) * kernel_data.integrator.pdf_lights;
}
#endif
/* Regular Light */
-ccl_device_inline bool lamp_light_sample(KernelGlobals *kg,
- int lamp,
- float randu, float randv,
- float3 P,
- LightSample *ls)
+ccl_device_inline bool lamp_light_sample(
+ KernelGlobals *kg, int lamp, float randu, float randv, float3 P, LightSample *ls)
{
- const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
- LightType type = (LightType)klight->type;
- ls->type = type;
- ls->shader = klight->shader_id;
- ls->object = PRIM_NONE;
- ls->prim = PRIM_NONE;
- ls->lamp = lamp;
- ls->u = randu;
- ls->v = randv;
-
- if(type == LIGHT_DISTANT) {
- /* distant light */
- float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]);
- float3 D = lightD;
- float radius = klight->distant.radius;
- float invarea = klight->distant.invarea;
-
- if(radius > 0.0f)
- D = distant_light_sample(D, radius, randu, randv);
-
- ls->P = D;
- ls->Ng = D;
- ls->D = -D;
- ls->t = FLT_MAX;
-
- float costheta = dot(lightD, D);
- ls->pdf = invarea/(costheta*costheta*costheta);
- ls->eval_fac = ls->pdf;
- }
+ const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
+ LightType type = (LightType)klight->type;
+ ls->type = type;
+ ls->shader = klight->shader_id;
+ ls->object = PRIM_NONE;
+ ls->prim = PRIM_NONE;
+ ls->lamp = lamp;
+ ls->u = randu;
+ ls->v = randv;
+
+ if (type == LIGHT_DISTANT) {
+ /* distant light */
+ float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+ float3 D = lightD;
+ float radius = klight->distant.radius;
+ float invarea = klight->distant.invarea;
+
+ if (radius > 0.0f)
+ D = distant_light_sample(D, radius, randu, randv);
+
+ ls->P = D;
+ ls->Ng = D;
+ ls->D = -D;
+ ls->t = FLT_MAX;
+
+ float costheta = dot(lightD, D);
+ ls->pdf = invarea / (costheta * costheta * costheta);
+ ls->eval_fac = ls->pdf;
+ }
#ifdef __BACKGROUND_MIS__
- else if(type == LIGHT_BACKGROUND) {
- /* infinite area light (e.g. light dome or env light) */
- float3 D = -background_light_sample(kg, P, randu, randv, &ls->pdf);
-
- ls->P = D;
- ls->Ng = D;
- ls->D = -D;
- ls->t = FLT_MAX;
- ls->eval_fac = 1.0f;
- }
+ else if (type == LIGHT_BACKGROUND) {
+ /* infinite area light (e.g. light dome or env light) */
+ float3 D = -background_light_sample(kg, P, randu, randv, &ls->pdf);
+
+ ls->P = D;
+ ls->Ng = D;
+ ls->D = -D;
+ ls->t = FLT_MAX;
+ ls->eval_fac = 1.0f;
+ }
#endif
- else {
- ls->P = make_float3(klight->co[0], klight->co[1], klight->co[2]);
-
- if(type == LIGHT_POINT || type == LIGHT_SPOT) {
- float radius = klight->spot.radius;
-
- if(radius > 0.0f)
- /* sphere light */
- ls->P += sphere_light_sample(P, ls->P, radius, randu, randv);
-
- ls->D = normalize_len(ls->P - P, &ls->t);
- ls->Ng = -ls->D;
-
- float invarea = klight->spot.invarea;
- ls->eval_fac = (0.25f*M_1_PI_F)*invarea;
- ls->pdf = invarea;
-
- if(type == LIGHT_SPOT) {
- /* spot light attenuation */
- float3 dir = make_float3(klight->spot.dir[0],
- klight->spot.dir[1],
- klight->spot.dir[2]);
- ls->eval_fac *= spot_light_attenuation(dir,
- klight->spot.spot_angle,
- klight->spot.spot_smooth,
- ls);
- if(ls->eval_fac == 0.0f) {
- return false;
- }
- }
- float2 uv = map_to_sphere(ls->Ng);
- ls->u = uv.x;
- ls->v = uv.y;
-
- ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
- }
- else {
- /* area light */
- float3 axisu = make_float3(klight->area.axisu[0],
- klight->area.axisu[1],
- klight->area.axisu[2]);
- float3 axisv = make_float3(klight->area.axisv[0],
- klight->area.axisv[1],
- klight->area.axisv[2]);
- float3 D = make_float3(klight->area.dir[0],
- klight->area.dir[1],
- klight->area.dir[2]);
- float invarea = fabsf(klight->area.invarea);
- bool is_round = (klight->area.invarea < 0.0f);
-
- if(dot(ls->P - P, D) > 0.0f) {
- return false;
- }
-
- float3 inplane;
-
- if(is_round) {
- inplane = ellipse_sample(axisu*0.5f, axisv*0.5f, randu, randv);
- ls->P += inplane;
- ls->pdf = invarea;
- }
- else {
- inplane = ls->P;
- ls->pdf = rect_light_sample(P, &ls->P,
- axisu, axisv,
- randu, randv,
- true);
- inplane = ls->P - inplane;
- }
-
- ls->u = dot(inplane, axisu) * (1.0f / dot(axisu, axisu)) + 0.5f;
- ls->v = dot(inplane, axisv) * (1.0f / dot(axisv, axisv)) + 0.5f;
-
- ls->Ng = D;
- ls->D = normalize_len(ls->P - P, &ls->t);
-
- ls->eval_fac = 0.25f*invarea;
- if(is_round) {
- ls->pdf *= lamp_light_pdf(kg, D, -ls->D, ls->t);
- }
- }
- }
-
- ls->pdf *= kernel_data.integrator.pdf_lights;
-
- return (ls->pdf > 0.0f);
+ else {
+ ls->P = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+
+ if (type == LIGHT_POINT || type == LIGHT_SPOT) {
+ float radius = klight->spot.radius;
+
+ if (radius > 0.0f)
+ /* sphere light */
+ ls->P += sphere_light_sample(P, ls->P, radius, randu, randv);
+
+ ls->D = normalize_len(ls->P - P, &ls->t);
+ ls->Ng = -ls->D;
+
+ float invarea = klight->spot.invarea;
+ ls->eval_fac = (0.25f * M_1_PI_F) * invarea;
+ ls->pdf = invarea;
+
+ if (type == LIGHT_SPOT) {
+ /* spot light attenuation */
+ float3 dir = make_float3(klight->spot.dir[0], klight->spot.dir[1], klight->spot.dir[2]);
+ ls->eval_fac *= spot_light_attenuation(
+ dir, klight->spot.spot_angle, klight->spot.spot_smooth, ls);
+ if (ls->eval_fac == 0.0f) {
+ return false;
+ }
+ }
+ float2 uv = map_to_sphere(ls->Ng);
+ ls->u = uv.x;
+ ls->v = uv.y;
+
+ ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
+ }
+ else {
+ /* area light */
+ float3 axisu = make_float3(
+ klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
+ float3 axisv = make_float3(
+ klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
+ float3 D = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]);
+ float invarea = fabsf(klight->area.invarea);
+ bool is_round = (klight->area.invarea < 0.0f);
+
+ if (dot(ls->P - P, D) > 0.0f) {
+ return false;
+ }
+
+ float3 inplane;
+
+ if (is_round) {
+ inplane = ellipse_sample(axisu * 0.5f, axisv * 0.5f, randu, randv);
+ ls->P += inplane;
+ ls->pdf = invarea;
+ }
+ else {
+ inplane = ls->P;
+ ls->pdf = rect_light_sample(P, &ls->P, axisu, axisv, randu, randv, true);
+ inplane = ls->P - inplane;
+ }
+
+ ls->u = dot(inplane, axisu) * (1.0f / dot(axisu, axisu)) + 0.5f;
+ ls->v = dot(inplane, axisv) * (1.0f / dot(axisv, axisv)) + 0.5f;
+
+ ls->Ng = D;
+ ls->D = normalize_len(ls->P - P, &ls->t);
+
+ ls->eval_fac = 0.25f * invarea;
+ if (is_round) {
+ ls->pdf *= lamp_light_pdf(kg, D, -ls->D, ls->t);
+ }
+ }
+ }
+
+ ls->pdf *= kernel_data.integrator.pdf_lights;
+
+ return (ls->pdf > 0.0f);
}
-ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, float t, LightSample *ls)
+ccl_device bool lamp_light_eval(
+ KernelGlobals *kg, int lamp, float3 P, float3 D, float t, LightSample *ls)
{
- const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
- LightType type = (LightType)klight->type;
- ls->type = type;
- ls->shader = klight->shader_id;
- ls->object = PRIM_NONE;
- ls->prim = PRIM_NONE;
- ls->lamp = lamp;
- /* todo: missing texture coordinates */
- ls->u = 0.0f;
- ls->v = 0.0f;
-
- if(!(ls->shader & SHADER_USE_MIS))
- return false;
-
- if(type == LIGHT_DISTANT) {
- /* distant light */
- float radius = klight->distant.radius;
-
- if(radius == 0.0f)
- return false;
- if(t != FLT_MAX)
- return false;
-
- /* a distant light is infinitely far away, but equivalent to a disk
- * shaped light exactly 1 unit away from the current shading point.
- *
- *     radius              t^2/cos(theta)
- *  <---------->           t = sqrt(1^2 + tan(theta)^2)
- *       tan(th)           area = radius*radius*pi
- *       <----->
- *        \    |           (1 + tan(theta)^2)/cos(theta)
- *         \   |           (1 + tan(acos(cos(theta)))^2)/cos(theta)
- *       t  \th| 1         simplifies to
- *           \-|           1/(cos(theta)^3)
- *            \|           magic!
- *             P
- */
-
- float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]);
- float costheta = dot(-lightD, D);
- float cosangle = klight->distant.cosangle;
-
- if(costheta < cosangle)
- return false;
-
- ls->P = -D;
- ls->Ng = -D;
- ls->D = D;
- ls->t = FLT_MAX;
-
- /* compute pdf */
- float invarea = klight->distant.invarea;
- ls->pdf = invarea/(costheta*costheta*costheta);
- ls->eval_fac = ls->pdf;
- }
- else if(type == LIGHT_POINT || type == LIGHT_SPOT) {
- float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]);
-
- float radius = klight->spot.radius;
-
- /* sphere light */
- if(radius == 0.0f)
- return false;
-
- if(!ray_aligned_disk_intersect(P, D, t,
- lightP, radius, &ls->P, &ls->t))
- {
- return false;
- }
-
- ls->Ng = -D;
- ls->D = D;
-
- float invarea = klight->spot.invarea;
- ls->eval_fac = (0.25f*M_1_PI_F)*invarea;
- ls->pdf = invarea;
-
- if(type == LIGHT_SPOT) {
- /* spot light attenuation */
- float3 dir = make_float3(klight->spot.dir[0],
- klight->spot.dir[1],
- klight->spot.dir[2]);
- ls->eval_fac *= spot_light_attenuation(dir,
- klight->spot.spot_angle,
- klight->spot.spot_smooth,
- ls);
-
- if(ls->eval_fac == 0.0f)
- return false;
- }
- float2 uv = map_to_sphere(ls->Ng);
- ls->u = uv.x;
- ls->v = uv.y;
-
- /* compute pdf */
- if(ls->t != FLT_MAX)
- ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
- }
- else if(type == LIGHT_AREA) {
- /* area light */
- float invarea = fabsf(klight->area.invarea);
- bool is_round = (klight->area.invarea < 0.0f);
- if(invarea == 0.0f)
- return false;
-
- float3 axisu = make_float3(klight->area.axisu[0],
- klight->area.axisu[1],
- klight->area.axisu[2]);
- float3 axisv = make_float3(klight->area.axisv[0],
- klight->area.axisv[1],
- klight->area.axisv[2]);
- float3 Ng = make_float3(klight->area.dir[0],
- klight->area.dir[1],
- klight->area.dir[2]);
-
- /* one sided */
- if(dot(D, Ng) >= 0.0f)
- return false;
-
- float3 light_P = make_float3(klight->co[0], klight->co[1], klight->co[2]);
-
- if(!ray_quad_intersect(P, D, 0.0f, t, light_P,
- axisu, axisv, Ng,
- &ls->P, &ls->t,
- &ls->u, &ls->v,
- is_round))
- {
- return false;
- }
-
- ls->D = D;
- ls->Ng = Ng;
- if(is_round) {
- ls->pdf = invarea * lamp_light_pdf(kg, Ng, -D, ls->t);
- }
- else {
- ls->pdf = rect_light_sample(P, &light_P, axisu, axisv, 0, 0, false);
- }
- ls->eval_fac = 0.25f*invarea;
- }
- else {
- return false;
- }
-
- ls->pdf *= kernel_data.integrator.pdf_lights;
-
- return true;
+ const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
+ LightType type = (LightType)klight->type;
+ ls->type = type;
+ ls->shader = klight->shader_id;
+ ls->object = PRIM_NONE;
+ ls->prim = PRIM_NONE;
+ ls->lamp = lamp;
+ /* todo: missing texture coordinates */
+ ls->u = 0.0f;
+ ls->v = 0.0f;
+
+ if (!(ls->shader & SHADER_USE_MIS))
+ return false;
+
+ if (type == LIGHT_DISTANT) {
+ /* distant light */
+ float radius = klight->distant.radius;
+
+ if (radius == 0.0f)
+ return false;
+ if (t != FLT_MAX)
+ return false;
+
+ /* a distant light is infinitely far away, but equivalent to a disk
+ * shaped light exactly 1 unit away from the current shading point.
+ *
+ *     radius              t^2/cos(theta)
+ *  <---------->           t = sqrt(1^2 + tan(theta)^2)
+ *       tan(th)           area = radius*radius*pi
+ *       <----->
+ *        \    |           (1 + tan(theta)^2)/cos(theta)
+ *         \   |           (1 + tan(acos(cos(theta)))^2)/cos(theta)
+ *       t  \th| 1         simplifies to
+ *           \-|           1/(cos(theta)^3)
+ *            \|           magic!
+ *             P
+ */
+
+ float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+ float costheta = dot(-lightD, D);
+ float cosangle = klight->distant.cosangle;
+
+ if (costheta < cosangle)
+ return false;
+
+ ls->P = -D;
+ ls->Ng = -D;
+ ls->D = D;
+ ls->t = FLT_MAX;
+
+ /* compute pdf */
+ float invarea = klight->distant.invarea;
+ ls->pdf = invarea / (costheta * costheta * costheta);
+ ls->eval_fac = ls->pdf;
+ }
+ else if (type == LIGHT_POINT || type == LIGHT_SPOT) {
+ float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+
+ float radius = klight->spot.radius;
+
+ /* sphere light */
+ if (radius == 0.0f)
+ return false;
+
+ if (!ray_aligned_disk_intersect(P, D, t, lightP, radius, &ls->P, &ls->t)) {
+ return false;
+ }
+
+ ls->Ng = -D;
+ ls->D = D;
+
+ float invarea = klight->spot.invarea;
+ ls->eval_fac = (0.25f * M_1_PI_F) * invarea;
+ ls->pdf = invarea;
+
+ if (type == LIGHT_SPOT) {
+ /* spot light attenuation */
+ float3 dir = make_float3(klight->spot.dir[0], klight->spot.dir[1], klight->spot.dir[2]);
+ ls->eval_fac *= spot_light_attenuation(
+ dir, klight->spot.spot_angle, klight->spot.spot_smooth, ls);
+
+ if (ls->eval_fac == 0.0f)
+ return false;
+ }
+ float2 uv = map_to_sphere(ls->Ng);
+ ls->u = uv.x;
+ ls->v = uv.y;
+
+ /* compute pdf */
+ if (ls->t != FLT_MAX)
+ ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
+ }
+ else if (type == LIGHT_AREA) {
+ /* area light */
+ float invarea = fabsf(klight->area.invarea);
+ bool is_round = (klight->area.invarea < 0.0f);
+ if (invarea == 0.0f)
+ return false;
+
+ float3 axisu = make_float3(
+ klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
+ float3 axisv = make_float3(
+ klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
+ float3 Ng = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]);
+
+ /* one sided */
+ if (dot(D, Ng) >= 0.0f)
+ return false;
+
+ float3 light_P = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+
+ if (!ray_quad_intersect(
+ P, D, 0.0f, t, light_P, axisu, axisv, Ng, &ls->P, &ls->t, &ls->u, &ls->v, is_round)) {
+ return false;
+ }
+
+ ls->D = D;
+ ls->Ng = Ng;
+ if (is_round) {
+ ls->pdf = invarea * lamp_light_pdf(kg, Ng, -D, ls->t);
+ }
+ else {
+ ls->pdf = rect_light_sample(P, &light_P, axisu, axisv, 0, 0, false);
+ }
+ ls->eval_fac = 0.25f * invarea;
+ }
+ else {
+ return false;
+ }
+
+ ls->pdf *= kernel_data.integrator.pdf_lights;
+
+ return true;
}
/* Triangle Light */
/* returns true if the triangle has motion blur or an instancing transform applied */
-ccl_device_inline bool triangle_world_space_vertices(KernelGlobals *kg, int object, int prim, float time, float3 V[3])
+ccl_device_inline bool triangle_world_space_vertices(
+ KernelGlobals *kg, int object, int prim, float time, float3 V[3])
{
- bool has_motion = false;
- const int object_flag = kernel_tex_fetch(__object_flag, object);
+ bool has_motion = false;
+ const int object_flag = kernel_tex_fetch(__object_flag, object);
- if(object_flag & SD_OBJECT_HAS_VERTEX_MOTION && time >= 0.0f) {
- motion_triangle_vertices(kg, object, prim, time, V);
- has_motion = true;
- }
- else {
- triangle_vertices(kg, prim, V);
- }
+ if (object_flag & SD_OBJECT_HAS_VERTEX_MOTION && time >= 0.0f) {
+ motion_triangle_vertices(kg, object, prim, time, V);
+ has_motion = true;
+ }
+ else {
+ triangle_vertices(kg, prim, V);
+ }
#ifdef __INSTANCING__
- if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+ if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
# ifdef __OBJECT_MOTION__
- float object_time = (time >= 0.0f) ? time : 0.5f;
- Transform tfm = object_fetch_transform_motion_test(kg, object, object_time, NULL);
+ float object_time = (time >= 0.0f) ? time : 0.5f;
+ Transform tfm = object_fetch_transform_motion_test(kg, object, object_time, NULL);
# else
- Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
# endif
- V[0] = transform_point(&tfm, V[0]);
- V[1] = transform_point(&tfm, V[1]);
- V[2] = transform_point(&tfm, V[2]);
- has_motion = true;
- }
+ V[0] = transform_point(&tfm, V[0]);
+ V[1] = transform_point(&tfm, V[1]);
+ V[2] = transform_point(&tfm, V[2]);
+ has_motion = true;
+ }
#endif
- return has_motion;
+ return has_motion;
}
-ccl_device_inline float triangle_light_pdf_area(KernelGlobals *kg, const float3 Ng, const float3 I, float t)
+ccl_device_inline float triangle_light_pdf_area(KernelGlobals *kg,
+ const float3 Ng,
+ const float3 I,
+ float t)
{
- float pdf = kernel_data.integrator.pdf_triangles;
- float cos_pi = fabsf(dot(Ng, I));
+ float pdf = kernel_data.integrator.pdf_triangles;
+ float cos_pi = fabsf(dot(Ng, I));
- if(cos_pi == 0.0f)
- return 0.0f;
+ if (cos_pi == 0.0f)
+ return 0.0f;
- return t*t*pdf/cos_pi;
+ return t * t * pdf / cos_pi;
}
ccl_device_forceinline float triangle_light_pdf(KernelGlobals *kg, ShaderData *sd, float t)
{
- /* A naive heuristic to decide between costly solid angle sampling
- * and simple area sampling, comparing the distance to the triangle plane
- * to the length of the edges of the triangle. */
-
- float3 V[3];
- bool has_motion = triangle_world_space_vertices(kg, sd->object, sd->prim, sd->time, V);
-
- const float3 e0 = V[1] - V[0];
- const float3 e1 = V[2] - V[0];
- const float3 e2 = V[2] - V[1];
- const float longest_edge_squared = max(len_squared(e0), max(len_squared(e1), len_squared(e2)));
- const float3 N = cross(e0, e1);
- const float distance_to_plane = fabsf(dot(N, sd->I * t))/dot(N, N);
-
- if(longest_edge_squared > distance_to_plane*distance_to_plane) {
- /* sd contains the point on the light source
- * calculate Px, the point that we're shading */
- const float3 Px = sd->P + sd->I * t;
- const float3 v0_p = V[0] - Px;
- const float3 v1_p = V[1] - Px;
- const float3 v2_p = V[2] - Px;
-
- const float3 u01 = safe_normalize(cross(v0_p, v1_p));
- const float3 u02 = safe_normalize(cross(v0_p, v2_p));
- const float3 u12 = safe_normalize(cross(v1_p, v2_p));
-
- const float alpha = fast_acosf(dot(u02, u01));
- const float beta = fast_acosf(-dot(u01, u12));
- const float gamma = fast_acosf(dot(u02, u12));
- const float solid_angle = alpha + beta + gamma - M_PI_F;
-
- /* pdf_triangles is calculated over triangle area, but we're not sampling over its area */
- if(UNLIKELY(solid_angle == 0.0f)) {
- return 0.0f;
- }
- else {
- float area = 1.0f;
- if(has_motion) {
- /* get the center frame vertices, this is what the PDF was calculated from */
- triangle_world_space_vertices(kg, sd->object, sd->prim, -1.0f, V);
- area = triangle_area(V[0], V[1], V[2]);
- }
- else {
- area = 0.5f * len(N);
- }
- const float pdf = area * kernel_data.integrator.pdf_triangles;
- return pdf / solid_angle;
- }
- }
- else {
- float pdf = triangle_light_pdf_area(kg, sd->Ng, sd->I, t);
- if(has_motion) {
- const float area = 0.5f * len(N);
- if(UNLIKELY(area == 0.0f)) {
- return 0.0f;
- }
- /* scale the PDF.
- * area = the area the sample was taken from
- * area_pre = the are from which pdf_triangles was calculated from */
- triangle_world_space_vertices(kg, sd->object, sd->prim, -1.0f, V);
- const float area_pre = triangle_area(V[0], V[1], V[2]);
- pdf = pdf * area_pre / area;
- }
- return pdf;
- }
+ /* A naive heuristic to decide between costly solid angle sampling
+ * and simple area sampling, comparing the distance to the triangle plane
+ * to the length of the edges of the triangle. */
+
+ float3 V[3];
+ bool has_motion = triangle_world_space_vertices(kg, sd->object, sd->prim, sd->time, V);
+
+ const float3 e0 = V[1] - V[0];
+ const float3 e1 = V[2] - V[0];
+ const float3 e2 = V[2] - V[1];
+ const float longest_edge_squared = max(len_squared(e0), max(len_squared(e1), len_squared(e2)));
+ const float3 N = cross(e0, e1);
+ const float distance_to_plane = fabsf(dot(N, sd->I * t)) / dot(N, N);
+
+ if (longest_edge_squared > distance_to_plane * distance_to_plane) {
+ /* sd contains the point on the light source
+ * calculate Px, the point that we're shading */
+ const float3 Px = sd->P + sd->I * t;
+ const float3 v0_p = V[0] - Px;
+ const float3 v1_p = V[1] - Px;
+ const float3 v2_p = V[2] - Px;
+
+ const float3 u01 = safe_normalize(cross(v0_p, v1_p));
+ const float3 u02 = safe_normalize(cross(v0_p, v2_p));
+ const float3 u12 = safe_normalize(cross(v1_p, v2_p));
+
+ const float alpha = fast_acosf(dot(u02, u01));
+ const float beta = fast_acosf(-dot(u01, u12));
+ const float gamma = fast_acosf(dot(u02, u12));
+ const float solid_angle = alpha + beta + gamma - M_PI_F;
+
+ /* pdf_triangles is calculated over triangle area, but we're not sampling over its area */
+ if (UNLIKELY(solid_angle == 0.0f)) {
+ return 0.0f;
+ }
+ else {
+ float area = 1.0f;
+ if (has_motion) {
+ /* get the center frame vertices, this is what the PDF was calculated from */
+ triangle_world_space_vertices(kg, sd->object, sd->prim, -1.0f, V);
+ area = triangle_area(V[0], V[1], V[2]);
+ }
+ else {
+ area = 0.5f * len(N);
+ }
+ const float pdf = area * kernel_data.integrator.pdf_triangles;
+ return pdf / solid_angle;
+ }
+ }
+ else {
+ float pdf = triangle_light_pdf_area(kg, sd->Ng, sd->I, t);
+ if (has_motion) {
+ const float area = 0.5f * len(N);
+ if (UNLIKELY(area == 0.0f)) {
+ return 0.0f;
+ }
+ /* scale the PDF.
+ * area = the area the sample was taken from
+ * area_pre = the area from which pdf_triangles was calculated */
+ triangle_world_space_vertices(kg, sd->object, sd->prim, -1.0f, V);
+ const float area_pre = triangle_area(V[0], V[1], V[2]);
+ pdf = pdf * area_pre / area;
+ }
+ return pdf;
+ }
}
-ccl_device_forceinline void triangle_light_sample(KernelGlobals *kg, int prim, int object,
- float randu, float randv, float time, LightSample *ls, const float3 P)
+ccl_device_forceinline void triangle_light_sample(KernelGlobals *kg,
+ int prim,
+ int object,
+ float randu,
+ float randv,
+ float time,
+ LightSample *ls,
+ const float3 P)
{
- /* A naive heuristic to decide between costly solid angle sampling
- * and simple area sampling, comparing the distance to the triangle plane
- * to the length of the edges of the triangle. */
-
- float3 V[3];
- bool has_motion = triangle_world_space_vertices(kg, object, prim, time, V);
-
- const float3 e0 = V[1] - V[0];
- const float3 e1 = V[2] - V[0];
- const float3 e2 = V[2] - V[1];
- const float longest_edge_squared = max(len_squared(e0), max(len_squared(e1), len_squared(e2)));
- const float3 N0 = cross(e0, e1);
- float Nl = 0.0f;
- ls->Ng = safe_normalize_len(N0, &Nl);
- float area = 0.5f * Nl;
-
- /* flip normal if necessary */
- const int object_flag = kernel_tex_fetch(__object_flag, object);
- if(object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
- ls->Ng = -ls->Ng;
- }
- ls->eval_fac = 1.0f;
- ls->shader = kernel_tex_fetch(__tri_shader, prim);
- ls->object = object;
- ls->prim = prim;
- ls->lamp = LAMP_NONE;
- ls->shader |= SHADER_USE_MIS;
- ls->type = LIGHT_TRIANGLE;
-
- float distance_to_plane = fabsf(dot(N0, V[0] - P)/dot(N0, N0));
-
- if(longest_edge_squared > distance_to_plane*distance_to_plane) {
- /* see James Arvo, "Stratified Sampling of Spherical Triangles"
- * http://www.graphics.cornell.edu/pubs/1995/Arv95c.pdf */
-
- /* project the triangle to the unit sphere
- * and calculate its edges and angles */
- const float3 v0_p = V[0] - P;
- const float3 v1_p = V[1] - P;
- const float3 v2_p = V[2] - P;
-
- const float3 u01 = safe_normalize(cross(v0_p, v1_p));
- const float3 u02 = safe_normalize(cross(v0_p, v2_p));
- const float3 u12 = safe_normalize(cross(v1_p, v2_p));
-
- const float3 A = safe_normalize(v0_p);
- const float3 B = safe_normalize(v1_p);
- const float3 C = safe_normalize(v2_p);
-
- const float cos_alpha = dot(u02, u01);
- const float cos_beta = -dot(u01, u12);
- const float cos_gamma = dot(u02, u12);
-
- /* calculate dihedral angles */
- const float alpha = fast_acosf(cos_alpha);
- const float beta = fast_acosf(cos_beta);
- const float gamma = fast_acosf(cos_gamma);
- /* the area of the unit spherical triangle = solid angle */
- const float solid_angle = alpha + beta + gamma - M_PI_F;
-
- /* precompute a few things
- * these could be re-used to take several samples
- * as they are independent of randu/randv */
- const float cos_c = dot(A, B);
- const float sin_alpha = fast_sinf(alpha);
- const float product = sin_alpha * cos_c;
-
- /* Select a random sub-area of the spherical triangle
- * and calculate the third vertex C_ of that new triangle */
- const float phi = randu * solid_angle - alpha;
- float s, t;
- fast_sincosf(phi, &s, &t);
- const float u = t - cos_alpha;
- const float v = s + product;
-
- const float3 U = safe_normalize(C - dot(C, A) * A);
-
- float q = 1.0f;
- const float det = ((v * s + u * t) * sin_alpha);
- if(det != 0.0f) {
- q = ((v * t - u * s) * cos_alpha - v) / det;
- }
- const float temp = max(1.0f - q*q, 0.0f);
-
- const float3 C_ = safe_normalize(q * A + sqrtf(temp) * U);
-
- /* Finally, select a random point along the edge of the new triangle
- * That point on the spherical triangle is the sampled ray direction */
- const float z = 1.0f - randv * (1.0f - dot(C_, B));
- ls->D = z * B + safe_sqrtf(1.0f - z*z) * safe_normalize(C_ - dot(C_, B) * B);
-
- /* calculate intersection with the planar triangle */
- if(!ray_triangle_intersect(P, ls->D, FLT_MAX,
+ /* A naive heuristic to decide between costly solid angle sampling
+ * and simple area sampling, comparing the distance to the triangle plane
+ * to the length of the edges of the triangle. */
+
+ float3 V[3];
+ bool has_motion = triangle_world_space_vertices(kg, object, prim, time, V);
+
+ const float3 e0 = V[1] - V[0];
+ const float3 e1 = V[2] - V[0];
+ const float3 e2 = V[2] - V[1];
+ const float longest_edge_squared = max(len_squared(e0), max(len_squared(e1), len_squared(e2)));
+ const float3 N0 = cross(e0, e1);
+ float Nl = 0.0f;
+ ls->Ng = safe_normalize_len(N0, &Nl);
+ float area = 0.5f * Nl;
+
+ /* flip normal if necessary */
+ const int object_flag = kernel_tex_fetch(__object_flag, object);
+ if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
+ ls->Ng = -ls->Ng;
+ }
+ ls->eval_fac = 1.0f;
+ ls->shader = kernel_tex_fetch(__tri_shader, prim);
+ ls->object = object;
+ ls->prim = prim;
+ ls->lamp = LAMP_NONE;
+ ls->shader |= SHADER_USE_MIS;
+ ls->type = LIGHT_TRIANGLE;
+
+ float distance_to_plane = fabsf(dot(N0, V[0] - P) / dot(N0, N0));
+
+ if (longest_edge_squared > distance_to_plane * distance_to_plane) {
+ /* see James Arvo, "Stratified Sampling of Spherical Triangles"
+ * http://www.graphics.cornell.edu/pubs/1995/Arv95c.pdf */
+
+ /* project the triangle to the unit sphere
+ * and calculate its edges and angles */
+ const float3 v0_p = V[0] - P;
+ const float3 v1_p = V[1] - P;
+ const float3 v2_p = V[2] - P;
+
+ const float3 u01 = safe_normalize(cross(v0_p, v1_p));
+ const float3 u02 = safe_normalize(cross(v0_p, v2_p));
+ const float3 u12 = safe_normalize(cross(v1_p, v2_p));
+
+ const float3 A = safe_normalize(v0_p);
+ const float3 B = safe_normalize(v1_p);
+ const float3 C = safe_normalize(v2_p);
+
+ const float cos_alpha = dot(u02, u01);
+ const float cos_beta = -dot(u01, u12);
+ const float cos_gamma = dot(u02, u12);
+
+ /* calculate dihedral angles */
+ const float alpha = fast_acosf(cos_alpha);
+ const float beta = fast_acosf(cos_beta);
+ const float gamma = fast_acosf(cos_gamma);
+ /* the area of the unit spherical triangle = solid angle */
+ const float solid_angle = alpha + beta + gamma - M_PI_F;
+
+ /* precompute a few things
+ * these could be re-used to take several samples
+ * as they are independent of randu/randv */
+ const float cos_c = dot(A, B);
+ const float sin_alpha = fast_sinf(alpha);
+ const float product = sin_alpha * cos_c;
+
+ /* Select a random sub-area of the spherical triangle
+ * and calculate the third vertex C_ of that new triangle */
+ const float phi = randu * solid_angle - alpha;
+ float s, t;
+ fast_sincosf(phi, &s, &t);
+ const float u = t - cos_alpha;
+ const float v = s + product;
+
+ const float3 U = safe_normalize(C - dot(C, A) * A);
+
+ float q = 1.0f;
+ const float det = ((v * s + u * t) * sin_alpha);
+ if (det != 0.0f) {
+ q = ((v * t - u * s) * cos_alpha - v) / det;
+ }
+ const float temp = max(1.0f - q * q, 0.0f);
+
+ const float3 C_ = safe_normalize(q * A + sqrtf(temp) * U);
+
+ /* Finally, select a random point along the edge of the new triangle
+ * That point on the spherical triangle is the sampled ray direction */
+ const float z = 1.0f - randv * (1.0f - dot(C_, B));
+ ls->D = z * B + safe_sqrtf(1.0f - z * z) * safe_normalize(C_ - dot(C_, B) * B);
+
+ /* calculate intersection with the planar triangle */
+ if (!ray_triangle_intersect(P,
+ ls->D,
+ FLT_MAX,
#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
- (ssef*)V,
+ (ssef *)V,
#else
- V[0], V[1], V[2],
+ V[0],
+ V[1],
+ V[2],
#endif
- &ls->u, &ls->v, &ls->t)) {
- ls->pdf = 0.0f;
- return;
- }
-
- ls->P = P + ls->D * ls->t;
-
- /* pdf_triangles is calculated over triangle area, but we're sampling over solid angle */
- if(UNLIKELY(solid_angle == 0.0f)) {
- ls->pdf = 0.0f;
- return;
- }
- else {
- if(has_motion) {
- /* get the center frame vertices, this is what the PDF was calculated from */
- triangle_world_space_vertices(kg, object, prim, -1.0f, V);
- area = triangle_area(V[0], V[1], V[2]);
- }
- const float pdf = area * kernel_data.integrator.pdf_triangles;
- ls->pdf = pdf / solid_angle;
- }
- }
- else {
- /* compute random point in triangle */
- randu = sqrtf(randu);
-
- const float u = 1.0f - randu;
- const float v = randv*randu;
- const float t = 1.0f - u - v;
- ls->P = u * V[0] + v * V[1] + t * V[2];
- /* compute incoming direction, distance and pdf */
- ls->D = normalize_len(ls->P - P, &ls->t);
- ls->pdf = triangle_light_pdf_area(kg, ls->Ng, -ls->D, ls->t);
- if(has_motion && area != 0.0f) {
- /* scale the PDF.
- * area = the area the sample was taken from
- * area_pre = the are from which pdf_triangles was calculated from */
- triangle_world_space_vertices(kg, object, prim, -1.0f, V);
- const float area_pre = triangle_area(V[0], V[1], V[2]);
- ls->pdf = ls->pdf * area_pre / area;
- }
- ls->u = u;
- ls->v = v;
- }
+ &ls->u,
+ &ls->v,
+ &ls->t)) {
+ ls->pdf = 0.0f;
+ return;
+ }
+
+ ls->P = P + ls->D * ls->t;
+
+ /* pdf_triangles is calculated over triangle area, but we're sampling over solid angle */
+ if (UNLIKELY(solid_angle == 0.0f)) {
+ ls->pdf = 0.0f;
+ return;
+ }
+ else {
+ if (has_motion) {
+ /* get the center frame vertices, this is what the PDF was calculated from */
+ triangle_world_space_vertices(kg, object, prim, -1.0f, V);
+ area = triangle_area(V[0], V[1], V[2]);
+ }
+ const float pdf = area * kernel_data.integrator.pdf_triangles;
+ ls->pdf = pdf / solid_angle;
+ }
+ }
+ else {
+ /* compute random point in triangle */
+ randu = sqrtf(randu);
+
+ const float u = 1.0f - randu;
+ const float v = randv * randu;
+ const float t = 1.0f - u - v;
+ ls->P = u * V[0] + v * V[1] + t * V[2];
+ /* compute incoming direction, distance and pdf */
+ ls->D = normalize_len(ls->P - P, &ls->t);
+ ls->pdf = triangle_light_pdf_area(kg, ls->Ng, -ls->D, ls->t);
+ if (has_motion && area != 0.0f) {
+ /* scale the PDF.
+ * area = the area the sample was taken from
+ * area_pre = the area from which pdf_triangles was calculated */
+ triangle_world_space_vertices(kg, object, prim, -1.0f, V);
+ const float area_pre = triangle_area(V[0], V[1], V[2]);
+ ls->pdf = ls->pdf * area_pre / area;
+ }
+ ls->u = u;
+ ls->v = v;
+ }
}
/* Light Distribution */
ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu)
{
- /* This is basically std::upper_bound as used by pbrt, to find a point light or
- * triangle to emit from, proportional to area. a good improvement would be to
- * also sample proportional to power, though it's not so well defined with
- * arbitrary shaders. */
- int first = 0;
- int len = kernel_data.integrator.num_distribution + 1;
- float r = *randu;
-
- while(len > 0) {
- int half_len = len >> 1;
- int middle = first + half_len;
-
- if(r < kernel_tex_fetch(__light_distribution, middle).totarea) {
- len = half_len;
- }
- else {
- first = middle + 1;
- len = len - half_len - 1;
- }
- }
-
- /* Clamping should not be needed but float rounding errors seem to
- * make this fail on rare occasions. */
- int index = clamp(first-1, 0, kernel_data.integrator.num_distribution-1);
-
- /* Rescale to reuse random number. this helps the 2D samples within
- * each area light be stratified as well. */
- float distr_min = kernel_tex_fetch(__light_distribution, index).totarea;
- float distr_max = kernel_tex_fetch(__light_distribution, index+1).totarea;
- *randu = (r - distr_min)/(distr_max - distr_min);
-
- return index;
+ /* This is basically std::upper_bound as used by pbrt, to find a point light or
+ * triangle to emit from, proportional to area. a good improvement would be to
+ * also sample proportional to power, though it's not so well defined with
+ * arbitrary shaders. */
+ int first = 0;
+ int len = kernel_data.integrator.num_distribution + 1;
+ float r = *randu;
+
+ while (len > 0) {
+ int half_len = len >> 1;
+ int middle = first + half_len;
+
+ if (r < kernel_tex_fetch(__light_distribution, middle).totarea) {
+ len = half_len;
+ }
+ else {
+ first = middle + 1;
+ len = len - half_len - 1;
+ }
+ }
+
+ /* Clamping should not be needed but float rounding errors seem to
+ * make this fail on rare occasions. */
+ int index = clamp(first - 1, 0, kernel_data.integrator.num_distribution - 1);
+
+ /* Rescale to reuse random number. this helps the 2D samples within
+ * each area light be stratified as well. */
+ float distr_min = kernel_tex_fetch(__light_distribution, index).totarea;
+ float distr_max = kernel_tex_fetch(__light_distribution, index + 1).totarea;
+ *randu = (r - distr_min) / (distr_max - distr_min);
+
+ return index;
}
/* Generic Light */
ccl_device bool light_select_reached_max_bounces(KernelGlobals *kg, int index, int bounce)
{
- return (bounce > kernel_tex_fetch(__lights, index).max_bounces);
+ return (bounce > kernel_tex_fetch(__lights, index).max_bounces);
}
-ccl_device_noinline bool light_sample(KernelGlobals *kg,
- float randu,
- float randv,
- float time,
- float3 P,
- int bounce,
- LightSample *ls)
+ccl_device_noinline bool light_sample(
+ KernelGlobals *kg, float randu, float randv, float time, float3 P, int bounce, LightSample *ls)
{
- /* sample index */
- int index = light_distribution_sample(kg, &randu);
-
- /* fetch light data */
- const ccl_global KernelLightDistribution *kdistribution = &kernel_tex_fetch(__light_distribution, index);
- int prim = kdistribution->prim;
-
- if(prim >= 0) {
- int object = kdistribution->mesh_light.object_id;
- int shader_flag = kdistribution->mesh_light.shader_flag;
-
- triangle_light_sample(kg, prim, object, randu, randv, time, ls, P);
- ls->shader |= shader_flag;
- return (ls->pdf > 0.0f);
- }
- else {
- int lamp = -prim-1;
-
- if(UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) {
- return false;
- }
-
- return lamp_light_sample(kg, lamp, randu, randv, P, ls);
- }
+ /* sample index */
+ int index = light_distribution_sample(kg, &randu);
+
+ /* fetch light data */
+ const ccl_global KernelLightDistribution *kdistribution = &kernel_tex_fetch(__light_distribution,
+ index);
+ int prim = kdistribution->prim;
+
+ if (prim >= 0) {
+ int object = kdistribution->mesh_light.object_id;
+ int shader_flag = kdistribution->mesh_light.shader_flag;
+
+ triangle_light_sample(kg, prim, object, randu, randv, time, ls, P);
+ ls->shader |= shader_flag;
+ return (ls->pdf > 0.0f);
+ }
+ else {
+ int lamp = -prim - 1;
+
+ if (UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) {
+ return false;
+ }
+
+ return lamp_light_sample(kg, lamp, randu, randv, P, ls);
+ }
}
ccl_device int light_select_num_samples(KernelGlobals *kg, int index)
{
- return kernel_tex_fetch(__lights, index).samples;
+ return kernel_tex_fetch(__lights, index).samples;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_math.h b/intern/cycles/kernel/kernel_math.h
index a8a43f3ea4a..96391db7649 100644
--- a/intern/cycles/kernel/kernel_math.h
+++ b/intern/cycles/kernel/kernel_math.h
@@ -25,4 +25,4 @@
#include "util/util_texture.h"
#include "util/util_transform.h"
-#endif /* __KERNEL_MATH_H__ */
+#endif /* __KERNEL_MATH_H__ */
diff --git a/intern/cycles/kernel/kernel_montecarlo.h b/intern/cycles/kernel/kernel_montecarlo.h
index dde93844dd3..a933be970c2 100644
--- a/intern/cycles/kernel/kernel_montecarlo.h
+++ b/intern/cycles/kernel/kernel_montecarlo.h
@@ -38,248 +38,245 @@ CCL_NAMESPACE_BEGIN
/* distribute uniform xy on [0,1] over unit disk [-1,1] */
ccl_device void to_unit_disk(float *x, float *y)
{
- float phi = M_2PI_F * (*x);
- float r = sqrtf(*y);
+ float phi = M_2PI_F * (*x);
+ float r = sqrtf(*y);
- *x = r * cosf(phi);
- *y = r * sinf(phi);
+ *x = r * cosf(phi);
+ *y = r * sinf(phi);
}
/* return an orthogonal tangent and bitangent given a normal and tangent that
* may not be exactly orthogonal */
ccl_device void make_orthonormals_tangent(const float3 N, const float3 T, float3 *a, float3 *b)
{
- *b = normalize(cross(N, T));
- *a = cross(*b, N);
+ *b = normalize(cross(N, T));
+ *a = cross(*b, N);
}
/* sample direction with cosine-weighted distribution in hemisphere */
-ccl_device_inline void sample_cos_hemisphere(const float3 N,
- float randu, float randv, float3 *omega_in, float *pdf)
+ccl_device_inline void sample_cos_hemisphere(
+ const float3 N, float randu, float randv, float3 *omega_in, float *pdf)
{
- to_unit_disk(&randu, &randv);
- float costheta = sqrtf(max(1.0f - randu * randu - randv * randv, 0.0f));
- float3 T, B;
- make_orthonormals(N, &T, &B);
- *omega_in = randu * T + randv * B + costheta * N;
- *pdf = costheta *M_1_PI_F;
+ to_unit_disk(&randu, &randv);
+ float costheta = sqrtf(max(1.0f - randu * randu - randv * randv, 0.0f));
+ float3 T, B;
+ make_orthonormals(N, &T, &B);
+ *omega_in = randu * T + randv * B + costheta * N;
+ *pdf = costheta * M_1_PI_F;
}
/* sample direction uniformly distributed in hemisphere */
-ccl_device_inline void sample_uniform_hemisphere(const float3 N,
- float randu, float randv,
- float3 *omega_in, float *pdf)
+ccl_device_inline void sample_uniform_hemisphere(
+ const float3 N, float randu, float randv, float3 *omega_in, float *pdf)
{
- float z = randu;
- float r = sqrtf(max(0.0f, 1.0f - z*z));
- float phi = M_2PI_F * randv;
- float x = r * cosf(phi);
- float y = r * sinf(phi);
-
- float3 T, B;
- make_orthonormals (N, &T, &B);
- *omega_in = x * T + y * B + z * N;
- *pdf = 0.5f * M_1_PI_F;
+ float z = randu;
+ float r = sqrtf(max(0.0f, 1.0f - z * z));
+ float phi = M_2PI_F * randv;
+ float x = r * cosf(phi);
+ float y = r * sinf(phi);
+
+ float3 T, B;
+ make_orthonormals(N, &T, &B);
+ *omega_in = x * T + y * B + z * N;
+ *pdf = 0.5f * M_1_PI_F;
}
/* sample direction uniformly distributed in cone */
-ccl_device_inline void sample_uniform_cone(const float3 N, float angle,
- float randu, float randv,
- float3 *omega_in, float *pdf)
+ccl_device_inline void sample_uniform_cone(
+ const float3 N, float angle, float randu, float randv, float3 *omega_in, float *pdf)
{
- float z = cosf(angle*randu);
- float r = sqrtf(max(0.0f, 1.0f - z*z));
- float phi = M_2PI_F * randv;
- float x = r * cosf(phi);
- float y = r * sinf(phi);
-
- float3 T, B;
- make_orthonormals (N, &T, &B);
- *omega_in = x * T + y * B + z * N;
- *pdf = 0.5f * M_1_PI_F / (1.0f - cosf(angle));
+ float z = cosf(angle * randu);
+ float r = sqrtf(max(0.0f, 1.0f - z * z));
+ float phi = M_2PI_F * randv;
+ float x = r * cosf(phi);
+ float y = r * sinf(phi);
+
+ float3 T, B;
+ make_orthonormals(N, &T, &B);
+ *omega_in = x * T + y * B + z * N;
+ *pdf = 0.5f * M_1_PI_F / (1.0f - cosf(angle));
}
/* sample uniform point on the surface of a sphere */
ccl_device float3 sample_uniform_sphere(float u1, float u2)
{
- float z = 1.0f - 2.0f*u1;
- float r = sqrtf(fmaxf(0.0f, 1.0f - z*z));
- float phi = M_2PI_F*u2;
- float x = r*cosf(phi);
- float y = r*sinf(phi);
+ float z = 1.0f - 2.0f * u1;
+ float r = sqrtf(fmaxf(0.0f, 1.0f - z * z));
+ float phi = M_2PI_F * u2;
+ float x = r * cosf(phi);
+ float y = r * sinf(phi);
- return make_float3(x, y, z);
+ return make_float3(x, y, z);
}
ccl_device float balance_heuristic(float a, float b)
{
- return (a)/(a + b);
+ return (a) / (a + b);
}
ccl_device float balance_heuristic_3(float a, float b, float c)
{
- return (a)/(a + b + c);
+ return (a) / (a + b + c);
}
ccl_device float power_heuristic(float a, float b)
{
- return (a*a)/(a*a + b*b);
+ return (a * a) / (a * a + b * b);
}
ccl_device float power_heuristic_3(float a, float b, float c)
{
- return (a*a)/(a*a + b*b + c*c);
+ return (a * a) / (a * a + b * b + c * c);
}
ccl_device float max_heuristic(float a, float b)
{
- return (a > b)? 1.0f: 0.0f;
+ return (a > b) ? 1.0f : 0.0f;
}
/* distribute uniform xy on [0,1] over unit disk [-1,1], with concentric mapping
* to better preserve stratification for some RNG sequences */
ccl_device float2 concentric_sample_disk(float u1, float u2)
{
- float phi, r;
- float a = 2.0f*u1 - 1.0f;
- float b = 2.0f*u2 - 1.0f;
-
- if(a == 0.0f && b == 0.0f) {
- return make_float2(0.0f, 0.0f);
- }
- else if(a*a > b*b) {
- r = a;
- phi = M_PI_4_F * (b/a);
- }
- else {
- r = b;
- phi = M_PI_2_F - M_PI_4_F * (a/b);
- }
-
- return make_float2(r*cosf(phi), r*sinf(phi));
+ float phi, r;
+ float a = 2.0f * u1 - 1.0f;
+ float b = 2.0f * u2 - 1.0f;
+
+ if (a == 0.0f && b == 0.0f) {
+ return make_float2(0.0f, 0.0f);
+ }
+ else if (a * a > b * b) {
+ r = a;
+ phi = M_PI_4_F * (b / a);
+ }
+ else {
+ r = b;
+ phi = M_PI_2_F - M_PI_4_F * (a / b);
+ }
+
+ return make_float2(r * cosf(phi), r * sinf(phi));
}
/* sample point in unit polygon with given number of corners and rotation */
ccl_device float2 regular_polygon_sample(float corners, float rotation, float u, float v)
{
- /* sample corner number and reuse u */
- float corner = floorf(u*corners);
- u = u*corners - corner;
+ /* sample corner number and reuse u */
+ float corner = floorf(u * corners);
+ u = u * corners - corner;
- /* uniform sampled triangle weights */
- u = sqrtf(u);
- v = v*u;
- u = 1.0f - u;
+ /* uniform sampled triangle weights */
+ u = sqrtf(u);
+ v = v * u;
+ u = 1.0f - u;
- /* point in triangle */
- float angle = M_PI_F/corners;
- float2 p = make_float2((u + v)*cosf(angle), (u - v)*sinf(angle));
+ /* point in triangle */
+ float angle = M_PI_F / corners;
+ float2 p = make_float2((u + v) * cosf(angle), (u - v) * sinf(angle));
- /* rotate */
- rotation += corner*2.0f*angle;
+ /* rotate */
+ rotation += corner * 2.0f * angle;
- float cr = cosf(rotation);
- float sr = sinf(rotation);
+ float cr = cosf(rotation);
+ float sr = sinf(rotation);
- return make_float2(cr*p.x - sr*p.y, sr*p.x + cr*p.y);
+ return make_float2(cr * p.x - sr * p.y, sr * p.x + cr * p.y);
}
ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N)
{
- float3 R = 2*dot(N, I)*N - I;
-
- /* Reflection rays may always be at least as shallow as the incoming ray. */
- float threshold = min(0.9f*dot(Ng, I), 0.01f);
- if(dot(Ng, R) >= threshold) {
- return N;
- }
-
- /* Form coordinate system with Ng as the Z axis and N inside the X-Z-plane.
- * The X axis is found by normalizing the component of N that's orthogonal to Ng.
- * The Y axis isn't actually needed.
- */
- float NdotNg = dot(N, Ng);
- float3 X = normalize(N - NdotNg*Ng);
-
- /* Calculate N.z and N.x in the local coordinate system.
- *
- * The goal of this computation is to find a N' that is rotated towards Ng just enough
- * to lift R' above the threshold (here called t), therefore dot(R', Ng) = t.
- *
- * According to the standard reflection equation, this means that we want dot(2*dot(N', I)*N' - I, Ng) = t.
- *
- * Since the Z axis of our local coordinate system is Ng, dot(x, Ng) is just x.z, so we get 2*dot(N', I)*N'.z - I.z = t.
- *
- * The rotation is simple to express in the coordinate system we formed - since N lies in the X-Z-plane, we know that
- * N' will also lie in the X-Z-plane, so N'.y = 0 and therefore dot(N', I) = N'.x*I.x + N'.z*I.z .
- *
- * Furthermore, we want N' to be normalized, so N'.x = sqrt(1 - N'.z^2).
- *
- * With these simplifications, we get the final equation 2*(sqrt(1 - N'.z^2)*I.x + N'.z*I.z)*N'.z - I.z = t.
- *
- * The only unknown here is N'.z, so we can solve for that.
- *
- * The equation has four solutions in general:
- *
- * N'.z = +-sqrt(0.5*(+-sqrt(I.x^2*(I.x^2 + I.z^2 - t^2)) + t*I.z + I.x^2 + I.z^2)/(I.x^2 + I.z^2))
- * We can simplify this expression a bit by grouping terms:
- *
- * a = I.x^2 + I.z^2
- * b = sqrt(I.x^2 * (a - t^2))
- * c = I.z*t + a
- * N'.z = +-sqrt(0.5*(+-b + c)/a)
- *
- * Two solutions can immediately be discarded because they're negative so N' would lie in the lower hemisphere.
- */
- float Ix = dot(I, X), Iz = dot(I, Ng);
- float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
- float a = Ix2 + Iz2;
-
- float b = safe_sqrtf(Ix2*(a - sqr(threshold)));
- float c = Iz*threshold + a;
-
- /* Evaluate both solutions.
- * In many cases one can be immediately discarded (if N'.z would be imaginary or larger than one), so check for that first.
- * If no option is viable (might happen in extreme cases like N being in the wrong hemisphere), give up and return Ng. */
- float fac = 0.5f/a;
- float N1_z2 = fac*(b+c), N2_z2 = fac*(-b+c);
- bool valid1 = (N1_z2 > 1e-5f) && (N1_z2 <= (1.0f + 1e-5f));
- bool valid2 = (N2_z2 > 1e-5f) && (N2_z2 <= (1.0f + 1e-5f));
-
- float2 N_new;
- if(valid1 && valid2) {
- /* If both are possible, do the expensive reflection-based check. */
- float2 N1 = make_float2(safe_sqrtf(1.0f - N1_z2), safe_sqrtf(N1_z2));
- float2 N2 = make_float2(safe_sqrtf(1.0f - N2_z2), safe_sqrtf(N2_z2));
-
- float R1 = 2*(N1.x*Ix + N1.y*Iz)*N1.y - Iz;
- float R2 = 2*(N2.x*Ix + N2.y*Iz)*N2.y - Iz;
-
- valid1 = (R1 >= 1e-5f);
- valid2 = (R2 >= 1e-5f);
- if(valid1 && valid2) {
- /* If both solutions are valid, return the one with the shallower reflection since it will be closer to the input
- * (if the original reflection wasn't shallow, we would not be in this part of the function). */
- N_new = (R1 < R2)? N1 : N2;
- }
- else {
- /* If only one reflection is valid (= positive), pick that one. */
- N_new = (R1 > R2)? N1 : N2;
- }
-
- }
- else if(valid1 || valid2) {
- /* Only one solution passes the N'.z criterium, so pick that one. */
- float Nz2 = valid1? N1_z2 : N2_z2;
- N_new = make_float2(safe_sqrtf(1.0f - Nz2), safe_sqrtf(Nz2));
- }
- else {
- return Ng;
- }
-
- return N_new.x*X + N_new.y*Ng;
+ float3 R = 2 * dot(N, I) * N - I;
+
+ /* Reflection rays may always be at least as shallow as the incoming ray. */
+ float threshold = min(0.9f * dot(Ng, I), 0.01f);
+ if (dot(Ng, R) >= threshold) {
+ return N;
+ }
+
+ /* Form coordinate system with Ng as the Z axis and N inside the X-Z-plane.
+ * The X axis is found by normalizing the component of N that's orthogonal to Ng.
+ * The Y axis isn't actually needed.
+ */
+ float NdotNg = dot(N, Ng);
+ float3 X = normalize(N - NdotNg * Ng);
+
+ /* Calculate N.z and N.x in the local coordinate system.
+ *
+ * The goal of this computation is to find a N' that is rotated towards Ng just enough
+ * to lift R' above the threshold (here called t), therefore dot(R', Ng) = t.
+ *
+ * According to the standard reflection equation, this means that we want dot(2*dot(N', I)*N' - I, Ng) = t.
+ *
+ * Since the Z axis of our local coordinate system is Ng, dot(x, Ng) is just x.z, so we get 2*dot(N', I)*N'.z - I.z = t.
+ *
+ * The rotation is simple to express in the coordinate system we formed - since N lies in the X-Z-plane, we know that
+ * N' will also lie in the X-Z-plane, so N'.y = 0 and therefore dot(N', I) = N'.x*I.x + N'.z*I.z .
+ *
+ * Furthermore, we want N' to be normalized, so N'.x = sqrt(1 - N'.z^2).
+ *
+ * With these simplifications, we get the final equation 2*(sqrt(1 - N'.z^2)*I.x + N'.z*I.z)*N'.z - I.z = t.
+ *
+ * The only unknown here is N'.z, so we can solve for that.
+ *
+ * The equation has four solutions in general:
+ *
+ * N'.z = +-sqrt(0.5*(+-sqrt(I.x^2*(I.x^2 + I.z^2 - t^2)) + t*I.z + I.x^2 + I.z^2)/(I.x^2 + I.z^2))
+ * We can simplify this expression a bit by grouping terms:
+ *
+ * a = I.x^2 + I.z^2
+ * b = sqrt(I.x^2 * (a - t^2))
+ * c = I.z*t + a
+ * N'.z = +-sqrt(0.5*(+-b + c)/a)
+ *
+ * Two solutions can immediately be discarded because they're negative so N' would lie in the lower hemisphere.
+ */
+ float Ix = dot(I, X), Iz = dot(I, Ng);
+ float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
+ float a = Ix2 + Iz2;
+
+ float b = safe_sqrtf(Ix2 * (a - sqr(threshold)));
+ float c = Iz * threshold + a;
+
+ /* Evaluate both solutions.
+ * In many cases one can be immediately discarded (if N'.z would be imaginary or larger than one), so check for that first.
+ * If no option is viable (might happen in extreme cases like N being in the wrong hemisphere), give up and return Ng. */
+ float fac = 0.5f / a;
+ float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c);
+ bool valid1 = (N1_z2 > 1e-5f) && (N1_z2 <= (1.0f + 1e-5f));
+ bool valid2 = (N2_z2 > 1e-5f) && (N2_z2 <= (1.0f + 1e-5f));
+
+ float2 N_new;
+ if (valid1 && valid2) {
+ /* If both are possible, do the expensive reflection-based check. */
+ float2 N1 = make_float2(safe_sqrtf(1.0f - N1_z2), safe_sqrtf(N1_z2));
+ float2 N2 = make_float2(safe_sqrtf(1.0f - N2_z2), safe_sqrtf(N2_z2));
+
+ float R1 = 2 * (N1.x * Ix + N1.y * Iz) * N1.y - Iz;
+ float R2 = 2 * (N2.x * Ix + N2.y * Iz) * N2.y - Iz;
+
+ valid1 = (R1 >= 1e-5f);
+ valid2 = (R2 >= 1e-5f);
+ if (valid1 && valid2) {
+ /* If both solutions are valid, return the one with the shallower reflection since it will be closer to the input
+ * (if the original reflection wasn't shallow, we would not be in this part of the function). */
+ N_new = (R1 < R2) ? N1 : N2;
+ }
+ else {
+ /* If only one reflection is valid (= positive), pick that one. */
+ N_new = (R1 > R2) ? N1 : N2;
+ }
+ }
+ else if (valid1 || valid2) {
+ /* Only one solution passes the N'.z criterion, so pick that one. */
+ float Nz2 = valid1 ? N1_z2 : N2_z2;
+ N_new = make_float2(safe_sqrtf(1.0f - Nz2), safe_sqrtf(Nz2));
+ }
+ else {
+ return Ng;
+ }
+
+ return N_new.x * X + N_new.y * Ng;
}
CCL_NAMESPACE_END
-#endif /* __KERNEL_MONTECARLO_CL__ */
+#endif /* __KERNEL_MONTECARLO_CL__ */
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index 08e9db05c39..462ec037ee7 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -15,7 +15,7 @@
*/
#if defined(__SPLIT_KERNEL__) || defined(__KERNEL_CUDA__)
-#define __ATOMIC_PASS_WRITE__
+# define __ATOMIC_PASS_WRITE__
#endif
#include "kernel/kernel_id_passes.h"
@@ -24,56 +24,56 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, float value)
{
- ccl_global float *buf = buffer;
+ ccl_global float *buf = buffer;
#ifdef __ATOMIC_PASS_WRITE__
- atomic_add_and_fetch_float(buf, value);
+ atomic_add_and_fetch_float(buf, value);
#else
- *buf += value;
+ *buf += value;
#endif
}
ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, float3 value)
{
#ifdef __ATOMIC_PASS_WRITE__
- ccl_global float *buf_x = buffer + 0;
- ccl_global float *buf_y = buffer + 1;
- ccl_global float *buf_z = buffer + 2;
+ ccl_global float *buf_x = buffer + 0;
+ ccl_global float *buf_y = buffer + 1;
+ ccl_global float *buf_z = buffer + 2;
- atomic_add_and_fetch_float(buf_x, value.x);
- atomic_add_and_fetch_float(buf_y, value.y);
- atomic_add_and_fetch_float(buf_z, value.z);
+ atomic_add_and_fetch_float(buf_x, value.x);
+ atomic_add_and_fetch_float(buf_y, value.y);
+ atomic_add_and_fetch_float(buf_z, value.z);
#else
- ccl_global float3 *buf = (ccl_global float3*)buffer;
- *buf += value;
+ ccl_global float3 *buf = (ccl_global float3 *)buffer;
+ *buf += value;
#endif
}
ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, float4 value)
{
#ifdef __ATOMIC_PASS_WRITE__
- ccl_global float *buf_x = buffer + 0;
- ccl_global float *buf_y = buffer + 1;
- ccl_global float *buf_z = buffer + 2;
- ccl_global float *buf_w = buffer + 3;
-
- atomic_add_and_fetch_float(buf_x, value.x);
- atomic_add_and_fetch_float(buf_y, value.y);
- atomic_add_and_fetch_float(buf_z, value.z);
- atomic_add_and_fetch_float(buf_w, value.w);
+ ccl_global float *buf_x = buffer + 0;
+ ccl_global float *buf_y = buffer + 1;
+ ccl_global float *buf_z = buffer + 2;
+ ccl_global float *buf_w = buffer + 3;
+
+ atomic_add_and_fetch_float(buf_x, value.x);
+ atomic_add_and_fetch_float(buf_y, value.y);
+ atomic_add_and_fetch_float(buf_z, value.z);
+ atomic_add_and_fetch_float(buf_w, value.w);
#else
- ccl_global float4 *buf = (ccl_global float4*)buffer;
- *buf += value;
+ ccl_global float4 *buf = (ccl_global float4 *)buffer;
+ *buf += value;
#endif
}
#ifdef __DENOISING_FEATURES__
ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer, float value)
{
- kernel_write_pass_float(buffer, value);
+ kernel_write_pass_float(buffer, value);
- /* The online one-pass variance update that's used for the megakernel can't easily be implemented
- * with atomics, so for the split kernel the E[x^2] - 1/N * (E[x])^2 fallback is used. */
- kernel_write_pass_float(buffer+1, value*value);
+ /* The online one-pass variance update that's used for the megakernel can't easily be implemented
+ * with atomics, so for the split kernel the E[x^2] - 1/N * (E[x])^2 fallback is used. */
+ kernel_write_pass_float(buffer + 1, value * value);
}
# ifdef __ATOMIC_PASS_WRITE__
@@ -81,36 +81,39 @@ ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer
# else
ccl_device_inline void kernel_write_pass_float3_unaligned(ccl_global float *buffer, float3 value)
{
- buffer[0] += value.x;
- buffer[1] += value.y;
- buffer[2] += value.z;
+ buffer[0] += value.x;
+ buffer[1] += value.y;
+ buffer[2] += value.z;
}
# endif
ccl_device_inline void kernel_write_pass_float3_variance(ccl_global float *buffer, float3 value)
{
- kernel_write_pass_float3_unaligned(buffer, value);
- kernel_write_pass_float3_unaligned(buffer+3, value*value);
+ kernel_write_pass_float3_unaligned(buffer, value);
+ kernel_write_pass_float3_unaligned(buffer + 3, value * value);
}
-ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, ccl_global float *buffer,
- int sample, float path_total, float path_total_shaded)
+ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg,
+ ccl_global float *buffer,
+ int sample,
+ float path_total,
+ float path_total_shaded)
{
- if(kernel_data.film.pass_denoising_data == 0)
- return;
+ if (kernel_data.film.pass_denoising_data == 0)
+ return;
- buffer += (sample & 1)? DENOISING_PASS_SHADOW_B : DENOISING_PASS_SHADOW_A;
+ buffer += (sample & 1) ? DENOISING_PASS_SHADOW_B : DENOISING_PASS_SHADOW_A;
- path_total = ensure_finite(path_total);
- path_total_shaded = ensure_finite(path_total_shaded);
+ path_total = ensure_finite(path_total);
+ path_total_shaded = ensure_finite(path_total_shaded);
- kernel_write_pass_float(buffer, path_total);
- kernel_write_pass_float(buffer+1, path_total_shaded);
+ kernel_write_pass_float(buffer, path_total);
+ kernel_write_pass_float(buffer + 1, path_total_shaded);
- float value = path_total_shaded / max(path_total, 1e-7f);
- kernel_write_pass_float(buffer+2, value*value);
+ float value = path_total_shaded / max(path_total, 1e-7f);
+ kernel_write_pass_float(buffer + 2, value * value);
}
-#endif /* __DENOISING_FEATURES__ */
+#endif /* __DENOISING_FEATURES__ */
ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg,
ShaderData *sd,
@@ -118,52 +121,52 @@ ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg,
PathRadiance *L)
{
#ifdef __DENOISING_FEATURES__
- if(state->denoising_feature_weight == 0.0f) {
- return;
- }
-
- L->denoising_depth += ensure_finite(state->denoising_feature_weight * sd->ray_length);
-
- /* Skip implicitly transparent surfaces. */
- if(sd->flag & SD_HAS_ONLY_VOLUME) {
- return;
- }
-
- float3 normal = make_float3(0.0f, 0.0f, 0.0f);
- float3 albedo = make_float3(0.0f, 0.0f, 0.0f);
- float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f;
-
- for(int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
-
- if(!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type))
- continue;
-
- /* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */
- normal += sc->N * sc->sample_weight;
- sum_weight += sc->sample_weight;
- if(bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) {
- albedo += sc->weight;
- sum_nonspecular_weight += sc->sample_weight;
- }
- }
-
- /* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. */
- if((sum_weight == 0.0f) || (sum_nonspecular_weight*4.0f > sum_weight)) {
- if(sum_weight != 0.0f) {
- normal /= sum_weight;
- }
- L->denoising_normal += ensure_finite3(state->denoising_feature_weight * normal);
- L->denoising_albedo += ensure_finite3(state->denoising_feature_weight * albedo);
-
- state->denoising_feature_weight = 0.0f;
- }
+ if (state->denoising_feature_weight == 0.0f) {
+ return;
+ }
+
+ L->denoising_depth += ensure_finite(state->denoising_feature_weight * sd->ray_length);
+
+ /* Skip implicitly transparent surfaces. */
+ if (sd->flag & SD_HAS_ONLY_VOLUME) {
+ return;
+ }
+
+ float3 normal = make_float3(0.0f, 0.0f, 0.0f);
+ float3 albedo = make_float3(0.0f, 0.0f, 0.0f);
+ float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
+
+ if (!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type))
+ continue;
+
+ /* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */
+ normal += sc->N * sc->sample_weight;
+ sum_weight += sc->sample_weight;
+ if (bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) {
+ albedo += sc->weight;
+ sum_nonspecular_weight += sc->sample_weight;
+ }
+ }
+
+ /* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. */
+ if ((sum_weight == 0.0f) || (sum_nonspecular_weight * 4.0f > sum_weight)) {
+ if (sum_weight != 0.0f) {
+ normal /= sum_weight;
+ }
+ L->denoising_normal += ensure_finite3(state->denoising_feature_weight * normal);
+ L->denoising_albedo += ensure_finite3(state->denoising_feature_weight * albedo);
+
+ state->denoising_feature_weight = 0.0f;
+ }
#else
- (void) kg;
- (void) sd;
- (void) state;
- (void) L;
-#endif /* __DENOISING_FEATURES__ */
+ (void)kg;
+ (void)sd;
+ (void)state;
+ (void)L;
+#endif /* __DENOISING_FEATURES__ */
}
#ifdef __KERNEL_DEBUG__
@@ -171,203 +174,221 @@ ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg,
ccl_global float *buffer,
PathRadiance *L)
{
- int flag = kernel_data.film.pass_flag;
- if(flag & PASSMASK(BVH_TRAVERSED_NODES)) {
- kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_nodes,
- L->debug_data.num_bvh_traversed_nodes);
- }
- if(flag & PASSMASK(BVH_TRAVERSED_INSTANCES)) {
- kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_instances,
- L->debug_data.num_bvh_traversed_instances);
- }
- if(flag & PASSMASK(BVH_INTERSECTIONS)) {
- kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_intersections,
- L->debug_data.num_bvh_intersections);
- }
- if(flag & PASSMASK(RAY_BOUNCES)) {
- kernel_write_pass_float(buffer + kernel_data.film.pass_ray_bounces,
- L->debug_data.num_ray_bounces);
- }
+ int flag = kernel_data.film.pass_flag;
+ if (flag & PASSMASK(BVH_TRAVERSED_NODES)) {
+ kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_nodes,
+ L->debug_data.num_bvh_traversed_nodes);
+ }
+ if (flag & PASSMASK(BVH_TRAVERSED_INSTANCES)) {
+ kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_instances,
+ L->debug_data.num_bvh_traversed_instances);
+ }
+ if (flag & PASSMASK(BVH_INTERSECTIONS)) {
+ kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_intersections,
+ L->debug_data.num_bvh_intersections);
+ }
+ if (flag & PASSMASK(RAY_BOUNCES)) {
+ kernel_write_pass_float(buffer + kernel_data.film.pass_ray_bounces,
+ L->debug_data.num_ray_bounces);
+ }
}
-#endif /* __KERNEL_DEBUG__ */
+#endif /* __KERNEL_DEBUG__ */
#ifdef __KERNEL_CPU__
-#define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) kernel_write_id_pass_cpu(buffer, depth * 2, id, matte_weight, kg->coverage_##name)
-ccl_device_inline size_t kernel_write_id_pass_cpu(float *buffer, size_t depth, float id, float matte_weight, CoverageMap *map)
+# define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) \
+ kernel_write_id_pass_cpu(buffer, depth * 2, id, matte_weight, kg->coverage_##name)
+ccl_device_inline size_t kernel_write_id_pass_cpu(
+ float *buffer, size_t depth, float id, float matte_weight, CoverageMap *map)
{
- if(map) {
- (*map)[id] += matte_weight;
- return 0;
- }
-#else /* __KERNEL_CPU__ */
-#define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) kernel_write_id_slots_gpu(buffer, depth * 2, id, matte_weight)
-ccl_device_inline size_t kernel_write_id_slots_gpu(ccl_global float *buffer, size_t depth, float id, float matte_weight)
+ if (map) {
+ (*map)[id] += matte_weight;
+ return 0;
+ }
+#else /* __KERNEL_CPU__ */
+# define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) \
+ kernel_write_id_slots_gpu(buffer, depth * 2, id, matte_weight)
+ccl_device_inline size_t kernel_write_id_slots_gpu(ccl_global float *buffer,
+ size_t depth,
+ float id,
+ float matte_weight)
{
-#endif /* __KERNEL_CPU__ */
- kernel_write_id_slots(buffer, depth, id, matte_weight);
- return depth * 2;
+#endif /* __KERNEL_CPU__ */
+ kernel_write_id_slots(buffer, depth, id, matte_weight);
+ return depth * 2;
}
-ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L,
- ShaderData *sd, ccl_addr_space PathState *state, float3 throughput)
+ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg,
+ ccl_global float *buffer,
+ PathRadiance *L,
+ ShaderData *sd,
+ ccl_addr_space PathState *state,
+ float3 throughput)
{
#ifdef __PASSES__
- int path_flag = state->flag;
-
- if(!(path_flag & PATH_RAY_CAMERA))
- return;
-
- int flag = kernel_data.film.pass_flag;
- int light_flag = kernel_data.film.light_pass_flag;
-
- if(!((flag | light_flag) & PASS_ANY))
- return;
-
- if(!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) {
- if(!(sd->flag & SD_TRANSPARENT) ||
- kernel_data.film.pass_alpha_threshold == 0.0f ||
- average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold)
- {
- if(state->sample == 0) {
- if(flag & PASSMASK(DEPTH)) {
- float depth = camera_distance(kg, sd->P);
- kernel_write_pass_float(buffer + kernel_data.film.pass_depth, depth);
- }
- if(flag & PASSMASK(OBJECT_ID)) {
- float id = object_pass_id(kg, sd->object);
- kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, id);
- }
- if(flag & PASSMASK(MATERIAL_ID)) {
- float id = shader_pass_id(kg, sd);
- kernel_write_pass_float(buffer + kernel_data.film.pass_material_id, id);
- }
- }
-
- if(flag & PASSMASK(NORMAL)) {
- float3 normal = shader_bsdf_average_normal(kg, sd);
- kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, normal);
- }
- if(flag & PASSMASK(UV)) {
- float3 uv = primitive_uv(kg, sd);
- kernel_write_pass_float3(buffer + kernel_data.film.pass_uv, uv);
- }
- if(flag & PASSMASK(MOTION)) {
- float4 speed = primitive_motion_vector(kg, sd);
- kernel_write_pass_float4(buffer + kernel_data.film.pass_motion, speed);
- kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f);
- }
-
- state->flag |= PATH_RAY_SINGLE_PASS_DONE;
- }
- }
-
- if(kernel_data.film.cryptomatte_passes) {
- const float matte_weight = average(throughput) * (1.0f - average(shader_bsdf_transparency(kg, sd)));
- if(matte_weight > 0.0f) {
- ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte;
- if(kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) {
- float id = object_cryptomatte_id(kg, sd->object);
- cryptomatte_buffer += WRITE_ID_SLOT(cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, object);
- }
- if(kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) {
- float id = shader_cryptomatte_id(kg, sd->shader);
- cryptomatte_buffer += WRITE_ID_SLOT(cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, material);
- }
- if(kernel_data.film.cryptomatte_passes & CRYPT_ASSET) {
- float id = object_cryptomatte_asset_id(kg, sd->object);
- cryptomatte_buffer += WRITE_ID_SLOT(cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, asset);
- }
- }
- }
-
-
- if(light_flag & PASSMASK_COMPONENT(DIFFUSE))
- L->color_diffuse += shader_bsdf_diffuse(kg, sd)*throughput;
- if(light_flag & PASSMASK_COMPONENT(GLOSSY))
- L->color_glossy += shader_bsdf_glossy(kg, sd)*throughput;
- if(light_flag & PASSMASK_COMPONENT(TRANSMISSION))
- L->color_transmission += shader_bsdf_transmission(kg, sd)*throughput;
- if(light_flag & PASSMASK_COMPONENT(SUBSURFACE))
- L->color_subsurface += shader_bsdf_subsurface(kg, sd)*throughput;
-
- if(light_flag & PASSMASK(MIST)) {
- /* bring depth into 0..1 range */
- float mist_start = kernel_data.film.mist_start;
- float mist_inv_depth = kernel_data.film.mist_inv_depth;
-
- float depth = camera_distance(kg, sd->P);
- float mist = saturate((depth - mist_start)*mist_inv_depth);
-
- /* falloff */
- float mist_falloff = kernel_data.film.mist_falloff;
-
- if(mist_falloff == 1.0f)
- ;
- else if(mist_falloff == 2.0f)
- mist = mist*mist;
- else if(mist_falloff == 0.5f)
- mist = sqrtf(mist);
- else
- mist = powf(mist, mist_falloff);
-
- /* modulate by transparency */
- float3 alpha = shader_bsdf_alpha(kg, sd);
- L->mist += (1.0f - mist)*average(throughput*alpha);
- }
+ int path_flag = state->flag;
+
+ if (!(path_flag & PATH_RAY_CAMERA))
+ return;
+
+ int flag = kernel_data.film.pass_flag;
+ int light_flag = kernel_data.film.light_pass_flag;
+
+ if (!((flag | light_flag) & PASS_ANY))
+ return;
+
+ if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) {
+ if (!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f ||
+ average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) {
+ if (state->sample == 0) {
+ if (flag & PASSMASK(DEPTH)) {
+ float depth = camera_distance(kg, sd->P);
+ kernel_write_pass_float(buffer + kernel_data.film.pass_depth, depth);
+ }
+ if (flag & PASSMASK(OBJECT_ID)) {
+ float id = object_pass_id(kg, sd->object);
+ kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, id);
+ }
+ if (flag & PASSMASK(MATERIAL_ID)) {
+ float id = shader_pass_id(kg, sd);
+ kernel_write_pass_float(buffer + kernel_data.film.pass_material_id, id);
+ }
+ }
+
+ if (flag & PASSMASK(NORMAL)) {
+ float3 normal = shader_bsdf_average_normal(kg, sd);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, normal);
+ }
+ if (flag & PASSMASK(UV)) {
+ float3 uv = primitive_uv(kg, sd);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_uv, uv);
+ }
+ if (flag & PASSMASK(MOTION)) {
+ float4 speed = primitive_motion_vector(kg, sd);
+ kernel_write_pass_float4(buffer + kernel_data.film.pass_motion, speed);
+ kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f);
+ }
+
+ state->flag |= PATH_RAY_SINGLE_PASS_DONE;
+ }
+ }
+
+ if (kernel_data.film.cryptomatte_passes) {
+ const float matte_weight = average(throughput) *
+ (1.0f - average(shader_bsdf_transparency(kg, sd)));
+ if (matte_weight > 0.0f) {
+ ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte;
+ if (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) {
+ float id = object_cryptomatte_id(kg, sd->object);
+ cryptomatte_buffer += WRITE_ID_SLOT(
+ cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, object);
+ }
+ if (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) {
+ float id = shader_cryptomatte_id(kg, sd->shader);
+ cryptomatte_buffer += WRITE_ID_SLOT(
+ cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, material);
+ }
+ if (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) {
+ float id = object_cryptomatte_asset_id(kg, sd->object);
+ cryptomatte_buffer += WRITE_ID_SLOT(
+ cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, asset);
+ }
+ }
+ }
+
+ if (light_flag & PASSMASK_COMPONENT(DIFFUSE))
+ L->color_diffuse += shader_bsdf_diffuse(kg, sd) * throughput;
+ if (light_flag & PASSMASK_COMPONENT(GLOSSY))
+ L->color_glossy += shader_bsdf_glossy(kg, sd) * throughput;
+ if (light_flag & PASSMASK_COMPONENT(TRANSMISSION))
+ L->color_transmission += shader_bsdf_transmission(kg, sd) * throughput;
+ if (light_flag & PASSMASK_COMPONENT(SUBSURFACE))
+ L->color_subsurface += shader_bsdf_subsurface(kg, sd) * throughput;
+
+ if (light_flag & PASSMASK(MIST)) {
+ /* bring depth into 0..1 range */
+ float mist_start = kernel_data.film.mist_start;
+ float mist_inv_depth = kernel_data.film.mist_inv_depth;
+
+ float depth = camera_distance(kg, sd->P);
+ float mist = saturate((depth - mist_start) * mist_inv_depth);
+
+ /* falloff */
+ float mist_falloff = kernel_data.film.mist_falloff;
+
+ if (mist_falloff == 1.0f)
+ ;
+ else if (mist_falloff == 2.0f)
+ mist = mist * mist;
+ else if (mist_falloff == 0.5f)
+ mist = sqrtf(mist);
+ else
+ mist = powf(mist, mist_falloff);
+
+ /* modulate by transparency */
+ float3 alpha = shader_bsdf_alpha(kg, sd);
+ L->mist += (1.0f - mist) * average(throughput * alpha);
+ }
#endif
}
-ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L)
+ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg,
+ ccl_global float *buffer,
+ PathRadiance *L)
{
#ifdef __PASSES__
- int light_flag = kernel_data.film.light_pass_flag;
-
- if(!kernel_data.film.use_light_pass)
- return;
-
- if(light_flag & PASSMASK(DIFFUSE_INDIRECT))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_indirect, L->indirect_diffuse);
- if(light_flag & PASSMASK(GLOSSY_INDIRECT))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_indirect, L->indirect_glossy);
- if(light_flag & PASSMASK(TRANSMISSION_INDIRECT))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_indirect, L->indirect_transmission);
- if(light_flag & PASSMASK(SUBSURFACE_INDIRECT))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_indirect, L->indirect_subsurface);
- if(light_flag & PASSMASK(VOLUME_INDIRECT))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_indirect, L->indirect_scatter);
- if(light_flag & PASSMASK(DIFFUSE_DIRECT))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_direct, L->direct_diffuse);
- if(light_flag & PASSMASK(GLOSSY_DIRECT))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_direct, L->direct_glossy);
- if(light_flag & PASSMASK(TRANSMISSION_DIRECT))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_direct, L->direct_transmission);
- if(light_flag & PASSMASK(SUBSURFACE_DIRECT))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_direct, L->direct_subsurface);
- if(light_flag & PASSMASK(VOLUME_DIRECT))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_direct, L->direct_scatter);
-
- if(light_flag & PASSMASK(EMISSION))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_emission, L->emission);
- if(light_flag & PASSMASK(BACKGROUND))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_background, L->background);
- if(light_flag & PASSMASK(AO))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, L->ao);
-
- if(light_flag & PASSMASK(DIFFUSE_COLOR))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color, L->color_diffuse);
- if(light_flag & PASSMASK(GLOSSY_COLOR))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color, L->color_glossy);
- if(light_flag & PASSMASK(TRANSMISSION_COLOR))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color, L->color_transmission);
- if(light_flag & PASSMASK(SUBSURFACE_COLOR))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_color, L->color_subsurface);
- if(light_flag & PASSMASK(SHADOW)) {
- float4 shadow = L->shadow;
- shadow.w = kernel_data.film.pass_shadow_scale;
- kernel_write_pass_float4(buffer + kernel_data.film.pass_shadow, shadow);
- }
- if(light_flag & PASSMASK(MIST))
- kernel_write_pass_float(buffer + kernel_data.film.pass_mist, 1.0f - L->mist);
+ int light_flag = kernel_data.film.light_pass_flag;
+
+ if (!kernel_data.film.use_light_pass)
+ return;
+
+ if (light_flag & PASSMASK(DIFFUSE_INDIRECT))
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_indirect, L->indirect_diffuse);
+ if (light_flag & PASSMASK(GLOSSY_INDIRECT))
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_indirect, L->indirect_glossy);
+ if (light_flag & PASSMASK(TRANSMISSION_INDIRECT))
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_indirect,
+ L->indirect_transmission);
+ if (light_flag & PASSMASK(SUBSURFACE_INDIRECT))
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_indirect,
+ L->indirect_subsurface);
+ if (light_flag & PASSMASK(VOLUME_INDIRECT))
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_indirect, L->indirect_scatter);
+ if (light_flag & PASSMASK(DIFFUSE_DIRECT))
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_direct, L->direct_diffuse);
+ if (light_flag & PASSMASK(GLOSSY_DIRECT))
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_direct, L->direct_glossy);
+ if (light_flag & PASSMASK(TRANSMISSION_DIRECT))
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_direct,
+ L->direct_transmission);
+ if (light_flag & PASSMASK(SUBSURFACE_DIRECT))
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_direct,
+ L->direct_subsurface);
+ if (light_flag & PASSMASK(VOLUME_DIRECT))
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_direct, L->direct_scatter);
+
+ if (light_flag & PASSMASK(EMISSION))
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_emission, L->emission);
+ if (light_flag & PASSMASK(BACKGROUND))
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_background, L->background);
+ if (light_flag & PASSMASK(AO))
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, L->ao);
+
+ if (light_flag & PASSMASK(DIFFUSE_COLOR))
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color, L->color_diffuse);
+ if (light_flag & PASSMASK(GLOSSY_COLOR))
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color, L->color_glossy);
+ if (light_flag & PASSMASK(TRANSMISSION_COLOR))
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color,
+ L->color_transmission);
+ if (light_flag & PASSMASK(SUBSURFACE_COLOR))
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_color, L->color_subsurface);
+ if (light_flag & PASSMASK(SHADOW)) {
+ float4 shadow = L->shadow;
+ shadow.w = kernel_data.film.pass_shadow_scale;
+ kernel_write_pass_float4(buffer + kernel_data.film.pass_shadow, shadow);
+ }
+ if (light_flag & PASSMASK(MIST))
+ kernel_write_pass_float(buffer + kernel_data.film.pass_mist, 1.0f - L->mist);
#endif
}
@@ -376,60 +397,54 @@ ccl_device_inline void kernel_write_result(KernelGlobals *kg,
int sample,
PathRadiance *L)
{
- PROFILING_INIT(kg, PROFILING_WRITE_RESULT);
- PROFILING_OBJECT(PRIM_NONE);
+ PROFILING_INIT(kg, PROFILING_WRITE_RESULT);
+ PROFILING_OBJECT(PRIM_NONE);
- float alpha;
- float3 L_sum = path_radiance_clamp_and_sum(kg, L, &alpha);
+ float alpha;
+ float3 L_sum = path_radiance_clamp_and_sum(kg, L, &alpha);
- kernel_write_pass_float4(buffer, make_float4(L_sum.x, L_sum.y, L_sum.z, alpha));
+ kernel_write_pass_float4(buffer, make_float4(L_sum.x, L_sum.y, L_sum.z, alpha));
- kernel_write_light_passes(kg, buffer, L);
+ kernel_write_light_passes(kg, buffer, L);
#ifdef __DENOISING_FEATURES__
- if(kernel_data.film.pass_denoising_data) {
+ if (kernel_data.film.pass_denoising_data) {
# ifdef __SHADOW_TRICKS__
- kernel_write_denoising_shadow(kg,
- buffer + kernel_data.film.pass_denoising_data,
- sample,
- average(L->path_total),
- average(L->path_total_shaded));
+ kernel_write_denoising_shadow(kg,
+ buffer + kernel_data.film.pass_denoising_data,
+ sample,
+ average(L->path_total),
+ average(L->path_total_shaded));
# else
- kernel_write_denoising_shadow(kg,
- buffer + kernel_data.film.pass_denoising_data,
- sample,
- 0.0f, 0.0f);
+ kernel_write_denoising_shadow(
+ kg, buffer + kernel_data.film.pass_denoising_data, sample, 0.0f, 0.0f);
# endif
- if(kernel_data.film.pass_denoising_clean) {
- float3 noisy, clean;
- path_radiance_split_denoising(kg, L, &noisy, &clean);
- kernel_write_pass_float3_variance(
- buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR,
- noisy);
- kernel_write_pass_float3_unaligned(
- buffer + kernel_data.film.pass_denoising_clean,
- clean);
- }
- else {
- kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR,
- ensure_finite3(L_sum));
- }
-
- kernel_write_pass_float3_variance(
- buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL,
- L->denoising_normal);
- kernel_write_pass_float3_variance(
- buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO,
- L->denoising_albedo);
- kernel_write_pass_float_variance(
- buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH,
- L->denoising_depth);
- }
-#endif /* __DENOISING_FEATURES__ */
-
+ if (kernel_data.film.pass_denoising_clean) {
+ float3 noisy, clean;
+ path_radiance_split_denoising(kg, L, &noisy, &clean);
+ kernel_write_pass_float3_variance(
+ buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, noisy);
+ kernel_write_pass_float3_unaligned(buffer + kernel_data.film.pass_denoising_clean, clean);
+ }
+ else {
+ kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data +
+ DENOISING_PASS_COLOR,
+ ensure_finite3(L_sum));
+ }
+
+ kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data +
+ DENOISING_PASS_NORMAL,
+ L->denoising_normal);
+ kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data +
+ DENOISING_PASS_ALBEDO,
+ L->denoising_albedo);
+ kernel_write_pass_float_variance(
+ buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH, L->denoising_depth);
+ }
+#endif /* __DENOISING_FEATURES__ */
#ifdef __KERNEL_DEBUG__
- kernel_write_debug_passes(kg, buffer, L);
+ kernel_write_debug_passes(kg, buffer, L);
#endif
}
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index a1fc6028293..2be1b745632 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -50,309 +50,298 @@
CCL_NAMESPACE_BEGIN
-ccl_device_forceinline bool kernel_path_scene_intersect(
- KernelGlobals *kg,
- ccl_addr_space PathState *state,
- Ray *ray,
- Intersection *isect,
- PathRadiance *L)
+ccl_device_forceinline bool kernel_path_scene_intersect(KernelGlobals *kg,
+ ccl_addr_space PathState *state,
+ Ray *ray,
+ Intersection *isect,
+ PathRadiance *L)
{
- PROFILING_INIT(kg, PROFILING_SCENE_INTERSECT);
+ PROFILING_INIT(kg, PROFILING_SCENE_INTERSECT);
- uint visibility = path_state_ray_visibility(kg, state);
+ uint visibility = path_state_ray_visibility(kg, state);
- if(path_state_ao_bounce(kg, state)) {
- visibility = PATH_RAY_SHADOW;
- ray->t = kernel_data.background.ao_distance;
- }
+ if (path_state_ao_bounce(kg, state)) {
+ visibility = PATH_RAY_SHADOW;
+ ray->t = kernel_data.background.ao_distance;
+ }
#ifdef __HAIR__
- float difl = 0.0f, extmax = 0.0f;
- uint lcg_state = 0;
+ float difl = 0.0f, extmax = 0.0f;
+ uint lcg_state = 0;
- if(kernel_data.bvh.have_curves) {
- if((kernel_data.cam.resolution == 1) && (state->flag & PATH_RAY_CAMERA)) {
- float3 pixdiff = ray->dD.dx + ray->dD.dy;
- /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
- difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
- }
+ if (kernel_data.bvh.have_curves) {
+ if ((kernel_data.cam.resolution == 1) && (state->flag & PATH_RAY_CAMERA)) {
+ float3 pixdiff = ray->dD.dx + ray->dD.dy;
+ /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
+ difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
+ }
- extmax = kernel_data.curve.maximum_width;
- lcg_state = lcg_state_init_addrspace(state, 0x51633e2d);
- }
+ extmax = kernel_data.curve.maximum_width;
+ lcg_state = lcg_state_init_addrspace(state, 0x51633e2d);
+ }
- bool hit = scene_intersect(kg, *ray, visibility, isect, &lcg_state, difl, extmax);
+ bool hit = scene_intersect(kg, *ray, visibility, isect, &lcg_state, difl, extmax);
#else
- bool hit = scene_intersect(kg, *ray, visibility, isect, NULL, 0.0f, 0.0f);
-#endif /* __HAIR__ */
+ bool hit = scene_intersect(kg, *ray, visibility, isect, NULL, 0.0f, 0.0f);
+#endif /* __HAIR__ */
#ifdef __KERNEL_DEBUG__
- if(state->flag & PATH_RAY_CAMERA) {
- L->debug_data.num_bvh_traversed_nodes += isect->num_traversed_nodes;
- L->debug_data.num_bvh_traversed_instances += isect->num_traversed_instances;
- L->debug_data.num_bvh_intersections += isect->num_intersections;
- }
- L->debug_data.num_ray_bounces++;
-#endif /* __KERNEL_DEBUG__ */
-
- return hit;
+ if (state->flag & PATH_RAY_CAMERA) {
+ L->debug_data.num_bvh_traversed_nodes += isect->num_traversed_nodes;
+ L->debug_data.num_bvh_traversed_instances += isect->num_traversed_instances;
+ L->debug_data.num_bvh_intersections += isect->num_intersections;
+ }
+ L->debug_data.num_ray_bounces++;
+#endif /* __KERNEL_DEBUG__ */
+
+ return hit;
}
-ccl_device_forceinline void kernel_path_lamp_emission(
- KernelGlobals *kg,
- ccl_addr_space PathState *state,
- Ray *ray,
- float3 throughput,
- ccl_addr_space Intersection *isect,
- ShaderData *emission_sd,
- PathRadiance *L)
+ccl_device_forceinline void kernel_path_lamp_emission(KernelGlobals *kg,
+ ccl_addr_space PathState *state,
+ Ray *ray,
+ float3 throughput,
+ ccl_addr_space Intersection *isect,
+ ShaderData *emission_sd,
+ PathRadiance *L)
{
- PROFILING_INIT(kg, PROFILING_INDIRECT_EMISSION);
+ PROFILING_INIT(kg, PROFILING_INDIRECT_EMISSION);
#ifdef __LAMP_MIS__
- if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
- /* ray starting from previous non-transparent bounce */
- Ray light_ray;
-
- light_ray.P = ray->P - state->ray_t*ray->D;
- state->ray_t += isect->t;
- light_ray.D = ray->D;
- light_ray.t = state->ray_t;
- light_ray.time = ray->time;
- light_ray.dD = ray->dD;
- light_ray.dP = ray->dP;
-
- /* intersect with lamp */
- float3 emission;
-
- if(indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission))
- path_radiance_accum_emission(L, state, throughput, emission);
- }
-#endif /* __LAMP_MIS__ */
+ if (kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
+ /* ray starting from previous non-transparent bounce */
+ Ray light_ray;
+
+ light_ray.P = ray->P - state->ray_t * ray->D;
+ state->ray_t += isect->t;
+ light_ray.D = ray->D;
+ light_ray.t = state->ray_t;
+ light_ray.time = ray->time;
+ light_ray.dD = ray->dD;
+ light_ray.dP = ray->dP;
+
+ /* intersect with lamp */
+ float3 emission;
+
+ if (indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission))
+ path_radiance_accum_emission(L, state, throughput, emission);
+ }
+#endif /* __LAMP_MIS__ */
}
-ccl_device_forceinline void kernel_path_background(
- KernelGlobals *kg,
- ccl_addr_space PathState *state,
- ccl_addr_space Ray *ray,
- float3 throughput,
- ShaderData *sd,
- PathRadiance *L)
+ccl_device_forceinline void kernel_path_background(KernelGlobals *kg,
+ ccl_addr_space PathState *state,
+ ccl_addr_space Ray *ray,
+ float3 throughput,
+ ShaderData *sd,
+ PathRadiance *L)
{
- /* eval background shader if nothing hit */
- if(kernel_data.background.transparent && (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
- L->transparent += average(throughput);
+ /* eval background shader if nothing hit */
+ if (kernel_data.background.transparent && (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
+ L->transparent += average(throughput);
#ifdef __PASSES__
- if(!(kernel_data.film.light_pass_flag & PASSMASK(BACKGROUND)))
-#endif /* __PASSES__ */
- return;
- }
+ if (!(kernel_data.film.light_pass_flag & PASSMASK(BACKGROUND)))
+#endif /* __PASSES__ */
+ return;
+ }
- /* When using the ao bounces approximation, adjust background
- * shader intensity with ao factor. */
- if(path_state_ao_bounce(kg, state)) {
- throughput *= kernel_data.background.ao_bounces_factor;
- }
+ /* When using the ao bounces approximation, adjust background
+ * shader intensity with ao factor. */
+ if (path_state_ao_bounce(kg, state)) {
+ throughput *= kernel_data.background.ao_bounces_factor;
+ }
#ifdef __BACKGROUND__
- /* sample background shader */
- float3 L_background = indirect_background(kg, sd, state, ray);
- path_radiance_accum_background(L, state, throughput, L_background);
-#endif /* __BACKGROUND__ */
+ /* sample background shader */
+ float3 L_background = indirect_background(kg, sd, state, ray);
+ path_radiance_accum_background(L, state, throughput, L_background);
+#endif /* __BACKGROUND__ */
}
#ifndef __SPLIT_KERNEL__
-#ifdef __VOLUME__
-ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(
- KernelGlobals *kg,
- ShaderData *sd,
- PathState *state,
- Ray *ray,
- float3 *throughput,
- ccl_addr_space Intersection *isect,
- bool hit,
- ShaderData *emission_sd,
- PathRadiance *L)
+# ifdef __VOLUME__
+ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(KernelGlobals *kg,
+ ShaderData *sd,
+ PathState *state,
+ Ray *ray,
+ float3 *throughput,
+ ccl_addr_space Intersection *isect,
+ bool hit,
+ ShaderData *emission_sd,
+ PathRadiance *L)
{
- PROFILING_INIT(kg, PROFILING_VOLUME);
-
- /* Sanitize volume stack. */
- if(!hit) {
- kernel_volume_clean_stack(kg, state->volume_stack);
- }
-
- if(state->volume_stack[0].shader == SHADER_NONE) {
- return VOLUME_PATH_ATTENUATED;
- }
-
- /* volume attenuation, emission, scatter */
- Ray volume_ray = *ray;
- volume_ray.t = (hit)? isect->t: FLT_MAX;
-
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
-
-# ifdef __VOLUME_DECOUPLED__
- int sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
- bool direct = (state->flag & PATH_RAY_CAMERA) != 0;
- bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, direct, sampling_method);
-
- if(decoupled) {
- /* cache steps along volume for repeated sampling */
- VolumeSegment volume_segment;
-
- shader_setup_from_volume(kg, sd, &volume_ray);
- kernel_volume_decoupled_record(kg, state,
- &volume_ray, sd, &volume_segment, heterogeneous);
-
- volume_segment.sampling_method = sampling_method;
-
- /* emission */
- if(volume_segment.closure_flag & SD_EMISSION)
- path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission);
-
- /* scattering */
- VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
-
- if(volume_segment.closure_flag & SD_SCATTER) {
- int all = kernel_data.integrator.sample_all_lights_indirect;
-
- /* direct light sampling */
- kernel_branched_path_volume_connect_light(kg, sd,
- emission_sd, *throughput, state, L, all,
- &volume_ray, &volume_segment);
-
- /* indirect sample. if we use distance sampling and take just
- * one sample for direct and indirect light, we could share
- * this computation, but makes code a bit complex */
- float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
- float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
-
- result = kernel_volume_decoupled_scatter(kg,
- state, &volume_ray, sd, throughput,
- rphase, rscatter, &volume_segment, NULL, true);
- }
-
- /* free cached steps */
- kernel_volume_decoupled_free(kg, &volume_segment);
-
- if(result == VOLUME_PATH_SCATTERED) {
- if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray))
- return VOLUME_PATH_SCATTERED;
- else
- return VOLUME_PATH_MISSED;
- }
- else {
- *throughput *= volume_segment.accum_transmittance;
- }
- }
- else
-# endif /* __VOLUME_DECOUPLED__ */
- {
- /* integrate along volume segment with distance sampling */
- VolumeIntegrateResult result = kernel_volume_integrate(
- kg, state, sd, &volume_ray, L, throughput, heterogeneous);
-
-# ifdef __VOLUME_SCATTER__
- if(result == VOLUME_PATH_SCATTERED) {
- /* direct lighting */
- kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
-
- /* indirect light bounce */
- if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray))
- return VOLUME_PATH_SCATTERED;
- else
- return VOLUME_PATH_MISSED;
- }
-# endif /* __VOLUME_SCATTER__ */
- }
-
- return VOLUME_PATH_ATTENUATED;
+ PROFILING_INIT(kg, PROFILING_VOLUME);
+
+ /* Sanitize volume stack. */
+ if (!hit) {
+ kernel_volume_clean_stack(kg, state->volume_stack);
+ }
+
+ if (state->volume_stack[0].shader == SHADER_NONE) {
+ return VOLUME_PATH_ATTENUATED;
+ }
+
+ /* volume attenuation, emission, scatter */
+ Ray volume_ray = *ray;
+ volume_ray.t = (hit) ? isect->t : FLT_MAX;
+
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+
+# ifdef __VOLUME_DECOUPLED__
+ int sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
+ bool direct = (state->flag & PATH_RAY_CAMERA) != 0;
+ bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, direct, sampling_method);
+
+ if (decoupled) {
+ /* cache steps along volume for repeated sampling */
+ VolumeSegment volume_segment;
+
+ shader_setup_from_volume(kg, sd, &volume_ray);
+ kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, heterogeneous);
+
+ volume_segment.sampling_method = sampling_method;
+
+ /* emission */
+ if (volume_segment.closure_flag & SD_EMISSION)
+ path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission);
+
+ /* scattering */
+ VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
+
+ if (volume_segment.closure_flag & SD_SCATTER) {
+ int all = kernel_data.integrator.sample_all_lights_indirect;
+
+ /* direct light sampling */
+ kernel_branched_path_volume_connect_light(
+ kg, sd, emission_sd, *throughput, state, L, all, &volume_ray, &volume_segment);
+
+ /* indirect sample. if we use distance sampling and take just
+ * one sample for direct and indirect light, we could share
+ * this computation, but makes code a bit complex */
+ float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
+ float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
+
+ result = kernel_volume_decoupled_scatter(
+ kg, state, &volume_ray, sd, throughput, rphase, rscatter, &volume_segment, NULL, true);
+ }
+
+ /* free cached steps */
+ kernel_volume_decoupled_free(kg, &volume_segment);
+
+ if (result == VOLUME_PATH_SCATTERED) {
+ if (kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray))
+ return VOLUME_PATH_SCATTERED;
+ else
+ return VOLUME_PATH_MISSED;
+ }
+ else {
+ *throughput *= volume_segment.accum_transmittance;
+ }
+ }
+ else
+# endif /* __VOLUME_DECOUPLED__ */
+ {
+ /* integrate along volume segment with distance sampling */
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, state, sd, &volume_ray, L, throughput, heterogeneous);
+
+# ifdef __VOLUME_SCATTER__
+ if (result == VOLUME_PATH_SCATTERED) {
+ /* direct lighting */
+ kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
+
+ /* indirect light bounce */
+ if (kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray))
+ return VOLUME_PATH_SCATTERED;
+ else
+ return VOLUME_PATH_MISSED;
+ }
+# endif /* __VOLUME_SCATTER__ */
+ }
+
+ return VOLUME_PATH_ATTENUATED;
}
-#endif /* __VOLUME__ */
-
-#endif /* __SPLIT_KERNEL__ */
-
-ccl_device_forceinline bool kernel_path_shader_apply(
- KernelGlobals *kg,
- ShaderData *sd,
- ccl_addr_space PathState *state,
- ccl_addr_space Ray *ray,
- float3 throughput,
- ShaderData *emission_sd,
- PathRadiance *L,
- ccl_global float *buffer)
+# endif /* __VOLUME__ */
+
+#endif /* __SPLIT_KERNEL__ */
+
+ccl_device_forceinline bool kernel_path_shader_apply(KernelGlobals *kg,
+ ShaderData *sd,
+ ccl_addr_space PathState *state,
+ ccl_addr_space Ray *ray,
+ float3 throughput,
+ ShaderData *emission_sd,
+ PathRadiance *L,
+ ccl_global float *buffer)
{
- PROFILING_INIT(kg, PROFILING_SHADER_APPLY);
+ PROFILING_INIT(kg, PROFILING_SHADER_APPLY);
#ifdef __SHADOW_TRICKS__
- if((sd->object_flag & SD_OBJECT_SHADOW_CATCHER)) {
- if(state->flag & PATH_RAY_TRANSPARENT_BACKGROUND) {
- state->flag |= (PATH_RAY_SHADOW_CATCHER |
- PATH_RAY_STORE_SHADOW_INFO);
-
- float3 bg = make_float3(0.0f, 0.0f, 0.0f);
- if(!kernel_data.background.transparent) {
- bg = indirect_background(kg, emission_sd, state, ray);
- }
- path_radiance_accum_shadowcatcher(L, throughput, bg);
- }
- }
- else if(state->flag & PATH_RAY_SHADOW_CATCHER) {
- /* Only update transparency after shadow catcher bounce. */
- L->shadow_transparency *=
- average(shader_bsdf_transparency(kg, sd));
- }
-#endif /* __SHADOW_TRICKS__ */
-
- /* holdout */
+ if ((sd->object_flag & SD_OBJECT_SHADOW_CATCHER)) {
+ if (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND) {
+ state->flag |= (PATH_RAY_SHADOW_CATCHER | PATH_RAY_STORE_SHADOW_INFO);
+
+ float3 bg = make_float3(0.0f, 0.0f, 0.0f);
+ if (!kernel_data.background.transparent) {
+ bg = indirect_background(kg, emission_sd, state, ray);
+ }
+ path_radiance_accum_shadowcatcher(L, throughput, bg);
+ }
+ }
+ else if (state->flag & PATH_RAY_SHADOW_CATCHER) {
+ /* Only update transparency after shadow catcher bounce. */
+ L->shadow_transparency *= average(shader_bsdf_transparency(kg, sd));
+ }
+#endif /* __SHADOW_TRICKS__ */
+
+ /* holdout */
#ifdef __HOLDOUT__
- if(((sd->flag & SD_HOLDOUT) ||
- (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
- (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND))
- {
- if(kernel_data.background.transparent) {
- float3 holdout_weight;
- if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
- holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
- }
- else {
- holdout_weight = shader_holdout_eval(kg, sd);
- }
- /* any throughput is ok, should all be identical here */
- L->transparent += average(holdout_weight*throughput);
- }
-
- if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
- return false;
- }
- }
-#endif /* __HOLDOUT__ */
-
- /* holdout mask objects do not write data passes */
- kernel_write_data_passes(kg, buffer, L, sd, state, throughput);
-
- /* blurring of bsdf after bounces, for rays that have a small likelihood
- * of following this particular path (diffuse, rough glossy) */
- if(kernel_data.integrator.filter_glossy != FLT_MAX) {
- float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
-
- if(blur_pdf < 1.0f) {
- float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
- shader_bsdf_blur(kg, sd, blur_roughness);
- }
- }
+ if (((sd->flag & SD_HOLDOUT) || (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
+ (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
+ if (kernel_data.background.transparent) {
+ float3 holdout_weight;
+ if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
+ holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
+ }
+ else {
+ holdout_weight = shader_holdout_eval(kg, sd);
+ }
+ /* any throughput is ok, should all be identical here */
+ L->transparent += average(holdout_weight * throughput);
+ }
+
+ if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
+ return false;
+ }
+ }
+#endif /* __HOLDOUT__ */
+
+ /* holdout mask objects do not write data passes */
+ kernel_write_data_passes(kg, buffer, L, sd, state, throughput);
+
+ /* blurring of bsdf after bounces, for rays that have a small likelihood
+ * of following this particular path (diffuse, rough glossy) */
+ if (kernel_data.integrator.filter_glossy != FLT_MAX) {
+ float blur_pdf = kernel_data.integrator.filter_glossy * state->min_ray_pdf;
+
+ if (blur_pdf < 1.0f) {
+ float blur_roughness = sqrtf(1.0f - blur_pdf) * 0.5f;
+ shader_bsdf_blur(kg, sd, blur_roughness);
+ }
+ }
#ifdef __EMISSION__
- /* emission */
- if(sd->flag & SD_EMISSION) {
- float3 emission = indirect_primitive_emission(kg, sd, sd->ray_length, state->flag, state->ray_pdf);
- path_radiance_accum_emission(L, state, throughput, emission);
- }
-#endif /* __EMISSION__ */
-
- return true;
+ /* emission */
+ if (sd->flag & SD_EMISSION) {
+ float3 emission = indirect_primitive_emission(
+ kg, sd, sd->ray_length, state->flag, state->ray_pdf);
+ path_radiance_accum_emission(L, state, throughput, emission);
+ }
+#endif /* __EMISSION__ */
+
+ return true;
}
ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
@@ -363,44 +352,44 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
float3 throughput,
float3 ao_alpha)
{
- PROFILING_INIT(kg, PROFILING_AO);
-
- /* todo: solve correlation */
- float bsdf_u, bsdf_v;
-
- path_state_rng_2D(kg, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
-
- float ao_factor = kernel_data.background.ao_factor;
- float3 ao_N;
- float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
- float3 ao_D;
- float ao_pdf;
-
- sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
-
- if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
- Ray light_ray;
- float3 ao_shadow;
-
- light_ray.P = ray_offset(sd->P, sd->Ng);
- light_ray.D = ao_D;
- light_ray.t = kernel_data.background.ao_distance;
- light_ray.time = sd->time;
- light_ray.dP = sd->dP;
- light_ray.dD = differential3_zero();
-
- if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) {
- path_radiance_accum_ao(L, state, throughput, ao_alpha, ao_bsdf, ao_shadow);
- }
- else {
- path_radiance_accum_total_ao(L, state, throughput, ao_bsdf);
- }
- }
+ PROFILING_INIT(kg, PROFILING_AO);
+
+ /* todo: solve correlation */
+ float bsdf_u, bsdf_v;
+
+ path_state_rng_2D(kg, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+
+ float ao_factor = kernel_data.background.ao_factor;
+ float3 ao_N;
+ float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
+ float3 ao_D;
+ float ao_pdf;
+
+ sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
+
+ if (dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
+ Ray light_ray;
+ float3 ao_shadow;
+
+ light_ray.P = ray_offset(sd->P, sd->Ng);
+ light_ray.D = ao_D;
+ light_ray.t = kernel_data.background.ao_distance;
+ light_ray.time = sd->time;
+ light_ray.dP = sd->dP;
+ light_ray.dD = differential3_zero();
+
+ if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) {
+ path_radiance_accum_ao(L, state, throughput, ao_alpha, ao_bsdf, ao_shadow);
+ }
+ else {
+ path_radiance_accum_total_ao(L, state, throughput, ao_bsdf);
+ }
+ }
}
#ifndef __SPLIT_KERNEL__
-#if defined(__BRANCHED_PATH__) || defined(__BAKING__)
+# if defined(__BRANCHED_PATH__) || defined(__BAKING__)
ccl_device void kernel_path_indirect(KernelGlobals *kg,
ShaderData *sd,
@@ -410,369 +399,300 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
PathState *state,
PathRadiance *L)
{
-#ifdef __SUBSURFACE__
- SubsurfaceIndirectRays ss_indirect;
- kernel_path_subsurface_init_indirect(&ss_indirect);
-
- for(;;) {
-#endif /* __SUBSURFACE__ */
-
- /* path iteration */
- for(;;) {
- /* Find intersection with objects in scene. */
- Intersection isect;
- bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L);
-
- /* Find intersection with lamps and compute emission for MIS. */
- kernel_path_lamp_emission(kg, state, ray, throughput, &isect, sd, L);
-
-#ifdef __VOLUME__
- /* Volume integration. */
- VolumeIntegrateResult result = kernel_path_volume(kg,
- sd,
- state,
- ray,
- &throughput,
- &isect,
- hit,
- emission_sd,
- L);
-
- if(result == VOLUME_PATH_SCATTERED) {
- continue;
- }
- else if(result == VOLUME_PATH_MISSED) {
- break;
- }
-#endif /* __VOLUME__*/
-
- /* Shade background. */
- if(!hit) {
- kernel_path_background(kg, state, ray, throughput, sd, L);
- break;
- }
- else if(path_state_ao_bounce(kg, state)) {
- break;
- }
-
- /* Setup shader data. */
- shader_setup_from_ray(kg, sd, &isect, ray);
-
- /* Skip most work for volume bounding surface. */
-#ifdef __VOLUME__
- if(!(sd->flag & SD_HAS_ONLY_VOLUME)) {
-#endif
-
- /* Evaluate shader. */
- shader_eval_surface(kg, sd, state, state->flag);
- shader_prepare_closures(sd, state);
-
- /* Apply shadow catcher, holdout, emission. */
- if(!kernel_path_shader_apply(kg,
- sd,
- state,
- ray,
- throughput,
- emission_sd,
- L,
- NULL))
- {
- break;
- }
-
- /* path termination. this is a strange place to put the termination, it's
- * mainly due to the mixed in MIS that we use. gives too many unneeded
- * shader evaluations, only need emission if we are going to terminate */
- float probability = path_state_continuation_probability(kg, state, throughput);
-
- if(probability == 0.0f) {
- break;
- }
- else if(probability != 1.0f) {
- float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
-
- if(terminate >= probability)
- break;
-
- throughput /= probability;
- }
-
- kernel_update_denoising_features(kg, sd, state, L);
-
-#ifdef __AO__
- /* ambient occlusion */
- if(kernel_data.integrator.use_ambient_occlusion) {
- kernel_path_ao(kg, sd, emission_sd, L, state, throughput, make_float3(0.0f, 0.0f, 0.0f));
- }
-#endif /* __AO__ */
-
-
-#ifdef __SUBSURFACE__
- /* bssrdf scatter to a different location on the same object, replacing
- * the closures with a diffuse BSDF */
- if(sd->flag & SD_BSSRDF) {
- if(kernel_path_subsurface_scatter(kg,
- sd,
- emission_sd,
- L,
- state,
- ray,
- &throughput,
- &ss_indirect))
- {
- break;
- }
- }
-#endif /* __SUBSURFACE__ */
-
-#if defined(__EMISSION__)
- if(kernel_data.integrator.use_direct_light) {
- int all = (kernel_data.integrator.sample_all_lights_indirect) ||
- (state->flag & PATH_RAY_SHADOW_CATCHER);
- kernel_branched_path_surface_connect_light(kg,
- sd,
- emission_sd,
- state,
- throughput,
- 1.0f,
- L,
- all);
- }
-#endif /* defined(__EMISSION__) */
-
-#ifdef __VOLUME__
- }
-#endif
-
- if(!kernel_path_surface_bounce(kg, sd, &throughput, state, &L->state, ray))
- break;
- }
-
-#ifdef __SUBSURFACE__
- /* Trace indirect subsurface rays by restarting the loop. this uses less
- * stack memory than invoking kernel_path_indirect.
- */
- if(ss_indirect.num_rays) {
- kernel_path_subsurface_setup_indirect(kg,
- &ss_indirect,
- state,
- ray,
- L,
- &throughput);
- }
- else {
- break;
- }
- }
-#endif /* __SUBSURFACE__ */
+# ifdef __SUBSURFACE__
+ SubsurfaceIndirectRays ss_indirect;
+ kernel_path_subsurface_init_indirect(&ss_indirect);
+
+ for (;;) {
+# endif /* __SUBSURFACE__ */
+
+ /* path iteration */
+ for (;;) {
+ /* Find intersection with objects in scene. */
+ Intersection isect;
+ bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L);
+
+ /* Find intersection with lamps and compute emission for MIS. */
+ kernel_path_lamp_emission(kg, state, ray, throughput, &isect, sd, L);
+
+# ifdef __VOLUME__
+ /* Volume integration. */
+ VolumeIntegrateResult result = kernel_path_volume(
+ kg, sd, state, ray, &throughput, &isect, hit, emission_sd, L);
+
+ if (result == VOLUME_PATH_SCATTERED) {
+ continue;
+ }
+ else if (result == VOLUME_PATH_MISSED) {
+ break;
+ }
+# endif /* __VOLUME__*/
+
+ /* Shade background. */
+ if (!hit) {
+ kernel_path_background(kg, state, ray, throughput, sd, L);
+ break;
+ }
+ else if (path_state_ao_bounce(kg, state)) {
+ break;
+ }
+
+ /* Setup shader data. */
+ shader_setup_from_ray(kg, sd, &isect, ray);
+
+ /* Skip most work for volume bounding surface. */
+# ifdef __VOLUME__
+ if (!(sd->flag & SD_HAS_ONLY_VOLUME)) {
+# endif
+
+ /* Evaluate shader. */
+ shader_eval_surface(kg, sd, state, state->flag);
+ shader_prepare_closures(sd, state);
+
+ /* Apply shadow catcher, holdout, emission. */
+ if (!kernel_path_shader_apply(kg, sd, state, ray, throughput, emission_sd, L, NULL)) {
+ break;
+ }
+
+ /* path termination. this is a strange place to put the termination, it's
+ * mainly due to the mixed in MIS that we use. gives too many unneeded
+ * shader evaluations, only need emission if we are going to terminate */
+ float probability = path_state_continuation_probability(kg, state, throughput);
+
+ if (probability == 0.0f) {
+ break;
+ }
+ else if (probability != 1.0f) {
+ float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
+
+ if (terminate >= probability)
+ break;
+
+ throughput /= probability;
+ }
+
+ kernel_update_denoising_features(kg, sd, state, L);
+
+# ifdef __AO__
+ /* ambient occlusion */
+ if (kernel_data.integrator.use_ambient_occlusion) {
+ kernel_path_ao(kg, sd, emission_sd, L, state, throughput, make_float3(0.0f, 0.0f, 0.0f));
+ }
+# endif /* __AO__ */
+
+# ifdef __SUBSURFACE__
+ /* bssrdf scatter to a different location on the same object, replacing
+ * the closures with a diffuse BSDF */
+ if (sd->flag & SD_BSSRDF) {
+ if (kernel_path_subsurface_scatter(
+ kg, sd, emission_sd, L, state, ray, &throughput, &ss_indirect)) {
+ break;
+ }
+ }
+# endif /* __SUBSURFACE__ */
+
+# if defined(__EMISSION__)
+ if (kernel_data.integrator.use_direct_light) {
+ int all = (kernel_data.integrator.sample_all_lights_indirect) ||
+ (state->flag & PATH_RAY_SHADOW_CATCHER);
+ kernel_branched_path_surface_connect_light(
+ kg, sd, emission_sd, state, throughput, 1.0f, L, all);
+ }
+# endif /* defined(__EMISSION__) */
+
+# ifdef __VOLUME__
+ }
+# endif
+
+ if (!kernel_path_surface_bounce(kg, sd, &throughput, state, &L->state, ray))
+ break;
+ }
+
+# ifdef __SUBSURFACE__
+ /* Trace indirect subsurface rays by restarting the loop. this uses less
+ * stack memory than invoking kernel_path_indirect.
+ */
+ if (ss_indirect.num_rays) {
+ kernel_path_subsurface_setup_indirect(kg, &ss_indirect, state, ray, L, &throughput);
+ }
+ else {
+ break;
+ }
+ }
+# endif /* __SUBSURFACE__ */
}
-#endif /* defined(__BRANCHED_PATH__) || defined(__BAKING__) */
+# endif /* defined(__BRANCHED_PATH__) || defined(__BAKING__) */
-ccl_device_forceinline void kernel_path_integrate(
- KernelGlobals *kg,
- PathState *state,
- float3 throughput,
- Ray *ray,
- PathRadiance *L,
- ccl_global float *buffer,
- ShaderData *emission_sd)
+ccl_device_forceinline void kernel_path_integrate(KernelGlobals *kg,
+ PathState *state,
+ float3 throughput,
+ Ray *ray,
+ PathRadiance *L,
+ ccl_global float *buffer,
+ ShaderData *emission_sd)
{
- PROFILING_INIT(kg, PROFILING_PATH_INTEGRATE);
-
- /* Shader data memory used for both volumes and surfaces, saves stack space. */
- ShaderData sd;
-
-#ifdef __SUBSURFACE__
- SubsurfaceIndirectRays ss_indirect;
- kernel_path_subsurface_init_indirect(&ss_indirect);
-
- for(;;) {
-#endif /* __SUBSURFACE__ */
-
- /* path iteration */
- for(;;) {
- /* Find intersection with objects in scene. */
- Intersection isect;
- bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L);
-
- /* Find intersection with lamps and compute emission for MIS. */
- kernel_path_lamp_emission(kg, state, ray, throughput, &isect, &sd, L);
-
-#ifdef __VOLUME__
- /* Volume integration. */
- VolumeIntegrateResult result = kernel_path_volume(kg,
- &sd,
- state,
- ray,
- &throughput,
- &isect,
- hit,
- emission_sd,
- L);
-
- if(result == VOLUME_PATH_SCATTERED) {
- continue;
- }
- else if(result == VOLUME_PATH_MISSED) {
- break;
- }
-#endif /* __VOLUME__*/
-
- /* Shade background. */
- if(!hit) {
- kernel_path_background(kg, state, ray, throughput, &sd, L);
- break;
- }
- else if(path_state_ao_bounce(kg, state)) {
- break;
- }
-
- /* Setup shader data. */
- shader_setup_from_ray(kg, &sd, &isect, ray);
-
- /* Skip most work for volume bounding surface. */
-#ifdef __VOLUME__
- if(!(sd.flag & SD_HAS_ONLY_VOLUME)) {
-#endif
-
- /* Evaluate shader. */
- shader_eval_surface(kg, &sd, state, state->flag);
- shader_prepare_closures(&sd, state);
-
- /* Apply shadow catcher, holdout, emission. */
- if(!kernel_path_shader_apply(kg,
- &sd,
- state,
- ray,
- throughput,
- emission_sd,
- L,
- buffer))
- {
- break;
- }
-
- /* path termination. this is a strange place to put the termination, it's
- * mainly due to the mixed in MIS that we use. gives too many unneeded
- * shader evaluations, only need emission if we are going to terminate */
- float probability = path_state_continuation_probability(kg, state, throughput);
-
- if(probability == 0.0f) {
- break;
- }
- else if(probability != 1.0f) {
- float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
- if(terminate >= probability)
- break;
-
- throughput /= probability;
- }
-
- kernel_update_denoising_features(kg, &sd, state, L);
-
-#ifdef __AO__
- /* ambient occlusion */
- if(kernel_data.integrator.use_ambient_occlusion) {
- kernel_path_ao(kg, &sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, &sd));
- }
-#endif /* __AO__ */
-
-#ifdef __SUBSURFACE__
- /* bssrdf scatter to a different location on the same object, replacing
- * the closures with a diffuse BSDF */
- if(sd.flag & SD_BSSRDF) {
- if(kernel_path_subsurface_scatter(kg,
- &sd,
- emission_sd,
- L,
- state,
- ray,
- &throughput,
- &ss_indirect))
- {
- break;
- }
- }
-#endif /* __SUBSURFACE__ */
-
- /* direct lighting */
- kernel_path_surface_connect_light(kg, &sd, emission_sd, throughput, state, L);
-
-#ifdef __VOLUME__
- }
-#endif
-
- /* compute direct lighting and next bounce */
- if(!kernel_path_surface_bounce(kg, &sd, &throughput, state, &L->state, ray))
- break;
- }
-
-#ifdef __SUBSURFACE__
- /* Trace indirect subsurface rays by restarting the loop. this uses less
- * stack memory than invoking kernel_path_indirect.
- */
- if(ss_indirect.num_rays) {
- kernel_path_subsurface_setup_indirect(kg,
- &ss_indirect,
- state,
- ray,
- L,
- &throughput);
- }
- else {
- break;
- }
- }
-#endif /* __SUBSURFACE__ */
+ PROFILING_INIT(kg, PROFILING_PATH_INTEGRATE);
+
+ /* Shader data memory used for both volumes and surfaces, saves stack space. */
+ ShaderData sd;
+
+# ifdef __SUBSURFACE__
+ SubsurfaceIndirectRays ss_indirect;
+ kernel_path_subsurface_init_indirect(&ss_indirect);
+
+ for (;;) {
+# endif /* __SUBSURFACE__ */
+
+ /* path iteration */
+ for (;;) {
+ /* Find intersection with objects in scene. */
+ Intersection isect;
+ bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L);
+
+ /* Find intersection with lamps and compute emission for MIS. */
+ kernel_path_lamp_emission(kg, state, ray, throughput, &isect, &sd, L);
+
+# ifdef __VOLUME__
+ /* Volume integration. */
+ VolumeIntegrateResult result = kernel_path_volume(
+ kg, &sd, state, ray, &throughput, &isect, hit, emission_sd, L);
+
+ if (result == VOLUME_PATH_SCATTERED) {
+ continue;
+ }
+ else if (result == VOLUME_PATH_MISSED) {
+ break;
+ }
+# endif /* __VOLUME__*/
+
+ /* Shade background. */
+ if (!hit) {
+ kernel_path_background(kg, state, ray, throughput, &sd, L);
+ break;
+ }
+ else if (path_state_ao_bounce(kg, state)) {
+ break;
+ }
+
+ /* Setup shader data. */
+ shader_setup_from_ray(kg, &sd, &isect, ray);
+
+ /* Skip most work for volume bounding surface. */
+# ifdef __VOLUME__
+ if (!(sd.flag & SD_HAS_ONLY_VOLUME)) {
+# endif
+
+ /* Evaluate shader. */
+ shader_eval_surface(kg, &sd, state, state->flag);
+ shader_prepare_closures(&sd, state);
+
+ /* Apply shadow catcher, holdout, emission. */
+ if (!kernel_path_shader_apply(kg, &sd, state, ray, throughput, emission_sd, L, buffer)) {
+ break;
+ }
+
+ /* path termination. this is a strange place to put the termination, it's
+ * mainly due to the mixed in MIS that we use. gives too many unneeded
+ * shader evaluations, only need emission if we are going to terminate */
+ float probability = path_state_continuation_probability(kg, state, throughput);
+
+ if (probability == 0.0f) {
+ break;
+ }
+ else if (probability != 1.0f) {
+ float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
+ if (terminate >= probability)
+ break;
+
+ throughput /= probability;
+ }
+
+ kernel_update_denoising_features(kg, &sd, state, L);
+
+# ifdef __AO__
+ /* ambient occlusion */
+ if (kernel_data.integrator.use_ambient_occlusion) {
+ kernel_path_ao(kg, &sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, &sd));
+ }
+# endif /* __AO__ */
+
+# ifdef __SUBSURFACE__
+ /* bssrdf scatter to a different location on the same object, replacing
+ * the closures with a diffuse BSDF */
+ if (sd.flag & SD_BSSRDF) {
+ if (kernel_path_subsurface_scatter(
+ kg, &sd, emission_sd, L, state, ray, &throughput, &ss_indirect)) {
+ break;
+ }
+ }
+# endif /* __SUBSURFACE__ */
+
+ /* direct lighting */
+ kernel_path_surface_connect_light(kg, &sd, emission_sd, throughput, state, L);
+
+# ifdef __VOLUME__
+ }
+# endif
+
+ /* compute direct lighting and next bounce */
+ if (!kernel_path_surface_bounce(kg, &sd, &throughput, state, &L->state, ray))
+ break;
+ }
+
+# ifdef __SUBSURFACE__
+ /* Trace indirect subsurface rays by restarting the loop. this uses less
+ * stack memory than invoking kernel_path_indirect.
+ */
+ if (ss_indirect.num_rays) {
+ kernel_path_subsurface_setup_indirect(kg, &ss_indirect, state, ray, L, &throughput);
+ }
+ else {
+ break;
+ }
+ }
+# endif /* __SUBSURFACE__ */
}
-ccl_device void kernel_path_trace(KernelGlobals *kg,
- ccl_global float *buffer,
- int sample, int x, int y, int offset, int stride)
+ccl_device void kernel_path_trace(
+ KernelGlobals *kg, ccl_global float *buffer, int sample, int x, int y, int offset, int stride)
{
- PROFILING_INIT(kg, PROFILING_RAY_SETUP);
+ PROFILING_INIT(kg, PROFILING_RAY_SETUP);
- /* buffer offset */
- int index = offset + x + y*stride;
- int pass_stride = kernel_data.film.pass_stride;
+ /* buffer offset */
+ int index = offset + x + y * stride;
+ int pass_stride = kernel_data.film.pass_stride;
- buffer += index*pass_stride;
+ buffer += index * pass_stride;
- /* Initialize random numbers and sample ray. */
- uint rng_hash;
- Ray ray;
+ /* Initialize random numbers and sample ray. */
+ uint rng_hash;
+ Ray ray;
- kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray);
+ kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray);
- if(ray.t == 0.0f) {
- return;
- }
+ if (ray.t == 0.0f) {
+ return;
+ }
- /* Initialize state. */
- float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+ /* Initialize state. */
+ float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
- PathRadiance L;
- path_radiance_init(&L, kernel_data.film.use_light_pass);
+ PathRadiance L;
+ path_radiance_init(&L, kernel_data.film.use_light_pass);
- ShaderDataTinyStorage emission_sd_storage;
- ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+ ShaderDataTinyStorage emission_sd_storage;
+ ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
- PathState state;
- path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
+ PathState state;
+ path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
- /* Integrate. */
- kernel_path_integrate(kg,
- &state,
- throughput,
- &ray,
- &L,
- buffer,
- emission_sd);
+ /* Integrate. */
+ kernel_path_integrate(kg, &state, throughput, &ray, &L, buffer, emission_sd);
- kernel_write_result(kg, buffer, sample, &L);
+ kernel_write_result(kg, buffer, sample, &L);
}
-#endif /* __SPLIT_KERNEL__ */
+#endif /* __SPLIT_KERNEL__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index 21da4d9308b..e8ce61024b3 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -25,297 +25,262 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
ccl_addr_space PathState *state,
float3 throughput)
{
- int num_samples = kernel_data.integrator.ao_samples;
- float num_samples_inv = 1.0f/num_samples;
- float ao_factor = kernel_data.background.ao_factor;
- float3 ao_N;
- float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
- float3 ao_alpha = shader_bsdf_alpha(kg, sd);
-
- for(int j = 0; j < num_samples; j++) {
- float bsdf_u, bsdf_v;
- path_branched_rng_2D(kg, state->rng_hash, state, j, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
-
- float3 ao_D;
- float ao_pdf;
-
- sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
-
- if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
- Ray light_ray;
- float3 ao_shadow;
-
- light_ray.P = ray_offset(sd->P, sd->Ng);
- light_ray.D = ao_D;
- light_ray.t = kernel_data.background.ao_distance;
- light_ray.time = sd->time;
- light_ray.dP = sd->dP;
- light_ray.dD = differential3_zero();
-
- if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) {
- path_radiance_accum_ao(L, state, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow);
- }
- else {
- path_radiance_accum_total_ao(L, state, throughput*num_samples_inv, ao_bsdf);
- }
- }
- }
+ int num_samples = kernel_data.integrator.ao_samples;
+ float num_samples_inv = 1.0f / num_samples;
+ float ao_factor = kernel_data.background.ao_factor;
+ float3 ao_N;
+ float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
+ float3 ao_alpha = shader_bsdf_alpha(kg, sd);
+
+ for (int j = 0; j < num_samples; j++) {
+ float bsdf_u, bsdf_v;
+ path_branched_rng_2D(
+ kg, state->rng_hash, state, j, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+
+ float3 ao_D;
+ float ao_pdf;
+
+ sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
+
+ if (dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
+ Ray light_ray;
+ float3 ao_shadow;
+
+ light_ray.P = ray_offset(sd->P, sd->Ng);
+ light_ray.D = ao_D;
+ light_ray.t = kernel_data.background.ao_distance;
+ light_ray.time = sd->time;
+ light_ray.dP = sd->dP;
+ light_ray.dD = differential3_zero();
+
+ if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) {
+ path_radiance_accum_ao(
+ L, state, throughput * num_samples_inv, ao_alpha, ao_bsdf, ao_shadow);
+ }
+ else {
+ path_radiance_accum_total_ao(L, state, throughput * num_samples_inv, ao_bsdf);
+ }
+ }
+ }
}
-#ifndef __SPLIT_KERNEL__
-
-#ifdef __VOLUME__
-ccl_device_forceinline void kernel_branched_path_volume(
- KernelGlobals *kg,
- ShaderData *sd,
- PathState *state,
- Ray *ray,
- float3 *throughput,
- ccl_addr_space Intersection *isect,
- bool hit,
- ShaderData *indirect_sd,
- ShaderData *emission_sd,
- PathRadiance *L)
+# ifndef __SPLIT_KERNEL__
+
+# ifdef __VOLUME__
+ccl_device_forceinline void kernel_branched_path_volume(KernelGlobals *kg,
+ ShaderData *sd,
+ PathState *state,
+ Ray *ray,
+ float3 *throughput,
+ ccl_addr_space Intersection *isect,
+ bool hit,
+ ShaderData *indirect_sd,
+ ShaderData *emission_sd,
+ PathRadiance *L)
{
- /* Sanitize volume stack. */
- if(!hit) {
- kernel_volume_clean_stack(kg, state->volume_stack);
- }
-
- if(state->volume_stack[0].shader == SHADER_NONE) {
- return;
- }
-
- /* volume attenuation, emission, scatter */
- Ray volume_ray = *ray;
- volume_ray.t = (hit)? isect->t: FLT_MAX;
-
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
-
-# ifdef __VOLUME_DECOUPLED__
- /* decoupled ray marching only supported on CPU */
- if(kernel_data.integrator.volume_decoupled) {
- /* cache steps along volume for repeated sampling */
- VolumeSegment volume_segment;
-
- shader_setup_from_volume(kg, sd, &volume_ray);
- kernel_volume_decoupled_record(kg, state,
- &volume_ray, sd, &volume_segment, heterogeneous);
-
- /* direct light sampling */
- if(volume_segment.closure_flag & SD_SCATTER) {
- volume_segment.sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
-
- int all = kernel_data.integrator.sample_all_lights_direct;
-
- kernel_branched_path_volume_connect_light(kg, sd,
- emission_sd, *throughput, state, L, all,
- &volume_ray, &volume_segment);
-
- /* indirect light sampling */
- int num_samples = kernel_data.integrator.volume_samples;
- float num_samples_inv = 1.0f/num_samples;
-
- for(int j = 0; j < num_samples; j++) {
- PathState ps = *state;
- Ray pray = *ray;
- float3 tp = *throughput;
-
- /* branch RNG state */
- path_state_branch(&ps, j, num_samples);
-
- /* scatter sample. if we use distance sampling and take just one
- * sample for direct and indirect light, we could share this
- * computation, but makes code a bit complex */
- float rphase = path_state_rng_1D(kg, &ps, PRNG_PHASE_CHANNEL);
- float rscatter = path_state_rng_1D(kg, &ps, PRNG_SCATTER_DISTANCE);
-
- VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
- &ps, &pray, sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
-
- if(result == VOLUME_PATH_SCATTERED &&
- kernel_path_volume_bounce(kg,
- sd,
- &tp,
- &ps,
- &L->state,
- &pray))
- {
- kernel_path_indirect(kg,
- indirect_sd,
- emission_sd,
- &pray,
- tp*num_samples_inv,
- &ps,
- L);
-
- /* for render passes, sum and reset indirect light pass variables
- * for the next samples */
- path_radiance_sum_indirect(L);
- path_radiance_reset_indirect(L);
- }
- }
- }
-
- /* emission and transmittance */
- if(volume_segment.closure_flag & SD_EMISSION)
- path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission);
- *throughput *= volume_segment.accum_transmittance;
-
- /* free cached steps */
- kernel_volume_decoupled_free(kg, &volume_segment);
- }
- else
-# endif /* __VOLUME_DECOUPLED__ */
- {
- /* GPU: no decoupled ray marching, scatter probalistically */
- int num_samples = kernel_data.integrator.volume_samples;
- float num_samples_inv = 1.0f/num_samples;
-
- /* todo: we should cache the shader evaluations from stepping
- * through the volume, for now we redo them multiple times */
-
- for(int j = 0; j < num_samples; j++) {
- PathState ps = *state;
- Ray pray = *ray;
- float3 tp = (*throughput) * num_samples_inv;
-
- /* branch RNG state */
- path_state_branch(&ps, j, num_samples);
-
- VolumeIntegrateResult result = kernel_volume_integrate(
- kg, &ps, sd, &volume_ray, L, &tp, heterogeneous);
-
-# ifdef __VOLUME_SCATTER__
- if(result == VOLUME_PATH_SCATTERED) {
- /* todo: support equiangular, MIS and all light sampling.
- * alternatively get decoupled ray marching working on the GPU */
- kernel_path_volume_connect_light(kg, sd, emission_sd, tp, state, L);
-
- if(kernel_path_volume_bounce(kg,
- sd,
- &tp,
- &ps,
- &L->state,
- &pray))
- {
- kernel_path_indirect(kg,
- indirect_sd,
- emission_sd,
- &pray,
- tp,
- &ps,
- L);
-
- /* for render passes, sum and reset indirect light pass variables
- * for the next samples */
- path_radiance_sum_indirect(L);
- path_radiance_reset_indirect(L);
- }
- }
-# endif /* __VOLUME_SCATTER__ */
- }
-
- /* todo: avoid this calculation using decoupled ray marching */
- kernel_volume_shadow(kg, emission_sd, state, &volume_ray, throughput);
- }
+ /* Sanitize volume stack. */
+ if (!hit) {
+ kernel_volume_clean_stack(kg, state->volume_stack);
+ }
+
+ if (state->volume_stack[0].shader == SHADER_NONE) {
+ return;
+ }
+
+ /* volume attenuation, emission, scatter */
+ Ray volume_ray = *ray;
+ volume_ray.t = (hit) ? isect->t : FLT_MAX;
+
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+
+# ifdef __VOLUME_DECOUPLED__
+ /* decoupled ray marching only supported on CPU */
+ if (kernel_data.integrator.volume_decoupled) {
+ /* cache steps along volume for repeated sampling */
+ VolumeSegment volume_segment;
+
+ shader_setup_from_volume(kg, sd, &volume_ray);
+ kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, heterogeneous);
+
+ /* direct light sampling */
+ if (volume_segment.closure_flag & SD_SCATTER) {
+ volume_segment.sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
+
+ int all = kernel_data.integrator.sample_all_lights_direct;
+
+ kernel_branched_path_volume_connect_light(
+ kg, sd, emission_sd, *throughput, state, L, all, &volume_ray, &volume_segment);
+
+ /* indirect light sampling */
+ int num_samples = kernel_data.integrator.volume_samples;
+ float num_samples_inv = 1.0f / num_samples;
+
+ for (int j = 0; j < num_samples; j++) {
+ PathState ps = *state;
+ Ray pray = *ray;
+ float3 tp = *throughput;
+
+ /* branch RNG state */
+ path_state_branch(&ps, j, num_samples);
+
+ /* scatter sample. if we use distance sampling and take just one
+ * sample for direct and indirect light, we could share this
+ * computation, but makes code a bit complex */
+ float rphase = path_state_rng_1D(kg, &ps, PRNG_PHASE_CHANNEL);
+ float rscatter = path_state_rng_1D(kg, &ps, PRNG_SCATTER_DISTANCE);
+
+ VolumeIntegrateResult result = kernel_volume_decoupled_scatter(
+ kg, &ps, &pray, sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
+
+ if (result == VOLUME_PATH_SCATTERED &&
+ kernel_path_volume_bounce(kg, sd, &tp, &ps, &L->state, &pray)) {
+ kernel_path_indirect(kg, indirect_sd, emission_sd, &pray, tp * num_samples_inv, &ps, L);
+
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(L);
+ path_radiance_reset_indirect(L);
+ }
+ }
+ }
+
+ /* emission and transmittance */
+ if (volume_segment.closure_flag & SD_EMISSION)
+ path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission);
+ *throughput *= volume_segment.accum_transmittance;
+
+ /* free cached steps */
+ kernel_volume_decoupled_free(kg, &volume_segment);
+ }
+ else
+# endif /* __VOLUME_DECOUPLED__ */
+ {
+ /* GPU: no decoupled ray marching, scatter probalistically */
+ int num_samples = kernel_data.integrator.volume_samples;
+ float num_samples_inv = 1.0f / num_samples;
+
+ /* todo: we should cache the shader evaluations from stepping
+ * through the volume, for now we redo them multiple times */
+
+ for (int j = 0; j < num_samples; j++) {
+ PathState ps = *state;
+ Ray pray = *ray;
+ float3 tp = (*throughput) * num_samples_inv;
+
+ /* branch RNG state */
+ path_state_branch(&ps, j, num_samples);
+
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, &ps, sd, &volume_ray, L, &tp, heterogeneous);
+
+# ifdef __VOLUME_SCATTER__
+ if (result == VOLUME_PATH_SCATTERED) {
+ /* todo: support equiangular, MIS and all light sampling.
+ * alternatively get decoupled ray marching working on the GPU */
+ kernel_path_volume_connect_light(kg, sd, emission_sd, tp, state, L);
+
+ if (kernel_path_volume_bounce(kg, sd, &tp, &ps, &L->state, &pray)) {
+ kernel_path_indirect(kg, indirect_sd, emission_sd, &pray, tp, &ps, L);
+
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(L);
+ path_radiance_reset_indirect(L);
+ }
+ }
+# endif /* __VOLUME_SCATTER__ */
+ }
+
+ /* todo: avoid this calculation using decoupled ray marching */
+ kernel_volume_shadow(kg, emission_sd, state, &volume_ray, throughput);
+ }
}
-#endif /* __VOLUME__ */
+# endif /* __VOLUME__ */
/* bounce off surface and integrate indirect light */
ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
- ShaderData *sd, ShaderData *indirect_sd, ShaderData *emission_sd,
- float3 throughput, float num_samples_adjust, PathState *state, PathRadiance *L)
+ ShaderData *sd,
+ ShaderData *indirect_sd,
+ ShaderData *emission_sd,
+ float3 throughput,
+ float num_samples_adjust,
+ PathState *state,
+ PathRadiance *L)
{
- float sum_sample_weight = 0.0f;
-#ifdef __DENOISING_FEATURES__
- if(state->denoising_feature_weight > 0.0f) {
- for(int i = 0; i < sd->num_closure; i++) {
- const ShaderClosure *sc = &sd->closure[i];
-
- /* transparency is not handled here, but in outer loop */
- if(!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
- continue;
- }
-
- sum_sample_weight += sc->sample_weight;
- }
- }
- else {
- sum_sample_weight = 1.0f;
- }
-#endif /* __DENOISING_FEATURES__ */
-
- for(int i = 0; i < sd->num_closure; i++) {
- const ShaderClosure *sc = &sd->closure[i];
-
- /* transparency is not handled here, but in outer loop */
- if(!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
- continue;
- }
-
- int num_samples;
-
- if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
- num_samples = kernel_data.integrator.diffuse_samples;
- else if(CLOSURE_IS_BSDF_BSSRDF(sc->type))
- num_samples = 1;
- else if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
- num_samples = kernel_data.integrator.glossy_samples;
- else
- num_samples = kernel_data.integrator.transmission_samples;
-
- num_samples = ceil_to_int(num_samples_adjust*num_samples);
-
- float num_samples_inv = num_samples_adjust/num_samples;
-
- for(int j = 0; j < num_samples; j++) {
- PathState ps = *state;
- float3 tp = throughput;
- Ray bsdf_ray;
-#ifdef __SHADOW_TRICKS__
- float shadow_transparency = L->shadow_transparency;
-#endif
-
- ps.rng_hash = cmj_hash(state->rng_hash, i);
-
- if(!kernel_branched_path_surface_bounce(kg,
- sd,
- sc,
- j,
- num_samples,
- &tp,
- &ps,
- &L->state,
- &bsdf_ray,
- sum_sample_weight))
- {
- continue;
- }
-
- ps.rng_hash = state->rng_hash;
-
- kernel_path_indirect(kg,
- indirect_sd,
- emission_sd,
- &bsdf_ray,
- tp*num_samples_inv,
- &ps,
- L);
-
- /* for render passes, sum and reset indirect light pass variables
- * for the next samples */
- path_radiance_sum_indirect(L);
- path_radiance_reset_indirect(L);
-
-#ifdef __SHADOW_TRICKS__
- L->shadow_transparency = shadow_transparency;
-#endif
- }
- }
+ float sum_sample_weight = 0.0f;
+# ifdef __DENOISING_FEATURES__
+ if (state->denoising_feature_weight > 0.0f) {
+ for (int i = 0; i < sd->num_closure; i++) {
+ const ShaderClosure *sc = &sd->closure[i];
+
+ /* transparency is not handled here, but in outer loop */
+ if (!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
+ continue;
+ }
+
+ sum_sample_weight += sc->sample_weight;
+ }
+ }
+ else {
+ sum_sample_weight = 1.0f;
+ }
+# endif /* __DENOISING_FEATURES__ */
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ const ShaderClosure *sc = &sd->closure[i];
+
+ /* transparency is not handled here, but in outer loop */
+ if (!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
+ continue;
+ }
+
+ int num_samples;
+
+ if (CLOSURE_IS_BSDF_DIFFUSE(sc->type))
+ num_samples = kernel_data.integrator.diffuse_samples;
+ else if (CLOSURE_IS_BSDF_BSSRDF(sc->type))
+ num_samples = 1;
+ else if (CLOSURE_IS_BSDF_GLOSSY(sc->type))
+ num_samples = kernel_data.integrator.glossy_samples;
+ else
+ num_samples = kernel_data.integrator.transmission_samples;
+
+ num_samples = ceil_to_int(num_samples_adjust * num_samples);
+
+ float num_samples_inv = num_samples_adjust / num_samples;
+
+ for (int j = 0; j < num_samples; j++) {
+ PathState ps = *state;
+ float3 tp = throughput;
+ Ray bsdf_ray;
+# ifdef __SHADOW_TRICKS__
+ float shadow_transparency = L->shadow_transparency;
+# endif
+
+ ps.rng_hash = cmj_hash(state->rng_hash, i);
+
+ if (!kernel_branched_path_surface_bounce(
+ kg, sd, sc, j, num_samples, &tp, &ps, &L->state, &bsdf_ray, sum_sample_weight)) {
+ continue;
+ }
+
+ ps.rng_hash = state->rng_hash;
+
+ kernel_path_indirect(kg, indirect_sd, emission_sd, &bsdf_ray, tp * num_samples_inv, &ps, L);
+
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(L);
+ path_radiance_reset_indirect(L);
+
+# ifdef __SHADOW_TRICKS__
+ L->shadow_transparency = shadow_transparency;
+# endif
+ }
+ }
}
-#ifdef __SUBSURFACE__
+# ifdef __SUBSURFACE__
ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
ShaderData *sd,
ShaderData *indirect_sd,
@@ -325,111 +290,81 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
Ray *ray,
float3 throughput)
{
- for(int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
-
- if(!CLOSURE_IS_BSSRDF(sc->type))
- continue;
-
- /* set up random number generator */
- uint lcg_state = lcg_state_init(state, 0x68bc21eb);
- int num_samples = kernel_data.integrator.subsurface_samples * 3;
- float num_samples_inv = 1.0f/num_samples;
- uint bssrdf_rng_hash = cmj_hash(state->rng_hash, i);
-
- /* do subsurface scatter step with copy of shader data, this will
- * replace the BSSRDF with a diffuse BSDF closure */
- for(int j = 0; j < num_samples; j++) {
- PathState hit_state = *state;
- path_state_branch(&hit_state, j, num_samples);
- hit_state.rng_hash = bssrdf_rng_hash;
-
- LocalIntersection ss_isect;
- float bssrdf_u, bssrdf_v;
- path_state_rng_2D(kg, &hit_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
- int num_hits = subsurface_scatter_multi_intersect(kg,
- &ss_isect,
- sd,
- &hit_state,
- sc,
- &lcg_state,
- bssrdf_u, bssrdf_v,
- true);
-
- hit_state.rng_offset += PRNG_BOUNCE_NUM;
-
-#ifdef __VOLUME__
- Ray volume_ray = *ray;
- bool need_update_volume_stack =
- kernel_data.integrator.use_volumes &&
- sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
-#endif /* __VOLUME__ */
-
- /* compute lighting with the BSDF closure */
- for(int hit = 0; hit < num_hits; hit++) {
- ShaderData bssrdf_sd = *sd;
- Bssrdf *bssrdf = (Bssrdf *)sc;
- ClosureType bssrdf_type = sc->type;
- float bssrdf_roughness = bssrdf->roughness;
- subsurface_scatter_multi_setup(kg,
- &ss_isect,
- hit,
- &bssrdf_sd,
- &hit_state,
- bssrdf_type,
- bssrdf_roughness);
-
-#ifdef __VOLUME__
- if(need_update_volume_stack) {
- /* Setup ray from previous surface point to the new one. */
- float3 P = ray_offset(bssrdf_sd.P, -bssrdf_sd.Ng);
- volume_ray.D = normalize_len(P - volume_ray.P,
- &volume_ray.t);
-
- for(int k = 0; k < VOLUME_STACK_SIZE; k++) {
- hit_state.volume_stack[k] = state->volume_stack[k];
- }
-
- kernel_volume_stack_update_for_subsurface(
- kg,
- emission_sd,
- &volume_ray,
- hit_state.volume_stack);
- }
-#endif /* __VOLUME__ */
-
-#ifdef __EMISSION__
- /* direct light */
- if(kernel_data.integrator.use_direct_light) {
- int all = (kernel_data.integrator.sample_all_lights_direct) ||
- (hit_state.flag & PATH_RAY_SHADOW_CATCHER);
- kernel_branched_path_surface_connect_light(
- kg,
- &bssrdf_sd,
- emission_sd,
- &hit_state,
- throughput,
- num_samples_inv,
- L,
- all);
- }
-#endif /* __EMISSION__ */
-
- /* indirect light */
- kernel_branched_path_surface_indirect_light(
- kg,
- &bssrdf_sd,
- indirect_sd,
- emission_sd,
- throughput,
- num_samples_inv,
- &hit_state,
- L);
- }
- }
- }
+ for (int i = 0; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
+
+ if (!CLOSURE_IS_BSSRDF(sc->type))
+ continue;
+
+ /* set up random number generator */
+ uint lcg_state = lcg_state_init(state, 0x68bc21eb);
+ int num_samples = kernel_data.integrator.subsurface_samples * 3;
+ float num_samples_inv = 1.0f / num_samples;
+ uint bssrdf_rng_hash = cmj_hash(state->rng_hash, i);
+
+ /* do subsurface scatter step with copy of shader data, this will
+ * replace the BSSRDF with a diffuse BSDF closure */
+ for (int j = 0; j < num_samples; j++) {
+ PathState hit_state = *state;
+ path_state_branch(&hit_state, j, num_samples);
+ hit_state.rng_hash = bssrdf_rng_hash;
+
+ LocalIntersection ss_isect;
+ float bssrdf_u, bssrdf_v;
+ path_state_rng_2D(kg, &hit_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+ int num_hits = subsurface_scatter_multi_intersect(
+ kg, &ss_isect, sd, &hit_state, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
+
+ hit_state.rng_offset += PRNG_BOUNCE_NUM;
+
+# ifdef __VOLUME__
+ Ray volume_ray = *ray;
+ bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
+ sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
+# endif /* __VOLUME__ */
+
+ /* compute lighting with the BSDF closure */
+ for (int hit = 0; hit < num_hits; hit++) {
+ ShaderData bssrdf_sd = *sd;
+ Bssrdf *bssrdf = (Bssrdf *)sc;
+ ClosureType bssrdf_type = sc->type;
+ float bssrdf_roughness = bssrdf->roughness;
+ subsurface_scatter_multi_setup(
+ kg, &ss_isect, hit, &bssrdf_sd, &hit_state, bssrdf_type, bssrdf_roughness);
+
+# ifdef __VOLUME__
+ if (need_update_volume_stack) {
+ /* Setup ray from previous surface point to the new one. */
+ float3 P = ray_offset(bssrdf_sd.P, -bssrdf_sd.Ng);
+ volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t);
+
+ for (int k = 0; k < VOLUME_STACK_SIZE; k++) {
+ hit_state.volume_stack[k] = state->volume_stack[k];
+ }
+
+ kernel_volume_stack_update_for_subsurface(
+ kg, emission_sd, &volume_ray, hit_state.volume_stack);
+ }
+# endif /* __VOLUME__ */
+
+# ifdef __EMISSION__
+ /* direct light */
+ if (kernel_data.integrator.use_direct_light) {
+ int all = (kernel_data.integrator.sample_all_lights_direct) ||
+ (hit_state.flag & PATH_RAY_SHADOW_CATCHER);
+ kernel_branched_path_surface_connect_light(
+ kg, &bssrdf_sd, emission_sd, &hit_state, throughput, num_samples_inv, L, all);
+ }
+# endif /* __EMISSION__ */
+
+ /* indirect light */
+ kernel_branched_path_surface_indirect_light(
+ kg, &bssrdf_sd, indirect_sd, emission_sd, throughput, num_samples_inv, &hit_state, L);
+ }
+ }
+ }
}
-#endif /* __SUBSURFACE__ */
+# endif /* __SUBSURFACE__ */
ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
uint rng_hash,
@@ -438,188 +373,171 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
ccl_global float *buffer,
PathRadiance *L)
{
- /* initialize */
- float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
-
- path_radiance_init(L, kernel_data.film.use_light_pass);
-
- /* shader data memory used for both volumes and surfaces, saves stack space */
- ShaderData sd;
- /* shader data used by emission, shadows, volume stacks, indirect path */
- ShaderDataTinyStorage emission_sd_storage;
- ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
- ShaderData indirect_sd;
-
- PathState state;
- path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
-
- /* Main Loop
- * Here we only handle transparency intersections from the camera ray.
- * Indirect bounces are handled in kernel_branched_path_surface_indirect_light().
- */
- for(;;) {
- /* Find intersection with objects in scene. */
- Intersection isect;
- bool hit = kernel_path_scene_intersect(kg, &state, &ray, &isect, L);
-
-#ifdef __VOLUME__
- /* Volume integration. */
- kernel_branched_path_volume(kg,
- &sd,
- &state,
- &ray,
- &throughput,
- &isect,
- hit,
- &indirect_sd,
- emission_sd,
- L);
-#endif /* __VOLUME__ */
-
- /* Shade background. */
- if(!hit) {
- kernel_path_background(kg, &state, &ray, throughput, &sd, L);
- break;
- }
-
- /* Setup and evaluate shader. */
- shader_setup_from_ray(kg, &sd, &isect, &ray);
-
- /* Skip most work for volume bounding surface. */
-#ifdef __VOLUME__
- if(!(sd.flag & SD_HAS_ONLY_VOLUME)) {
-#endif
-
- shader_eval_surface(kg, &sd, &state, state.flag);
- shader_merge_closures(&sd);
-
- /* Apply shadow catcher, holdout, emission. */
- if(!kernel_path_shader_apply(kg,
- &sd,
- &state,
- &ray,
- throughput,
- emission_sd,
- L,
- buffer))
- {
- break;
- }
-
- /* transparency termination */
- if(state.flag & PATH_RAY_TRANSPARENT) {
- /* path termination. this is a strange place to put the termination, it's
- * mainly due to the mixed in MIS that we use. gives too many unneeded
- * shader evaluations, only need emission if we are going to terminate */
- float probability = path_state_continuation_probability(kg, &state, throughput);
-
- if(probability == 0.0f) {
- break;
- }
- else if(probability != 1.0f) {
- float terminate = path_state_rng_1D(kg, &state, PRNG_TERMINATE);
-
- if(terminate >= probability)
- break;
-
- throughput /= probability;
- }
- }
-
- kernel_update_denoising_features(kg, &sd, &state, L);
-
-#ifdef __AO__
- /* ambient occlusion */
- if(kernel_data.integrator.use_ambient_occlusion) {
- kernel_branched_path_ao(kg, &sd, emission_sd, L, &state, throughput);
- }
-#endif /* __AO__ */
-
-#ifdef __SUBSURFACE__
- /* bssrdf scatter to a different location on the same object */
- if(sd.flag & SD_BSSRDF) {
- kernel_branched_path_subsurface_scatter(kg, &sd, &indirect_sd, emission_sd,
- L, &state, &ray, throughput);
- }
-#endif /* __SUBSURFACE__ */
-
- PathState hit_state = state;
-
-#ifdef __EMISSION__
- /* direct light */
- if(kernel_data.integrator.use_direct_light) {
- int all = (kernel_data.integrator.sample_all_lights_direct) ||
- (state.flag & PATH_RAY_SHADOW_CATCHER);
- kernel_branched_path_surface_connect_light(kg,
- &sd, emission_sd, &hit_state, throughput, 1.0f, L, all);
- }
-#endif /* __EMISSION__ */
-
- /* indirect light */
- kernel_branched_path_surface_indirect_light(kg,
- &sd, &indirect_sd, emission_sd, throughput, 1.0f, &hit_state, L);
-
- /* continue in case of transparency */
- throughput *= shader_bsdf_transparency(kg, &sd);
-
- if(is_zero(throughput))
- break;
-
- /* Update Path State */
- path_state_next(kg, &state, LABEL_TRANSPARENT);
-
-#ifdef __VOLUME__
- }
- else {
- if(!path_state_volume_next(kg, &state)) {
- break;
- }
- }
-#endif
-
- ray.P = ray_offset(sd.P, -sd.Ng);
- ray.t -= sd.ray_length; /* clipping works through transparent */
-
-#ifdef __RAY_DIFFERENTIALS__
- ray.dP = sd.dP;
- ray.dD.dx = -sd.dI.dx;
- ray.dD.dy = -sd.dI.dy;
-#endif /* __RAY_DIFFERENTIALS__ */
-
-#ifdef __VOLUME__
- /* enter/exit volume */
- kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack);
-#endif /* __VOLUME__ */
- }
+ /* initialize */
+ float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+
+ path_radiance_init(L, kernel_data.film.use_light_pass);
+
+ /* shader data memory used for both volumes and surfaces, saves stack space */
+ ShaderData sd;
+ /* shader data used by emission, shadows, volume stacks, indirect path */
+ ShaderDataTinyStorage emission_sd_storage;
+ ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+ ShaderData indirect_sd;
+
+ PathState state;
+ path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
+
+ /* Main Loop
+ * Here we only handle transparency intersections from the camera ray.
+ * Indirect bounces are handled in kernel_branched_path_surface_indirect_light().
+ */
+ for (;;) {
+ /* Find intersection with objects in scene. */
+ Intersection isect;
+ bool hit = kernel_path_scene_intersect(kg, &state, &ray, &isect, L);
+
+# ifdef __VOLUME__
+ /* Volume integration. */
+ kernel_branched_path_volume(
+ kg, &sd, &state, &ray, &throughput, &isect, hit, &indirect_sd, emission_sd, L);
+# endif /* __VOLUME__ */
+
+ /* Shade background. */
+ if (!hit) {
+ kernel_path_background(kg, &state, &ray, throughput, &sd, L);
+ break;
+ }
+
+ /* Setup and evaluate shader. */
+ shader_setup_from_ray(kg, &sd, &isect, &ray);
+
+ /* Skip most work for volume bounding surface. */
+# ifdef __VOLUME__
+ if (!(sd.flag & SD_HAS_ONLY_VOLUME)) {
+# endif
+
+ shader_eval_surface(kg, &sd, &state, state.flag);
+ shader_merge_closures(&sd);
+
+ /* Apply shadow catcher, holdout, emission. */
+ if (!kernel_path_shader_apply(kg, &sd, &state, &ray, throughput, emission_sd, L, buffer)) {
+ break;
+ }
+
+ /* transparency termination */
+ if (state.flag & PATH_RAY_TRANSPARENT) {
+ /* path termination. this is a strange place to put the termination, it's
+ * mainly due to the mixed in MIS that we use. gives too many unneeded
+ * shader evaluations, only need emission if we are going to terminate */
+ float probability = path_state_continuation_probability(kg, &state, throughput);
+
+ if (probability == 0.0f) {
+ break;
+ }
+ else if (probability != 1.0f) {
+ float terminate = path_state_rng_1D(kg, &state, PRNG_TERMINATE);
+
+ if (terminate >= probability)
+ break;
+
+ throughput /= probability;
+ }
+ }
+
+ kernel_update_denoising_features(kg, &sd, &state, L);
+
+# ifdef __AO__
+ /* ambient occlusion */
+ if (kernel_data.integrator.use_ambient_occlusion) {
+ kernel_branched_path_ao(kg, &sd, emission_sd, L, &state, throughput);
+ }
+# endif /* __AO__ */
+
+# ifdef __SUBSURFACE__
+ /* bssrdf scatter to a different location on the same object */
+ if (sd.flag & SD_BSSRDF) {
+ kernel_branched_path_subsurface_scatter(
+ kg, &sd, &indirect_sd, emission_sd, L, &state, &ray, throughput);
+ }
+# endif /* __SUBSURFACE__ */
+
+ PathState hit_state = state;
+
+# ifdef __EMISSION__
+ /* direct light */
+ if (kernel_data.integrator.use_direct_light) {
+ int all = (kernel_data.integrator.sample_all_lights_direct) ||
+ (state.flag & PATH_RAY_SHADOW_CATCHER);
+ kernel_branched_path_surface_connect_light(
+ kg, &sd, emission_sd, &hit_state, throughput, 1.0f, L, all);
+ }
+# endif /* __EMISSION__ */
+
+ /* indirect light */
+ kernel_branched_path_surface_indirect_light(
+ kg, &sd, &indirect_sd, emission_sd, throughput, 1.0f, &hit_state, L);
+
+ /* continue in case of transparency */
+ throughput *= shader_bsdf_transparency(kg, &sd);
+
+ if (is_zero(throughput))
+ break;
+
+ /* Update Path State */
+ path_state_next(kg, &state, LABEL_TRANSPARENT);
+
+# ifdef __VOLUME__
+ }
+ else {
+ if (!path_state_volume_next(kg, &state)) {
+ break;
+ }
+ }
+# endif
+
+ ray.P = ray_offset(sd.P, -sd.Ng);
+ ray.t -= sd.ray_length; /* clipping works through transparent */
+
+# ifdef __RAY_DIFFERENTIALS__
+ ray.dP = sd.dP;
+ ray.dD.dx = -sd.dI.dx;
+ ray.dD.dy = -sd.dI.dy;
+# endif /* __RAY_DIFFERENTIALS__ */
+
+# ifdef __VOLUME__
+ /* enter/exit volume */
+ kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack);
+# endif /* __VOLUME__ */
+ }
}
-ccl_device void kernel_branched_path_trace(KernelGlobals *kg,
- ccl_global float *buffer,
- int sample, int x, int y, int offset, int stride)
+ccl_device void kernel_branched_path_trace(
+ KernelGlobals *kg, ccl_global float *buffer, int sample, int x, int y, int offset, int stride)
{
- /* buffer offset */
- int index = offset + x + y*stride;
- int pass_stride = kernel_data.film.pass_stride;
+ /* buffer offset */
+ int index = offset + x + y * stride;
+ int pass_stride = kernel_data.film.pass_stride;
- buffer += index*pass_stride;
+ buffer += index * pass_stride;
- /* initialize random numbers and ray */
- uint rng_hash;
- Ray ray;
+ /* initialize random numbers and ray */
+ uint rng_hash;
+ Ray ray;
- kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray);
+ kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray);
- /* integrate */
- PathRadiance L;
+ /* integrate */
+ PathRadiance L;
- if(ray.t != 0.0f) {
- kernel_branched_path_integrate(kg, rng_hash, sample, ray, buffer, &L);
- kernel_write_result(kg, buffer, sample, &L);
- }
+ if (ray.t != 0.0f) {
+ kernel_branched_path_integrate(kg, rng_hash, sample, ray, buffer, &L);
+ kernel_write_result(kg, buffer, sample, &L);
+ }
}
-#endif /* __SPLIT_KERNEL__ */
+# endif /* __SPLIT_KERNEL__ */
-#endif /* __BRANCHED_PATH__ */
+#endif /* __BRANCHED_PATH__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path_common.h b/intern/cycles/kernel/kernel_path_common.h
index d83fd474cde..815767595a9 100644
--- a/intern/cycles/kernel/kernel_path_common.h
+++ b/intern/cycles/kernel/kernel_path_common.h
@@ -18,34 +18,31 @@
CCL_NAMESPACE_BEGIN
-ccl_device_inline void kernel_path_trace_setup(KernelGlobals *kg,
- int sample,
- int x, int y,
- uint *rng_hash,
- ccl_addr_space Ray *ray)
+ccl_device_inline void kernel_path_trace_setup(
+ KernelGlobals *kg, int sample, int x, int y, uint *rng_hash, ccl_addr_space Ray *ray)
{
- float filter_u;
- float filter_v;
+ float filter_u;
+ float filter_v;
- int num_samples = kernel_data.integrator.aa_samples;
+ int num_samples = kernel_data.integrator.aa_samples;
- path_rng_init(kg, sample, num_samples, rng_hash, x, y, &filter_u, &filter_v);
+ path_rng_init(kg, sample, num_samples, rng_hash, x, y, &filter_u, &filter_v);
- /* sample camera ray */
+ /* sample camera ray */
- float lens_u = 0.0f, lens_v = 0.0f;
+ float lens_u = 0.0f, lens_v = 0.0f;
- if(kernel_data.cam.aperturesize > 0.0f)
- path_rng_2D(kg, *rng_hash, sample, num_samples, PRNG_LENS_U, &lens_u, &lens_v);
+ if (kernel_data.cam.aperturesize > 0.0f)
+ path_rng_2D(kg, *rng_hash, sample, num_samples, PRNG_LENS_U, &lens_u, &lens_v);
- float time = 0.0f;
+ float time = 0.0f;
#ifdef __CAMERA_MOTION__
- if(kernel_data.cam.shuttertime != -1.0f)
- time = path_rng_1D(kg, *rng_hash, sample, num_samples, PRNG_TIME);
+ if (kernel_data.cam.shuttertime != -1.0f)
+ time = path_rng_1D(kg, *rng_hash, sample, num_samples, PRNG_TIME);
#endif
- camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray);
+ camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h
index e85050df4bb..cdca0b1f9bf 100644
--- a/intern/cycles/kernel/kernel_path_state.h
+++ b/intern/cycles/kernel/kernel_path_state.h
@@ -23,249 +23,252 @@ ccl_device_inline void path_state_init(KernelGlobals *kg,
int sample,
ccl_addr_space Ray *ray)
{
- state->flag = PATH_RAY_CAMERA|PATH_RAY_MIS_SKIP|PATH_RAY_TRANSPARENT_BACKGROUND;
+ state->flag = PATH_RAY_CAMERA | PATH_RAY_MIS_SKIP | PATH_RAY_TRANSPARENT_BACKGROUND;
- state->rng_hash = rng_hash;
- state->rng_offset = PRNG_BASE_NUM;
- state->sample = sample;
- state->num_samples = kernel_data.integrator.aa_samples;
- state->branch_factor = 1.0f;
+ state->rng_hash = rng_hash;
+ state->rng_offset = PRNG_BASE_NUM;
+ state->sample = sample;
+ state->num_samples = kernel_data.integrator.aa_samples;
+ state->branch_factor = 1.0f;
- state->bounce = 0;
- state->diffuse_bounce = 0;
- state->glossy_bounce = 0;
- state->transmission_bounce = 0;
- state->transparent_bounce = 0;
+ state->bounce = 0;
+ state->diffuse_bounce = 0;
+ state->glossy_bounce = 0;
+ state->transmission_bounce = 0;
+ state->transparent_bounce = 0;
#ifdef __DENOISING_FEATURES__
- if(kernel_data.film.pass_denoising_data) {
- state->flag |= PATH_RAY_STORE_SHADOW_INFO;
- state->denoising_feature_weight = 1.0f;
- }
- else {
- state->denoising_feature_weight = 0.0f;
- }
-#endif /* __DENOISING_FEATURES__ */
-
- state->min_ray_pdf = FLT_MAX;
- state->ray_pdf = 0.0f;
+ if (kernel_data.film.pass_denoising_data) {
+ state->flag |= PATH_RAY_STORE_SHADOW_INFO;
+ state->denoising_feature_weight = 1.0f;
+ }
+ else {
+ state->denoising_feature_weight = 0.0f;
+ }
+#endif /* __DENOISING_FEATURES__ */
+
+ state->min_ray_pdf = FLT_MAX;
+ state->ray_pdf = 0.0f;
#ifdef __LAMP_MIS__
- state->ray_t = 0.0f;
+ state->ray_t = 0.0f;
#endif
#ifdef __VOLUME__
- state->volume_bounce = 0;
- state->volume_bounds_bounce = 0;
-
- if(kernel_data.integrator.use_volumes) {
- /* Initialize volume stack with volume we are inside of. */
- kernel_volume_stack_init(kg, stack_sd, state, ray, state->volume_stack);
- }
- else {
- state->volume_stack[0].shader = SHADER_NONE;
- }
+ state->volume_bounce = 0;
+ state->volume_bounds_bounce = 0;
+
+ if (kernel_data.integrator.use_volumes) {
+ /* Initialize volume stack with volume we are inside of. */
+ kernel_volume_stack_init(kg, stack_sd, state, ray, state->volume_stack);
+ }
+ else {
+ state->volume_stack[0].shader = SHADER_NONE;
+ }
#endif
}
-ccl_device_inline void path_state_next(KernelGlobals *kg, ccl_addr_space PathState *state, int label)
+ccl_device_inline void path_state_next(KernelGlobals *kg,
+ ccl_addr_space PathState *state,
+ int label)
{
- /* ray through transparent keeps same flags from previous ray and is
- * not counted as a regular bounce, transparent has separate max */
- if(label & LABEL_TRANSPARENT) {
- state->flag |= PATH_RAY_TRANSPARENT;
- state->transparent_bounce++;
- if(state->transparent_bounce >= kernel_data.integrator.transparent_max_bounce) {
- state->flag |= PATH_RAY_TERMINATE_IMMEDIATE;
- }
+ /* ray through transparent keeps same flags from previous ray and is
+ * not counted as a regular bounce, transparent has separate max */
+ if (label & LABEL_TRANSPARENT) {
+ state->flag |= PATH_RAY_TRANSPARENT;
+ state->transparent_bounce++;
+ if (state->transparent_bounce >= kernel_data.integrator.transparent_max_bounce) {
+ state->flag |= PATH_RAY_TERMINATE_IMMEDIATE;
+ }
- if(!kernel_data.integrator.transparent_shadows)
- state->flag |= PATH_RAY_MIS_SKIP;
+ if (!kernel_data.integrator.transparent_shadows)
+ state->flag |= PATH_RAY_MIS_SKIP;
- /* random number generator next bounce */
- state->rng_offset += PRNG_BOUNCE_NUM;
+ /* random number generator next bounce */
+ state->rng_offset += PRNG_BOUNCE_NUM;
- return;
- }
+ return;
+ }
- state->bounce++;
- if(state->bounce >= kernel_data.integrator.max_bounce) {
- state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
- }
+ state->bounce++;
+ if (state->bounce >= kernel_data.integrator.max_bounce) {
+ state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
+ }
- state->flag &= ~(PATH_RAY_ALL_VISIBILITY|PATH_RAY_MIS_SKIP);
+ state->flag &= ~(PATH_RAY_ALL_VISIBILITY | PATH_RAY_MIS_SKIP);
#ifdef __VOLUME__
- if(label & LABEL_VOLUME_SCATTER) {
- /* volume scatter */
- state->flag |= PATH_RAY_VOLUME_SCATTER;
- state->flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND;
-
- state->volume_bounce++;
- if(state->volume_bounce >= kernel_data.integrator.max_volume_bounce) {
- state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
- }
- }
- else
+ if (label & LABEL_VOLUME_SCATTER) {
+ /* volume scatter */
+ state->flag |= PATH_RAY_VOLUME_SCATTER;
+ state->flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND;
+
+ state->volume_bounce++;
+ if (state->volume_bounce >= kernel_data.integrator.max_volume_bounce) {
+ state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
+ }
+ }
+ else
#endif
- {
- /* surface reflection/transmission */
- if(label & LABEL_REFLECT) {
- state->flag |= PATH_RAY_REFLECT;
- state->flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND;
-
- if(label & LABEL_DIFFUSE) {
- state->diffuse_bounce++;
- if(state->diffuse_bounce >= kernel_data.integrator.max_diffuse_bounce) {
- state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
- }
- }
- else {
- state->glossy_bounce++;
- if(state->glossy_bounce >= kernel_data.integrator.max_glossy_bounce) {
- state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
- }
- }
- }
- else {
- kernel_assert(label & LABEL_TRANSMIT);
-
- state->flag |= PATH_RAY_TRANSMIT;
-
- if(!(label & LABEL_TRANSMIT_TRANSPARENT)) {
- state->flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND;
- }
-
- state->transmission_bounce++;
- if(state->transmission_bounce >= kernel_data.integrator.max_transmission_bounce) {
- state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
- }
- }
-
- /* diffuse/glossy/singular */
- if(label & LABEL_DIFFUSE) {
- state->flag |= PATH_RAY_DIFFUSE|PATH_RAY_DIFFUSE_ANCESTOR;
- }
- else if(label & LABEL_GLOSSY) {
- state->flag |= PATH_RAY_GLOSSY;
- }
- else {
- kernel_assert(label & LABEL_SINGULAR);
- state->flag |= PATH_RAY_GLOSSY|PATH_RAY_SINGULAR|PATH_RAY_MIS_SKIP;
- }
- }
-
- /* random number generator next bounce */
- state->rng_offset += PRNG_BOUNCE_NUM;
+ {
+ /* surface reflection/transmission */
+ if (label & LABEL_REFLECT) {
+ state->flag |= PATH_RAY_REFLECT;
+ state->flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND;
+
+ if (label & LABEL_DIFFUSE) {
+ state->diffuse_bounce++;
+ if (state->diffuse_bounce >= kernel_data.integrator.max_diffuse_bounce) {
+ state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
+ }
+ }
+ else {
+ state->glossy_bounce++;
+ if (state->glossy_bounce >= kernel_data.integrator.max_glossy_bounce) {
+ state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
+ }
+ }
+ }
+ else {
+ kernel_assert(label & LABEL_TRANSMIT);
+
+ state->flag |= PATH_RAY_TRANSMIT;
+
+ if (!(label & LABEL_TRANSMIT_TRANSPARENT)) {
+ state->flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND;
+ }
+
+ state->transmission_bounce++;
+ if (state->transmission_bounce >= kernel_data.integrator.max_transmission_bounce) {
+ state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
+ }
+ }
+
+ /* diffuse/glossy/singular */
+ if (label & LABEL_DIFFUSE) {
+ state->flag |= PATH_RAY_DIFFUSE | PATH_RAY_DIFFUSE_ANCESTOR;
+ }
+ else if (label & LABEL_GLOSSY) {
+ state->flag |= PATH_RAY_GLOSSY;
+ }
+ else {
+ kernel_assert(label & LABEL_SINGULAR);
+ state->flag |= PATH_RAY_GLOSSY | PATH_RAY_SINGULAR | PATH_RAY_MIS_SKIP;
+ }
+ }
+
+ /* random number generator next bounce */
+ state->rng_offset += PRNG_BOUNCE_NUM;
#ifdef __DENOISING_FEATURES__
- if((state->denoising_feature_weight == 0.0f) && !(state->flag & PATH_RAY_SHADOW_CATCHER)) {
- state->flag &= ~PATH_RAY_STORE_SHADOW_INFO;
- }
+ if ((state->denoising_feature_weight == 0.0f) && !(state->flag & PATH_RAY_SHADOW_CATCHER)) {
+ state->flag &= ~PATH_RAY_STORE_SHADOW_INFO;
+ }
#endif
}
#ifdef __VOLUME__
ccl_device_inline bool path_state_volume_next(KernelGlobals *kg, ccl_addr_space PathState *state)
{
- /* For volume bounding meshes we pass through without counting transparent
- * bounces, only sanity check in case self intersection gets us stuck. */
- state->volume_bounds_bounce++;
- if(state->volume_bounds_bounce > VOLUME_BOUNDS_MAX) {
- return false;
- }
-
- /* Random number generator next bounce. */
- if(state->volume_bounds_bounce > 1) {
- state->rng_offset += PRNG_BOUNCE_NUM;
- }
-
- return true;
+ /* For volume bounding meshes we pass through without counting transparent
+ * bounces, only sanity check in case self intersection gets us stuck. */
+ state->volume_bounds_bounce++;
+ if (state->volume_bounds_bounce > VOLUME_BOUNDS_MAX) {
+ return false;
+ }
+
+ /* Random number generator next bounce. */
+ if (state->volume_bounds_bounce > 1) {
+ state->rng_offset += PRNG_BOUNCE_NUM;
+ }
+
+ return true;
}
#endif
-ccl_device_inline uint path_state_ray_visibility(KernelGlobals *kg, ccl_addr_space PathState *state)
+ccl_device_inline uint path_state_ray_visibility(KernelGlobals *kg,
+ ccl_addr_space PathState *state)
{
- uint flag = state->flag & PATH_RAY_ALL_VISIBILITY;
+ uint flag = state->flag & PATH_RAY_ALL_VISIBILITY;
- /* for visibility, diffuse/glossy are for reflection only */
- if(flag & PATH_RAY_TRANSMIT)
- flag &= ~(PATH_RAY_DIFFUSE|PATH_RAY_GLOSSY);
- /* todo: this is not supported as its own ray visibility yet */
- if(state->flag & PATH_RAY_VOLUME_SCATTER)
- flag |= PATH_RAY_DIFFUSE;
+ /* for visibility, diffuse/glossy are for reflection only */
+ if (flag & PATH_RAY_TRANSMIT)
+ flag &= ~(PATH_RAY_DIFFUSE | PATH_RAY_GLOSSY);
+ /* todo: this is not supported as its own ray visibility yet */
+ if (state->flag & PATH_RAY_VOLUME_SCATTER)
+ flag |= PATH_RAY_DIFFUSE;
- return flag;
+ return flag;
}
ccl_device_inline float path_state_continuation_probability(KernelGlobals *kg,
ccl_addr_space PathState *state,
const float3 throughput)
{
- if(state->flag & PATH_RAY_TERMINATE_IMMEDIATE) {
- /* Ray is to be terminated immediately. */
- return 0.0f;
- }
- else if(state->flag & PATH_RAY_TRANSPARENT) {
- /* Do at least one bounce without RR. */
- if(state->transparent_bounce <= 1) {
- return 1.0f;
- }
+ if (state->flag & PATH_RAY_TERMINATE_IMMEDIATE) {
+ /* Ray is to be terminated immediately. */
+ return 0.0f;
+ }
+ else if (state->flag & PATH_RAY_TRANSPARENT) {
+ /* Do at least one bounce without RR. */
+ if (state->transparent_bounce <= 1) {
+ return 1.0f;
+ }
#ifdef __SHADOW_TRICKS__
- /* Exception for shadow catcher not working correctly with RR. */
- else if((state->flag & PATH_RAY_SHADOW_CATCHER) && (state->transparent_bounce <= 8)) {
- return 1.0f;
- }
+ /* Exception for shadow catcher not working correctly with RR. */
+ else if ((state->flag & PATH_RAY_SHADOW_CATCHER) && (state->transparent_bounce <= 8)) {
+ return 1.0f;
+ }
#endif
- }
- else {
- /* Do at least one bounce without RR. */
- if(state->bounce <= 1) {
- return 1.0f;
- }
+ }
+ else {
+ /* Do at least one bounce without RR. */
+ if (state->bounce <= 1) {
+ return 1.0f;
+ }
#ifdef __SHADOW_TRICKS__
- /* Exception for shadow catcher not working correctly with RR. */
- else if((state->flag & PATH_RAY_SHADOW_CATCHER) && (state->bounce <= 3)) {
- return 1.0f;
- }
+ /* Exception for shadow catcher not working correctly with RR. */
+ else if ((state->flag & PATH_RAY_SHADOW_CATCHER) && (state->bounce <= 3)) {
+ return 1.0f;
+ }
#endif
- }
+ }
- /* Probabilistic termination: use sqrt() to roughly match typical view
- * transform and do path termination a bit later on average. */
- return min(sqrtf(max3(fabs(throughput)) * state->branch_factor), 1.0f);
+ /* Probabilistic termination: use sqrt() to roughly match typical view
+ * transform and do path termination a bit later on average. */
+ return min(sqrtf(max3(fabs(throughput)) * state->branch_factor), 1.0f);
}
/* TODO(DingTo): Find more meaningful name for this */
ccl_device_inline void path_state_modify_bounce(ccl_addr_space PathState *state, bool increase)
{
- /* Modify bounce temporarily for shader eval */
- if(increase)
- state->bounce += 1;
- else
- state->bounce -= 1;
+ /* Modify bounce temporarily for shader eval */
+ if (increase)
+ state->bounce += 1;
+ else
+ state->bounce -= 1;
}
ccl_device_inline bool path_state_ao_bounce(KernelGlobals *kg, ccl_addr_space PathState *state)
{
- if(state->bounce <= kernel_data.integrator.ao_bounces) {
- return false;
- }
+ if (state->bounce <= kernel_data.integrator.ao_bounces) {
+ return false;
+ }
- int bounce = state->bounce - state->transmission_bounce - (state->glossy_bounce > 0);
- return (bounce > kernel_data.integrator.ao_bounces);
+ int bounce = state->bounce - state->transmission_bounce - (state->glossy_bounce > 0);
+ return (bounce > kernel_data.integrator.ao_bounces);
}
ccl_device_inline void path_state_branch(ccl_addr_space PathState *state,
int branch,
int num_branches)
{
- if(num_branches > 1) {
- /* Path is splitting into a branch, adjust so that each branch
- * still gets a unique sample from the same sequence. */
- state->sample = state->sample*num_branches + branch;
- state->num_samples = state->num_samples*num_branches;
- state->branch_factor *= num_branches;
- }
+ if (num_branches > 1) {
+ /* Path is splitting into a branch, adjust so that each branch
+ * still gets a unique sample from the same sequence. */
+ state->sample = state->sample * num_branches + branch;
+ state->num_samples = state->num_samples * num_branches;
+ state->branch_factor *= num_branches;
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path_subsurface.h b/intern/cycles/kernel/kernel_path_subsurface.h
index b5a92c74ed5..97d3f292ca3 100644
--- a/intern/cycles/kernel/kernel_path_subsurface.h
+++ b/intern/cycles/kernel/kernel_path_subsurface.h
@@ -22,141 +22,118 @@ ccl_device
# else
ccl_device_inline
# endif
-bool kernel_path_subsurface_scatter(
- KernelGlobals *kg,
- ShaderData *sd,
- ShaderData *emission_sd,
- PathRadiance *L,
- ccl_addr_space PathState *state,
- ccl_addr_space Ray *ray,
- ccl_addr_space float3 *throughput,
- ccl_addr_space SubsurfaceIndirectRays *ss_indirect)
+ bool
+ kernel_path_subsurface_scatter(KernelGlobals *kg,
+ ShaderData *sd,
+ ShaderData *emission_sd,
+ PathRadiance *L,
+ ccl_addr_space PathState *state,
+ ccl_addr_space Ray *ray,
+ ccl_addr_space float3 *throughput,
+ ccl_addr_space SubsurfaceIndirectRays *ss_indirect)
{
- PROFILING_INIT(kg, PROFILING_SUBSURFACE);
-
- float bssrdf_u, bssrdf_v;
- path_state_rng_2D(kg, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
-
- const ShaderClosure *sc = shader_bssrdf_pick(sd, throughput, &bssrdf_u);
-
- /* do bssrdf scatter step if we picked a bssrdf closure */
- if(sc) {
- /* We should never have two consecutive BSSRDF bounces,
- * the second one should be converted to a diffuse BSDF to
- * avoid this.
- */
- kernel_assert(!(state->flag & PATH_RAY_DIFFUSE_ANCESTOR));
-
- uint lcg_state = lcg_state_init_addrspace(state, 0x68bc21eb);
-
- LocalIntersection ss_isect;
- int num_hits = subsurface_scatter_multi_intersect(kg,
- &ss_isect,
- sd,
- state,
- sc,
- &lcg_state,
- bssrdf_u, bssrdf_v,
- false);
+ PROFILING_INIT(kg, PROFILING_SUBSURFACE);
+
+ float bssrdf_u, bssrdf_v;
+ path_state_rng_2D(kg, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+
+ const ShaderClosure *sc = shader_bssrdf_pick(sd, throughput, &bssrdf_u);
+
+ /* do bssrdf scatter step if we picked a bssrdf closure */
+ if (sc) {
+ /* We should never have two consecutive BSSRDF bounces,
+ * the second one should be converted to a diffuse BSDF to
+ * avoid this.
+ */
+ kernel_assert(!(state->flag & PATH_RAY_DIFFUSE_ANCESTOR));
+
+ uint lcg_state = lcg_state_init_addrspace(state, 0x68bc21eb);
+
+ LocalIntersection ss_isect;
+ int num_hits = subsurface_scatter_multi_intersect(
+ kg, &ss_isect, sd, state, sc, &lcg_state, bssrdf_u, bssrdf_v, false);
# ifdef __VOLUME__
- bool need_update_volume_stack =
- kernel_data.integrator.use_volumes &&
- sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
-# endif /* __VOLUME__ */
-
- /* Closure memory will be overwritten, so read required variables now. */
- Bssrdf *bssrdf = (Bssrdf *)sc;
- ClosureType bssrdf_type = sc->type;
- float bssrdf_roughness = bssrdf->roughness;
-
- /* compute lighting with the BSDF closure */
- for(int hit = 0; hit < num_hits; hit++) {
- /* NOTE: We reuse the existing ShaderData, we assume the path
- * integration loop stops when this function returns true.
- */
- subsurface_scatter_multi_setup(kg,
- &ss_isect,
- hit,
- sd,
- state,
- bssrdf_type,
- bssrdf_roughness);
-
- kernel_path_surface_connect_light(kg, sd, emission_sd, *throughput, state, L);
-
- ccl_addr_space PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays];
- ccl_addr_space Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays];
- ccl_addr_space float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays];
- PathRadianceState *hit_L_state = &ss_indirect->L_state[ss_indirect->num_rays];
-
- *hit_state = *state;
- *hit_ray = *ray;
- *hit_tp = *throughput;
- *hit_L_state = L->state;
-
- hit_state->rng_offset += PRNG_BOUNCE_NUM;
-
- if(kernel_path_surface_bounce(kg,
- sd,
- hit_tp,
- hit_state,
- hit_L_state,
- hit_ray))
- {
+ bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
+ sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
+# endif /* __VOLUME__ */
+
+ /* Closure memory will be overwritten, so read required variables now. */
+ Bssrdf *bssrdf = (Bssrdf *)sc;
+ ClosureType bssrdf_type = sc->type;
+ float bssrdf_roughness = bssrdf->roughness;
+
+ /* compute lighting with the BSDF closure */
+ for (int hit = 0; hit < num_hits; hit++) {
+ /* NOTE: We reuse the existing ShaderData, we assume the path
+ * integration loop stops when this function returns true.
+ */
+ subsurface_scatter_multi_setup(kg, &ss_isect, hit, sd, state, bssrdf_type, bssrdf_roughness);
+
+ kernel_path_surface_connect_light(kg, sd, emission_sd, *throughput, state, L);
+
+ ccl_addr_space PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays];
+ ccl_addr_space Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays];
+ ccl_addr_space float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays];
+ PathRadianceState *hit_L_state = &ss_indirect->L_state[ss_indirect->num_rays];
+
+ *hit_state = *state;
+ *hit_ray = *ray;
+ *hit_tp = *throughput;
+ *hit_L_state = L->state;
+
+ hit_state->rng_offset += PRNG_BOUNCE_NUM;
+
+ if (kernel_path_surface_bounce(kg, sd, hit_tp, hit_state, hit_L_state, hit_ray)) {
# ifdef __LAMP_MIS__
- hit_state->ray_t = 0.0f;
-# endif /* __LAMP_MIS__ */
+ hit_state->ray_t = 0.0f;
+# endif /* __LAMP_MIS__ */
# ifdef __VOLUME__
- if(need_update_volume_stack) {
- Ray volume_ray = *ray;
- /* Setup ray from previous surface point to the new one. */
- volume_ray.D = normalize_len(hit_ray->P - volume_ray.P,
- &volume_ray.t);
-
- kernel_volume_stack_update_for_subsurface(
- kg,
- emission_sd,
- &volume_ray,
- hit_state->volume_stack);
- }
-# endif /* __VOLUME__ */
- ss_indirect->num_rays++;
- }
- }
- return true;
- }
- return false;
+ if (need_update_volume_stack) {
+ Ray volume_ray = *ray;
+ /* Setup ray from previous surface point to the new one. */
+ volume_ray.D = normalize_len(hit_ray->P - volume_ray.P, &volume_ray.t);
+
+ kernel_volume_stack_update_for_subsurface(
+ kg, emission_sd, &volume_ray, hit_state->volume_stack);
+ }
+# endif /* __VOLUME__ */
+ ss_indirect->num_rays++;
+ }
+ }
+ return true;
+ }
+ return false;
}
ccl_device_inline void kernel_path_subsurface_init_indirect(
- ccl_addr_space SubsurfaceIndirectRays *ss_indirect)
+ ccl_addr_space SubsurfaceIndirectRays *ss_indirect)
{
- ss_indirect->num_rays = 0;
+ ss_indirect->num_rays = 0;
}
ccl_device void kernel_path_subsurface_setup_indirect(
- KernelGlobals *kg,
- ccl_addr_space SubsurfaceIndirectRays *ss_indirect,
- ccl_addr_space PathState *state,
- ccl_addr_space Ray *ray,
- PathRadiance *L,
- ccl_addr_space float3 *throughput)
+ KernelGlobals *kg,
+ ccl_addr_space SubsurfaceIndirectRays *ss_indirect,
+ ccl_addr_space PathState *state,
+ ccl_addr_space Ray *ray,
+ PathRadiance *L,
+ ccl_addr_space float3 *throughput)
{
- /* Setup state, ray and throughput for indirect SSS rays. */
- ss_indirect->num_rays--;
+ /* Setup state, ray and throughput for indirect SSS rays. */
+ ss_indirect->num_rays--;
- path_radiance_sum_indirect(L);
- path_radiance_reset_indirect(L);
+ path_radiance_sum_indirect(L);
+ path_radiance_reset_indirect(L);
- *state = ss_indirect->state[ss_indirect->num_rays];
- *ray = ss_indirect->rays[ss_indirect->num_rays];
- L->state = ss_indirect->L_state[ss_indirect->num_rays];
- *throughput = ss_indirect->throughputs[ss_indirect->num_rays];
+ *state = ss_indirect->state[ss_indirect->num_rays];
+ *ray = ss_indirect->rays[ss_indirect->num_rays];
+ L->state = ss_indirect->L_state[ss_indirect->num_rays];
+ *throughput = ss_indirect->throughputs[ss_indirect->num_rays];
- state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM;
+ state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM;
}
-#endif /* __SUBSURFACE__ */
+#endif /* __SUBSURFACE__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h
index 0d18a1e8c77..6251313c5f8 100644
--- a/intern/cycles/kernel/kernel_path_surface.h
+++ b/intern/cycles/kernel/kernel_path_surface.h
@@ -16,255 +16,280 @@
CCL_NAMESPACE_BEGIN
-#if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__) || defined(__SHADOW_TRICKS__) || defined(__BAKING__)
+#if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__) || defined(__SHADOW_TRICKS__) || \
+ defined(__BAKING__)
/* branched path tracing: connect path directly to position on one or more lights and add it to L */
ccl_device_noinline void kernel_branched_path_surface_connect_light(
- KernelGlobals *kg,
- ShaderData *sd,
- ShaderData *emission_sd,
- ccl_addr_space PathState *state,
- float3 throughput,
- float num_samples_adjust,
- PathRadiance *L,
- int sample_all_lights)
+ KernelGlobals *kg,
+ ShaderData *sd,
+ ShaderData *emission_sd,
+ ccl_addr_space PathState *state,
+ float3 throughput,
+ float num_samples_adjust,
+ PathRadiance *L,
+ int sample_all_lights)
{
-#ifdef __EMISSION__
- /* sample illumination from lights to find path contribution */
- if(!(sd->flag & SD_BSDF_HAS_EVAL))
- return;
-
- Ray light_ray;
- BsdfEval L_light;
- bool is_lamp;
-
-# ifdef __OBJECT_MOTION__
- light_ray.time = sd->time;
+# ifdef __EMISSION__
+ /* sample illumination from lights to find path contribution */
+ if (!(sd->flag & SD_BSDF_HAS_EVAL))
+ return;
+
+ Ray light_ray;
+ BsdfEval L_light;
+ bool is_lamp;
+
+# ifdef __OBJECT_MOTION__
+ light_ray.time = sd->time;
+# endif
+
+ if (sample_all_lights) {
+ /* lamp sampling */
+ for (int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
+ if (UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce)))
+ continue;
+
+ int num_samples = ceil_to_int(num_samples_adjust * light_select_num_samples(kg, i));
+ float num_samples_inv = num_samples_adjust /
+ (num_samples * kernel_data.integrator.num_all_lights);
+ uint lamp_rng_hash = cmj_hash(state->rng_hash, i);
+
+ for (int j = 0; j < num_samples; j++) {
+ float light_u, light_v;
+ path_branched_rng_2D(
+ kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+ float terminate = path_branched_rng_light_termination(
+ kg, lamp_rng_hash, state, j, num_samples);
+
+ LightSample ls;
+ if (lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) {
+ /* The sampling probability returned by lamp_light_sample assumes that all lights were sampled.
+ * However, this code only samples lamps, so if the scene also had mesh lights, the real probability is twice as high. */
+ if (kernel_data.integrator.pdf_triangles != 0.0f)
+ ls.pdf *= 2.0f;
+
+ if (direct_emission(
+ kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L,
+ state,
+ throughput * num_samples_inv,
+ &L_light,
+ shadow,
+ num_samples_inv,
+ is_lamp);
+ }
+ else {
+ path_radiance_accum_total_light(L, state, throughput * num_samples_inv, &L_light);
+ }
+ }
+ }
+ }
+ }
+
+ /* mesh light sampling */
+ if (kernel_data.integrator.pdf_triangles != 0.0f) {
+ int num_samples = ceil_to_int(num_samples_adjust *
+ kernel_data.integrator.mesh_light_samples);
+ float num_samples_inv = num_samples_adjust / num_samples;
+
+ for (int j = 0; j < num_samples; j++) {
+ float light_u, light_v;
+ path_branched_rng_2D(
+ kg, state->rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+ float terminate = path_branched_rng_light_termination(
+ kg, state->rng_hash, state, j, num_samples);
+
+ /* only sample triangle lights */
+ if (kernel_data.integrator.num_all_lights)
+ light_u = 0.5f * light_u;
+
+ LightSample ls;
+ if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+ /* Same as above, probability needs to be corrected since the sampling was forced to select a mesh light. */
+ if (kernel_data.integrator.num_all_lights)
+ ls.pdf *= 2.0f;
+
+ if (direct_emission(
+ kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L,
+ state,
+ throughput * num_samples_inv,
+ &L_light,
+ shadow,
+ num_samples_inv,
+ is_lamp);
+ }
+ else {
+ path_radiance_accum_total_light(L, state, throughput * num_samples_inv, &L_light);
+ }
+ }
+ }
+ }
+ }
+ }
+ else {
+ /* sample one light at random */
+ float light_u, light_v;
+ path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
+ float terminate = path_state_rng_light_termination(kg, state);
+
+ LightSample ls;
+ if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+ /* sample random light */
+ if (direct_emission(
+ kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L,
+ state,
+ throughput * num_samples_adjust,
+ &L_light,
+ shadow,
+ num_samples_adjust,
+ is_lamp);
+ }
+ else {
+ path_radiance_accum_total_light(L, state, throughput * num_samples_adjust, &L_light);
+ }
+ }
+ }
+ }
# endif
-
- if(sample_all_lights) {
- /* lamp sampling */
- for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
- if(UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce)))
- continue;
-
- int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i));
- float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights);
- uint lamp_rng_hash = cmj_hash(state->rng_hash, i);
-
- for(int j = 0; j < num_samples; j++) {
- float light_u, light_v;
- path_branched_rng_2D(kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
- float terminate = path_branched_rng_light_termination(kg, lamp_rng_hash, state, j, num_samples);
-
- LightSample ls;
- if(lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) {
- /* The sampling probability returned by lamp_light_sample assumes that all lights were sampled.
- * However, this code only samples lamps, so if the scene also had mesh lights, the real probability is twice as high. */
- if(kernel_data.integrator.pdf_triangles != 0.0f)
- ls.pdf *= 2.0f;
-
- if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, state, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
- }
- else {
- path_radiance_accum_total_light(L, state, throughput*num_samples_inv, &L_light);
- }
- }
- }
- }
- }
-
- /* mesh light sampling */
- if(kernel_data.integrator.pdf_triangles != 0.0f) {
- int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples);
- float num_samples_inv = num_samples_adjust/num_samples;
-
- for(int j = 0; j < num_samples; j++) {
- float light_u, light_v;
- path_branched_rng_2D(kg, state->rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
- float terminate = path_branched_rng_light_termination(kg, state->rng_hash, state, j, num_samples);
-
- /* only sample triangle lights */
- if(kernel_data.integrator.num_all_lights)
- light_u = 0.5f*light_u;
-
- LightSample ls;
- if(light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
- /* Same as above, probability needs to be corrected since the sampling was forced to select a mesh light. */
- if(kernel_data.integrator.num_all_lights)
- ls.pdf *= 2.0f;
-
- if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, state, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
- }
- else {
- path_radiance_accum_total_light(L, state, throughput*num_samples_inv, &L_light);
- }
- }
- }
- }
- }
- }
- else {
- /* sample one light at random */
- float light_u, light_v;
- path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
- float terminate = path_state_rng_light_termination(kg, state);
-
- LightSample ls;
- if(light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
- /* sample random light */
- if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, state, throughput*num_samples_adjust, &L_light, shadow, num_samples_adjust, is_lamp);
- }
- else {
- path_radiance_accum_total_light(L, state, throughput*num_samples_adjust, &L_light);
- }
- }
- }
- }
-#endif
}
/* branched path tracing: bounce off or through surface to with new direction stored in ray */
-ccl_device bool kernel_branched_path_surface_bounce(
- KernelGlobals *kg,
- ShaderData *sd,
- const ShaderClosure *sc,
- int sample,
- int num_samples,
- ccl_addr_space float3 *throughput,
- ccl_addr_space PathState *state,
- PathRadianceState *L_state,
- ccl_addr_space Ray *ray,
- float sum_sample_weight)
+ccl_device bool kernel_branched_path_surface_bounce(KernelGlobals *kg,
+ ShaderData *sd,
+ const ShaderClosure *sc,
+ int sample,
+ int num_samples,
+ ccl_addr_space float3 *throughput,
+ ccl_addr_space PathState *state,
+ PathRadianceState *L_state,
+ ccl_addr_space Ray *ray,
+ float sum_sample_weight)
{
- /* sample BSDF */
- float bsdf_pdf;
- BsdfEval bsdf_eval;
- float3 bsdf_omega_in;
- differential3 bsdf_domega_in;
- float bsdf_u, bsdf_v;
- path_branched_rng_2D(kg, state->rng_hash, state, sample, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
- int label;
-
- label = shader_bsdf_sample_closure(kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval,
- &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
-
- if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
- return false;
-
- /* modify throughput */
- path_radiance_bsdf_bounce(kg, L_state, throughput, &bsdf_eval, bsdf_pdf, state->bounce, label);
-
-#ifdef __DENOISING_FEATURES__
- state->denoising_feature_weight *= sc->sample_weight / (sum_sample_weight * num_samples);
-#endif
+ /* sample BSDF */
+ float bsdf_pdf;
+ BsdfEval bsdf_eval;
+ float3 bsdf_omega_in;
+ differential3 bsdf_domega_in;
+ float bsdf_u, bsdf_v;
+ path_branched_rng_2D(
+ kg, state->rng_hash, state, sample, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+ int label;
+
+ label = shader_bsdf_sample_closure(
+ kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval, &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
+
+ if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
+ return false;
+
+ /* modify throughput */
+ path_radiance_bsdf_bounce(kg, L_state, throughput, &bsdf_eval, bsdf_pdf, state->bounce, label);
+
+# ifdef __DENOISING_FEATURES__
+ state->denoising_feature_weight *= sc->sample_weight / (sum_sample_weight * num_samples);
+# endif
- /* modify path state */
- path_state_next(kg, state, label);
+ /* modify path state */
+ path_state_next(kg, state, label);
- /* setup ray */
- ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT)? -sd->Ng: sd->Ng);
- ray->D = normalize(bsdf_omega_in);
- ray->t = FLT_MAX;
-#ifdef __RAY_DIFFERENTIALS__
- ray->dP = sd->dP;
- ray->dD = bsdf_domega_in;
-#endif
-#ifdef __OBJECT_MOTION__
- ray->time = sd->time;
-#endif
+ /* setup ray */
+ ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT) ? -sd->Ng : sd->Ng);
+ ray->D = normalize(bsdf_omega_in);
+ ray->t = FLT_MAX;
+# ifdef __RAY_DIFFERENTIALS__
+ ray->dP = sd->dP;
+ ray->dD = bsdf_domega_in;
+# endif
+# ifdef __OBJECT_MOTION__
+ ray->time = sd->time;
+# endif
-#ifdef __VOLUME__
- /* enter/exit volume */
- if(label & LABEL_TRANSMIT)
- kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
-#endif
+# ifdef __VOLUME__
+ /* enter/exit volume */
+ if (label & LABEL_TRANSMIT)
+ kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
+# endif
- /* branch RNG state */
- path_state_branch(state, sample, num_samples);
+ /* branch RNG state */
+ path_state_branch(state, sample, num_samples);
- /* set MIS state */
- state->min_ray_pdf = fminf(bsdf_pdf, FLT_MAX);
- state->ray_pdf = bsdf_pdf;
-#ifdef __LAMP_MIS__
- state->ray_t = 0.0f;
-#endif
+ /* set MIS state */
+ state->min_ray_pdf = fminf(bsdf_pdf, FLT_MAX);
+ state->ray_pdf = bsdf_pdf;
+# ifdef __LAMP_MIS__
+ state->ray_t = 0.0f;
+# endif
- return true;
+ return true;
}
#endif
/* path tracing: connect path directly to position on a light and add it to L */
ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg,
- ShaderData *sd, ShaderData *emission_sd, float3 throughput, ccl_addr_space PathState *state,
- PathRadiance *L)
+ ShaderData *sd,
+ ShaderData *emission_sd,
+ float3 throughput,
+ ccl_addr_space PathState *state,
+ PathRadiance *L)
{
- PROFILING_INIT(kg, PROFILING_CONNECT_LIGHT);
+ PROFILING_INIT(kg, PROFILING_CONNECT_LIGHT);
#ifdef __EMISSION__
- if(!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL)))
- return;
-
-#ifdef __SHADOW_TRICKS__
- if(state->flag & PATH_RAY_SHADOW_CATCHER) {
- kernel_branched_path_surface_connect_light(kg,
- sd,
- emission_sd,
- state,
- throughput,
- 1.0f,
- L,
- 1);
- return;
- }
-#endif
+ if (!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL)))
+ return;
+
+# ifdef __SHADOW_TRICKS__
+ if (state->flag & PATH_RAY_SHADOW_CATCHER) {
+ kernel_branched_path_surface_connect_light(kg, sd, emission_sd, state, throughput, 1.0f, L, 1);
+ return;
+ }
+# endif
- /* sample illumination from lights to find path contribution */
- float light_u, light_v;
- path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
+ /* sample illumination from lights to find path contribution */
+ float light_u, light_v;
+ path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
- Ray light_ray;
- BsdfEval L_light;
- bool is_lamp;
+ Ray light_ray;
+ BsdfEval L_light;
+ bool is_lamp;
-#ifdef __OBJECT_MOTION__
- light_ray.time = sd->time;
-#endif
+# ifdef __OBJECT_MOTION__
+ light_ray.time = sd->time;
+# endif
- LightSample ls;
- if(light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
- float terminate = path_state_rng_light_termination(kg, state);
- if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
- }
- else {
- path_radiance_accum_total_light(L, state, throughput, &L_light);
- }
- }
- }
+ LightSample ls;
+ if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+ float terminate = path_state_rng_light_termination(kg, state);
+ if (direct_emission(
+ kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
+ }
+ else {
+ path_radiance_accum_total_light(L, state, throughput, &L_light);
+ }
+ }
+ }
#endif
}
@@ -276,87 +301,87 @@ ccl_device bool kernel_path_surface_bounce(KernelGlobals *kg,
PathRadianceState *L_state,
ccl_addr_space Ray *ray)
{
- PROFILING_INIT(kg, PROFILING_SURFACE_BOUNCE);
-
- /* no BSDF? we can stop here */
- if(sd->flag & SD_BSDF) {
- /* sample BSDF */
- float bsdf_pdf;
- BsdfEval bsdf_eval;
- float3 bsdf_omega_in;
- differential3 bsdf_domega_in;
- float bsdf_u, bsdf_v;
- path_state_rng_2D(kg, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
- int label;
-
- label = shader_bsdf_sample(kg, sd, bsdf_u, bsdf_v, &bsdf_eval,
- &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
-
- if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
- return false;
-
- /* modify throughput */
- path_radiance_bsdf_bounce(kg, L_state, throughput, &bsdf_eval, bsdf_pdf, state->bounce, label);
-
- /* set labels */
- if(!(label & LABEL_TRANSPARENT)) {
- state->ray_pdf = bsdf_pdf;
+ PROFILING_INIT(kg, PROFILING_SURFACE_BOUNCE);
+
+ /* no BSDF? we can stop here */
+ if (sd->flag & SD_BSDF) {
+ /* sample BSDF */
+ float bsdf_pdf;
+ BsdfEval bsdf_eval;
+ float3 bsdf_omega_in;
+ differential3 bsdf_domega_in;
+ float bsdf_u, bsdf_v;
+ path_state_rng_2D(kg, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+ int label;
+
+ label = shader_bsdf_sample(
+ kg, sd, bsdf_u, bsdf_v, &bsdf_eval, &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
+
+ if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
+ return false;
+
+ /* modify throughput */
+ path_radiance_bsdf_bounce(kg, L_state, throughput, &bsdf_eval, bsdf_pdf, state->bounce, label);
+
+ /* set labels */
+ if (!(label & LABEL_TRANSPARENT)) {
+ state->ray_pdf = bsdf_pdf;
#ifdef __LAMP_MIS__
- state->ray_t = 0.0f;
+ state->ray_t = 0.0f;
#endif
- state->min_ray_pdf = fminf(bsdf_pdf, state->min_ray_pdf);
- }
+ state->min_ray_pdf = fminf(bsdf_pdf, state->min_ray_pdf);
+ }
- /* update path state */
- path_state_next(kg, state, label);
+ /* update path state */
+ path_state_next(kg, state, label);
- /* setup ray */
- ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT)? -sd->Ng: sd->Ng);
- ray->D = normalize(bsdf_omega_in);
+ /* setup ray */
+ ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT) ? -sd->Ng : sd->Ng);
+ ray->D = normalize(bsdf_omega_in);
- if(state->bounce == 0)
- ray->t -= sd->ray_length; /* clipping works through transparent */
- else
- ray->t = FLT_MAX;
+ if (state->bounce == 0)
+ ray->t -= sd->ray_length; /* clipping works through transparent */
+ else
+ ray->t = FLT_MAX;
#ifdef __RAY_DIFFERENTIALS__
- ray->dP = sd->dP;
- ray->dD = bsdf_domega_in;
+ ray->dP = sd->dP;
+ ray->dD = bsdf_domega_in;
#endif
#ifdef __VOLUME__
- /* enter/exit volume */
- if(label & LABEL_TRANSMIT)
- kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
+ /* enter/exit volume */
+ if (label & LABEL_TRANSMIT)
+ kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
#endif
- return true;
- }
+ return true;
+ }
#ifdef __VOLUME__
- else if(sd->flag & SD_HAS_ONLY_VOLUME) {
- if(!path_state_volume_next(kg, state)) {
- return false;
- }
-
- if(state->bounce == 0)
- ray->t -= sd->ray_length; /* clipping works through transparent */
- else
- ray->t = FLT_MAX;
-
- /* setup ray position, direction stays unchanged */
- ray->P = ray_offset(sd->P, -sd->Ng);
-#ifdef __RAY_DIFFERENTIALS__
- ray->dP = sd->dP;
-#endif
+ else if (sd->flag & SD_HAS_ONLY_VOLUME) {
+ if (!path_state_volume_next(kg, state)) {
+ return false;
+ }
+
+ if (state->bounce == 0)
+ ray->t -= sd->ray_length; /* clipping works through transparent */
+ else
+ ray->t = FLT_MAX;
+
+ /* setup ray position, direction stays unchanged */
+ ray->P = ray_offset(sd->P, -sd->Ng);
+# ifdef __RAY_DIFFERENTIALS__
+ ray->dP = sd->dP;
+# endif
- /* enter/exit volume */
- kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
- return true;
- }
+ /* enter/exit volume */
+ kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
+ return true;
+ }
#endif
- else {
- /* no bsdf or volume? */
- return false;
- }
+ else {
+ /* no bsdf or volume? */
+ return false;
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h
index d2506fc1e7e..fea4dfc159d 100644
--- a/intern/cycles/kernel/kernel_path_volume.h
+++ b/intern/cycles/kernel/kernel_path_volume.h
@@ -18,269 +18,307 @@ CCL_NAMESPACE_BEGIN
#ifdef __VOLUME_SCATTER__
-ccl_device_inline void kernel_path_volume_connect_light(
- KernelGlobals *kg,
- ShaderData *sd,
- ShaderData *emission_sd,
- float3 throughput,
- ccl_addr_space PathState *state,
- PathRadiance *L)
+ccl_device_inline void kernel_path_volume_connect_light(KernelGlobals *kg,
+ ShaderData *sd,
+ ShaderData *emission_sd,
+ float3 throughput,
+ ccl_addr_space PathState *state,
+ PathRadiance *L)
{
-#ifdef __EMISSION__
- if(!kernel_data.integrator.use_direct_light)
- return;
-
- /* sample illumination from lights to find path contribution */
- float light_u, light_v;
- path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
-
- Ray light_ray;
- BsdfEval L_light;
- LightSample ls;
- bool is_lamp;
-
- /* connect to light from given point where shader has been evaluated */
- light_ray.time = sd->time;
-
- if(light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls))
- {
- float terminate = path_state_rng_light_termination(kg, state);
- if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
- }
- }
- }
-#endif /* __EMISSION__ */
+# ifdef __EMISSION__
+ if (!kernel_data.integrator.use_direct_light)
+ return;
+
+ /* sample illumination from lights to find path contribution */
+ float light_u, light_v;
+ path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
+
+ Ray light_ray;
+ BsdfEval L_light;
+ LightSample ls;
+ bool is_lamp;
+
+ /* connect to light from given point where shader has been evaluated */
+ light_ray.time = sd->time;
+
+ if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+ float terminate = path_state_rng_light_termination(kg, state);
+ if (direct_emission(
+ kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
+ }
+ }
+ }
+# endif /* __EMISSION__ */
}
-#ifdef __KERNEL_GPU__
+# ifdef __KERNEL_GPU__
ccl_device_noinline
-#else
+# else
ccl_device
-#endif
-bool kernel_path_volume_bounce(
- KernelGlobals *kg,
- ShaderData *sd,
- ccl_addr_space float3 *throughput,
- ccl_addr_space PathState *state,
- PathRadianceState *L_state,
- ccl_addr_space Ray *ray)
+# endif
+ bool
+ kernel_path_volume_bounce(KernelGlobals *kg,
+ ShaderData *sd,
+ ccl_addr_space float3 *throughput,
+ ccl_addr_space PathState *state,
+ PathRadianceState *L_state,
+ ccl_addr_space Ray *ray)
{
- /* sample phase function */
- float phase_pdf;
- BsdfEval phase_eval;
- float3 phase_omega_in;
- differential3 phase_domega_in;
- float phase_u, phase_v;
- path_state_rng_2D(kg, state, PRNG_BSDF_U, &phase_u, &phase_v);
- int label;
-
- label = shader_volume_phase_sample(kg, sd, phase_u, phase_v, &phase_eval,
- &phase_omega_in, &phase_domega_in, &phase_pdf);
-
- if(phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval))
- return false;
-
- /* modify throughput */
- path_radiance_bsdf_bounce(kg, L_state, throughput, &phase_eval, phase_pdf, state->bounce, label);
-
- /* set labels */
- state->ray_pdf = phase_pdf;
-#ifdef __LAMP_MIS__
- state->ray_t = 0.0f;
-#endif
- state->min_ray_pdf = fminf(phase_pdf, state->min_ray_pdf);
-
- /* update path state */
- path_state_next(kg, state, label);
-
- /* Russian roulette termination of volume ray scattering. */
- float probability = path_state_continuation_probability(kg, state, *throughput);
-
- if(probability == 0.0f) {
- return false;
- }
- else if(probability != 1.0f) {
- /* Use dimension from the previous bounce, has not been used yet. */
- float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE - PRNG_BOUNCE_NUM);
-
- if(terminate >= probability) {
- return false;
- }
-
- *throughput /= probability;
- }
-
- /* setup ray */
- ray->P = sd->P;
- ray->D = phase_omega_in;
- ray->t = FLT_MAX;
-
-#ifdef __RAY_DIFFERENTIALS__
- ray->dP = sd->dP;
- ray->dD = phase_domega_in;
-#endif
-
- return true;
+ /* sample phase function */
+ float phase_pdf;
+ BsdfEval phase_eval;
+ float3 phase_omega_in;
+ differential3 phase_domega_in;
+ float phase_u, phase_v;
+ path_state_rng_2D(kg, state, PRNG_BSDF_U, &phase_u, &phase_v);
+ int label;
+
+ label = shader_volume_phase_sample(
+ kg, sd, phase_u, phase_v, &phase_eval, &phase_omega_in, &phase_domega_in, &phase_pdf);
+
+ if (phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval))
+ return false;
+
+ /* modify throughput */
+ path_radiance_bsdf_bounce(kg, L_state, throughput, &phase_eval, phase_pdf, state->bounce, label);
+
+ /* set labels */
+ state->ray_pdf = phase_pdf;
+# ifdef __LAMP_MIS__
+ state->ray_t = 0.0f;
+# endif
+ state->min_ray_pdf = fminf(phase_pdf, state->min_ray_pdf);
+
+ /* update path state */
+ path_state_next(kg, state, label);
+
+ /* Russian roulette termination of volume ray scattering. */
+ float probability = path_state_continuation_probability(kg, state, *throughput);
+
+ if (probability == 0.0f) {
+ return false;
+ }
+ else if (probability != 1.0f) {
+ /* Use dimension from the previous bounce, has not been used yet. */
+ float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE - PRNG_BOUNCE_NUM);
+
+ if (terminate >= probability) {
+ return false;
+ }
+
+ *throughput /= probability;
+ }
+
+ /* setup ray */
+ ray->P = sd->P;
+ ray->D = phase_omega_in;
+ ray->t = FLT_MAX;
+
+# ifdef __RAY_DIFFERENTIALS__
+ ray->dP = sd->dP;
+ ray->dD = phase_domega_in;
+# endif
+
+ return true;
}
-#ifndef __SPLIT_KERNEL__
-ccl_device void kernel_branched_path_volume_connect_light(
- KernelGlobals *kg,
- ShaderData *sd,
- ShaderData *emission_sd,
- float3 throughput,
- ccl_addr_space PathState *state,
- PathRadiance *L,
- bool sample_all_lights,
- Ray *ray,
- const VolumeSegment *segment)
+# ifndef __SPLIT_KERNEL__
+ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg,
+ ShaderData *sd,
+ ShaderData *emission_sd,
+ float3 throughput,
+ ccl_addr_space PathState *state,
+ PathRadiance *L,
+ bool sample_all_lights,
+ Ray *ray,
+ const VolumeSegment *segment)
{
-#ifdef __EMISSION__
- if(!kernel_data.integrator.use_direct_light)
- return;
-
- Ray light_ray;
- BsdfEval L_light;
- bool is_lamp;
-
- light_ray.time = sd->time;
-
- if(sample_all_lights) {
- /* lamp sampling */
- for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
- if(UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce)))
- continue;
-
- int num_samples = light_select_num_samples(kg, i);
- float num_samples_inv = 1.0f/(num_samples*kernel_data.integrator.num_all_lights);
- uint lamp_rng_hash = cmj_hash(state->rng_hash, i);
-
- for(int j = 0; j < num_samples; j++) {
- /* sample random position on given light */
- float light_u, light_v;
- path_branched_rng_2D(kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
-
- LightSample ls;
- lamp_light_sample(kg, i, light_u, light_v, ray->P, &ls);
-
- float3 tp = throughput;
-
- /* sample position on volume segment */
- float rphase = path_branched_rng_1D(kg, state->rng_hash, state, j, num_samples, PRNG_PHASE_CHANNEL);
- float rscatter = path_branched_rng_1D(kg, state->rng_hash, state, j, num_samples, PRNG_SCATTER_DISTANCE);
-
- VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
- state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false);
-
- /* todo: split up light_sample so we don't have to call it again with new position */
- if(result == VOLUME_PATH_SCATTERED &&
- lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) {
- if(kernel_data.integrator.pdf_triangles != 0.0f)
- ls.pdf *= 2.0f;
-
- float terminate = path_branched_rng_light_termination(kg, state->rng_hash, state, j, num_samples);
- if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, state, tp*num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
- }
- }
- }
- }
- }
-
- /* mesh light sampling */
- if(kernel_data.integrator.pdf_triangles != 0.0f) {
- int num_samples = kernel_data.integrator.mesh_light_samples;
- float num_samples_inv = 1.0f/num_samples;
-
- for(int j = 0; j < num_samples; j++) {
- /* sample random position on random triangle */
- float light_u, light_v;
- path_branched_rng_2D(kg, state->rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
-
- /* only sample triangle lights */
- if(kernel_data.integrator.num_all_lights)
- light_u = 0.5f*light_u;
-
- LightSample ls;
- light_sample(kg, light_u, light_v, sd->time, ray->P, state->bounce, &ls);
-
- float3 tp = throughput;
-
- /* sample position on volume segment */
- float rphase = path_branched_rng_1D(kg, state->rng_hash, state, j, num_samples, PRNG_PHASE_CHANNEL);
- float rscatter = path_branched_rng_1D(kg, state->rng_hash, state, j, num_samples, PRNG_SCATTER_DISTANCE);
-
- VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
- state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false);
-
- /* todo: split up light_sample so we don't have to call it again with new position */
- if(result == VOLUME_PATH_SCATTERED &&
- light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
- if(kernel_data.integrator.num_all_lights)
- ls.pdf *= 2.0f;
-
- float terminate = path_branched_rng_light_termination(kg, state->rng_hash, state, j, num_samples);
- if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, state, tp*num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
- }
- }
- }
- }
- }
- }
- else {
- /* sample random position on random light */
- float light_u, light_v;
- path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
-
- LightSample ls;
- light_sample(kg, light_u, light_v, sd->time, ray->P, state->bounce, &ls);
-
- float3 tp = throughput;
-
- /* sample position on volume segment */
- float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
- float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
-
- VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
- state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false);
-
- /* todo: split up light_sample so we don't have to call it again with new position */
- if(result == VOLUME_PATH_SCATTERED &&
- light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
- /* sample random light */
- float terminate = path_state_rng_light_termination(kg, state);
- if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, state, tp, &L_light, shadow, 1.0f, is_lamp);
- }
- }
- }
- }
-#endif /* __EMISSION__ */
+# ifdef __EMISSION__
+ if (!kernel_data.integrator.use_direct_light)
+ return;
+
+ Ray light_ray;
+ BsdfEval L_light;
+ bool is_lamp;
+
+ light_ray.time = sd->time;
+
+ if (sample_all_lights) {
+ /* lamp sampling */
+ for (int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
+ if (UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce)))
+ continue;
+
+ int num_samples = light_select_num_samples(kg, i);
+ float num_samples_inv = 1.0f / (num_samples * kernel_data.integrator.num_all_lights);
+ uint lamp_rng_hash = cmj_hash(state->rng_hash, i);
+
+ for (int j = 0; j < num_samples; j++) {
+ /* sample random position on given light */
+ float light_u, light_v;
+ path_branched_rng_2D(
+ kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+
+ LightSample ls;
+ lamp_light_sample(kg, i, light_u, light_v, ray->P, &ls);
+
+ float3 tp = throughput;
+
+ /* sample position on volume segment */
+ float rphase = path_branched_rng_1D(
+ kg, state->rng_hash, state, j, num_samples, PRNG_PHASE_CHANNEL);
+ float rscatter = path_branched_rng_1D(
+ kg, state->rng_hash, state, j, num_samples, PRNG_SCATTER_DISTANCE);
+
+ VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+ state,
+ ray,
+ sd,
+ &tp,
+ rphase,
+ rscatter,
+ segment,
+ (ls.t != FLT_MAX) ? &ls.P :
+ NULL,
+ false);
+
+ /* todo: split up light_sample so we don't have to call it again with new position */
+ if (result == VOLUME_PATH_SCATTERED &&
+ lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) {
+ if (kernel_data.integrator.pdf_triangles != 0.0f)
+ ls.pdf *= 2.0f;
+
+ float terminate = path_branched_rng_light_termination(
+ kg, state->rng_hash, state, j, num_samples);
+ if (direct_emission(
+ kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(
+ L, state, tp * num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
+ }
+ }
+ }
+ }
+ }
+
+ /* mesh light sampling */
+ if (kernel_data.integrator.pdf_triangles != 0.0f) {
+ int num_samples = kernel_data.integrator.mesh_light_samples;
+ float num_samples_inv = 1.0f / num_samples;
+
+ for (int j = 0; j < num_samples; j++) {
+ /* sample random position on random triangle */
+ float light_u, light_v;
+ path_branched_rng_2D(
+ kg, state->rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+
+ /* only sample triangle lights */
+ if (kernel_data.integrator.num_all_lights)
+ light_u = 0.5f * light_u;
+
+ LightSample ls;
+ light_sample(kg, light_u, light_v, sd->time, ray->P, state->bounce, &ls);
+
+ float3 tp = throughput;
+
+ /* sample position on volume segment */
+ float rphase = path_branched_rng_1D(
+ kg, state->rng_hash, state, j, num_samples, PRNG_PHASE_CHANNEL);
+ float rscatter = path_branched_rng_1D(
+ kg, state->rng_hash, state, j, num_samples, PRNG_SCATTER_DISTANCE);
+
+ VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+ state,
+ ray,
+ sd,
+ &tp,
+ rphase,
+ rscatter,
+ segment,
+ (ls.t != FLT_MAX) ? &ls.P :
+ NULL,
+ false);
+
+ /* todo: split up light_sample so we don't have to call it again with new position */
+ if (result == VOLUME_PATH_SCATTERED &&
+ light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+ if (kernel_data.integrator.num_all_lights)
+ ls.pdf *= 2.0f;
+
+ float terminate = path_branched_rng_light_termination(
+ kg, state->rng_hash, state, j, num_samples);
+ if (direct_emission(
+ kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(
+ L, state, tp * num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
+ }
+ }
+ }
+ }
+ }
+ }
+ else {
+ /* sample random position on random light */
+ float light_u, light_v;
+ path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
+
+ LightSample ls;
+ light_sample(kg, light_u, light_v, sd->time, ray->P, state->bounce, &ls);
+
+ float3 tp = throughput;
+
+ /* sample position on volume segment */
+ float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
+ float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
+
+ VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+ state,
+ ray,
+ sd,
+ &tp,
+ rphase,
+ rscatter,
+ segment,
+ (ls.t != FLT_MAX) ? &ls.P :
+ NULL,
+ false);
+
+ /* todo: split up light_sample so we don't have to call it again with new position */
+ if (result == VOLUME_PATH_SCATTERED &&
+ light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+ /* sample random light */
+ float terminate = path_state_rng_light_termination(kg, state);
+ if (direct_emission(
+ kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, state, tp, &L_light, shadow, 1.0f, is_lamp);
+ }
+ }
+ }
+ }
+# endif /* __EMISSION__ */
}
-#endif /* __SPLIT_KERNEL__ */
+# endif /* __SPLIT_KERNEL__ */
-#endif /* __VOLUME_SCATTER__ */
+#endif /* __VOLUME_SCATTER__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_profiling.h b/intern/cycles/kernel/kernel_profiling.h
index a46d6376473..780830879d8 100644
--- a/intern/cycles/kernel/kernel_profiling.h
+++ b/intern/cycles/kernel/kernel_profiling.h
@@ -26,15 +26,21 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_CPU__
# define PROFILING_INIT(kg, event) ProfilingHelper profiling_helper(&kg->profiler, event)
# define PROFILING_EVENT(event) profiling_helper.set_event(event)
-# define PROFILING_SHADER(shader) if((shader) != SHADER_NONE) { profiling_helper.set_shader((shader) & SHADER_MASK); }
-# define PROFILING_OBJECT(object) if((object) != PRIM_NONE) { profiling_helper.set_object(object); }
+# define PROFILING_SHADER(shader) \
+ if ((shader) != SHADER_NONE) { \
+ profiling_helper.set_shader((shader)&SHADER_MASK); \
+ }
+# define PROFILING_OBJECT(object) \
+ if ((object) != PRIM_NONE) { \
+ profiling_helper.set_object(object); \
+ }
#else
# define PROFILING_INIT(kg, event)
# define PROFILING_EVENT(event)
# define PROFILING_SHADER(shader)
# define PROFILING_OBJECT(object)
-#endif /* __KERNEL_CPU__ */
+#endif /* __KERNEL_CPU__ */
CCL_NAMESPACE_END
-#endif /* __KERNEL_PROFILING_H__ */
+#endif /* __KERNEL_PROFILING_H__ */
diff --git a/intern/cycles/kernel/kernel_projection.h b/intern/cycles/kernel/kernel_projection.h
index 7bad89c831c..f74ced45fd5 100644
--- a/intern/cycles/kernel/kernel_projection.h
+++ b/intern/cycles/kernel/kernel_projection.h
@@ -39,233 +39,223 @@ CCL_NAMESPACE_BEGIN
ccl_device float2 direction_to_spherical(float3 dir)
{
- float theta = safe_acosf(dir.z);
- float phi = atan2f(dir.x, dir.y);
+ float theta = safe_acosf(dir.z);
+ float phi = atan2f(dir.x, dir.y);
- return make_float2(theta, phi);
+ return make_float2(theta, phi);
}
ccl_device float3 spherical_to_direction(float theta, float phi)
{
- float sin_theta = sinf(theta);
- return make_float3(sin_theta*cosf(phi),
- sin_theta*sinf(phi),
- cosf(theta));
+ float sin_theta = sinf(theta);
+ return make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cosf(theta));
}
/* Equirectangular coordinates <-> Cartesian direction */
ccl_device float2 direction_to_equirectangular_range(float3 dir, float4 range)
{
- if(is_zero(dir))
- return make_float2(0.0f, 0.0f);
+ if (is_zero(dir))
+ return make_float2(0.0f, 0.0f);
- float u = (atan2f(dir.y, dir.x) - range.y) / range.x;
- float v = (acosf(dir.z / len(dir)) - range.w) / range.z;
+ float u = (atan2f(dir.y, dir.x) - range.y) / range.x;
+ float v = (acosf(dir.z / len(dir)) - range.w) / range.z;
- return make_float2(u, v);
+ return make_float2(u, v);
}
ccl_device float3 equirectangular_range_to_direction(float u, float v, float4 range)
{
- float phi = range.x*u + range.y;
- float theta = range.z*v + range.w;
- float sin_theta = sinf(theta);
- return make_float3(sin_theta*cosf(phi),
- sin_theta*sinf(phi),
- cosf(theta));
+ float phi = range.x * u + range.y;
+ float theta = range.z * v + range.w;
+ float sin_theta = sinf(theta);
+ return make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cosf(theta));
}
ccl_device float2 direction_to_equirectangular(float3 dir)
{
- return direction_to_equirectangular_range(dir, make_float4(-M_2PI_F, M_PI_F, -M_PI_F, M_PI_F));
+ return direction_to_equirectangular_range(dir, make_float4(-M_2PI_F, M_PI_F, -M_PI_F, M_PI_F));
}
ccl_device float3 equirectangular_to_direction(float u, float v)
{
- return equirectangular_range_to_direction(u, v, make_float4(-M_2PI_F, M_PI_F, -M_PI_F, M_PI_F));
+ return equirectangular_range_to_direction(u, v, make_float4(-M_2PI_F, M_PI_F, -M_PI_F, M_PI_F));
}
/* Fisheye <-> Cartesian direction */
ccl_device float2 direction_to_fisheye(float3 dir, float fov)
{
- float r = atan2f(sqrtf(dir.y*dir.y + dir.z*dir.z), dir.x) / fov;
- float phi = atan2f(dir.z, dir.y);
+ float r = atan2f(sqrtf(dir.y * dir.y + dir.z * dir.z), dir.x) / fov;
+ float phi = atan2f(dir.z, dir.y);
- float u = r * cosf(phi) + 0.5f;
- float v = r * sinf(phi) + 0.5f;
+ float u = r * cosf(phi) + 0.5f;
+ float v = r * sinf(phi) + 0.5f;
- return make_float2(u, v);
+ return make_float2(u, v);
}
ccl_device float3 fisheye_to_direction(float u, float v, float fov)
{
- u = (u - 0.5f) * 2.0f;
- v = (v - 0.5f) * 2.0f;
+ u = (u - 0.5f) * 2.0f;
+ v = (v - 0.5f) * 2.0f;
- float r = sqrtf(u*u + v*v);
+ float r = sqrtf(u * u + v * v);
- if(r > 1.0f)
- return make_float3(0.0f, 0.0f, 0.0f);
+ if (r > 1.0f)
+ return make_float3(0.0f, 0.0f, 0.0f);
- float phi = safe_acosf((r != 0.0f)? u/r: 0.0f);
- float theta = r * fov * 0.5f;
+ float phi = safe_acosf((r != 0.0f) ? u / r : 0.0f);
+ float theta = r * fov * 0.5f;
- if(v < 0.0f) phi = -phi;
+ if (v < 0.0f)
+ phi = -phi;
- return make_float3(
- cosf(theta),
- -cosf(phi)*sinf(theta),
- sinf(phi)*sinf(theta)
- );
+ return make_float3(cosf(theta), -cosf(phi) * sinf(theta), sinf(phi) * sinf(theta));
}
ccl_device float2 direction_to_fisheye_equisolid(float3 dir, float lens, float width, float height)
{
- float theta = safe_acosf(dir.x);
- float r = 2.0f * lens * sinf(theta * 0.5f);
- float phi = atan2f(dir.z, dir.y);
+ float theta = safe_acosf(dir.x);
+ float r = 2.0f * lens * sinf(theta * 0.5f);
+ float phi = atan2f(dir.z, dir.y);
- float u = r * cosf(phi) / width + 0.5f;
- float v = r * sinf(phi) / height + 0.5f;
+ float u = r * cosf(phi) / width + 0.5f;
+ float v = r * sinf(phi) / height + 0.5f;
- return make_float2(u, v);
+ return make_float2(u, v);
}
-ccl_device_inline float3 fisheye_equisolid_to_direction(float u, float v,
- float lens,
- float fov,
- float width, float height)
+ccl_device_inline float3
+fisheye_equisolid_to_direction(float u, float v, float lens, float fov, float width, float height)
{
- u = (u - 0.5f) * width;
- v = (v - 0.5f) * height;
+ u = (u - 0.5f) * width;
+ v = (v - 0.5f) * height;
- float rmax = 2.0f * lens * sinf(fov * 0.25f);
- float r = sqrtf(u*u + v*v);
+ float rmax = 2.0f * lens * sinf(fov * 0.25f);
+ float r = sqrtf(u * u + v * v);
- if(r > rmax)
- return make_float3(0.0f, 0.0f, 0.0f);
+ if (r > rmax)
+ return make_float3(0.0f, 0.0f, 0.0f);
- float phi = safe_acosf((r != 0.0f)? u/r: 0.0f);
- float theta = 2.0f * asinf(r/(2.0f * lens));
+ float phi = safe_acosf((r != 0.0f) ? u / r : 0.0f);
+ float theta = 2.0f * asinf(r / (2.0f * lens));
- if(v < 0.0f) phi = -phi;
+ if (v < 0.0f)
+ phi = -phi;
- return make_float3(
- cosf(theta),
- -cosf(phi)*sinf(theta),
- sinf(phi)*sinf(theta)
- );
+ return make_float3(cosf(theta), -cosf(phi) * sinf(theta), sinf(phi) * sinf(theta));
}
/* Mirror Ball <-> Cartesion direction */
ccl_device float3 mirrorball_to_direction(float u, float v)
{
- /* point on sphere */
- float3 dir;
+ /* point on sphere */
+ float3 dir;
- dir.x = 2.0f*u - 1.0f;
- dir.z = 2.0f*v - 1.0f;
+ dir.x = 2.0f * u - 1.0f;
+ dir.z = 2.0f * v - 1.0f;
- if(dir.x*dir.x + dir.z*dir.z > 1.0f)
- return make_float3(0.0f, 0.0f, 0.0f);
+ if (dir.x * dir.x + dir.z * dir.z > 1.0f)
+ return make_float3(0.0f, 0.0f, 0.0f);
- dir.y = -sqrtf(max(1.0f - dir.x*dir.x - dir.z*dir.z, 0.0f));
+ dir.y = -sqrtf(max(1.0f - dir.x * dir.x - dir.z * dir.z, 0.0f));
- /* reflection */
- float3 I = make_float3(0.0f, -1.0f, 0.0f);
+ /* reflection */
+ float3 I = make_float3(0.0f, -1.0f, 0.0f);
- return 2.0f*dot(dir, I)*dir - I;
+ return 2.0f * dot(dir, I) * dir - I;
}
ccl_device float2 direction_to_mirrorball(float3 dir)
{
- /* inverse of mirrorball_to_direction */
- dir.y -= 1.0f;
+ /* inverse of mirrorball_to_direction */
+ dir.y -= 1.0f;
- float div = 2.0f*sqrtf(max(-0.5f*dir.y, 0.0f));
- if(div > 0.0f)
- dir /= div;
+ float div = 2.0f * sqrtf(max(-0.5f * dir.y, 0.0f));
+ if (div > 0.0f)
+ dir /= div;
- float u = 0.5f*(dir.x + 1.0f);
- float v = 0.5f*(dir.z + 1.0f);
+ float u = 0.5f * (dir.x + 1.0f);
+ float v = 0.5f * (dir.z + 1.0f);
- return make_float2(u, v);
+ return make_float2(u, v);
}
ccl_device_inline float3 panorama_to_direction(ccl_constant KernelCamera *cam, float u, float v)
{
- switch(cam->panorama_type) {
- case PANORAMA_EQUIRECTANGULAR:
- return equirectangular_range_to_direction(u, v, cam->equirectangular_range);
- case PANORAMA_MIRRORBALL:
- return mirrorball_to_direction(u, v);
- case PANORAMA_FISHEYE_EQUIDISTANT:
- return fisheye_to_direction(u, v, cam->fisheye_fov);
- case PANORAMA_FISHEYE_EQUISOLID:
- default:
- return fisheye_equisolid_to_direction(u, v, cam->fisheye_lens,
- cam->fisheye_fov, cam->sensorwidth, cam->sensorheight);
- }
+ switch (cam->panorama_type) {
+ case PANORAMA_EQUIRECTANGULAR:
+ return equirectangular_range_to_direction(u, v, cam->equirectangular_range);
+ case PANORAMA_MIRRORBALL:
+ return mirrorball_to_direction(u, v);
+ case PANORAMA_FISHEYE_EQUIDISTANT:
+ return fisheye_to_direction(u, v, cam->fisheye_fov);
+ case PANORAMA_FISHEYE_EQUISOLID:
+ default:
+ return fisheye_equisolid_to_direction(
+ u, v, cam->fisheye_lens, cam->fisheye_fov, cam->sensorwidth, cam->sensorheight);
+ }
}
ccl_device_inline float2 direction_to_panorama(ccl_constant KernelCamera *cam, float3 dir)
{
- switch(cam->panorama_type) {
- case PANORAMA_EQUIRECTANGULAR:
- return direction_to_equirectangular_range(dir, cam->equirectangular_range);
- case PANORAMA_MIRRORBALL:
- return direction_to_mirrorball(dir);
- case PANORAMA_FISHEYE_EQUIDISTANT:
- return direction_to_fisheye(dir, cam->fisheye_fov);
- case PANORAMA_FISHEYE_EQUISOLID:
- default:
- return direction_to_fisheye_equisolid(dir, cam->fisheye_lens,
- cam->sensorwidth, cam->sensorheight);
- }
+ switch (cam->panorama_type) {
+ case PANORAMA_EQUIRECTANGULAR:
+ return direction_to_equirectangular_range(dir, cam->equirectangular_range);
+ case PANORAMA_MIRRORBALL:
+ return direction_to_mirrorball(dir);
+ case PANORAMA_FISHEYE_EQUIDISTANT:
+ return direction_to_fisheye(dir, cam->fisheye_fov);
+ case PANORAMA_FISHEYE_EQUISOLID:
+ default:
+ return direction_to_fisheye_equisolid(
+ dir, cam->fisheye_lens, cam->sensorwidth, cam->sensorheight);
+ }
}
-ccl_device_inline void spherical_stereo_transform(ccl_constant KernelCamera *cam, float3 *P, float3 *D)
+ccl_device_inline void spherical_stereo_transform(ccl_constant KernelCamera *cam,
+ float3 *P,
+ float3 *D)
{
- float interocular_offset = cam->interocular_offset;
-
- /* Interocular offset of zero means either non stereo, or stereo without
- * spherical stereo. */
- kernel_assert(interocular_offset != 0.0f);
-
- if(cam->pole_merge_angle_to > 0.0f) {
- const float pole_merge_angle_from = cam->pole_merge_angle_from,
- pole_merge_angle_to = cam->pole_merge_angle_to;
- float altitude = fabsf(safe_asinf((*D).z));
- if(altitude > pole_merge_angle_to) {
- interocular_offset = 0.0f;
- }
- else if(altitude > pole_merge_angle_from) {
- float fac = (altitude - pole_merge_angle_from) / (pole_merge_angle_to - pole_merge_angle_from);
- float fade = cosf(fac * M_PI_2_F);
- interocular_offset *= fade;
- }
- }
-
- float3 up = make_float3(0.0f, 0.0f, 1.0f);
- float3 side = normalize(cross(*D, up));
- float3 stereo_offset = side * interocular_offset;
-
- *P += stereo_offset;
-
- /* Convergence distance is FLT_MAX in the case of parallel convergence mode,
- * no need to modify direction in this case either. */
- const float convergence_distance = cam->convergence_distance;
-
- if(convergence_distance != FLT_MAX)
- {
- float3 screen_offset = convergence_distance * (*D);
- *D = normalize(screen_offset - stereo_offset);
- }
+ float interocular_offset = cam->interocular_offset;
+
+ /* Interocular offset of zero means either non stereo, or stereo without
+ * spherical stereo. */
+ kernel_assert(interocular_offset != 0.0f);
+
+ if (cam->pole_merge_angle_to > 0.0f) {
+ const float pole_merge_angle_from = cam->pole_merge_angle_from,
+ pole_merge_angle_to = cam->pole_merge_angle_to;
+ float altitude = fabsf(safe_asinf((*D).z));
+ if (altitude > pole_merge_angle_to) {
+ interocular_offset = 0.0f;
+ }
+ else if (altitude > pole_merge_angle_from) {
+ float fac = (altitude - pole_merge_angle_from) /
+ (pole_merge_angle_to - pole_merge_angle_from);
+ float fade = cosf(fac * M_PI_2_F);
+ interocular_offset *= fade;
+ }
+ }
+
+ float3 up = make_float3(0.0f, 0.0f, 1.0f);
+ float3 side = normalize(cross(*D, up));
+ float3 stereo_offset = side * interocular_offset;
+
+ *P += stereo_offset;
+
+ /* Convergence distance is FLT_MAX in the case of parallel convergence mode,
+ * no need to modify direction in this case either. */
+ const float convergence_distance = cam->convergence_distance;
+
+ if (convergence_distance != FLT_MAX) {
+ float3 screen_offset = convergence_distance * (*D);
+ *D = normalize(screen_offset - stereo_offset);
+ }
}
CCL_NAMESPACE_END
-#endif /* __KERNEL_PROJECTION_CL__ */
+#endif /* __KERNEL_PROJECTION_CL__ */
diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h
index de8cc4a0cef..91a39fc1465 100644
--- a/intern/cycles/kernel/kernel_queues.h
+++ b/intern/cycles/kernel/kernel_queues.h
@@ -23,24 +23,24 @@ CCL_NAMESPACE_BEGIN
* Queue utility functions for split kernel
*/
#ifdef __KERNEL_OPENCL__
-#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
-#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
+# pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
+# pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
#endif
/*
* Enqueue ray index into the queue
*/
ccl_device void enqueue_ray_index(
- int ray_index, /* Ray index to be enqueued. */
- int queue_number, /* Queue in which the ray index should be enqueued. */
- ccl_global int *queues, /* Buffer of all queues. */
- int queue_size, /* Size of each queue. */
- ccl_global int *queue_index) /* Array of size num_queues; Used for atomic increment. */
+ int ray_index, /* Ray index to be enqueued. */
+ int queue_number, /* Queue in which the ray index should be enqueued. */
+ ccl_global int *queues, /* Buffer of all queues. */
+ int queue_size, /* Size of each queue. */
+ ccl_global int *queue_index) /* Array of size num_queues; Used for atomic increment. */
{
- /* This thread's queue index. */
- int my_queue_index = atomic_fetch_and_inc_uint32((ccl_global uint*)&queue_index[queue_number])
- + (queue_number * queue_size);
- queues[my_queue_index] = ray_index;
+ /* This thread's queue index. */
+ int my_queue_index = atomic_fetch_and_inc_uint32((ccl_global uint *)&queue_index[queue_number]) +
+ (queue_number * queue_size);
+ queues[my_queue_index] = ray_index;
}
/*
@@ -51,96 +51,95 @@ ccl_device void enqueue_ray_index(
* is no more ray to allocate to other threads.
*/
ccl_device int get_ray_index(
- KernelGlobals *kg,
- int thread_index, /* Global thread index. */
- int queue_number, /* Queue to operate on. */
- ccl_global int *queues, /* Buffer of all queues. */
- int queuesize, /* Size of a queue. */
- int empty_queue) /* Empty the queue slot as soon as we fetch the ray index. */
+ KernelGlobals *kg,
+ int thread_index, /* Global thread index. */
+ int queue_number, /* Queue to operate on. */
+ ccl_global int *queues, /* Buffer of all queues. */
+ int queuesize, /* Size of a queue. */
+ int empty_queue) /* Empty the queue slot as soon as we fetch the ray index. */
{
- int ray_index = queues[queue_number * queuesize + thread_index];
- if(empty_queue && ray_index != QUEUE_EMPTY_SLOT) {
- queues[queue_number * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
- }
- return ray_index;
+ int ray_index = queues[queue_number * queuesize + thread_index];
+ if (empty_queue && ray_index != QUEUE_EMPTY_SLOT) {
+ queues[queue_number * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
+ }
+ return ray_index;
}
/* The following functions are to realize Local memory variant of enqueue ray index function. */
/* All threads should call this function. */
ccl_device void enqueue_ray_index_local(
- int ray_index, /* Ray index to enqueue. */
- int queue_number, /* Queue in which to enqueue ray index. */
- char enqueue_flag, /* True for threads whose ray index has to be enqueued. */
- int queuesize, /* queue size. */
- ccl_local_param unsigned int *local_queue_atomics, /* To to local queue atomics. */
- ccl_global int *Queue_data, /* Queues. */
- ccl_global int *Queue_index) /* To do global queue atomics. */
+ int ray_index, /* Ray index to enqueue. */
+ int queue_number, /* Queue in which to enqueue ray index. */
+ char enqueue_flag, /* True for threads whose ray index has to be enqueued. */
+ int queuesize, /* queue size. */
+ ccl_local_param unsigned int *local_queue_atomics, /* To to local queue atomics. */
+ ccl_global int *Queue_data, /* Queues. */
+ ccl_global int *Queue_index) /* To do global queue atomics. */
{
- int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
-
- /* Get local queue id .*/
- unsigned int lqidx;
- if(enqueue_flag) {
- lqidx = atomic_fetch_and_inc_uint32(local_queue_atomics);
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- /* Get global queue offset. */
- if(lidx == 0) {
- *local_queue_atomics = atomic_fetch_and_add_uint32((ccl_global uint*)&Queue_index[queue_number],
- *local_queue_atomics);
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- /* Get global queue index and enqueue ray. */
- if(enqueue_flag) {
- unsigned int my_gqidx = queue_number * queuesize + (*local_queue_atomics) + lqidx;
- Queue_data[my_gqidx] = ray_index;
- }
+ int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
+
+ /* Get local queue id .*/
+ unsigned int lqidx;
+ if (enqueue_flag) {
+ lqidx = atomic_fetch_and_inc_uint32(local_queue_atomics);
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ /* Get global queue offset. */
+ if (lidx == 0) {
+ *local_queue_atomics = atomic_fetch_and_add_uint32(
+ (ccl_global uint *)&Queue_index[queue_number], *local_queue_atomics);
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ /* Get global queue index and enqueue ray. */
+ if (enqueue_flag) {
+ unsigned int my_gqidx = queue_number * queuesize + (*local_queue_atomics) + lqidx;
+ Queue_data[my_gqidx] = ray_index;
+ }
}
ccl_device unsigned int get_local_queue_index(
- int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */
- ccl_local_param unsigned int *local_queue_atomics)
+ int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */
+ ccl_local_param unsigned int *local_queue_atomics)
{
- int my_lqidx = atomic_fetch_and_inc_uint32(&local_queue_atomics[queue_number]);
- return my_lqidx;
+ int my_lqidx = atomic_fetch_and_inc_uint32(&local_queue_atomics[queue_number]);
+ return my_lqidx;
}
ccl_device unsigned int get_global_per_queue_offset(
- int queue_number,
- ccl_local_param unsigned int *local_queue_atomics,
- ccl_global int* global_queue_atomics)
+ int queue_number,
+ ccl_local_param unsigned int *local_queue_atomics,
+ ccl_global int *global_queue_atomics)
{
- unsigned int queue_offset = atomic_fetch_and_add_uint32((ccl_global uint*)&global_queue_atomics[queue_number],
- local_queue_atomics[queue_number]);
- return queue_offset;
+ unsigned int queue_offset = atomic_fetch_and_add_uint32(
+ (ccl_global uint *)&global_queue_atomics[queue_number], local_queue_atomics[queue_number]);
+ return queue_offset;
}
ccl_device unsigned int get_global_queue_index(
int queue_number,
int queuesize,
unsigned int lqidx,
- ccl_local_param unsigned int * global_per_queue_offset)
+ ccl_local_param unsigned int *global_per_queue_offset)
{
- int my_gqidx = queuesize * queue_number + lqidx + global_per_queue_offset[queue_number];
- return my_gqidx;
+ int my_gqidx = queuesize * queue_number + lqidx + global_per_queue_offset[queue_number];
+ return my_gqidx;
}
-ccl_device int dequeue_ray_index(
- int queue_number,
- ccl_global int *queues,
- int queue_size,
- ccl_global int *queue_index)
+ccl_device int dequeue_ray_index(int queue_number,
+ ccl_global int *queues,
+ int queue_size,
+ ccl_global int *queue_index)
{
- int index = atomic_fetch_and_dec_uint32((ccl_global uint*)&queue_index[queue_number])-1;
+ int index = atomic_fetch_and_dec_uint32((ccl_global uint *)&queue_index[queue_number]) - 1;
- if(index < 0) {
- return QUEUE_EMPTY_SLOT;
- }
+ if (index < 0) {
+ return QUEUE_EMPTY_SLOT;
+ }
- return queues[index + queue_number * queue_size];
+ return queues[index + queue_number * queue_size];
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index 61ddf4a4f81..6779c1f7160 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -23,7 +23,6 @@ CCL_NAMESPACE_BEGIN
* this single threaded on a CPU for repeatable results. */
//#define __DEBUG_CORRELATION__
-
/* High Dimensional Sobol.
*
* Multidimensional sobol with generator matrices. Dimension 0 and 1 are equal
@@ -36,136 +35,138 @@ CCL_NAMESPACE_BEGIN
* progressive pattern that doesn't suffer from this problem, because even
* with this offset some dimensions are quite poor.
*/
-#define SOBOL_SKIP 64
+# define SOBOL_SKIP 64
ccl_device uint sobol_dimension(KernelGlobals *kg, int index, int dimension)
{
- uint result = 0;
- uint i = index + SOBOL_SKIP;
- for(uint j = 0; i; i >>= 1, j++) {
- if(i & 1) {
- result ^= kernel_tex_fetch(__sobol_directions, 32*dimension + j);
- }
- }
- return result;
+ uint result = 0;
+ uint i = index + SOBOL_SKIP;
+ for (uint j = 0; i; i >>= 1, j++) {
+ if (i & 1) {
+ result ^= kernel_tex_fetch(__sobol_directions, 32 * dimension + j);
+ }
+ }
+ return result;
}
-#endif /* __SOBOL__ */
-
+#endif /* __SOBOL__ */
-ccl_device_forceinline float path_rng_1D(KernelGlobals *kg,
- uint rng_hash,
- int sample, int num_samples,
- int dimension)
+ccl_device_forceinline float path_rng_1D(
+ KernelGlobals *kg, uint rng_hash, int sample, int num_samples, int dimension)
{
#ifdef __DEBUG_CORRELATION__
- return (float)drand48();
+ return (float)drand48();
#endif
#ifdef __CMJ__
# ifdef __SOBOL__
- if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
+ if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
# endif
- {
- /* Correlated multi-jitter. */
- int p = rng_hash + dimension;
- return cmj_sample_1D(sample, num_samples, p);
- }
+ {
+ /* Correlated multi-jitter. */
+ int p = rng_hash + dimension;
+ return cmj_sample_1D(sample, num_samples, p);
+ }
#endif
#ifdef __SOBOL__
- /* Sobol sequence value using direction vectors. */
- uint result = sobol_dimension(kg, sample, dimension);
- float r = (float)result * (1.0f/(float)0xFFFFFFFF);
+ /* Sobol sequence value using direction vectors. */
+ uint result = sobol_dimension(kg, sample, dimension);
+ float r = (float)result * (1.0f / (float)0xFFFFFFFF);
- /* Cranly-Patterson rotation using rng seed */
- float shift;
+ /* Cranly-Patterson rotation using rng seed */
+ float shift;
- /* Hash rng with dimension to solve correlation issues.
- * See T38710, T50116.
- */
- uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
- shift = tmp_rng * (1.0f/(float)0xFFFFFFFF);
+ /* Hash rng with dimension to solve correlation issues.
+ * See T38710, T50116.
+ */
+ uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
+ shift = tmp_rng * (1.0f / (float)0xFFFFFFFF);
- return r + shift - floorf(r + shift);
+ return r + shift - floorf(r + shift);
#endif
}
ccl_device_forceinline void path_rng_2D(KernelGlobals *kg,
uint rng_hash,
- int sample, int num_samples,
+ int sample,
+ int num_samples,
int dimension,
- float *fx, float *fy)
+ float *fx,
+ float *fy)
{
#ifdef __DEBUG_CORRELATION__
- *fx = (float)drand48();
- *fy = (float)drand48();
- return;
+ *fx = (float)drand48();
+ *fy = (float)drand48();
+ return;
#endif
#ifdef __CMJ__
# ifdef __SOBOL__
- if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
+ if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
# endif
- {
- /* Correlated multi-jitter. */
- int p = rng_hash + dimension;
- cmj_sample_2D(sample, num_samples, p, fx, fy);
- return;
- }
+ {
+ /* Correlated multi-jitter. */
+ int p = rng_hash + dimension;
+ cmj_sample_2D(sample, num_samples, p, fx, fy);
+ return;
+ }
#endif
#ifdef __SOBOL__
- /* Sobol. */
- *fx = path_rng_1D(kg, rng_hash, sample, num_samples, dimension);
- *fy = path_rng_1D(kg, rng_hash, sample, num_samples, dimension + 1);
+ /* Sobol. */
+ *fx = path_rng_1D(kg, rng_hash, sample, num_samples, dimension);
+ *fy = path_rng_1D(kg, rng_hash, sample, num_samples, dimension + 1);
#endif
}
ccl_device_inline void path_rng_init(KernelGlobals *kg,
- int sample, int num_samples,
+ int sample,
+ int num_samples,
uint *rng_hash,
- int x, int y,
- float *fx, float *fy)
+ int x,
+ int y,
+ float *fx,
+ float *fy)
{
- /* load state */
- *rng_hash = hash_int_2d(x, y);
- *rng_hash ^= kernel_data.integrator.seed;
+ /* load state */
+ *rng_hash = hash_int_2d(x, y);
+ *rng_hash ^= kernel_data.integrator.seed;
#ifdef __DEBUG_CORRELATION__
- srand48(*rng_hash + sample);
+ srand48(*rng_hash + sample);
#endif
- if(sample == 0) {
- *fx = 0.5f;
- *fy = 0.5f;
- }
- else {
- path_rng_2D(kg, *rng_hash, sample, num_samples, PRNG_FILTER_U, fx, fy);
- }
+ if (sample == 0) {
+ *fx = 0.5f;
+ *fy = 0.5f;
+ }
+ else {
+ path_rng_2D(kg, *rng_hash, sample, num_samples, PRNG_FILTER_U, fx, fy);
+ }
}
/* Linear Congruential Generator */
ccl_device uint lcg_step_uint(uint *rng)
{
- /* implicit mod 2^32 */
- *rng = (1103515245*(*rng) + 12345);
- return *rng;
+ /* implicit mod 2^32 */
+ *rng = (1103515245 * (*rng) + 12345);
+ return *rng;
}
ccl_device float lcg_step_float(uint *rng)
{
- /* implicit mod 2^32 */
- *rng = (1103515245*(*rng) + 12345);
- return (float)*rng * (1.0f/(float)0xFFFFFFFF);
+ /* implicit mod 2^32 */
+ *rng = (1103515245 * (*rng) + 12345);
+ return (float)*rng * (1.0f / (float)0xFFFFFFFF);
}
ccl_device uint lcg_init(uint seed)
{
- uint rng = seed;
- lcg_step_uint(&rng);
- return rng;
+ uint rng = seed;
+ lcg_step_uint(&rng);
+ return rng;
}
/* Path Tracing Utility Functions
@@ -181,118 +182,107 @@ ccl_device_inline float path_state_rng_1D(KernelGlobals *kg,
const ccl_addr_space PathState *state,
int dimension)
{
- return path_rng_1D(kg,
- state->rng_hash,
- state->sample, state->num_samples,
- state->rng_offset + dimension);
+ return path_rng_1D(
+ kg, state->rng_hash, state->sample, state->num_samples, state->rng_offset + dimension);
}
-ccl_device_inline void path_state_rng_2D(KernelGlobals *kg,
- const ccl_addr_space PathState *state,
- int dimension,
- float *fx, float *fy)
+ccl_device_inline void path_state_rng_2D(
+ KernelGlobals *kg, const ccl_addr_space PathState *state, int dimension, float *fx, float *fy)
{
- path_rng_2D(kg,
- state->rng_hash,
- state->sample, state->num_samples,
- state->rng_offset + dimension,
- fx, fy);
+ path_rng_2D(kg,
+ state->rng_hash,
+ state->sample,
+ state->num_samples,
+ state->rng_offset + dimension,
+ fx,
+ fy);
}
ccl_device_inline float path_state_rng_1D_hash(KernelGlobals *kg,
- const ccl_addr_space PathState *state,
- uint hash)
+ const ccl_addr_space PathState *state,
+ uint hash)
{
- /* Use a hash instead of dimension, this is not great but avoids adding
- * more dimensions to each bounce which reduces quality of dimensions we
- * are already using. */
- return path_rng_1D(kg,
- cmj_hash_simple(state->rng_hash, hash),
- state->sample, state->num_samples,
- state->rng_offset);
+ /* Use a hash instead of dimension, this is not great but avoids adding
+ * more dimensions to each bounce which reduces quality of dimensions we
+ * are already using. */
+ return path_rng_1D(kg,
+ cmj_hash_simple(state->rng_hash, hash),
+ state->sample,
+ state->num_samples,
+ state->rng_offset);
}
-ccl_device_inline float path_branched_rng_1D(
- KernelGlobals *kg,
- uint rng_hash,
- const ccl_addr_space PathState *state,
- int branch,
- int num_branches,
- int dimension)
+ccl_device_inline float path_branched_rng_1D(KernelGlobals *kg,
+ uint rng_hash,
+ const ccl_addr_space PathState *state,
+ int branch,
+ int num_branches,
+ int dimension)
{
- return path_rng_1D(kg,
- rng_hash,
- state->sample * num_branches + branch,
- state->num_samples * num_branches,
- state->rng_offset + dimension);
+ return path_rng_1D(kg,
+ rng_hash,
+ state->sample * num_branches + branch,
+ state->num_samples * num_branches,
+ state->rng_offset + dimension);
}
-ccl_device_inline void path_branched_rng_2D(
- KernelGlobals *kg,
- uint rng_hash,
- const ccl_addr_space PathState *state,
- int branch,
- int num_branches,
- int dimension,
- float *fx, float *fy)
+ccl_device_inline void path_branched_rng_2D(KernelGlobals *kg,
+ uint rng_hash,
+ const ccl_addr_space PathState *state,
+ int branch,
+ int num_branches,
+ int dimension,
+ float *fx,
+ float *fy)
{
- path_rng_2D(kg,
- rng_hash,
- state->sample * num_branches + branch,
- state->num_samples * num_branches,
- state->rng_offset + dimension,
- fx, fy);
+ path_rng_2D(kg,
+ rng_hash,
+ state->sample * num_branches + branch,
+ state->num_samples * num_branches,
+ state->rng_offset + dimension,
+ fx,
+ fy);
}
/* Utitility functions to get light termination value,
* since it might not be needed in many cases.
*/
-ccl_device_inline float path_state_rng_light_termination(
- KernelGlobals *kg,
- const ccl_addr_space PathState *state)
+ccl_device_inline float path_state_rng_light_termination(KernelGlobals *kg,
+ const ccl_addr_space PathState *state)
{
- if(kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
- return path_state_rng_1D(kg, state, PRNG_LIGHT_TERMINATE);
- }
- return 0.0f;
+ if (kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
+ return path_state_rng_1D(kg, state, PRNG_LIGHT_TERMINATE);
+ }
+ return 0.0f;
}
-ccl_device_inline float path_branched_rng_light_termination(
- KernelGlobals *kg,
- uint rng_hash,
- const ccl_addr_space PathState *state,
- int branch,
- int num_branches)
+ccl_device_inline float path_branched_rng_light_termination(KernelGlobals *kg,
+ uint rng_hash,
+ const ccl_addr_space PathState *state,
+ int branch,
+ int num_branches)
{
- if(kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
- return path_branched_rng_1D(kg,
- rng_hash,
- state,
- branch,
- num_branches,
- PRNG_LIGHT_TERMINATE);
- }
- return 0.0f;
+ if (kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
+ return path_branched_rng_1D(kg, rng_hash, state, branch, num_branches, PRNG_LIGHT_TERMINATE);
+ }
+ return 0.0f;
}
-ccl_device_inline uint lcg_state_init(PathState *state,
- uint scramble)
+ccl_device_inline uint lcg_state_init(PathState *state, uint scramble)
{
- return lcg_init(state->rng_hash + state->rng_offset + state->sample*scramble);
+ return lcg_init(state->rng_hash + state->rng_offset + state->sample * scramble);
}
-ccl_device_inline uint lcg_state_init_addrspace(ccl_addr_space PathState *state,
- uint scramble)
+ccl_device_inline uint lcg_state_init_addrspace(ccl_addr_space PathState *state, uint scramble)
{
- return lcg_init(state->rng_hash + state->rng_offset + state->sample*scramble);
+ return lcg_init(state->rng_hash + state->rng_offset + state->sample * scramble);
}
-
ccl_device float lcg_step_float_addrspace(ccl_addr_space uint *rng)
{
- /* Implicit mod 2^32 */
- *rng = (1103515245*(*rng) + 12345);
- return (float)*rng * (1.0f/(float)0xFFFFFFFF);
+ /* Implicit mod 2^32 */
+ *rng = (1103515245 * (*rng) + 12345);
+ return (float)*rng * (1.0f / (float)0xFFFFFFFF);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index b1da523501d..351b623addb 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -37,14 +37,14 @@ CCL_NAMESPACE_BEGIN
#ifdef __OBJECT_MOTION__
ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
{
- if(sd->object_flag & SD_OBJECT_MOTION) {
- sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time);
- sd->ob_itfm = transform_quick_inverse(sd->ob_tfm);
- }
- else {
- sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
- sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
- }
+ if (sd->object_flag & SD_OBJECT_MOTION) {
+ sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time);
+ sd->ob_itfm = transform_quick_inverse(sd->ob_tfm);
+ }
+ else {
+ sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
+ sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
+ }
}
#endif
@@ -53,104 +53,104 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
const Intersection *isect,
const Ray *ray)
{
- PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
+ PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
#ifdef __INSTANCING__
- sd->object = (isect->object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object;
+ sd->object = (isect->object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, isect->prim) :
+ isect->object;
#endif
- sd->lamp = LAMP_NONE;
+ sd->lamp = LAMP_NONE;
- sd->type = isect->type;
- sd->flag = 0;
- sd->object_flag = kernel_tex_fetch(__object_flag,
- sd->object);
+ sd->type = isect->type;
+ sd->flag = 0;
+ sd->object_flag = kernel_tex_fetch(__object_flag, sd->object);
- /* matrices and time */
+ /* matrices and time */
#ifdef __OBJECT_MOTION__
- shader_setup_object_transforms(kg, sd, ray->time);
+ shader_setup_object_transforms(kg, sd, ray->time);
#endif
- sd->time = ray->time;
+ sd->time = ray->time;
- sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
- sd->ray_length = isect->t;
+ sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
+ sd->ray_length = isect->t;
#ifdef __UV__
- sd->u = isect->u;
- sd->v = isect->v;
+ sd->u = isect->u;
+ sd->v = isect->v;
#endif
#ifdef __HAIR__
- if(sd->type & PRIMITIVE_ALL_CURVE) {
- /* curve */
- float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
-
- sd->shader = __float_as_int(curvedata.z);
- sd->P = curve_refine(kg, sd, isect, ray);
- }
- else
+ if (sd->type & PRIMITIVE_ALL_CURVE) {
+ /* curve */
+ float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
+
+ sd->shader = __float_as_int(curvedata.z);
+ sd->P = curve_refine(kg, sd, isect, ray);
+ }
+ else
#endif
- if(sd->type & PRIMITIVE_TRIANGLE) {
- /* static triangle */
- float3 Ng = triangle_normal(kg, sd);
- sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
+ if (sd->type & PRIMITIVE_TRIANGLE) {
+ /* static triangle */
+ float3 Ng = triangle_normal(kg, sd);
+ sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
- /* vectors */
- sd->P = triangle_refine(kg, sd, isect, ray);
- sd->Ng = Ng;
- sd->N = Ng;
+ /* vectors */
+ sd->P = triangle_refine(kg, sd, isect, ray);
+ sd->Ng = Ng;
+ sd->N = Ng;
- /* smooth normal */
- if(sd->shader & SHADER_SMOOTH_NORMAL)
- sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v);
+ /* smooth normal */
+ if (sd->shader & SHADER_SMOOTH_NORMAL)
+ sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v);
#ifdef __DPDU__
- /* dPdu/dPdv */
- triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
+ /* dPdu/dPdv */
+ triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
#endif
- }
- else {
- /* motion triangle */
- motion_triangle_shader_setup(kg, sd, isect, ray, false);
- }
+ }
+ else {
+ /* motion triangle */
+ motion_triangle_shader_setup(kg, sd, isect, ray, false);
+ }
- sd->I = -ray->D;
+ sd->I = -ray->D;
- sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
+ sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
#ifdef __INSTANCING__
- if(isect->object != OBJECT_NONE) {
- /* instance transform */
- object_normal_transform_auto(kg, sd, &sd->N);
- object_normal_transform_auto(kg, sd, &sd->Ng);
+ if (isect->object != OBJECT_NONE) {
+ /* instance transform */
+ object_normal_transform_auto(kg, sd, &sd->N);
+ object_normal_transform_auto(kg, sd, &sd->Ng);
# ifdef __DPDU__
- object_dir_transform_auto(kg, sd, &sd->dPdu);
- object_dir_transform_auto(kg, sd, &sd->dPdv);
+ object_dir_transform_auto(kg, sd, &sd->dPdu);
+ object_dir_transform_auto(kg, sd, &sd->dPdv);
# endif
- }
+ }
#endif
- /* backfacing test */
- bool backfacing = (dot(sd->Ng, sd->I) < 0.0f);
+ /* backfacing test */
+ bool backfacing = (dot(sd->Ng, sd->I) < 0.0f);
- if(backfacing) {
- sd->flag |= SD_BACKFACING;
- sd->Ng = -sd->Ng;
- sd->N = -sd->N;
+ if (backfacing) {
+ sd->flag |= SD_BACKFACING;
+ sd->Ng = -sd->Ng;
+ sd->N = -sd->N;
#ifdef __DPDU__
- sd->dPdu = -sd->dPdu;
- sd->dPdv = -sd->dPdv;
+ sd->dPdu = -sd->dPdu;
+ sd->dPdv = -sd->dPdv;
#endif
- }
+ }
#ifdef __RAY_DIFFERENTIALS__
- /* differentials */
- differential_transfer(&sd->dP, ray->dP, ray->D, ray->dD, sd->Ng, isect->t);
- differential_incoming(&sd->dI, ray->dD);
- differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
+ /* differentials */
+ differential_transfer(&sd->dP, ray->dP, ray->D, ray->dD, sd->Ng, isect->t);
+ differential_incoming(&sd->dI, ray->dD);
+ differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
#endif
- PROFILING_SHADER(sd->shader);
- PROFILING_OBJECT(sd->object);
+ PROFILING_SHADER(sd->shader);
+ PROFILING_OBJECT(sd->object);
}
/* ShaderData setup from BSSRDF scatter */
@@ -161,86 +161,86 @@ ccl_device
# else
ccl_device_inline
# endif
-void shader_setup_from_subsurface(
- KernelGlobals *kg,
- ShaderData *sd,
- const Intersection *isect,
- const Ray *ray)
+ void
+ shader_setup_from_subsurface(KernelGlobals *kg,
+ ShaderData *sd,
+ const Intersection *isect,
+ const Ray *ray)
{
- PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
+ PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
- const bool backfacing = sd->flag & SD_BACKFACING;
+ const bool backfacing = sd->flag & SD_BACKFACING;
- /* object, matrices, time, ray_length stay the same */
- sd->flag = 0;
- sd->object_flag = kernel_tex_fetch(__object_flag, sd->object);
- sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
- sd->type = isect->type;
+ /* object, matrices, time, ray_length stay the same */
+ sd->flag = 0;
+ sd->object_flag = kernel_tex_fetch(__object_flag, sd->object);
+ sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
+ sd->type = isect->type;
# ifdef __UV__
- sd->u = isect->u;
- sd->v = isect->v;
+ sd->u = isect->u;
+ sd->v = isect->v;
# endif
- /* fetch triangle data */
- if(sd->type == PRIMITIVE_TRIANGLE) {
- float3 Ng = triangle_normal(kg, sd);
- sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
+ /* fetch triangle data */
+ if (sd->type == PRIMITIVE_TRIANGLE) {
+ float3 Ng = triangle_normal(kg, sd);
+ sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
- /* static triangle */
- sd->P = triangle_refine_local(kg, sd, isect, ray);
- sd->Ng = Ng;
- sd->N = Ng;
+ /* static triangle */
+ sd->P = triangle_refine_local(kg, sd, isect, ray);
+ sd->Ng = Ng;
+ sd->N = Ng;
- if(sd->shader & SHADER_SMOOTH_NORMAL)
- sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v);
+ if (sd->shader & SHADER_SMOOTH_NORMAL)
+ sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v);
# ifdef __DPDU__
- /* dPdu/dPdv */
- triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
+ /* dPdu/dPdv */
+ triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
# endif
- }
- else {
- /* motion triangle */
- motion_triangle_shader_setup(kg, sd, isect, ray, true);
- }
+ }
+ else {
+ /* motion triangle */
+ motion_triangle_shader_setup(kg, sd, isect, ray, true);
+ }
- sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
+ sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
# ifdef __INSTANCING__
- if(isect->object != OBJECT_NONE) {
- /* instance transform */
- object_normal_transform_auto(kg, sd, &sd->N);
- object_normal_transform_auto(kg, sd, &sd->Ng);
+ if (isect->object != OBJECT_NONE) {
+ /* instance transform */
+ object_normal_transform_auto(kg, sd, &sd->N);
+ object_normal_transform_auto(kg, sd, &sd->Ng);
# ifdef __DPDU__
- object_dir_transform_auto(kg, sd, &sd->dPdu);
- object_dir_transform_auto(kg, sd, &sd->dPdv);
+ object_dir_transform_auto(kg, sd, &sd->dPdu);
+ object_dir_transform_auto(kg, sd, &sd->dPdv);
# endif
- }
+ }
# endif
- /* backfacing test */
- if(backfacing) {
- sd->flag |= SD_BACKFACING;
- sd->Ng = -sd->Ng;
- sd->N = -sd->N;
+ /* backfacing test */
+ if (backfacing) {
+ sd->flag |= SD_BACKFACING;
+ sd->Ng = -sd->Ng;
+ sd->N = -sd->N;
# ifdef __DPDU__
- sd->dPdu = -sd->dPdu;
- sd->dPdv = -sd->dPdv;
+ sd->dPdu = -sd->dPdu;
+ sd->dPdv = -sd->dPdv;
# endif
- }
+ }
- /* should not get used in principle as the shading will only use a diffuse
- * BSDF, but the shader might still access it */
- sd->I = sd->N;
+ /* should not get used in principle as the shading will only use a diffuse
+ * BSDF, but the shader might still access it */
+ sd->I = sd->N;
# ifdef __RAY_DIFFERENTIALS__
- /* differentials */
- differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
- /* don't modify dP and dI */
+ /* differentials */
+ differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
+ /* don't modify dP and dI */
# endif
- PROFILING_SHADER(sd->shader);
+ PROFILING_SHADER(sd->shader);
}
#endif
@@ -251,194 +251,208 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
const float3 P,
const float3 Ng,
const float3 I,
- int shader, int object, int prim,
- float u, float v, float t,
+ int shader,
+ int object,
+ int prim,
+ float u,
+ float v,
+ float t,
float time,
bool object_space,
int lamp)
{
- PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
-
- /* vectors */
- sd->P = P;
- sd->N = Ng;
- sd->Ng = Ng;
- sd->I = I;
- sd->shader = shader;
- if(prim != PRIM_NONE)
- sd->type = PRIMITIVE_TRIANGLE;
- else if(lamp != LAMP_NONE)
- sd->type = PRIMITIVE_LAMP;
- else
- sd->type = PRIMITIVE_NONE;
-
- /* primitive */
+ PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
+
+ /* vectors */
+ sd->P = P;
+ sd->N = Ng;
+ sd->Ng = Ng;
+ sd->I = I;
+ sd->shader = shader;
+ if (prim != PRIM_NONE)
+ sd->type = PRIMITIVE_TRIANGLE;
+ else if (lamp != LAMP_NONE)
+ sd->type = PRIMITIVE_LAMP;
+ else
+ sd->type = PRIMITIVE_NONE;
+
+ /* primitive */
#ifdef __INSTANCING__
- sd->object = object;
+ sd->object = object;
#endif
- sd->lamp = LAMP_NONE;
- /* currently no access to bvh prim index for strand sd->prim*/
- sd->prim = prim;
+ sd->lamp = LAMP_NONE;
+ /* currently no access to bvh prim index for strand sd->prim*/
+ sd->prim = prim;
#ifdef __UV__
- sd->u = u;
- sd->v = v;
+ sd->u = u;
+ sd->v = v;
#endif
- sd->time = time;
- sd->ray_length = t;
+ sd->time = time;
+ sd->ray_length = t;
- sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
- sd->object_flag = 0;
- if(sd->object != OBJECT_NONE) {
- sd->object_flag |= kernel_tex_fetch(__object_flag,
- sd->object);
+ sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
+ sd->object_flag = 0;
+ if (sd->object != OBJECT_NONE) {
+ sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object);
#ifdef __OBJECT_MOTION__
- shader_setup_object_transforms(kg, sd, time);
- }
- else if(lamp != LAMP_NONE) {
- sd->ob_tfm = lamp_fetch_transform(kg, lamp, false);
- sd->ob_itfm = lamp_fetch_transform(kg, lamp, true);
- sd->lamp = lamp;
+ shader_setup_object_transforms(kg, sd, time);
+ }
+ else if (lamp != LAMP_NONE) {
+ sd->ob_tfm = lamp_fetch_transform(kg, lamp, false);
+ sd->ob_itfm = lamp_fetch_transform(kg, lamp, true);
+ sd->lamp = lamp;
#else
- }
- else if(lamp != LAMP_NONE) {
- sd->lamp = lamp;
+ }
+ else if (lamp != LAMP_NONE) {
+ sd->lamp = lamp;
#endif
- }
+ }
- /* transform into world space */
- if(object_space) {
- object_position_transform_auto(kg, sd, &sd->P);
- object_normal_transform_auto(kg, sd, &sd->Ng);
- sd->N = sd->Ng;
- object_dir_transform_auto(kg, sd, &sd->I);
- }
+ /* transform into world space */
+ if (object_space) {
+ object_position_transform_auto(kg, sd, &sd->P);
+ object_normal_transform_auto(kg, sd, &sd->Ng);
+ sd->N = sd->Ng;
+ object_dir_transform_auto(kg, sd, &sd->I);
+ }
- if(sd->type & PRIMITIVE_TRIANGLE) {
- /* smooth normal */
- if(sd->shader & SHADER_SMOOTH_NORMAL) {
- sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v);
+ if (sd->type & PRIMITIVE_TRIANGLE) {
+ /* smooth normal */
+ if (sd->shader & SHADER_SMOOTH_NORMAL) {
+ sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v);
#ifdef __INSTANCING__
- if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
- object_normal_transform_auto(kg, sd, &sd->N);
- }
+ if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+ object_normal_transform_auto(kg, sd, &sd->N);
+ }
#endif
- }
+ }
- /* dPdu/dPdv */
+ /* dPdu/dPdv */
#ifdef __DPDU__
- triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
+ triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
# ifdef __INSTANCING__
- if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
- object_dir_transform_auto(kg, sd, &sd->dPdu);
- object_dir_transform_auto(kg, sd, &sd->dPdv);
- }
+ if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+ object_dir_transform_auto(kg, sd, &sd->dPdu);
+ object_dir_transform_auto(kg, sd, &sd->dPdv);
+ }
# endif
#endif
- }
- else {
+ }
+ else {
#ifdef __DPDU__
- sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
- sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
+ sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
+ sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
#endif
- }
+ }
- /* backfacing test */
- if(sd->prim != PRIM_NONE) {
- bool backfacing = (dot(sd->Ng, sd->I) < 0.0f);
+ /* backfacing test */
+ if (sd->prim != PRIM_NONE) {
+ bool backfacing = (dot(sd->Ng, sd->I) < 0.0f);
- if(backfacing) {
- sd->flag |= SD_BACKFACING;
- sd->Ng = -sd->Ng;
- sd->N = -sd->N;
+ if (backfacing) {
+ sd->flag |= SD_BACKFACING;
+ sd->Ng = -sd->Ng;
+ sd->N = -sd->N;
#ifdef __DPDU__
- sd->dPdu = -sd->dPdu;
- sd->dPdv = -sd->dPdv;
+ sd->dPdu = -sd->dPdu;
+ sd->dPdv = -sd->dPdv;
#endif
- }
- }
+ }
+ }
#ifdef __RAY_DIFFERENTIALS__
- /* no ray differentials here yet */
- sd->dP = differential3_zero();
- sd->dI = differential3_zero();
- sd->du = differential_zero();
- sd->dv = differential_zero();
+ /* no ray differentials here yet */
+ sd->dP = differential3_zero();
+ sd->dI = differential3_zero();
+ sd->du = differential_zero();
+ sd->dv = differential_zero();
#endif
- PROFILING_SHADER(sd->shader);
- PROFILING_OBJECT(sd->object);
+ PROFILING_SHADER(sd->shader);
+ PROFILING_OBJECT(sd->object);
}
/* ShaderData setup for displacement */
-ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
- int object, int prim, float u, float v)
+ccl_device void shader_setup_from_displace(
+ KernelGlobals *kg, ShaderData *sd, int object, int prim, float u, float v)
{
- float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f);
- int shader;
-
- triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
-
- /* force smooth shading for displacement */
- shader |= SHADER_SMOOTH_NORMAL;
-
- shader_setup_from_sample(kg, sd,
- P, Ng, I,
- shader, object, prim,
- u, v, 0.0f, 0.5f,
- !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED),
- LAMP_NONE);
+ float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f);
+ int shader;
+
+ triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
+
+ /* force smooth shading for displacement */
+ shader |= SHADER_SMOOTH_NORMAL;
+
+ shader_setup_from_sample(
+ kg,
+ sd,
+ P,
+ Ng,
+ I,
+ shader,
+ object,
+ prim,
+ u,
+ v,
+ 0.0f,
+ 0.5f,
+ !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED),
+ LAMP_NONE);
}
/* ShaderData setup from ray into background */
-ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
+ccl_device_inline void shader_setup_from_background(KernelGlobals *kg,
+ ShaderData *sd,
+ const Ray *ray)
{
- PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
-
- /* vectors */
- sd->P = ray->D;
- sd->N = -ray->D;
- sd->Ng = -ray->D;
- sd->I = -ray->D;
- sd->shader = kernel_data.background.surface_shader;
- sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
- sd->object_flag = 0;
- sd->time = ray->time;
- sd->ray_length = 0.0f;
+ PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
+
+ /* vectors */
+ sd->P = ray->D;
+ sd->N = -ray->D;
+ sd->Ng = -ray->D;
+ sd->I = -ray->D;
+ sd->shader = kernel_data.background.surface_shader;
+ sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
+ sd->object_flag = 0;
+ sd->time = ray->time;
+ sd->ray_length = 0.0f;
#ifdef __INSTANCING__
- sd->object = OBJECT_NONE;
+ sd->object = OBJECT_NONE;
#endif
- sd->lamp = LAMP_NONE;
- sd->prim = PRIM_NONE;
+ sd->lamp = LAMP_NONE;
+ sd->prim = PRIM_NONE;
#ifdef __UV__
- sd->u = 0.0f;
- sd->v = 0.0f;
+ sd->u = 0.0f;
+ sd->v = 0.0f;
#endif
#ifdef __DPDU__
- /* dPdu/dPdv */
- sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
- sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
+ /* dPdu/dPdv */
+ sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
+ sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
#endif
#ifdef __RAY_DIFFERENTIALS__
- /* differentials */
- sd->dP = ray->dD;
- differential_incoming(&sd->dI, sd->dP);
- sd->du = differential_zero();
- sd->dv = differential_zero();
+ /* differentials */
+ sd->dP = ray->dD;
+ differential_incoming(&sd->dI, sd->dP);
+ sd->du = differential_zero();
+ sd->dv = differential_zero();
#endif
- /* for NDC coordinates */
- sd->ray_P = ray->P;
+ /* for NDC coordinates */
+ sd->ray_P = ray->P;
- PROFILING_SHADER(sd->shader);
- PROFILING_OBJECT(sd->object);
+ PROFILING_SHADER(sd->shader);
+ PROFILING_OBJECT(sd->object);
}
/* ShaderData setup from point inside volume */
@@ -446,141 +460,145 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderDat
#ifdef __VOLUME__
ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
{
- PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
-
- /* vectors */
- sd->P = ray->P;
- sd->N = -ray->D;
- sd->Ng = -ray->D;
- sd->I = -ray->D;
- sd->shader = SHADER_NONE;
- sd->flag = 0;
- sd->object_flag = 0;
- sd->time = ray->time;
- sd->ray_length = 0.0f; /* todo: can we set this to some useful value? */
+ PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
+
+ /* vectors */
+ sd->P = ray->P;
+ sd->N = -ray->D;
+ sd->Ng = -ray->D;
+ sd->I = -ray->D;
+ sd->shader = SHADER_NONE;
+ sd->flag = 0;
+ sd->object_flag = 0;
+ sd->time = ray->time;
+ sd->ray_length = 0.0f; /* todo: can we set this to some useful value? */
# ifdef __INSTANCING__
- sd->object = OBJECT_NONE; /* todo: fill this for texture coordinates */
+ sd->object = OBJECT_NONE; /* todo: fill this for texture coordinates */
# endif
- sd->lamp = LAMP_NONE;
- sd->prim = PRIM_NONE;
- sd->type = PRIMITIVE_NONE;
+ sd->lamp = LAMP_NONE;
+ sd->prim = PRIM_NONE;
+ sd->type = PRIMITIVE_NONE;
# ifdef __UV__
- sd->u = 0.0f;
- sd->v = 0.0f;
+ sd->u = 0.0f;
+ sd->v = 0.0f;
# endif
# ifdef __DPDU__
- /* dPdu/dPdv */
- sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
- sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
+ /* dPdu/dPdv */
+ sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
+ sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
# endif
# ifdef __RAY_DIFFERENTIALS__
- /* differentials */
- sd->dP = ray->dD;
- differential_incoming(&sd->dI, sd->dP);
- sd->du = differential_zero();
- sd->dv = differential_zero();
+ /* differentials */
+ sd->dP = ray->dD;
+ differential_incoming(&sd->dI, sd->dP);
+ sd->du = differential_zero();
+ sd->dv = differential_zero();
# endif
- /* for NDC coordinates */
- sd->ray_P = ray->P;
- sd->ray_dP = ray->dP;
+ /* for NDC coordinates */
+ sd->ray_P = ray->P;
+ sd->ray_dP = ray->dP;
- PROFILING_SHADER(sd->shader);
- PROFILING_OBJECT(sd->object);
+ PROFILING_SHADER(sd->shader);
+ PROFILING_OBJECT(sd->object);
}
-#endif /* __VOLUME__ */
+#endif /* __VOLUME__ */
/* Merging */
#if defined(__BRANCHED_PATH__) || defined(__VOLUME__)
ccl_device_inline void shader_merge_closures(ShaderData *sd)
{
- /* merge identical closures, better when we sample a single closure at a time */
- for(int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sci = &sd->closure[i];
-
- for(int j = i + 1; j < sd->num_closure; j++) {
- ShaderClosure *scj = &sd->closure[j];
-
- if(sci->type != scj->type)
- continue;
- if(!bsdf_merge(sci, scj))
- continue;
-
- sci->weight += scj->weight;
- sci->sample_weight += scj->sample_weight;
-
- int size = sd->num_closure - (j+1);
- if(size > 0) {
- for(int k = 0; k < size; k++) {
- scj[k] = scj[k+1];
- }
- }
-
- sd->num_closure--;
- kernel_assert(sd->num_closure >= 0);
- j--;
- }
- }
+ /* merge identical closures, better when we sample a single closure at a time */
+ for (int i = 0; i < sd->num_closure; i++) {
+ ShaderClosure *sci = &sd->closure[i];
+
+ for (int j = i + 1; j < sd->num_closure; j++) {
+ ShaderClosure *scj = &sd->closure[j];
+
+ if (sci->type != scj->type)
+ continue;
+ if (!bsdf_merge(sci, scj))
+ continue;
+
+ sci->weight += scj->weight;
+ sci->sample_weight += scj->sample_weight;
+
+ int size = sd->num_closure - (j + 1);
+ if (size > 0) {
+ for (int k = 0; k < size; k++) {
+ scj[k] = scj[k + 1];
+ }
+ }
+
+ sd->num_closure--;
+ kernel_assert(sd->num_closure >= 0);
+ j--;
+ }
+ }
}
-#endif /* __BRANCHED_PATH__ || __VOLUME__ */
+#endif /* __BRANCHED_PATH__ || __VOLUME__ */
/* Defensive sampling. */
-ccl_device_inline void shader_prepare_closures(ShaderData *sd,
- ccl_addr_space PathState *state)
+ccl_device_inline void shader_prepare_closures(ShaderData *sd, ccl_addr_space PathState *state)
{
- /* We can likely also do defensive sampling at deeper bounces, particularly
- * for cases like a perfect mirror but possibly also others. This will need
- * a good heuristic. */
- if(state->bounce + state->transparent_bounce == 0 && sd->num_closure > 1) {
- float sum = 0.0f;
-
- for(int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
- if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
- sum += sc->sample_weight;
- }
- }
-
- for(int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
- if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
- sc->sample_weight = max(sc->sample_weight, 0.125f * sum);
- }
- }
- }
+ /* We can likely also do defensive sampling at deeper bounces, particularly
+ * for cases like a perfect mirror but possibly also others. This will need
+ * a good heuristic. */
+ if (state->bounce + state->transparent_bounce == 0 && sd->num_closure > 1) {
+ float sum = 0.0f;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ sum += sc->sample_weight;
+ }
+ }
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ sc->sample_weight = max(sc->sample_weight, 0.125f * sum);
+ }
+ }
+ }
}
-
/* BSDF */
-ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, ShaderData *sd, const float3 omega_in, float *pdf,
- const ShaderClosure *skip_sc, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
+ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg,
+ ShaderData *sd,
+ const float3 omega_in,
+ float *pdf,
+ const ShaderClosure *skip_sc,
+ BsdfEval *result_eval,
+ float sum_pdf,
+ float sum_sample_weight)
{
- /* this is the veach one-sample model with balance heuristic, some pdf
- * factors drop out when using balance heuristic weighting */
- for(int i = 0; i < sd->num_closure; i++) {
- const ShaderClosure *sc = &sd->closure[i];
+ /* this is the veach one-sample model with balance heuristic, some pdf
+ * factors drop out when using balance heuristic weighting */
+ for (int i = 0; i < sd->num_closure; i++) {
+ const ShaderClosure *sc = &sd->closure[i];
- if(sc != skip_sc && CLOSURE_IS_BSDF(sc->type)) {
- float bsdf_pdf = 0.0f;
- float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
+ if (sc != skip_sc && CLOSURE_IS_BSDF(sc->type)) {
+ float bsdf_pdf = 0.0f;
+ float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
- if(bsdf_pdf != 0.0f) {
- bsdf_eval_accum(result_eval, sc->type, eval*sc->weight, 1.0f);
- sum_pdf += bsdf_pdf*sc->sample_weight;
- }
+ if (bsdf_pdf != 0.0f) {
+ bsdf_eval_accum(result_eval, sc->type, eval * sc->weight, 1.0f);
+ sum_pdf += bsdf_pdf * sc->sample_weight;
+ }
- sum_sample_weight += sc->sample_weight;
- }
- }
+ sum_sample_weight += sc->sample_weight;
+ }
+ }
- *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
+ *pdf = (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f;
}
#ifdef __BRANCHED_PATH__
@@ -591,633 +609,654 @@ ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg,
float light_pdf,
bool use_mis)
{
- for(int i = 0; i < sd->num_closure; i++) {
- const ShaderClosure *sc = &sd->closure[i];
- if(CLOSURE_IS_BSDF(sc->type)) {
- float bsdf_pdf = 0.0f;
- float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
- if(bsdf_pdf != 0.0f) {
- float mis_weight = use_mis? power_heuristic(light_pdf, bsdf_pdf): 1.0f;
- bsdf_eval_accum(result_eval,
- sc->type,
- eval * sc->weight,
- mis_weight);
- }
- }
- }
+ for (int i = 0; i < sd->num_closure; i++) {
+ const ShaderClosure *sc = &sd->closure[i];
+ if (CLOSURE_IS_BSDF(sc->type)) {
+ float bsdf_pdf = 0.0f;
+ float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
+ if (bsdf_pdf != 0.0f) {
+ float mis_weight = use_mis ? power_heuristic(light_pdf, bsdf_pdf) : 1.0f;
+ bsdf_eval_accum(result_eval, sc->type, eval * sc->weight, mis_weight);
+ }
+ }
+ }
}
-#endif /* __BRANCHED_PATH__ */
-
+#endif /* __BRANCHED_PATH__ */
#ifndef __KERNEL_CUDA__
ccl_device
#else
ccl_device_inline
#endif
-void shader_bsdf_eval(KernelGlobals *kg,
- ShaderData *sd,
- const float3 omega_in,
- BsdfEval *eval,
- float light_pdf,
- bool use_mis)
+ void
+ shader_bsdf_eval(KernelGlobals *kg,
+ ShaderData *sd,
+ const float3 omega_in,
+ BsdfEval *eval,
+ float light_pdf,
+ bool use_mis)
{
- PROFILING_INIT(kg, PROFILING_CLOSURE_EVAL);
+ PROFILING_INIT(kg, PROFILING_CLOSURE_EVAL);
- bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
+ bsdf_eval_init(
+ eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
#ifdef __BRANCHED_PATH__
- if(kernel_data.integrator.branched)
- _shader_bsdf_multi_eval_branched(kg, sd, omega_in, eval, light_pdf, use_mis);
- else
+ if (kernel_data.integrator.branched)
+ _shader_bsdf_multi_eval_branched(kg, sd, omega_in, eval, light_pdf, use_mis);
+ else
#endif
- {
- float pdf;
- _shader_bsdf_multi_eval(kg, sd, omega_in, &pdf, NULL, eval, 0.0f, 0.0f);
- if(use_mis) {
- float weight = power_heuristic(light_pdf, pdf);
- bsdf_eval_mis(eval, weight);
- }
- }
+ {
+ float pdf;
+ _shader_bsdf_multi_eval(kg, sd, omega_in, &pdf, NULL, eval, 0.0f, 0.0f);
+ if (use_mis) {
+ float weight = power_heuristic(light_pdf, pdf);
+ bsdf_eval_mis(eval, weight);
+ }
+ }
}
-ccl_device_inline const ShaderClosure *shader_bsdf_pick(ShaderData *sd,
- float *randu)
+ccl_device_inline const ShaderClosure *shader_bsdf_pick(ShaderData *sd, float *randu)
{
- /* Note the sampling here must match shader_bssrdf_pick,
- * since we reuse the same random number. */
- int sampled = 0;
+ /* Note the sampling here must match shader_bssrdf_pick,
+ * since we reuse the same random number. */
+ int sampled = 0;
- if(sd->num_closure > 1) {
- /* Pick a BSDF or based on sample weights. */
- float sum = 0.0f;
+ if (sd->num_closure > 1) {
+ /* Pick a BSDF or based on sample weights. */
+ float sum = 0.0f;
- for(int i = 0; i < sd->num_closure; i++) {
- const ShaderClosure *sc = &sd->closure[i];
+ for (int i = 0; i < sd->num_closure; i++) {
+ const ShaderClosure *sc = &sd->closure[i];
- if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
- sum += sc->sample_weight;
- }
- }
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ sum += sc->sample_weight;
+ }
+ }
- float r = (*randu)*sum;
- float partial_sum = 0.0f;
+ float r = (*randu) * sum;
+ float partial_sum = 0.0f;
- for(int i = 0; i < sd->num_closure; i++) {
- const ShaderClosure *sc = &sd->closure[i];
+ for (int i = 0; i < sd->num_closure; i++) {
+ const ShaderClosure *sc = &sd->closure[i];
- if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
- float next_sum = partial_sum + sc->sample_weight;
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ float next_sum = partial_sum + sc->sample_weight;
- if(r < next_sum) {
- sampled = i;
+ if (r < next_sum) {
+ sampled = i;
- /* Rescale to reuse for direction sample, to better
- * preserve stratifaction. */
- *randu = (r - partial_sum) / sc->sample_weight;
- break;
- }
+ /* Rescale to reuse for direction sample, to better
+ * preserve stratifaction. */
+ *randu = (r - partial_sum) / sc->sample_weight;
+ break;
+ }
- partial_sum = next_sum;
- }
- }
- }
+ partial_sum = next_sum;
+ }
+ }
+ }
- const ShaderClosure *sc = &sd->closure[sampled];
- return CLOSURE_IS_BSDF(sc->type)? sc: NULL;
+ const ShaderClosure *sc = &sd->closure[sampled];
+ return CLOSURE_IS_BSDF(sc->type) ? sc : NULL;
}
ccl_device_inline const ShaderClosure *shader_bssrdf_pick(ShaderData *sd,
ccl_addr_space float3 *throughput,
float *randu)
{
- /* Note the sampling here must match shader_bsdf_pick,
- * since we reuse the same random number. */
- int sampled = 0;
-
- if(sd->num_closure > 1) {
- /* Pick a BSDF or BSSRDF or based on sample weights. */
- float sum_bsdf = 0.0f;
- float sum_bssrdf = 0.0f;
-
- for(int i = 0; i < sd->num_closure; i++) {
- const ShaderClosure *sc = &sd->closure[i];
-
- if(CLOSURE_IS_BSDF(sc->type)) {
- sum_bsdf += sc->sample_weight;
- }
- else if(CLOSURE_IS_BSSRDF(sc->type)) {
- sum_bssrdf += sc->sample_weight;
- }
- }
-
- float r = (*randu)*(sum_bsdf + sum_bssrdf);
- float partial_sum = 0.0f;
-
- for(int i = 0; i < sd->num_closure; i++) {
- const ShaderClosure *sc = &sd->closure[i];
-
- if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
- float next_sum = partial_sum + sc->sample_weight;
-
- if(r < next_sum) {
- if(CLOSURE_IS_BSDF(sc->type)) {
- *throughput *= (sum_bsdf + sum_bssrdf) / sum_bsdf;
- return NULL;
- }
- else {
- *throughput *= (sum_bsdf + sum_bssrdf) / sum_bssrdf;
- sampled = i;
-
- /* Rescale to reuse for direction sample, to better
- * preserve stratifaction. */
- *randu = (r - partial_sum) / sc->sample_weight;
- break;
- }
- }
-
- partial_sum = next_sum;
- }
- }
- }
-
- const ShaderClosure *sc = &sd->closure[sampled];
- return CLOSURE_IS_BSSRDF(sc->type)? sc: NULL;
+ /* Note the sampling here must match shader_bsdf_pick,
+ * since we reuse the same random number. */
+ int sampled = 0;
+
+ if (sd->num_closure > 1) {
+ /* Pick a BSDF or BSSRDF or based on sample weights. */
+ float sum_bsdf = 0.0f;
+ float sum_bssrdf = 0.0f;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSDF(sc->type)) {
+ sum_bsdf += sc->sample_weight;
+ }
+ else if (CLOSURE_IS_BSSRDF(sc->type)) {
+ sum_bssrdf += sc->sample_weight;
+ }
+ }
+
+ float r = (*randu) * (sum_bsdf + sum_bssrdf);
+ float partial_sum = 0.0f;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ float next_sum = partial_sum + sc->sample_weight;
+
+ if (r < next_sum) {
+ if (CLOSURE_IS_BSDF(sc->type)) {
+ *throughput *= (sum_bsdf + sum_bssrdf) / sum_bsdf;
+ return NULL;
+ }
+ else {
+ *throughput *= (sum_bsdf + sum_bssrdf) / sum_bssrdf;
+ sampled = i;
+
+ /* Rescale to reuse for direction sample, to better
+ * preserve stratifaction. */
+ *randu = (r - partial_sum) / sc->sample_weight;
+ break;
+ }
+ }
+
+ partial_sum = next_sum;
+ }
+ }
+ }
+
+ const ShaderClosure *sc = &sd->closure[sampled];
+ return CLOSURE_IS_BSSRDF(sc->type) ? sc : NULL;
}
ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg,
ShaderData *sd,
- float randu, float randv,
+ float randu,
+ float randv,
BsdfEval *bsdf_eval,
float3 *omega_in,
differential3 *domega_in,
float *pdf)
{
- PROFILING_INIT(kg, PROFILING_CLOSURE_SAMPLE);
+ PROFILING_INIT(kg, PROFILING_CLOSURE_SAMPLE);
- const ShaderClosure *sc = shader_bsdf_pick(sd, &randu);
- if(sc == NULL) {
- *pdf = 0.0f;
- return LABEL_NONE;
- }
+ const ShaderClosure *sc = shader_bsdf_pick(sd, &randu);
+ if (sc == NULL) {
+ *pdf = 0.0f;
+ return LABEL_NONE;
+ }
- /* BSSRDF should already have been handled elsewhere. */
- kernel_assert(CLOSURE_IS_BSDF(sc->type));
+ /* BSSRDF should already have been handled elsewhere. */
+ kernel_assert(CLOSURE_IS_BSDF(sc->type));
- int label;
- float3 eval;
+ int label;
+ float3 eval;
- *pdf = 0.0f;
- label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
+ *pdf = 0.0f;
+ label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
- if(*pdf != 0.0f) {
- bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
+ if (*pdf != 0.0f) {
+ bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight, kernel_data.film.use_light_pass);
- if(sd->num_closure > 1) {
- float sweight = sc->sample_weight;
- _shader_bsdf_multi_eval(kg, sd, *omega_in, pdf, sc, bsdf_eval, *pdf*sweight, sweight);
- }
- }
+ if (sd->num_closure > 1) {
+ float sweight = sc->sample_weight;
+ _shader_bsdf_multi_eval(kg, sd, *omega_in, pdf, sc, bsdf_eval, *pdf * sweight, sweight);
+ }
+ }
- return label;
+ return label;
}
-ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, ShaderData *sd,
- const ShaderClosure *sc, float randu, float randv, BsdfEval *bsdf_eval,
- float3 *omega_in, differential3 *domega_in, float *pdf)
+ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg,
+ ShaderData *sd,
+ const ShaderClosure *sc,
+ float randu,
+ float randv,
+ BsdfEval *bsdf_eval,
+ float3 *omega_in,
+ differential3 *domega_in,
+ float *pdf)
{
- PROFILING_INIT(kg, PROFILING_CLOSURE_SAMPLE);
+ PROFILING_INIT(kg, PROFILING_CLOSURE_SAMPLE);
- int label;
- float3 eval;
+ int label;
+ float3 eval;
- *pdf = 0.0f;
- label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
+ *pdf = 0.0f;
+ label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
- if(*pdf != 0.0f)
- bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
+ if (*pdf != 0.0f)
+ bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight, kernel_data.film.use_light_pass);
- return label;
+ return label;
}
ccl_device float shader_bsdf_average_roughness(ShaderData *sd)
{
- float roughness = 0.0f;
- float sum_weight = 0.0f;
-
- for(int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
-
- if(CLOSURE_IS_BSDF(sc->type)) {
- /* sqrt once to undo the squaring from multiplying roughness on the
- * two axes, and once for the squared roughness convention. */
- float weight = fabsf(average(sc->weight));
- roughness += weight * sqrtf(safe_sqrtf(bsdf_get_roughness_squared(sc)));
- sum_weight += weight;
- }
- }
-
- return (sum_weight > 0.0f) ? roughness / sum_weight : 0.0f;
+ float roughness = 0.0f;
+ float sum_weight = 0.0f;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSDF(sc->type)) {
+ /* sqrt once to undo the squaring from multiplying roughness on the
+ * two axes, and once for the squared roughness convention. */
+ float weight = fabsf(average(sc->weight));
+ roughness += weight * sqrtf(safe_sqrtf(bsdf_get_roughness_squared(sc)));
+ sum_weight += weight;
+ }
+ }
+
+ return (sum_weight > 0.0f) ? roughness / sum_weight : 0.0f;
}
ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughness)
{
- for(int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
+ for (int i = 0; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
- if(CLOSURE_IS_BSDF(sc->type))
- bsdf_blur(kg, sc, roughness);
- }
+ if (CLOSURE_IS_BSDF(sc->type))
+ bsdf_blur(kg, sc, roughness);
+ }
}
ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, const ShaderData *sd)
{
- if(sd->flag & SD_HAS_ONLY_VOLUME) {
- return make_float3(1.0f, 1.0f, 1.0f);
- }
- else if(sd->flag & SD_TRANSPARENT) {
- return sd->closure_transparent_extinction;
- }
- else {
- return make_float3(0.0f, 0.0f, 0.0f);
- }
+ if (sd->flag & SD_HAS_ONLY_VOLUME) {
+ return make_float3(1.0f, 1.0f, 1.0f);
+ }
+ else if (sd->flag & SD_TRANSPARENT) {
+ return sd->closure_transparent_extinction;
+ }
+ else {
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
}
ccl_device void shader_bsdf_disable_transparency(KernelGlobals *kg, ShaderData *sd)
{
- if(sd->flag & SD_TRANSPARENT) {
- for(int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
-
- if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) {
- sc->sample_weight = 0.0f;
- sc->weight = make_float3(0.0f, 0.0f, 0.0f);
- }
- }
-
- sd->flag &= ~SD_TRANSPARENT;
- }
+ if (sd->flag & SD_TRANSPARENT) {
+ for (int i = 0; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
+
+ if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) {
+ sc->sample_weight = 0.0f;
+ sc->weight = make_float3(0.0f, 0.0f, 0.0f);
+ }
+ }
+
+ sd->flag &= ~SD_TRANSPARENT;
+ }
}
ccl_device float3 shader_bsdf_alpha(KernelGlobals *kg, ShaderData *sd)
{
- float3 alpha = make_float3(1.0f, 1.0f, 1.0f) - shader_bsdf_transparency(kg, sd);
+ float3 alpha = make_float3(1.0f, 1.0f, 1.0f) - shader_bsdf_transparency(kg, sd);
- alpha = max(alpha, make_float3(0.0f, 0.0f, 0.0f));
- alpha = min(alpha, make_float3(1.0f, 1.0f, 1.0f));
+ alpha = max(alpha, make_float3(0.0f, 0.0f, 0.0f));
+ alpha = min(alpha, make_float3(1.0f, 1.0f, 1.0f));
- return alpha;
+ return alpha;
}
ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd)
{
- float3 eval = make_float3(0.0f, 0.0f, 0.0f);
+ float3 eval = make_float3(0.0f, 0.0f, 0.0f);
- for(int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
+ for (int i = 0; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
- if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
- eval += sc->weight;
- }
+ if (CLOSURE_IS_BSDF_DIFFUSE(sc->type))
+ eval += sc->weight;
+ }
- return eval;
+ return eval;
}
ccl_device float3 shader_bsdf_glossy(KernelGlobals *kg, ShaderData *sd)
{
- float3 eval = make_float3(0.0f, 0.0f, 0.0f);
+ float3 eval = make_float3(0.0f, 0.0f, 0.0f);
- for(int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
+ for (int i = 0; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
- if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
- eval += sc->weight;
- }
+ if (CLOSURE_IS_BSDF_GLOSSY(sc->type))
+ eval += sc->weight;
+ }
- return eval;
+ return eval;
}
ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd)
{
- float3 eval = make_float3(0.0f, 0.0f, 0.0f);
+ float3 eval = make_float3(0.0f, 0.0f, 0.0f);
- for(int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
+ for (int i = 0; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
- if(CLOSURE_IS_BSDF_TRANSMISSION(sc->type))
- eval += sc->weight;
- }
+ if (CLOSURE_IS_BSDF_TRANSMISSION(sc->type))
+ eval += sc->weight;
+ }
- return eval;
+ return eval;
}
ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd)
{
- float3 eval = make_float3(0.0f, 0.0f, 0.0f);
+ float3 eval = make_float3(0.0f, 0.0f, 0.0f);
- for(int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
+ for (int i = 0; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
- if(CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type))
- eval += sc->weight;
- }
+ if (CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type))
+ eval += sc->weight;
+ }
- return eval;
+ return eval;
}
ccl_device float3 shader_bsdf_average_normal(KernelGlobals *kg, ShaderData *sd)
{
- float3 N = make_float3(0.0f, 0.0f, 0.0f);
+ float3 N = make_float3(0.0f, 0.0f, 0.0f);
- for(int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
- if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type))
- N += sc->N*fabsf(average(sc->weight));
- }
+ for (int i = 0; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type))
+ N += sc->N * fabsf(average(sc->weight));
+ }
- return (is_zero(N))? sd->N : normalize(N);
+ return (is_zero(N)) ? sd->N : normalize(N);
}
ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N_)
{
- float3 eval = make_float3(0.0f, 0.0f, 0.0f);
- float3 N = make_float3(0.0f, 0.0f, 0.0f);
+ float3 eval = make_float3(0.0f, 0.0f, 0.0f);
+ float3 N = make_float3(0.0f, 0.0f, 0.0f);
- for(int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
+ for (int i = 0; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
- if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
- const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
- eval += sc->weight*ao_factor;
- N += bsdf->N*fabsf(average(sc->weight));
- }
- }
+ if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
+ const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc;
+ eval += sc->weight * ao_factor;
+ N += bsdf->N * fabsf(average(sc->weight));
+ }
+ }
- *N_ = (is_zero(N))? sd->N : normalize(N);
- return eval;
+ *N_ = (is_zero(N)) ? sd->N : normalize(N);
+ return eval;
}
#ifdef __SUBSURFACE__
ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_blur_)
{
- float3 eval = make_float3(0.0f, 0.0f, 0.0f);
- float3 N = make_float3(0.0f, 0.0f, 0.0f);
- float texture_blur = 0.0f, weight_sum = 0.0f;
+ float3 eval = make_float3(0.0f, 0.0f, 0.0f);
+ float3 N = make_float3(0.0f, 0.0f, 0.0f);
+ float texture_blur = 0.0f, weight_sum = 0.0f;
- for(int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
+ for (int i = 0; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
- if(CLOSURE_IS_BSSRDF(sc->type)) {
- const Bssrdf *bssrdf = (const Bssrdf*)sc;
- float avg_weight = fabsf(average(sc->weight));
+ if (CLOSURE_IS_BSSRDF(sc->type)) {
+ const Bssrdf *bssrdf = (const Bssrdf *)sc;
+ float avg_weight = fabsf(average(sc->weight));
- N += bssrdf->N*avg_weight;
- eval += sc->weight;
- texture_blur += bssrdf->texture_blur*avg_weight;
- weight_sum += avg_weight;
- }
- }
+ N += bssrdf->N * avg_weight;
+ eval += sc->weight;
+ texture_blur += bssrdf->texture_blur * avg_weight;
+ weight_sum += avg_weight;
+ }
+ }
- if(N_)
- *N_ = (is_zero(N))? sd->N: normalize(N);
+ if (N_)
+ *N_ = (is_zero(N)) ? sd->N : normalize(N);
- if(texture_blur_)
- *texture_blur_ = safe_divide(texture_blur, weight_sum);
+ if (texture_blur_)
+ *texture_blur_ = safe_divide(texture_blur, weight_sum);
- return eval;
+ return eval;
}
-#endif /* __SUBSURFACE__ */
+#endif /* __SUBSURFACE__ */
/* Constant emission optimization */
ccl_device bool shader_constant_emission_eval(KernelGlobals *kg, int shader, float3 *eval)
{
- int shader_index = shader & SHADER_MASK;
- int shader_flag = kernel_tex_fetch(__shaders, shader_index).flags;
+ int shader_index = shader & SHADER_MASK;
+ int shader_flag = kernel_tex_fetch(__shaders, shader_index).flags;
- if (shader_flag & SD_HAS_CONSTANT_EMISSION) {
- *eval = make_float3(
- kernel_tex_fetch(__shaders, shader_index).constant_emission[0],
- kernel_tex_fetch(__shaders, shader_index).constant_emission[1],
- kernel_tex_fetch(__shaders, shader_index).constant_emission[2]);
+ if (shader_flag & SD_HAS_CONSTANT_EMISSION) {
+ *eval = make_float3(kernel_tex_fetch(__shaders, shader_index).constant_emission[0],
+ kernel_tex_fetch(__shaders, shader_index).constant_emission[1],
+ kernel_tex_fetch(__shaders, shader_index).constant_emission[2]);
- return true;
- }
+ return true;
+ }
- return false;
+ return false;
}
/* Background */
ccl_device float3 shader_background_eval(ShaderData *sd)
{
- if(sd->flag & SD_EMISSION) {
- return sd->closure_emission_background;
- }
- else {
- return make_float3(0.0f, 0.0f, 0.0f);
- }
+ if (sd->flag & SD_EMISSION) {
+ return sd->closure_emission_background;
+ }
+ else {
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
}
/* Emission */
ccl_device float3 shader_emissive_eval(ShaderData *sd)
{
- if(sd->flag & SD_EMISSION) {
- return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background;
- }
- else {
- return make_float3(0.0f, 0.0f, 0.0f);
- }
+ if (sd->flag & SD_EMISSION) {
+ return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background;
+ }
+ else {
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
}
/* Holdout */
ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
{
- float3 weight = make_float3(0.0f, 0.0f, 0.0f);
+ float3 weight = make_float3(0.0f, 0.0f, 0.0f);
- for(int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
+ for (int i = 0; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
- if(CLOSURE_IS_HOLDOUT(sc->type))
- weight += sc->weight;
- }
+ if (CLOSURE_IS_HOLDOUT(sc->type))
+ weight += sc->weight;
+ }
- return weight;
+ return weight;
}
/* Surface Evaluation */
-ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
- ccl_addr_space PathState *state, int path_flag)
+ccl_device void shader_eval_surface(KernelGlobals *kg,
+ ShaderData *sd,
+ ccl_addr_space PathState *state,
+ int path_flag)
{
- PROFILING_INIT(kg, PROFILING_SHADER_EVAL);
-
- /* If path is being terminated, we are tracing a shadow ray or evaluating
- * emission, then we don't need to store closures. The emission and shadow
- * shader data also do not have a closure array to save GPU memory. */
- int max_closures;
- if(path_flag & (PATH_RAY_TERMINATE|PATH_RAY_SHADOW|PATH_RAY_EMISSION)) {
- max_closures = 0;
- }
- else {
- max_closures = kernel_data.integrator.max_closures;
- }
-
- sd->num_closure = 0;
- sd->num_closure_left = max_closures;
+ PROFILING_INIT(kg, PROFILING_SHADER_EVAL);
+
+ /* If path is being terminated, we are tracing a shadow ray or evaluating
+ * emission, then we don't need to store closures. The emission and shadow
+ * shader data also do not have a closure array to save GPU memory. */
+ int max_closures;
+ if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) {
+ max_closures = 0;
+ }
+ else {
+ max_closures = kernel_data.integrator.max_closures;
+ }
+
+ sd->num_closure = 0;
+ sd->num_closure_left = max_closures;
#ifdef __OSL__
- if(kg->osl) {
- if (sd->object == OBJECT_NONE) {
- OSLShader::eval_background(kg, sd, state, path_flag);
- }
- else {
- OSLShader::eval_surface(kg, sd, state, path_flag);
- }
- }
- else
+ if (kg->osl) {
+ if (sd->object == OBJECT_NONE) {
+ OSLShader::eval_background(kg, sd, state, path_flag);
+ }
+ else {
+ OSLShader::eval_surface(kg, sd, state, path_flag);
+ }
+ }
+ else
#endif
- {
+ {
#ifdef __SVM__
- svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
+ svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
#else
- if(sd->object == OBJECT_NONE) {
- sd->closure_emission_background = make_float3(0.8f, 0.8f, 0.8f);
- sd->flag |= SD_EMISSION;
- }
- else {
- DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd,
- sizeof(DiffuseBsdf),
- make_float3(0.8f, 0.8f, 0.8f));
- if(bsdf != NULL) {
- bsdf->N = sd->N;
- sd->flag |= bsdf_diffuse_setup(bsdf);
- }
- }
+ if (sd->object == OBJECT_NONE) {
+ sd->closure_emission_background = make_float3(0.8f, 0.8f, 0.8f);
+ sd->flag |= SD_EMISSION;
+ }
+ else {
+ DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(
+ sd, sizeof(DiffuseBsdf), make_float3(0.8f, 0.8f, 0.8f));
+ if (bsdf != NULL) {
+ bsdf->N = sd->N;
+ sd->flag |= bsdf_diffuse_setup(bsdf);
+ }
+ }
#endif
- }
+ }
- if(sd->flag & SD_BSDF_NEEDS_LCG) {
- sd->lcg_state = lcg_state_init_addrspace(state, 0xb4bc3953);
- }
+ if (sd->flag & SD_BSDF_NEEDS_LCG) {
+ sd->lcg_state = lcg_state_init_addrspace(state, 0xb4bc3953);
+ }
}
/* Volume */
#ifdef __VOLUME__
-ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd, const float3 omega_in, float *pdf,
- int skip_phase, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
+ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd,
+ const float3 omega_in,
+ float *pdf,
+ int skip_phase,
+ BsdfEval *result_eval,
+ float sum_pdf,
+ float sum_sample_weight)
{
- for(int i = 0; i < sd->num_closure; i++) {
- if(i == skip_phase)
- continue;
+ for (int i = 0; i < sd->num_closure; i++) {
+ if (i == skip_phase)
+ continue;
- const ShaderClosure *sc = &sd->closure[i];
+ const ShaderClosure *sc = &sd->closure[i];
- if(CLOSURE_IS_PHASE(sc->type)) {
- float phase_pdf = 0.0f;
- float3 eval = volume_phase_eval(sd, sc, omega_in, &phase_pdf);
+ if (CLOSURE_IS_PHASE(sc->type)) {
+ float phase_pdf = 0.0f;
+ float3 eval = volume_phase_eval(sd, sc, omega_in, &phase_pdf);
- if(phase_pdf != 0.0f) {
- bsdf_eval_accum(result_eval, sc->type, eval, 1.0f);
- sum_pdf += phase_pdf*sc->sample_weight;
- }
+ if (phase_pdf != 0.0f) {
+ bsdf_eval_accum(result_eval, sc->type, eval, 1.0f);
+ sum_pdf += phase_pdf * sc->sample_weight;
+ }
- sum_sample_weight += sc->sample_weight;
- }
- }
+ sum_sample_weight += sc->sample_weight;
+ }
+ }
- *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
+ *pdf = (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f;
}
-ccl_device void shader_volume_phase_eval(KernelGlobals *kg, const ShaderData *sd,
- const float3 omega_in, BsdfEval *eval, float *pdf)
+ccl_device void shader_volume_phase_eval(
+ KernelGlobals *kg, const ShaderData *sd, const float3 omega_in, BsdfEval *eval, float *pdf)
{
- PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_EVAL);
+ PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_EVAL);
- bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
+ bsdf_eval_init(
+ eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
- _shader_volume_phase_multi_eval(sd, omega_in, pdf, -1, eval, 0.0f, 0.0f);
+ _shader_volume_phase_multi_eval(sd, omega_in, pdf, -1, eval, 0.0f, 0.0f);
}
-ccl_device int shader_volume_phase_sample(KernelGlobals *kg, const ShaderData *sd,
- float randu, float randv, BsdfEval *phase_eval,
- float3 *omega_in, differential3 *domega_in, float *pdf)
+ccl_device int shader_volume_phase_sample(KernelGlobals *kg,
+ const ShaderData *sd,
+ float randu,
+ float randv,
+ BsdfEval *phase_eval,
+ float3 *omega_in,
+ differential3 *domega_in,
+ float *pdf)
{
- PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_SAMPLE);
+ PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_SAMPLE);
- int sampled = 0;
+ int sampled = 0;
- if(sd->num_closure > 1) {
- /* pick a phase closure based on sample weights */
- float sum = 0.0f;
+ if (sd->num_closure > 1) {
+ /* pick a phase closure based on sample weights */
+ float sum = 0.0f;
- for(sampled = 0; sampled < sd->num_closure; sampled++) {
- const ShaderClosure *sc = &sd->closure[sampled];
+ for (sampled = 0; sampled < sd->num_closure; sampled++) {
+ const ShaderClosure *sc = &sd->closure[sampled];
- if(CLOSURE_IS_PHASE(sc->type))
- sum += sc->sample_weight;
- }
+ if (CLOSURE_IS_PHASE(sc->type))
+ sum += sc->sample_weight;
+ }
- float r = randu*sum;
- float partial_sum = 0.0f;
+ float r = randu * sum;
+ float partial_sum = 0.0f;
- for(sampled = 0; sampled < sd->num_closure; sampled++) {
- const ShaderClosure *sc = &sd->closure[sampled];
+ for (sampled = 0; sampled < sd->num_closure; sampled++) {
+ const ShaderClosure *sc = &sd->closure[sampled];
- if(CLOSURE_IS_PHASE(sc->type)) {
- float next_sum = partial_sum + sc->sample_weight;
+ if (CLOSURE_IS_PHASE(sc->type)) {
+ float next_sum = partial_sum + sc->sample_weight;
- if(r <= next_sum) {
- /* Rescale to reuse for BSDF direction sample. */
- randu = (r - partial_sum) / sc->sample_weight;
- break;
- }
+ if (r <= next_sum) {
+ /* Rescale to reuse for BSDF direction sample. */
+ randu = (r - partial_sum) / sc->sample_weight;
+ break;
+ }
- partial_sum = next_sum;
- }
- }
+ partial_sum = next_sum;
+ }
+ }
- if(sampled == sd->num_closure) {
- *pdf = 0.0f;
- return LABEL_NONE;
- }
- }
+ if (sampled == sd->num_closure) {
+ *pdf = 0.0f;
+ return LABEL_NONE;
+ }
+ }
- /* todo: this isn't quite correct, we don't weight anisotropy properly
- * depending on color channels, even if this is perhaps not a common case */
- const ShaderClosure *sc = &sd->closure[sampled];
- int label;
- float3 eval;
+ /* todo: this isn't quite correct, we don't weight anisotropy properly
+ * depending on color channels, even if this is perhaps not a common case */
+ const ShaderClosure *sc = &sd->closure[sampled];
+ int label;
+ float3 eval;
- *pdf = 0.0f;
- label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
+ *pdf = 0.0f;
+ label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
- if(*pdf != 0.0f) {
- bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
- }
+ if (*pdf != 0.0f) {
+ bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
+ }
- return label;
+ return label;
}
-ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *sd,
- const ShaderClosure *sc, float randu, float randv, BsdfEval *phase_eval,
- float3 *omega_in, differential3 *domega_in, float *pdf)
+ccl_device int shader_phase_sample_closure(KernelGlobals *kg,
+ const ShaderData *sd,
+ const ShaderClosure *sc,
+ float randu,
+ float randv,
+ BsdfEval *phase_eval,
+ float3 *omega_in,
+ differential3 *domega_in,
+ float *pdf)
{
- PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_SAMPLE);
+ PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_SAMPLE);
- int label;
- float3 eval;
+ int label;
+ float3 eval;
- *pdf = 0.0f;
- label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
+ *pdf = 0.0f;
+ label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
- if(*pdf != 0.0f)
- bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
+ if (*pdf != 0.0f)
+ bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
- return label;
+ return label;
}
/* Volume Evaluation */
@@ -1228,83 +1267,85 @@ ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
ccl_addr_space VolumeStack *stack,
int path_flag)
{
- /* If path is being terminated, we are tracing a shadow ray or evaluating
- * emission, then we don't need to store closures. The emission and shadow
- * shader data also do not have a closure array to save GPU memory. */
- int max_closures;
- if(path_flag & (PATH_RAY_TERMINATE|PATH_RAY_SHADOW|PATH_RAY_EMISSION)) {
- max_closures = 0;
- }
- else {
- max_closures = kernel_data.integrator.max_closures;
- }
-
- /* reset closures once at the start, we will be accumulating the closures
- * for all volumes in the stack into a single array of closures */
- sd->num_closure = 0;
- sd->num_closure_left = max_closures;
- sd->flag = 0;
- sd->object_flag = 0;
-
- for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
- /* setup shaderdata from stack. it's mostly setup already in
- * shader_setup_from_volume, this switching should be quick */
- sd->object = stack[i].object;
- sd->lamp = LAMP_NONE;
- sd->shader = stack[i].shader;
-
- sd->flag &= ~SD_SHADER_FLAGS;
- sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
- sd->object_flag &= ~SD_OBJECT_FLAGS;
-
- if(sd->object != OBJECT_NONE) {
- sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object);
-
-#ifdef __OBJECT_MOTION__
- /* todo: this is inefficient for motion blur, we should be
- * caching matrices instead of recomputing them each step */
- shader_setup_object_transforms(kg, sd, sd->time);
-#endif
- }
-
- /* evaluate shader */
-#ifdef __SVM__
-# ifdef __OSL__
- if(kg->osl) {
- OSLShader::eval_volume(kg, sd, state, path_flag);
- }
- else
+ /* If path is being terminated, we are tracing a shadow ray or evaluating
+ * emission, then we don't need to store closures. The emission and shadow
+ * shader data also do not have a closure array to save GPU memory. */
+ int max_closures;
+ if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) {
+ max_closures = 0;
+ }
+ else {
+ max_closures = kernel_data.integrator.max_closures;
+ }
+
+ /* reset closures once at the start, we will be accumulating the closures
+ * for all volumes in the stack into a single array of closures */
+ sd->num_closure = 0;
+ sd->num_closure_left = max_closures;
+ sd->flag = 0;
+ sd->object_flag = 0;
+
+ for (int i = 0; stack[i].shader != SHADER_NONE; i++) {
+ /* setup shaderdata from stack. it's mostly setup already in
+ * shader_setup_from_volume, this switching should be quick */
+ sd->object = stack[i].object;
+ sd->lamp = LAMP_NONE;
+ sd->shader = stack[i].shader;
+
+ sd->flag &= ~SD_SHADER_FLAGS;
+ sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
+ sd->object_flag &= ~SD_OBJECT_FLAGS;
+
+ if (sd->object != OBJECT_NONE) {
+ sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object);
+
+# ifdef __OBJECT_MOTION__
+ /* todo: this is inefficient for motion blur, we should be
+ * caching matrices instead of recomputing them each step */
+ shader_setup_object_transforms(kg, sd, sd->time);
+# endif
+ }
+
+ /* evaluate shader */
+# ifdef __SVM__
+# ifdef __OSL__
+ if (kg->osl) {
+ OSLShader::eval_volume(kg, sd, state, path_flag);
+ }
+ else
+# endif
+ {
+ svm_eval_nodes(kg, sd, state, SHADER_TYPE_VOLUME, path_flag);
+ }
# endif
- {
- svm_eval_nodes(kg, sd, state, SHADER_TYPE_VOLUME, path_flag);
- }
-#endif
- /* merge closures to avoid exceeding number of closures limit */
- if(i > 0)
- shader_merge_closures(sd);
- }
+ /* merge closures to avoid exceeding number of closures limit */
+ if (i > 0)
+ shader_merge_closures(sd);
+ }
}
-#endif /* __VOLUME__ */
+#endif /* __VOLUME__ */
/* Displacement Evaluation */
-ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state)
+ccl_device void shader_eval_displacement(KernelGlobals *kg,
+ ShaderData *sd,
+ ccl_addr_space PathState *state)
{
- sd->num_closure = 0;
- sd->num_closure_left = 0;
+ sd->num_closure = 0;
+ sd->num_closure_left = 0;
- /* this will modify sd->P */
+ /* this will modify sd->P */
#ifdef __SVM__
# ifdef __OSL__
- if(kg->osl)
- OSLShader::eval_displacement(kg, sd, state);
- else
+ if (kg->osl)
+ OSLShader::eval_displacement(kg, sd, state);
+ else
# endif
- {
- svm_eval_nodes(kg, sd, state, SHADER_TYPE_DISPLACEMENT, 0);
- }
+ {
+ svm_eval_nodes(kg, sd, state, SHADER_TYPE_DISPLACEMENT, 0);
+ }
#endif
}
@@ -1313,29 +1354,29 @@ ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_
#ifdef __TRANSPARENT_SHADOWS__
ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect)
{
- int prim = kernel_tex_fetch(__prim_index, isect->prim);
- int shader = 0;
+ int prim = kernel_tex_fetch(__prim_index, isect->prim);
+ int shader = 0;
-#ifdef __HAIR__
- if(kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
-#endif
- shader = kernel_tex_fetch(__tri_shader, prim);
-#ifdef __HAIR__
- }
- else {
- float4 str = kernel_tex_fetch(__curves, prim);
- shader = __float_as_int(str.z);
- }
-#endif
- int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
+# ifdef __HAIR__
+ if (kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
+# endif
+ shader = kernel_tex_fetch(__tri_shader, prim);
+# ifdef __HAIR__
+ }
+ else {
+ float4 str = kernel_tex_fetch(__curves, prim);
+ shader = __float_as_int(str.z);
+ }
+# endif
+ int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
- return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
+ return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
}
-#endif /* __TRANSPARENT_SHADOWS__ */
+#endif /* __TRANSPARENT_SHADOWS__ */
ccl_device float shader_cryptomatte_id(KernelGlobals *kg, int shader)
{
- return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).cryptomatte_id;
+ return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).cryptomatte_id;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h
index fafa3ad4bfa..6af1369feab 100644
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
typedef struct VolumeState {
# ifdef __SPLIT_KERNEL__
# else
- PathState ps;
+ PathState ps;
# endif
} VolumeState;
@@ -28,77 +28,70 @@ typedef struct VolumeState {
# ifdef __SPLIT_KERNEL__
ccl_addr_space
# endif
-ccl_device_inline PathState *shadow_blocked_volume_path_state(
- KernelGlobals *kg,
- VolumeState *volume_state,
- ccl_addr_space PathState *state,
- ShaderData *sd,
- Ray *ray)
+ ccl_device_inline PathState *
+ shadow_blocked_volume_path_state(KernelGlobals *kg,
+ VolumeState *volume_state,
+ ccl_addr_space PathState *state,
+ ShaderData *sd,
+ Ray *ray)
{
# ifdef __SPLIT_KERNEL__
- ccl_addr_space PathState *ps =
- &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
+ ccl_addr_space PathState *ps =
+ &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
# else
- PathState *ps = &volume_state->ps;
+ PathState *ps = &volume_state->ps;
# endif
- *ps = *state;
- /* We are checking for shadow on the "other" side of the surface, so need
- * to discard volume we are currently at.
- */
- if(dot(sd->Ng, ray->D) < 0.0f) {
- kernel_volume_stack_enter_exit(kg, sd, ps->volume_stack);
- }
- return ps;
+ *ps = *state;
+ /* We are checking for shadow on the "other" side of the surface, so need
+ * to discard volume we are currently at.
+ */
+ if (dot(sd->Ng, ray->D) < 0.0f) {
+ kernel_volume_stack_enter_exit(kg, sd, ps->volume_stack);
+ }
+ return ps;
}
-#endif /* __VOLUME__ */
+#endif /* __VOLUME__ */
/* Attenuate throughput accordingly to the given intersection event.
* Returns true if the throughput is zero and traversal can be aborted.
*/
ccl_device_forceinline bool shadow_handle_transparent_isect(
- KernelGlobals *kg,
- ShaderData *shadow_sd,
- ccl_addr_space PathState *state,
-# ifdef __VOLUME__
- ccl_addr_space struct PathState *volume_state,
-# endif
- Intersection *isect,
- Ray *ray,
- float3 *throughput)
+ KernelGlobals *kg,
+ ShaderData *shadow_sd,
+ ccl_addr_space PathState *state,
+#ifdef __VOLUME__
+ ccl_addr_space struct PathState *volume_state,
+#endif
+ Intersection *isect,
+ Ray *ray,
+ float3 *throughput)
{
#ifdef __VOLUME__
- /* Attenuation between last surface and next surface. */
- if(volume_state->volume_stack[0].shader != SHADER_NONE) {
- Ray segment_ray = *ray;
- segment_ray.t = isect->t;
- kernel_volume_shadow(kg,
- shadow_sd,
- volume_state,
- &segment_ray,
- throughput);
- }
+ /* Attenuation between last surface and next surface. */
+ if (volume_state->volume_stack[0].shader != SHADER_NONE) {
+ Ray segment_ray = *ray;
+ segment_ray.t = isect->t;
+ kernel_volume_shadow(kg, shadow_sd, volume_state, &segment_ray, throughput);
+ }
#endif
- /* Setup shader data at surface. */
- shader_setup_from_ray(kg, shadow_sd, isect, ray);
- /* Attenuation from transparent surface. */
- if(!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
- path_state_modify_bounce(state, true);
- shader_eval_surface(kg,
- shadow_sd,
- state,
- PATH_RAY_SHADOW);
- path_state_modify_bounce(state, false);
- *throughput *= shader_bsdf_transparency(kg, shadow_sd);
- }
- /* Stop if all light is blocked. */
- if(is_zero(*throughput)) {
- return true;
- }
+ /* Setup shader data at surface. */
+ shader_setup_from_ray(kg, shadow_sd, isect, ray);
+ /* Attenuation from transparent surface. */
+ if (!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
+ path_state_modify_bounce(state, true);
+ shader_eval_surface(kg, shadow_sd, state, PATH_RAY_SHADOW);
+ path_state_modify_bounce(state, false);
+ *throughput *= shader_bsdf_transparency(kg, shadow_sd);
+ }
+ /* Stop if all light is blocked. */
+ if (is_zero(*throughput)) {
+ return true;
+ }
#ifdef __VOLUME__
- /* Exit/enter volume. */
- kernel_volume_stack_enter_exit(kg, shadow_sd, volume_state->volume_stack);
+ /* Exit/enter volume. */
+ kernel_volume_stack_enter_exit(kg, shadow_sd, volume_state->volume_stack);
#endif
- return false;
+ return false;
}
/* Special version which only handles opaque shadows. */
@@ -110,19 +103,15 @@ ccl_device bool shadow_blocked_opaque(KernelGlobals *kg,
Intersection *isect,
float3 *shadow)
{
- const bool blocked = scene_intersect(kg,
- *ray,
- visibility & PATH_RAY_SHADOW_OPAQUE,
- isect,
- NULL,
- 0.0f, 0.0f);
+ const bool blocked = scene_intersect(
+ kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f);
#ifdef __VOLUME__
- if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
- /* Apply attenuation from current volume shader. */
- kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
- }
+ if (!blocked && state->volume_stack[0].shader != SHADER_NONE) {
+ /* Apply attenuation from current volume shader. */
+ kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
+ }
#endif
- return blocked;
+ return blocked;
}
#ifdef __TRANSPARENT_SHADOWS__
@@ -169,94 +158,80 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
uint max_hits,
float3 *shadow)
{
- /* Intersect to find an opaque surface, or record all transparent
- * surface hits.
- */
- uint num_hits;
- const bool blocked = scene_intersect_shadow_all(kg,
- ray,
- hits,
- visibility,
- max_hits,
- &num_hits);
+ /* Intersect to find an opaque surface, or record all transparent
+ * surface hits.
+ */
+ uint num_hits;
+ const bool blocked = scene_intersect_shadow_all(kg, ray, hits, visibility, max_hits, &num_hits);
# ifdef __VOLUME__
- VolumeState volume_state;
+ VolumeState volume_state;
# endif
- /* If no opaque surface found but we did find transparent hits,
- * shade them.
- */
- if(!blocked && num_hits > 0) {
- float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
- float3 Pend = ray->P + ray->D*ray->t;
- float last_t = 0.0f;
- int bounce = state->transparent_bounce;
- Intersection *isect = hits;
+ /* If no opaque surface found but we did find transparent hits,
+ * shade them.
+ */
+ if (!blocked && num_hits > 0) {
+ float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+ float3 Pend = ray->P + ray->D * ray->t;
+ float last_t = 0.0f;
+ int bounce = state->transparent_bounce;
+ Intersection *isect = hits;
# ifdef __VOLUME__
# ifdef __SPLIT_KERNEL__
- ccl_addr_space
+ ccl_addr_space
# endif
- PathState *ps = shadow_blocked_volume_path_state(kg,
- &volume_state,
- state,
- sd,
- ray);
+ PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray);
# endif
- sort_intersections(hits, num_hits);
- for(int hit = 0; hit < num_hits; hit++, isect++) {
- /* Adjust intersection distance for moving ray forward. */
- float new_t = isect->t;
- isect->t -= last_t;
- /* Skip hit if we did not move forward, step by step raytracing
- * would have skipped it as well then.
- */
- if(last_t == new_t) {
- continue;
- }
- last_t = new_t;
- /* Attenuate the throughput. */
- if(shadow_handle_transparent_isect(kg,
- shadow_sd,
- state,
-#ifdef __VOLUME__
- ps,
-#endif
- isect,
- ray,
- &throughput))
- {
- return true;
- }
- /* Move ray forward. */
- ray->P = shadow_sd->P;
- if(ray->t != FLT_MAX) {
- ray->D = normalize_len(Pend - ray->P, &ray->t);
- }
- bounce++;
- }
+ sort_intersections(hits, num_hits);
+ for (int hit = 0; hit < num_hits; hit++, isect++) {
+ /* Adjust intersection distance for moving ray forward. */
+ float new_t = isect->t;
+ isect->t -= last_t;
+ /* Skip hit if we did not move forward, step by step raytracing
+ * would have skipped it as well then.
+ */
+ if (last_t == new_t) {
+ continue;
+ }
+ last_t = new_t;
+ /* Attenuate the throughput. */
+ if (shadow_handle_transparent_isect(kg,
+ shadow_sd,
+ state,
# ifdef __VOLUME__
- /* Attenuation for last line segment towards light. */
- if(ps->volume_stack[0].shader != SHADER_NONE) {
- kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
- }
+ ps,
# endif
- *shadow = throughput;
- return is_zero(throughput);
- }
+ isect,
+ ray,
+ &throughput)) {
+ return true;
+ }
+ /* Move ray forward. */
+ ray->P = shadow_sd->P;
+ if (ray->t != FLT_MAX) {
+ ray->D = normalize_len(Pend - ray->P, &ray->t);
+ }
+ bounce++;
+ }
# ifdef __VOLUME__
- if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
- /* Apply attenuation from current volume shader. */
+ /* Attenuation for last line segment towards light. */
+ if (ps->volume_stack[0].shader != SHADER_NONE) {
+ kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
+ }
+# endif
+ *shadow = throughput;
+ return is_zero(throughput);
+ }
+# ifdef __VOLUME__
+ if (!blocked && state->volume_stack[0].shader != SHADER_NONE) {
+ /* Apply attenuation from current volume shader. */
# ifdef __SPLIT_KERNEL__
- ccl_addr_space
+ ccl_addr_space
# endif
- PathState *ps = shadow_blocked_volume_path_state(kg,
- &volume_state,
- state,
- sd,
- ray);
- kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow);
- }
+ PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray);
+ kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow);
+ }
# endif
- return blocked;
+ return blocked;
}
/* Here we do all device specific trickery before invoking actual traversal
@@ -272,43 +247,36 @@ ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
float3 *shadow)
{
# ifdef __SPLIT_KERNEL__
- Intersection hits_[SHADOW_STACK_MAX_HITS];
- Intersection *hits = &hits_[0];
+ Intersection hits_[SHADOW_STACK_MAX_HITS];
+ Intersection *hits = &hits_[0];
# elif defined(__KERNEL_CUDA__)
- Intersection *hits = kg->hits_stack;
+ Intersection *hits = kg->hits_stack;
# else
- Intersection hits_stack[SHADOW_STACK_MAX_HITS];
- Intersection *hits = hits_stack;
+ Intersection hits_stack[SHADOW_STACK_MAX_HITS];
+ Intersection *hits = hits_stack;
# endif
# ifndef __KERNEL_GPU__
- /* Prefer to use stack but use dynamic allocation if too deep max hits
- * we need max_hits + 1 storage space due to the logic in
- * scene_intersect_shadow_all which will first store and then check if
- * the limit is exceeded.
- *
- * Ignore this on GPU because of slow/unavailable malloc().
- */
- if(max_hits + 1 > SHADOW_STACK_MAX_HITS) {
- if(kg->transparent_shadow_intersections == NULL) {
- const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
- kg->transparent_shadow_intersections =
- (Intersection*)malloc(sizeof(Intersection)*(transparent_max_bounce + 1));
- }
- hits = kg->transparent_shadow_intersections;
- }
-# endif /* __KERNEL_GPU__ */
- /* Invoke actual traversal. */
- return shadow_blocked_transparent_all_loop(kg,
- sd,
- shadow_sd,
- state,
- visibility,
- ray,
- hits,
- max_hits,
- shadow);
+ /* Prefer to use stack but use dynamic allocation if too deep max hits
+ * we need max_hits + 1 storage space due to the logic in
+ * scene_intersect_shadow_all which will first store and then check if
+ * the limit is exceeded.
+ *
+ * Ignore this on GPU because of slow/unavailable malloc().
+ */
+ if (max_hits + 1 > SHADOW_STACK_MAX_HITS) {
+ if (kg->transparent_shadow_intersections == NULL) {
+ const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
+ kg->transparent_shadow_intersections = (Intersection *)malloc(sizeof(Intersection) *
+ (transparent_max_bounce + 1));
+ }
+ hits = kg->transparent_shadow_intersections;
+ }
+# endif /* __KERNEL_GPU__ */
+ /* Invoke actual traversal. */
+ return shadow_blocked_transparent_all_loop(
+ kg, sd, shadow_sd, state, visibility, ray, hits, max_hits, shadow);
}
-# endif /* __SHADOW_RECORD_ALL__ */
+# endif /* __SHADOW_RECORD_ALL__ */
# if defined(__KERNEL_GPU__) || !defined(__SHADOW_RECORD_ALL__)
/* Shadow function to compute how much light is blocked,
@@ -323,130 +291,100 @@ ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
/* This function is only implementing device-independent traversal logic
* which requires some precalculation done.
*/
-ccl_device bool shadow_blocked_transparent_stepped_loop(
- KernelGlobals *kg,
- ShaderData *sd,
- ShaderData *shadow_sd,
- ccl_addr_space PathState *state,
- const uint visibility,
- Ray *ray,
- Intersection *isect,
- const bool blocked,
- const bool is_transparent_isect,
- float3 *shadow)
+ccl_device bool shadow_blocked_transparent_stepped_loop(KernelGlobals *kg,
+ ShaderData *sd,
+ ShaderData *shadow_sd,
+ ccl_addr_space PathState *state,
+ const uint visibility,
+ Ray *ray,
+ Intersection *isect,
+ const bool blocked,
+ const bool is_transparent_isect,
+ float3 *shadow)
{
# ifdef __VOLUME__
- VolumeState volume_state;
+ VolumeState volume_state;
# endif
- if(blocked && is_transparent_isect) {
- float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
- float3 Pend = ray->P + ray->D*ray->t;
- int bounce = state->transparent_bounce;
+ if (blocked && is_transparent_isect) {
+ float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+ float3 Pend = ray->P + ray->D * ray->t;
+ int bounce = state->transparent_bounce;
# ifdef __VOLUME__
# ifdef __SPLIT_KERNEL__
- ccl_addr_space
+ ccl_addr_space
# endif
- PathState *ps = shadow_blocked_volume_path_state(kg,
- &volume_state,
- state,
- sd,
- ray);
+ PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray);
# endif
- for(;;) {
- if(bounce >= kernel_data.integrator.transparent_max_bounce) {
- return true;
- }
- if(!scene_intersect(kg,
- *ray,
- visibility & PATH_RAY_SHADOW_TRANSPARENT,
- isect,
- NULL,
- 0.0f, 0.0f))
- {
- break;
- }
- if(!shader_transparent_shadow(kg, isect)) {
- return true;
- }
- /* Attenuate the throughput. */
- if(shadow_handle_transparent_isect(kg,
- shadow_sd,
- state,
-#ifdef __VOLUME__
- ps,
-#endif
- isect,
- ray,
- &throughput))
- {
- return true;
- }
- /* Move ray forward. */
- ray->P = ray_offset(shadow_sd->P, -shadow_sd->Ng);
- if(ray->t != FLT_MAX) {
- ray->D = normalize_len(Pend - ray->P, &ray->t);
- }
- bounce++;
- }
+ for (;;) {
+ if (bounce >= kernel_data.integrator.transparent_max_bounce) {
+ return true;
+ }
+ if (!scene_intersect(
+ kg, *ray, visibility & PATH_RAY_SHADOW_TRANSPARENT, isect, NULL, 0.0f, 0.0f)) {
+ break;
+ }
+ if (!shader_transparent_shadow(kg, isect)) {
+ return true;
+ }
+ /* Attenuate the throughput. */
+ if (shadow_handle_transparent_isect(kg,
+ shadow_sd,
+ state,
+# ifdef __VOLUME__
+ ps,
+# endif
+ isect,
+ ray,
+ &throughput)) {
+ return true;
+ }
+ /* Move ray forward. */
+ ray->P = ray_offset(shadow_sd->P, -shadow_sd->Ng);
+ if (ray->t != FLT_MAX) {
+ ray->D = normalize_len(Pend - ray->P, &ray->t);
+ }
+ bounce++;
+ }
# ifdef __VOLUME__
- /* Attenuation for last line segment towards light. */
- if(ps->volume_stack[0].shader != SHADER_NONE) {
- kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
- }
+ /* Attenuation for last line segment towards light. */
+ if (ps->volume_stack[0].shader != SHADER_NONE) {
+ kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
+ }
# endif
- *shadow *= throughput;
- return is_zero(throughput);
- }
+ *shadow *= throughput;
+ return is_zero(throughput);
+ }
# ifdef __VOLUME__
- if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
- /* Apply attenuation from current volume shader. */
+ if (!blocked && state->volume_stack[0].shader != SHADER_NONE) {
+ /* Apply attenuation from current volume shader. */
# ifdef __SPLIT_KERNEL__
- ccl_addr_space
+ ccl_addr_space
# endif
- PathState *ps = shadow_blocked_volume_path_state(kg,
- &volume_state,
- state,
- sd,
- ray);
- kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow);
- }
+ PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray);
+ kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow);
+ }
# endif
- return blocked;
+ return blocked;
}
-ccl_device bool shadow_blocked_transparent_stepped(
- KernelGlobals *kg,
- ShaderData *sd,
- ShaderData *shadow_sd,
- ccl_addr_space PathState *state,
- const uint visibility,
- Ray *ray,
- Intersection *isect,
- float3 *shadow)
+ccl_device bool shadow_blocked_transparent_stepped(KernelGlobals *kg,
+ ShaderData *sd,
+ ShaderData *shadow_sd,
+ ccl_addr_space PathState *state,
+ const uint visibility,
+ Ray *ray,
+ Intersection *isect,
+ float3 *shadow)
{
- bool blocked = scene_intersect(kg,
- *ray,
- visibility & PATH_RAY_SHADOW_OPAQUE,
- isect,
- NULL,
- 0.0f, 0.0f);
- bool is_transparent_isect = blocked
- ? shader_transparent_shadow(kg, isect)
- : false;
- return shadow_blocked_transparent_stepped_loop(kg,
- sd,
- shadow_sd,
- state,
- visibility,
- ray,
- isect,
- blocked,
- is_transparent_isect,
- shadow);
+ bool blocked = scene_intersect(
+ kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f);
+ bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, isect) : false;
+ return shadow_blocked_transparent_stepped_loop(
+ kg, sd, shadow_sd, state, visibility, ray, isect, blocked, is_transparent_isect, shadow);
}
-# endif /* __KERNEL_GPU__ || !__SHADOW_RECORD_ALL__ */
-#endif /* __TRANSPARENT_SHADOWS__ */
+# endif /* __KERNEL_GPU__ || !__SHADOW_RECORD_ALL__ */
+#endif /* __TRANSPARENT_SHADOWS__ */
ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
ShaderData *sd,
@@ -455,100 +393,65 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
Ray *ray_input,
float3 *shadow)
{
- Ray *ray = ray_input;
- Intersection isect;
- /* Some common early checks. */
- *shadow = make_float3(1.0f, 1.0f, 1.0f);
- if(ray->t == 0.0f) {
- return false;
- }
+ Ray *ray = ray_input;
+ Intersection isect;
+ /* Some common early checks. */
+ *shadow = make_float3(1.0f, 1.0f, 1.0f);
+ if (ray->t == 0.0f) {
+ return false;
+ }
#ifdef __SHADOW_TRICKS__
- const uint visibility = (state->flag & PATH_RAY_SHADOW_CATCHER)
- ? PATH_RAY_SHADOW_NON_CATCHER
- : PATH_RAY_SHADOW;
+ const uint visibility = (state->flag & PATH_RAY_SHADOW_CATCHER) ? PATH_RAY_SHADOW_NON_CATCHER :
+ PATH_RAY_SHADOW;
#else
- const uint visibility = PATH_RAY_SHADOW;
+ const uint visibility = PATH_RAY_SHADOW;
#endif
- /* Do actual shadow shading. */
- /* First of all, we check if integrator requires transparent shadows.
- * if not, we use simplest and fastest ever way to calculate occlusion.
- */
+ /* Do actual shadow shading. */
+ /* First of all, we check if integrator requires transparent shadows.
+ * if not, we use simplest and fastest ever way to calculate occlusion.
+ */
#ifdef __TRANSPARENT_SHADOWS__
- if(!kernel_data.integrator.transparent_shadows)
+ if (!kernel_data.integrator.transparent_shadows)
#endif
- {
- return shadow_blocked_opaque(kg,
- shadow_sd,
- state,
- visibility,
- ray,
- &isect,
- shadow);
- }
+ {
+ return shadow_blocked_opaque(kg, shadow_sd, state, visibility, ray, &isect, shadow);
+ }
#ifdef __TRANSPARENT_SHADOWS__
# ifdef __SHADOW_RECORD_ALL__
- /* For the transparent shadows we try to use record-all logic on the
- * devices which supports this.
- */
- const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
- /* Check transparent bounces here, for volume scatter which can do
- * lighting before surface path termination is checked.
- */
- if(state->transparent_bounce >= transparent_max_bounce) {
- return true;
- }
- const uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;
+ /* For the transparent shadows we try to use record-all logic on the
+ * devices which supports this.
+ */
+ const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
+ /* Check transparent bounces here, for volume scatter which can do
+ * lighting before surface path termination is checked.
+ */
+ if (state->transparent_bounce >= transparent_max_bounce) {
+ return true;
+ }
+ const uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;
# ifdef __KERNEL_GPU__
- /* On GPU we do trickey with tracing opaque ray first, this avoids speed
- * regressions in some files.
- *
- * TODO(sergey): Check why using record-all behavior causes slowdown in such
- * cases. Could that be caused by a higher spill pressure?
- */
- const bool blocked = scene_intersect(kg,
- *ray,
- visibility & PATH_RAY_SHADOW_OPAQUE,
- &isect,
- NULL,
- 0.0f, 0.0f);
- const bool is_transparent_isect = blocked
- ? shader_transparent_shadow(kg, &isect)
- : false;
- if(!blocked || !is_transparent_isect ||
- max_hits + 1 >= SHADOW_STACK_MAX_HITS)
- {
- return shadow_blocked_transparent_stepped_loop(kg,
- sd,
- shadow_sd,
- state,
- visibility,
- ray,
- &isect,
- blocked,
- is_transparent_isect,
- shadow);
- }
-# endif /* __KERNEL_GPU__ */
- return shadow_blocked_transparent_all(kg,
- sd,
- shadow_sd,
- state,
- visibility,
- ray,
- max_hits,
- shadow);
+ /* On GPU we do trickey with tracing opaque ray first, this avoids speed
+ * regressions in some files.
+ *
+ * TODO(sergey): Check why using record-all behavior causes slowdown in such
+ * cases. Could that be caused by a higher spill pressure?
+ */
+ const bool blocked = scene_intersect(
+ kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f);
+ const bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, &isect) : false;
+ if (!blocked || !is_transparent_isect || max_hits + 1 >= SHADOW_STACK_MAX_HITS) {
+ return shadow_blocked_transparent_stepped_loop(
+ kg, sd, shadow_sd, state, visibility, ray, &isect, blocked, is_transparent_isect, shadow);
+ }
+# endif /* __KERNEL_GPU__ */
+ return shadow_blocked_transparent_all(
+ kg, sd, shadow_sd, state, visibility, ray, max_hits, shadow);
# else /* __SHADOW_RECORD_ALL__ */
- /* Fallback to a slowest version which works on all devices. */
- return shadow_blocked_transparent_stepped(kg,
- sd,
- shadow_sd,
- state,
- visibility,
- ray,
- &isect,
- shadow);
-# endif /* __SHADOW_RECORD_ALL__ */
-#endif /* __TRANSPARENT_SHADOWS__ */
+ /* Fallback to a slowest version which works on all devices. */
+ return shadow_blocked_transparent_stepped(
+ kg, sd, shadow_sd, state, visibility, ray, &isect, shadow);
+# endif /* __SHADOW_RECORD_ALL__ */
+#endif /* __TRANSPARENT_SHADOWS__ */
}
#undef SHADOW_STACK_MAX_HITS
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index 96b717530ce..7510e50a962 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -22,317 +22,295 @@ CCL_NAMESPACE_BEGIN
* http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf
*/
-ccl_device_inline float3 subsurface_scatter_eval(ShaderData *sd,
- const ShaderClosure *sc,
- float disk_r,
- float r,
- bool all)
+ccl_device_inline float3
+subsurface_scatter_eval(ShaderData *sd, const ShaderClosure *sc, float disk_r, float r, bool all)
{
- /* this is the veach one-sample model with balance heuristic, some pdf
- * factors drop out when using balance heuristic weighting */
- float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f);
- float pdf_sum = 0.0f;
- float sample_weight_inv = 0.0f;
+ /* this is the veach one-sample model with balance heuristic, some pdf
+ * factors drop out when using balance heuristic weighting */
+ float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f);
+ float pdf_sum = 0.0f;
+ float sample_weight_inv = 0.0f;
- if(!all) {
- float sample_weight_sum = 0.0f;
+ if (!all) {
+ float sample_weight_sum = 0.0f;
- for(int i = 0; i < sd->num_closure; i++) {
- sc = &sd->closure[i];
+ for (int i = 0; i < sd->num_closure; i++) {
+ sc = &sd->closure[i];
- if(CLOSURE_IS_DISK_BSSRDF(sc->type)) {
- sample_weight_sum += sc->sample_weight;
- }
- }
+ if (CLOSURE_IS_DISK_BSSRDF(sc->type)) {
+ sample_weight_sum += sc->sample_weight;
+ }
+ }
- sample_weight_inv = 1.0f/sample_weight_sum;
- }
+ sample_weight_inv = 1.0f / sample_weight_sum;
+ }
- for(int i = 0; i < sd->num_closure; i++) {
- sc = &sd->closure[i];
+ for (int i = 0; i < sd->num_closure; i++) {
+ sc = &sd->closure[i];
- if(CLOSURE_IS_DISK_BSSRDF(sc->type)) {
- /* in case of branched path integrate we sample all bssrdf's once,
- * for path trace we pick one, so adjust pdf for that */
- float sample_weight = (all)? 1.0f: sc->sample_weight * sample_weight_inv;
+ if (CLOSURE_IS_DISK_BSSRDF(sc->type)) {
+ /* in case of branched path integrate we sample all bssrdf's once,
+ * for path trace we pick one, so adjust pdf for that */
+ float sample_weight = (all) ? 1.0f : sc->sample_weight * sample_weight_inv;
- /* compute pdf */
- float3 eval = bssrdf_eval(sc, r);
- float pdf = bssrdf_pdf(sc, disk_r);
+ /* compute pdf */
+ float3 eval = bssrdf_eval(sc, r);
+ float pdf = bssrdf_pdf(sc, disk_r);
- eval_sum += sc->weight * eval;
- pdf_sum += sample_weight * pdf;
- }
- }
+ eval_sum += sc->weight * eval;
+ pdf_sum += sample_weight * pdf;
+ }
+ }
- return (pdf_sum > 0.0f)? eval_sum / pdf_sum : make_float3(0.0f, 0.0f, 0.0f);
+ return (pdf_sum > 0.0f) ? eval_sum / pdf_sum : make_float3(0.0f, 0.0f, 0.0f);
}
/* replace closures with a single diffuse bsdf closure after scatter step */
-ccl_device void subsurface_scatter_setup_diffuse_bsdf(KernelGlobals *kg, ShaderData *sd, ClosureType type, float roughness, float3 weight, float3 N)
+ccl_device void subsurface_scatter_setup_diffuse_bsdf(
+ KernelGlobals *kg, ShaderData *sd, ClosureType type, float roughness, float3 weight, float3 N)
{
- sd->flag &= ~SD_CLOSURE_FLAGS;
- sd->num_closure = 0;
- sd->num_closure_left = kernel_data.integrator.max_closures;
+ sd->flag &= ~SD_CLOSURE_FLAGS;
+ sd->num_closure = 0;
+ sd->num_closure_left = kernel_data.integrator.max_closures;
#ifdef __PRINCIPLED__
- if(type == CLOSURE_BSSRDF_PRINCIPLED_ID ||
- type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
- {
- PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), weight);
-
- if(bsdf) {
- bsdf->N = N;
- bsdf->roughness = roughness;
- sd->flag |= bsdf_principled_diffuse_setup(bsdf);
-
- /* replace CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID with this special ID so render passes
- * can recognize it as not being a regular Disney principled diffuse closure */
- bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
- }
- }
- else if(CLOSURE_IS_BSDF_BSSRDF(type) ||
- CLOSURE_IS_BSSRDF(type))
-#endif /* __PRINCIPLED__ */
- {
- DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
-
- if(bsdf) {
- bsdf->N = N;
- sd->flag |= bsdf_diffuse_setup(bsdf);
-
- /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes
- * can recognize it as not being a regular diffuse closure */
- bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
- }
- }
+ if (type == CLOSURE_BSSRDF_PRINCIPLED_ID || type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) {
+ PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
+ sd, sizeof(PrincipledDiffuseBsdf), weight);
+
+ if (bsdf) {
+ bsdf->N = N;
+ bsdf->roughness = roughness;
+ sd->flag |= bsdf_principled_diffuse_setup(bsdf);
+
+ /* replace CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID with this special ID so render passes
+ * can recognize it as not being a regular Disney principled diffuse closure */
+ bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
+ }
+ }
+ else if (CLOSURE_IS_BSDF_BSSRDF(type) || CLOSURE_IS_BSSRDF(type))
+#endif /* __PRINCIPLED__ */
+ {
+ DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
+
+ if (bsdf) {
+ bsdf->N = N;
+ sd->flag |= bsdf_diffuse_setup(bsdf);
+
+ /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes
+ * can recognize it as not being a regular diffuse closure */
+ bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
+ }
+ }
}
/* optionally do blurring of color and/or bump mapping, at the cost of a shader evaluation */
ccl_device float3 subsurface_color_pow(float3 color, float exponent)
{
- color = max(color, make_float3(0.0f, 0.0f, 0.0f));
-
- if(exponent == 1.0f) {
- /* nothing to do */
- }
- else if(exponent == 0.5f) {
- color.x = sqrtf(color.x);
- color.y = sqrtf(color.y);
- color.z = sqrtf(color.z);
- }
- else {
- color.x = powf(color.x, exponent);
- color.y = powf(color.y, exponent);
- color.z = powf(color.z, exponent);
- }
-
- return color;
+ color = max(color, make_float3(0.0f, 0.0f, 0.0f));
+
+ if (exponent == 1.0f) {
+ /* nothing to do */
+ }
+ else if (exponent == 0.5f) {
+ color.x = sqrtf(color.x);
+ color.y = sqrtf(color.y);
+ color.z = sqrtf(color.z);
+ }
+ else {
+ color.x = powf(color.x, exponent);
+ color.y = powf(color.y, exponent);
+ color.z = powf(color.z, exponent);
+ }
+
+ return color;
}
-ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
- ShaderData *sd,
- ccl_addr_space PathState *state,
- float3 *eval,
- float3 *N)
+ccl_device void subsurface_color_bump_blur(
+ KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float3 *eval, float3 *N)
{
- /* average color and texture blur at outgoing point */
- float texture_blur;
- float3 out_color = shader_bssrdf_sum(sd, NULL, &texture_blur);
-
- /* do we have bump mapping? */
- bool bump = (sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
-
- if(bump || texture_blur > 0.0f) {
- /* average color and normal at incoming point */
- shader_eval_surface(kg, sd, state, state->flag);
- float3 in_color = shader_bssrdf_sum(sd, (bump)? N: NULL, NULL);
-
- /* we simply divide out the average color and multiply with the average
- * of the other one. we could try to do this per closure but it's quite
- * tricky to match closures between shader evaluations, their number and
- * order may change, this is simpler */
- if(texture_blur > 0.0f) {
- out_color = subsurface_color_pow(out_color, texture_blur);
- in_color = subsurface_color_pow(in_color, texture_blur);
-
- *eval *= safe_divide_color(in_color, out_color);
- }
- }
+ /* average color and texture blur at outgoing point */
+ float texture_blur;
+ float3 out_color = shader_bssrdf_sum(sd, NULL, &texture_blur);
+
+ /* do we have bump mapping? */
+ bool bump = (sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
+
+ if (bump || texture_blur > 0.0f) {
+ /* average color and normal at incoming point */
+ shader_eval_surface(kg, sd, state, state->flag);
+ float3 in_color = shader_bssrdf_sum(sd, (bump) ? N : NULL, NULL);
+
+ /* we simply divide out the average color and multiply with the average
+ * of the other one. we could try to do this per closure but it's quite
+ * tricky to match closures between shader evaluations, their number and
+ * order may change, this is simpler */
+ if (texture_blur > 0.0f) {
+ out_color = subsurface_color_pow(out_color, texture_blur);
+ in_color = subsurface_color_pow(in_color, texture_blur);
+
+ *eval *= safe_divide_color(in_color, out_color);
+ }
+ }
}
/* Subsurface scattering step, from a point on the surface to other
* nearby points on the same object.
*/
-ccl_device_inline int subsurface_scatter_disk(
- KernelGlobals *kg,
- LocalIntersection *ss_isect,
- ShaderData *sd,
- const ShaderClosure *sc,
- uint *lcg_state,
- float disk_u,
- float disk_v,
- bool all)
+ccl_device_inline int subsurface_scatter_disk(KernelGlobals *kg,
+ LocalIntersection *ss_isect,
+ ShaderData *sd,
+ const ShaderClosure *sc,
+ uint *lcg_state,
+ float disk_u,
+ float disk_v,
+ bool all)
{
- /* pick random axis in local frame and point on disk */
- float3 disk_N, disk_T, disk_B;
- float pick_pdf_N, pick_pdf_T, pick_pdf_B;
-
- disk_N = sd->Ng;
- make_orthonormals(disk_N, &disk_T, &disk_B);
-
- if(disk_v < 0.5f) {
- pick_pdf_N = 0.5f;
- pick_pdf_T = 0.25f;
- pick_pdf_B = 0.25f;
- disk_v *= 2.0f;
- }
- else if(disk_v < 0.75f) {
- float3 tmp = disk_N;
- disk_N = disk_T;
- disk_T = tmp;
- pick_pdf_N = 0.25f;
- pick_pdf_T = 0.5f;
- pick_pdf_B = 0.25f;
- disk_v = (disk_v - 0.5f)*4.0f;
- }
- else {
- float3 tmp = disk_N;
- disk_N = disk_B;
- disk_B = tmp;
- pick_pdf_N = 0.25f;
- pick_pdf_T = 0.25f;
- pick_pdf_B = 0.5f;
- disk_v = (disk_v - 0.75f)*4.0f;
- }
-
- /* sample point on disk */
- float phi = M_2PI_F * disk_v;
- float disk_height, disk_r;
-
- bssrdf_sample(sc, disk_u, &disk_r, &disk_height);
-
- float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
-
- /* create ray */
+ /* pick random axis in local frame and point on disk */
+ float3 disk_N, disk_T, disk_B;
+ float pick_pdf_N, pick_pdf_T, pick_pdf_B;
+
+ disk_N = sd->Ng;
+ make_orthonormals(disk_N, &disk_T, &disk_B);
+
+ if (disk_v < 0.5f) {
+ pick_pdf_N = 0.5f;
+ pick_pdf_T = 0.25f;
+ pick_pdf_B = 0.25f;
+ disk_v *= 2.0f;
+ }
+ else if (disk_v < 0.75f) {
+ float3 tmp = disk_N;
+ disk_N = disk_T;
+ disk_T = tmp;
+ pick_pdf_N = 0.25f;
+ pick_pdf_T = 0.5f;
+ pick_pdf_B = 0.25f;
+ disk_v = (disk_v - 0.5f) * 4.0f;
+ }
+ else {
+ float3 tmp = disk_N;
+ disk_N = disk_B;
+ disk_B = tmp;
+ pick_pdf_N = 0.25f;
+ pick_pdf_T = 0.25f;
+ pick_pdf_B = 0.5f;
+ disk_v = (disk_v - 0.75f) * 4.0f;
+ }
+
+ /* sample point on disk */
+ float phi = M_2PI_F * disk_v;
+ float disk_height, disk_r;
+
+ bssrdf_sample(sc, disk_u, &disk_r, &disk_height);
+
+ float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B;
+
+ /* create ray */
#ifdef __SPLIT_KERNEL__
- Ray ray_object = ss_isect->ray;
- Ray *ray = &ray_object;
+ Ray ray_object = ss_isect->ray;
+ Ray *ray = &ray_object;
#else
- Ray *ray = &ss_isect->ray;
+ Ray *ray = &ss_isect->ray;
#endif
- ray->P = sd->P + disk_N*disk_height + disk_P;
- ray->D = -disk_N;
- ray->t = 2.0f*disk_height;
- ray->dP = sd->dP;
- ray->dD = differential3_zero();
- ray->time = sd->time;
-
- /* intersect with the same object. if multiple intersections are found it
- * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
- scene_intersect_local(kg,
- *ray,
- ss_isect,
- sd->object,
- lcg_state,
- BSSRDF_MAX_HITS);
- int num_eval_hits = min(ss_isect->num_hits, BSSRDF_MAX_HITS);
-
- for(int hit = 0; hit < num_eval_hits; hit++) {
- /* Quickly retrieve P and Ng without setting up ShaderData. */
- float3 hit_P;
- if(sd->type & PRIMITIVE_TRIANGLE) {
- hit_P = triangle_refine_local(kg,
- sd,
- &ss_isect->hits[hit],
- ray);
- }
+ ray->P = sd->P + disk_N * disk_height + disk_P;
+ ray->D = -disk_N;
+ ray->t = 2.0f * disk_height;
+ ray->dP = sd->dP;
+ ray->dD = differential3_zero();
+ ray->time = sd->time;
+
+ /* intersect with the same object. if multiple intersections are found it
+ * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
+ scene_intersect_local(kg, *ray, ss_isect, sd->object, lcg_state, BSSRDF_MAX_HITS);
+ int num_eval_hits = min(ss_isect->num_hits, BSSRDF_MAX_HITS);
+
+ for (int hit = 0; hit < num_eval_hits; hit++) {
+ /* Quickly retrieve P and Ng without setting up ShaderData. */
+ float3 hit_P;
+ if (sd->type & PRIMITIVE_TRIANGLE) {
+ hit_P = triangle_refine_local(kg, sd, &ss_isect->hits[hit], ray);
+ }
#ifdef __OBJECT_MOTION__
- else if(sd->type & PRIMITIVE_MOTION_TRIANGLE) {
- float3 verts[3];
- motion_triangle_vertices(
- kg,
- sd->object,
- kernel_tex_fetch(__prim_index, ss_isect->hits[hit].prim),
- sd->time,
- verts);
- hit_P = motion_triangle_refine_local(kg,
- sd,
- &ss_isect->hits[hit],
- ray,
- verts);
- }
-#endif /* __OBJECT_MOTION__ */
- else {
- ss_isect->weight[hit] = make_float3(0.0f, 0.0f, 0.0f);
- continue;
- }
-
- float3 hit_Ng = ss_isect->Ng[hit];
- if(ss_isect->hits[hit].object != OBJECT_NONE) {
- object_normal_transform(kg, sd, &hit_Ng);
- }
-
- /* Probability densities for local frame axes. */
- float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng));
- float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng));
- float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng));
-
- /* Multiple importance sample between 3 axes, power heuristic
- * found to be slightly better than balance heuristic. pdf_N
- * in the MIS weight and denominator cancelled out. */
- float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B));
- if(ss_isect->num_hits > BSSRDF_MAX_HITS) {
- w *= ss_isect->num_hits/(float)BSSRDF_MAX_HITS;
- }
-
- /* Real distance to sampled point. */
- float r = len(hit_P - sd->P);
-
- /* Evaluate profiles. */
- float3 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
-
- ss_isect->weight[hit] = eval;
- }
+ else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) {
+ float3 verts[3];
+ motion_triangle_vertices(kg,
+ sd->object,
+ kernel_tex_fetch(__prim_index, ss_isect->hits[hit].prim),
+ sd->time,
+ verts);
+ hit_P = motion_triangle_refine_local(kg, sd, &ss_isect->hits[hit], ray, verts);
+ }
+#endif /* __OBJECT_MOTION__ */
+ else {
+ ss_isect->weight[hit] = make_float3(0.0f, 0.0f, 0.0f);
+ continue;
+ }
+
+ float3 hit_Ng = ss_isect->Ng[hit];
+ if (ss_isect->hits[hit].object != OBJECT_NONE) {
+ object_normal_transform(kg, sd, &hit_Ng);
+ }
+
+ /* Probability densities for local frame axes. */
+ float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng));
+ float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng));
+ float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng));
+
+ /* Multiple importance sample between 3 axes, power heuristic
+ * found to be slightly better than balance heuristic. pdf_N
+ * in the MIS weight and denominator cancelled out. */
+ float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B));
+ if (ss_isect->num_hits > BSSRDF_MAX_HITS) {
+ w *= ss_isect->num_hits / (float)BSSRDF_MAX_HITS;
+ }
+
+ /* Real distance to sampled point. */
+ float r = len(hit_P - sd->P);
+
+ /* Evaluate profiles. */
+ float3 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
+
+ ss_isect->weight[hit] = eval;
+ }
#ifdef __SPLIT_KERNEL__
- ss_isect->ray = *ray;
+ ss_isect->ray = *ray;
#endif
- return num_eval_hits;
+ return num_eval_hits;
}
-ccl_device_noinline void subsurface_scatter_multi_setup(
- KernelGlobals *kg,
- LocalIntersection* ss_isect,
- int hit,
- ShaderData *sd,
- ccl_addr_space PathState *state,
- ClosureType type,
- float roughness)
+ccl_device_noinline void subsurface_scatter_multi_setup(KernelGlobals *kg,
+ LocalIntersection *ss_isect,
+ int hit,
+ ShaderData *sd,
+ ccl_addr_space PathState *state,
+ ClosureType type,
+ float roughness)
{
#ifdef __SPLIT_KERNEL__
- Ray ray_object = ss_isect->ray;
- Ray *ray = &ray_object;
+ Ray ray_object = ss_isect->ray;
+ Ray *ray = &ray_object;
#else
- Ray *ray = &ss_isect->ray;
+ Ray *ray = &ss_isect->ray;
#endif
- /* Workaround for AMD GPU OpenCL compiler. Most probably cache bypass issue. */
+ /* Workaround for AMD GPU OpenCL compiler. Most probably cache bypass issue. */
#if defined(__SPLIT_KERNEL__) && defined(__KERNEL_OPENCL_AMD__) && defined(__KERNEL_GPU__)
- kernel_split_params.dummy_sd_flag = sd->flag;
+ kernel_split_params.dummy_sd_flag = sd->flag;
#endif
- /* Setup new shading point. */
- shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], ray);
+ /* Setup new shading point. */
+ shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], ray);
- /* Optionally blur colors and bump mapping. */
- float3 weight = ss_isect->weight[hit];
- float3 N = sd->N;
- subsurface_color_bump_blur(kg, sd, state, &weight, &N);
+ /* Optionally blur colors and bump mapping. */
+ float3 weight = ss_isect->weight[hit];
+ float3 N = sd->N;
+ subsurface_color_bump_blur(kg, sd, state, &weight, &N);
- /* Setup diffuse BSDF. */
- subsurface_scatter_setup_diffuse_bsdf(kg, sd, type, roughness, weight, N);
+ /* Setup diffuse BSDF. */
+ subsurface_scatter_setup_diffuse_bsdf(kg, sd, type, roughness, weight, N);
}
/* Random walk subsurface scattering.
@@ -340,196 +318,178 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
* "Practical and Controllable Subsurface Scattering for Production Path
* Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */
-ccl_device void subsurface_random_walk_remap(
- const float A,
- const float d,
- float *sigma_t,
- float *sigma_s)
+ccl_device void subsurface_random_walk_remap(const float A,
+ const float d,
+ float *sigma_t,
+ float *sigma_s)
{
- /* Compute attenuation and scattering coefficients from albedo. */
- const float a = 1.0f - expf(A * (-5.09406f + A * (2.61188f - A * 4.31805f)));
- const float s = 1.9f - A + 3.5f * sqr(A - 0.8f);
+ /* Compute attenuation and scattering coefficients from albedo. */
+ const float a = 1.0f - expf(A * (-5.09406f + A * (2.61188f - A * 4.31805f)));
+ const float s = 1.9f - A + 3.5f * sqr(A - 0.8f);
- *sigma_t = 1.0f / fmaxf(d * s, 1e-16f);
- *sigma_s = *sigma_t * a;
+ *sigma_t = 1.0f / fmaxf(d * s, 1e-16f);
+ *sigma_s = *sigma_t * a;
}
-ccl_device void subsurface_random_walk_coefficients(
- const ShaderClosure *sc,
- float3 *sigma_t,
- float3 *sigma_s,
- float3 *weight)
+ccl_device void subsurface_random_walk_coefficients(const ShaderClosure *sc,
+ float3 *sigma_t,
+ float3 *sigma_s,
+ float3 *weight)
{
- const Bssrdf *bssrdf = (const Bssrdf*)sc;
- const float3 A = bssrdf->albedo;
- const float3 d = bssrdf->radius;
- float sigma_t_x, sigma_t_y, sigma_t_z;
- float sigma_s_x, sigma_s_y, sigma_s_z;
+ const Bssrdf *bssrdf = (const Bssrdf *)sc;
+ const float3 A = bssrdf->albedo;
+ const float3 d = bssrdf->radius;
+ float sigma_t_x, sigma_t_y, sigma_t_z;
+ float sigma_s_x, sigma_s_y, sigma_s_z;
- subsurface_random_walk_remap(A.x, d.x, &sigma_t_x, &sigma_s_x);
- subsurface_random_walk_remap(A.y, d.y, &sigma_t_y, &sigma_s_y);
- subsurface_random_walk_remap(A.z, d.z, &sigma_t_z, &sigma_s_z);
+ subsurface_random_walk_remap(A.x, d.x, &sigma_t_x, &sigma_s_x);
+ subsurface_random_walk_remap(A.y, d.y, &sigma_t_y, &sigma_s_y);
+ subsurface_random_walk_remap(A.z, d.z, &sigma_t_z, &sigma_s_z);
- *sigma_t = make_float3(sigma_t_x, sigma_t_y, sigma_t_z);
- *sigma_s = make_float3(sigma_s_x, sigma_s_y, sigma_s_z);
+ *sigma_t = make_float3(sigma_t_x, sigma_t_y, sigma_t_z);
+ *sigma_s = make_float3(sigma_s_x, sigma_s_y, sigma_s_z);
- /* Closure mixing and Fresnel weights separate from albedo. */
- *weight = safe_divide_color(bssrdf->weight, A);
+ /* Closure mixing and Fresnel weights separate from albedo. */
+ *weight = safe_divide_color(bssrdf->weight, A);
}
-ccl_device_noinline bool subsurface_random_walk(
- KernelGlobals *kg,
- LocalIntersection *ss_isect,
- ShaderData *sd,
- ccl_addr_space PathState *state,
- const ShaderClosure *sc,
- const float bssrdf_u,
- const float bssrdf_v)
+ccl_device_noinline bool subsurface_random_walk(KernelGlobals *kg,
+ LocalIntersection *ss_isect,
+ ShaderData *sd,
+ ccl_addr_space PathState *state,
+ const ShaderClosure *sc,
+ const float bssrdf_u,
+ const float bssrdf_v)
{
- /* Sample diffuse surface scatter into the object. */
- float3 D;
- float pdf;
- sample_cos_hemisphere(-sd->N, bssrdf_u, bssrdf_v, &D, &pdf);
- if(dot(-sd->Ng, D) <= 0.0f) {
- return 0;
- }
-
- /* Convert subsurface to volume coefficients. */
- float3 sigma_t, sigma_s;
- float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
- subsurface_random_walk_coefficients(sc, &sigma_t, &sigma_s, &throughput);
-
- /* Setup ray. */
+ /* Sample diffuse surface scatter into the object. */
+ float3 D;
+ float pdf;
+ sample_cos_hemisphere(-sd->N, bssrdf_u, bssrdf_v, &D, &pdf);
+ if (dot(-sd->Ng, D) <= 0.0f) {
+ return 0;
+ }
+
+ /* Convert subsurface to volume coefficients. */
+ float3 sigma_t, sigma_s;
+ float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+ subsurface_random_walk_coefficients(sc, &sigma_t, &sigma_s, &throughput);
+
+ /* Setup ray. */
#ifdef __SPLIT_KERNEL__
- Ray ray_object = ss_isect->ray;
- Ray *ray = &ray_object;
+ Ray ray_object = ss_isect->ray;
+ Ray *ray = &ray_object;
#else
- Ray *ray = &ss_isect->ray;
+ Ray *ray = &ss_isect->ray;
#endif
- ray->P = ray_offset(sd->P, -sd->Ng);
- ray->D = D;
- ray->t = FLT_MAX;
- ray->time = sd->time;
-
- /* Modify state for RNGs, decorrelated from other paths. */
- uint prev_rng_offset = state->rng_offset;
- uint prev_rng_hash = state->rng_hash;
- state->rng_hash = cmj_hash(state->rng_hash + state->rng_offset, 0xdeadbeef);
-
- /* Random walk until we hit the surface again. */
- bool hit = false;
-
- for(int bounce = 0; bounce < BSSRDF_MAX_BOUNCES; bounce++) {
- /* Advance random number offset. */
- state->rng_offset += PRNG_BOUNCE_NUM;
-
- if(bounce > 0) {
- /* Sample scattering direction. */
- const float anisotropy = 0.0f;
- float scatter_u, scatter_v;
- path_state_rng_2D(kg, state, PRNG_BSDF_U, &scatter_u, &scatter_v);
- ray->D = henyey_greenstrein_sample(ray->D, anisotropy, scatter_u, scatter_v, NULL);
- }
-
- /* Sample color channel, use MIS with balance heuristic. */
- float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
- float3 albedo = safe_divide_color(sigma_s, sigma_t);
- float3 channel_pdf;
- int channel = kernel_volume_sample_channel(albedo, throughput, rphase, &channel_pdf);
-
- /* Distance sampling. */
- float rdist = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
- float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
- float t = -logf(1.0f - rdist)/sample_sigma_t;
-
- ray->t = t;
- scene_intersect_local(kg, *ray, ss_isect, sd->object, NULL, 1);
- hit = (ss_isect->num_hits > 0);
-
- if(hit) {
- /* Compute world space distance to surface hit. */
- float3 D = ray->D;
- object_inverse_dir_transform(kg, sd, &D);
- D = normalize(D) * ss_isect->hits[0].t;
- object_dir_transform(kg, sd, &D);
- t = len(D);
- }
-
- /* Advance to new scatter location. */
- ray->P += t * ray->D;
-
- /* Update throughput. */
- float3 transmittance = volume_color_transmittance(sigma_t, t);
- float pdf = dot(channel_pdf, (hit)? transmittance: sigma_t * transmittance);
- throughput *= ((hit)? transmittance: sigma_s * transmittance) / pdf;
-
- if(hit) {
- /* If we hit the surface, we are done. */
- break;
- }
-
- /* Russian roulette. */
- float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
- float probability = min(max3(fabs(throughput)), 1.0f);
- if(terminate >= probability) {
- break;
- }
- throughput /= probability;
- }
-
- kernel_assert(isfinite_safe(throughput.x) &&
- isfinite_safe(throughput.y) &&
- isfinite_safe(throughput.z));
-
- state->rng_offset = prev_rng_offset;
- state->rng_hash = prev_rng_hash;
-
- /* Return number of hits in ss_isect. */
- if(!hit) {
- return 0;
- }
-
- /* TODO: gain back performance lost from merging with disk BSSRDF. We
- * only need to return on hit so this indirect ray push/pop overhead
- * is not actually needed, but it does keep the code simpler. */
- ss_isect->weight[0] = throughput;
+ ray->P = ray_offset(sd->P, -sd->Ng);
+ ray->D = D;
+ ray->t = FLT_MAX;
+ ray->time = sd->time;
+
+ /* Modify state for RNGs, decorrelated from other paths. */
+ uint prev_rng_offset = state->rng_offset;
+ uint prev_rng_hash = state->rng_hash;
+ state->rng_hash = cmj_hash(state->rng_hash + state->rng_offset, 0xdeadbeef);
+
+ /* Random walk until we hit the surface again. */
+ bool hit = false;
+
+ for (int bounce = 0; bounce < BSSRDF_MAX_BOUNCES; bounce++) {
+ /* Advance random number offset. */
+ state->rng_offset += PRNG_BOUNCE_NUM;
+
+ if (bounce > 0) {
+ /* Sample scattering direction. */
+ const float anisotropy = 0.0f;
+ float scatter_u, scatter_v;
+ path_state_rng_2D(kg, state, PRNG_BSDF_U, &scatter_u, &scatter_v);
+ ray->D = henyey_greenstrein_sample(ray->D, anisotropy, scatter_u, scatter_v, NULL);
+ }
+
+ /* Sample color channel, use MIS with balance heuristic. */
+ float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
+ float3 albedo = safe_divide_color(sigma_s, sigma_t);
+ float3 channel_pdf;
+ int channel = kernel_volume_sample_channel(albedo, throughput, rphase, &channel_pdf);
+
+ /* Distance sampling. */
+ float rdist = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
+ float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
+ float t = -logf(1.0f - rdist) / sample_sigma_t;
+
+ ray->t = t;
+ scene_intersect_local(kg, *ray, ss_isect, sd->object, NULL, 1);
+ hit = (ss_isect->num_hits > 0);
+
+ if (hit) {
+ /* Compute world space distance to surface hit. */
+ float3 D = ray->D;
+ object_inverse_dir_transform(kg, sd, &D);
+ D = normalize(D) * ss_isect->hits[0].t;
+ object_dir_transform(kg, sd, &D);
+ t = len(D);
+ }
+
+ /* Advance to new scatter location. */
+ ray->P += t * ray->D;
+
+ /* Update throughput. */
+ float3 transmittance = volume_color_transmittance(sigma_t, t);
+ float pdf = dot(channel_pdf, (hit) ? transmittance : sigma_t * transmittance);
+ throughput *= ((hit) ? transmittance : sigma_s * transmittance) / pdf;
+
+ if (hit) {
+ /* If we hit the surface, we are done. */
+ break;
+ }
+
+ /* Russian roulette. */
+ float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
+ float probability = min(max3(fabs(throughput)), 1.0f);
+ if (terminate >= probability) {
+ break;
+ }
+ throughput /= probability;
+ }
+
+ kernel_assert(isfinite_safe(throughput.x) && isfinite_safe(throughput.y) &&
+ isfinite_safe(throughput.z));
+
+ state->rng_offset = prev_rng_offset;
+ state->rng_hash = prev_rng_hash;
+
+ /* Return number of hits in ss_isect. */
+ if (!hit) {
+ return 0;
+ }
+
+ /* TODO: gain back performance lost from merging with disk BSSRDF. We
+ * only need to return on hit so this indirect ray push/pop overhead
+ * is not actually needed, but it does keep the code simpler. */
+ ss_isect->weight[0] = throughput;
#ifdef __SPLIT_KERNEL__
- ss_isect->ray = *ray;
+ ss_isect->ray = *ray;
#endif
- return 1;
+ return 1;
}
-ccl_device_inline int subsurface_scatter_multi_intersect(
- KernelGlobals *kg,
- LocalIntersection *ss_isect,
- ShaderData *sd,
- ccl_addr_space PathState *state,
- const ShaderClosure *sc,
- uint *lcg_state,
- float bssrdf_u,
- float bssrdf_v,
- bool all)
+ccl_device_inline int subsurface_scatter_multi_intersect(KernelGlobals *kg,
+ LocalIntersection *ss_isect,
+ ShaderData *sd,
+ ccl_addr_space PathState *state,
+ const ShaderClosure *sc,
+ uint *lcg_state,
+ float bssrdf_u,
+ float bssrdf_v,
+ bool all)
{
- if(CLOSURE_IS_DISK_BSSRDF(sc->type)) {
- return subsurface_scatter_disk(kg,
- ss_isect,
- sd,
- sc,
- lcg_state,
- bssrdf_u,
- bssrdf_v,
- all);
- }
- else {
- return subsurface_random_walk(kg,
- ss_isect,
- sd,
- state,
- sc,
- bssrdf_u,
- bssrdf_v);
- }
+ if (CLOSURE_IS_DISK_BSSRDF(sc->type)) {
+ return subsurface_scatter_disk(kg, ss_isect, sd, sc, lcg_state, bssrdf_u, bssrdf_v, all);
+ }
+ else {
+ return subsurface_random_walk(kg, ss_isect, sd, state, sc, bssrdf_u, bssrdf_v);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 4b1c8e82dfa..3f62b726b6a 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -42,26 +42,26 @@ CCL_NAMESPACE_BEGIN
/* Constants */
#define OBJECT_MOTION_PASS_SIZE 2
-#define FILTER_TABLE_SIZE 1024
-#define RAMP_TABLE_SIZE 256
-#define SHUTTER_TABLE_SIZE 256
+#define FILTER_TABLE_SIZE 1024
+#define RAMP_TABLE_SIZE 256
+#define SHUTTER_TABLE_SIZE 256
-#define BSSRDF_MIN_RADIUS 1e-8f
-#define BSSRDF_MAX_HITS 4
-#define BSSRDF_MAX_BOUNCES 256
-#define LOCAL_MAX_HITS 4
+#define BSSRDF_MIN_RADIUS 1e-8f
+#define BSSRDF_MAX_HITS 4
+#define BSSRDF_MAX_BOUNCES 256
+#define LOCAL_MAX_HITS 4
-#define VOLUME_BOUNDS_MAX 1024
+#define VOLUME_BOUNDS_MAX 1024
-#define BECKMANN_TABLE_SIZE 256
+#define BECKMANN_TABLE_SIZE 256
-#define SHADER_NONE (~0)
-#define OBJECT_NONE (~0)
-#define PRIM_NONE (~0)
-#define LAMP_NONE (~0)
-#define ID_NONE (0.0f)
+#define SHADER_NONE (~0)
+#define OBJECT_NONE (~0)
+#define PRIM_NONE (~0)
+#define LAMP_NONE (~0)
+#define ID_NONE (0.0f)
-#define VOLUME_STACK_SIZE 32
+#define VOLUME_STACK_SIZE 32
/* Split kernel constants */
#define WORK_POOL_SIZE_GPU 64
@@ -72,7 +72,6 @@ CCL_NAMESPACE_BEGIN
# define WORK_POOL_SIZE WORK_POOL_SIZE_CPU
#endif
-
#define SHADER_SORT_BLOCK_SIZE 2048
#ifdef __KERNEL_OPENCL__
@@ -137,16 +136,16 @@ CCL_NAMESPACE_BEGIN
# endif
# define __VOLUME_DECOUPLED__
# define __VOLUME_RECORD_ALL__
-#endif /* __KERNEL_CPU__ */
+#endif /* __KERNEL_CPU__ */
#ifdef __KERNEL_CUDA__
# ifdef __SPLIT_KERNEL__
# undef __BRANCHED_PATH__
# endif
-#endif /* __KERNEL_CUDA__ */
+#endif /* __KERNEL_CUDA__ */
#ifdef __KERNEL_OPENCL__
-#endif /* __KERNEL_OPENCL__ */
+#endif /* __KERNEL_OPENCL__ */
/* Scene-based selective features compilation. */
#ifdef __NO_CAMERA_MOTION__
@@ -202,273 +201,269 @@ CCL_NAMESPACE_BEGIN
/* Shader Evaluation */
typedef enum ShaderEvalType {
- SHADER_EVAL_DISPLACE,
- SHADER_EVAL_BACKGROUND,
- /* bake types */
- SHADER_EVAL_BAKE, /* no real shade, it's used in the code to
- * differentiate the type of shader eval from the above
- */
- /* data passes */
- SHADER_EVAL_NORMAL,
- SHADER_EVAL_UV,
- SHADER_EVAL_ROUGHNESS,
- SHADER_EVAL_DIFFUSE_COLOR,
- SHADER_EVAL_GLOSSY_COLOR,
- SHADER_EVAL_TRANSMISSION_COLOR,
- SHADER_EVAL_SUBSURFACE_COLOR,
- SHADER_EVAL_EMISSION,
-
- /* light passes */
- SHADER_EVAL_AO,
- SHADER_EVAL_COMBINED,
- SHADER_EVAL_SHADOW,
- SHADER_EVAL_DIFFUSE,
- SHADER_EVAL_GLOSSY,
- SHADER_EVAL_TRANSMISSION,
- SHADER_EVAL_SUBSURFACE,
-
- /* extra */
- SHADER_EVAL_ENVIRONMENT,
+ SHADER_EVAL_DISPLACE,
+ SHADER_EVAL_BACKGROUND,
+ /* bake types */
+ SHADER_EVAL_BAKE, /* no real shade, it's used in the code to
+ * differentiate the type of shader eval from the above
+ */
+ /* data passes */
+ SHADER_EVAL_NORMAL,
+ SHADER_EVAL_UV,
+ SHADER_EVAL_ROUGHNESS,
+ SHADER_EVAL_DIFFUSE_COLOR,
+ SHADER_EVAL_GLOSSY_COLOR,
+ SHADER_EVAL_TRANSMISSION_COLOR,
+ SHADER_EVAL_SUBSURFACE_COLOR,
+ SHADER_EVAL_EMISSION,
+
+ /* light passes */
+ SHADER_EVAL_AO,
+ SHADER_EVAL_COMBINED,
+ SHADER_EVAL_SHADOW,
+ SHADER_EVAL_DIFFUSE,
+ SHADER_EVAL_GLOSSY,
+ SHADER_EVAL_TRANSMISSION,
+ SHADER_EVAL_SUBSURFACE,
+
+ /* extra */
+ SHADER_EVAL_ENVIRONMENT,
} ShaderEvalType;
/* Path Tracing
* note we need to keep the u/v pairs at even values */
enum PathTraceDimension {
- PRNG_FILTER_U = 0,
- PRNG_FILTER_V = 1,
- PRNG_LENS_U = 2,
- PRNG_LENS_V = 3,
- PRNG_TIME = 4,
- PRNG_UNUSED_0 = 5,
- PRNG_UNUSED_1 = 6, /* for some reason (6, 7) is a bad sobol pattern */
- PRNG_UNUSED_2 = 7, /* with a low number of samples (< 64) */
- PRNG_BASE_NUM = 10,
-
- PRNG_BSDF_U = 0,
- PRNG_BSDF_V = 1,
- PRNG_LIGHT_U = 2,
- PRNG_LIGHT_V = 3,
- PRNG_LIGHT_TERMINATE = 4,
- PRNG_TERMINATE = 5,
- PRNG_PHASE_CHANNEL = 6,
- PRNG_SCATTER_DISTANCE = 7,
- PRNG_BOUNCE_NUM = 8,
-
- PRNG_BEVEL_U = 6, /* reuse volume dimension, correlation won't harm */
- PRNG_BEVEL_V = 7,
+ PRNG_FILTER_U = 0,
+ PRNG_FILTER_V = 1,
+ PRNG_LENS_U = 2,
+ PRNG_LENS_V = 3,
+ PRNG_TIME = 4,
+ PRNG_UNUSED_0 = 5,
+ PRNG_UNUSED_1 = 6, /* for some reason (6, 7) is a bad sobol pattern */
+ PRNG_UNUSED_2 = 7, /* with a low number of samples (< 64) */
+ PRNG_BASE_NUM = 10,
+
+ PRNG_BSDF_U = 0,
+ PRNG_BSDF_V = 1,
+ PRNG_LIGHT_U = 2,
+ PRNG_LIGHT_V = 3,
+ PRNG_LIGHT_TERMINATE = 4,
+ PRNG_TERMINATE = 5,
+ PRNG_PHASE_CHANNEL = 6,
+ PRNG_SCATTER_DISTANCE = 7,
+ PRNG_BOUNCE_NUM = 8,
+
+ PRNG_BEVEL_U = 6, /* reuse volume dimension, correlation won't harm */
+ PRNG_BEVEL_V = 7,
};
enum SamplingPattern {
- SAMPLING_PATTERN_SOBOL = 0,
- SAMPLING_PATTERN_CMJ = 1,
+ SAMPLING_PATTERN_SOBOL = 0,
+ SAMPLING_PATTERN_CMJ = 1,
- SAMPLING_NUM_PATTERNS,
+ SAMPLING_NUM_PATTERNS,
};
/* these flags values correspond to raytypes in osl.cpp, so keep them in sync! */
enum PathRayFlag {
- PATH_RAY_CAMERA = (1 << 0),
- PATH_RAY_REFLECT = (1 << 1),
- PATH_RAY_TRANSMIT = (1 << 2),
- PATH_RAY_DIFFUSE = (1 << 3),
- PATH_RAY_GLOSSY = (1 << 4),
- PATH_RAY_SINGULAR = (1 << 5),
- PATH_RAY_TRANSPARENT = (1 << 6),
-
- PATH_RAY_SHADOW_OPAQUE_NON_CATCHER = (1 << 7),
- PATH_RAY_SHADOW_OPAQUE_CATCHER = (1 << 8),
- PATH_RAY_SHADOW_OPAQUE = (PATH_RAY_SHADOW_OPAQUE_NON_CATCHER|PATH_RAY_SHADOW_OPAQUE_CATCHER),
- PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER = (1 << 9),
- PATH_RAY_SHADOW_TRANSPARENT_CATCHER = (1 << 10),
- PATH_RAY_SHADOW_TRANSPARENT = (PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER|PATH_RAY_SHADOW_TRANSPARENT_CATCHER),
- PATH_RAY_SHADOW_NON_CATCHER = (PATH_RAY_SHADOW_OPAQUE_NON_CATCHER|PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER),
- PATH_RAY_SHADOW = (PATH_RAY_SHADOW_OPAQUE|PATH_RAY_SHADOW_TRANSPARENT),
-
- PATH_RAY_CURVE = (1 << 11), /* visibility flag to define curve segments */
- PATH_RAY_VOLUME_SCATTER = (1 << 12), /* volume scattering */
-
- /* Special flag to tag unaligned BVH nodes. */
- PATH_RAY_NODE_UNALIGNED = (1 << 13),
-
- PATH_RAY_ALL_VISIBILITY = ((1 << 14)-1),
-
- /* Don't apply multiple importance sampling weights to emission from
- * lamp or surface hits, because they were not direct light sampled. */
- PATH_RAY_MIS_SKIP = (1 << 14),
- /* Diffuse bounce earlier in the path, skip SSS to improve performance
- * and avoid branching twice with disk sampling SSS. */
- PATH_RAY_DIFFUSE_ANCESTOR = (1 << 15),
- /* Single pass has been written. */
- PATH_RAY_SINGLE_PASS_DONE = (1 << 16),
- /* Ray is behind a shadow catcher .*/
- PATH_RAY_SHADOW_CATCHER = (1 << 17),
- /* Store shadow data for shadow catcher or denoising. */
- PATH_RAY_STORE_SHADOW_INFO = (1 << 18),
- /* Zero background alpha, for camera or transparent glass rays. */
- PATH_RAY_TRANSPARENT_BACKGROUND = (1 << 19),
- /* Terminate ray immediately at next bounce. */
- PATH_RAY_TERMINATE_IMMEDIATE = (1 << 20),
- /* Ray is to be terminated, but continue with transparent bounces and
- * emission as long as we encounter them. This is required to make the
- * MIS between direct and indirect light rays match, as shadow rays go
- * through transparent surfaces to reach emisison too. */
- PATH_RAY_TERMINATE_AFTER_TRANSPARENT = (1 << 21),
- /* Ray is to be terminated. */
- PATH_RAY_TERMINATE = (PATH_RAY_TERMINATE_IMMEDIATE|PATH_RAY_TERMINATE_AFTER_TRANSPARENT),
- /* Path and shader is being evaluated for direct lighting emission. */
- PATH_RAY_EMISSION = (1 << 22)
+ PATH_RAY_CAMERA = (1 << 0),
+ PATH_RAY_REFLECT = (1 << 1),
+ PATH_RAY_TRANSMIT = (1 << 2),
+ PATH_RAY_DIFFUSE = (1 << 3),
+ PATH_RAY_GLOSSY = (1 << 4),
+ PATH_RAY_SINGULAR = (1 << 5),
+ PATH_RAY_TRANSPARENT = (1 << 6),
+
+ PATH_RAY_SHADOW_OPAQUE_NON_CATCHER = (1 << 7),
+ PATH_RAY_SHADOW_OPAQUE_CATCHER = (1 << 8),
+ PATH_RAY_SHADOW_OPAQUE = (PATH_RAY_SHADOW_OPAQUE_NON_CATCHER | PATH_RAY_SHADOW_OPAQUE_CATCHER),
+ PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER = (1 << 9),
+ PATH_RAY_SHADOW_TRANSPARENT_CATCHER = (1 << 10),
+ PATH_RAY_SHADOW_TRANSPARENT = (PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER |
+ PATH_RAY_SHADOW_TRANSPARENT_CATCHER),
+ PATH_RAY_SHADOW_NON_CATCHER = (PATH_RAY_SHADOW_OPAQUE_NON_CATCHER |
+ PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER),
+ PATH_RAY_SHADOW = (PATH_RAY_SHADOW_OPAQUE | PATH_RAY_SHADOW_TRANSPARENT),
+
+ PATH_RAY_CURVE = (1 << 11), /* visibility flag to define curve segments */
+ PATH_RAY_VOLUME_SCATTER = (1 << 12), /* volume scattering */
+
+ /* Special flag to tag unaligned BVH nodes. */
+ PATH_RAY_NODE_UNALIGNED = (1 << 13),
+
+ PATH_RAY_ALL_VISIBILITY = ((1 << 14) - 1),
+
+ /* Don't apply multiple importance sampling weights to emission from
+ * lamp or surface hits, because they were not direct light sampled. */
+ PATH_RAY_MIS_SKIP = (1 << 14),
+ /* Diffuse bounce earlier in the path, skip SSS to improve performance
+ * and avoid branching twice with disk sampling SSS. */
+ PATH_RAY_DIFFUSE_ANCESTOR = (1 << 15),
+ /* Single pass has been written. */
+ PATH_RAY_SINGLE_PASS_DONE = (1 << 16),
+ /* Ray is behind a shadow catcher. */
+ PATH_RAY_SHADOW_CATCHER = (1 << 17),
+ /* Store shadow data for shadow catcher or denoising. */
+ PATH_RAY_STORE_SHADOW_INFO = (1 << 18),
+ /* Zero background alpha, for camera or transparent glass rays. */
+ PATH_RAY_TRANSPARENT_BACKGROUND = (1 << 19),
+ /* Terminate ray immediately at next bounce. */
+ PATH_RAY_TERMINATE_IMMEDIATE = (1 << 20),
+ /* Ray is to be terminated, but continue with transparent bounces and
+ * emission as long as we encounter them. This is required to make the
+ * MIS between direct and indirect light rays match, as shadow rays go
+ * through transparent surfaces to reach emission too. */
+ PATH_RAY_TERMINATE_AFTER_TRANSPARENT = (1 << 21),
+ /* Ray is to be terminated. */
+ PATH_RAY_TERMINATE = (PATH_RAY_TERMINATE_IMMEDIATE | PATH_RAY_TERMINATE_AFTER_TRANSPARENT),
+ /* Path and shader is being evaluated for direct lighting emission. */
+ PATH_RAY_EMISSION = (1 << 22)
};
/* Closure Label */
typedef enum ClosureLabel {
- LABEL_NONE = 0,
- LABEL_TRANSMIT = 1,
- LABEL_REFLECT = 2,
- LABEL_DIFFUSE = 4,
- LABEL_GLOSSY = 8,
- LABEL_SINGULAR = 16,
- LABEL_TRANSPARENT = 32,
- LABEL_VOLUME_SCATTER = 64,
- LABEL_TRANSMIT_TRANSPARENT = 128,
+ LABEL_NONE = 0,
+ LABEL_TRANSMIT = 1,
+ LABEL_REFLECT = 2,
+ LABEL_DIFFUSE = 4,
+ LABEL_GLOSSY = 8,
+ LABEL_SINGULAR = 16,
+ LABEL_TRANSPARENT = 32,
+ LABEL_VOLUME_SCATTER = 64,
+ LABEL_TRANSMIT_TRANSPARENT = 128,
} ClosureLabel;
/* Render Passes */
-#define PASS_NAME_JOIN(a, b) a ## _ ## b
+#define PASS_NAME_JOIN(a, b) a##_##b
#define PASSMASK(pass) (1 << ((PASS_NAME_JOIN(PASS, pass)) % 32))
-#define PASSMASK_COMPONENT(comp) (PASSMASK(PASS_NAME_JOIN(comp, DIRECT)) | \
- PASSMASK(PASS_NAME_JOIN(comp, INDIRECT)) | \
- PASSMASK(PASS_NAME_JOIN(comp, COLOR)))
+#define PASSMASK_COMPONENT(comp) \
+ (PASSMASK(PASS_NAME_JOIN(comp, DIRECT)) | PASSMASK(PASS_NAME_JOIN(comp, INDIRECT)) | \
+ PASSMASK(PASS_NAME_JOIN(comp, COLOR)))
typedef enum PassType {
- PASS_NONE = 0,
-
- /* Main passes */
- PASS_COMBINED = 1,
- PASS_DEPTH,
- PASS_NORMAL,
- PASS_UV,
- PASS_OBJECT_ID,
- PASS_MATERIAL_ID,
- PASS_MOTION,
- PASS_MOTION_WEIGHT,
+ PASS_NONE = 0,
+
+ /* Main passes */
+ PASS_COMBINED = 1,
+ PASS_DEPTH,
+ PASS_NORMAL,
+ PASS_UV,
+ PASS_OBJECT_ID,
+ PASS_MATERIAL_ID,
+ PASS_MOTION,
+ PASS_MOTION_WEIGHT,
#ifdef __KERNEL_DEBUG__
- PASS_BVH_TRAVERSED_NODES,
- PASS_BVH_TRAVERSED_INSTANCES,
- PASS_BVH_INTERSECTIONS,
- PASS_RAY_BOUNCES,
+ PASS_BVH_TRAVERSED_NODES,
+ PASS_BVH_TRAVERSED_INSTANCES,
+ PASS_BVH_INTERSECTIONS,
+ PASS_RAY_BOUNCES,
#endif
- PASS_RENDER_TIME,
- PASS_CRYPTOMATTE,
- PASS_CATEGORY_MAIN_END = 31,
-
- PASS_MIST = 32,
- PASS_EMISSION,
- PASS_BACKGROUND,
- PASS_AO,
- PASS_SHADOW,
- PASS_LIGHT, /* no real pass, used to force use_light_pass */
- PASS_DIFFUSE_DIRECT,
- PASS_DIFFUSE_INDIRECT,
- PASS_DIFFUSE_COLOR,
- PASS_GLOSSY_DIRECT,
- PASS_GLOSSY_INDIRECT,
- PASS_GLOSSY_COLOR,
- PASS_TRANSMISSION_DIRECT,
- PASS_TRANSMISSION_INDIRECT,
- PASS_TRANSMISSION_COLOR,
- PASS_SUBSURFACE_DIRECT,
- PASS_SUBSURFACE_INDIRECT,
- PASS_SUBSURFACE_COLOR,
- PASS_VOLUME_DIRECT,
- PASS_VOLUME_INDIRECT,
- /* No Scatter color since it's tricky to define what it would even mean. */
- PASS_CATEGORY_LIGHT_END = 63,
+ PASS_RENDER_TIME,
+ PASS_CRYPTOMATTE,
+ PASS_CATEGORY_MAIN_END = 31,
+
+ PASS_MIST = 32,
+ PASS_EMISSION,
+ PASS_BACKGROUND,
+ PASS_AO,
+ PASS_SHADOW,
+ PASS_LIGHT, /* no real pass, used to force use_light_pass */
+ PASS_DIFFUSE_DIRECT,
+ PASS_DIFFUSE_INDIRECT,
+ PASS_DIFFUSE_COLOR,
+ PASS_GLOSSY_DIRECT,
+ PASS_GLOSSY_INDIRECT,
+ PASS_GLOSSY_COLOR,
+ PASS_TRANSMISSION_DIRECT,
+ PASS_TRANSMISSION_INDIRECT,
+ PASS_TRANSMISSION_COLOR,
+ PASS_SUBSURFACE_DIRECT,
+ PASS_SUBSURFACE_INDIRECT,
+ PASS_SUBSURFACE_COLOR,
+ PASS_VOLUME_DIRECT,
+ PASS_VOLUME_INDIRECT,
+ /* No Scatter color since it's tricky to define what it would even mean. */
+ PASS_CATEGORY_LIGHT_END = 63,
} PassType;
#define PASS_ANY (~0)
typedef enum CryptomatteType {
- CRYPT_NONE = 0,
- CRYPT_OBJECT = (1 << 0),
- CRYPT_MATERIAL = (1 << 1),
- CRYPT_ASSET = (1 << 2),
- CRYPT_ACCURATE = (1 << 3),
+ CRYPT_NONE = 0,
+ CRYPT_OBJECT = (1 << 0),
+ CRYPT_MATERIAL = (1 << 1),
+ CRYPT_ASSET = (1 << 2),
+ CRYPT_ACCURATE = (1 << 3),
} CryptomatteType;
typedef enum DenoisingPassOffsets {
- DENOISING_PASS_NORMAL = 0,
- DENOISING_PASS_NORMAL_VAR = 3,
- DENOISING_PASS_ALBEDO = 6,
- DENOISING_PASS_ALBEDO_VAR = 9,
- DENOISING_PASS_DEPTH = 12,
- DENOISING_PASS_DEPTH_VAR = 13,
- DENOISING_PASS_SHADOW_A = 14,
- DENOISING_PASS_SHADOW_B = 17,
- DENOISING_PASS_COLOR = 20,
- DENOISING_PASS_COLOR_VAR = 23,
- DENOISING_PASS_CLEAN = 26,
-
- DENOISING_PASS_PREFILTERED_DEPTH = 0,
- DENOISING_PASS_PREFILTERED_NORMAL = 1,
- DENOISING_PASS_PREFILTERED_SHADOWING = 4,
- DENOISING_PASS_PREFILTERED_ALBEDO = 5,
- DENOISING_PASS_PREFILTERED_COLOR = 8,
- DENOISING_PASS_PREFILTERED_VARIANCE = 11,
- DENOISING_PASS_PREFILTERED_INTENSITY = 14,
-
- DENOISING_PASS_SIZE_BASE = 26,
- DENOISING_PASS_SIZE_CLEAN = 3,
- DENOISING_PASS_SIZE_PREFILTERED = 15,
+ DENOISING_PASS_NORMAL = 0,
+ DENOISING_PASS_NORMAL_VAR = 3,
+ DENOISING_PASS_ALBEDO = 6,
+ DENOISING_PASS_ALBEDO_VAR = 9,
+ DENOISING_PASS_DEPTH = 12,
+ DENOISING_PASS_DEPTH_VAR = 13,
+ DENOISING_PASS_SHADOW_A = 14,
+ DENOISING_PASS_SHADOW_B = 17,
+ DENOISING_PASS_COLOR = 20,
+ DENOISING_PASS_COLOR_VAR = 23,
+ DENOISING_PASS_CLEAN = 26,
+
+ DENOISING_PASS_PREFILTERED_DEPTH = 0,
+ DENOISING_PASS_PREFILTERED_NORMAL = 1,
+ DENOISING_PASS_PREFILTERED_SHADOWING = 4,
+ DENOISING_PASS_PREFILTERED_ALBEDO = 5,
+ DENOISING_PASS_PREFILTERED_COLOR = 8,
+ DENOISING_PASS_PREFILTERED_VARIANCE = 11,
+ DENOISING_PASS_PREFILTERED_INTENSITY = 14,
+
+ DENOISING_PASS_SIZE_BASE = 26,
+ DENOISING_PASS_SIZE_CLEAN = 3,
+ DENOISING_PASS_SIZE_PREFILTERED = 15,
} DenoisingPassOffsets;
typedef enum eBakePassFilter {
- BAKE_FILTER_NONE = 0,
- BAKE_FILTER_DIRECT = (1 << 0),
- BAKE_FILTER_INDIRECT = (1 << 1),
- BAKE_FILTER_COLOR = (1 << 2),
- BAKE_FILTER_DIFFUSE = (1 << 3),
- BAKE_FILTER_GLOSSY = (1 << 4),
- BAKE_FILTER_TRANSMISSION = (1 << 5),
- BAKE_FILTER_SUBSURFACE = (1 << 6),
- BAKE_FILTER_EMISSION = (1 << 7),
- BAKE_FILTER_AO = (1 << 8),
+ BAKE_FILTER_NONE = 0,
+ BAKE_FILTER_DIRECT = (1 << 0),
+ BAKE_FILTER_INDIRECT = (1 << 1),
+ BAKE_FILTER_COLOR = (1 << 2),
+ BAKE_FILTER_DIFFUSE = (1 << 3),
+ BAKE_FILTER_GLOSSY = (1 << 4),
+ BAKE_FILTER_TRANSMISSION = (1 << 5),
+ BAKE_FILTER_SUBSURFACE = (1 << 6),
+ BAKE_FILTER_EMISSION = (1 << 7),
+ BAKE_FILTER_AO = (1 << 8),
} eBakePassFilter;
typedef enum BakePassFilterCombos {
- BAKE_FILTER_COMBINED = (
- BAKE_FILTER_DIRECT |
- BAKE_FILTER_INDIRECT |
- BAKE_FILTER_DIFFUSE |
- BAKE_FILTER_GLOSSY |
- BAKE_FILTER_TRANSMISSION |
- BAKE_FILTER_SUBSURFACE |
- BAKE_FILTER_EMISSION |
- BAKE_FILTER_AO),
- BAKE_FILTER_DIFFUSE_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_DIFFUSE),
- BAKE_FILTER_GLOSSY_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_GLOSSY),
- BAKE_FILTER_TRANSMISSION_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_TRANSMISSION),
- BAKE_FILTER_SUBSURFACE_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_SUBSURFACE),
- BAKE_FILTER_DIFFUSE_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_DIFFUSE),
- BAKE_FILTER_GLOSSY_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_GLOSSY),
- BAKE_FILTER_TRANSMISSION_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_TRANSMISSION),
- BAKE_FILTER_SUBSURFACE_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_SUBSURFACE),
+ BAKE_FILTER_COMBINED = (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT | BAKE_FILTER_DIFFUSE |
+ BAKE_FILTER_GLOSSY | BAKE_FILTER_TRANSMISSION | BAKE_FILTER_SUBSURFACE |
+ BAKE_FILTER_EMISSION | BAKE_FILTER_AO),
+ BAKE_FILTER_DIFFUSE_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_DIFFUSE),
+ BAKE_FILTER_GLOSSY_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_GLOSSY),
+ BAKE_FILTER_TRANSMISSION_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_TRANSMISSION),
+ BAKE_FILTER_SUBSURFACE_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_SUBSURFACE),
+ BAKE_FILTER_DIFFUSE_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_DIFFUSE),
+ BAKE_FILTER_GLOSSY_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_GLOSSY),
+ BAKE_FILTER_TRANSMISSION_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_TRANSMISSION),
+ BAKE_FILTER_SUBSURFACE_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_SUBSURFACE),
} BakePassFilterCombos;
typedef enum DenoiseFlag {
- DENOISING_CLEAN_DIFFUSE_DIR = (1 << 0),
- DENOISING_CLEAN_DIFFUSE_IND = (1 << 1),
- DENOISING_CLEAN_GLOSSY_DIR = (1 << 2),
- DENOISING_CLEAN_GLOSSY_IND = (1 << 3),
- DENOISING_CLEAN_TRANSMISSION_DIR = (1 << 4),
- DENOISING_CLEAN_TRANSMISSION_IND = (1 << 5),
- DENOISING_CLEAN_SUBSURFACE_DIR = (1 << 6),
- DENOISING_CLEAN_SUBSURFACE_IND = (1 << 7),
- DENOISING_CLEAN_ALL_PASSES = (1 << 8)-1,
+ DENOISING_CLEAN_DIFFUSE_DIR = (1 << 0),
+ DENOISING_CLEAN_DIFFUSE_IND = (1 << 1),
+ DENOISING_CLEAN_GLOSSY_DIR = (1 << 2),
+ DENOISING_CLEAN_GLOSSY_IND = (1 << 3),
+ DENOISING_CLEAN_TRANSMISSION_DIR = (1 << 4),
+ DENOISING_CLEAN_TRANSMISSION_IND = (1 << 5),
+ DENOISING_CLEAN_SUBSURFACE_DIR = (1 << 6),
+ DENOISING_CLEAN_SUBSURFACE_IND = (1 << 7),
+ DENOISING_CLEAN_ALL_PASSES = (1 << 8) - 1,
} DenoiseFlag;
#ifdef __KERNEL_DEBUG__
@@ -476,173 +471,171 @@ typedef enum DenoiseFlag {
* really important here.
*/
typedef struct DebugData {
- int num_bvh_traversed_nodes;
- int num_bvh_traversed_instances;
- int num_bvh_intersections;
- int num_ray_bounces;
+ int num_bvh_traversed_nodes;
+ int num_bvh_traversed_instances;
+ int num_bvh_intersections;
+ int num_ray_bounces;
} DebugData;
#endif
typedef ccl_addr_space struct PathRadianceState {
#ifdef __PASSES__
- float3 diffuse;
- float3 glossy;
- float3 transmission;
- float3 subsurface;
- float3 scatter;
+ float3 diffuse;
+ float3 glossy;
+ float3 transmission;
+ float3 subsurface;
+ float3 scatter;
- float3 direct;
+ float3 direct;
#endif
} PathRadianceState;
typedef ccl_addr_space struct PathRadiance {
#ifdef __PASSES__
- int use_light_pass;
+ int use_light_pass;
#endif
- float transparent;
- float3 emission;
+ float transparent;
+ float3 emission;
#ifdef __PASSES__
- float3 background;
- float3 ao;
-
- float3 indirect;
- float3 direct_emission;
-
- float3 color_diffuse;
- float3 color_glossy;
- float3 color_transmission;
- float3 color_subsurface;
-
- float3 direct_diffuse;
- float3 direct_glossy;
- float3 direct_transmission;
- float3 direct_subsurface;
- float3 direct_scatter;
-
- float3 indirect_diffuse;
- float3 indirect_glossy;
- float3 indirect_transmission;
- float3 indirect_subsurface;
- float3 indirect_scatter;
-
- float4 shadow;
- float mist;
+ float3 background;
+ float3 ao;
+
+ float3 indirect;
+ float3 direct_emission;
+
+ float3 color_diffuse;
+ float3 color_glossy;
+ float3 color_transmission;
+ float3 color_subsurface;
+
+ float3 direct_diffuse;
+ float3 direct_glossy;
+ float3 direct_transmission;
+ float3 direct_subsurface;
+ float3 direct_scatter;
+
+ float3 indirect_diffuse;
+ float3 indirect_glossy;
+ float3 indirect_transmission;
+ float3 indirect_subsurface;
+ float3 indirect_scatter;
+
+ float4 shadow;
+ float mist;
#endif
- struct PathRadianceState state;
+ struct PathRadianceState state;
#ifdef __SHADOW_TRICKS__
- /* Total light reachable across the path, ignoring shadow blocked queries. */
- float3 path_total;
- /* Total light reachable across the path with shadow blocked queries
- * applied here.
- *
- * Dividing this figure by path_total will give estimate of shadow pass.
- */
- float3 path_total_shaded;
-
- /* Color of the background on which shadow is alpha-overed. */
- float3 shadow_background_color;
-
- /* Path radiance sum and throughput at the moment when ray hits shadow
- * catcher object.
- */
- float shadow_throughput;
-
- /* Accumulated transparency along the path after shadow catcher bounce. */
- float shadow_transparency;
-
- /* Indicate if any shadow catcher data is set. */
- int has_shadow_catcher;
+ /* Total light reachable across the path, ignoring shadow blocked queries. */
+ float3 path_total;
+ /* Total light reachable across the path with shadow blocked queries
+ * applied here.
+ *
+ * Dividing this figure by path_total will give estimate of shadow pass.
+ */
+ float3 path_total_shaded;
+
+ /* Color of the background on which shadow is alpha-overed. */
+ float3 shadow_background_color;
+
+ /* Path radiance sum and throughput at the moment when ray hits shadow
+ * catcher object.
+ */
+ float shadow_throughput;
+
+ /* Accumulated transparency along the path after shadow catcher bounce. */
+ float shadow_transparency;
+
+ /* Indicate if any shadow catcher data is set. */
+ int has_shadow_catcher;
#endif
#ifdef __DENOISING_FEATURES__
- float3 denoising_normal;
- float3 denoising_albedo;
- float denoising_depth;
-#endif /* __DENOISING_FEATURES__ */
+ float3 denoising_normal;
+ float3 denoising_albedo;
+ float denoising_depth;
+#endif /* __DENOISING_FEATURES__ */
#ifdef __KERNEL_DEBUG__
- DebugData debug_data;
-#endif /* __KERNEL_DEBUG__ */
+ DebugData debug_data;
+#endif /* __KERNEL_DEBUG__ */
} PathRadiance;
typedef struct BsdfEval {
#ifdef __PASSES__
- int use_light_pass;
+ int use_light_pass;
#endif
- float3 diffuse;
+ float3 diffuse;
#ifdef __PASSES__
- float3 glossy;
- float3 transmission;
- float3 transparent;
- float3 subsurface;
- float3 scatter;
+ float3 glossy;
+ float3 transmission;
+ float3 transparent;
+ float3 subsurface;
+ float3 scatter;
#endif
#ifdef __SHADOW_TRICKS__
- float3 sum_no_mis;
+ float3 sum_no_mis;
#endif
} BsdfEval;
/* Shader Flag */
typedef enum ShaderFlag {
- SHADER_SMOOTH_NORMAL = (1 << 31),
- SHADER_CAST_SHADOW = (1 << 30),
- SHADER_AREA_LIGHT = (1 << 29),
- SHADER_USE_MIS = (1 << 28),
- SHADER_EXCLUDE_DIFFUSE = (1 << 27),
- SHADER_EXCLUDE_GLOSSY = (1 << 26),
- SHADER_EXCLUDE_TRANSMIT = (1 << 25),
- SHADER_EXCLUDE_CAMERA = (1 << 24),
- SHADER_EXCLUDE_SCATTER = (1 << 23),
- SHADER_EXCLUDE_ANY = (SHADER_EXCLUDE_DIFFUSE|SHADER_EXCLUDE_GLOSSY|SHADER_EXCLUDE_TRANSMIT|SHADER_EXCLUDE_CAMERA|SHADER_EXCLUDE_SCATTER),
-
- SHADER_MASK = ~(SHADER_SMOOTH_NORMAL|SHADER_CAST_SHADOW|SHADER_AREA_LIGHT|SHADER_USE_MIS|SHADER_EXCLUDE_ANY)
+ SHADER_SMOOTH_NORMAL = (1 << 31),
+ SHADER_CAST_SHADOW = (1 << 30),
+ SHADER_AREA_LIGHT = (1 << 29),
+ SHADER_USE_MIS = (1 << 28),
+ SHADER_EXCLUDE_DIFFUSE = (1 << 27),
+ SHADER_EXCLUDE_GLOSSY = (1 << 26),
+ SHADER_EXCLUDE_TRANSMIT = (1 << 25),
+ SHADER_EXCLUDE_CAMERA = (1 << 24),
+ SHADER_EXCLUDE_SCATTER = (1 << 23),
+ SHADER_EXCLUDE_ANY = (SHADER_EXCLUDE_DIFFUSE | SHADER_EXCLUDE_GLOSSY | SHADER_EXCLUDE_TRANSMIT |
+ SHADER_EXCLUDE_CAMERA | SHADER_EXCLUDE_SCATTER),
+
+ SHADER_MASK = ~(SHADER_SMOOTH_NORMAL | SHADER_CAST_SHADOW | SHADER_AREA_LIGHT | SHADER_USE_MIS |
+ SHADER_EXCLUDE_ANY)
} ShaderFlag;
/* Light Type */
typedef enum LightType {
- LIGHT_POINT,
- LIGHT_DISTANT,
- LIGHT_BACKGROUND,
- LIGHT_AREA,
- LIGHT_SPOT,
- LIGHT_TRIANGLE
+ LIGHT_POINT,
+ LIGHT_DISTANT,
+ LIGHT_BACKGROUND,
+ LIGHT_AREA,
+ LIGHT_SPOT,
+ LIGHT_TRIANGLE
} LightType;
/* Camera Type */
-enum CameraType {
- CAMERA_PERSPECTIVE,
- CAMERA_ORTHOGRAPHIC,
- CAMERA_PANORAMA
-};
+enum CameraType { CAMERA_PERSPECTIVE, CAMERA_ORTHOGRAPHIC, CAMERA_PANORAMA };
/* Panorama Type */
enum PanoramaType {
- PANORAMA_EQUIRECTANGULAR = 0,
- PANORAMA_FISHEYE_EQUIDISTANT = 1,
- PANORAMA_FISHEYE_EQUISOLID = 2,
- PANORAMA_MIRRORBALL = 3,
+ PANORAMA_EQUIRECTANGULAR = 0,
+ PANORAMA_FISHEYE_EQUIDISTANT = 1,
+ PANORAMA_FISHEYE_EQUISOLID = 2,
+ PANORAMA_MIRRORBALL = 3,
- PANORAMA_NUM_TYPES,
+ PANORAMA_NUM_TYPES,
};
/* Differential */
typedef struct differential3 {
- float3 dx;
- float3 dy;
+ float3 dx;
+ float3 dy;
} differential3;
typedef struct differential {
- float dx;
- float dy;
+ float dx;
+ float dy;
} differential;
/* Ray */
@@ -657,21 +650,21 @@ typedef struct Ray {
* is fixed.
*/
#ifndef __KERNEL_OPENCL_AMD__
- float3 P; /* origin */
- float3 D; /* direction */
+ float3 P; /* origin */
+ float3 D; /* direction */
- float t; /* length of the ray */
- float time; /* time (for motion blur) */
+ float t; /* length of the ray */
+ float time; /* time (for motion blur) */
#else
- float t; /* length of the ray */
- float time; /* time (for motion blur) */
- float3 P; /* origin */
- float3 D; /* direction */
+ float t; /* length of the ray */
+ float time; /* time (for motion blur) */
+ float3 P; /* origin */
+ float3 D; /* direction */
#endif
#ifdef __RAY_DIFFERENTIALS__
- differential3 dP;
- differential3 dD;
+ differential3 dP;
+ differential3 dD;
#endif
} Ray;
@@ -679,42 +672,42 @@ typedef struct Ray {
typedef struct Intersection {
#ifdef __EMBREE__
- float3 Ng;
+ float3 Ng;
#endif
- float t, u, v;
- int prim;
- int object;
- int type;
+ float t, u, v;
+ int prim;
+ int object;
+ int type;
#ifdef __KERNEL_DEBUG__
- int num_traversed_nodes;
- int num_traversed_instances;
- int num_intersections;
+ int num_traversed_nodes;
+ int num_traversed_instances;
+ int num_intersections;
#endif
} Intersection;
/* Primitives */
typedef enum PrimitiveType {
- PRIMITIVE_NONE = 0,
- PRIMITIVE_TRIANGLE = (1 << 0),
- PRIMITIVE_MOTION_TRIANGLE = (1 << 1),
- PRIMITIVE_CURVE = (1 << 2),
- PRIMITIVE_MOTION_CURVE = (1 << 3),
- /* Lamp primitive is not included below on purpose,
- * since it is no real traceable primitive.
- */
- PRIMITIVE_LAMP = (1 << 4),
-
- PRIMITIVE_ALL_TRIANGLE = (PRIMITIVE_TRIANGLE|PRIMITIVE_MOTION_TRIANGLE),
- PRIMITIVE_ALL_CURVE = (PRIMITIVE_CURVE|PRIMITIVE_MOTION_CURVE),
- PRIMITIVE_ALL_MOTION = (PRIMITIVE_MOTION_TRIANGLE|PRIMITIVE_MOTION_CURVE),
- PRIMITIVE_ALL = (PRIMITIVE_ALL_TRIANGLE|PRIMITIVE_ALL_CURVE),
-
- /* Total number of different traceable primitives.
- * NOTE: This is an actual value, not a bitflag.
- */
- PRIMITIVE_NUM_TOTAL = 4,
+ PRIMITIVE_NONE = 0,
+ PRIMITIVE_TRIANGLE = (1 << 0),
+ PRIMITIVE_MOTION_TRIANGLE = (1 << 1),
+ PRIMITIVE_CURVE = (1 << 2),
+ PRIMITIVE_MOTION_CURVE = (1 << 3),
+ /* Lamp primitive is not included below on purpose,
+ * since it is no real traceable primitive.
+ */
+ PRIMITIVE_LAMP = (1 << 4),
+
+ PRIMITIVE_ALL_TRIANGLE = (PRIMITIVE_TRIANGLE | PRIMITIVE_MOTION_TRIANGLE),
+ PRIMITIVE_ALL_CURVE = (PRIMITIVE_CURVE | PRIMITIVE_MOTION_CURVE),
+ PRIMITIVE_ALL_MOTION = (PRIMITIVE_MOTION_TRIANGLE | PRIMITIVE_MOTION_CURVE),
+ PRIMITIVE_ALL = (PRIMITIVE_ALL_TRIANGLE | PRIMITIVE_ALL_CURVE),
+
+ /* Total number of different traceable primitives.
+ * NOTE: This is an actual value, not a bitflag.
+ */
+ PRIMITIVE_NUM_TOTAL = 4,
} PrimitiveType;
#define PRIMITIVE_PACK_SEGMENT(type, segment) ((segment << PRIMITIVE_NUM_TOTAL) | (type))
@@ -723,68 +716,68 @@ typedef enum PrimitiveType {
/* Attributes */
typedef enum AttributePrimitive {
- ATTR_PRIM_TRIANGLE = 0,
- ATTR_PRIM_CURVE,
- ATTR_PRIM_SUBD,
+ ATTR_PRIM_TRIANGLE = 0,
+ ATTR_PRIM_CURVE,
+ ATTR_PRIM_SUBD,
- ATTR_PRIM_TYPES
+ ATTR_PRIM_TYPES
} AttributePrimitive;
typedef enum AttributeElement {
- ATTR_ELEMENT_NONE,
- ATTR_ELEMENT_OBJECT,
- ATTR_ELEMENT_MESH,
- ATTR_ELEMENT_FACE,
- ATTR_ELEMENT_VERTEX,
- ATTR_ELEMENT_VERTEX_MOTION,
- ATTR_ELEMENT_CORNER,
- ATTR_ELEMENT_CORNER_BYTE,
- ATTR_ELEMENT_CURVE,
- ATTR_ELEMENT_CURVE_KEY,
- ATTR_ELEMENT_CURVE_KEY_MOTION,
- ATTR_ELEMENT_VOXEL
+ ATTR_ELEMENT_NONE,
+ ATTR_ELEMENT_OBJECT,
+ ATTR_ELEMENT_MESH,
+ ATTR_ELEMENT_FACE,
+ ATTR_ELEMENT_VERTEX,
+ ATTR_ELEMENT_VERTEX_MOTION,
+ ATTR_ELEMENT_CORNER,
+ ATTR_ELEMENT_CORNER_BYTE,
+ ATTR_ELEMENT_CURVE,
+ ATTR_ELEMENT_CURVE_KEY,
+ ATTR_ELEMENT_CURVE_KEY_MOTION,
+ ATTR_ELEMENT_VOXEL
} AttributeElement;
typedef enum AttributeStandard {
- ATTR_STD_NONE = 0,
- ATTR_STD_VERTEX_NORMAL,
- ATTR_STD_FACE_NORMAL,
- ATTR_STD_UV,
- ATTR_STD_UV_TANGENT,
- ATTR_STD_UV_TANGENT_SIGN,
- ATTR_STD_GENERATED,
- ATTR_STD_GENERATED_TRANSFORM,
- ATTR_STD_POSITION_UNDEFORMED,
- ATTR_STD_POSITION_UNDISPLACED,
- ATTR_STD_MOTION_VERTEX_POSITION,
- ATTR_STD_MOTION_VERTEX_NORMAL,
- ATTR_STD_PARTICLE,
- ATTR_STD_CURVE_INTERCEPT,
- ATTR_STD_CURVE_RANDOM,
- ATTR_STD_PTEX_FACE_ID,
- ATTR_STD_PTEX_UV,
- ATTR_STD_VOLUME_DENSITY,
- ATTR_STD_VOLUME_COLOR,
- ATTR_STD_VOLUME_FLAME,
- ATTR_STD_VOLUME_HEAT,
- ATTR_STD_VOLUME_TEMPERATURE,
- ATTR_STD_VOLUME_VELOCITY,
- ATTR_STD_POINTINESS,
- ATTR_STD_NUM,
-
- ATTR_STD_NOT_FOUND = ~0
+ ATTR_STD_NONE = 0,
+ ATTR_STD_VERTEX_NORMAL,
+ ATTR_STD_FACE_NORMAL,
+ ATTR_STD_UV,
+ ATTR_STD_UV_TANGENT,
+ ATTR_STD_UV_TANGENT_SIGN,
+ ATTR_STD_GENERATED,
+ ATTR_STD_GENERATED_TRANSFORM,
+ ATTR_STD_POSITION_UNDEFORMED,
+ ATTR_STD_POSITION_UNDISPLACED,
+ ATTR_STD_MOTION_VERTEX_POSITION,
+ ATTR_STD_MOTION_VERTEX_NORMAL,
+ ATTR_STD_PARTICLE,
+ ATTR_STD_CURVE_INTERCEPT,
+ ATTR_STD_CURVE_RANDOM,
+ ATTR_STD_PTEX_FACE_ID,
+ ATTR_STD_PTEX_UV,
+ ATTR_STD_VOLUME_DENSITY,
+ ATTR_STD_VOLUME_COLOR,
+ ATTR_STD_VOLUME_FLAME,
+ ATTR_STD_VOLUME_HEAT,
+ ATTR_STD_VOLUME_TEMPERATURE,
+ ATTR_STD_VOLUME_VELOCITY,
+ ATTR_STD_POINTINESS,
+ ATTR_STD_NUM,
+
+ ATTR_STD_NOT_FOUND = ~0
} AttributeStandard;
typedef enum AttributeFlag {
- ATTR_FINAL_SIZE = (1 << 0),
- ATTR_SUBDIVIDED = (1 << 1),
+ ATTR_FINAL_SIZE = (1 << 0),
+ ATTR_SUBDIVIDED = (1 << 1),
} AttributeFlag;
typedef struct AttributeDescriptor {
- AttributeElement element;
- NodeAttributeType type;
- uint flags; /* see enum AttributeFlag */
- int offset;
+ AttributeElement element;
+ NodeAttributeType type;
+ uint flags; /* see enum AttributeFlag */
+ int offset;
} AttributeDescriptor;
/* Closure data */
@@ -794,7 +787,7 @@ typedef struct AttributeDescriptor {
# define MAX_CLOSURE 1
# else
# ifndef __MAX_CLOSURE__
-# define MAX_CLOSURE 64
+# define MAX_CLOSURE 64
# else
# define MAX_CLOSURE __MAX_CLOSURE__
# endif
@@ -815,16 +808,18 @@ typedef struct AttributeDescriptor {
* we assume to be the maximum required alignment for any struct. */
#define SHADER_CLOSURE_BASE \
- float3 weight; \
- ClosureType type; \
- float sample_weight; \
- float3 N
+ float3 weight; \
+ ClosureType type; \
+ float sample_weight; \
+ float3 N
-typedef ccl_addr_space struct ccl_align(16) ShaderClosure {
- SHADER_CLOSURE_BASE;
+typedef ccl_addr_space struct ccl_align(16) ShaderClosure
+{
+ SHADER_CLOSURE_BASE;
- float data[10]; /* pad to 80 bytes */
-} ShaderClosure;
+ float data[10]; /* pad to 80 bytes */
+}
+ShaderClosure;
/* Shader Data
*
@@ -833,272 +828,253 @@ typedef ccl_addr_space struct ccl_align(16) ShaderClosure {
*/
enum ShaderDataFlag {
- /* Runtime flags. */
-
- /* Set when ray hits backside of surface. */
- SD_BACKFACING = (1 << 0),
- /* Shader has non-zero emission. */
- SD_EMISSION = (1 << 1),
- /* Shader has BSDF closure. */
- SD_BSDF = (1 << 2),
- /* Shader has non-singular BSDF closure. */
- SD_BSDF_HAS_EVAL = (1 << 3),
- /* Shader has BSSRDF closure. */
- SD_BSSRDF = (1 << 4),
- /* Shader has holdout closure. */
- SD_HOLDOUT = (1 << 5),
- /* Shader has non-zero volume extinction. */
- SD_EXTINCTION = (1 << 6),
- /* Shader has have volume phase (scatter) closure. */
- SD_SCATTER = (1 << 7),
- /* Shader has transparent closure. */
- SD_TRANSPARENT = (1 << 9),
- /* BSDF requires LCG for evaluation. */
- SD_BSDF_NEEDS_LCG = (1 << 10),
-
- SD_CLOSURE_FLAGS = (SD_EMISSION |
- SD_BSDF |
- SD_BSDF_HAS_EVAL |
- SD_BSSRDF |
- SD_HOLDOUT |
- SD_EXTINCTION |
- SD_SCATTER |
- SD_BSDF_NEEDS_LCG),
-
- /* Shader flags. */
-
- /* direct light sample */
- SD_USE_MIS = (1 << 16),
- /* Has transparent shadow. */
- SD_HAS_TRANSPARENT_SHADOW = (1 << 17),
- /* Has volume shader. */
- SD_HAS_VOLUME = (1 << 18),
- /* Has only volume shader, no surface. */
- SD_HAS_ONLY_VOLUME = (1 << 19),
- /* Has heterogeneous volume. */
- SD_HETEROGENEOUS_VOLUME = (1 << 20),
- /* BSSRDF normal uses bump. */
- SD_HAS_BSSRDF_BUMP = (1 << 21),
- /* Use equiangular volume sampling */
- SD_VOLUME_EQUIANGULAR = (1 << 22),
- /* Use multiple importance volume sampling. */
- SD_VOLUME_MIS = (1 << 23),
- /* Use cubic interpolation for voxels. */
- SD_VOLUME_CUBIC = (1 << 24),
- /* Has data connected to the displacement input or uses bump map. */
- SD_HAS_BUMP = (1 << 25),
- /* Has true displacement. */
- SD_HAS_DISPLACEMENT = (1 << 26),
- /* Has constant emission (value stored in __shaders) */
- SD_HAS_CONSTANT_EMISSION = (1 << 27),
- /* Needs to access attributes */
- SD_NEED_ATTRIBUTES = (1 << 28),
-
- SD_SHADER_FLAGS = (SD_USE_MIS |
- SD_HAS_TRANSPARENT_SHADOW |
- SD_HAS_VOLUME |
- SD_HAS_ONLY_VOLUME |
- SD_HETEROGENEOUS_VOLUME |
- SD_HAS_BSSRDF_BUMP |
- SD_VOLUME_EQUIANGULAR |
- SD_VOLUME_MIS |
- SD_VOLUME_CUBIC |
- SD_HAS_BUMP |
- SD_HAS_DISPLACEMENT |
- SD_HAS_CONSTANT_EMISSION |
- SD_NEED_ATTRIBUTES)
+ /* Runtime flags. */
+
+ /* Set when ray hits backside of surface. */
+ SD_BACKFACING = (1 << 0),
+ /* Shader has non-zero emission. */
+ SD_EMISSION = (1 << 1),
+ /* Shader has BSDF closure. */
+ SD_BSDF = (1 << 2),
+ /* Shader has non-singular BSDF closure. */
+ SD_BSDF_HAS_EVAL = (1 << 3),
+ /* Shader has BSSRDF closure. */
+ SD_BSSRDF = (1 << 4),
+ /* Shader has holdout closure. */
+ SD_HOLDOUT = (1 << 5),
+ /* Shader has non-zero volume extinction. */
+ SD_EXTINCTION = (1 << 6),
+ /* Shader has have volume phase (scatter) closure. */
+ SD_SCATTER = (1 << 7),
+ /* Shader has transparent closure. */
+ SD_TRANSPARENT = (1 << 9),
+ /* BSDF requires LCG for evaluation. */
+ SD_BSDF_NEEDS_LCG = (1 << 10),
+
+ SD_CLOSURE_FLAGS = (SD_EMISSION | SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSSRDF | SD_HOLDOUT |
+ SD_EXTINCTION | SD_SCATTER | SD_BSDF_NEEDS_LCG),
+
+ /* Shader flags. */
+
+ /* direct light sample */
+ SD_USE_MIS = (1 << 16),
+ /* Has transparent shadow. */
+ SD_HAS_TRANSPARENT_SHADOW = (1 << 17),
+ /* Has volume shader. */
+ SD_HAS_VOLUME = (1 << 18),
+ /* Has only volume shader, no surface. */
+ SD_HAS_ONLY_VOLUME = (1 << 19),
+ /* Has heterogeneous volume. */
+ SD_HETEROGENEOUS_VOLUME = (1 << 20),
+ /* BSSRDF normal uses bump. */
+ SD_HAS_BSSRDF_BUMP = (1 << 21),
+ /* Use equiangular volume sampling */
+ SD_VOLUME_EQUIANGULAR = (1 << 22),
+ /* Use multiple importance volume sampling. */
+ SD_VOLUME_MIS = (1 << 23),
+ /* Use cubic interpolation for voxels. */
+ SD_VOLUME_CUBIC = (1 << 24),
+ /* Has data connected to the displacement input or uses bump map. */
+ SD_HAS_BUMP = (1 << 25),
+ /* Has true displacement. */
+ SD_HAS_DISPLACEMENT = (1 << 26),
+ /* Has constant emission (value stored in __shaders) */
+ SD_HAS_CONSTANT_EMISSION = (1 << 27),
+ /* Needs to access attributes */
+ SD_NEED_ATTRIBUTES = (1 << 28),
+
+ SD_SHADER_FLAGS = (SD_USE_MIS | SD_HAS_TRANSPARENT_SHADOW | SD_HAS_VOLUME | SD_HAS_ONLY_VOLUME |
+ SD_HETEROGENEOUS_VOLUME | SD_HAS_BSSRDF_BUMP | SD_VOLUME_EQUIANGULAR |
+ SD_VOLUME_MIS | SD_VOLUME_CUBIC | SD_HAS_BUMP | SD_HAS_DISPLACEMENT |
+ SD_HAS_CONSTANT_EMISSION | SD_NEED_ATTRIBUTES)
};
- /* Object flags. */
+/* Object flags. */
enum ShaderDataObjectFlag {
- /* Holdout for camera rays. */
- SD_OBJECT_HOLDOUT_MASK = (1 << 0),
- /* Has object motion blur. */
- SD_OBJECT_MOTION = (1 << 1),
- /* Vertices have transform applied. */
- SD_OBJECT_TRANSFORM_APPLIED = (1 << 2),
- /* Vertices have negative scale applied. */
- SD_OBJECT_NEGATIVE_SCALE_APPLIED = (1 << 3),
- /* Object has a volume shader. */
- SD_OBJECT_HAS_VOLUME = (1 << 4),
- /* Object intersects AABB of an object with volume shader. */
- SD_OBJECT_INTERSECTS_VOLUME = (1 << 5),
- /* Has position for motion vertices. */
- SD_OBJECT_HAS_VERTEX_MOTION = (1 << 6),
- /* object is used to catch shadows */
- SD_OBJECT_SHADOW_CATCHER = (1 << 7),
- /* object has volume attributes */
- SD_OBJECT_HAS_VOLUME_ATTRIBUTES = (1 << 8),
-
- SD_OBJECT_FLAGS = (SD_OBJECT_HOLDOUT_MASK |
- SD_OBJECT_MOTION |
- SD_OBJECT_TRANSFORM_APPLIED |
- SD_OBJECT_NEGATIVE_SCALE_APPLIED |
- SD_OBJECT_HAS_VOLUME |
- SD_OBJECT_INTERSECTS_VOLUME |
- SD_OBJECT_SHADOW_CATCHER |
- SD_OBJECT_HAS_VOLUME_ATTRIBUTES)
+ /* Holdout for camera rays. */
+ SD_OBJECT_HOLDOUT_MASK = (1 << 0),
+ /* Has object motion blur. */
+ SD_OBJECT_MOTION = (1 << 1),
+ /* Vertices have transform applied. */
+ SD_OBJECT_TRANSFORM_APPLIED = (1 << 2),
+ /* Vertices have negative scale applied. */
+ SD_OBJECT_NEGATIVE_SCALE_APPLIED = (1 << 3),
+ /* Object has a volume shader. */
+ SD_OBJECT_HAS_VOLUME = (1 << 4),
+ /* Object intersects AABB of an object with volume shader. */
+ SD_OBJECT_INTERSECTS_VOLUME = (1 << 5),
+ /* Has position for motion vertices. */
+ SD_OBJECT_HAS_VERTEX_MOTION = (1 << 6),
+ /* object is used to catch shadows */
+ SD_OBJECT_SHADOW_CATCHER = (1 << 7),
+ /* object has volume attributes */
+ SD_OBJECT_HAS_VOLUME_ATTRIBUTES = (1 << 8),
+
+ SD_OBJECT_FLAGS = (SD_OBJECT_HOLDOUT_MASK | SD_OBJECT_MOTION | SD_OBJECT_TRANSFORM_APPLIED |
+ SD_OBJECT_NEGATIVE_SCALE_APPLIED | SD_OBJECT_HAS_VOLUME |
+ SD_OBJECT_INTERSECTS_VOLUME | SD_OBJECT_SHADOW_CATCHER |
+ SD_OBJECT_HAS_VOLUME_ATTRIBUTES)
};
typedef ccl_addr_space struct ShaderData {
- /* position */
- float3 P;
- /* smooth normal for shading */
- float3 N;
- /* true geometric normal */
- float3 Ng;
- /* view/incoming direction */
- float3 I;
- /* shader id */
- int shader;
- /* booleans describing shader, see ShaderDataFlag */
- int flag;
- /* booleans describing object of the shader, see ShaderDataObjectFlag */
- int object_flag;
-
- /* primitive id if there is one, ~0 otherwise */
- int prim;
-
- /* combined type and curve segment for hair */
- int type;
-
- /* parametric coordinates
- * - barycentric weights for triangles */
- float u;
- float v;
- /* object id if there is one, ~0 otherwise */
- int object;
- /* lamp id if there is one, ~0 otherwise */
- int lamp;
-
- /* motion blur sample time */
- float time;
-
- /* length of the ray being shaded */
- float ray_length;
+ /* position */
+ float3 P;
+ /* smooth normal for shading */
+ float3 N;
+ /* true geometric normal */
+ float3 Ng;
+ /* view/incoming direction */
+ float3 I;
+ /* shader id */
+ int shader;
+ /* booleans describing shader, see ShaderDataFlag */
+ int flag;
+ /* booleans describing object of the shader, see ShaderDataObjectFlag */
+ int object_flag;
+
+ /* primitive id if there is one, ~0 otherwise */
+ int prim;
+
+ /* combined type and curve segment for hair */
+ int type;
+
+ /* parametric coordinates
+ * - barycentric weights for triangles */
+ float u;
+ float v;
+ /* object id if there is one, ~0 otherwise */
+ int object;
+ /* lamp id if there is one, ~0 otherwise */
+ int lamp;
+
+ /* motion blur sample time */
+ float time;
+
+ /* length of the ray being shaded */
+ float ray_length;
#ifdef __RAY_DIFFERENTIALS__
- /* differential of P. these are orthogonal to Ng, not N */
- differential3 dP;
- /* differential of I */
- differential3 dI;
- /* differential of u, v */
- differential du;
- differential dv;
+ /* differential of P. these are orthogonal to Ng, not N */
+ differential3 dP;
+ /* differential of I */
+ differential3 dI;
+ /* differential of u, v */
+ differential du;
+ differential dv;
#endif
#ifdef __DPDU__
- /* differential of P w.r.t. parametric coordinates. note that dPdu is
- * not readily suitable as a tangent for shading on triangles. */
- float3 dPdu;
- float3 dPdv;
+ /* differential of P w.r.t. parametric coordinates. note that dPdu is
+ * not readily suitable as a tangent for shading on triangles. */
+ float3 dPdu;
+ float3 dPdv;
#endif
#ifdef __OBJECT_MOTION__
- /* object <-> world space transformations, cached to avoid
- * re-interpolating them constantly for shading */
- Transform ob_tfm;
- Transform ob_itfm;
+ /* object <-> world space transformations, cached to avoid
+ * re-interpolating them constantly for shading */
+ Transform ob_tfm;
+ Transform ob_itfm;
#endif
- /* ray start position, only set for backgrounds */
- float3 ray_P;
- differential3 ray_dP;
+ /* ray start position, only set for backgrounds */
+ float3 ray_P;
+ differential3 ray_dP;
#ifdef __OSL__
- struct KernelGlobals *osl_globals;
- struct PathState *osl_path_state;
+ struct KernelGlobals *osl_globals;
+ struct PathState *osl_path_state;
#endif
- /* LCG state for closures that require additional random numbers. */
- uint lcg_state;
+ /* LCG state for closures that require additional random numbers. */
+ uint lcg_state;
- /* Closure data, we store a fixed array of closures */
- int num_closure;
- int num_closure_left;
- float randb_closure;
- float3 svm_closure_weight;
+ /* Closure data, we store a fixed array of closures */
+ int num_closure;
+ int num_closure_left;
+ float randb_closure;
+ float3 svm_closure_weight;
- /* Closure weights summed directly, so we can evaluate
- * emission and shadow transparency with MAX_CLOSURE 0. */
- float3 closure_emission_background;
- float3 closure_transparent_extinction;
+ /* Closure weights summed directly, so we can evaluate
+ * emission and shadow transparency with MAX_CLOSURE 0. */
+ float3 closure_emission_background;
+ float3 closure_transparent_extinction;
- /* At the end so we can adjust size in ShaderDataTinyStorage. */
- struct ShaderClosure closure[MAX_CLOSURE];
+ /* At the end so we can adjust size in ShaderDataTinyStorage. */
+ struct ShaderClosure closure[MAX_CLOSURE];
} ShaderData;
typedef ccl_addr_space struct ShaderDataTinyStorage {
- char pad[sizeof(ShaderData) - sizeof(ShaderClosure) * MAX_CLOSURE];
+ char pad[sizeof(ShaderData) - sizeof(ShaderClosure) * MAX_CLOSURE];
} ShaderDataTinyStorage;
-#define AS_SHADER_DATA(shader_data_tiny_storage) ((ShaderData*)shader_data_tiny_storage)
+#define AS_SHADER_DATA(shader_data_tiny_storage) ((ShaderData *)shader_data_tiny_storage)
/* Path State */
#ifdef __VOLUME__
typedef struct VolumeStack {
- int object;
- int shader;
+ int object;
+ int shader;
} VolumeStack;
#endif
typedef struct PathState {
- /* see enum PathRayFlag */
- int flag;
-
- /* random number generator state */
- uint rng_hash; /* per pixel hash */
- int rng_offset; /* dimension offset */
- int sample; /* path sample number */
- int num_samples; /* total number of times this path will be sampled */
- float branch_factor; /* number of branches in indirect paths */
-
- /* bounce counting */
- int bounce;
- int diffuse_bounce;
- int glossy_bounce;
- int transmission_bounce;
- int transparent_bounce;
+ /* see enum PathRayFlag */
+ int flag;
+
+ /* random number generator state */
+ uint rng_hash; /* per pixel hash */
+ int rng_offset; /* dimension offset */
+ int sample; /* path sample number */
+ int num_samples; /* total number of times this path will be sampled */
+ float branch_factor; /* number of branches in indirect paths */
+
+ /* bounce counting */
+ int bounce;
+ int diffuse_bounce;
+ int glossy_bounce;
+ int transmission_bounce;
+ int transparent_bounce;
#ifdef __DENOISING_FEATURES__
- float denoising_feature_weight;
-#endif /* __DENOISING_FEATURES__ */
+ float denoising_feature_weight;
+#endif /* __DENOISING_FEATURES__ */
- /* multiple importance sampling */
- float min_ray_pdf; /* smallest bounce pdf over entire path up to now */
- float ray_pdf; /* last bounce pdf */
+ /* multiple importance sampling */
+ float min_ray_pdf; /* smallest bounce pdf over entire path up to now */
+ float ray_pdf; /* last bounce pdf */
#ifdef __LAMP_MIS__
- float ray_t; /* accumulated distance through transparent surfaces */
+ float ray_t; /* accumulated distance through transparent surfaces */
#endif
- /* volume rendering */
+ /* volume rendering */
#ifdef __VOLUME__
- int volume_bounce;
- int volume_bounds_bounce;
- VolumeStack volume_stack[VOLUME_STACK_SIZE];
+ int volume_bounce;
+ int volume_bounds_bounce;
+ VolumeStack volume_stack[VOLUME_STACK_SIZE];
#endif
} PathState;
/* Struct to gather multiple nearby intersections. */
typedef struct LocalIntersection {
- Ray ray;
- float3 weight[LOCAL_MAX_HITS];
+ Ray ray;
+ float3 weight[LOCAL_MAX_HITS];
- int num_hits;
- struct Intersection hits[LOCAL_MAX_HITS];
- float3 Ng[LOCAL_MAX_HITS];
+ int num_hits;
+ struct Intersection hits[LOCAL_MAX_HITS];
+ float3 Ng[LOCAL_MAX_HITS];
} LocalIntersection;
/* Subsurface */
/* Struct to gather SSS indirect rays and delay tracing them. */
typedef struct SubsurfaceIndirectRays {
- PathState state[BSSRDF_MAX_HITS];
+ PathState state[BSSRDF_MAX_HITS];
- int num_rays;
+ int num_rays;
- struct Ray rays[BSSRDF_MAX_HITS];
- float3 throughputs[BSSRDF_MAX_HITS];
- struct PathRadianceState L_state[BSSRDF_MAX_HITS];
+ struct Ray rays[BSSRDF_MAX_HITS];
+ float3 throughputs[BSSRDF_MAX_HITS];
+ struct PathRadianceState L_state[BSSRDF_MAX_HITS];
} SubsurfaceIndirectRays;
static_assert(BSSRDF_MAX_HITS <= LOCAL_MAX_HITS, "BSSRDF hits too high.");
@@ -1109,424 +1085,424 @@ static_assert(BSSRDF_MAX_HITS <= LOCAL_MAX_HITS, "BSSRDF hits too high.");
* do not use float3 because its size may not be the same on all devices. */
typedef struct KernelCamera {
- /* type */
- int type;
-
- /* panorama */
- int panorama_type;
- float fisheye_fov;
- float fisheye_lens;
- float4 equirectangular_range;
-
- /* stereo */
- float interocular_offset;
- float convergence_distance;
- float pole_merge_angle_from;
- float pole_merge_angle_to;
-
- /* matrices */
- Transform cameratoworld;
- ProjectionTransform rastertocamera;
-
- /* differentials */
- float4 dx;
- float4 dy;
-
- /* depth of field */
- float aperturesize;
- float blades;
- float bladesrotation;
- float focaldistance;
-
- /* motion blur */
- float shuttertime;
- int num_motion_steps, have_perspective_motion;
-
- /* clipping */
- float nearclip;
- float cliplength;
-
- /* sensor size */
- float sensorwidth;
- float sensorheight;
-
- /* render size */
- float width, height;
- int resolution;
-
- /* anamorphic lens bokeh */
- float inv_aperture_ratio;
-
- int is_inside_volume;
-
- /* more matrices */
- ProjectionTransform screentoworld;
- ProjectionTransform rastertoworld;
- ProjectionTransform ndctoworld;
- ProjectionTransform worldtoscreen;
- ProjectionTransform worldtoraster;
- ProjectionTransform worldtondc;
- Transform worldtocamera;
-
- /* Stores changes in the projeciton matrix. Use for camera zoom motion
- * blur and motion pass output for perspective camera. */
- ProjectionTransform perspective_pre;
- ProjectionTransform perspective_post;
-
- /* Transforms for motion pass. */
- Transform motion_pass_pre;
- Transform motion_pass_post;
-
- int shutter_table_offset;
-
- /* Rolling shutter */
- int rolling_shutter_type;
- float rolling_shutter_duration;
-
- int pad;
+ /* type */
+ int type;
+
+ /* panorama */
+ int panorama_type;
+ float fisheye_fov;
+ float fisheye_lens;
+ float4 equirectangular_range;
+
+ /* stereo */
+ float interocular_offset;
+ float convergence_distance;
+ float pole_merge_angle_from;
+ float pole_merge_angle_to;
+
+ /* matrices */
+ Transform cameratoworld;
+ ProjectionTransform rastertocamera;
+
+ /* differentials */
+ float4 dx;
+ float4 dy;
+
+ /* depth of field */
+ float aperturesize;
+ float blades;
+ float bladesrotation;
+ float focaldistance;
+
+ /* motion blur */
+ float shuttertime;
+ int num_motion_steps, have_perspective_motion;
+
+ /* clipping */
+ float nearclip;
+ float cliplength;
+
+ /* sensor size */
+ float sensorwidth;
+ float sensorheight;
+
+ /* render size */
+ float width, height;
+ int resolution;
+
+ /* anamorphic lens bokeh */
+ float inv_aperture_ratio;
+
+ int is_inside_volume;
+
+ /* more matrices */
+ ProjectionTransform screentoworld;
+ ProjectionTransform rastertoworld;
+ ProjectionTransform ndctoworld;
+ ProjectionTransform worldtoscreen;
+ ProjectionTransform worldtoraster;
+ ProjectionTransform worldtondc;
+ Transform worldtocamera;
+
+ /* Stores changes in the projeciton matrix. Use for camera zoom motion
+ * blur and motion pass output for perspective camera. */
+ ProjectionTransform perspective_pre;
+ ProjectionTransform perspective_post;
+
+ /* Transforms for motion pass. */
+ Transform motion_pass_pre;
+ Transform motion_pass_post;
+
+ int shutter_table_offset;
+
+ /* Rolling shutter */
+ int rolling_shutter_type;
+ float rolling_shutter_duration;
+
+ int pad;
} KernelCamera;
static_assert_align(KernelCamera, 16);
typedef struct KernelFilm {
- float exposure;
- int pass_flag;
- int light_pass_flag;
- int pass_stride;
- int use_light_pass;
-
- int pass_combined;
- int pass_depth;
- int pass_normal;
- int pass_motion;
-
- int pass_motion_weight;
- int pass_uv;
- int pass_object_id;
- int pass_material_id;
-
- int pass_diffuse_color;
- int pass_glossy_color;
- int pass_transmission_color;
- int pass_subsurface_color;
-
- int pass_diffuse_indirect;
- int pass_glossy_indirect;
- int pass_transmission_indirect;
- int pass_subsurface_indirect;
- int pass_volume_indirect;
-
- int pass_diffuse_direct;
- int pass_glossy_direct;
- int pass_transmission_direct;
- int pass_subsurface_direct;
- int pass_volume_direct;
-
- int pass_emission;
- int pass_background;
- int pass_ao;
- float pass_alpha_threshold;
-
- int pass_shadow;
- float pass_shadow_scale;
- int filter_table_offset;
- int cryptomatte_passes;
- int cryptomatte_depth;
- int pass_cryptomatte;
-
- int pass_mist;
- float mist_start;
- float mist_inv_depth;
- float mist_falloff;
-
- int pass_denoising_data;
- int pass_denoising_clean;
- int denoising_flags;
-
- /* XYZ to rendering color space transform. float4 instead of float3 to
- * ensure consistent padding/alignment across devices. */
- float4 xyz_to_r;
- float4 xyz_to_g;
- float4 xyz_to_b;
- float4 rgb_to_y;
+ float exposure;
+ int pass_flag;
+ int light_pass_flag;
+ int pass_stride;
+ int use_light_pass;
+
+ int pass_combined;
+ int pass_depth;
+ int pass_normal;
+ int pass_motion;
+
+ int pass_motion_weight;
+ int pass_uv;
+ int pass_object_id;
+ int pass_material_id;
+
+ int pass_diffuse_color;
+ int pass_glossy_color;
+ int pass_transmission_color;
+ int pass_subsurface_color;
+
+ int pass_diffuse_indirect;
+ int pass_glossy_indirect;
+ int pass_transmission_indirect;
+ int pass_subsurface_indirect;
+ int pass_volume_indirect;
+
+ int pass_diffuse_direct;
+ int pass_glossy_direct;
+ int pass_transmission_direct;
+ int pass_subsurface_direct;
+ int pass_volume_direct;
+
+ int pass_emission;
+ int pass_background;
+ int pass_ao;
+ float pass_alpha_threshold;
+
+ int pass_shadow;
+ float pass_shadow_scale;
+ int filter_table_offset;
+ int cryptomatte_passes;
+ int cryptomatte_depth;
+ int pass_cryptomatte;
+
+ int pass_mist;
+ float mist_start;
+ float mist_inv_depth;
+ float mist_falloff;
+
+ int pass_denoising_data;
+ int pass_denoising_clean;
+ int denoising_flags;
+
+ /* XYZ to rendering color space transform. float4 instead of float3 to
+ * ensure consistent padding/alignment across devices. */
+ float4 xyz_to_r;
+ float4 xyz_to_g;
+ float4 xyz_to_b;
+ float4 rgb_to_y;
#ifdef __KERNEL_DEBUG__
- int pass_bvh_traversed_nodes;
- int pass_bvh_traversed_instances;
- int pass_bvh_intersections;
- int pass_ray_bounces;
+ int pass_bvh_traversed_nodes;
+ int pass_bvh_traversed_instances;
+ int pass_bvh_intersections;
+ int pass_ray_bounces;
#endif
} KernelFilm;
static_assert_align(KernelFilm, 16);
typedef struct KernelBackground {
- /* only shader index */
- int surface_shader;
- int volume_shader;
- int transparent;
- float transparent_roughness_squared_threshold;
-
- /* ambient occlusion */
- float ao_factor;
- float ao_distance;
- float ao_bounces_factor;
- float ao_pad;
+ /* only shader index */
+ int surface_shader;
+ int volume_shader;
+ int transparent;
+ float transparent_roughness_squared_threshold;
+
+ /* ambient occlusion */
+ float ao_factor;
+ float ao_distance;
+ float ao_bounces_factor;
+ float ao_pad;
} KernelBackground;
static_assert_align(KernelBackground, 16);
typedef struct KernelIntegrator {
- /* emission */
- int use_direct_light;
- int use_ambient_occlusion;
- int num_distribution;
- int num_all_lights;
- float pdf_triangles;
- float pdf_lights;
- int pdf_background_res_x;
- int pdf_background_res_y;
- float light_inv_rr_threshold;
-
- /* light portals */
- float portal_pdf;
- int num_portals;
- int portal_offset;
-
- /* bounces */
- int max_bounce;
-
- int max_diffuse_bounce;
- int max_glossy_bounce;
- int max_transmission_bounce;
- int max_volume_bounce;
-
- int ao_bounces;
-
- /* transparent */
- int transparent_max_bounce;
- int transparent_shadows;
-
- /* caustics */
- int caustics_reflective;
- int caustics_refractive;
- float filter_glossy;
-
- /* seed */
- int seed;
-
- /* clamp */
- float sample_clamp_direct;
- float sample_clamp_indirect;
-
- /* branched path */
- int branched;
- int volume_decoupled;
- int diffuse_samples;
- int glossy_samples;
- int transmission_samples;
- int ao_samples;
- int mesh_light_samples;
- int subsurface_samples;
- int sample_all_lights_direct;
- int sample_all_lights_indirect;
-
- /* mis */
- int use_lamp_mis;
-
- /* sampler */
- int sampling_pattern;
- int aa_samples;
-
- /* volume render */
- int use_volumes;
- int volume_max_steps;
- float volume_step_size;
- int volume_samples;
-
- int start_sample;
-
- int max_closures;
-
- int pad1, pad2, pad3;
+ /* emission */
+ int use_direct_light;
+ int use_ambient_occlusion;
+ int num_distribution;
+ int num_all_lights;
+ float pdf_triangles;
+ float pdf_lights;
+ int pdf_background_res_x;
+ int pdf_background_res_y;
+ float light_inv_rr_threshold;
+
+ /* light portals */
+ float portal_pdf;
+ int num_portals;
+ int portal_offset;
+
+ /* bounces */
+ int max_bounce;
+
+ int max_diffuse_bounce;
+ int max_glossy_bounce;
+ int max_transmission_bounce;
+ int max_volume_bounce;
+
+ int ao_bounces;
+
+ /* transparent */
+ int transparent_max_bounce;
+ int transparent_shadows;
+
+ /* caustics */
+ int caustics_reflective;
+ int caustics_refractive;
+ float filter_glossy;
+
+ /* seed */
+ int seed;
+
+ /* clamp */
+ float sample_clamp_direct;
+ float sample_clamp_indirect;
+
+ /* branched path */
+ int branched;
+ int volume_decoupled;
+ int diffuse_samples;
+ int glossy_samples;
+ int transmission_samples;
+ int ao_samples;
+ int mesh_light_samples;
+ int subsurface_samples;
+ int sample_all_lights_direct;
+ int sample_all_lights_indirect;
+
+ /* mis */
+ int use_lamp_mis;
+
+ /* sampler */
+ int sampling_pattern;
+ int aa_samples;
+
+ /* volume render */
+ int use_volumes;
+ int volume_max_steps;
+ float volume_step_size;
+ int volume_samples;
+
+ int start_sample;
+
+ int max_closures;
+
+ int pad1, pad2, pad3;
} KernelIntegrator;
static_assert_align(KernelIntegrator, 16);
typedef enum KernelBVHLayout {
- BVH_LAYOUT_NONE = 0,
-
- BVH_LAYOUT_BVH2 = (1 << 0),
- BVH_LAYOUT_BVH4 = (1 << 1),
- BVH_LAYOUT_BVH8 = (1 << 2),
- BVH_LAYOUT_EMBREE = (1 << 3),
- BVH_LAYOUT_DEFAULT = BVH_LAYOUT_BVH8,
- BVH_LAYOUT_ALL = (unsigned int)(-1),
+ BVH_LAYOUT_NONE = 0,
+
+ BVH_LAYOUT_BVH2 = (1 << 0),
+ BVH_LAYOUT_BVH4 = (1 << 1),
+ BVH_LAYOUT_BVH8 = (1 << 2),
+ BVH_LAYOUT_EMBREE = (1 << 3),
+ BVH_LAYOUT_DEFAULT = BVH_LAYOUT_BVH8,
+ BVH_LAYOUT_ALL = (unsigned int)(-1),
} KernelBVHLayout;
typedef struct KernelBVH {
- /* Own BVH */
- int root;
- int have_motion;
- int have_curves;
- int have_instancing;
- int bvh_layout;
- int use_bvh_steps;
-
- /* Embree */
+ /* Own BVH */
+ int root;
+ int have_motion;
+ int have_curves;
+ int have_instancing;
+ int bvh_layout;
+ int use_bvh_steps;
+
+ /* Embree */
#ifdef __EMBREE__
- RTCScene scene;
+ RTCScene scene;
# ifndef __KERNEL_64_BIT__
- int pad1;
+ int pad1;
# endif
#else
- int pad1, pad2;
+ int pad1, pad2;
#endif
} KernelBVH;
static_assert_align(KernelBVH, 16);
typedef enum CurveFlag {
- /* runtime flags */
- CURVE_KN_BACKFACING = 1, /* backside of cylinder? */
- CURVE_KN_ENCLOSEFILTER = 2, /* don't consider strands surrounding start point? */
- CURVE_KN_INTERPOLATE = 4, /* render as a curve? */
- CURVE_KN_ACCURATE = 8, /* use accurate intersections test? */
- CURVE_KN_INTERSECTCORRECTION = 16, /* correct for width after determing closest midpoint? */
- CURVE_KN_TRUETANGENTGNORMAL = 32, /* use tangent normal for geometry? */
- CURVE_KN_RIBBONS = 64, /* use flat curve ribbons */
+ /* runtime flags */
+ CURVE_KN_BACKFACING = 1, /* backside of cylinder? */
+ CURVE_KN_ENCLOSEFILTER = 2, /* don't consider strands surrounding start point? */
+ CURVE_KN_INTERPOLATE = 4, /* render as a curve? */
+ CURVE_KN_ACCURATE = 8, /* use accurate intersections test? */
+ CURVE_KN_INTERSECTCORRECTION = 16, /* correct for width after determining closest midpoint? */
+ CURVE_KN_TRUETANGENTGNORMAL = 32, /* use tangent normal for geometry? */
+ CURVE_KN_RIBBONS = 64, /* use flat curve ribbons */
} CurveFlag;
typedef struct KernelCurves {
- int curveflags;
- int subdivisions;
+ int curveflags;
+ int subdivisions;
- float minimum_width;
- float maximum_width;
+ float minimum_width;
+ float maximum_width;
} KernelCurves;
static_assert_align(KernelCurves, 16);
typedef struct KernelTables {
- int beckmann_offset;
- int pad1, pad2, pad3;
+ int beckmann_offset;
+ int pad1, pad2, pad3;
} KernelTables;
static_assert_align(KernelTables, 16);
typedef struct KernelData {
- KernelCamera cam;
- KernelFilm film;
- KernelBackground background;
- KernelIntegrator integrator;
- KernelBVH bvh;
- KernelCurves curve;
- KernelTables tables;
+ KernelCamera cam;
+ KernelFilm film;
+ KernelBackground background;
+ KernelIntegrator integrator;
+ KernelBVH bvh;
+ KernelCurves curve;
+ KernelTables tables;
} KernelData;
static_assert_align(KernelData, 16);
/* Kernel data structures. */
typedef struct KernelObject {
- Transform tfm;
- Transform itfm;
+ Transform tfm;
+ Transform itfm;
- float surface_area;
- float pass_id;
- float random_number;
- int particle_index;
+ float surface_area;
+ float pass_id;
+ float random_number;
+ int particle_index;
- float dupli_generated[3];
- float dupli_uv[2];
+ float dupli_generated[3];
+ float dupli_uv[2];
- int numkeys;
- int numsteps;
- int numverts;
+ int numkeys;
+ int numsteps;
+ int numverts;
- uint patch_map_offset;
- uint attribute_map_offset;
- uint motion_offset;
- uint pad1;
+ uint patch_map_offset;
+ uint attribute_map_offset;
+ uint motion_offset;
+ uint pad1;
- float cryptomatte_object;
- float cryptomatte_asset;
- float pad2, pad3;
+ float cryptomatte_object;
+ float cryptomatte_asset;
+ float pad2, pad3;
} KernelObject;
static_assert_align(KernelObject, 16);
typedef struct KernelSpotLight {
- float radius;
- float invarea;
- float spot_angle;
- float spot_smooth;
- float dir[3];
- float pad;
+ float radius;
+ float invarea;
+ float spot_angle;
+ float spot_smooth;
+ float dir[3];
+ float pad;
} KernelSpotLight;
/* PointLight is SpotLight with only radius and invarea being used. */
typedef struct KernelAreaLight {
- float axisu[3];
- float invarea;
- float axisv[3];
- float pad1;
- float dir[3];
- float pad2;
+ float axisu[3];
+ float invarea;
+ float axisv[3];
+ float pad1;
+ float dir[3];
+ float pad2;
} KernelAreaLight;
typedef struct KernelDistantLight {
- float radius;
- float cosangle;
- float invarea;
- float pad;
+ float radius;
+ float cosangle;
+ float invarea;
+ float pad;
} KernelDistantLight;
typedef struct KernelLight {
- int type;
- float co[3];
- int shader_id;
- int samples;
- float max_bounces;
- float random;
- Transform tfm;
- Transform itfm;
- union {
- KernelSpotLight spot;
- KernelAreaLight area;
- KernelDistantLight distant;
- };
+ int type;
+ float co[3];
+ int shader_id;
+ int samples;
+ float max_bounces;
+ float random;
+ Transform tfm;
+ Transform itfm;
+ union {
+ KernelSpotLight spot;
+ KernelAreaLight area;
+ KernelDistantLight distant;
+ };
} KernelLight;
static_assert_align(KernelLight, 16);
typedef struct KernelLightDistribution {
- float totarea;
- int prim;
- union {
- struct {
- int shader_flag;
- int object_id;
- } mesh_light;
- struct {
- float pad;
- float size;
- } lamp;
- };
+ float totarea;
+ int prim;
+ union {
+ struct {
+ int shader_flag;
+ int object_id;
+ } mesh_light;
+ struct {
+ float pad;
+ float size;
+ } lamp;
+ };
} KernelLightDistribution;
static_assert_align(KernelLightDistribution, 16);
typedef struct KernelParticle {
- int index;
- float age;
- float lifetime;
- float size;
- float4 rotation;
- /* Only xyz are used of the following. float4 instead of float3 are used
- * to ensure consistent padding/alignment across devices. */
- float4 location;
- float4 velocity;
- float4 angular_velocity;
+ int index;
+ float age;
+ float lifetime;
+ float size;
+ float4 rotation;
+ /* Only xyz are used of the following. float4 instead of float3 are used
+ * to ensure consistent padding/alignment across devices. */
+ float4 location;
+ float4 velocity;
+ float4 angular_velocity;
} KernelParticle;
static_assert_align(KernelParticle, 16);
typedef struct KernelShader {
- float constant_emission[3];
- float cryptomatte_id;
- int flags;
- int pass_id;
- int pad2, pad3;
+ float constant_emission[3];
+ float cryptomatte_id;
+ int flags;
+ int pass_id;
+ int pad2, pad3;
} KernelShader;
static_assert_align(KernelShader, 16);
@@ -1545,88 +1521,93 @@ static_assert_align(KernelShader, 16);
/* Queue names */
enum QueueNumber {
- /* All active rays and regenerated rays are enqueued here. */
- QUEUE_ACTIVE_AND_REGENERATED_RAYS = 0,
-
- /* All
- * 1. Background-hit rays,
- * 2. Rays that has exited path-iteration but needs to update output buffer
- * 3. Rays to be regenerated
- * are enqueued here.
- */
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
-
- /* All rays for which a shadow ray should be cast to determine radiance
- * contribution for AO are enqueued here.
- */
- QUEUE_SHADOW_RAY_CAST_AO_RAYS,
-
- /* All rays for which a shadow ray should be cast to determine radiance
- * contributing for direct lighting are enqueued here.
- */
- QUEUE_SHADOW_RAY_CAST_DL_RAYS,
-
- /* Rays sorted according to shader->id */
- QUEUE_SHADER_SORTED_RAYS,
+ /* All active rays and regenerated rays are enqueued here. */
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS = 0,
+
+ /* All
+ * 1. Background-hit rays,
+ * 2. Rays that have exited path-iteration but need to update the output buffer
+ * 3. Rays to be regenerated
+ * are enqueued here.
+ */
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+
+ /* All rays for which a shadow ray should be cast to determine radiance
+ * contribution for AO are enqueued here.
+ */
+ QUEUE_SHADOW_RAY_CAST_AO_RAYS,
+
+ /* All rays for which a shadow ray should be cast to determine radiance
+ * contributing for direct lighting are enqueued here.
+ */
+ QUEUE_SHADOW_RAY_CAST_DL_RAYS,
+
+ /* Rays sorted according to shader->id */
+ QUEUE_SHADER_SORTED_RAYS,
#ifdef __BRANCHED_PATH__
- /* All rays moving to next iteration of the indirect loop for light */
- QUEUE_LIGHT_INDIRECT_ITER,
- /* Queue of all inactive rays. These are candidates for sharing work of indirect loops */
- QUEUE_INACTIVE_RAYS,
+ /* All rays moving to next iteration of the indirect loop for light */
+ QUEUE_LIGHT_INDIRECT_ITER,
+ /* Queue of all inactive rays. These are candidates for sharing work of indirect loops */
+ QUEUE_INACTIVE_RAYS,
# ifdef __VOLUME__
- /* All rays moving to next iteration of the indirect loop for volumes */
- QUEUE_VOLUME_INDIRECT_ITER,
+ /* All rays moving to next iteration of the indirect loop for volumes */
+ QUEUE_VOLUME_INDIRECT_ITER,
# endif
# ifdef __SUBSURFACE__
- /* All rays moving to next iteration of the indirect loop for subsurface */
- QUEUE_SUBSURFACE_INDIRECT_ITER,
+ /* All rays moving to next iteration of the indirect loop for subsurface */
+ QUEUE_SUBSURFACE_INDIRECT_ITER,
# endif
-#endif /* __BRANCHED_PATH__ */
+#endif /* __BRANCHED_PATH__ */
- NUM_QUEUES
+ NUM_QUEUES
};
/* We use RAY_STATE_MASK to get ray_state */
#define RAY_STATE_MASK 0x0F
#define RAY_FLAG_MASK 0xF0
enum RayState {
- RAY_INVALID = 0,
- /* Denotes ray is actively involved in path-iteration. */
- RAY_ACTIVE,
- /* Denotes ray has completed processing all samples and is inactive. */
- RAY_INACTIVE,
- /* Denotes ray has exited path-iteration and needs to update output buffer. */
- RAY_UPDATE_BUFFER,
- /* Denotes ray needs to skip most surface shader work. */
- RAY_HAS_ONLY_VOLUME,
- /* Donotes ray has hit background */
- RAY_HIT_BACKGROUND,
- /* Denotes ray has to be regenerated */
- RAY_TO_REGENERATE,
- /* Denotes ray has been regenerated */
- RAY_REGENERATED,
- /* Denotes ray is moving to next iteration of the branched indirect loop */
- RAY_LIGHT_INDIRECT_NEXT_ITER,
- RAY_VOLUME_INDIRECT_NEXT_ITER,
- RAY_SUBSURFACE_INDIRECT_NEXT_ITER,
-
- /* Ray flags */
-
- /* Flags to denote that the ray is currently evaluating the branched indirect loop */
- RAY_BRANCHED_LIGHT_INDIRECT = (1 << 4),
- RAY_BRANCHED_VOLUME_INDIRECT = (1 << 5),
- RAY_BRANCHED_SUBSURFACE_INDIRECT = (1 << 6),
- RAY_BRANCHED_INDIRECT = (RAY_BRANCHED_LIGHT_INDIRECT | RAY_BRANCHED_VOLUME_INDIRECT | RAY_BRANCHED_SUBSURFACE_INDIRECT),
-
- /* Ray is evaluating an iteration of an indirect loop for another thread */
- RAY_BRANCHED_INDIRECT_SHARED = (1 << 7),
+ RAY_INVALID = 0,
+ /* Denotes ray is actively involved in path-iteration. */
+ RAY_ACTIVE,
+ /* Denotes ray has completed processing all samples and is inactive. */
+ RAY_INACTIVE,
+ /* Denotes ray has exited path-iteration and needs to update output buffer. */
+ RAY_UPDATE_BUFFER,
+ /* Denotes ray needs to skip most surface shader work. */
+ RAY_HAS_ONLY_VOLUME,
+ /* Denotes ray has hit background */
+ RAY_HIT_BACKGROUND,
+ /* Denotes ray has to be regenerated */
+ RAY_TO_REGENERATE,
+ /* Denotes ray has been regenerated */
+ RAY_REGENERATED,
+ /* Denotes ray is moving to next iteration of the branched indirect loop */
+ RAY_LIGHT_INDIRECT_NEXT_ITER,
+ RAY_VOLUME_INDIRECT_NEXT_ITER,
+ RAY_SUBSURFACE_INDIRECT_NEXT_ITER,
+
+ /* Ray flags */
+
+ /* Flags to denote that the ray is currently evaluating the branched indirect loop */
+ RAY_BRANCHED_LIGHT_INDIRECT = (1 << 4),
+ RAY_BRANCHED_VOLUME_INDIRECT = (1 << 5),
+ RAY_BRANCHED_SUBSURFACE_INDIRECT = (1 << 6),
+ RAY_BRANCHED_INDIRECT = (RAY_BRANCHED_LIGHT_INDIRECT | RAY_BRANCHED_VOLUME_INDIRECT |
+ RAY_BRANCHED_SUBSURFACE_INDIRECT),
+
+ /* Ray is evaluating an iteration of an indirect loop for another thread */
+ RAY_BRANCHED_INDIRECT_SHARED = (1 << 7),
};
-#define ASSIGN_RAY_STATE(ray_state, ray_index, state) (ray_state[ray_index] = ((ray_state[ray_index] & RAY_FLAG_MASK) | state))
-#define IS_STATE(ray_state, ray_index, state) ((ray_index) != QUEUE_EMPTY_SLOT && ((ray_state)[(ray_index)] & RAY_STATE_MASK) == (state))
-#define ADD_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] | flag))
-#define REMOVE_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] & (~flag)))
+#define ASSIGN_RAY_STATE(ray_state, ray_index, state) \
+ (ray_state[ray_index] = ((ray_state[ray_index] & RAY_FLAG_MASK) | state))
+#define IS_STATE(ray_state, ray_index, state) \
+ ((ray_index) != QUEUE_EMPTY_SLOT && ((ray_state)[(ray_index)] & RAY_STATE_MASK) == (state))
+#define ADD_RAY_FLAG(ray_state, ray_index, flag) \
+ (ray_state[ray_index] = (ray_state[ray_index] | flag))
+#define REMOVE_RAY_FLAG(ray_state, ray_index, flag) \
+ (ray_state[ray_index] = (ray_state[ray_index] & (~flag)))
#define IS_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] & flag)
/* Patches */
@@ -1642,17 +1623,17 @@ enum RayState {
/* Work Tiles */
typedef struct WorkTile {
- uint x, y, w, h;
+ uint x, y, w, h;
- uint start_sample;
- uint num_samples;
+ uint start_sample;
+ uint num_samples;
- uint offset;
- uint stride;
+ uint offset;
+ uint stride;
- ccl_global float *buffer;
+ ccl_global float *buffer;
} WorkTile;
CCL_NAMESPACE_END
-#endif /* __KERNEL_TYPES_H__ */
+#endif /* __KERNEL_TYPES_H__ */
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index 44c8f795d2c..e024003252f 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -19,9 +19,9 @@ CCL_NAMESPACE_BEGIN
/* Events for probabilistic scattering */
typedef enum VolumeIntegrateResult {
- VOLUME_PATH_SCATTERED = 0,
- VOLUME_PATH_ATTENUATED = 1,
- VOLUME_PATH_MISSED = 2
+ VOLUME_PATH_SCATTERED = 0,
+ VOLUME_PATH_ATTENUATED = 1,
+ VOLUME_PATH_MISSED = 2
} VolumeIntegrateResult;
/* Volume shader properties
@@ -30,9 +30,9 @@ typedef enum VolumeIntegrateResult {
* sigma_t = sigma_a + sigma_s */
typedef struct VolumeShaderCoefficients {
- float3 sigma_t;
- float3 sigma_s;
- float3 emission;
+ float3 sigma_t;
+ float3 sigma_s;
+ float3 emission;
} VolumeShaderCoefficients;
#ifdef __VOLUME__
@@ -44,16 +44,16 @@ ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg,
float3 P,
float3 *extinction)
{
- sd->P = P;
- shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW);
-
- if(sd->flag & SD_EXTINCTION) {
- *extinction = sd->closure_transparent_extinction;
- return true;
- }
- else {
- return false;
- }
+ sd->P = P;
+ shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW);
+
+ if (sd->flag & SD_EXTINCTION) {
+ *extinction = sd->closure_transparent_extinction;
+ return true;
+ }
+ else {
+ return false;
+ }
}
/* evaluate shader to get absorption, scattering and emission at P */
@@ -63,97 +63,97 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals *kg,
float3 P,
VolumeShaderCoefficients *coeff)
{
- sd->P = P;
- shader_eval_volume(kg, sd, state, state->volume_stack, state->flag);
+ sd->P = P;
+ shader_eval_volume(kg, sd, state, state->volume_stack, state->flag);
- if(!(sd->flag & (SD_EXTINCTION|SD_SCATTER|SD_EMISSION)))
- return false;
+ if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION)))
+ return false;
- coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
- coeff->sigma_t = (sd->flag & SD_EXTINCTION)? sd->closure_transparent_extinction:
- make_float3(0.0f, 0.0f, 0.0f);
- coeff->emission = (sd->flag & SD_EMISSION)? sd->closure_emission_background:
- make_float3(0.0f, 0.0f, 0.0f);
+ coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
+ coeff->sigma_t = (sd->flag & SD_EXTINCTION) ? sd->closure_transparent_extinction :
+ make_float3(0.0f, 0.0f, 0.0f);
+ coeff->emission = (sd->flag & SD_EMISSION) ? sd->closure_emission_background :
+ make_float3(0.0f, 0.0f, 0.0f);
- if(sd->flag & SD_SCATTER) {
- for(int i = 0; i < sd->num_closure; i++) {
- const ShaderClosure *sc = &sd->closure[i];
+ if (sd->flag & SD_SCATTER) {
+ for (int i = 0; i < sd->num_closure; i++) {
+ const ShaderClosure *sc = &sd->closure[i];
- if(CLOSURE_IS_VOLUME(sc->type))
- coeff->sigma_s += sc->weight;
- }
- }
+ if (CLOSURE_IS_VOLUME(sc->type))
+ coeff->sigma_s += sc->weight;
+ }
+ }
- return true;
+ return true;
}
-#endif /* __VOLUME__ */
+#endif /* __VOLUME__ */
ccl_device float3 volume_color_transmittance(float3 sigma, float t)
{
- return exp3(-sigma * t);
+ return exp3(-sigma * t);
}
ccl_device float kernel_volume_channel_get(float3 value, int channel)
{
- return (channel == 0)? value.x: ((channel == 1)? value.y: value.z);
+ return (channel == 0) ? value.x : ((channel == 1) ? value.y : value.z);
}
#ifdef __VOLUME__
ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space VolumeStack *stack)
{
- for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
- int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
-
- if(shader_flag & SD_HETEROGENEOUS_VOLUME) {
- return true;
- }
- else if(shader_flag & SD_NEED_ATTRIBUTES) {
- /* We want to render world or objects without any volume grids
- * as homogenous, but can only verify this at runtime since other
- * heterogenous volume objects may be using the same shader. */
- int object = stack[i].object;
- if(object != OBJECT_NONE) {
- int object_flag = kernel_tex_fetch(__object_flag, object);
- if(object_flag & SD_OBJECT_HAS_VOLUME_ATTRIBUTES) {
- return true;
- }
- }
- }
- }
-
- return false;
+ for (int i = 0; stack[i].shader != SHADER_NONE; i++) {
+ int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
+
+ if (shader_flag & SD_HETEROGENEOUS_VOLUME) {
+ return true;
+ }
+ else if (shader_flag & SD_NEED_ATTRIBUTES) {
+ /* We want to render world or objects without any volume grids
+ * as homogeneous, but can only verify this at runtime since other
+ * heterogeneous volume objects may be using the same shader. */
+ int object = stack[i].object;
+ if (object != OBJECT_NONE) {
+ int object_flag = kernel_tex_fetch(__object_flag, object);
+ if (object_flag & SD_OBJECT_HAS_VOLUME_ATTRIBUTES) {
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
}
ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stack)
{
- if(kernel_data.integrator.num_all_lights == 0)
- return 0;
+ if (kernel_data.integrator.num_all_lights == 0)
+ return 0;
- int method = -1;
+ int method = -1;
- for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
- int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
+ for (int i = 0; stack[i].shader != SHADER_NONE; i++) {
+ int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
- if(shader_flag & SD_VOLUME_MIS) {
- return SD_VOLUME_MIS;
- }
- else if(shader_flag & SD_VOLUME_EQUIANGULAR) {
- if(method == 0)
- return SD_VOLUME_MIS;
+ if (shader_flag & SD_VOLUME_MIS) {
+ return SD_VOLUME_MIS;
+ }
+ else if (shader_flag & SD_VOLUME_EQUIANGULAR) {
+ if (method == 0)
+ return SD_VOLUME_MIS;
- method = SD_VOLUME_EQUIANGULAR;
- }
- else {
- if(method == SD_VOLUME_EQUIANGULAR)
- return SD_VOLUME_MIS;
+ method = SD_VOLUME_EQUIANGULAR;
+ }
+ else {
+ if (method == SD_VOLUME_EQUIANGULAR)
+ return SD_VOLUME_MIS;
- method = 0;
- }
- }
+ method = 0;
+ }
+ }
- return method;
+ return method;
}
ccl_device_inline void kernel_volume_step_init(KernelGlobals *kg,
@@ -162,16 +162,16 @@ ccl_device_inline void kernel_volume_step_init(KernelGlobals *kg,
float *step_size,
float *step_offset)
{
- const int max_steps = kernel_data.integrator.volume_max_steps;
- float step = min(kernel_data.integrator.volume_step_size, t);
+ const int max_steps = kernel_data.integrator.volume_max_steps;
+ float step = min(kernel_data.integrator.volume_step_size, t);
- /* compute exact steps in advance for malloc */
- if(t > max_steps * step) {
- step = t / (float)max_steps;
- }
+ /* compute exact steps in advance for malloc */
+ if (t > max_steps * step) {
+ step = t / (float)max_steps;
+ }
- *step_size = step;
- *step_offset = path_state_rng_1D_hash(kg, state, 0x1e31d8a4) * step;
+ *step_size = step;
+ *step_offset = path_state_rng_1D_hash(kg, state, 0x1e31d8a4) * step;
}
/* Volume Shadows
@@ -187,10 +187,10 @@ ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg,
ShaderData *sd,
float3 *throughput)
{
- float3 sigma_t;
+ float3 sigma_t;
- if(volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t))
- *throughput *= volume_color_transmittance(sigma_t, ray->t);
+ if (volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t))
+ *throughput *= volume_color_transmittance(sigma_t, ray->t);
}
/* heterogeneous volume: integrate stepping through the volume until we
@@ -201,57 +201,57 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg,
ShaderData *sd,
float3 *throughput)
{
- float3 tp = *throughput;
- const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
-
- /* prepare for stepping */
- int max_steps = kernel_data.integrator.volume_max_steps;
- float step_offset, step_size;
- kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
-
- /* compute extinction at the start */
- float t = 0.0f;
-
- float3 sum = make_float3(0.0f, 0.0f, 0.0f);
-
- for(int i = 0; i < max_steps; i++) {
- /* advance to new position */
- float new_t = min(ray->t, (i+1) * step_size);
-
- /* use random position inside this segment to sample shader, adjust
- * for last step that is shorter than other steps. */
- if(new_t == ray->t) {
- step_offset *= (new_t - t) / step_size;
- }
-
- float3 new_P = ray->P + ray->D * (t + step_offset);
- float3 sigma_t;
-
- /* compute attenuation over segment */
- if(volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
- /* Compute expf() only for every Nth step, to save some calculations
- * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */
-
- sum += (-sigma_t * (new_t - t));
- if((i & 0x07) == 0) { /* ToDo: Other interval? */
- tp = *throughput * exp3(sum);
-
- /* stop if nearly all light is blocked */
- if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
- break;
- }
- }
-
- /* stop if at the end of the volume */
- t = new_t;
- if(t == ray->t) {
- /* Update throughput in case we haven't done it above */
- tp = *throughput * exp3(sum);
- break;
- }
- }
-
- *throughput = tp;
+ float3 tp = *throughput;
+ const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
+
+ /* prepare for stepping */
+ int max_steps = kernel_data.integrator.volume_max_steps;
+ float step_offset, step_size;
+ kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
+
+ /* compute extinction at the start */
+ float t = 0.0f;
+
+ float3 sum = make_float3(0.0f, 0.0f, 0.0f);
+
+ for (int i = 0; i < max_steps; i++) {
+ /* advance to new position */
+ float new_t = min(ray->t, (i + 1) * step_size);
+
+ /* use random position inside this segment to sample shader, adjust
+ * for last step that is shorter than other steps. */
+ if (new_t == ray->t) {
+ step_offset *= (new_t - t) / step_size;
+ }
+
+ float3 new_P = ray->P + ray->D * (t + step_offset);
+ float3 sigma_t;
+
+ /* compute attenuation over segment */
+ if (volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
+ /* Compute expf() only for every Nth step, to save some calculations
+ * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */
+
+ sum += (-sigma_t * (new_t - t));
+ if ((i & 0x07) == 0) { /* ToDo: Other interval? */
+ tp = *throughput * exp3(sum);
+
+ /* stop if nearly all light is blocked */
+ if (tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
+ break;
+ }
+ }
+
+ /* stop if at the end of the volume */
+ t = new_t;
+ if (t == ray->t) {
+ /* Update throughput in case we haven't done it above */
+ tp = *throughput * exp3(sum);
+ break;
+ }
+ }
+
+ *throughput = tp;
}
/* get the volume attenuation over line segment defined by ray, with the
@@ -262,422 +262,433 @@ ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg,
Ray *ray,
float3 *throughput)
{
- shader_setup_from_volume(kg, shadow_sd, ray);
+ shader_setup_from_volume(kg, shadow_sd, ray);
- if(volume_stack_is_heterogeneous(kg, state->volume_stack))
- kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput);
- else
- kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
+ if (volume_stack_is_heterogeneous(kg, state->volume_stack))
+ kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput);
+ else
+ kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
}
-#endif /* __VOLUME__ */
+#endif /* __VOLUME__ */
/* Equi-angular sampling as in:
* "Importance Sampling Techniques for Path Tracing in Participating Media" */
ccl_device float kernel_volume_equiangular_sample(Ray *ray, float3 light_P, float xi, float *pdf)
{
- float t = ray->t;
-
- float delta = dot((light_P - ray->P) , ray->D);
- float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
- if(UNLIKELY(D == 0.0f)) {
- *pdf = 0.0f;
- return 0.0f;
- }
- float theta_a = -atan2f(delta, D);
- float theta_b = atan2f(t - delta, D);
- float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
- if(UNLIKELY(theta_b == theta_a)) {
- *pdf = 0.0f;
- return 0.0f;
- }
- *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
-
- return min(t, delta + t_); /* min is only for float precision errors */
+ float t = ray->t;
+
+ float delta = dot((light_P - ray->P), ray->D);
+ float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
+ if (UNLIKELY(D == 0.0f)) {
+ *pdf = 0.0f;
+ return 0.0f;
+ }
+ float theta_a = -atan2f(delta, D);
+ float theta_b = atan2f(t - delta, D);
+ float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
+ if (UNLIKELY(theta_b == theta_a)) {
+ *pdf = 0.0f;
+ return 0.0f;
+ }
+ *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
+
+ return min(t, delta + t_); /* min is only for float precision errors */
}
ccl_device float kernel_volume_equiangular_pdf(Ray *ray, float3 light_P, float sample_t)
{
- float delta = dot((light_P - ray->P) , ray->D);
- float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
- if(UNLIKELY(D == 0.0f)) {
- return 0.0f;
- }
+ float delta = dot((light_P - ray->P), ray->D);
+ float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
+ if (UNLIKELY(D == 0.0f)) {
+ return 0.0f;
+ }
- float t = ray->t;
- float t_ = sample_t - delta;
+ float t = ray->t;
+ float t_ = sample_t - delta;
- float theta_a = -atan2f(delta, D);
- float theta_b = atan2f(t - delta, D);
- if(UNLIKELY(theta_b == theta_a)) {
- return 0.0f;
- }
+ float theta_a = -atan2f(delta, D);
+ float theta_b = atan2f(t - delta, D);
+ if (UNLIKELY(theta_b == theta_a)) {
+ return 0.0f;
+ }
- float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
+ float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
- return pdf;
+ return pdf;
}
/* Distance sampling */
-ccl_device float kernel_volume_distance_sample(float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
+ccl_device float kernel_volume_distance_sample(
+ float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
{
- /* xi is [0, 1[ so log(0) should never happen, division by zero is
- * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
- float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
- float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
- float sample_transmittance = kernel_volume_channel_get(full_transmittance, channel);
+ /* xi is [0, 1[ so log(0) should never happen, division by zero is
+ * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
+ float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
+ float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
+ float sample_transmittance = kernel_volume_channel_get(full_transmittance, channel);
- float sample_t = min(max_t, -logf(1.0f - xi*(1.0f - sample_transmittance))/sample_sigma_t);
+ float sample_t = min(max_t, -logf(1.0f - xi * (1.0f - sample_transmittance)) / sample_sigma_t);
- *transmittance = volume_color_transmittance(sigma_t, sample_t);
- *pdf = safe_divide_color(sigma_t * *transmittance, make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
+ *transmittance = volume_color_transmittance(sigma_t, sample_t);
+ *pdf = safe_divide_color(sigma_t * *transmittance,
+ make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
- /* todo: optimization: when taken together with hit/miss decision,
- * the full_transmittance cancels out drops out and xi does not
- * need to be remapped */
+ /* todo: optimization: when taken together with hit/miss decision,
+ * the full_transmittance cancels out and xi does not
+ * need to be remapped */
- return sample_t;
+ return sample_t;
}
ccl_device float3 kernel_volume_distance_pdf(float max_t, float3 sigma_t, float sample_t)
{
- float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
- float3 transmittance = volume_color_transmittance(sigma_t, sample_t);
+ float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
+ float3 transmittance = volume_color_transmittance(sigma_t, sample_t);
- return safe_divide_color(sigma_t * transmittance, make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
+ return safe_divide_color(sigma_t * transmittance,
+ make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
}
/* Emission */
-ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coeff, int closure_flag, float3 transmittance, float t)
+ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coeff,
+ int closure_flag,
+ float3 transmittance,
+ float t)
{
- /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
- * this goes to E * t as sigma_t goes to zero
- *
- * todo: we should use an epsilon to avoid precision issues near zero sigma_t */
- float3 emission = coeff->emission;
-
- if(closure_flag & SD_EXTINCTION) {
- float3 sigma_t = coeff->sigma_t;
-
- emission.x *= (sigma_t.x > 0.0f)? (1.0f - transmittance.x)/sigma_t.x: t;
- emission.y *= (sigma_t.y > 0.0f)? (1.0f - transmittance.y)/sigma_t.y: t;
- emission.z *= (sigma_t.z > 0.0f)? (1.0f - transmittance.z)/sigma_t.z: t;
- }
- else
- emission *= t;
-
- return emission;
+ /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
+ * this goes to E * t as sigma_t goes to zero
+ *
+ * todo: we should use an epsilon to avoid precision issues near zero sigma_t */
+ float3 emission = coeff->emission;
+
+ if (closure_flag & SD_EXTINCTION) {
+ float3 sigma_t = coeff->sigma_t;
+
+ emission.x *= (sigma_t.x > 0.0f) ? (1.0f - transmittance.x) / sigma_t.x : t;
+ emission.y *= (sigma_t.y > 0.0f) ? (1.0f - transmittance.y) / sigma_t.y : t;
+ emission.z *= (sigma_t.z > 0.0f) ? (1.0f - transmittance.z) / sigma_t.z : t;
+ }
+ else
+ emission *= t;
+
+ return emission;
}
/* Volume Path */
-ccl_device int kernel_volume_sample_channel(float3 albedo, float3 throughput, float rand, float3 *pdf)
+ccl_device int kernel_volume_sample_channel(float3 albedo,
+ float3 throughput,
+ float rand,
+ float3 *pdf)
{
- /* Sample color channel proportional to throughput and single scattering
- * albedo, to significantly reduce noise with many bounce, following:
- *
- * "Practical and Controllable Subsurface Scattering for Production Path
- * Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */
- float3 weights = fabs(throughput * albedo);
- float sum_weights = weights.x + weights.y + weights.z;
- float3 weights_pdf;
-
- if(sum_weights > 0.0f) {
- weights_pdf = weights/sum_weights;
- }
- else {
- weights_pdf = make_float3(1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f);
- }
-
- *pdf = weights_pdf;
-
- /* OpenCL does not support -> on float3, so don't use pdf->x. */
- if(rand < weights_pdf.x) {
- return 0;
- }
- else if(rand < weights_pdf.x + weights_pdf.y) {
- return 1;
- }
- else {
- return 2;
- }
+ /* Sample color channel proportional to throughput and single scattering
+ * albedo, to significantly reduce noise with many bounces, following:
+ *
+ * "Practical and Controllable Subsurface Scattering for Production Path
+ * Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */
+ float3 weights = fabs(throughput * albedo);
+ float sum_weights = weights.x + weights.y + weights.z;
+ float3 weights_pdf;
+
+ if (sum_weights > 0.0f) {
+ weights_pdf = weights / sum_weights;
+ }
+ else {
+ weights_pdf = make_float3(1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f);
+ }
+
+ *pdf = weights_pdf;
+
+ /* OpenCL does not support -> on float3, so don't use pdf->x. */
+ if (rand < weights_pdf.x) {
+ return 0;
+ }
+ else if (rand < weights_pdf.x + weights_pdf.y) {
+ return 1;
+ }
+ else {
+ return 2;
+ }
}
#ifdef __VOLUME__
/* homogeneous volume: assume shader evaluation at the start gives
* the volume shading coefficient for the entire line segment */
-ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(
- KernelGlobals *kg,
- ccl_addr_space PathState *state,
- Ray *ray,
- ShaderData *sd,
- PathRadiance *L,
- ccl_addr_space float3 *throughput,
- bool probalistic_scatter)
+ccl_device VolumeIntegrateResult
+kernel_volume_integrate_homogeneous(KernelGlobals *kg,
+ ccl_addr_space PathState *state,
+ Ray *ray,
+ ShaderData *sd,
+ PathRadiance *L,
+ ccl_addr_space float3 *throughput,
+ bool probalistic_scatter)
{
- VolumeShaderCoefficients coeff;
-
- if(!volume_shader_sample(kg, sd, state, ray->P, &coeff))
- return VOLUME_PATH_MISSED;
-
- int closure_flag = sd->flag;
- float t = ray->t;
- float3 new_tp;
-
-#ifdef __VOLUME_SCATTER__
- /* randomly scatter, and if we do t is shortened */
- if(closure_flag & SD_SCATTER) {
- /* Sample channel, use MIS with balance heuristic. */
- float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
- float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
- float3 channel_pdf;
- int channel = kernel_volume_sample_channel(albedo, *throughput, rphase, &channel_pdf);
-
- /* decide if we will hit or miss */
- bool scatter = true;
- float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
-
- if(probalistic_scatter) {
- float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
- float sample_transmittance = expf(-sample_sigma_t * t);
-
- if(1.0f - xi >= sample_transmittance) {
- scatter = true;
-
- /* rescale random number so we can reuse it */
- xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
-
- }
- else
- scatter = false;
- }
-
- if(scatter) {
- /* scattering */
- float3 pdf;
- float3 transmittance;
- float sample_t;
-
- /* distance sampling */
- sample_t = kernel_volume_distance_sample(ray->t, coeff.sigma_t, channel, xi, &transmittance, &pdf);
-
- /* modify pdf for hit/miss decision */
- if(probalistic_scatter)
- pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(coeff.sigma_t, t);
-
- new_tp = *throughput * coeff.sigma_s * transmittance / dot(channel_pdf, pdf);
- t = sample_t;
- }
- else {
- /* no scattering */
- float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
- float pdf = dot(channel_pdf, transmittance);
- new_tp = *throughput * transmittance / pdf;
- }
- }
- else
-#endif
- if(closure_flag & SD_EXTINCTION) {
- /* absorption only, no sampling needed */
- float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
- new_tp = *throughput * transmittance;
- }
- else {
- new_tp = *throughput;
- }
-
- /* integrate emission attenuated by extinction */
- if(L && (closure_flag & SD_EMISSION)) {
- float3 transmittance = volume_color_transmittance(coeff.sigma_t, ray->t);
- float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, ray->t);
- path_radiance_accum_emission(L, state, *throughput, emission);
- }
-
- /* modify throughput */
- if(closure_flag & SD_EXTINCTION) {
- *throughput = new_tp;
-
- /* prepare to scatter to new direction */
- if(t < ray->t) {
- /* adjust throughput and move to new location */
- sd->P = ray->P + t*ray->D;
-
- return VOLUME_PATH_SCATTERED;
- }
- }
-
- return VOLUME_PATH_ATTENUATED;
+ VolumeShaderCoefficients coeff;
+
+ if (!volume_shader_sample(kg, sd, state, ray->P, &coeff))
+ return VOLUME_PATH_MISSED;
+
+ int closure_flag = sd->flag;
+ float t = ray->t;
+ float3 new_tp;
+
+# ifdef __VOLUME_SCATTER__
+ /* randomly scatter, and if we do t is shortened */
+ if (closure_flag & SD_SCATTER) {
+ /* Sample channel, use MIS with balance heuristic. */
+ float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
+ float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
+ float3 channel_pdf;
+ int channel = kernel_volume_sample_channel(albedo, *throughput, rphase, &channel_pdf);
+
+ /* decide if we will hit or miss */
+ bool scatter = true;
+ float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
+
+ if (probalistic_scatter) {
+ float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
+ float sample_transmittance = expf(-sample_sigma_t * t);
+
+ if (1.0f - xi >= sample_transmittance) {
+ scatter = true;
+
+ /* rescale random number so we can reuse it */
+ xi = 1.0f - (1.0f - xi - sample_transmittance) / (1.0f - sample_transmittance);
+ }
+ else
+ scatter = false;
+ }
+
+ if (scatter) {
+ /* scattering */
+ float3 pdf;
+ float3 transmittance;
+ float sample_t;
+
+ /* distance sampling */
+ sample_t = kernel_volume_distance_sample(
+ ray->t, coeff.sigma_t, channel, xi, &transmittance, &pdf);
+
+ /* modify pdf for hit/miss decision */
+ if (probalistic_scatter)
+ pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(coeff.sigma_t, t);
+
+ new_tp = *throughput * coeff.sigma_s * transmittance / dot(channel_pdf, pdf);
+ t = sample_t;
+ }
+ else {
+ /* no scattering */
+ float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
+ float pdf = dot(channel_pdf, transmittance);
+ new_tp = *throughput * transmittance / pdf;
+ }
+ }
+ else
+# endif
+ if (closure_flag & SD_EXTINCTION) {
+ /* absorption only, no sampling needed */
+ float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
+ new_tp = *throughput * transmittance;
+ }
+ else {
+ new_tp = *throughput;
+ }
+
+ /* integrate emission attenuated by extinction */
+ if (L && (closure_flag & SD_EMISSION)) {
+ float3 transmittance = volume_color_transmittance(coeff.sigma_t, ray->t);
+ float3 emission = kernel_volume_emission_integrate(
+ &coeff, closure_flag, transmittance, ray->t);
+ path_radiance_accum_emission(L, state, *throughput, emission);
+ }
+
+ /* modify throughput */
+ if (closure_flag & SD_EXTINCTION) {
+ *throughput = new_tp;
+
+ /* prepare to scatter to new direction */
+ if (t < ray->t) {
+ /* adjust throughput and move to new location */
+ sd->P = ray->P + t * ray->D;
+
+ return VOLUME_PATH_SCATTERED;
+ }
+ }
+
+ return VOLUME_PATH_ATTENUATED;
}
/* heterogeneous volume distance sampling: integrate stepping through the
* volume until we reach the end, get absorbed entirely, or run out of
* iterations. this does probabilistically scatter or get transmitted through
* for path tracing where we don't want to branch. */
-ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
- KernelGlobals *kg,
- ccl_addr_space PathState *state,
- Ray *ray,
- ShaderData *sd,
- PathRadiance *L,
- ccl_addr_space float3 *throughput)
+ccl_device VolumeIntegrateResult
+kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
+ ccl_addr_space PathState *state,
+ Ray *ray,
+ ShaderData *sd,
+ PathRadiance *L,
+ ccl_addr_space float3 *throughput)
{
- float3 tp = *throughput;
- const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
-
- /* prepare for stepping */
- int max_steps = kernel_data.integrator.volume_max_steps;
- float step_offset, step_size;
- kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
-
- /* compute coefficients at the start */
- float t = 0.0f;
- float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
-
- /* pick random color channel, we use the Veach one-sample
- * model with balance heuristic for the channels */
- float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
- float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
- bool has_scatter = false;
-
- for(int i = 0; i < max_steps; i++) {
- /* advance to new position */
- float new_t = min(ray->t, (i+1) * step_size);
- float dt = new_t - t;
-
- /* use random position inside this segment to sample shader,
- * for last shorter step we remap it to fit within the segment. */
- if(new_t == ray->t) {
- step_offset *= (new_t - t) / step_size;
- }
-
- float3 new_P = ray->P + ray->D * (t + step_offset);
- VolumeShaderCoefficients coeff;
-
- /* compute segment */
- if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
- int closure_flag = sd->flag;
- float3 new_tp;
- float3 transmittance;
- bool scatter = false;
-
- /* distance sampling */
-#ifdef __VOLUME_SCATTER__
- if((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_EXTINCTION))) {
- has_scatter = true;
-
- /* Sample channel, use MIS with balance heuristic. */
- float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
- float3 channel_pdf;
- int channel = kernel_volume_sample_channel(albedo, tp, rphase, &channel_pdf);
-
- /* compute transmittance over full step */
- transmittance = volume_color_transmittance(coeff.sigma_t, dt);
-
- /* decide if we will scatter or continue */
- float sample_transmittance = kernel_volume_channel_get(transmittance, channel);
-
- if(1.0f - xi >= sample_transmittance) {
- /* compute sampling distance */
- float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
- float new_dt = -logf(1.0f - xi)/sample_sigma_t;
- new_t = t + new_dt;
-
- /* transmittance and pdf */
- float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
- float3 pdf = coeff.sigma_t * new_transmittance;
-
- /* throughput */
- new_tp = tp * coeff.sigma_s * new_transmittance / dot(channel_pdf, pdf);
- scatter = true;
- }
- else {
- /* throughput */
- float pdf = dot(channel_pdf, transmittance);
- new_tp = tp * transmittance / pdf;
-
- /* remap xi so we can reuse it and keep thing stratified */
- xi = 1.0f - (1.0f - xi)/sample_transmittance;
- }
- }
- else
-#endif
- if(closure_flag & SD_EXTINCTION) {
- /* absorption only, no sampling needed */
- transmittance = volume_color_transmittance(coeff.sigma_t, dt);
- new_tp = tp * transmittance;
- }
- else {
- new_tp = tp;
- }
-
- /* integrate emission attenuated by absorption */
- if(L && (closure_flag & SD_EMISSION)) {
- float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
- path_radiance_accum_emission(L, state, tp, emission);
- }
-
- /* modify throughput */
- if(closure_flag & SD_EXTINCTION) {
- tp = new_tp;
-
- /* stop if nearly all light blocked */
- if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) {
- tp = make_float3(0.0f, 0.0f, 0.0f);
- break;
- }
- }
-
- /* prepare to scatter to new direction */
- if(scatter) {
- /* adjust throughput and move to new location */
- sd->P = ray->P + new_t*ray->D;
- *throughput = tp;
-
- return VOLUME_PATH_SCATTERED;
- }
- else {
- /* accumulate transmittance */
- accum_transmittance *= transmittance;
- }
- }
-
- /* stop if at the end of the volume */
- t = new_t;
- if(t == ray->t)
- break;
- }
-
- *throughput = tp;
-
- return VOLUME_PATH_ATTENUATED;
+ float3 tp = *throughput;
+ const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
+
+ /* prepare for stepping */
+ int max_steps = kernel_data.integrator.volume_max_steps;
+ float step_offset, step_size;
+ kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
+
+ /* compute coefficients at the start */
+ float t = 0.0f;
+ float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
+
+ /* pick random color channel, we use the Veach one-sample
+ * model with balance heuristic for the channels */
+ float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
+ float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
+ bool has_scatter = false;
+
+ for (int i = 0; i < max_steps; i++) {
+ /* advance to new position */
+ float new_t = min(ray->t, (i + 1) * step_size);
+ float dt = new_t - t;
+
+ /* use random position inside this segment to sample shader,
+ * for last shorter step we remap it to fit within the segment. */
+ if (new_t == ray->t) {
+ step_offset *= (new_t - t) / step_size;
+ }
+
+ float3 new_P = ray->P + ray->D * (t + step_offset);
+ VolumeShaderCoefficients coeff;
+
+ /* compute segment */
+ if (volume_shader_sample(kg, sd, state, new_P, &coeff)) {
+ int closure_flag = sd->flag;
+ float3 new_tp;
+ float3 transmittance;
+ bool scatter = false;
+
+ /* distance sampling */
+# ifdef __VOLUME_SCATTER__
+ if ((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_EXTINCTION))) {
+ has_scatter = true;
+
+ /* Sample channel, use MIS with balance heuristic. */
+ float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
+ float3 channel_pdf;
+ int channel = kernel_volume_sample_channel(albedo, tp, rphase, &channel_pdf);
+
+ /* compute transmittance over full step */
+ transmittance = volume_color_transmittance(coeff.sigma_t, dt);
+
+ /* decide if we will scatter or continue */
+ float sample_transmittance = kernel_volume_channel_get(transmittance, channel);
+
+ if (1.0f - xi >= sample_transmittance) {
+ /* compute sampling distance */
+ float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
+ float new_dt = -logf(1.0f - xi) / sample_sigma_t;
+ new_t = t + new_dt;
+
+ /* transmittance and pdf */
+ float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
+ float3 pdf = coeff.sigma_t * new_transmittance;
+
+ /* throughput */
+ new_tp = tp * coeff.sigma_s * new_transmittance / dot(channel_pdf, pdf);
+ scatter = true;
+ }
+ else {
+ /* throughput */
+ float pdf = dot(channel_pdf, transmittance);
+ new_tp = tp * transmittance / pdf;
+
+ /* remap xi so we can reuse it and keep thing stratified */
+ xi = 1.0f - (1.0f - xi) / sample_transmittance;
+ }
+ }
+ else
+# endif
+ if (closure_flag & SD_EXTINCTION) {
+ /* absorption only, no sampling needed */
+ transmittance = volume_color_transmittance(coeff.sigma_t, dt);
+ new_tp = tp * transmittance;
+ }
+ else {
+ new_tp = tp;
+ }
+
+ /* integrate emission attenuated by absorption */
+ if (L && (closure_flag & SD_EMISSION)) {
+ float3 emission = kernel_volume_emission_integrate(
+ &coeff, closure_flag, transmittance, dt);
+ path_radiance_accum_emission(L, state, tp, emission);
+ }
+
+ /* modify throughput */
+ if (closure_flag & SD_EXTINCTION) {
+ tp = new_tp;
+
+ /* stop if nearly all light blocked */
+ if (tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) {
+ tp = make_float3(0.0f, 0.0f, 0.0f);
+ break;
+ }
+ }
+
+ /* prepare to scatter to new direction */
+ if (scatter) {
+ /* adjust throughput and move to new location */
+ sd->P = ray->P + new_t * ray->D;
+ *throughput = tp;
+
+ return VOLUME_PATH_SCATTERED;
+ }
+ else {
+ /* accumulate transmittance */
+ accum_transmittance *= transmittance;
+ }
+ }
+
+ /* stop if at the end of the volume */
+ t = new_t;
+ if (t == ray->t)
+ break;
+ }
+
+ *throughput = tp;
+
+ return VOLUME_PATH_ATTENUATED;
}
/* get the volume attenuation and emission over line segment defined by
* ray, with the assumption that there are no surfaces blocking light
* between the endpoints. distance sampling is used to decide if we will
* scatter or not. */
-ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(
- KernelGlobals *kg,
- ccl_addr_space PathState *state,
- ShaderData *sd,
- Ray *ray,
- PathRadiance *L,
- ccl_addr_space float3 *throughput,
- bool heterogeneous)
+ccl_device_noinline VolumeIntegrateResult
+kernel_volume_integrate(KernelGlobals *kg,
+ ccl_addr_space PathState *state,
+ ShaderData *sd,
+ Ray *ray,
+ PathRadiance *L,
+ ccl_addr_space float3 *throughput,
+ bool heterogeneous)
{
- shader_setup_from_volume(kg, sd, ray);
+ shader_setup_from_volume(kg, sd, ray);
- if(heterogeneous)
- return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput);
- else
- return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, true);
+ if (heterogeneous)
+ return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput);
+ else
+ return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, true);
}
-#ifndef __SPLIT_KERNEL__
+# ifndef __SPLIT_KERNEL__
/* Decoupled Volume Sampling
*
* VolumeSegment is list of coefficients and transmittance stored at all steps
@@ -689,26 +700,26 @@ ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(
* no support for malloc/free and too much stack usage with a fix size array. */
typedef struct VolumeStep {
- float3 sigma_s; /* scatter coefficient */
- float3 sigma_t; /* extinction coefficient */
- float3 accum_transmittance; /* accumulated transmittance including this step */
- float3 cdf_distance; /* cumulative density function for distance sampling */
- float t; /* distance at end of this step */
- float shade_t; /* jittered distance where shading was done in step */
- int closure_flag; /* shader evaluation closure flags */
+ float3 sigma_s; /* scatter coefficient */
+ float3 sigma_t; /* extinction coefficient */
+ float3 accum_transmittance; /* accumulated transmittance including this step */
+ float3 cdf_distance; /* cumulative density function for distance sampling */
+ float t; /* distance at end of this step */
+ float shade_t; /* jittered distance where shading was done in step */
+ int closure_flag; /* shader evaluation closure flags */
} VolumeStep;
typedef struct VolumeSegment {
- VolumeStep stack_step; /* stack storage for homogeneous step, to avoid malloc */
- VolumeStep *steps; /* recorded steps */
- int numsteps; /* number of steps */
- int closure_flag; /* accumulated closure flags from all steps */
+ VolumeStep stack_step; /* stack storage for homogeneous step, to avoid malloc */
+ VolumeStep *steps; /* recorded steps */
+ int numsteps; /* number of steps */
+ int closure_flag; /* accumulated closure flags from all steps */
- float3 accum_emission; /* accumulated emission at end of segment */
- float3 accum_transmittance; /* accumulated transmittance at end of segment */
- float3 accum_albedo; /* accumulated average albedo over segment */
+ float3 accum_emission; /* accumulated emission at end of segment */
+ float3 accum_transmittance; /* accumulated transmittance at end of segment */
+ float3 accum_albedo; /* accumulated average albedo over segment */
- int sampling_method; /* volume sampling method */
+ int sampling_method; /* volume sampling method */
} VolumeSegment;
/* record volume steps to the end of the volume.
@@ -717,400 +728,412 @@ typedef struct VolumeSegment {
* but the entire segment is needed to do always scattering, rather than probabilistically
* hitting or missing the volume. if we don't know the transmittance at the end of the
* volume we can't generate stratified distance samples up to that transmittance */
-#ifdef __VOLUME_DECOUPLED__
-ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state,
- Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous)
+# ifdef __VOLUME_DECOUPLED__
+ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg,
+ PathState *state,
+ Ray *ray,
+ ShaderData *sd,
+ VolumeSegment *segment,
+ bool heterogeneous)
{
- const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
-
- /* prepare for volume stepping */
- int max_steps;
- float step_size, step_offset;
-
- if(heterogeneous) {
- max_steps = kernel_data.integrator.volume_max_steps;
- kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
-
-#ifdef __KERNEL_CPU__
- /* NOTE: For the branched path tracing it's possible to have direct
- * and indirect light integration both having volume segments allocated.
- * We detect this using index in the pre-allocated memory. Currently we
- * only support two segments allocated at a time, if more needed some
- * modifications to the KernelGlobals will be needed.
- *
- * This gives us restrictions that decoupled record should only happen
- * in the stack manner, meaning if there's subsequent call of decoupled
- * record it'll need to free memory before it's caller frees memory.
- */
- const int index = kg->decoupled_volume_steps_index;
- assert(index < sizeof(kg->decoupled_volume_steps) /
- sizeof(*kg->decoupled_volume_steps));
- if(kg->decoupled_volume_steps[index] == NULL) {
- kg->decoupled_volume_steps[index] =
- (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
- }
- segment->steps = kg->decoupled_volume_steps[index];
- ++kg->decoupled_volume_steps_index;
-#else
- segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
-#endif
- }
- else {
- max_steps = 1;
- step_size = ray->t;
- step_offset = 0.0f;
- segment->steps = &segment->stack_step;
- }
-
- /* init accumulation variables */
- float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f);
- float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
- float3 accum_albedo = make_float3(0.0f, 0.0f, 0.0f);
- float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
- float t = 0.0f;
-
- segment->numsteps = 0;
- segment->closure_flag = 0;
- bool is_last_step_empty = false;
-
- VolumeStep *step = segment->steps;
-
- for(int i = 0; i < max_steps; i++, step++) {
- /* advance to new position */
- float new_t = min(ray->t, (i+1) * step_size);
- float dt = new_t - t;
-
- /* use random position inside this segment to sample shader,
- * for last shorter step we remap it to fit within the segment. */
- if(new_t == ray->t) {
- step_offset *= (new_t - t) / step_size;
- }
-
- float3 new_P = ray->P + ray->D * (t + step_offset);
- VolumeShaderCoefficients coeff;
-
- /* compute segment */
- if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
- int closure_flag = sd->flag;
- float3 sigma_t = coeff.sigma_t;
-
- /* compute average albedo for channel sampling */
- if(closure_flag & SD_SCATTER) {
- accum_albedo += dt * safe_divide_color(coeff.sigma_s, sigma_t);
- }
-
- /* compute accumulated transmittance */
- float3 transmittance = volume_color_transmittance(sigma_t, dt);
-
- /* compute emission attenuated by absorption */
- if(closure_flag & SD_EMISSION) {
- float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
- accum_emission += accum_transmittance * emission;
- }
-
- accum_transmittance *= transmittance;
-
- /* compute pdf for distance sampling */
- float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s;
- cdf_distance = cdf_distance + pdf_distance;
-
- /* write step data */
- step->sigma_t = sigma_t;
- step->sigma_s = coeff.sigma_s;
- step->closure_flag = closure_flag;
-
- segment->closure_flag |= closure_flag;
-
- is_last_step_empty = false;
- segment->numsteps++;
- }
- else {
- if(is_last_step_empty) {
- /* consecutive empty step, merge */
- step--;
- }
- else {
- /* store empty step */
- step->sigma_t = make_float3(0.0f, 0.0f, 0.0f);
- step->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
- step->closure_flag = 0;
-
- segment->numsteps++;
- is_last_step_empty = true;
- }
- }
-
- step->accum_transmittance = accum_transmittance;
- step->cdf_distance = cdf_distance;
- step->t = new_t;
- step->shade_t = t + step_offset;
-
- /* stop if at the end of the volume */
- t = new_t;
- if(t == ray->t)
- break;
-
- /* stop if nearly all light blocked */
- if(accum_transmittance.x < tp_eps && accum_transmittance.y < tp_eps && accum_transmittance.z < tp_eps)
- break;
- }
-
- /* store total emission and transmittance */
- segment->accum_emission = accum_emission;
- segment->accum_transmittance = accum_transmittance;
- segment->accum_albedo = accum_albedo;
-
- /* normalize cumulative density function for distance sampling */
- VolumeStep *last_step = segment->steps + segment->numsteps - 1;
-
- if(!is_zero(last_step->cdf_distance)) {
- VolumeStep *step = &segment->steps[0];
- int numsteps = segment->numsteps;
- float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance);
-
- for(int i = 0; i < numsteps; i++, step++)
- step->cdf_distance *= inv_cdf_distance_sum;
- }
+ const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
+
+ /* prepare for volume stepping */
+ int max_steps;
+ float step_size, step_offset;
+
+ if (heterogeneous) {
+ max_steps = kernel_data.integrator.volume_max_steps;
+ kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
+
+# ifdef __KERNEL_CPU__
+ /* NOTE: For the branched path tracing it's possible to have direct
+ * and indirect light integration both having volume segments allocated.
+ * We detect this using index in the pre-allocated memory. Currently we
+ * only support two segments allocated at a time, if more needed some
+ * modifications to the KernelGlobals will be needed.
+ *
+ * This gives us restrictions that decoupled record should only happen
+ * in the stack manner, meaning if there's subsequent call of decoupled
+ * record it'll need to free memory before it's caller frees memory.
+ */
+ const int index = kg->decoupled_volume_steps_index;
+ assert(index < sizeof(kg->decoupled_volume_steps) / sizeof(*kg->decoupled_volume_steps));
+ if (kg->decoupled_volume_steps[index] == NULL) {
+ kg->decoupled_volume_steps[index] = (VolumeStep *)malloc(sizeof(VolumeStep) * max_steps);
+ }
+ segment->steps = kg->decoupled_volume_steps[index];
+ ++kg->decoupled_volume_steps_index;
+# else
+ segment->steps = (VolumeStep *)malloc(sizeof(VolumeStep) * max_steps);
+# endif
+ }
+ else {
+ max_steps = 1;
+ step_size = ray->t;
+ step_offset = 0.0f;
+ segment->steps = &segment->stack_step;
+ }
+
+ /* init accumulation variables */
+ float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f);
+ float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
+ float3 accum_albedo = make_float3(0.0f, 0.0f, 0.0f);
+ float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
+ float t = 0.0f;
+
+ segment->numsteps = 0;
+ segment->closure_flag = 0;
+ bool is_last_step_empty = false;
+
+ VolumeStep *step = segment->steps;
+
+ for (int i = 0; i < max_steps; i++, step++) {
+ /* advance to new position */
+ float new_t = min(ray->t, (i + 1) * step_size);
+ float dt = new_t - t;
+
+ /* use random position inside this segment to sample shader,
+ * for last shorter step we remap it to fit within the segment. */
+ if (new_t == ray->t) {
+ step_offset *= (new_t - t) / step_size;
+ }
+
+ float3 new_P = ray->P + ray->D * (t + step_offset);
+ VolumeShaderCoefficients coeff;
+
+ /* compute segment */
+ if (volume_shader_sample(kg, sd, state, new_P, &coeff)) {
+ int closure_flag = sd->flag;
+ float3 sigma_t = coeff.sigma_t;
+
+ /* compute average albedo for channel sampling */
+ if (closure_flag & SD_SCATTER) {
+ accum_albedo += dt * safe_divide_color(coeff.sigma_s, sigma_t);
+ }
+
+ /* compute accumulated transmittance */
+ float3 transmittance = volume_color_transmittance(sigma_t, dt);
+
+ /* compute emission attenuated by absorption */
+ if (closure_flag & SD_EMISSION) {
+ float3 emission = kernel_volume_emission_integrate(
+ &coeff, closure_flag, transmittance, dt);
+ accum_emission += accum_transmittance * emission;
+ }
+
+ accum_transmittance *= transmittance;
+
+ /* compute pdf for distance sampling */
+ float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s;
+ cdf_distance = cdf_distance + pdf_distance;
+
+ /* write step data */
+ step->sigma_t = sigma_t;
+ step->sigma_s = coeff.sigma_s;
+ step->closure_flag = closure_flag;
+
+ segment->closure_flag |= closure_flag;
+
+ is_last_step_empty = false;
+ segment->numsteps++;
+ }
+ else {
+ if (is_last_step_empty) {
+ /* consecutive empty step, merge */
+ step--;
+ }
+ else {
+ /* store empty step */
+ step->sigma_t = make_float3(0.0f, 0.0f, 0.0f);
+ step->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
+ step->closure_flag = 0;
+
+ segment->numsteps++;
+ is_last_step_empty = true;
+ }
+ }
+
+ step->accum_transmittance = accum_transmittance;
+ step->cdf_distance = cdf_distance;
+ step->t = new_t;
+ step->shade_t = t + step_offset;
+
+ /* stop if at the end of the volume */
+ t = new_t;
+ if (t == ray->t)
+ break;
+
+ /* stop if nearly all light blocked */
+ if (accum_transmittance.x < tp_eps && accum_transmittance.y < tp_eps &&
+ accum_transmittance.z < tp_eps)
+ break;
+ }
+
+ /* store total emission and transmittance */
+ segment->accum_emission = accum_emission;
+ segment->accum_transmittance = accum_transmittance;
+ segment->accum_albedo = accum_albedo;
+
+ /* normalize cumulative density function for distance sampling */
+ VolumeStep *last_step = segment->steps + segment->numsteps - 1;
+
+ if (!is_zero(last_step->cdf_distance)) {
+ VolumeStep *step = &segment->steps[0];
+ int numsteps = segment->numsteps;
+ float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance);
+
+ for (int i = 0; i < numsteps; i++, step++)
+ step->cdf_distance *= inv_cdf_distance_sum;
+ }
}
ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
{
- if(segment->steps != &segment->stack_step) {
-#ifdef __KERNEL_CPU__
- /* NOTE: We only allow free last allocated segment.
- * No random order of alloc/free is supported.
- */
- assert(kg->decoupled_volume_steps_index > 0);
- assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]);
- --kg->decoupled_volume_steps_index;
-#else
- free(segment->steps);
-#endif
- }
+ if (segment->steps != &segment->stack_step) {
+# ifdef __KERNEL_CPU__
+ /* NOTE: We only allow free last allocated segment.
+ * No random order of alloc/free is supported.
+ */
+ assert(kg->decoupled_volume_steps_index > 0);
+ assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]);
+ --kg->decoupled_volume_steps_index;
+# else
+ free(segment->steps);
+# endif
+ }
}
-#endif /* __VOLUME_DECOUPLED__ */
+# endif /* __VOLUME_DECOUPLED__ */
/* scattering for homogeneous and heterogeneous volumes, using decoupled ray
* marching.
*
* function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */
-ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
- KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd,
- float3 *throughput, float rphase, float rscatter,
- const VolumeSegment *segment, const float3 *light_P, bool probalistic_scatter)
+ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(KernelGlobals *kg,
+ PathState *state,
+ Ray *ray,
+ ShaderData *sd,
+ float3 *throughput,
+ float rphase,
+ float rscatter,
+ const VolumeSegment *segment,
+ const float3 *light_P,
+ bool probalistic_scatter)
{
- kernel_assert(segment->closure_flag & SD_SCATTER);
-
- /* Sample color channel, use MIS with balance heuristic. */
- float3 channel_pdf;
- int channel = kernel_volume_sample_channel(segment->accum_albedo,
- *throughput,
- rphase,
- &channel_pdf);
-
- float xi = rscatter;
-
- /* probabilistic scattering decision based on transmittance */
- if(probalistic_scatter) {
- float sample_transmittance = kernel_volume_channel_get(segment->accum_transmittance, channel);
-
- if(1.0f - xi >= sample_transmittance) {
- /* rescale random number so we can reuse it */
- xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
- }
- else {
- *throughput /= sample_transmittance;
- return VOLUME_PATH_MISSED;
- }
- }
-
- VolumeStep *step;
- float3 transmittance;
- float pdf, sample_t;
- float mis_weight = 1.0f;
- bool distance_sample = true;
- bool use_mis = false;
-
- if(segment->sampling_method && light_P) {
- if(segment->sampling_method == SD_VOLUME_MIS) {
- /* multiple importance sample: randomly pick between
- * equiangular and distance sampling strategy */
- if(xi < 0.5f) {
- xi *= 2.0f;
- }
- else {
- xi = (xi - 0.5f)*2.0f;
- distance_sample = false;
- }
-
- use_mis = true;
- }
- else {
- /* only equiangular sampling */
- distance_sample = false;
- }
- }
-
- /* distance sampling */
- if(distance_sample) {
- /* find step in cdf */
- step = segment->steps;
-
- float prev_t = 0.0f;
- float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
-
- if(segment->numsteps > 1) {
- float prev_cdf = 0.0f;
- float step_cdf = 1.0f;
- float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
-
- for(int i = 0; ; i++, step++) {
- /* todo: optimize using binary search */
- step_cdf = kernel_volume_channel_get(step->cdf_distance, channel);
-
- if(xi < step_cdf || i == segment->numsteps-1)
- break;
-
- prev_cdf = step_cdf;
- prev_t = step->t;
- prev_cdf_distance = step->cdf_distance;
- }
-
- /* remap xi so we can reuse it */
- xi = (xi - prev_cdf)/(step_cdf - prev_cdf);
-
- /* pdf for picking step */
- step_pdf_distance = step->cdf_distance - prev_cdf_distance;
- }
-
- /* determine range in which we will sample */
- float step_t = step->t - prev_t;
-
- /* sample distance and compute transmittance */
- float3 distance_pdf;
- sample_t = prev_t + kernel_volume_distance_sample(step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf);
-
- /* modify pdf for hit/miss decision */
- if(probalistic_scatter)
- distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance;
-
- pdf = dot(channel_pdf, distance_pdf * step_pdf_distance);
-
- /* multiple importance sampling */
- if(use_mis) {
- float equi_pdf = kernel_volume_equiangular_pdf(ray, *light_P, sample_t);
- mis_weight = 2.0f*power_heuristic(pdf, equi_pdf);
- }
- }
- /* equi-angular sampling */
- else {
- /* sample distance */
- sample_t = kernel_volume_equiangular_sample(ray, *light_P, xi, &pdf);
-
- /* find step in which sampled distance is located */
- step = segment->steps;
-
- float prev_t = 0.0f;
- float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
-
- if(segment->numsteps > 1) {
- float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
-
- int numsteps = segment->numsteps;
- int high = numsteps - 1;
- int low = 0;
- int mid;
-
- while(low < high) {
- mid = (low + high) >> 1;
-
- if(sample_t < step[mid].t)
- high = mid;
- else if(sample_t >= step[mid + 1].t)
- low = mid + 1;
- else {
- /* found our interval in step[mid] .. step[mid+1] */
- prev_t = step[mid].t;
- prev_cdf_distance = step[mid].cdf_distance;
- step += mid+1;
- break;
- }
- }
-
- if(low >= numsteps - 1) {
- prev_t = step[numsteps - 1].t;
- prev_cdf_distance = step[numsteps-1].cdf_distance;
- step += numsteps - 1;
- }
-
- /* pdf for picking step with distance sampling */
- step_pdf_distance = step->cdf_distance - prev_cdf_distance;
- }
-
- /* determine range in which we will sample */
- float step_t = step->t - prev_t;
- float step_sample_t = sample_t - prev_t;
-
- /* compute transmittance */
- transmittance = volume_color_transmittance(step->sigma_t, step_sample_t);
-
- /* multiple importance sampling */
- if(use_mis) {
- float3 distance_pdf3 = kernel_volume_distance_pdf(step_t, step->sigma_t, step_sample_t);
- float distance_pdf = dot(channel_pdf, distance_pdf3 * step_pdf_distance);
- mis_weight = 2.0f*power_heuristic(pdf, distance_pdf);
- }
- }
- if(sample_t < 0.0f || pdf == 0.0f) {
- return VOLUME_PATH_MISSED;
- }
-
- /* compute transmittance up to this step */
- if(step != segment->steps)
- transmittance *= (step-1)->accum_transmittance;
-
- /* modify throughput */
- *throughput *= step->sigma_s * transmittance * (mis_weight / pdf);
-
- /* evaluate shader to create closures at shading point */
- if(segment->numsteps > 1) {
- sd->P = ray->P + step->shade_t*ray->D;
-
- VolumeShaderCoefficients coeff;
- volume_shader_sample(kg, sd, state, sd->P, &coeff);
- }
-
- /* move to new position */
- sd->P = ray->P + sample_t*ray->D;
-
- return VOLUME_PATH_SCATTERED;
+ kernel_assert(segment->closure_flag & SD_SCATTER);
+
+ /* Sample color channel, use MIS with balance heuristic. */
+ float3 channel_pdf;
+ int channel = kernel_volume_sample_channel(
+ segment->accum_albedo, *throughput, rphase, &channel_pdf);
+
+ float xi = rscatter;
+
+ /* probabilistic scattering decision based on transmittance */
+ if (probalistic_scatter) {
+ float sample_transmittance = kernel_volume_channel_get(segment->accum_transmittance, channel);
+
+ if (1.0f - xi >= sample_transmittance) {
+ /* rescale random number so we can reuse it */
+ xi = 1.0f - (1.0f - xi - sample_transmittance) / (1.0f - sample_transmittance);
+ }
+ else {
+ *throughput /= sample_transmittance;
+ return VOLUME_PATH_MISSED;
+ }
+ }
+
+ VolumeStep *step;
+ float3 transmittance;
+ float pdf, sample_t;
+ float mis_weight = 1.0f;
+ bool distance_sample = true;
+ bool use_mis = false;
+
+ if (segment->sampling_method && light_P) {
+ if (segment->sampling_method == SD_VOLUME_MIS) {
+ /* multiple importance sample: randomly pick between
+ * equiangular and distance sampling strategy */
+ if (xi < 0.5f) {
+ xi *= 2.0f;
+ }
+ else {
+ xi = (xi - 0.5f) * 2.0f;
+ distance_sample = false;
+ }
+
+ use_mis = true;
+ }
+ else {
+ /* only equiangular sampling */
+ distance_sample = false;
+ }
+ }
+
+ /* distance sampling */
+ if (distance_sample) {
+ /* find step in cdf */
+ step = segment->steps;
+
+ float prev_t = 0.0f;
+ float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
+
+ if (segment->numsteps > 1) {
+ float prev_cdf = 0.0f;
+ float step_cdf = 1.0f;
+ float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
+
+ for (int i = 0;; i++, step++) {
+ /* todo: optimize using binary search */
+ step_cdf = kernel_volume_channel_get(step->cdf_distance, channel);
+
+ if (xi < step_cdf || i == segment->numsteps - 1)
+ break;
+
+ prev_cdf = step_cdf;
+ prev_t = step->t;
+ prev_cdf_distance = step->cdf_distance;
+ }
+
+ /* remap xi so we can reuse it */
+ xi = (xi - prev_cdf) / (step_cdf - prev_cdf);
+
+ /* pdf for picking step */
+ step_pdf_distance = step->cdf_distance - prev_cdf_distance;
+ }
+
+ /* determine range in which we will sample */
+ float step_t = step->t - prev_t;
+
+ /* sample distance and compute transmittance */
+ float3 distance_pdf;
+ sample_t = prev_t + kernel_volume_distance_sample(
+ step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf);
+
+ /* modify pdf for hit/miss decision */
+ if (probalistic_scatter)
+ distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance;
+
+ pdf = dot(channel_pdf, distance_pdf * step_pdf_distance);
+
+ /* multiple importance sampling */
+ if (use_mis) {
+ float equi_pdf = kernel_volume_equiangular_pdf(ray, *light_P, sample_t);
+ mis_weight = 2.0f * power_heuristic(pdf, equi_pdf);
+ }
+ }
+ /* equi-angular sampling */
+ else {
+ /* sample distance */
+ sample_t = kernel_volume_equiangular_sample(ray, *light_P, xi, &pdf);
+
+ /* find step in which sampled distance is located */
+ step = segment->steps;
+
+ float prev_t = 0.0f;
+ float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
+
+ if (segment->numsteps > 1) {
+ float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
+
+ int numsteps = segment->numsteps;
+ int high = numsteps - 1;
+ int low = 0;
+ int mid;
+
+ while (low < high) {
+ mid = (low + high) >> 1;
+
+ if (sample_t < step[mid].t)
+ high = mid;
+ else if (sample_t >= step[mid + 1].t)
+ low = mid + 1;
+ else {
+ /* found our interval in step[mid] .. step[mid+1] */
+ prev_t = step[mid].t;
+ prev_cdf_distance = step[mid].cdf_distance;
+ step += mid + 1;
+ break;
+ }
+ }
+
+ if (low >= numsteps - 1) {
+ prev_t = step[numsteps - 1].t;
+ prev_cdf_distance = step[numsteps - 1].cdf_distance;
+ step += numsteps - 1;
+ }
+
+ /* pdf for picking step with distance sampling */
+ step_pdf_distance = step->cdf_distance - prev_cdf_distance;
+ }
+
+ /* determine range in which we will sample */
+ float step_t = step->t - prev_t;
+ float step_sample_t = sample_t - prev_t;
+
+ /* compute transmittance */
+ transmittance = volume_color_transmittance(step->sigma_t, step_sample_t);
+
+ /* multiple importance sampling */
+ if (use_mis) {
+ float3 distance_pdf3 = kernel_volume_distance_pdf(step_t, step->sigma_t, step_sample_t);
+ float distance_pdf = dot(channel_pdf, distance_pdf3 * step_pdf_distance);
+ mis_weight = 2.0f * power_heuristic(pdf, distance_pdf);
+ }
+ }
+ if (sample_t < 0.0f || pdf == 0.0f) {
+ return VOLUME_PATH_MISSED;
+ }
+
+ /* compute transmittance up to this step */
+ if (step != segment->steps)
+ transmittance *= (step - 1)->accum_transmittance;
+
+ /* modify throughput */
+ *throughput *= step->sigma_s * transmittance * (mis_weight / pdf);
+
+ /* evaluate shader to create closures at shading point */
+ if (segment->numsteps > 1) {
+ sd->P = ray->P + step->shade_t * ray->D;
+
+ VolumeShaderCoefficients coeff;
+ volume_shader_sample(kg, sd, state, sd->P, &coeff);
+ }
+
+ /* move to new position */
+ sd->P = ray->P + sample_t * ray->D;
+
+ return VOLUME_PATH_SCATTERED;
}
-#endif /* __SPLIT_KERNEL */
+# endif /* __SPLIT_KERNEL */
/* decide if we need to use decoupled or not */
-ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneous, bool direct, int sampling_method)
+ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg,
+ bool heterogeneous,
+ bool direct,
+ int sampling_method)
{
- /* decoupled ray marching for heterogeneous volumes not supported on the GPU,
- * which also means equiangular and multiple importance sampling is not
- * support for that case */
- if(!kernel_data.integrator.volume_decoupled)
- return false;
-
-#ifdef __KERNEL_GPU__
- if(heterogeneous)
- return false;
-#endif
-
- /* equiangular and multiple importance sampling only implemented for decoupled */
- if(sampling_method != 0)
- return true;
-
- /* for all light sampling use decoupled, reusing shader evaluations is
- * typically faster in that case */
- if(direct)
- return kernel_data.integrator.sample_all_lights_direct;
- else
- return kernel_data.integrator.sample_all_lights_indirect;
+ /* decoupled ray marching for heterogeneous volumes not supported on the GPU,
+ * which also means equiangular and multiple importance sampling is not
+ * support for that case */
+ if (!kernel_data.integrator.volume_decoupled)
+ return false;
+
+# ifdef __KERNEL_GPU__
+ if (heterogeneous)
+ return false;
+# endif
+
+ /* equiangular and multiple importance sampling only implemented for decoupled */
+ if (sampling_method != 0)
+ return true;
+
+ /* for all light sampling use decoupled, reusing shader evaluations is
+ * typically faster in that case */
+ if (direct)
+ return kernel_data.integrator.sample_all_lights_direct;
+ else
+ return kernel_data.integrator.sample_all_lights_indirect;
}
/* Volume Stack
@@ -1124,242 +1147,231 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
ccl_addr_space const Ray *ray,
ccl_addr_space VolumeStack *stack)
{
- /* NULL ray happens in the baker, does it need proper initialization of
- * camera in volume?
- */
- if(!kernel_data.cam.is_inside_volume || ray == NULL) {
- /* Camera is guaranteed to be in the air, only take background volume
- * into account in this case.
- */
- if(kernel_data.background.volume_shader != SHADER_NONE) {
- stack[0].shader = kernel_data.background.volume_shader;
- stack[0].object = PRIM_NONE;
- stack[1].shader = SHADER_NONE;
- }
- else {
- stack[0].shader = SHADER_NONE;
- }
- return;
- }
-
- kernel_assert(state->flag & PATH_RAY_CAMERA);
-
- Ray volume_ray = *ray;
- volume_ray.t = FLT_MAX;
-
- const uint visibility = (state->flag & PATH_RAY_ALL_VISIBILITY);
- int stack_index = 0, enclosed_index = 0;
-
-#ifdef __VOLUME_RECORD_ALL__
- Intersection hits[2*VOLUME_STACK_SIZE + 1];
- uint num_hits = scene_intersect_volume_all(kg,
- &volume_ray,
- hits,
- 2*VOLUME_STACK_SIZE,
- visibility);
- if(num_hits > 0) {
- int enclosed_volumes[VOLUME_STACK_SIZE];
- Intersection *isect = hits;
-
- qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
-
- for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
- shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
- if(stack_sd->flag & SD_BACKFACING) {
- bool need_add = true;
- for(int i = 0; i < enclosed_index && need_add; ++i) {
- /* If ray exited the volume and never entered to that volume
- * it means that camera is inside such a volume.
- */
- if(enclosed_volumes[i] == stack_sd->object) {
- need_add = false;
- }
- }
- for(int i = 0; i < stack_index && need_add; ++i) {
- /* Don't add intersections twice. */
- if(stack[i].object == stack_sd->object) {
- need_add = false;
- break;
- }
- }
- if(need_add && stack_index < VOLUME_STACK_SIZE - 1) {
- stack[stack_index].object = stack_sd->object;
- stack[stack_index].shader = stack_sd->shader;
- ++stack_index;
- }
- }
- else {
- /* If ray from camera enters the volume, this volume shouldn't
- * be added to the stack on exit.
- */
- enclosed_volumes[enclosed_index++] = stack_sd->object;
- }
- }
- }
-#else
- int enclosed_volumes[VOLUME_STACK_SIZE];
- int step = 0;
-
- while(stack_index < VOLUME_STACK_SIZE - 1 &&
- enclosed_index < VOLUME_STACK_SIZE - 1 &&
- step < 2 * VOLUME_STACK_SIZE)
- {
- Intersection isect;
- if(!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) {
- break;
- }
-
- shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
- if(stack_sd->flag & SD_BACKFACING) {
- /* If ray exited the volume and never entered to that volume
- * it means that camera is inside such a volume.
- */
- bool need_add = true;
- for(int i = 0; i < enclosed_index && need_add; ++i) {
- /* If ray exited the volume and never entered to that volume
- * it means that camera is inside such a volume.
- */
- if(enclosed_volumes[i] == stack_sd->object) {
- need_add = false;
- }
- }
- for(int i = 0; i < stack_index && need_add; ++i) {
- /* Don't add intersections twice. */
- if(stack[i].object == stack_sd->object) {
- need_add = false;
- break;
- }
- }
- if(need_add) {
- stack[stack_index].object = stack_sd->object;
- stack[stack_index].shader = stack_sd->shader;
- ++stack_index;
- }
- }
- else {
- /* If ray from camera enters the volume, this volume shouldn't
- * be added to the stack on exit.
- */
- enclosed_volumes[enclosed_index++] = stack_sd->object;
- }
-
- /* Move ray forward. */
- volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
- ++step;
- }
-#endif
- /* stack_index of 0 means quick checks outside of the kernel gave false
- * positive, nothing to worry about, just we've wasted quite a few of
- * ticks just to come into conclusion that camera is in the air.
- *
- * In this case we're doing the same above -- check whether background has
- * volume.
- */
- if(stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) {
- stack[0].shader = kernel_data.background.volume_shader;
- stack[0].object = PRIM_NONE;
- stack[1].shader = SHADER_NONE;
- }
- else {
- stack[stack_index].shader = SHADER_NONE;
- }
+ /* NULL ray happens in the baker, does it need proper initialization of
+ * camera in volume?
+ */
+ if (!kernel_data.cam.is_inside_volume || ray == NULL) {
+ /* Camera is guaranteed to be in the air, only take background volume
+ * into account in this case.
+ */
+ if (kernel_data.background.volume_shader != SHADER_NONE) {
+ stack[0].shader = kernel_data.background.volume_shader;
+ stack[0].object = PRIM_NONE;
+ stack[1].shader = SHADER_NONE;
+ }
+ else {
+ stack[0].shader = SHADER_NONE;
+ }
+ return;
+ }
+
+ kernel_assert(state->flag & PATH_RAY_CAMERA);
+
+ Ray volume_ray = *ray;
+ volume_ray.t = FLT_MAX;
+
+ const uint visibility = (state->flag & PATH_RAY_ALL_VISIBILITY);
+ int stack_index = 0, enclosed_index = 0;
+
+# ifdef __VOLUME_RECORD_ALL__
+ Intersection hits[2 * VOLUME_STACK_SIZE + 1];
+ uint num_hits = scene_intersect_volume_all(
+ kg, &volume_ray, hits, 2 * VOLUME_STACK_SIZE, visibility);
+ if (num_hits > 0) {
+ int enclosed_volumes[VOLUME_STACK_SIZE];
+ Intersection *isect = hits;
+
+ qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
+
+ for (uint hit = 0; hit < num_hits; ++hit, ++isect) {
+ shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
+ if (stack_sd->flag & SD_BACKFACING) {
+ bool need_add = true;
+ for (int i = 0; i < enclosed_index && need_add; ++i) {
+ /* If ray exited the volume and never entered to that volume
+ * it means that camera is inside such a volume.
+ */
+ if (enclosed_volumes[i] == stack_sd->object) {
+ need_add = false;
+ }
+ }
+ for (int i = 0; i < stack_index && need_add; ++i) {
+ /* Don't add intersections twice. */
+ if (stack[i].object == stack_sd->object) {
+ need_add = false;
+ break;
+ }
+ }
+ if (need_add && stack_index < VOLUME_STACK_SIZE - 1) {
+ stack[stack_index].object = stack_sd->object;
+ stack[stack_index].shader = stack_sd->shader;
+ ++stack_index;
+ }
+ }
+ else {
+ /* If ray from camera enters the volume, this volume shouldn't
+ * be added to the stack on exit.
+ */
+ enclosed_volumes[enclosed_index++] = stack_sd->object;
+ }
+ }
+ }
+# else
+ int enclosed_volumes[VOLUME_STACK_SIZE];
+ int step = 0;
+
+ while (stack_index < VOLUME_STACK_SIZE - 1 && enclosed_index < VOLUME_STACK_SIZE - 1 &&
+ step < 2 * VOLUME_STACK_SIZE) {
+ Intersection isect;
+ if (!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) {
+ break;
+ }
+
+ shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
+ if (stack_sd->flag & SD_BACKFACING) {
+ /* If ray exited the volume and never entered to that volume
+ * it means that camera is inside such a volume.
+ */
+ bool need_add = true;
+ for (int i = 0; i < enclosed_index && need_add; ++i) {
+ /* If ray exited the volume and never entered to that volume
+ * it means that camera is inside such a volume.
+ */
+ if (enclosed_volumes[i] == stack_sd->object) {
+ need_add = false;
+ }
+ }
+ for (int i = 0; i < stack_index && need_add; ++i) {
+ /* Don't add intersections twice. */
+ if (stack[i].object == stack_sd->object) {
+ need_add = false;
+ break;
+ }
+ }
+ if (need_add) {
+ stack[stack_index].object = stack_sd->object;
+ stack[stack_index].shader = stack_sd->shader;
+ ++stack_index;
+ }
+ }
+ else {
+ /* If ray from camera enters the volume, this volume shouldn't
+ * be added to the stack on exit.
+ */
+ enclosed_volumes[enclosed_index++] = stack_sd->object;
+ }
+
+ /* Move ray forward. */
+ volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
+ ++step;
+ }
+# endif
+ /* stack_index of 0 means quick checks outside of the kernel gave false
+ * positive, nothing to worry about, just we've wasted quite a few of
+ * ticks just to come into conclusion that camera is in the air.
+ *
+ * In this case we're doing the same above -- check whether background has
+ * volume.
+ */
+ if (stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) {
+ stack[0].shader = kernel_data.background.volume_shader;
+ stack[0].object = PRIM_NONE;
+ stack[1].shader = SHADER_NONE;
+ }
+ else {
+ stack[stack_index].shader = SHADER_NONE;
+ }
}
-ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, ccl_addr_space VolumeStack *stack)
+ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg,
+ ShaderData *sd,
+ ccl_addr_space VolumeStack *stack)
{
- /* todo: we should have some way for objects to indicate if they want the
- * world shader to work inside them. excluding it by default is problematic
- * because non-volume objects can't be assumed to be closed manifolds */
-
- if(!(sd->flag & SD_HAS_VOLUME))
- return;
-
- if(sd->flag & SD_BACKFACING) {
- /* exit volume object: remove from stack */
- for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
- if(stack[i].object == sd->object) {
- /* shift back next stack entries */
- do {
- stack[i] = stack[i+1];
- i++;
- }
- while(stack[i].shader != SHADER_NONE);
-
- return;
- }
- }
- }
- else {
- /* enter volume object: add to stack */
- int i;
-
- for(i = 0; stack[i].shader != SHADER_NONE; i++) {
- /* already in the stack? then we have nothing to do */
- if(stack[i].object == sd->object)
- return;
- }
-
- /* if we exceed the stack limit, ignore */
- if(i >= VOLUME_STACK_SIZE-1)
- return;
-
- /* add to the end of the stack */
- stack[i].shader = sd->shader;
- stack[i].object = sd->object;
- stack[i+1].shader = SHADER_NONE;
- }
+ /* todo: we should have some way for objects to indicate if they want the
+ * world shader to work inside them. excluding it by default is problematic
+ * because non-volume objects can't be assumed to be closed manifolds */
+
+ if (!(sd->flag & SD_HAS_VOLUME))
+ return;
+
+ if (sd->flag & SD_BACKFACING) {
+ /* exit volume object: remove from stack */
+ for (int i = 0; stack[i].shader != SHADER_NONE; i++) {
+ if (stack[i].object == sd->object) {
+ /* shift back next stack entries */
+ do {
+ stack[i] = stack[i + 1];
+ i++;
+ } while (stack[i].shader != SHADER_NONE);
+
+ return;
+ }
+ }
+ }
+ else {
+ /* enter volume object: add to stack */
+ int i;
+
+ for (i = 0; stack[i].shader != SHADER_NONE; i++) {
+ /* already in the stack? then we have nothing to do */
+ if (stack[i].object == sd->object)
+ return;
+ }
+
+ /* if we exceed the stack limit, ignore */
+ if (i >= VOLUME_STACK_SIZE - 1)
+ return;
+
+ /* add to the end of the stack */
+ stack[i].shader = sd->shader;
+ stack[i].object = sd->object;
+ stack[i + 1].shader = SHADER_NONE;
+ }
}
-#ifdef __SUBSURFACE__
+# ifdef __SUBSURFACE__
ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
ShaderData *stack_sd,
Ray *ray,
ccl_addr_space VolumeStack *stack)
{
- kernel_assert(kernel_data.integrator.use_volumes);
-
- Ray volume_ray = *ray;
-
-# ifdef __VOLUME_RECORD_ALL__
- Intersection hits[2*VOLUME_STACK_SIZE + 1];
- uint num_hits = scene_intersect_volume_all(kg,
- &volume_ray,
- hits,
- 2*VOLUME_STACK_SIZE,
- PATH_RAY_ALL_VISIBILITY);
- if(num_hits > 0) {
- Intersection *isect = hits;
-
- qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
-
- for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
- shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
- kernel_volume_stack_enter_exit(kg, stack_sd, stack);
- }
- }
-# else
- Intersection isect;
- int step = 0;
- float3 Pend = ray->P + ray->D*ray->t;
- while(step < 2 * VOLUME_STACK_SIZE &&
- scene_intersect_volume(kg,
- &volume_ray,
- &isect,
- PATH_RAY_ALL_VISIBILITY))
- {
- shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
- kernel_volume_stack_enter_exit(kg, stack_sd, stack);
-
- /* Move ray forward. */
- volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
- if(volume_ray.t != FLT_MAX) {
- volume_ray.D = normalize_len(Pend - volume_ray.P, &volume_ray.t);
- }
- ++step;
- }
-# endif
+ kernel_assert(kernel_data.integrator.use_volumes);
+
+ Ray volume_ray = *ray;
+
+# ifdef __VOLUME_RECORD_ALL__
+ Intersection hits[2 * VOLUME_STACK_SIZE + 1];
+ uint num_hits = scene_intersect_volume_all(
+ kg, &volume_ray, hits, 2 * VOLUME_STACK_SIZE, PATH_RAY_ALL_VISIBILITY);
+ if (num_hits > 0) {
+ Intersection *isect = hits;
+
+ qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
+
+ for (uint hit = 0; hit < num_hits; ++hit, ++isect) {
+ shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
+ kernel_volume_stack_enter_exit(kg, stack_sd, stack);
+ }
+ }
+# else
+ Intersection isect;
+ int step = 0;
+ float3 Pend = ray->P + ray->D * ray->t;
+ while (step < 2 * VOLUME_STACK_SIZE &&
+ scene_intersect_volume(kg, &volume_ray, &isect, PATH_RAY_ALL_VISIBILITY)) {
+ shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
+ kernel_volume_stack_enter_exit(kg, stack_sd, stack);
+
+ /* Move ray forward. */
+ volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
+ if (volume_ray.t != FLT_MAX) {
+ volume_ray.D = normalize_len(Pend - volume_ray.P, &volume_ray.t);
+ }
+ ++step;
+ }
+# endif
}
-#endif
+# endif
/* Clean stack after the last bounce.
*
@@ -1378,15 +1390,15 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
ccl_device_inline void kernel_volume_clean_stack(KernelGlobals *kg,
ccl_addr_space VolumeStack *volume_stack)
{
- if(kernel_data.background.volume_shader != SHADER_NONE) {
- /* Keep the world's volume in stack. */
- volume_stack[1].shader = SHADER_NONE;
- }
- else {
- volume_stack[0].shader = SHADER_NONE;
- }
+ if (kernel_data.background.volume_shader != SHADER_NONE) {
+ /* Keep the world's volume in stack. */
+ volume_stack[1].shader = SHADER_NONE;
+ }
+ else {
+ volume_stack[0].shader = SHADER_NONE;
+ }
}
-#endif /* __VOLUME__ */
+#endif /* __VOLUME__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index 9667156eaf5..799561a7466 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -35,27 +35,26 @@ ccl_device bool get_next_work(KernelGlobals *kg,
uint ray_index,
ccl_private uint *global_work_index)
{
- /* With a small amount of work there may be more threads than work due to
- * rounding up of global size, stop such threads immediately. */
- if(ray_index >= total_work_size) {
- return false;
- }
+ /* With a small amount of work there may be more threads than work due to
+ * rounding up of global size, stop such threads immediately. */
+ if (ray_index >= total_work_size) {
+ return false;
+ }
- /* Increase atomic work index counter in pool. */
- uint pool = ray_index / WORK_POOL_SIZE;
- uint work_index = atomic_fetch_and_inc_uint32(&work_pools[pool]);
+ /* Increase atomic work index counter in pool. */
+ uint pool = ray_index / WORK_POOL_SIZE;
+ uint work_index = atomic_fetch_and_inc_uint32(&work_pools[pool]);
- /* Map per-pool work index to a global work index. */
- uint global_size = ccl_global_size(0) * ccl_global_size(1);
- kernel_assert(global_size % WORK_POOL_SIZE == 0);
- kernel_assert(ray_index < global_size);
+ /* Map per-pool work index to a global work index. */
+ uint global_size = ccl_global_size(0) * ccl_global_size(1);
+ kernel_assert(global_size % WORK_POOL_SIZE == 0);
+ kernel_assert(ray_index < global_size);
- *global_work_index = (work_index / WORK_POOL_SIZE) * global_size
- + (pool * WORK_POOL_SIZE)
- + (work_index % WORK_POOL_SIZE);
+ *global_work_index = (work_index / WORK_POOL_SIZE) * global_size + (pool * WORK_POOL_SIZE) +
+ (work_index % WORK_POOL_SIZE);
- /* Test if all work for this pool is done. */
- return (*global_work_index < total_work_size);
+ /* Test if all work for this pool is done. */
+ return (*global_work_index < total_work_size);
}
#endif
@@ -67,22 +66,22 @@ ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile,
ccl_private uint *sample)
{
#ifdef __KERNEL_CUDA__
- /* Keeping threads for the same pixel together improves performance on CUDA. */
- uint sample_offset = global_work_index % tile->num_samples;
- uint pixel_offset = global_work_index / tile->num_samples;
-#else /* __KERNEL_CUDA__ */
- uint tile_pixels = tile->w * tile->h;
- uint sample_offset = global_work_index / tile_pixels;
- uint pixel_offset = global_work_index - sample_offset * tile_pixels;
+ /* Keeping threads for the same pixel together improves performance on CUDA. */
+ uint sample_offset = global_work_index % tile->num_samples;
+ uint pixel_offset = global_work_index / tile->num_samples;
+#else /* __KERNEL_CUDA__ */
+ uint tile_pixels = tile->w * tile->h;
+ uint sample_offset = global_work_index / tile_pixels;
+ uint pixel_offset = global_work_index - sample_offset * tile_pixels;
#endif /* __KERNEL_CUDA__ */
- uint y_offset = pixel_offset / tile->w;
- uint x_offset = pixel_offset - y_offset * tile->w;
+ uint y_offset = pixel_offset / tile->w;
+ uint x_offset = pixel_offset - y_offset * tile->w;
- *x = tile->x + x_offset;
- *y = tile->y + y_offset;
- *sample = tile->start_sample + sample_offset;
+ *x = tile->x + x_offset;
+ *y = tile->y + y_offset;
+ *sample = tile->start_sample + sample_offset;
}
CCL_NAMESPACE_END
-#endif /* __KERNEL_WORK_STEALING_H__ */
+#endif /* __KERNEL_WORK_STEALING_H__ */
diff --git a/intern/cycles/kernel/kernels/cpu/filter.cpp b/intern/cycles/kernel/kernels/cpu/filter.cpp
index 2ff1a392dc3..145a6b6ac40 100644
--- a/intern/cycles/kernel/kernels/cpu/filter.cpp
+++ b/intern/cycles/kernel/kernels/cpu/filter.cpp
@@ -53,7 +53,7 @@
/* quiet unused define warnings */
#if defined(__KERNEL_SSE2__)
- /* do nothing */
+/* do nothing */
#endif
#include "kernel/filter/filter.h"
diff --git a/intern/cycles/kernel/kernels/cpu/filter_avx.cpp b/intern/cycles/kernel/kernels/cpu/filter_avx.cpp
index 4a9e6047ecf..1d68214c8e7 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_avx.cpp
+++ b/intern/cycles/kernel/kernels/cpu/filter_avx.cpp
@@ -32,7 +32,7 @@
# define __KERNEL_SSE41__
# define __KERNEL_AVX__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
#include "kernel/filter/filter.h"
#define KERNEL_ARCH cpu_avx
diff --git a/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp b/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp
index c22ec576254..b6709fbc529 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp
@@ -33,7 +33,7 @@
# define __KERNEL_AVX__
# define __KERNEL_AVX2__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
#include "kernel/filter/filter.h"
#define KERNEL_ARCH cpu_avx2
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
index 02c85562db8..1423b182ab8 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
@@ -25,7 +25,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
float *sampleV,
float *sampleVV,
float *bufferV,
- int* prefilter_rect,
+ int *prefilter_rect,
int buffer_pass_stride,
int buffer_denoising_offset);
@@ -38,7 +38,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
float *mean,
float *variance,
float scale,
- int* prefilter_rect,
+ int *prefilter_rect,
int buffer_pass_stride,
int buffer_denoising_offset);
@@ -49,9 +49,10 @@ void KERNEL_FUNCTION_FULL_NAME(filter_write_feature)(int sample,
float *from,
float *buffer,
int out_offset,
- int* prefilter_rect);
+ int *prefilter_rect);
-void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
+void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x,
+ int y,
ccl_global float *image,
ccl_global float *variance,
ccl_global float *depth,
@@ -59,22 +60,17 @@ void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
int *rect,
int pass_stride);
-void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
- float *mean,
- float *variance,
- float *a,
- float *b,
- int* prefilter_rect,
- int r);
+void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(
+ int x, int y, float *mean, float *variance, float *a, float *b, int *prefilter_rect, int r);
-void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer,
+void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float *buffer,
TileInfo *tiles,
int x,
int y,
int storage_ofs,
float *transform,
int *rank,
- int* rect,
+ int *rect,
int pass_stride,
int frame_stride,
bool use_time,
@@ -87,24 +83,18 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx,
float *variance_image,
float *scale_image,
float *difference_image,
- int* rect,
+ int *rect,
int stride,
int channel_offset,
int frame_offset,
float a,
float k_2);
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(float *difference_image,
- float *out_image,
- int* rect,
- int stride,
- int f);
+void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(
+ float *difference_image, float *out_image, int *rect, int stride, int f);
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(float *difference_image,
- float *out_image,
- int* rect,
- int stride,
- int f);
+void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(
+ float *difference_image, float *out_image, int *rect, int stride, int f);
void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
int dy,
@@ -113,7 +103,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
float *temp_image,
float *out_image,
float *accum_image,
- int* rect,
+ int *rect,
int channel_offset,
int stride,
int f);
@@ -137,7 +127,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *out_image,
float *accum_image,
- int* rect,
+ int *rect,
int stride);
void KERNEL_FUNCTION_FULL_NAME(filter_finalize)(int x,
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
index c29505880cb..3d4cb87e104 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
@@ -25,12 +25,12 @@
#include "kernel/filter/filter_kernel.h"
#ifdef KERNEL_STUB
-# define STUB_ASSERT(arch, name) assert(!(#name " kernel stub for architecture " #arch " was called!"))
+# define STUB_ASSERT(arch, name) \
+ assert(!(#name " kernel stub for architecture " #arch " was called!"))
#endif
CCL_NAMESPACE_BEGIN
-
/* Denoise filter */
void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
@@ -42,23 +42,25 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
float *sampleVariance,
float *sampleVarianceV,
float *bufferVariance,
- int* prefilter_rect,
+ int *prefilter_rect,
int buffer_pass_stride,
int buffer_denoising_offset)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow);
+ STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow);
#else
- kernel_filter_divide_shadow(sample, tile_info,
- x, y,
- unfilteredA,
- unfilteredB,
- sampleVariance,
- sampleVarianceV,
- bufferVariance,
- load_int4(prefilter_rect),
- buffer_pass_stride,
- buffer_denoising_offset);
+ kernel_filter_divide_shadow(sample,
+ tile_info,
+ x,
+ y,
+ unfilteredA,
+ unfilteredB,
+ sampleVariance,
+ sampleVarianceV,
+ bufferVariance,
+ load_int4(prefilter_rect),
+ buffer_pass_stride,
+ buffer_denoising_offset);
#endif
}
@@ -68,23 +70,28 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
int v_offset,
int x,
int y,
- float *mean, float *variance,
+ float *mean,
+ float *variance,
float scale,
- int* prefilter_rect,
+ int *prefilter_rect,
int buffer_pass_stride,
int buffer_denoising_offset)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_get_feature);
+ STUB_ASSERT(KERNEL_ARCH, filter_get_feature);
#else
- kernel_filter_get_feature(sample, tile_info,
- m_offset, v_offset,
- x, y,
- mean, variance,
- scale,
- load_int4(prefilter_rect),
- buffer_pass_stride,
- buffer_denoising_offset);
+ kernel_filter_get_feature(sample,
+ tile_info,
+ m_offset,
+ v_offset,
+ x,
+ y,
+ mean,
+ variance,
+ scale,
+ load_int4(prefilter_rect),
+ buffer_pass_stride,
+ buffer_denoising_offset);
#endif
}
@@ -95,16 +102,18 @@ void KERNEL_FUNCTION_FULL_NAME(filter_write_feature)(int sample,
float *from,
float *buffer,
int out_offset,
- int* prefilter_rect)
+ int *prefilter_rect)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_write_feature);
+ STUB_ASSERT(KERNEL_ARCH, filter_write_feature);
#else
- kernel_filter_write_feature(sample, x, y, load_int4(buffer_params), from, buffer, out_offset, load_int4(prefilter_rect));
+ kernel_filter_write_feature(
+ sample, x, y, load_int4(buffer_params), from, buffer, out_offset, load_int4(prefilter_rect));
#endif
}
-void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
+void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x,
+ int y,
ccl_global float *image,
ccl_global float *variance,
ccl_global float *depth,
@@ -113,35 +122,31 @@ void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
int pass_stride)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_detect_outliers);
+ STUB_ASSERT(KERNEL_ARCH, filter_detect_outliers);
#else
- kernel_filter_detect_outliers(x, y, image, variance, depth, output, load_int4(rect), pass_stride);
+ kernel_filter_detect_outliers(
+ x, y, image, variance, depth, output, load_int4(rect), pass_stride);
#endif
}
-void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
- float *mean,
- float *variance,
- float *a,
- float *b,
- int* prefilter_rect,
- int r)
+void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(
+ int x, int y, float *mean, float *variance, float *a, float *b, int *prefilter_rect, int r)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_combine_halves);
+ STUB_ASSERT(KERNEL_ARCH, filter_combine_halves);
#else
- kernel_filter_combine_halves(x, y, mean, variance, a, b, load_int4(prefilter_rect), r);
+ kernel_filter_combine_halves(x, y, mean, variance, a, b, load_int4(prefilter_rect), r);
#endif
}
-void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer,
+void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float *buffer,
TileInfo *tile_info,
int x,
int y,
int storage_ofs,
float *transform,
int *rank,
- int* prefilter_rect,
+ int *prefilter_rect,
int pass_stride,
int frame_stride,
bool use_time,
@@ -149,21 +154,22 @@ void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer,
float pca_threshold)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_construct_transform);
+ STUB_ASSERT(KERNEL_ARCH, filter_construct_transform);
#else
- rank += storage_ofs;
- transform += storage_ofs*TRANSFORM_SIZE;
- kernel_filter_construct_transform(buffer,
- tile_info,
- x, y,
- load_int4(prefilter_rect),
- pass_stride,
- frame_stride,
- use_time,
- transform,
- rank,
- radius,
- pca_threshold);
+ rank += storage_ofs;
+ transform += storage_ofs * TRANSFORM_SIZE;
+ kernel_filter_construct_transform(buffer,
+ tile_info,
+ x,
+ y,
+ load_int4(prefilter_rect),
+ pass_stride,
+ frame_stride,
+ use_time,
+ transform,
+ rank,
+ radius,
+ pca_threshold);
#endif
}
@@ -181,44 +187,40 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx,
float k_2)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_difference);
+ STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_difference);
#else
- kernel_filter_nlm_calc_difference(dx, dy,
- weight_image,
- variance_image,
- scale_image,
- difference_image,
- load_int4(rect),
- stride,
- channel_offset,
- frame_offset,
- a, k_2);
+ kernel_filter_nlm_calc_difference(dx,
+ dy,
+ weight_image,
+ variance_image,
+ scale_image,
+ difference_image,
+ load_int4(rect),
+ stride,
+ channel_offset,
+ frame_offset,
+ a,
+ k_2);
#endif
}
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(float *difference_image,
- float *out_image,
- int *rect,
- int stride,
- int f)
+void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(
+ float *difference_image, float *out_image, int *rect, int stride, int f)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_blur);
+ STUB_ASSERT(KERNEL_ARCH, filter_nlm_blur);
#else
- kernel_filter_nlm_blur(difference_image, out_image, load_int4(rect), stride, f);
+ kernel_filter_nlm_blur(difference_image, out_image, load_int4(rect), stride, f);
#endif
}
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(float *difference_image,
- float *out_image,
- int *rect,
- int stride,
- int f)
+void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(
+ float *difference_image, float *out_image, int *rect, int stride, int f)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_weight);
+ STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_weight);
#else
- kernel_filter_nlm_calc_weight(difference_image, out_image, load_int4(rect), stride, f);
+ kernel_filter_nlm_calc_weight(difference_image, out_image, load_int4(rect), stride, f);
#endif
}
@@ -235,17 +237,19 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
int f)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_update_output);
+ STUB_ASSERT(KERNEL_ARCH, filter_nlm_update_output);
#else
- kernel_filter_nlm_update_output(dx, dy,
- difference_image,
- image,
- temp_image,
- out_image,
- accum_image,
- load_int4(rect),
- channel_offset,
- stride, f);
+ kernel_filter_nlm_update_output(dx,
+ dy,
+ difference_image,
+ image,
+ temp_image,
+ out_image,
+ accum_image,
+ load_int4(rect),
+ channel_offset,
+ stride,
+ f);
#endif
}
@@ -267,19 +271,24 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
bool use_time)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian);
+ STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian);
#else
- kernel_filter_nlm_construct_gramian(dx, dy, t,
- difference_image,
- buffer,
- transform, rank,
- XtWX, XtWY,
- load_int4(rect),
- load_int4(filter_window),
- stride, f,
- pass_stride,
- frame_offset,
- use_time);
+ kernel_filter_nlm_construct_gramian(dx,
+ dy,
+ t,
+ difference_image,
+ buffer,
+ transform,
+ rank,
+ XtWX,
+ XtWY,
+ load_int4(rect),
+ load_int4(filter_window),
+ stride,
+ f,
+ pass_stride,
+ frame_offset,
+ use_time);
#endif
}
@@ -289,9 +298,9 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *out_image,
int stride)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_normalize);
+ STUB_ASSERT(KERNEL_ARCH, filter_nlm_normalize);
#else
- kernel_filter_nlm_normalize(out_image, accum_image, load_int4(rect), stride);
+ kernel_filter_nlm_normalize(out_image, accum_image, load_int4(rect), stride);
#endif
}
@@ -306,12 +315,12 @@ void KERNEL_FUNCTION_FULL_NAME(filter_finalize)(int x,
int sample)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_finalize);
+ STUB_ASSERT(KERNEL_ARCH, filter_finalize);
#else
- XtWX += storage_ofs*XTWX_SIZE;
- XtWY += storage_ofs*XTWY_SIZE;
- rank += storage_ofs;
- kernel_filter_finalize(x, y, buffer, rank, 1, XtWX, XtWY, load_int4(buffer_params), sample);
+ XtWX += storage_ofs * XTWX_SIZE;
+ XtWY += storage_ofs * XTWY_SIZE;
+ rank += storage_ofs;
+ kernel_filter_finalize(x, y, buffer, rank, 1, XtWX, XtWY, load_int4(buffer_params), sample);
#endif
}
diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp
index f7c9935f1d0..6c6c3e78696 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp
@@ -27,7 +27,7 @@
# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
# define __KERNEL_SSE2__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
#include "kernel/filter/filter.h"
#define KERNEL_ARCH cpu_sse2
diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp
index 070b95a3505..e2243000331 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp
+++ b/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp
@@ -29,7 +29,7 @@
# define __KERNEL_SSE3__
# define __KERNEL_SSSE3__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
#include "kernel/filter/filter.h"
#define KERNEL_ARCH cpu_sse3
diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp
index 254025be4e2..068889365e3 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp
+++ b/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp
@@ -31,7 +31,7 @@
# define __KERNEL_SSSE3__
# define __KERNEL_SSE41__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
#include "kernel/filter/filter.h"
#define KERNEL_ARCH cpu_sse41
diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp
index de487f6123f..f2146302a27 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp
@@ -53,7 +53,7 @@
/* quiet unused define warnings */
#if defined(__KERNEL_SSE2__)
- /* do nothing */
+/* do nothing */
#endif
#include "kernel/kernel.h"
@@ -66,29 +66,27 @@ CCL_NAMESPACE_BEGIN
void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size)
{
- if(strcmp(name, "__data") == 0)
- memcpy(&kg->__data, host, size);
- else
- assert(0);
+ if (strcmp(name, "__data") == 0)
+ memcpy(&kg->__data, host, size);
+ else
+ assert(0);
}
-void kernel_tex_copy(KernelGlobals *kg,
- const char *name,
- void *mem,
- size_t size)
+void kernel_tex_copy(KernelGlobals *kg, const char *name, void *mem, size_t size)
{
- if(0) {
- }
+ if (0) {
+ }
#define KERNEL_TEX(type, tname) \
- else if(strcmp(name, #tname) == 0) { \
- kg->tname.data = (type*)mem; \
- kg->tname.width = size; \
- }
+ else if (strcmp(name, #tname) == 0) \
+ { \
+ kg->tname.data = (type *)mem; \
+ kg->tname.width = size; \
+ }
#include "kernel/kernel_textures.h"
- else {
- assert(0);
- }
+ else {
+ assert(0);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
index a645fb4d8dd..0656fc9dd00 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
@@ -32,7 +32,7 @@
# define __KERNEL_SSE41__
# define __KERNEL_AVX__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
#include "kernel/kernel.h"
#define KERNEL_ARCH cpu_avx
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
index 6bbb87727b9..5baafdc699e 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
@@ -33,7 +33,7 @@
# define __KERNEL_AVX__
# define __KERNEL_AVX2__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
#include "kernel/kernel.h"
#define KERNEL_ARCH cpu_avx2
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index 6bdb8546a24..f5d981fb71a 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -16,25 +16,24 @@
/* Templated common declaration part of all CPU kernels. */
-void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
- float *buffer,
- int sample,
- int x, int y,
- int offset,
- int stride);
+void KERNEL_FUNCTION_FULL_NAME(path_trace)(
+ KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride);
void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg,
uchar4 *rgba,
float *buffer,
float sample_scale,
- int x, int y,
- int offset, int stride);
+ int x,
+ int y,
+ int offset,
+ int stride);
void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
uchar4 *rgba,
float *buffer,
float sample_scale,
- int x, int y,
+ int x,
+ int y,
int offset,
int stride);
@@ -49,24 +48,28 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
/* Split kernels */
-void KERNEL_FUNCTION_FULL_NAME(data_init)(
- KernelGlobals *kg,
- ccl_constant KernelData *data,
- ccl_global void *split_data_buffer,
- int num_elements,
- ccl_global char *ray_state,
- int start_sample,
- int end_sample,
- int sx, int sy, int sw, int sh, int offset, int stride,
- ccl_global int *Queue_index,
- int queuesize,
- ccl_global char *use_queues_flag,
- ccl_global unsigned int *work_pool_wgs,
- unsigned int num_samples,
- ccl_global float *buffer);
+void KERNEL_FUNCTION_FULL_NAME(data_init)(KernelGlobals *kg,
+ ccl_constant KernelData *data,
+ ccl_global void *split_data_buffer,
+ int num_elements,
+ ccl_global char *ray_state,
+ int start_sample,
+ int end_sample,
+ int sx,
+ int sy,
+ int sw,
+ int sh,
+ int offset,
+ int stride,
+ ccl_global int *Queue_index,
+ int queuesize,
+ ccl_global char *use_queues_flag,
+ ccl_global unsigned int *work_pool_wgs,
+ unsigned int num_samples,
+ ccl_global float *buffer);
#define DECLARE_SPLIT_KERNEL_FUNCTION(name) \
- void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData *data);
+ void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * data);
DECLARE_SPLIT_KERNEL_FUNCTION(path_init)
DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect)
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
index ae4fd85780d..4289e2bbb85 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@@ -19,523 +19,508 @@
CCL_NAMESPACE_BEGIN
-template<typename T> struct TextureInterpolator {
+template<typename T> struct TextureInterpolator {
#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
- { \
- u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \
- u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \
- u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
- u[3] = (1.0f / 6.0f) * t * t * t; \
- } (void) 0
-
- static ccl_always_inline float4 read(float4 r)
- {
- return r;
- }
-
- static ccl_always_inline float4 read(uchar4 r)
- {
- float f = 1.0f / 255.0f;
- return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
- }
-
- static ccl_always_inline float4 read(uchar r)
- {
- float f = r * (1.0f / 255.0f);
- return make_float4(f, f, f, 1.0f);
- }
-
- static ccl_always_inline float4 read(float r)
- {
- /* TODO(dingto): Optimize this, so interpolation
- * happens on float instead of float4 */
- return make_float4(r, r, r, 1.0f);
- }
-
- static ccl_always_inline float4 read(half4 r)
- {
- return half4_to_float4(r);
- }
-
- static ccl_always_inline float4 read(half r)
- {
- float f = half_to_float(r);
- return make_float4(f, f, f, 1.0f);
- }
-
- static ccl_always_inline float4 read(uint16_t r)
- {
- float f = r*(1.0f/65535.0f);
- return make_float4(f, f, f, 1.0f);
- }
-
- static ccl_always_inline float4 read(ushort4 r)
- {
- float f = 1.0f/65535.0f;
- return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
- }
-
- static ccl_always_inline float4 read(const T *data,
- int x, int y,
- int width, int height)
- {
- if(x < 0 || y < 0 || x >= width || y >= height) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- return read(data[y * width + x]);
- }
-
- static ccl_always_inline int wrap_periodic(int x, int width)
- {
- x %= width;
- if(x < 0)
- x += width;
- return x;
- }
-
- static ccl_always_inline int wrap_clamp(int x, int width)
- {
- return clamp(x, 0, width-1);
- }
-
- static ccl_always_inline float frac(float x, int *ix)
- {
- int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
- *ix = i;
- return x - (float)i;
- }
-
- /* ******** 2D interpolation ******** */
-
- static ccl_always_inline float4 interp_closest(const TextureInfo& info,
- float x, float y)
- {
- const T *data = (const T*)info.data;
- const int width = info.width;
- const int height = info.height;
- int ix, iy;
- frac(x*(float)width, &ix);
- frac(y*(float)height, &iy);
- switch(info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- break;
- case EXTENSION_CLIP:
- if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- ATTR_FALLTHROUGH;
- case EXTENSION_EXTEND:
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- return read(data[ix + iy*width]);
- }
-
- static ccl_always_inline float4 interp_linear(const TextureInfo& info,
- float x, float y)
- {
- const T *data = (const T*)info.data;
- const int width = info.width;
- const int height = info.height;
- int ix, iy, nix, niy;
- const float tx = frac(x*(float)width - 0.5f, &ix);
- const float ty = frac(y*(float)height - 0.5f, &iy);
- switch(info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- nix = wrap_periodic(ix+1, width);
- niy = wrap_periodic(iy+1, height);
- break;
- case EXTENSION_CLIP:
- nix = ix + 1;
- niy = iy + 1;
- break;
- case EXTENSION_EXTEND:
- nix = wrap_clamp(ix+1, width);
- niy = wrap_clamp(iy+1, height);
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- return (1.0f - ty) * (1.0f - tx) * read(data, ix, iy, width, height) +
- (1.0f - ty) * tx * read(data, nix, iy, width, height) +
- ty * (1.0f - tx) * read(data, ix, niy, width, height) +
- ty * tx * read(data, nix, niy, width, height);
- }
-
- static ccl_always_inline float4 interp_cubic(const TextureInfo& info,
- float x, float y)
- {
- const T *data = (const T*)info.data;
- const int width = info.width;
- const int height = info.height;
- int ix, iy, nix, niy;
- const float tx = frac(x*(float)width - 0.5f, &ix);
- const float ty = frac(y*(float)height - 0.5f, &iy);
- int pix, piy, nnix, nniy;
- switch(info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- pix = wrap_periodic(ix-1, width);
- piy = wrap_periodic(iy-1, height);
- nix = wrap_periodic(ix+1, width);
- niy = wrap_periodic(iy+1, height);
- nnix = wrap_periodic(ix+2, width);
- nniy = wrap_periodic(iy+2, height);
- break;
- case EXTENSION_CLIP:
- pix = ix - 1;
- piy = iy - 1;
- nix = ix + 1;
- niy = iy + 1;
- nnix = ix + 2;
- nniy = iy + 2;
- break;
- case EXTENSION_EXTEND:
- pix = wrap_clamp(ix-1, width);
- piy = wrap_clamp(iy-1, height);
- nix = wrap_clamp(ix+1, width);
- niy = wrap_clamp(iy+1, height);
- nnix = wrap_clamp(ix+2, width);
- nniy = wrap_clamp(iy+2, height);
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- const int xc[4] = {pix, ix, nix, nnix};
- const int yc[4] = {piy, iy, niy, nniy};
- float u[4], v[4];
- /* Some helper macro to keep code reasonable size,
- * let compiler to inline all the matrix multiplications.
- */
+ { \
+ u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \
+ u[1] = ((0.5f * t - 1.0f) * t) * t + (2.0f / 3.0f); \
+ u[2] = ((-0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f / 6.0f); \
+ u[3] = (1.0f / 6.0f) * t * t * t; \
+ } \
+ (void)0
+
+ static ccl_always_inline float4 read(float4 r)
+ {
+ return r;
+ }
+
+ static ccl_always_inline float4 read(uchar4 r)
+ {
+ float f = 1.0f / 255.0f;
+ return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
+ }
+
+ static ccl_always_inline float4 read(uchar r)
+ {
+ float f = r * (1.0f / 255.0f);
+ return make_float4(f, f, f, 1.0f);
+ }
+
+ static ccl_always_inline float4 read(float r)
+ {
+ /* TODO(dingto): Optimize this, so interpolation
+ * happens on float instead of float4 */
+ return make_float4(r, r, r, 1.0f);
+ }
+
+ static ccl_always_inline float4 read(half4 r)
+ {
+ return half4_to_float4(r);
+ }
+
+ static ccl_always_inline float4 read(half r)
+ {
+ float f = half_to_float(r);
+ return make_float4(f, f, f, 1.0f);
+ }
+
+ static ccl_always_inline float4 read(uint16_t r)
+ {
+ float f = r * (1.0f / 65535.0f);
+ return make_float4(f, f, f, 1.0f);
+ }
+
+ static ccl_always_inline float4 read(ushort4 r)
+ {
+ float f = 1.0f / 65535.0f;
+ return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
+ }
+
+ static ccl_always_inline float4 read(const T *data, int x, int y, int width, int height)
+ {
+ if (x < 0 || y < 0 || x >= width || y >= height) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ return read(data[y * width + x]);
+ }
+
+ static ccl_always_inline int wrap_periodic(int x, int width)
+ {
+ x %= width;
+ if (x < 0)
+ x += width;
+ return x;
+ }
+
+ static ccl_always_inline int wrap_clamp(int x, int width)
+ {
+ return clamp(x, 0, width - 1);
+ }
+
+ static ccl_always_inline float frac(float x, int *ix)
+ {
+ int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0);
+ *ix = i;
+ return x - (float)i;
+ }
+
+ /* ******** 2D interpolation ******** */
+
+ static ccl_always_inline float4 interp_closest(const TextureInfo &info, float x, float y)
+ {
+ const T *data = (const T *)info.data;
+ const int width = info.width;
+ const int height = info.height;
+ int ix, iy;
+ frac(x * (float)width, &ix);
+ frac(y * (float)height, &iy);
+ switch (info.extension) {
+ case EXTENSION_REPEAT:
+ ix = wrap_periodic(ix, width);
+ iy = wrap_periodic(iy, height);
+ break;
+ case EXTENSION_CLIP:
+ if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ ATTR_FALLTHROUGH;
+ case EXTENSION_EXTEND:
+ ix = wrap_clamp(ix, width);
+ iy = wrap_clamp(iy, height);
+ break;
+ default:
+ kernel_assert(0);
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ return read(data[ix + iy * width]);
+ }
+
+ static ccl_always_inline float4 interp_linear(const TextureInfo &info, float x, float y)
+ {
+ const T *data = (const T *)info.data;
+ const int width = info.width;
+ const int height = info.height;
+ int ix, iy, nix, niy;
+ const float tx = frac(x * (float)width - 0.5f, &ix);
+ const float ty = frac(y * (float)height - 0.5f, &iy);
+ switch (info.extension) {
+ case EXTENSION_REPEAT:
+ ix = wrap_periodic(ix, width);
+ iy = wrap_periodic(iy, height);
+ nix = wrap_periodic(ix + 1, width);
+ niy = wrap_periodic(iy + 1, height);
+ break;
+ case EXTENSION_CLIP:
+ nix = ix + 1;
+ niy = iy + 1;
+ break;
+ case EXTENSION_EXTEND:
+ nix = wrap_clamp(ix + 1, width);
+ niy = wrap_clamp(iy + 1, height);
+ ix = wrap_clamp(ix, width);
+ iy = wrap_clamp(iy, height);
+ break;
+ default:
+ kernel_assert(0);
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ return (1.0f - ty) * (1.0f - tx) * read(data, ix, iy, width, height) +
+ (1.0f - ty) * tx * read(data, nix, iy, width, height) +
+ ty * (1.0f - tx) * read(data, ix, niy, width, height) +
+ ty * tx * read(data, nix, niy, width, height);
+ }
+
+ static ccl_always_inline float4 interp_cubic(const TextureInfo &info, float x, float y)
+ {
+ const T *data = (const T *)info.data;
+ const int width = info.width;
+ const int height = info.height;
+ int ix, iy, nix, niy;
+ const float tx = frac(x * (float)width - 0.5f, &ix);
+ const float ty = frac(y * (float)height - 0.5f, &iy);
+ int pix, piy, nnix, nniy;
+ switch (info.extension) {
+ case EXTENSION_REPEAT:
+ ix = wrap_periodic(ix, width);
+ iy = wrap_periodic(iy, height);
+ pix = wrap_periodic(ix - 1, width);
+ piy = wrap_periodic(iy - 1, height);
+ nix = wrap_periodic(ix + 1, width);
+ niy = wrap_periodic(iy + 1, height);
+ nnix = wrap_periodic(ix + 2, width);
+ nniy = wrap_periodic(iy + 2, height);
+ break;
+ case EXTENSION_CLIP:
+ pix = ix - 1;
+ piy = iy - 1;
+ nix = ix + 1;
+ niy = iy + 1;
+ nnix = ix + 2;
+ nniy = iy + 2;
+ break;
+ case EXTENSION_EXTEND:
+ pix = wrap_clamp(ix - 1, width);
+ piy = wrap_clamp(iy - 1, height);
+ nix = wrap_clamp(ix + 1, width);
+ niy = wrap_clamp(iy + 1, height);
+ nnix = wrap_clamp(ix + 2, width);
+ nniy = wrap_clamp(iy + 2, height);
+ ix = wrap_clamp(ix, width);
+ iy = wrap_clamp(iy, height);
+ break;
+ default:
+ kernel_assert(0);
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ const int xc[4] = {pix, ix, nix, nnix};
+ const int yc[4] = {piy, iy, niy, nniy};
+ float u[4], v[4];
+ /* Some helper macro to keep code reasonable size,
+ * let compiler to inline all the matrix multiplications.
+ */
#define DATA(x, y) (read(data, xc[x], yc[y], width, height))
#define TERM(col) \
- (v[col] * (u[0] * DATA(0, col) + \
- u[1] * DATA(1, col) + \
- u[2] * DATA(2, col) + \
- u[3] * DATA(3, col)))
+ (v[col] * \
+ (u[0] * DATA(0, col) + u[1] * DATA(1, col) + u[2] * DATA(2, col) + u[3] * DATA(3, col)))
- SET_CUBIC_SPLINE_WEIGHTS(u, tx);
- SET_CUBIC_SPLINE_WEIGHTS(v, ty);
+ SET_CUBIC_SPLINE_WEIGHTS(u, tx);
+ SET_CUBIC_SPLINE_WEIGHTS(v, ty);
- /* Actual interpolation. */
- return TERM(0) + TERM(1) + TERM(2) + TERM(3);
+ /* Actual interpolation. */
+ return TERM(0) + TERM(1) + TERM(2) + TERM(3);
#undef TERM
#undef DATA
- }
-
- static ccl_always_inline float4 interp(const TextureInfo& info,
- float x, float y)
- {
- if(UNLIKELY(!info.data)) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- switch(info.interpolation) {
- case INTERPOLATION_CLOSEST:
- return interp_closest(info, x, y);
- case INTERPOLATION_LINEAR:
- return interp_linear(info, x, y);
- default:
- return interp_cubic(info, x, y);
- }
- }
-
- /* ******** 3D interpolation ******** */
-
- static ccl_always_inline float4 interp_3d_closest(const TextureInfo& info,
- float x, float y, float z)
- {
- int width = info.width;
- int height = info.height;
- int depth = info.depth;
- int ix, iy, iz;
-
- frac(x*(float)width, &ix);
- frac(y*(float)height, &iy);
- frac(z*(float)depth, &iz);
-
- switch(info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- iz = wrap_periodic(iz, depth);
- break;
- case EXTENSION_CLIP:
- if(x < 0.0f || y < 0.0f || z < 0.0f ||
- x > 1.0f || y > 1.0f || z > 1.0f)
- {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- ATTR_FALLTHROUGH;
- case EXTENSION_EXTEND:
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- iz = wrap_clamp(iz, depth);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
-
- const T *data = (const T*)info.data;
- return read(data[ix + iy*width + iz*width*height]);
- }
-
- static ccl_always_inline float4 interp_3d_linear(const TextureInfo& info,
- float x, float y, float z)
- {
- int width = info.width;
- int height = info.height;
- int depth = info.depth;
- int ix, iy, iz;
- int nix, niy, niz;
-
- float tx = frac(x*(float)width - 0.5f, &ix);
- float ty = frac(y*(float)height - 0.5f, &iy);
- float tz = frac(z*(float)depth - 0.5f, &iz);
-
- switch(info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- iz = wrap_periodic(iz, depth);
-
- nix = wrap_periodic(ix+1, width);
- niy = wrap_periodic(iy+1, height);
- niz = wrap_periodic(iz+1, depth);
- break;
- case EXTENSION_CLIP:
- if(x < 0.0f || y < 0.0f || z < 0.0f ||
- x > 1.0f || y > 1.0f || z > 1.0f)
- {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- ATTR_FALLTHROUGH;
- case EXTENSION_EXTEND:
- nix = wrap_clamp(ix+1, width);
- niy = wrap_clamp(iy+1, height);
- niz = wrap_clamp(iz+1, depth);
-
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- iz = wrap_clamp(iz, depth);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
-
- const T *data = (const T*)info.data;
- float4 r;
-
- r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]);
- r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]);
- r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]);
- r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]);
-
- r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]);
- r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]);
- r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]);
- r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]);
-
- return r;
- }
-
- /* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are
- * causing stack overflow issue in this function unless it is inlined.
- *
- * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization
- * enabled.
- */
+ }
+
+ static ccl_always_inline float4 interp(const TextureInfo &info, float x, float y)
+ {
+ if (UNLIKELY(!info.data)) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ switch (info.interpolation) {
+ case INTERPOLATION_CLOSEST:
+ return interp_closest(info, x, y);
+ case INTERPOLATION_LINEAR:
+ return interp_linear(info, x, y);
+ default:
+ return interp_cubic(info, x, y);
+ }
+ }
+
+ /* ******** 3D interpolation ******** */
+
+ static ccl_always_inline float4 interp_3d_closest(const TextureInfo &info,
+ float x,
+ float y,
+ float z)
+ {
+ int width = info.width;
+ int height = info.height;
+ int depth = info.depth;
+ int ix, iy, iz;
+
+ frac(x * (float)width, &ix);
+ frac(y * (float)height, &iy);
+ frac(z * (float)depth, &iz);
+
+ switch (info.extension) {
+ case EXTENSION_REPEAT:
+ ix = wrap_periodic(ix, width);
+ iy = wrap_periodic(iy, height);
+ iz = wrap_periodic(iz, depth);
+ break;
+ case EXTENSION_CLIP:
+ if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ ATTR_FALLTHROUGH;
+ case EXTENSION_EXTEND:
+ ix = wrap_clamp(ix, width);
+ iy = wrap_clamp(iy, height);
+ iz = wrap_clamp(iz, depth);
+ break;
+ default:
+ kernel_assert(0);
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+
+ const T *data = (const T *)info.data;
+ return read(data[ix + iy * width + iz * width * height]);
+ }
+
+ static ccl_always_inline float4 interp_3d_linear(const TextureInfo &info,
+ float x,
+ float y,
+ float z)
+ {
+ int width = info.width;
+ int height = info.height;
+ int depth = info.depth;
+ int ix, iy, iz;
+ int nix, niy, niz;
+
+ float tx = frac(x * (float)width - 0.5f, &ix);
+ float ty = frac(y * (float)height - 0.5f, &iy);
+ float tz = frac(z * (float)depth - 0.5f, &iz);
+
+ switch (info.extension) {
+ case EXTENSION_REPEAT:
+ ix = wrap_periodic(ix, width);
+ iy = wrap_periodic(iy, height);
+ iz = wrap_periodic(iz, depth);
+
+ nix = wrap_periodic(ix + 1, width);
+ niy = wrap_periodic(iy + 1, height);
+ niz = wrap_periodic(iz + 1, depth);
+ break;
+ case EXTENSION_CLIP:
+ if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ ATTR_FALLTHROUGH;
+ case EXTENSION_EXTEND:
+ nix = wrap_clamp(ix + 1, width);
+ niy = wrap_clamp(iy + 1, height);
+ niz = wrap_clamp(iz + 1, depth);
+
+ ix = wrap_clamp(ix, width);
+ iy = wrap_clamp(iy, height);
+ iz = wrap_clamp(iz, depth);
+ break;
+ default:
+ kernel_assert(0);
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+
+ const T *data = (const T *)info.data;
+ float4 r;
+
+ r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) *
+ read(data[ix + iy * width + iz * width * height]);
+ r += (1.0f - tz) * (1.0f - ty) * tx * read(data[nix + iy * width + iz * width * height]);
+ r += (1.0f - tz) * ty * (1.0f - tx) * read(data[ix + niy * width + iz * width * height]);
+ r += (1.0f - tz) * ty * tx * read(data[nix + niy * width + iz * width * height]);
+
+ r += tz * (1.0f - ty) * (1.0f - tx) * read(data[ix + iy * width + niz * width * height]);
+ r += tz * (1.0f - ty) * tx * read(data[nix + iy * width + niz * width * height]);
+ r += tz * ty * (1.0f - tx) * read(data[ix + niy * width + niz * width * height]);
+ r += tz * ty * tx * read(data[nix + niy * width + niz * width * height]);
+
+ return r;
+ }
+
+ /* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are
+ * causing stack overflow issue in this function unless it is inlined.
+ *
+ * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization
+ * enabled.
+ */
#if defined(__GNUC__) || defined(__clang__)
- static ccl_always_inline
+ static ccl_always_inline
#else
- static ccl_never_inline
+ static ccl_never_inline
#endif
- float4 interp_3d_tricubic(const TextureInfo& info, float x, float y, float z)
- {
- int width = info.width;
- int height = info.height;
- int depth = info.depth;
- int ix, iy, iz;
- int nix, niy, niz;
- /* Tricubic b-spline interpolation. */
- const float tx = frac(x*(float)width - 0.5f, &ix);
- const float ty = frac(y*(float)height - 0.5f, &iy);
- const float tz = frac(z*(float)depth - 0.5f, &iz);
- int pix, piy, piz, nnix, nniy, nniz;
-
- switch(info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- iz = wrap_periodic(iz, depth);
-
- pix = wrap_periodic(ix-1, width);
- piy = wrap_periodic(iy-1, height);
- piz = wrap_periodic(iz-1, depth);
-
- nix = wrap_periodic(ix+1, width);
- niy = wrap_periodic(iy+1, height);
- niz = wrap_periodic(iz+1, depth);
-
- nnix = wrap_periodic(ix+2, width);
- nniy = wrap_periodic(iy+2, height);
- nniz = wrap_periodic(iz+2, depth);
- break;
- case EXTENSION_CLIP:
- if(x < 0.0f || y < 0.0f || z < 0.0f ||
- x > 1.0f || y > 1.0f || z > 1.0f)
- {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- ATTR_FALLTHROUGH;
- case EXTENSION_EXTEND:
- pix = wrap_clamp(ix-1, width);
- piy = wrap_clamp(iy-1, height);
- piz = wrap_clamp(iz-1, depth);
-
- nix = wrap_clamp(ix+1, width);
- niy = wrap_clamp(iy+1, height);
- niz = wrap_clamp(iz+1, depth);
-
- nnix = wrap_clamp(ix+2, width);
- nniy = wrap_clamp(iy+2, height);
- nniz = wrap_clamp(iz+2, depth);
-
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- iz = wrap_clamp(iz, depth);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
-
- const int xc[4] = {pix, ix, nix, nnix};
- const int yc[4] = {width * piy,
- width * iy,
- width * niy,
- width * nniy};
- const int zc[4] = {width * height * piz,
- width * height * iz,
- width * height * niz,
- width * height * nniz};
- float u[4], v[4], w[4];
-
- /* Some helper macro to keep code reasonable size,
- * let compiler to inline all the matrix multiplications.
- */
+ float4
+ interp_3d_tricubic(const TextureInfo &info, float x, float y, float z)
+ {
+ int width = info.width;
+ int height = info.height;
+ int depth = info.depth;
+ int ix, iy, iz;
+ int nix, niy, niz;
+ /* Tricubic b-spline interpolation. */
+ const float tx = frac(x * (float)width - 0.5f, &ix);
+ const float ty = frac(y * (float)height - 0.5f, &iy);
+ const float tz = frac(z * (float)depth - 0.5f, &iz);
+ int pix, piy, piz, nnix, nniy, nniz;
+
+ switch (info.extension) {
+ case EXTENSION_REPEAT:
+ ix = wrap_periodic(ix, width);
+ iy = wrap_periodic(iy, height);
+ iz = wrap_periodic(iz, depth);
+
+ pix = wrap_periodic(ix - 1, width);
+ piy = wrap_periodic(iy - 1, height);
+ piz = wrap_periodic(iz - 1, depth);
+
+ nix = wrap_periodic(ix + 1, width);
+ niy = wrap_periodic(iy + 1, height);
+ niz = wrap_periodic(iz + 1, depth);
+
+ nnix = wrap_periodic(ix + 2, width);
+ nniy = wrap_periodic(iy + 2, height);
+ nniz = wrap_periodic(iz + 2, depth);
+ break;
+ case EXTENSION_CLIP:
+ if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ ATTR_FALLTHROUGH;
+ case EXTENSION_EXTEND:
+ pix = wrap_clamp(ix - 1, width);
+ piy = wrap_clamp(iy - 1, height);
+ piz = wrap_clamp(iz - 1, depth);
+
+ nix = wrap_clamp(ix + 1, width);
+ niy = wrap_clamp(iy + 1, height);
+ niz = wrap_clamp(iz + 1, depth);
+
+ nnix = wrap_clamp(ix + 2, width);
+ nniy = wrap_clamp(iy + 2, height);
+ nniz = wrap_clamp(iz + 2, depth);
+
+ ix = wrap_clamp(ix, width);
+ iy = wrap_clamp(iy, height);
+ iz = wrap_clamp(iz, depth);
+ break;
+ default:
+ kernel_assert(0);
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+
+ const int xc[4] = {pix, ix, nix, nnix};
+ const int yc[4] = {width * piy, width * iy, width * niy, width * nniy};
+ const int zc[4] = {
+ width * height * piz, width * height * iz, width * height * niz, width * height * nniz};
+ float u[4], v[4], w[4];
+
+ /* Some helper macro to keep code reasonable size,
+ * let compiler to inline all the matrix multiplications.
+ */
#define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]]))
#define COL_TERM(col, row) \
- (v[col] * (u[0] * DATA(0, col, row) + \
- u[1] * DATA(1, col, row) + \
- u[2] * DATA(2, col, row) + \
- u[3] * DATA(3, col, row)))
+ (v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \
+ u[3] * DATA(3, col, row)))
#define ROW_TERM(row) \
- (w[row] * (COL_TERM(0, row) + \
- COL_TERM(1, row) + \
- COL_TERM(2, row) + \
- COL_TERM(3, row)))
+ (w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row)))
- SET_CUBIC_SPLINE_WEIGHTS(u, tx);
- SET_CUBIC_SPLINE_WEIGHTS(v, ty);
- SET_CUBIC_SPLINE_WEIGHTS(w, tz);
+ SET_CUBIC_SPLINE_WEIGHTS(u, tx);
+ SET_CUBIC_SPLINE_WEIGHTS(v, ty);
+ SET_CUBIC_SPLINE_WEIGHTS(w, tz);
- /* Actual interpolation. */
- const T *data = (const T*)info.data;
- return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
+ /* Actual interpolation. */
+ const T *data = (const T *)info.data;
+ return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
#undef COL_TERM
#undef ROW_TERM
#undef DATA
- }
-
- static ccl_always_inline float4 interp_3d(const TextureInfo& info,
- float x, float y, float z,
- InterpolationType interp)
- {
- if(UNLIKELY(!info.data))
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
- switch((interp == INTERPOLATION_NONE)? info.interpolation: interp) {
- case INTERPOLATION_CLOSEST:
- return interp_3d_closest(info, x, y, z);
- case INTERPOLATION_LINEAR:
- return interp_3d_linear(info, x, y, z);
- default:
- return interp_3d_tricubic(info, x, y, z);
- }
- }
+ }
+
+ static ccl_always_inline float4
+ interp_3d(const TextureInfo &info, float x, float y, float z, InterpolationType interp)
+ {
+ if (UNLIKELY(!info.data))
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+ switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) {
+ case INTERPOLATION_CLOSEST:
+ return interp_3d_closest(info, x, y, z);
+ case INTERPOLATION_LINEAR:
+ return interp_3d_linear(info, x, y, z);
+ default:
+ return interp_3d_tricubic(info, x, y, z);
+ }
+ }
#undef SET_CUBIC_SPLINE_WEIGHTS
};
ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
{
- const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
-
- switch(kernel_tex_type(id)) {
- case IMAGE_DATA_TYPE_HALF:
- return TextureInterpolator<half>::interp(info, x, y);
- case IMAGE_DATA_TYPE_BYTE:
- return TextureInterpolator<uchar>::interp(info, x, y);
- case IMAGE_DATA_TYPE_USHORT:
- return TextureInterpolator<uint16_t>::interp(info, x, y);
- case IMAGE_DATA_TYPE_FLOAT:
- return TextureInterpolator<float>::interp(info, x, y);
- case IMAGE_DATA_TYPE_HALF4:
- return TextureInterpolator<half4>::interp(info, x, y);
- case IMAGE_DATA_TYPE_BYTE4:
- return TextureInterpolator<uchar4>::interp(info, x, y);
- case IMAGE_DATA_TYPE_USHORT4:
- return TextureInterpolator<ushort4>::interp(info, x, y);
- case IMAGE_DATA_TYPE_FLOAT4:
- return TextureInterpolator<float4>::interp(info, x, y);
- default:
- assert(0);
- return make_float4(TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
- }
+ const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+
+ switch (kernel_tex_type(id)) {
+ case IMAGE_DATA_TYPE_HALF:
+ return TextureInterpolator<half>::interp(info, x, y);
+ case IMAGE_DATA_TYPE_BYTE:
+ return TextureInterpolator<uchar>::interp(info, x, y);
+ case IMAGE_DATA_TYPE_USHORT:
+ return TextureInterpolator<uint16_t>::interp(info, x, y);
+ case IMAGE_DATA_TYPE_FLOAT:
+ return TextureInterpolator<float>::interp(info, x, y);
+ case IMAGE_DATA_TYPE_HALF4:
+ return TextureInterpolator<half4>::interp(info, x, y);
+ case IMAGE_DATA_TYPE_BYTE4:
+ return TextureInterpolator<uchar4>::interp(info, x, y);
+ case IMAGE_DATA_TYPE_USHORT4:
+ return TextureInterpolator<ushort4>::interp(info, x, y);
+ case IMAGE_DATA_TYPE_FLOAT4:
+ return TextureInterpolator<float4>::interp(info, x, y);
+ default:
+ assert(0);
+ return make_float4(
+ TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
+ }
}
-ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
+ccl_device float4 kernel_tex_image_interp_3d(
+ KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
{
- const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
-
- switch(kernel_tex_type(id)) {
- case IMAGE_DATA_TYPE_HALF:
- return TextureInterpolator<half>::interp_3d(info, x, y, z, interp);
- case IMAGE_DATA_TYPE_BYTE:
- return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp);
- case IMAGE_DATA_TYPE_USHORT:
- return TextureInterpolator<uint16_t>::interp_3d(info, x, y, z, interp);
- case IMAGE_DATA_TYPE_FLOAT:
- return TextureInterpolator<float>::interp_3d(info, x, y, z, interp);
- case IMAGE_DATA_TYPE_HALF4:
- return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp);
- case IMAGE_DATA_TYPE_BYTE4:
- return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp);
- case IMAGE_DATA_TYPE_USHORT4:
- return TextureInterpolator<ushort4>::interp_3d(info, x, y, z, interp);
- case IMAGE_DATA_TYPE_FLOAT4:
- return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp);
- default:
- assert(0);
- return make_float4(TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
- }
+ const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+
+ switch (kernel_tex_type(id)) {
+ case IMAGE_DATA_TYPE_HALF:
+ return TextureInterpolator<half>::interp_3d(info, x, y, z, interp);
+ case IMAGE_DATA_TYPE_BYTE:
+ return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp);
+ case IMAGE_DATA_TYPE_USHORT:
+ return TextureInterpolator<uint16_t>::interp_3d(info, x, y, z, interp);
+ case IMAGE_DATA_TYPE_FLOAT:
+ return TextureInterpolator<float>::interp_3d(info, x, y, z, interp);
+ case IMAGE_DATA_TYPE_HALF4:
+ return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp);
+ case IMAGE_DATA_TYPE_BYTE4:
+ return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp);
+ case IMAGE_DATA_TYPE_USHORT4:
+ return TextureInterpolator<ushort4>::interp_3d(info, x, y, z, interp);
+ case IMAGE_DATA_TYPE_FLOAT4:
+ return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp);
+ default:
+ assert(0);
+ return make_float4(
+ TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 759b7e4c20d..9ca3f46b5b6 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -58,14 +58,15 @@
# include "kernel/split/kernel_next_iteration_setup.h"
# include "kernel/split/kernel_indirect_subsurface.h"
# include "kernel/split/kernel_buffer_update.h"
-# endif /* __SPLIT_KERNEL__ */
+# endif /* __SPLIT_KERNEL__ */
#else
-# define STUB_ASSERT(arch, name) assert(!(#name " kernel stub for architecture " #arch " was called!"))
+# define STUB_ASSERT(arch, name) \
+ assert(!(#name " kernel stub for architecture " #arch " was called!"))
# ifdef __SPLIT_KERNEL__
# include "kernel/split/kernel_data_init.h"
-# endif /* __SPLIT_KERNEL__ */
-#endif /* KERNEL_STUB */
+# endif /* __SPLIT_KERNEL__ */
+#endif /* KERNEL_STUB */
CCL_NAMESPACE_BEGIN
@@ -73,31 +74,22 @@ CCL_NAMESPACE_BEGIN
/* Path Tracing */
-void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
- float *buffer,
- int sample,
- int x, int y,
- int offset,
- int stride)
+void KERNEL_FUNCTION_FULL_NAME(path_trace)(
+ KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride)
{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, path_trace);
-#else
-# ifdef __BRANCHED_PATH__
- if(kernel_data.integrator.branched) {
- kernel_branched_path_trace(kg,
- buffer,
- sample,
- x, y,
- offset,
- stride);
- }
- else
-# endif
- {
- kernel_path_trace(kg, buffer, sample, x, y, offset, stride);
- }
-#endif /* KERNEL_STUB */
+# ifdef KERNEL_STUB
+ STUB_ASSERT(KERNEL_ARCH, path_trace);
+# else
+# ifdef __BRANCHED_PATH__
+ if (kernel_data.integrator.branched) {
+ kernel_branched_path_trace(kg, buffer, sample, x, y, offset, stride);
+ }
+ else
+# endif
+ {
+ kernel_path_trace(kg, buffer, sample, x, y, offset, stride);
+ }
+# endif /* KERNEL_STUB */
}
/* Film */
@@ -106,42 +98,32 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg,
uchar4 *rgba,
float *buffer,
float sample_scale,
- int x, int y,
+ int x,
+ int y,
int offset,
int stride)
{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, convert_to_byte);
-#else
- kernel_film_convert_to_byte(kg,
- rgba,
- buffer,
- sample_scale,
- x, y,
- offset,
- stride);
-#endif /* KERNEL_STUB */
+# ifdef KERNEL_STUB
+ STUB_ASSERT(KERNEL_ARCH, convert_to_byte);
+# else
+ kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
+# endif /* KERNEL_STUB */
}
void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
uchar4 *rgba,
float *buffer,
float sample_scale,
- int x, int y,
+ int x,
+ int y,
int offset,
int stride)
{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, convert_to_half_float);
-#else
- kernel_film_convert_to_half_float(kg,
- rgba,
- buffer,
- sample_scale,
- x, y,
- offset,
- stride);
-#endif /* KERNEL_STUB */
+# ifdef KERNEL_STUB
+ STUB_ASSERT(KERNEL_ARCH, convert_to_half_float);
+# else
+ kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
+# endif /* KERNEL_STUB */
}
/* Shader Evaluate */
@@ -155,60 +137,53 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
int offset,
int sample)
{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, shader);
-#else
- if(type >= SHADER_EVAL_BAKE) {
-# ifdef __BAKING__
- kernel_bake_evaluate(kg,
- input,
- output,
- (ShaderEvalType)type,
- filter,
- i,
- offset,
- sample);
-# endif
- }
- else if(type == SHADER_EVAL_DISPLACE) {
- kernel_displace_evaluate(kg, input, output, i);
- }
- else {
- kernel_background_evaluate(kg, input, output, i);
- }
-#endif /* KERNEL_STUB */
+# ifdef KERNEL_STUB
+ STUB_ASSERT(KERNEL_ARCH, shader);
+# else
+ if (type >= SHADER_EVAL_BAKE) {
+# ifdef __BAKING__
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, i, offset, sample);
+# endif
+ }
+ else if (type == SHADER_EVAL_DISPLACE) {
+ kernel_displace_evaluate(kg, input, output, i);
+ }
+ else {
+ kernel_background_evaluate(kg, input, output, i);
+ }
+# endif /* KERNEL_STUB */
}
-#else /* __SPLIT_KERNEL__ */
+#else /* __SPLIT_KERNEL__ */
/* Split Kernel Path Tracing */
-#ifdef KERNEL_STUB
-# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \
- void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \
- { \
- STUB_ASSERT(KERNEL_ARCH, name); \
- }
-
-# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
- void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \
- { \
- STUB_ASSERT(KERNEL_ARCH, name); \
- }
-#else
-# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \
- void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \
- { \
- kernel_##name(kg); \
- }
-
-# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
- void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \
- { \
- ccl_local type locals; \
- kernel_##name(kg, &locals); \
- }
-#endif /* KERNEL_STUB */
+# ifdef KERNEL_STUB
+# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \
+ void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \
+ { \
+ STUB_ASSERT(KERNEL_ARCH, name); \
+ }
+
+# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
+ void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \
+ { \
+ STUB_ASSERT(KERNEL_ARCH, name); \
+ }
+# else
+# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \
+ void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \
+ { \
+ kernel_##name(kg); \
+ }
+
+# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
+ void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \
+ { \
+ ccl_local type locals; \
+ kernel_##name(kg, &locals); \
+ }
+# endif /* KERNEL_STUB */
DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
@@ -219,7 +194,8 @@ DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_setup, uint)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_sort, ShaderSortLocals)
DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao,
+ BackgroundAOLocals)
DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
@@ -228,7 +204,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
-#endif /* __SPLIT_KERNEL__ */
+#endif /* __SPLIT_KERNEL__ */
#undef KERNEL_STUB
#undef STUB_ASSERT
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split.cpp
index c5e199b0a69..989f5e5aaa8 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_split.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_split.cpp
@@ -54,7 +54,7 @@
/* quiet unused define warnings */
#if defined(__KERNEL_SSE2__)
- /* do nothing */
+/* do nothing */
#endif
#include "kernel/kernel.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp
index 6ba3425a343..1b2e2516751 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp
@@ -34,7 +34,7 @@
# define __KERNEL_SSE41__
# define __KERNEL_AVX__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
#include "kernel/kernel.h"
#define KERNEL_ARCH cpu_avx
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp
index 76b2d77ebb8..43b8bfbf864 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp
@@ -35,7 +35,7 @@
# define __KERNEL_AVX__
# define __KERNEL_AVX2__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
#include "kernel/kernel.h"
#define KERNEL_ARCH cpu_avx2
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp
index b468b6f44c8..9743789179d 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp
@@ -29,7 +29,7 @@
# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
# define __KERNEL_SSE2__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
#include "kernel/kernel.h"
#define KERNEL_ARCH cpu_sse2
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp
index 3e5792d0b17..1bec7633500 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp
@@ -31,7 +31,7 @@
# define __KERNEL_SSE3__
# define __KERNEL_SSSE3__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
#include "kernel/kernel.h"
#define KERNEL_ARCH cpu_sse3
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp
index 3629f21cd29..c0efc2350e9 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp
@@ -32,7 +32,7 @@
# define __KERNEL_SSSE3__
# define __KERNEL_SSE41__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
#include "kernel/kernel.h"
#define KERNEL_ARCH cpu_sse41
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp
index 57530c88710..173be8e93ce 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp
@@ -27,7 +27,7 @@
# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
# define __KERNEL_SSE2__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
#include "kernel/kernel.h"
#define KERNEL_ARCH cpu_sse2
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp
index c607753bc4b..31273fe3344 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp
@@ -29,7 +29,7 @@
# define __KERNEL_SSE3__
# define __KERNEL_SSSE3__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
#include "kernel/kernel.h"
#define KERNEL_ARCH cpu_sse3
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp
index a278554731c..1d020b7fee6 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp
@@ -30,7 +30,7 @@
# define __KERNEL_SSSE3__
# define __KERNEL_SSE41__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
#include "kernel/kernel.h"
#define KERNEL_ARCH cpu_sse41
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_config.h b/intern/cycles/kernel/kernels/cuda/kernel_config.h
index 6d41dc15785..d9f349837a8 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_config.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_config.h
@@ -81,7 +81,6 @@
# define CUDA_KERNEL_MAX_REGISTERS 64
# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 72
-
/* unknown architecture */
#else
# error "Unknown or unsupported CUDA architecture, can't determine launch bounds"
@@ -96,18 +95,19 @@
* given the maximum number of registers per thread. */
#define CUDA_LAUNCH_BOUNDS(threads_block_width, thread_num_registers) \
- __launch_bounds__( \
- threads_block_width*threads_block_width, \
- CUDA_MULTIPRESSOR_MAX_REGISTERS/(threads_block_width*threads_block_width*thread_num_registers) \
- )
+ __launch_bounds__(threads_block_width *threads_block_width, \
+ CUDA_MULTIPRESSOR_MAX_REGISTERS / \
+ (threads_block_width * threads_block_width * thread_num_registers))
/* sanity checks */
-#if CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH > CUDA_BLOCK_MAX_THREADS
+#if CUDA_THREADS_BLOCK_WIDTH * CUDA_THREADS_BLOCK_WIDTH > CUDA_BLOCK_MAX_THREADS
# error "Maximum number of threads per block exceeded"
#endif
-#if CUDA_MULTIPRESSOR_MAX_REGISTERS/(CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH*CUDA_KERNEL_MAX_REGISTERS) > CUDA_MULTIPROCESSOR_MAX_BLOCKS
+#if CUDA_MULTIPRESSOR_MAX_REGISTERS / \
+ (CUDA_THREADS_BLOCK_WIDTH * CUDA_THREADS_BLOCK_WIDTH * CUDA_KERNEL_MAX_REGISTERS) > \
+ CUDA_MULTIPROCESSOR_MAX_BLOCKS
# error "Maximum number of blocks per multiprocessor exceeded"
#endif
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
index 37cfbbcb235..7c68f08ea10 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
@@ -17,174 +17,165 @@
/* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */
ccl_device float cubic_w0(float a)
{
- return (1.0f/6.0f)*(a*(a*(-a + 3.0f) - 3.0f) + 1.0f);
+ return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f);
}
ccl_device float cubic_w1(float a)
{
- return (1.0f/6.0f)*(a*a*(3.0f*a - 6.0f) + 4.0f);
+ return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f);
}
ccl_device float cubic_w2(float a)
{
- return (1.0f/6.0f)*(a*(a*(-3.0f*a + 3.0f) + 3.0f) + 1.0f);
+ return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f);
}
ccl_device float cubic_w3(float a)
{
- return (1.0f/6.0f)*(a*a*a);
+ return (1.0f / 6.0f) * (a * a * a);
}
/* g0 and g1 are the two amplitude functions. */
ccl_device float cubic_g0(float a)
{
- return cubic_w0(a) + cubic_w1(a);
+ return cubic_w0(a) + cubic_w1(a);
}
ccl_device float cubic_g1(float a)
{
- return cubic_w2(a) + cubic_w3(a);
+ return cubic_w2(a) + cubic_w3(a);
}
/* h0 and h1 are the two offset functions */
ccl_device float cubic_h0(float a)
{
- /* Note +0.5 offset to compensate for CUDA linear filtering convention. */
- return -1.0f + cubic_w1(a) / (cubic_w0(a) + cubic_w1(a)) + 0.5f;
+ /* Note +0.5 offset to compensate for CUDA linear filtering convention. */
+ return -1.0f + cubic_w1(a) / (cubic_w0(a) + cubic_w1(a)) + 0.5f;
}
ccl_device float cubic_h1(float a)
{
- return 1.0f + cubic_w3(a) / (cubic_w2(a) + cubic_w3(a)) + 0.5f;
+ return 1.0f + cubic_w3(a) / (cubic_w2(a) + cubic_w3(a)) + 0.5f;
}
/* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */
template<typename T>
-ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo& info, CUtexObject tex, float x, float y)
+ccl_device T
+kernel_tex_image_interp_bicubic(const TextureInfo &info, CUtexObject tex, float x, float y)
{
- x = (x * info.width) - 0.5f;
- y = (y * info.height) - 0.5f;
-
- float px = floor(x);
- float py = floor(y);
- float fx = x - px;
- float fy = y - py;
-
- float g0x = cubic_g0(fx);
- float g1x = cubic_g1(fx);
- float x0 = (px + cubic_h0(fx)) / info.width;
- float x1 = (px + cubic_h1(fx)) / info.width;
- float y0 = (py + cubic_h0(fy)) / info.height;
- float y1 = (py + cubic_h1(fy)) / info.height;
-
- return cubic_g0(fy) * (g0x * tex2D<T>(tex, x0, y0) +
- g1x * tex2D<T>(tex, x1, y0)) +
- cubic_g1(fy) * (g0x * tex2D<T>(tex, x0, y1) +
- g1x * tex2D<T>(tex, x1, y1));
+ x = (x * info.width) - 0.5f;
+ y = (y * info.height) - 0.5f;
+
+ float px = floor(x);
+ float py = floor(y);
+ float fx = x - px;
+ float fy = y - py;
+
+ float g0x = cubic_g0(fx);
+ float g1x = cubic_g1(fx);
+ float x0 = (px + cubic_h0(fx)) / info.width;
+ float x1 = (px + cubic_h1(fx)) / info.width;
+ float y0 = (py + cubic_h0(fy)) / info.height;
+ float y1 = (py + cubic_h1(fy)) / info.height;
+
+ return cubic_g0(fy) * (g0x * tex2D<T>(tex, x0, y0) + g1x * tex2D<T>(tex, x1, y0)) +
+ cubic_g1(fy) * (g0x * tex2D<T>(tex, x0, y1) + g1x * tex2D<T>(tex, x1, y1));
}
/* Fast tricubic texture lookup using 8 trilinear lookups. */
template<typename T>
-ccl_device T kernel_tex_image_interp_bicubic_3d(const TextureInfo& info, CUtexObject tex, float x, float y, float z)
+ccl_device T kernel_tex_image_interp_bicubic_3d(
+ const TextureInfo &info, CUtexObject tex, float x, float y, float z)
{
- x = (x * info.width) - 0.5f;
- y = (y * info.height) - 0.5f;
- z = (z * info.depth) - 0.5f;
-
- float px = floor(x);
- float py = floor(y);
- float pz = floor(z);
- float fx = x - px;
- float fy = y - py;
- float fz = z - pz;
-
- float g0x = cubic_g0(fx);
- float g1x = cubic_g1(fx);
- float g0y = cubic_g0(fy);
- float g1y = cubic_g1(fy);
- float g0z = cubic_g0(fz);
- float g1z = cubic_g1(fz);
-
- float x0 = (px + cubic_h0(fx)) / info.width;
- float x1 = (px + cubic_h1(fx)) / info.width;
- float y0 = (py + cubic_h0(fy)) / info.height;
- float y1 = (py + cubic_h1(fy)) / info.height;
- float z0 = (pz + cubic_h0(fz)) / info.depth;
- float z1 = (pz + cubic_h1(fz)) / info.depth;
-
- return g0z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z0) +
- g1x * tex3D<T>(tex, x1, y0, z0)) +
- g1y * (g0x * tex3D<T>(tex, x0, y1, z0) +
- g1x * tex3D<T>(tex, x1, y1, z0))) +
- g1z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z1) +
- g1x * tex3D<T>(tex, x1, y0, z1)) +
- g1y * (g0x * tex3D<T>(tex, x0, y1, z1) +
- g1x * tex3D<T>(tex, x1, y1, z1)));
+ x = (x * info.width) - 0.5f;
+ y = (y * info.height) - 0.5f;
+ z = (z * info.depth) - 0.5f;
+
+ float px = floor(x);
+ float py = floor(y);
+ float pz = floor(z);
+ float fx = x - px;
+ float fy = y - py;
+ float fz = z - pz;
+
+ float g0x = cubic_g0(fx);
+ float g1x = cubic_g1(fx);
+ float g0y = cubic_g0(fy);
+ float g1y = cubic_g1(fy);
+ float g0z = cubic_g0(fz);
+ float g1z = cubic_g1(fz);
+
+ float x0 = (px + cubic_h0(fx)) / info.width;
+ float x1 = (px + cubic_h1(fx)) / info.width;
+ float y0 = (py + cubic_h0(fy)) / info.height;
+ float y1 = (py + cubic_h1(fy)) / info.height;
+ float z0 = (pz + cubic_h0(fz)) / info.depth;
+ float z1 = (pz + cubic_h1(fz)) / info.depth;
+
+ return g0z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z0) + g1x * tex3D<T>(tex, x1, y0, z0)) +
+ g1y * (g0x * tex3D<T>(tex, x0, y1, z0) + g1x * tex3D<T>(tex, x1, y1, z0))) +
+ g1z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z1) + g1x * tex3D<T>(tex, x1, y0, z1)) +
+ g1y * (g0x * tex3D<T>(tex, x0, y1, z1) + g1x * tex3D<T>(tex, x1, y1, z1)));
}
ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
{
- const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
- CUtexObject tex = (CUtexObject)info.data;
-
- /* float4, byte4, ushort4 and half4 */
- const int texture_type = kernel_tex_type(id);
- if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
- texture_type == IMAGE_DATA_TYPE_BYTE4 ||
- texture_type == IMAGE_DATA_TYPE_HALF4 ||
- texture_type == IMAGE_DATA_TYPE_USHORT4)
- {
- if(info.interpolation == INTERPOLATION_CUBIC) {
- return kernel_tex_image_interp_bicubic<float4>(info, tex, x, y);
- }
- else {
- return tex2D<float4>(tex, x, y);
- }
- }
- /* float, byte and half */
- else {
- float f;
-
- if(info.interpolation == INTERPOLATION_CUBIC) {
- f = kernel_tex_image_interp_bicubic<float>(info, tex, x, y);
- }
- else {
- f = tex2D<float>(tex, x, y);
- }
-
- return make_float4(f, f, f, 1.0f);
- }
+ const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+ CUtexObject tex = (CUtexObject)info.data;
+
+ /* float4, byte4, ushort4 and half4 */
+ const int texture_type = kernel_tex_type(id);
+ if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
+ texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) {
+ if (info.interpolation == INTERPOLATION_CUBIC) {
+ return kernel_tex_image_interp_bicubic<float4>(info, tex, x, y);
+ }
+ else {
+ return tex2D<float4>(tex, x, y);
+ }
+ }
+ /* float, byte and half */
+ else {
+ float f;
+
+ if (info.interpolation == INTERPOLATION_CUBIC) {
+ f = kernel_tex_image_interp_bicubic<float>(info, tex, x, y);
+ }
+ else {
+ f = tex2D<float>(tex, x, y);
+ }
+
+ return make_float4(f, f, f, 1.0f);
+ }
}
-ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
+ccl_device float4 kernel_tex_image_interp_3d(
+ KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
{
- const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
- CUtexObject tex = (CUtexObject)info.data;
- uint interpolation = (interp == INTERPOLATION_NONE)? info.interpolation: interp;
-
- const int texture_type = kernel_tex_type(id);
- if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
- texture_type == IMAGE_DATA_TYPE_BYTE4 ||
- texture_type == IMAGE_DATA_TYPE_HALF4 ||
- texture_type == IMAGE_DATA_TYPE_USHORT4)
- {
- if(interpolation == INTERPOLATION_CUBIC) {
- return kernel_tex_image_interp_bicubic_3d<float4>(info, tex, x, y, z);
- }
- else {
- return tex3D<float4>(tex, x, y, z);
- }
- }
- else {
- float f;
-
- if(interpolation == INTERPOLATION_CUBIC) {
- f = kernel_tex_image_interp_bicubic_3d<float>(info, tex, x, y, z);
- }
- else {
- f = tex3D<float>(tex, x, y, z);
- }
-
- return make_float4(f, f, f, 1.0f);
- }
+ const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+ CUtexObject tex = (CUtexObject)info.data;
+ uint interpolation = (interp == INTERPOLATION_NONE) ? info.interpolation : interp;
+
+ const int texture_type = kernel_tex_type(id);
+ if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
+ texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) {
+ if (interpolation == INTERPOLATION_CUBIC) {
+ return kernel_tex_image_interp_bicubic_3d<float4>(info, tex, x, y, z);
+ }
+ else {
+ return tex3D<float4>(tex, x, y, z);
+ }
+ }
+ else {
+ float f;
+
+ if (interpolation == INTERPOLATION_CUBIC) {
+ f = kernel_tex_image_interp_bicubic_3d<float>(info, tex, x, y, z);
+ }
+ else {
+ f = tex3D<float>(tex, x, y, z);
+ }
+
+ return make_float4(f, f, f, 1.0f);
+ }
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
index 79af831c2fb..b6390679331 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
+++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
@@ -16,254 +16,257 @@
/* For OpenCL we do manual lookup and interpolation. */
-ccl_device_inline ccl_global TextureInfo* kernel_tex_info(KernelGlobals *kg, uint id) {
- const uint tex_offset = id
-#define KERNEL_TEX(type, name) + 1
+ccl_device_inline ccl_global TextureInfo *kernel_tex_info(KernelGlobals *kg, uint id)
+{
+ const uint tex_offset = id
+#define KERNEL_TEX(type, name) +1
#include "kernel/kernel_textures.h"
- ;
+ ;
- return &((ccl_global TextureInfo*)kg->buffers[0])[tex_offset];
+ return &((ccl_global TextureInfo *)kg->buffers[0])[tex_offset];
}
-#define tex_fetch(type, info, index) ((ccl_global type*)(kg->buffers[info->cl_buffer] + info->data))[(index)]
+#define tex_fetch(type, info, index) \
+ ((ccl_global type *)(kg->buffers[info->cl_buffer] + info->data))[(index)]
ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width)
{
- x %= width;
- if(x < 0)
- x += width;
- return x;
+ x %= width;
+ if (x < 0)
+ x += width;
+ return x;
}
ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width)
{
- return clamp(x, 0, width-1);
+ return clamp(x, 0, width - 1);
}
-ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, const ccl_global TextureInfo *info, int id, int offset)
+ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg,
+ const ccl_global TextureInfo *info,
+ int id,
+ int offset)
{
- const int texture_type = kernel_tex_type(id);
-
- /* Float4 */
- if(texture_type == IMAGE_DATA_TYPE_FLOAT4) {
- return tex_fetch(float4, info, offset);
- }
- /* Byte4 */
- else if(texture_type == IMAGE_DATA_TYPE_BYTE4) {
- uchar4 r = tex_fetch(uchar4, info, offset);
- float f = 1.0f/255.0f;
- return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
- }
- /* Ushort4 */
- else if(texture_type == IMAGE_DATA_TYPE_USHORT4) {
- ushort4 r = tex_fetch(ushort4, info, offset);
- float f = 1.0f/65535.f;
- return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
- }
- /* Float */
- else if(texture_type == IMAGE_DATA_TYPE_FLOAT) {
- float f = tex_fetch(float, info, offset);
- return make_float4(f, f, f, 1.0f);
- }
- /* UShort */
- else if(texture_type == IMAGE_DATA_TYPE_USHORT) {
- ushort r = tex_fetch(ushort, info, offset);
- float f = r * (1.0f / 65535.0f);
- return make_float4(f, f, f, 1.0f);
- }
- /* Byte */
+ const int texture_type = kernel_tex_type(id);
+
+ /* Float4 */
+ if (texture_type == IMAGE_DATA_TYPE_FLOAT4) {
+ return tex_fetch(float4, info, offset);
+ }
+ /* Byte4 */
+ else if (texture_type == IMAGE_DATA_TYPE_BYTE4) {
+ uchar4 r = tex_fetch(uchar4, info, offset);
+ float f = 1.0f / 255.0f;
+ return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
+ }
+ /* Ushort4 */
+ else if (texture_type == IMAGE_DATA_TYPE_USHORT4) {
+ ushort4 r = tex_fetch(ushort4, info, offset);
+ float f = 1.0f / 65535.f;
+ return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
+ }
+ /* Float */
+ else if (texture_type == IMAGE_DATA_TYPE_FLOAT) {
+ float f = tex_fetch(float, info, offset);
+ return make_float4(f, f, f, 1.0f);
+ }
+ /* UShort */
+ else if (texture_type == IMAGE_DATA_TYPE_USHORT) {
+ ushort r = tex_fetch(ushort, info, offset);
+ float f = r * (1.0f / 65535.0f);
+ return make_float4(f, f, f, 1.0f);
+ }
+ /* Byte */
#ifdef cl_khr_fp16
- /* half and half4 are optional in OpenCL */
- else if(texture_type == IMAGE_DATA_TYPE_HALF) {
- float f = tex_fetch(half, info, offset);
- return make_float4(f, f, f, 1.0f);
- }
- else if(texture_type == IMAGE_DATA_TYPE_HALF4) {
- half4 r = tex_fetch(half4, info, offset);
- return make_float4(r.x, r.y, r.z, r.w);
- }
+ /* half and half4 are optional in OpenCL */
+ else if (texture_type == IMAGE_DATA_TYPE_HALF) {
+ float f = tex_fetch(half, info, offset);
+ return make_float4(f, f, f, 1.0f);
+ }
+ else if (texture_type == IMAGE_DATA_TYPE_HALF4) {
+ half4 r = tex_fetch(half4, info, offset);
+ return make_float4(r.x, r.y, r.z, r.w);
+ }
#endif
- else {
- uchar r = tex_fetch(uchar, info, offset);
- float f = r * (1.0f/255.0f);
- return make_float4(f, f, f, 1.0f);
- }
+ else {
+ uchar r = tex_fetch(uchar, info, offset);
+ float f = r * (1.0f / 255.0f);
+ return make_float4(f, f, f, 1.0f);
+ }
}
ccl_device_inline float4 svm_image_texture_read_2d(KernelGlobals *kg, int id, int x, int y)
{
- const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
-
- /* Wrap */
- if(info->extension == EXTENSION_REPEAT) {
- x = svm_image_texture_wrap_periodic(x, info->width);
- y = svm_image_texture_wrap_periodic(y, info->height);
- }
- else {
- x = svm_image_texture_wrap_clamp(x, info->width);
- y = svm_image_texture_wrap_clamp(y, info->height);
- }
-
- int offset = x + info->width * y;
- return svm_image_texture_read(kg, info, id, offset);
+ const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
+
+ /* Wrap */
+ if (info->extension == EXTENSION_REPEAT) {
+ x = svm_image_texture_wrap_periodic(x, info->width);
+ y = svm_image_texture_wrap_periodic(y, info->height);
+ }
+ else {
+ x = svm_image_texture_wrap_clamp(x, info->width);
+ y = svm_image_texture_wrap_clamp(y, info->height);
+ }
+
+ int offset = x + info->width * y;
+ return svm_image_texture_read(kg, info, id, offset);
}
ccl_device_inline float4 svm_image_texture_read_3d(KernelGlobals *kg, int id, int x, int y, int z)
{
- const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
-
- /* Wrap */
- if(info->extension == EXTENSION_REPEAT) {
- x = svm_image_texture_wrap_periodic(x, info->width);
- y = svm_image_texture_wrap_periodic(y, info->height);
- z = svm_image_texture_wrap_periodic(z, info->depth);
- }
- else {
- x = svm_image_texture_wrap_clamp(x, info->width);
- y = svm_image_texture_wrap_clamp(y, info->height);
- z = svm_image_texture_wrap_clamp(z, info->depth);
- }
-
- int offset = x + info->width * y + info->width * info->height * z;
- return svm_image_texture_read(kg, info, id, offset);
+ const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
+
+ /* Wrap */
+ if (info->extension == EXTENSION_REPEAT) {
+ x = svm_image_texture_wrap_periodic(x, info->width);
+ y = svm_image_texture_wrap_periodic(y, info->height);
+ z = svm_image_texture_wrap_periodic(z, info->depth);
+ }
+ else {
+ x = svm_image_texture_wrap_clamp(x, info->width);
+ y = svm_image_texture_wrap_clamp(y, info->height);
+ z = svm_image_texture_wrap_clamp(z, info->depth);
+ }
+
+ int offset = x + info->width * y + info->width * info->height * z;
+ return svm_image_texture_read(kg, info, id, offset);
}
-
ccl_device_inline float svm_image_texture_frac(float x, int *ix)
{
- int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
- *ix = i;
- return x - (float)i;
+ int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0);
+ *ix = i;
+ return x - (float)i;
}
#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
- { \
- u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \
- u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \
- u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
- u[3] = (1.0f / 6.0f) * t * t * t; \
- } (void) 0
+ { \
+ u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \
+ u[1] = ((0.5f * t - 1.0f) * t) * t + (2.0f / 3.0f); \
+ u[2] = ((-0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f / 6.0f); \
+ u[3] = (1.0f / 6.0f) * t * t * t; \
+ } \
+ (void)0
ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
{
- const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
-
- if(info->extension == EXTENSION_CLIP) {
- if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- }
-
- if(info->interpolation == INTERPOLATION_CLOSEST) {
- /* Closest interpolation. */
- int ix, iy;
- svm_image_texture_frac(x*info->width, &ix);
- svm_image_texture_frac(y*info->height, &iy);
-
- return svm_image_texture_read_2d(kg, id, ix, iy);
- }
- else if(info->interpolation == INTERPOLATION_LINEAR) {
- /* Bilinear interpolation. */
- int ix, iy;
- float tx = svm_image_texture_frac(x*info->width - 0.5f, &ix);
- float ty = svm_image_texture_frac(y*info->height - 0.5f, &iy);
-
- float4 r;
- r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read_2d(kg, id, ix, iy);
- r += (1.0f - ty)*tx*svm_image_texture_read_2d(kg, id, ix+1, iy);
- r += ty*(1.0f - tx)*svm_image_texture_read_2d(kg, id, ix, iy+1);
- r += ty*tx*svm_image_texture_read_2d(kg, id, ix+1, iy+1);
- return r;
- }
- else {
- /* Bicubic interpolation. */
- int ix, iy;
- float tx = svm_image_texture_frac(x*info->width - 0.5f, &ix);
- float ty = svm_image_texture_frac(y*info->height - 0.5f, &iy);
-
- float u[4], v[4];
- SET_CUBIC_SPLINE_WEIGHTS(u, tx);
- SET_CUBIC_SPLINE_WEIGHTS(v, ty);
-
- float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
- for(int y = 0; y < 4; y++) {
- for(int x = 0; x < 4; x++) {
- float weight = u[x]*v[y];
- r += weight*svm_image_texture_read_2d(kg, id, ix+x-1, iy+y-1);
- }
- }
- return r;
- }
+ const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
+
+ if (info->extension == EXTENSION_CLIP) {
+ if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ }
+
+ if (info->interpolation == INTERPOLATION_CLOSEST) {
+ /* Closest interpolation. */
+ int ix, iy;
+ svm_image_texture_frac(x * info->width, &ix);
+ svm_image_texture_frac(y * info->height, &iy);
+
+ return svm_image_texture_read_2d(kg, id, ix, iy);
+ }
+ else if (info->interpolation == INTERPOLATION_LINEAR) {
+ /* Bilinear interpolation. */
+ int ix, iy;
+ float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix);
+ float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy);
+
+ float4 r;
+ r = (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_2d(kg, id, ix, iy);
+ r += (1.0f - ty) * tx * svm_image_texture_read_2d(kg, id, ix + 1, iy);
+ r += ty * (1.0f - tx) * svm_image_texture_read_2d(kg, id, ix, iy + 1);
+ r += ty * tx * svm_image_texture_read_2d(kg, id, ix + 1, iy + 1);
+ return r;
+ }
+ else {
+ /* Bicubic interpolation. */
+ int ix, iy;
+ float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix);
+ float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy);
+
+ float u[4], v[4];
+ SET_CUBIC_SPLINE_WEIGHTS(u, tx);
+ SET_CUBIC_SPLINE_WEIGHTS(v, ty);
+
+ float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+ for (int y = 0; y < 4; y++) {
+ for (int x = 0; x < 4; x++) {
+ float weight = u[x] * v[y];
+ r += weight * svm_image_texture_read_2d(kg, id, ix + x - 1, iy + y - 1);
+ }
+ }
+ return r;
+ }
}
-
-ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, int interp)
+ccl_device float4
+kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, int interp)
{
- const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
-
- if(info->extension == EXTENSION_CLIP) {
- if(x < 0.0f || y < 0.0f || z < 0.0f ||
- x > 1.0f || y > 1.0f || z > 1.0f)
- {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- }
-
- uint interpolation = (interp == INTERPOLATION_NONE)? info->interpolation: interp;
-
- if(interpolation == INTERPOLATION_CLOSEST) {
- /* Closest interpolation. */
- int ix, iy, iz;
- svm_image_texture_frac(x*info->width, &ix);
- svm_image_texture_frac(y*info->height, &iy);
- svm_image_texture_frac(z*info->depth, &iz);
-
- return svm_image_texture_read_3d(kg, id, ix, iy, iz);
- }
- else if(interpolation == INTERPOLATION_LINEAR) {
- /* Bilinear interpolation. */
- int ix, iy, iz;
- float tx = svm_image_texture_frac(x*info->width - 0.5f, &ix);
- float ty = svm_image_texture_frac(y*info->height - 0.5f, &iy);
- float tz = svm_image_texture_frac(z*info->depth - 0.5f, &iz);
-
- float4 r;
- r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read_3d(kg, id, ix, iy, iz);
- r += (1.0f - tz)*(1.0f - ty)*tx*svm_image_texture_read_3d(kg, id, ix+1, iy, iz);
- r += (1.0f - tz)*ty*(1.0f - tx)*svm_image_texture_read_3d(kg, id, ix, iy+1, iz);
- r += (1.0f - tz)*ty*tx*svm_image_texture_read_3d(kg, id, ix+1, iy+1, iz);
-
- r += tz*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read_3d(kg, id, ix, iy, iz+1);
- r += tz*(1.0f - ty)*tx*svm_image_texture_read_3d(kg, id, ix+1, iy, iz+1);
- r += tz*ty*(1.0f - tx)*svm_image_texture_read_3d(kg, id, ix, iy+1, iz+1);
- r += tz*ty*tx*svm_image_texture_read_3d(kg, id, ix+1, iy+1, iz+1);
- return r;
- }
- else {
- /* Bicubic interpolation. */
- int ix, iy, iz;
- float tx = svm_image_texture_frac(x*info->width - 0.5f, &ix);
- float ty = svm_image_texture_frac(y*info->height - 0.5f, &iy);
- float tz = svm_image_texture_frac(z*info->depth - 0.5f, &iz);
-
- float u[4], v[4], w[4];
- SET_CUBIC_SPLINE_WEIGHTS(u, tx);
- SET_CUBIC_SPLINE_WEIGHTS(v, ty);
- SET_CUBIC_SPLINE_WEIGHTS(w, tz);
-
- float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
- for(int z = 0; z < 4; z++) {
- for(int y = 0; y < 4; y++) {
- for(int x = 0; x < 4; x++) {
- float weight = u[x]*v[y]*w[z];
- r += weight*svm_image_texture_read_3d(kg, id, ix+x-1, iy+y-1, iz+z-1);
- }
- }
- }
- return r;
- }
+ const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
+
+ if (info->extension == EXTENSION_CLIP) {
+ if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ }
+
+ uint interpolation = (interp == INTERPOLATION_NONE) ? info->interpolation : interp;
+
+ if (interpolation == INTERPOLATION_CLOSEST) {
+ /* Closest interpolation. */
+ int ix, iy, iz;
+ svm_image_texture_frac(x * info->width, &ix);
+ svm_image_texture_frac(y * info->height, &iy);
+ svm_image_texture_frac(z * info->depth, &iz);
+
+ return svm_image_texture_read_3d(kg, id, ix, iy, iz);
+ }
+ else if (interpolation == INTERPOLATION_LINEAR) {
+ /* Bilinear interpolation. */
+ int ix, iy, iz;
+ float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix);
+ float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy);
+ float tz = svm_image_texture_frac(z * info->depth - 0.5f, &iz);
+
+ float4 r;
+ r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy, iz);
+ r += (1.0f - tz) * (1.0f - ty) * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy, iz);
+ r += (1.0f - tz) * ty * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy + 1, iz);
+ r += (1.0f - tz) * ty * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy + 1, iz);
+
+ r += tz * (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy, iz + 1);
+ r += tz * (1.0f - ty) * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy, iz + 1);
+ r += tz * ty * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy + 1, iz + 1);
+ r += tz * ty * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy + 1, iz + 1);
+ return r;
+ }
+ else {
+ /* Bicubic interpolation. */
+ int ix, iy, iz;
+ float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix);
+ float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy);
+ float tz = svm_image_texture_frac(z * info->depth - 0.5f, &iz);
+
+ float u[4], v[4], w[4];
+ SET_CUBIC_SPLINE_WEIGHTS(u, tx);
+ SET_CUBIC_SPLINE_WEIGHTS(v, ty);
+ SET_CUBIC_SPLINE_WEIGHTS(w, tz);
+
+ float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+ for (int z = 0; z < 4; z++) {
+ for (int y = 0; y < 4; y++) {
+ for (int x = 0; x < 4; x++) {
+ float weight = u[x] * v[y] * w[z];
+ r += weight * svm_image_texture_read_3d(kg, id, ix + x - 1, iy + y - 1, iz + z - 1);
+ }
+ }
+ }
+ return r;
+ }
}
#undef SET_CUBIC_SPLINE_WEIGHTS
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
index 05e1ddf6da2..e123b4cd6ec 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
@@ -14,50 +14,53 @@
* limitations under the License.
*/
-#define KERNEL_NAME_JOIN(a, b) a ## _ ## b
+#define KERNEL_NAME_JOIN(a, b) a##_##b
#define KERNEL_NAME_EVAL(a, b) KERNEL_NAME_JOIN(a, b)
-__kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME)(
- ccl_global char *kg_global,
- ccl_constant KernelData *data,
+__kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace,
+ KERNEL_NAME)(ccl_global char *kg_global,
+ ccl_constant KernelData *data,
- ccl_global void *split_data_buffer,
- ccl_global char *ray_state,
+ ccl_global void *split_data_buffer,
+ ccl_global char *ray_state,
- KERNEL_BUFFER_PARAMS,
+ KERNEL_BUFFER_PARAMS,
- ccl_global int *queue_index,
- ccl_global char *use_queues_flag,
- ccl_global unsigned int *work_pools,
- ccl_global float *buffer
- )
+ ccl_global int *queue_index,
+ ccl_global char *use_queues_flag,
+ ccl_global unsigned int *work_pools,
+ ccl_global float *buffer)
{
#ifdef LOCALS_TYPE
- ccl_local LOCALS_TYPE locals;
+ ccl_local LOCALS_TYPE locals;
#endif
- KernelGlobals *kg = (KernelGlobals*)kg_global;
+ KernelGlobals *kg = (KernelGlobals *)kg_global;
- if(ccl_local_id(0) + ccl_local_id(1) == 0) {
- kg->data = data;
+ if (ccl_local_id(0) + ccl_local_id(1) == 0) {
+ kg->data = data;
- kernel_split_params.queue_index = queue_index;
- kernel_split_params.use_queues_flag = use_queues_flag;
- kernel_split_params.work_pools = work_pools;
- kernel_split_params.tile.buffer = buffer;
+ kernel_split_params.queue_index = queue_index;
+ kernel_split_params.use_queues_flag = use_queues_flag;
+ kernel_split_params.work_pools = work_pools;
+ kernel_split_params.tile.buffer = buffer;
- split_data_init(kg, &kernel_split_state, ccl_global_size(0)*ccl_global_size(1), split_data_buffer, ray_state);
+ split_data_init(kg,
+ &kernel_split_state,
+ ccl_global_size(0) * ccl_global_size(1),
+ split_data_buffer,
+ ray_state);
+ }
- }
+ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
- kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
-
- KERNEL_NAME_EVAL(kernel, KERNEL_NAME)(
- kg
+ KERNEL_NAME_EVAL(kernel, KERNEL_NAME)
+ (kg
#ifdef LOCALS_TYPE
- , &locals
+ ,
+ &locals
#endif
- );
+ );
}
#undef KERNEL_NAME_JOIN
diff --git a/intern/cycles/kernel/osl/CMakeLists.txt b/intern/cycles/kernel/osl/CMakeLists.txt
index 0a3d0b974cb..28d9ca854db 100644
--- a/intern/cycles/kernel/osl/CMakeLists.txt
+++ b/intern/cycles/kernel/osl/CMakeLists.txt
@@ -1,6 +1,6 @@
set(INC
- ../..
+ ../..
)
set(INC_SYS
@@ -8,25 +8,25 @@ set(INC_SYS
)
set(SRC
- background.cpp
- bsdf_diffuse_ramp.cpp
- bsdf_phong_ramp.cpp
- emissive.cpp
- osl_bssrdf.cpp
- osl_closures.cpp
- osl_services.cpp
- osl_shader.cpp
+ background.cpp
+ bsdf_diffuse_ramp.cpp
+ bsdf_phong_ramp.cpp
+ emissive.cpp
+ osl_bssrdf.cpp
+ osl_closures.cpp
+ osl_services.cpp
+ osl_shader.cpp
)
set(HEADER_SRC
- osl_closures.h
- osl_globals.h
- osl_services.h
- osl_shader.h
+ osl_closures.h
+ osl_globals.h
+ osl_services.h
+ osl_shader.h
)
set(LIB
- cycles_render
+ cycles_render
)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${RTTI_DISABLE_FLAGS}")
diff --git a/intern/cycles/kernel/osl/background.cpp b/intern/cycles/kernel/osl/background.cpp
index 6924a4144c5..b395227845d 100644
--- a/intern/cycles/kernel/osl/background.cpp
+++ b/intern/cycles/kernel/osl/background.cpp
@@ -51,11 +51,11 @@ using namespace OSL;
/// only the weight is taking into account
///
class GenericBackgroundClosure : public CClosurePrimitive {
-public:
- void setup(ShaderData *sd, int /* path_flag */, float3 weight)
- {
- background_setup(sd, weight);
- }
+ public:
+ void setup(ShaderData *sd, int /* path_flag */, float3 weight)
+ {
+ background_setup(sd, weight);
+ }
};
/// Holdout closure
@@ -66,31 +66,28 @@ public:
/// used
///
class HoldoutClosure : CClosurePrimitive {
-public:
- void setup(ShaderData *sd, int /* path_flag */, float3 weight)
- {
- closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, weight);
- sd->flag |= SD_HOLDOUT;
- }
+ public:
+ void setup(ShaderData *sd, int /* path_flag */, float3 weight)
+ {
+ closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, weight);
+ sd->flag |= SD_HOLDOUT;
+ }
};
ClosureParam *closure_background_params()
{
- static ClosureParam params[] = {
- CLOSURE_STRING_KEYPARAM(GenericBackgroundClosure, label, "label"),
- CLOSURE_FINISH_PARAM(GenericBackgroundClosure)
- };
- return params;
+ static ClosureParam params[] = {
+ CLOSURE_STRING_KEYPARAM(GenericBackgroundClosure, label, "label"),
+ CLOSURE_FINISH_PARAM(GenericBackgroundClosure)};
+ return params;
}
CCLOSURE_PREPARE(closure_background_prepare, GenericBackgroundClosure)
ClosureParam *closure_holdout_params()
{
- static ClosureParam params[] = {
- CLOSURE_FINISH_PARAM(HoldoutClosure)
- };
- return params;
+ static ClosureParam params[] = {CLOSURE_FINISH_PARAM(HoldoutClosure)};
+ return params;
}
CCLOSURE_PREPARE(closure_holdout_prepare, HoldoutClosure)
diff --git a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
index ed5d5235a34..c5edc7c9be3 100644
--- a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
+++ b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
@@ -47,36 +47,35 @@ CCL_NAMESPACE_BEGIN
using namespace OSL;
class DiffuseRampClosure : public CBSDFClosure {
-public:
- DiffuseRampBsdf params;
- Color3 colors[8];
+ public:
+ DiffuseRampBsdf params;
+ Color3 colors[8];
- void setup(ShaderData *sd, int /* path_flag */, float3 weight)
- {
- DiffuseRampBsdf *bsdf = (DiffuseRampBsdf*)bsdf_alloc_osl(sd, sizeof(DiffuseRampBsdf), weight, &params);
+ void setup(ShaderData *sd, int /* path_flag */, float3 weight)
+ {
+ DiffuseRampBsdf *bsdf = (DiffuseRampBsdf *)bsdf_alloc_osl(
+ sd, sizeof(DiffuseRampBsdf), weight, &params);
- if(bsdf) {
- bsdf->colors = (float3*)closure_alloc_extra(sd, sizeof(float3)*8);
+ if (bsdf) {
+ bsdf->colors = (float3 *)closure_alloc_extra(sd, sizeof(float3) * 8);
- if(bsdf->colors) {
- for(int i = 0; i < 8; i++)
- bsdf->colors[i] = TO_FLOAT3(colors[i]);
+ if (bsdf->colors) {
+ for (int i = 0; i < 8; i++)
+ bsdf->colors[i] = TO_FLOAT3(colors[i]);
- sd->flag |= bsdf_diffuse_ramp_setup(bsdf);
- }
- }
- }
+ sd->flag |= bsdf_diffuse_ramp_setup(bsdf);
+ }
+ }
+ }
};
ClosureParam *closure_bsdf_diffuse_ramp_params()
{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(DiffuseRampClosure, params.N),
- CLOSURE_COLOR_ARRAY_PARAM(DiffuseRampClosure, colors, 8),
- CLOSURE_STRING_KEYPARAM(DiffuseRampClosure, label, "label"),
- CLOSURE_FINISH_PARAM(DiffuseRampClosure)
- };
- return params;
+ static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(DiffuseRampClosure, params.N),
+ CLOSURE_COLOR_ARRAY_PARAM(DiffuseRampClosure, colors, 8),
+ CLOSURE_STRING_KEYPARAM(DiffuseRampClosure, label, "label"),
+ CLOSURE_FINISH_PARAM(DiffuseRampClosure)};
+ return params;
}
CCLOSURE_PREPARE(closure_bsdf_diffuse_ramp_prepare, DiffuseRampClosure)
diff --git a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
index a8acdb8e342..4b7e59ff932 100644
--- a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
+++ b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
@@ -46,37 +46,36 @@ CCL_NAMESPACE_BEGIN
using namespace OSL;
class PhongRampClosure : public CBSDFClosure {
-public:
- PhongRampBsdf params;
- Color3 colors[8];
+ public:
+ PhongRampBsdf params;
+ Color3 colors[8];
- void setup(ShaderData *sd, int /* path_flag */, float3 weight)
- {
- PhongRampBsdf *bsdf = (PhongRampBsdf*)bsdf_alloc_osl(sd, sizeof(PhongRampBsdf), weight, &params);
+ void setup(ShaderData *sd, int /* path_flag */, float3 weight)
+ {
+ PhongRampBsdf *bsdf = (PhongRampBsdf *)bsdf_alloc_osl(
+ sd, sizeof(PhongRampBsdf), weight, &params);
- if(bsdf) {
- bsdf->colors = (float3*)closure_alloc_extra(sd, sizeof(float3)*8);
+ if (bsdf) {
+ bsdf->colors = (float3 *)closure_alloc_extra(sd, sizeof(float3) * 8);
- if(bsdf->colors) {
- for(int i = 0; i < 8; i++)
- bsdf->colors[i] = TO_FLOAT3(colors[i]);
+ if (bsdf->colors) {
+ for (int i = 0; i < 8; i++)
+ bsdf->colors[i] = TO_FLOAT3(colors[i]);
- sd->flag |= bsdf_phong_ramp_setup(bsdf);
- }
- }
- }
+ sd->flag |= bsdf_phong_ramp_setup(bsdf);
+ }
+ }
+ }
};
ClosureParam *closure_bsdf_phong_ramp_params()
{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(PhongRampClosure, params.N),
- CLOSURE_FLOAT_PARAM(PhongRampClosure, params.exponent),
- CLOSURE_COLOR_ARRAY_PARAM(PhongRampClosure, colors, 8),
- CLOSURE_STRING_KEYPARAM(PhongRampClosure, label, "label"),
- CLOSURE_FINISH_PARAM(PhongRampClosure)
- };
- return params;
+ static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(PhongRampClosure, params.N),
+ CLOSURE_FLOAT_PARAM(PhongRampClosure, params.exponent),
+ CLOSURE_COLOR_ARRAY_PARAM(PhongRampClosure, colors, 8),
+ CLOSURE_STRING_KEYPARAM(PhongRampClosure, label, "label"),
+ CLOSURE_FINISH_PARAM(PhongRampClosure)};
+ return params;
}
CCLOSURE_PREPARE(closure_bsdf_phong_ramp_prepare, PhongRampClosure)
diff --git a/intern/cycles/kernel/osl/emissive.cpp b/intern/cycles/kernel/osl/emissive.cpp
index c2a848231f2..c29ddb13e2e 100644
--- a/intern/cycles/kernel/osl/emissive.cpp
+++ b/intern/cycles/kernel/osl/emissive.cpp
@@ -53,20 +53,18 @@ using namespace OSL;
/// if the provided angles are PI/2, which is the default
///
class GenericEmissiveClosure : public CClosurePrimitive {
-public:
- void setup(ShaderData *sd, int /* path_flag */, float3 weight)
- {
- emission_setup(sd, weight);
- }
+ public:
+ void setup(ShaderData *sd, int /* path_flag */, float3 weight)
+ {
+ emission_setup(sd, weight);
+ }
};
ClosureParam *closure_emission_params()
{
- static ClosureParam params[] = {
- CLOSURE_STRING_KEYPARAM(GenericEmissiveClosure, label, "label"),
- CLOSURE_FINISH_PARAM(GenericEmissiveClosure)
- };
- return params;
+ static ClosureParam params[] = {CLOSURE_STRING_KEYPARAM(GenericEmissiveClosure, label, "label"),
+ CLOSURE_FINISH_PARAM(GenericEmissiveClosure)};
+ return params;
}
CCLOSURE_PREPARE(closure_emission_prepare, GenericEmissiveClosure)
diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp
index 66ec8a996ca..dd52c33071c 100644
--- a/intern/cycles/kernel/osl/osl_bssrdf.cpp
+++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp
@@ -56,77 +56,76 @@ static ustring u_random_walk("random_walk");
static ustring u_principled_random_walk("principled_random_walk");
class CBSSRDFClosure : public CClosurePrimitive {
-public:
- Bssrdf params;
- ustring method;
-
- CBSSRDFClosure()
- {
- params.texture_blur = 0.0f;
- params.sharpness = 0.0f;
- params.roughness = 0.0f;
- }
-
- void setup(ShaderData *sd, int path_flag, float3 weight)
- {
- if(method == u_cubic) {
- alloc(sd, path_flag, weight, CLOSURE_BSSRDF_CUBIC_ID);
- }
- else if(method == u_gaussian) {
- alloc(sd, path_flag, weight, CLOSURE_BSSRDF_GAUSSIAN_ID);
- }
- else if(method == u_burley) {
- alloc(sd, path_flag, weight, CLOSURE_BSSRDF_BURLEY_ID);
- }
- else if(method == u_principled) {
- alloc(sd, path_flag, weight, CLOSURE_BSSRDF_PRINCIPLED_ID);
- }
- else if(method == u_random_walk) {
- alloc(sd, path_flag, weight, CLOSURE_BSSRDF_RANDOM_WALK_ID);
- }
- else if(method == u_principled_random_walk) {
- alloc(sd, path_flag, weight, CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID);
- }
- }
-
- void alloc(ShaderData *sd, int path_flag, float3 weight, ClosureType type)
- {
- Bssrdf *bssrdf = bssrdf_alloc(sd, weight);
-
- if(bssrdf) {
- /* disable in case of diffuse ancestor, can't see it well then and
- * adds considerably noise due to probabilities of continuing path
- * getting lower and lower */
- if(path_flag & PATH_RAY_DIFFUSE_ANCESTOR) {
- params.radius = make_float3(0.0f, 0.0f, 0.0f);
- }
-
- /* create one closure per color channel */
- bssrdf->radius = params.radius;
- bssrdf->albedo = params.albedo;
- bssrdf->texture_blur = params.texture_blur;
- bssrdf->sharpness = params.sharpness;
- bssrdf->N = params.N;
- bssrdf->roughness = params.roughness;
- sd->flag |= bssrdf_setup(sd, bssrdf, (ClosureType)type);
- }
- }
+ public:
+ Bssrdf params;
+ ustring method;
+
+ CBSSRDFClosure()
+ {
+ params.texture_blur = 0.0f;
+ params.sharpness = 0.0f;
+ params.roughness = 0.0f;
+ }
+
+ void setup(ShaderData *sd, int path_flag, float3 weight)
+ {
+ if (method == u_cubic) {
+ alloc(sd, path_flag, weight, CLOSURE_BSSRDF_CUBIC_ID);
+ }
+ else if (method == u_gaussian) {
+ alloc(sd, path_flag, weight, CLOSURE_BSSRDF_GAUSSIAN_ID);
+ }
+ else if (method == u_burley) {
+ alloc(sd, path_flag, weight, CLOSURE_BSSRDF_BURLEY_ID);
+ }
+ else if (method == u_principled) {
+ alloc(sd, path_flag, weight, CLOSURE_BSSRDF_PRINCIPLED_ID);
+ }
+ else if (method == u_random_walk) {
+ alloc(sd, path_flag, weight, CLOSURE_BSSRDF_RANDOM_WALK_ID);
+ }
+ else if (method == u_principled_random_walk) {
+ alloc(sd, path_flag, weight, CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID);
+ }
+ }
+
+ void alloc(ShaderData *sd, int path_flag, float3 weight, ClosureType type)
+ {
+ Bssrdf *bssrdf = bssrdf_alloc(sd, weight);
+
+ if (bssrdf) {
+ /* disable in case of diffuse ancestor, can't see it well then and
+ * adds considerably noise due to probabilities of continuing path
+ * getting lower and lower */
+ if (path_flag & PATH_RAY_DIFFUSE_ANCESTOR) {
+ params.radius = make_float3(0.0f, 0.0f, 0.0f);
+ }
+
+ /* create one closure per color channel */
+ bssrdf->radius = params.radius;
+ bssrdf->albedo = params.albedo;
+ bssrdf->texture_blur = params.texture_blur;
+ bssrdf->sharpness = params.sharpness;
+ bssrdf->N = params.N;
+ bssrdf->roughness = params.roughness;
+ sd->flag |= bssrdf_setup(sd, bssrdf, (ClosureType)type);
+ }
+ }
};
ClosureParam *closure_bssrdf_params()
{
- static ClosureParam params[] = {
- CLOSURE_STRING_PARAM(CBSSRDFClosure, method),
- CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.N),
- CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.radius),
- CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.albedo),
- CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.texture_blur, "texture_blur"),
- CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.sharpness, "sharpness"),
- CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.roughness, "roughness"),
- CLOSURE_STRING_KEYPARAM(CBSSRDFClosure, label, "label"),
- CLOSURE_FINISH_PARAM(CBSSRDFClosure)
- };
- return params;
+ static ClosureParam params[] = {
+ CLOSURE_STRING_PARAM(CBSSRDFClosure, method),
+ CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.N),
+ CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.radius),
+ CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.albedo),
+ CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.texture_blur, "texture_blur"),
+ CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.sharpness, "sharpness"),
+ CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.roughness, "roughness"),
+ CLOSURE_STRING_KEYPARAM(CBSSRDFClosure, label, "label"),
+ CLOSURE_FINISH_PARAM(CBSSRDFClosure)};
+ return params;
}
CCLOSURE_PREPARE(closure_bssrdf_prepare, CBSSRDFClosure)
diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp
index 169351d5ad9..aa7e2727577 100644
--- a/intern/cycles/kernel/osl/osl_closures.cpp
+++ b/intern/cycles/kernel/osl/osl_closures.cpp
@@ -71,706 +71,787 @@ using namespace OSL;
/* BSDF class definitions */
BSDF_CLOSURE_CLASS_BEGIN(Diffuse, diffuse, DiffuseBsdf, LABEL_DIFFUSE)
- CLOSURE_FLOAT3_PARAM(DiffuseClosure, params.N),
-BSDF_CLOSURE_CLASS_END(Diffuse, diffuse)
-
-BSDF_CLOSURE_CLASS_BEGIN(Translucent, translucent, DiffuseBsdf, LABEL_DIFFUSE)
- CLOSURE_FLOAT3_PARAM(TranslucentClosure, params.N),
-BSDF_CLOSURE_CLASS_END(Translucent, translucent)
-
-BSDF_CLOSURE_CLASS_BEGIN(OrenNayar, oren_nayar, OrenNayarBsdf, LABEL_DIFFUSE)
- CLOSURE_FLOAT3_PARAM(OrenNayarClosure, params.N),
- CLOSURE_FLOAT_PARAM(OrenNayarClosure, params.roughness),
-BSDF_CLOSURE_CLASS_END(OrenNayar, oren_nayar)
-
-BSDF_CLOSURE_CLASS_BEGIN(Reflection, reflection, MicrofacetBsdf, LABEL_SINGULAR)
- CLOSURE_FLOAT3_PARAM(ReflectionClosure, params.N),
-BSDF_CLOSURE_CLASS_END(Reflection, reflection)
-
-BSDF_CLOSURE_CLASS_BEGIN(Refraction, refraction, MicrofacetBsdf, LABEL_SINGULAR)
- CLOSURE_FLOAT3_PARAM(RefractionClosure, params.N),
- CLOSURE_FLOAT_PARAM(RefractionClosure, params.ior),
-BSDF_CLOSURE_CLASS_END(Refraction, refraction)
-
-BSDF_CLOSURE_CLASS_BEGIN(AshikhminVelvet, ashikhmin_velvet, VelvetBsdf, LABEL_DIFFUSE)
- CLOSURE_FLOAT3_PARAM(AshikhminVelvetClosure, params.N),
- CLOSURE_FLOAT_PARAM(AshikhminVelvetClosure, params.sigma),
-BSDF_CLOSURE_CLASS_END(AshikhminVelvet, ashikhmin_velvet)
-
-BSDF_CLOSURE_CLASS_BEGIN(AshikhminShirley, ashikhmin_shirley_aniso, MicrofacetBsdf, LABEL_GLOSSY|LABEL_REFLECT)
- CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.N),
- CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.T),
- CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_y),
-BSDF_CLOSURE_CLASS_END(AshikhminShirley, ashikhmin_shirley_aniso)
-
-BSDF_CLOSURE_CLASS_BEGIN(DiffuseToon, diffuse_toon, ToonBsdf, LABEL_DIFFUSE)
- CLOSURE_FLOAT3_PARAM(DiffuseToonClosure, params.N),
- CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.size),
- CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.smooth),
-BSDF_CLOSURE_CLASS_END(DiffuseToon, diffuse_toon)
-
-BSDF_CLOSURE_CLASS_BEGIN(GlossyToon, glossy_toon, ToonBsdf, LABEL_GLOSSY)
- CLOSURE_FLOAT3_PARAM(GlossyToonClosure, params.N),
- CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.size),
- CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.smooth),
-BSDF_CLOSURE_CLASS_END(GlossyToon, glossy_toon)
-
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGX, microfacet_ggx, MicrofacetBsdf, LABEL_GLOSSY|LABEL_REFLECT)
- CLOSURE_FLOAT3_PARAM(MicrofacetGGXClosure, params.N),
- CLOSURE_FLOAT_PARAM(MicrofacetGGXClosure, params.alpha_x),
-BSDF_CLOSURE_CLASS_END(MicrofacetGGX, microfacet_ggx)
-
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXAniso, microfacet_ggx_aniso, MicrofacetBsdf, LABEL_GLOSSY|LABEL_REFLECT)
- CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, params.N),
- CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, params.T),
- CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, params.alpha_y),
-BSDF_CLOSURE_CLASS_END(MicrofacetGGXAniso, microfacet_ggx_aniso)
-
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmann, microfacet_beckmann, MicrofacetBsdf, LABEL_GLOSSY|LABEL_REFLECT)
- CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannClosure, params.N),
- CLOSURE_FLOAT_PARAM(MicrofacetBeckmannClosure, params.alpha_x),
-BSDF_CLOSURE_CLASS_END(MicrofacetBeckmann, microfacet_beckmann)
-
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannAniso, microfacet_beckmann_aniso, MicrofacetBsdf, LABEL_GLOSSY|LABEL_REFLECT)
- CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, params.N),
- CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, params.T),
- CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, params.alpha_y),
-BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannAniso, microfacet_beckmann_aniso)
-
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXRefraction, microfacet_ggx_refraction, MicrofacetBsdf, LABEL_GLOSSY|LABEL_TRANSMIT)
- CLOSURE_FLOAT3_PARAM(MicrofacetGGXRefractionClosure, params.N),
- CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.ior),
-BSDF_CLOSURE_CLASS_END(MicrofacetGGXRefraction, microfacet_ggx_refraction)
-
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction, MicrofacetBsdf, LABEL_GLOSSY|LABEL_TRANSMIT)
- CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannRefractionClosure, params.N),
- CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, params.ior),
-BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction)
-
-BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, HairBsdf, LABEL_GLOSSY)
- CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.N),
- CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness1),
- CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness2),
- CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T),
- CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.offset),
-BSDF_CLOSURE_CLASS_END(HairReflection, hair_reflection)
-
-BSDF_CLOSURE_CLASS_BEGIN(HairTransmission, hair_transmission, HairBsdf, LABEL_GLOSSY)
- CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, params.N),
- CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness1),
- CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness2),
- CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T),
- CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.offset),
-BSDF_CLOSURE_CLASS_END(HairTransmission, hair_transmission)
-
-BSDF_CLOSURE_CLASS_BEGIN(PrincipledDiffuse, principled_diffuse, PrincipledDiffuseBsdf, LABEL_DIFFUSE)
- CLOSURE_FLOAT3_PARAM(PrincipledDiffuseClosure, params.N),
- CLOSURE_FLOAT_PARAM(PrincipledDiffuseClosure, params.roughness),
-BSDF_CLOSURE_CLASS_END(PrincipledDiffuse, principled_diffuse)
-
-BSDF_CLOSURE_CLASS_BEGIN(PrincipledSheen, principled_sheen, PrincipledSheenBsdf, LABEL_DIFFUSE)
- CLOSURE_FLOAT3_PARAM(PrincipledSheenClosure, params.N),
-BSDF_CLOSURE_CLASS_END(PrincipledSheen, principled_sheen)
-
-/* PRINCIPLED HAIR BSDF */
-class PrincipledHairClosure : public CBSDFClosure {
-public:
- PrincipledHairBSDF params;
-
- PrincipledHairBSDF *alloc(ShaderData *sd, int path_flag, float3 weight)
- {
- PrincipledHairBSDF *bsdf = (PrincipledHairBSDF*)bsdf_alloc_osl(sd, sizeof(PrincipledHairBSDF), weight, &params);
- if(!bsdf) {
- return NULL;
- }
-
- PrincipledHairExtra *extra = (PrincipledHairExtra*)closure_alloc_extra(sd, sizeof(PrincipledHairExtra));
- if(!extra) {
- return NULL;
- }
-
- bsdf->extra = extra;
- return bsdf;
- }
-
- void setup(ShaderData *sd, int path_flag, float3 weight)
- {
- if(!skip(sd, path_flag, LABEL_GLOSSY)) {
- PrincipledHairBSDF *bsdf = (PrincipledHairBSDF*)alloc(sd, path_flag, weight);
- if(!bsdf) {
- return;
- }
-
- sd->flag |= (bsdf) ? bsdf_principled_hair_setup(sd, bsdf) : 0;
- }
- }
+CLOSURE_FLOAT3_PARAM(DiffuseClosure, params.N),
+ BSDF_CLOSURE_CLASS_END(Diffuse, diffuse)
+
+ BSDF_CLOSURE_CLASS_BEGIN(Translucent, translucent, DiffuseBsdf, LABEL_DIFFUSE)
+ CLOSURE_FLOAT3_PARAM(TranslucentClosure, params.N),
+ BSDF_CLOSURE_CLASS_END(Translucent, translucent)
+
+ BSDF_CLOSURE_CLASS_BEGIN(OrenNayar, oren_nayar, OrenNayarBsdf, LABEL_DIFFUSE)
+ CLOSURE_FLOAT3_PARAM(OrenNayarClosure, params.N),
+ CLOSURE_FLOAT_PARAM(OrenNayarClosure, params.roughness),
+ BSDF_CLOSURE_CLASS_END(OrenNayar, oren_nayar)
+
+ BSDF_CLOSURE_CLASS_BEGIN(Reflection, reflection, MicrofacetBsdf, LABEL_SINGULAR)
+ CLOSURE_FLOAT3_PARAM(ReflectionClosure, params.N),
+ BSDF_CLOSURE_CLASS_END(Reflection, reflection)
+
+ BSDF_CLOSURE_CLASS_BEGIN(Refraction, refraction, MicrofacetBsdf, LABEL_SINGULAR)
+ CLOSURE_FLOAT3_PARAM(RefractionClosure, params.N),
+ CLOSURE_FLOAT_PARAM(RefractionClosure, params.ior),
+ BSDF_CLOSURE_CLASS_END(Refraction, refraction)
+
+ BSDF_CLOSURE_CLASS_BEGIN(AshikhminVelvet, ashikhmin_velvet, VelvetBsdf, LABEL_DIFFUSE)
+ CLOSURE_FLOAT3_PARAM(AshikhminVelvetClosure, params.N),
+ CLOSURE_FLOAT_PARAM(AshikhminVelvetClosure, params.sigma),
+ BSDF_CLOSURE_CLASS_END(AshikhminVelvet, ashikhmin_velvet)
+
+ BSDF_CLOSURE_CLASS_BEGIN(AshikhminShirley,
+ ashikhmin_shirley_aniso,
+ MicrofacetBsdf,
+ LABEL_GLOSSY | LABEL_REFLECT)
+ CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.N),
+ CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.T),
+ CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_x),
+ CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_y),
+ BSDF_CLOSURE_CLASS_END(AshikhminShirley, ashikhmin_shirley_aniso)
+
+ BSDF_CLOSURE_CLASS_BEGIN(DiffuseToon, diffuse_toon, ToonBsdf, LABEL_DIFFUSE)
+ CLOSURE_FLOAT3_PARAM(DiffuseToonClosure, params.N),
+ CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.size),
+ CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.smooth),
+ BSDF_CLOSURE_CLASS_END(DiffuseToon, diffuse_toon)
+
+ BSDF_CLOSURE_CLASS_BEGIN(GlossyToon, glossy_toon, ToonBsdf, LABEL_GLOSSY)
+ CLOSURE_FLOAT3_PARAM(GlossyToonClosure, params.N),
+ CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.size),
+ CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.smooth),
+ BSDF_CLOSURE_CLASS_END(GlossyToon, glossy_toon)
+
+ BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGX,
+ microfacet_ggx,
+ MicrofacetBsdf,
+ LABEL_GLOSSY | LABEL_REFLECT)
+ CLOSURE_FLOAT3_PARAM(MicrofacetGGXClosure, params.N),
+ CLOSURE_FLOAT_PARAM(MicrofacetGGXClosure, params.alpha_x),
+ BSDF_CLOSURE_CLASS_END(MicrofacetGGX, microfacet_ggx)
+
+ BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXAniso,
+ microfacet_ggx_aniso,
+ MicrofacetBsdf,
+ LABEL_GLOSSY | LABEL_REFLECT)
+ CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, params.N),
+ CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, params.T),
+ CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, params.alpha_x),
+ CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, params.alpha_y),
+ BSDF_CLOSURE_CLASS_END(MicrofacetGGXAniso, microfacet_ggx_aniso)
+
+ BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmann,
+ microfacet_beckmann,
+ MicrofacetBsdf,
+ LABEL_GLOSSY | LABEL_REFLECT)
+ CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannClosure, params.N),
+ CLOSURE_FLOAT_PARAM(MicrofacetBeckmannClosure, params.alpha_x),
+ BSDF_CLOSURE_CLASS_END(MicrofacetBeckmann, microfacet_beckmann)
+
+ BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannAniso,
+ microfacet_beckmann_aniso,
+ MicrofacetBsdf,
+ LABEL_GLOSSY | LABEL_REFLECT)
+ CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, params.N),
+ CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, params.T),
+ CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, params.alpha_x),
+ CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, params.alpha_y),
+ BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannAniso, microfacet_beckmann_aniso)
+
+ BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXRefraction,
+ microfacet_ggx_refraction,
+ MicrofacetBsdf,
+ LABEL_GLOSSY | LABEL_TRANSMIT)
+ CLOSURE_FLOAT3_PARAM(MicrofacetGGXRefractionClosure, params.N),
+ CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.alpha_x),
+ CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.ior),
+ BSDF_CLOSURE_CLASS_END(MicrofacetGGXRefraction, microfacet_ggx_refraction)
+
+ BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction,
+ microfacet_beckmann_refraction,
+ MicrofacetBsdf,
+ LABEL_GLOSSY | LABEL_TRANSMIT)
+ CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannRefractionClosure, params.N),
+ CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, params.alpha_x),
+ CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, params.ior),
+ BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction)
+
+ BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, HairBsdf, LABEL_GLOSSY)
+ CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.N),
+ CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness1),
+ CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness2),
+ CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T),
+ CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.offset),
+ BSDF_CLOSURE_CLASS_END(HairReflection, hair_reflection)
+
+ BSDF_CLOSURE_CLASS_BEGIN(HairTransmission, hair_transmission, HairBsdf, LABEL_GLOSSY)
+ CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, params.N),
+ CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness1),
+ CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness2),
+ CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T),
+ CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.offset),
+ BSDF_CLOSURE_CLASS_END(HairTransmission, hair_transmission)
+
+ BSDF_CLOSURE_CLASS_BEGIN(PrincipledDiffuse,
+ principled_diffuse,
+ PrincipledDiffuseBsdf,
+ LABEL_DIFFUSE)
+ CLOSURE_FLOAT3_PARAM(PrincipledDiffuseClosure, params.N),
+ CLOSURE_FLOAT_PARAM(PrincipledDiffuseClosure, params.roughness),
+ BSDF_CLOSURE_CLASS_END(PrincipledDiffuse, principled_diffuse)
+
+ BSDF_CLOSURE_CLASS_BEGIN(PrincipledSheen,
+ principled_sheen,
+ PrincipledSheenBsdf,
+ LABEL_DIFFUSE)
+ CLOSURE_FLOAT3_PARAM(PrincipledSheenClosure, params.N),
+ BSDF_CLOSURE_CLASS_END(PrincipledSheen, principled_sheen)
+
+ /* PRINCIPLED HAIR BSDF */
+ class PrincipledHairClosure : public CBSDFClosure {
+ public:
+ PrincipledHairBSDF params;
+
+ PrincipledHairBSDF *alloc(ShaderData *sd, int path_flag, float3 weight)
+ {
+ PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)bsdf_alloc_osl(
+ sd, sizeof(PrincipledHairBSDF), weight, &params);
+ if (!bsdf) {
+ return NULL;
+ }
+
+ PrincipledHairExtra *extra = (PrincipledHairExtra *)closure_alloc_extra(
+ sd, sizeof(PrincipledHairExtra));
+ if (!extra) {
+ return NULL;
+ }
+
+ bsdf->extra = extra;
+ return bsdf;
+ }
+
+ void setup(ShaderData *sd, int path_flag, float3 weight)
+ {
+ if (!skip(sd, path_flag, LABEL_GLOSSY)) {
+ PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)alloc(sd, path_flag, weight);
+ if (!bsdf) {
+ return;
+ }
+
+ sd->flag |= (bsdf) ? bsdf_principled_hair_setup(sd, bsdf) : 0;
+ }
+ }
};
static ClosureParam *closure_bsdf_principled_hair_params()
{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(PrincipledHairClosure, params.N),
- CLOSURE_FLOAT3_PARAM(PrincipledHairClosure, params.sigma),
- CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.v),
- CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.s),
- CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.m0_roughness),
- CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.alpha),
- CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.eta),
- CLOSURE_STRING_KEYPARAM(PrincipledHairClosure, label, "label"),
- CLOSURE_FINISH_PARAM(PrincipledHairClosure)
- };
-
- return params;
+ static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(PrincipledHairClosure, params.N),
+ CLOSURE_FLOAT3_PARAM(PrincipledHairClosure, params.sigma),
+ CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.v),
+ CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.s),
+ CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.m0_roughness),
+ CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.alpha),
+ CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.eta),
+ CLOSURE_STRING_KEYPARAM(PrincipledHairClosure, label, "label"),
+ CLOSURE_FINISH_PARAM(PrincipledHairClosure)};
+
+ return params;
}
CCLOSURE_PREPARE(closure_bsdf_principled_hair_prepare, PrincipledHairClosure)
/* DISNEY PRINCIPLED CLEARCOAT */
class PrincipledClearcoatClosure : public CBSDFClosure {
-public:
- MicrofacetBsdf params;
- float clearcoat, clearcoat_roughness;
-
- MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
- {
- MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc_osl(sd, sizeof(MicrofacetBsdf), weight, &params);
- if(!bsdf) {
- return NULL;
- }
-
- MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
- if(!extra) {
- return NULL;
- }
-
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->extra = extra;
- bsdf->ior = 1.5f;
- bsdf->alpha_x = clearcoat_roughness;
- bsdf->alpha_y = clearcoat_roughness;
- bsdf->extra->color = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f);
- bsdf->extra->clearcoat = clearcoat;
- return bsdf;
- }
-
- void setup(ShaderData *sd, int path_flag, float3 weight)
- {
- MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- if(!bsdf) {
- return;
- }
-
- sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd);
- }
+ public:
+ MicrofacetBsdf params;
+ float clearcoat, clearcoat_roughness;
+
+ MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
+ {
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl(
+ sd, sizeof(MicrofacetBsdf), weight, &params);
+ if (!bsdf) {
+ return NULL;
+ }
+
+ MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
+ if (!extra) {
+ return NULL;
+ }
+
+ bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->extra = extra;
+ bsdf->ior = 1.5f;
+ bsdf->alpha_x = clearcoat_roughness;
+ bsdf->alpha_y = clearcoat_roughness;
+ bsdf->extra->color = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f);
+ bsdf->extra->clearcoat = clearcoat;
+ return bsdf;
+ }
+
+ void setup(ShaderData *sd, int path_flag, float3 weight)
+ {
+ MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
+ if (!bsdf) {
+ return;
+ }
+
+ sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd);
+ }
};
ClosureParam *closure_bsdf_principled_clearcoat_params()
{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(PrincipledClearcoatClosure, params.N),
- CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat),
- CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat_roughness),
- CLOSURE_STRING_KEYPARAM(PrincipledClearcoatClosure, label, "label"),
- CLOSURE_FINISH_PARAM(PrincipledClearcoatClosure)
- };
- return params;
+ static ClosureParam params[] = {
+ CLOSURE_FLOAT3_PARAM(PrincipledClearcoatClosure, params.N),
+ CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat),
+ CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat_roughness),
+ CLOSURE_STRING_KEYPARAM(PrincipledClearcoatClosure, label, "label"),
+ CLOSURE_FINISH_PARAM(PrincipledClearcoatClosure)};
+ return params;
}
CCLOSURE_PREPARE(closure_bsdf_principled_clearcoat_prepare, PrincipledClearcoatClosure)
-
/* Registration */
-static void register_closure(OSL::ShadingSystem *ss, const char *name, int id, OSL::ClosureParam *params, OSL::PrepareClosureFunc prepare)
+static void register_closure(OSL::ShadingSystem *ss,
+ const char *name,
+ int id,
+ OSL::ClosureParam *params,
+ OSL::PrepareClosureFunc prepare)
{
- /* optimization: it's possible to not use a prepare function at all and
- * only initialize the actual class when accessing the closure component
- * data, but then we need to map the id to the class somehow */
+ /* optimization: it's possible to not use a prepare function at all and
+ * only initialize the actual class when accessing the closure component
+ * data, but then we need to map the id to the class somehow */
#if OSL_LIBRARY_VERSION_CODE >= 10900
- ss->register_closure(name, id, params, prepare, NULL);
+ ss->register_closure(name, id, params, prepare, NULL);
#else
- ss->register_closure(name, id, params, prepare, NULL, 16);
+ ss->register_closure(name, id, params, prepare, NULL, 16);
#endif
}
void OSLShader::register_closures(OSLShadingSystem *ss_)
{
- OSL::ShadingSystem *ss = (OSL::ShadingSystem*)ss_;
- int id = 0;
-
- register_closure(ss, "diffuse", id++,
- bsdf_diffuse_params(), bsdf_diffuse_prepare);
- register_closure(ss, "oren_nayar", id++,
- bsdf_oren_nayar_params(), bsdf_oren_nayar_prepare);
- register_closure(ss, "translucent", id++,
- bsdf_translucent_params(), bsdf_translucent_prepare);
- register_closure(ss, "reflection", id++,
- bsdf_reflection_params(), bsdf_reflection_prepare);
- register_closure(ss, "refraction", id++,
- bsdf_refraction_params(), bsdf_refraction_prepare);
- register_closure(ss, "transparent", id++,
- closure_bsdf_transparent_params(), closure_bsdf_transparent_prepare);
- register_closure(ss, "microfacet_ggx", id++,
- bsdf_microfacet_ggx_params(), bsdf_microfacet_ggx_prepare);
- register_closure(ss, "microfacet_ggx_aniso", id++,
- bsdf_microfacet_ggx_aniso_params(), bsdf_microfacet_ggx_aniso_prepare);
- register_closure(ss, "microfacet_ggx_refraction", id++,
- bsdf_microfacet_ggx_refraction_params(), bsdf_microfacet_ggx_refraction_prepare);
- register_closure(ss, "microfacet_multi_ggx", id++,
- closure_bsdf_microfacet_multi_ggx_params(), closure_bsdf_microfacet_multi_ggx_prepare);
- register_closure(ss, "microfacet_multi_ggx_glass", id++,
- closure_bsdf_microfacet_multi_ggx_glass_params(), closure_bsdf_microfacet_multi_ggx_glass_prepare);
- register_closure(ss, "microfacet_multi_ggx_aniso", id++,
- closure_bsdf_microfacet_multi_ggx_aniso_params(), closure_bsdf_microfacet_multi_ggx_aniso_prepare);
- register_closure(ss, "microfacet_ggx_fresnel", id++,
- closure_bsdf_microfacet_ggx_fresnel_params(), closure_bsdf_microfacet_ggx_fresnel_prepare);
- register_closure(ss, "microfacet_ggx_aniso_fresnel", id++,
- closure_bsdf_microfacet_ggx_aniso_fresnel_params(), closure_bsdf_microfacet_ggx_aniso_fresnel_prepare);
- register_closure(ss, "microfacet_multi_ggx_fresnel", id++,
- closure_bsdf_microfacet_multi_ggx_fresnel_params(), closure_bsdf_microfacet_multi_ggx_fresnel_prepare);
- register_closure(ss, "microfacet_multi_ggx_glass_fresnel", id++,
- closure_bsdf_microfacet_multi_ggx_glass_fresnel_params(), closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare);
- register_closure(ss, "microfacet_multi_ggx_aniso_fresnel", id++,
- closure_bsdf_microfacet_multi_ggx_aniso_fresnel_params(), closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare);
- register_closure(ss, "microfacet_beckmann", id++,
- bsdf_microfacet_beckmann_params(), bsdf_microfacet_beckmann_prepare);
- register_closure(ss, "microfacet_beckmann_aniso", id++,
- bsdf_microfacet_beckmann_aniso_params(), bsdf_microfacet_beckmann_aniso_prepare);
- register_closure(ss, "microfacet_beckmann_refraction", id++,
- bsdf_microfacet_beckmann_refraction_params(), bsdf_microfacet_beckmann_refraction_prepare);
- register_closure(ss, "ashikhmin_shirley", id++,
- bsdf_ashikhmin_shirley_aniso_params(), bsdf_ashikhmin_shirley_aniso_prepare);
- register_closure(ss, "ashikhmin_velvet", id++,
- bsdf_ashikhmin_velvet_params(), bsdf_ashikhmin_velvet_prepare);
- register_closure(ss, "diffuse_toon", id++,
- bsdf_diffuse_toon_params(), bsdf_diffuse_toon_prepare);
- register_closure(ss, "glossy_toon", id++,
- bsdf_glossy_toon_params(), bsdf_glossy_toon_prepare);
- register_closure(ss, "principled_diffuse", id++,
- bsdf_principled_diffuse_params(), bsdf_principled_diffuse_prepare);
- register_closure(ss, "principled_sheen", id++,
- bsdf_principled_sheen_params(), bsdf_principled_sheen_prepare);
- register_closure(ss, "principled_clearcoat", id++,
- closure_bsdf_principled_clearcoat_params(), closure_bsdf_principled_clearcoat_prepare);
-
- register_closure(ss, "emission", id++,
- closure_emission_params(), closure_emission_prepare);
- register_closure(ss, "background", id++,
- closure_background_params(), closure_background_prepare);
- register_closure(ss, "holdout", id++,
- closure_holdout_params(), closure_holdout_prepare);
- register_closure(ss, "diffuse_ramp", id++,
- closure_bsdf_diffuse_ramp_params(), closure_bsdf_diffuse_ramp_prepare);
- register_closure(ss, "phong_ramp", id++,
- closure_bsdf_phong_ramp_params(), closure_bsdf_phong_ramp_prepare);
- register_closure(ss, "bssrdf", id++,
- closure_bssrdf_params(), closure_bssrdf_prepare);
-
- register_closure(ss, "hair_reflection", id++,
- bsdf_hair_reflection_params(), bsdf_hair_reflection_prepare);
- register_closure(ss, "hair_transmission", id++,
- bsdf_hair_transmission_params(), bsdf_hair_transmission_prepare);
-
- register_closure(ss, "principled_hair", id++,
- closure_bsdf_principled_hair_params(), closure_bsdf_principled_hair_prepare);
-
- register_closure(ss, "henyey_greenstein", id++,
- closure_henyey_greenstein_params(), closure_henyey_greenstein_prepare);
- register_closure(ss, "absorption", id++,
- closure_absorption_params(), closure_absorption_prepare);
+ OSL::ShadingSystem *ss = (OSL::ShadingSystem *)ss_;
+ int id = 0;
+
+ register_closure(ss, "diffuse", id++, bsdf_diffuse_params(), bsdf_diffuse_prepare);
+ register_closure(ss, "oren_nayar", id++, bsdf_oren_nayar_params(), bsdf_oren_nayar_prepare);
+ register_closure(ss, "translucent", id++, bsdf_translucent_params(), bsdf_translucent_prepare);
+ register_closure(ss, "reflection", id++, bsdf_reflection_params(), bsdf_reflection_prepare);
+ register_closure(ss, "refraction", id++, bsdf_refraction_params(), bsdf_refraction_prepare);
+ register_closure(ss,
+ "transparent",
+ id++,
+ closure_bsdf_transparent_params(),
+ closure_bsdf_transparent_prepare);
+ register_closure(
+ ss, "microfacet_ggx", id++, bsdf_microfacet_ggx_params(), bsdf_microfacet_ggx_prepare);
+ register_closure(ss,
+ "microfacet_ggx_aniso",
+ id++,
+ bsdf_microfacet_ggx_aniso_params(),
+ bsdf_microfacet_ggx_aniso_prepare);
+ register_closure(ss,
+ "microfacet_ggx_refraction",
+ id++,
+ bsdf_microfacet_ggx_refraction_params(),
+ bsdf_microfacet_ggx_refraction_prepare);
+ register_closure(ss,
+ "microfacet_multi_ggx",
+ id++,
+ closure_bsdf_microfacet_multi_ggx_params(),
+ closure_bsdf_microfacet_multi_ggx_prepare);
+ register_closure(ss,
+ "microfacet_multi_ggx_glass",
+ id++,
+ closure_bsdf_microfacet_multi_ggx_glass_params(),
+ closure_bsdf_microfacet_multi_ggx_glass_prepare);
+ register_closure(ss,
+ "microfacet_multi_ggx_aniso",
+ id++,
+ closure_bsdf_microfacet_multi_ggx_aniso_params(),
+ closure_bsdf_microfacet_multi_ggx_aniso_prepare);
+ register_closure(ss,
+ "microfacet_ggx_fresnel",
+ id++,
+ closure_bsdf_microfacet_ggx_fresnel_params(),
+ closure_bsdf_microfacet_ggx_fresnel_prepare);
+ register_closure(ss,
+ "microfacet_ggx_aniso_fresnel",
+ id++,
+ closure_bsdf_microfacet_ggx_aniso_fresnel_params(),
+ closure_bsdf_microfacet_ggx_aniso_fresnel_prepare);
+ register_closure(ss,
+ "microfacet_multi_ggx_fresnel",
+ id++,
+ closure_bsdf_microfacet_multi_ggx_fresnel_params(),
+ closure_bsdf_microfacet_multi_ggx_fresnel_prepare);
+ register_closure(ss,
+ "microfacet_multi_ggx_glass_fresnel",
+ id++,
+ closure_bsdf_microfacet_multi_ggx_glass_fresnel_params(),
+ closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare);
+ register_closure(ss,
+ "microfacet_multi_ggx_aniso_fresnel",
+ id++,
+ closure_bsdf_microfacet_multi_ggx_aniso_fresnel_params(),
+ closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare);
+ register_closure(ss,
+ "microfacet_beckmann",
+ id++,
+ bsdf_microfacet_beckmann_params(),
+ bsdf_microfacet_beckmann_prepare);
+ register_closure(ss,
+ "microfacet_beckmann_aniso",
+ id++,
+ bsdf_microfacet_beckmann_aniso_params(),
+ bsdf_microfacet_beckmann_aniso_prepare);
+ register_closure(ss,
+ "microfacet_beckmann_refraction",
+ id++,
+ bsdf_microfacet_beckmann_refraction_params(),
+ bsdf_microfacet_beckmann_refraction_prepare);
+ register_closure(ss,
+ "ashikhmin_shirley",
+ id++,
+ bsdf_ashikhmin_shirley_aniso_params(),
+ bsdf_ashikhmin_shirley_aniso_prepare);
+ register_closure(
+ ss, "ashikhmin_velvet", id++, bsdf_ashikhmin_velvet_params(), bsdf_ashikhmin_velvet_prepare);
+ register_closure(
+ ss, "diffuse_toon", id++, bsdf_diffuse_toon_params(), bsdf_diffuse_toon_prepare);
+ register_closure(ss, "glossy_toon", id++, bsdf_glossy_toon_params(), bsdf_glossy_toon_prepare);
+ register_closure(ss,
+ "principled_diffuse",
+ id++,
+ bsdf_principled_diffuse_params(),
+ bsdf_principled_diffuse_prepare);
+ register_closure(
+ ss, "principled_sheen", id++, bsdf_principled_sheen_params(), bsdf_principled_sheen_prepare);
+ register_closure(ss,
+ "principled_clearcoat",
+ id++,
+ closure_bsdf_principled_clearcoat_params(),
+ closure_bsdf_principled_clearcoat_prepare);
+
+ register_closure(ss, "emission", id++, closure_emission_params(), closure_emission_prepare);
+ register_closure(
+ ss, "background", id++, closure_background_params(), closure_background_prepare);
+ register_closure(ss, "holdout", id++, closure_holdout_params(), closure_holdout_prepare);
+ register_closure(ss,
+ "diffuse_ramp",
+ id++,
+ closure_bsdf_diffuse_ramp_params(),
+ closure_bsdf_diffuse_ramp_prepare);
+ register_closure(
+ ss, "phong_ramp", id++, closure_bsdf_phong_ramp_params(), closure_bsdf_phong_ramp_prepare);
+ register_closure(ss, "bssrdf", id++, closure_bssrdf_params(), closure_bssrdf_prepare);
+
+ register_closure(
+ ss, "hair_reflection", id++, bsdf_hair_reflection_params(), bsdf_hair_reflection_prepare);
+ register_closure(ss,
+ "hair_transmission",
+ id++,
+ bsdf_hair_transmission_params(),
+ bsdf_hair_transmission_prepare);
+
+ register_closure(ss,
+ "principled_hair",
+ id++,
+ closure_bsdf_principled_hair_params(),
+ closure_bsdf_principled_hair_prepare);
+
+ register_closure(ss,
+ "henyey_greenstein",
+ id++,
+ closure_henyey_greenstein_params(),
+ closure_henyey_greenstein_prepare);
+ register_closure(
+ ss, "absorption", id++, closure_absorption_params(), closure_absorption_prepare);
}
/* BSDF Closure */
bool CBSDFClosure::skip(const ShaderData *sd, int path_flag, int scattering)
{
- /* caustic options */
- if((scattering & LABEL_GLOSSY) && (path_flag & PATH_RAY_DIFFUSE)) {
- KernelGlobals *kg = sd->osl_globals;
-
- if((!kernel_data.integrator.caustics_reflective && (scattering & LABEL_REFLECT)) ||
- (!kernel_data.integrator.caustics_refractive && (scattering & LABEL_TRANSMIT)))
- {
- return true;
- }
- }
-
- return false;
-}
+ /* caustic options */
+ if ((scattering & LABEL_GLOSSY) && (path_flag & PATH_RAY_DIFFUSE)) {
+ KernelGlobals *kg = sd->osl_globals;
+ if ((!kernel_data.integrator.caustics_reflective && (scattering & LABEL_REFLECT)) ||
+ (!kernel_data.integrator.caustics_refractive && (scattering & LABEL_TRANSMIT))) {
+ return true;
+ }
+ }
+
+ return false;
+}
/* GGX closures with Fresnel */
class MicrofacetFresnelClosure : public CBSDFClosure {
-public:
- MicrofacetBsdf params;
- float3 color;
- float3 cspec0;
-
- MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
- {
- /* Technically, the MultiGGX Glass closure may also transmit. However,
- * since this is set statically and only used for caustic flags, this
- * is probably as good as it gets. */
- if(skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
- return NULL;
- }
-
- MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc_osl(sd, sizeof(MicrofacetBsdf), weight, &params);
- if(!bsdf) {
- return NULL;
- }
-
- MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
- if(!extra) {
- return NULL;
- }
-
- bsdf->extra = extra;
- bsdf->extra->color = color;
- bsdf->extra->cspec0 = cspec0;
- bsdf->extra->clearcoat = 0.0f;
- return bsdf;
- }
+ public:
+ MicrofacetBsdf params;
+ float3 color;
+ float3 cspec0;
+
+ MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
+ {
+ /* Technically, the MultiGGX Glass closure may also transmit. However,
+ * since this is set statically and only used for caustic flags, this
+ * is probably as good as it gets. */
+ if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
+ return NULL;
+ }
+
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl(
+ sd, sizeof(MicrofacetBsdf), weight, &params);
+ if (!bsdf) {
+ return NULL;
+ }
+
+ MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
+ if (!extra) {
+ return NULL;
+ }
+
+ bsdf->extra = extra;
+ bsdf->extra->color = color;
+ bsdf->extra->cspec0 = cspec0;
+ bsdf->extra->clearcoat = 0.0f;
+ return bsdf;
+ }
};
class MicrofacetGGXFresnelClosure : public MicrofacetFresnelClosure {
-public:
- void setup(ShaderData *sd, int path_flag, float3 weight)
- {
- MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- if(!bsdf) {
- return;
- }
-
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->alpha_y = bsdf->alpha_x;
- sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd);
- }
+ public:
+ void setup(ShaderData *sd, int path_flag, float3 weight)
+ {
+ MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->alpha_y = bsdf->alpha_x;
+ sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd);
+ }
};
ClosureParam *closure_bsdf_microfacet_ggx_fresnel_params()
{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.N),
- CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.ior),
- CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, color),
- CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, cspec0),
- CLOSURE_STRING_KEYPARAM(MicrofacetGGXFresnelClosure, label, "label"),
- CLOSURE_FINISH_PARAM(MicrofacetGGXFresnelClosure)
- };
- return params;
+ static ClosureParam params[] = {
+ CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.N),
+ CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_x),
+ CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.ior),
+ CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, color),
+ CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, cspec0),
+ CLOSURE_STRING_KEYPARAM(MicrofacetGGXFresnelClosure, label, "label"),
+ CLOSURE_FINISH_PARAM(MicrofacetGGXFresnelClosure)};
+ return params;
}
CCLOSURE_PREPARE(closure_bsdf_microfacet_ggx_fresnel_prepare, MicrofacetGGXFresnelClosure);
class MicrofacetGGXAnisoFresnelClosure : public MicrofacetFresnelClosure {
-public:
- void setup(ShaderData *sd, int path_flag, float3 weight)
- {
- MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- if(!bsdf) {
- return;
- }
-
- sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd);
- }
+ public:
+ void setup(ShaderData *sd, int path_flag, float3 weight)
+ {
+ MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
+ if (!bsdf) {
+ return;
+ }
+
+ sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd);
+ }
};
ClosureParam *closure_bsdf_microfacet_ggx_aniso_fresnel_params()
{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.N),
- CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.T),
- CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_y),
- CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.ior),
- CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, color),
- CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, cspec0),
- CLOSURE_STRING_KEYPARAM(MicrofacetGGXFresnelClosure, label, "label"),
- CLOSURE_FINISH_PARAM(MicrofacetGGXFresnelClosure)
- };
- return params;
+ static ClosureParam params[] = {
+ CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.N),
+ CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.T),
+ CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_x),
+ CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_y),
+ CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.ior),
+ CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, color),
+ CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, cspec0),
+ CLOSURE_STRING_KEYPARAM(MicrofacetGGXFresnelClosure, label, "label"),
+ CLOSURE_FINISH_PARAM(MicrofacetGGXFresnelClosure)};
+ return params;
}
-CCLOSURE_PREPARE(closure_bsdf_microfacet_ggx_aniso_fresnel_prepare, MicrofacetGGXAnisoFresnelClosure);
-
+CCLOSURE_PREPARE(closure_bsdf_microfacet_ggx_aniso_fresnel_prepare,
+ MicrofacetGGXAnisoFresnelClosure);
/* Multiscattering GGX closures */
class MicrofacetMultiClosure : public CBSDFClosure {
-public:
- MicrofacetBsdf params;
- float3 color;
-
- MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
- {
- /* Technically, the MultiGGX closure may also transmit. However,
- * since this is set statically and only used for caustic flags, this
- * is probably as good as it gets. */
- if(skip(sd, path_flag, LABEL_GLOSSY|LABEL_REFLECT)) {
- return NULL;
- }
-
- MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc_osl(sd, sizeof(MicrofacetBsdf), weight, &params);
- if(!bsdf) {
- return NULL;
- }
-
- MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
- if(!extra) {
- return NULL;
- }
-
- bsdf->extra = extra;
- bsdf->extra->color = color;
- bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->extra->clearcoat = 0.0f;
- return bsdf;
- }
+ public:
+ MicrofacetBsdf params;
+ float3 color;
+
+ MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
+ {
+ /* Technically, the MultiGGX closure may also transmit. However,
+ * since this is set statically and only used for caustic flags, this
+ * is probably as good as it gets. */
+ if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
+ return NULL;
+ }
+
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl(
+ sd, sizeof(MicrofacetBsdf), weight, &params);
+ if (!bsdf) {
+ return NULL;
+ }
+
+ MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
+ if (!extra) {
+ return NULL;
+ }
+
+ bsdf->extra = extra;
+ bsdf->extra->color = color;
+ bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->extra->clearcoat = 0.0f;
+ return bsdf;
+ }
};
class MicrofacetMultiGGXClosure : public MicrofacetMultiClosure {
-public:
- void setup(ShaderData *sd, int path_flag, float3 weight)
- {
- MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- if(!bsdf) {
- return;
- }
-
- bsdf->ior = 0.0f;
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->alpha_y = bsdf->alpha_x;
- sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf);
- }
+ public:
+ void setup(ShaderData *sd, int path_flag, float3 weight)
+ {
+ MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->ior = 0.0f;
+ bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->alpha_y = bsdf->alpha_x;
+ sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf);
+ }
};
ClosureParam *closure_bsdf_microfacet_multi_ggx_params()
{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color),
- CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"),
- CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)
- };
- return params;
+ static ClosureParam params[] = {
+ CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N),
+ CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x),
+ CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color),
+ CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"),
+ CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)};
+ return params;
}
CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_prepare, MicrofacetMultiGGXClosure);
class MicrofacetMultiGGXAnisoClosure : public MicrofacetMultiClosure {
-public:
- void setup(ShaderData *sd, int path_flag, float3 weight)
- {
- MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- if(!bsdf) {
- return;
- }
-
- bsdf->ior = 0.0f;
- sd->flag |= bsdf_microfacet_multi_ggx_aniso_setup(bsdf);
- }
+ public:
+ void setup(ShaderData *sd, int path_flag, float3 weight)
+ {
+ MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->ior = 0.0f;
+ sd->flag |= bsdf_microfacet_multi_ggx_aniso_setup(bsdf);
+ }
};
ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_params()
{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.T),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_y),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color),
- CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"),
- CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)
- };
- return params;
+ static ClosureParam params[] = {
+ CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N),
+ CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.T),
+ CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x),
+ CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_y),
+ CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color),
+ CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"),
+ CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)};
+ return params;
}
CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_aniso_prepare, MicrofacetMultiGGXAnisoClosure);
class MicrofacetMultiGGXGlassClosure : public MicrofacetMultiClosure {
-public:
- MicrofacetMultiGGXGlassClosure() : MicrofacetMultiClosure() {}
-
- void setup(ShaderData *sd, int path_flag, float3 weight)
- {
- MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- if(!bsdf) {
- return;
- }
-
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->alpha_y = bsdf->alpha_x;
- sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf);
- }
+ public:
+ MicrofacetMultiGGXGlassClosure() : MicrofacetMultiClosure()
+ {
+ }
+
+ void setup(ShaderData *sd, int path_flag, float3 weight)
+ {
+ MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->alpha_y = bsdf->alpha_x;
+ sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf);
+ }
};
ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_params()
{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.ior),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color),
- CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"),
- CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)
- };
- return params;
+ static ClosureParam params[] = {
+ CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N),
+ CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x),
+ CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.ior),
+ CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color),
+ CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"),
+ CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)};
+ return params;
}
CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_glass_prepare, MicrofacetMultiGGXGlassClosure);
-
/* Multiscattering GGX closures with Fresnel */
class MicrofacetMultiFresnelClosure : public CBSDFClosure {
-public:
- MicrofacetBsdf params;
- float3 color;
- float3 cspec0;
-
- MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
- {
- /* Technically, the MultiGGX closure may also transmit. However,
- * since this is set statically and only used for caustic flags, this
- * is probably as good as it gets. */
- if(skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
- return NULL;
- }
-
- MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc_osl(sd, sizeof(MicrofacetBsdf), weight, &params);
- if(!bsdf) {
- return NULL;
- }
-
- MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
- if(!extra) {
- return NULL;
- }
-
- bsdf->extra = extra;
- bsdf->extra->color = color;
- bsdf->extra->cspec0 = cspec0;
- bsdf->extra->clearcoat = 0.0f;
- return bsdf;
- }
+ public:
+ MicrofacetBsdf params;
+ float3 color;
+ float3 cspec0;
+
+ MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
+ {
+ /* Technically, the MultiGGX closure may also transmit. However,
+ * since this is set statically and only used for caustic flags, this
+ * is probably as good as it gets. */
+ if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
+ return NULL;
+ }
+
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl(
+ sd, sizeof(MicrofacetBsdf), weight, &params);
+ if (!bsdf) {
+ return NULL;
+ }
+
+ MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
+ if (!extra) {
+ return NULL;
+ }
+
+ bsdf->extra = extra;
+ bsdf->extra->color = color;
+ bsdf->extra->cspec0 = cspec0;
+ bsdf->extra->clearcoat = 0.0f;
+ return bsdf;
+ }
};
class MicrofacetMultiGGXFresnelClosure : public MicrofacetMultiFresnelClosure {
-public:
- void setup(ShaderData *sd, int path_flag, float3 weight)
- {
- MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- if(!bsdf) {
- return;
- }
-
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->alpha_y = bsdf->alpha_x;
- sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd);
- }
+ public:
+ void setup(ShaderData *sd, int path_flag, float3 weight)
+ {
+ MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->alpha_y = bsdf->alpha_x;
+ sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd);
+ }
};
ClosureParam *closure_bsdf_microfacet_multi_ggx_fresnel_params()
{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0),
- CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"),
- CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)
- };
- return params;
+ static ClosureParam params[] = {
+ CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N),
+ CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x),
+ CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior),
+ CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color),
+ CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0),
+ CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"),
+ CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)};
+ return params;
}
-CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_fresnel_prepare, MicrofacetMultiGGXFresnelClosure);
+CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_fresnel_prepare,
+ MicrofacetMultiGGXFresnelClosure);
class MicrofacetMultiGGXAnisoFresnelClosure : public MicrofacetMultiFresnelClosure {
-public:
- void setup(ShaderData *sd, int path_flag, float3 weight)
- {
- MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- if(!bsdf) {
- return;
- }
-
- sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd);
- }
+ public:
+ void setup(ShaderData *sd, int path_flag, float3 weight)
+ {
+ MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
+ if (!bsdf) {
+ return;
+ }
+
+ sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd);
+ }
};
ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_fresnel_params()
{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.T),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_y),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0),
- CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"),
- CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)
- };
- return params;
+ static ClosureParam params[] = {
+ CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N),
+ CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.T),
+ CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x),
+ CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_y),
+ CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior),
+ CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color),
+ CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0),
+ CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"),
+ CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)};
+ return params;
}
-CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare, MicrofacetMultiGGXAnisoFresnelClosure);
+CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare,
+ MicrofacetMultiGGXAnisoFresnelClosure);
class MicrofacetMultiGGXGlassFresnelClosure : public MicrofacetMultiFresnelClosure {
-public:
- MicrofacetMultiGGXGlassFresnelClosure() : MicrofacetMultiFresnelClosure() {}
-
- void setup(ShaderData *sd, int path_flag, float3 weight)
- {
- MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- if(!bsdf) {
- return;
- }
-
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->alpha_y = bsdf->alpha_x;
- sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd);
- }
+ public:
+ MicrofacetMultiGGXGlassFresnelClosure() : MicrofacetMultiFresnelClosure()
+ {
+ }
+
+ void setup(ShaderData *sd, int path_flag, float3 weight)
+ {
+ MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->alpha_y = bsdf->alpha_x;
+ sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd);
+ }
};
ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_fresnel_params()
{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0),
- CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"),
- CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)
- };
- return params;
+ static ClosureParam params[] = {
+ CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N),
+ CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x),
+ CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior),
+ CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color),
+ CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0),
+ CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"),
+ CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)};
+ return params;
}
-CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare, MicrofacetMultiGGXGlassFresnelClosure);
+CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare,
+ MicrofacetMultiGGXGlassFresnelClosure);
/* Transparent */
class TransparentClosure : public CBSDFClosure {
-public:
- ShaderClosure params;
- float3 unused;
-
- void setup(ShaderData *sd, int path_flag, float3 weight)
- {
- bsdf_transparent_setup(sd, weight, path_flag);
- }
+ public:
+ ShaderClosure params;
+ float3 unused;
+
+ void setup(ShaderData *sd, int path_flag, float3 weight)
+ {
+ bsdf_transparent_setup(sd, weight, path_flag);
+ }
};
ClosureParam *closure_bsdf_transparent_params()
{
- static ClosureParam params[] = {
- CLOSURE_STRING_KEYPARAM(TransparentClosure, label, "label"),
- CLOSURE_FINISH_PARAM(TransparentClosure)
- };
- return params;
+ static ClosureParam params[] = {CLOSURE_STRING_KEYPARAM(TransparentClosure, label, "label"),
+ CLOSURE_FINISH_PARAM(TransparentClosure)};
+ return params;
}
CCLOSURE_PREPARE(closure_bsdf_transparent_prepare, TransparentClosure)
@@ -778,52 +859,49 @@ CCLOSURE_PREPARE(closure_bsdf_transparent_prepare, TransparentClosure)
/* Volume */
class VolumeAbsorptionClosure : public CBSDFClosure {
-public:
- void setup(ShaderData *sd, int path_flag, float3 weight)
- {
- volume_extinction_setup(sd, weight);
- }
+ public:
+ void setup(ShaderData *sd, int path_flag, float3 weight)
+ {
+ volume_extinction_setup(sd, weight);
+ }
};
ClosureParam *closure_absorption_params()
{
- static ClosureParam params[] = {
- CLOSURE_STRING_KEYPARAM(VolumeAbsorptionClosure, label, "label"),
- CLOSURE_FINISH_PARAM(VolumeAbsorptionClosure)
- };
- return params;
+ static ClosureParam params[] = {CLOSURE_STRING_KEYPARAM(VolumeAbsorptionClosure, label, "label"),
+ CLOSURE_FINISH_PARAM(VolumeAbsorptionClosure)};
+ return params;
}
CCLOSURE_PREPARE(closure_absorption_prepare, VolumeAbsorptionClosure)
class VolumeHenyeyGreensteinClosure : public CBSDFClosure {
-public:
- HenyeyGreensteinVolume params;
+ public:
+ HenyeyGreensteinVolume params;
- void setup(ShaderData *sd, int path_flag, float3 weight)
- {
- volume_extinction_setup(sd, weight);
+ void setup(ShaderData *sd, int path_flag, float3 weight)
+ {
+ volume_extinction_setup(sd, weight);
- HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume*)bsdf_alloc_osl(sd, sizeof(HenyeyGreensteinVolume), weight, &params);
- if(!volume) {
- return;
- }
+ HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume *)bsdf_alloc_osl(
+ sd, sizeof(HenyeyGreensteinVolume), weight, &params);
+ if (!volume) {
+ return;
+ }
- sd->flag |= volume_henyey_greenstein_setup(volume);
- }
+ sd->flag |= volume_henyey_greenstein_setup(volume);
+ }
};
ClosureParam *closure_henyey_greenstein_params()
{
- static ClosureParam params[] = {
- CLOSURE_FLOAT_PARAM(VolumeHenyeyGreensteinClosure, params.g),
- CLOSURE_STRING_KEYPARAM(VolumeHenyeyGreensteinClosure, label, "label"),
- CLOSURE_FINISH_PARAM(VolumeHenyeyGreensteinClosure)
- };
- return params;
+ static ClosureParam params[] = {
+ CLOSURE_FLOAT_PARAM(VolumeHenyeyGreensteinClosure, params.g),
+ CLOSURE_STRING_KEYPARAM(VolumeHenyeyGreensteinClosure, label, "label"),
+ CLOSURE_FINISH_PARAM(VolumeHenyeyGreensteinClosure)};
+ return params;
}
CCLOSURE_PREPARE(closure_henyey_greenstein_prepare, VolumeHenyeyGreensteinClosure)
-
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/osl_closures.h b/intern/cycles/kernel/osl/osl_closures.h
index 2a50704b569..d3db6b71f5c 100644
--- a/intern/cycles/kernel/osl/osl_closures.h
+++ b/intern/cycles/kernel/osl/osl_closures.h
@@ -74,24 +74,34 @@ void closure_bsdf_microfacet_multi_ggx_prepare(OSL::RendererServices *, int id,
void closure_bsdf_microfacet_multi_ggx_glass_prepare(OSL::RendererServices *, int id, void *data);
void closure_bsdf_microfacet_multi_ggx_aniso_prepare(OSL::RendererServices *, int id, void *data);
void closure_bsdf_microfacet_ggx_fresnel_prepare(OSL::RendererServices *, int id, void *data);
-void closure_bsdf_microfacet_ggx_aniso_fresnel_prepare(OSL::RendererServices *, int id, void *data);
-void closure_bsdf_microfacet_multi_ggx_fresnel_prepare(OSL::RendererServices *, int id, void *data);
-void closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare(OSL::RendererServices *, int id, void *data);
-void closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare(OSL::RendererServices *, int id, void *data);
+void closure_bsdf_microfacet_ggx_aniso_fresnel_prepare(OSL::RendererServices *,
+ int id,
+ void *data);
+void closure_bsdf_microfacet_multi_ggx_fresnel_prepare(OSL::RendererServices *,
+ int id,
+ void *data);
+void closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare(OSL::RendererServices *,
+ int id,
+ void *data);
+void closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare(OSL::RendererServices *,
+ int id,
+ void *data);
void closure_bsdf_principled_clearcoat_prepare(OSL::RendererServices *, int id, void *data);
void closure_bsdf_principled_hair_prepare(OSL::RendererServices *, int id, void *data);
-#define CCLOSURE_PREPARE(name, classname) \
-void name(RendererServices *, int id, void *data) \
-{ \
- memset(data, 0, sizeof(classname)); \
- new (data) classname(); \
-}
+#define CCLOSURE_PREPARE(name, classname) \
+ void name(RendererServices *, int id, void *data) \
+ { \
+ memset(data, 0, sizeof(classname)); \
+ new (data) classname(); \
+ }
#define CCLOSURE_PREPARE_STATIC(name, classname) static CCLOSURE_PREPARE(name, classname)
#define CLOSURE_FLOAT3_PARAM(st, fld) \
- { TypeDesc::TypeVector, (int)reckless_offsetof(st, fld), NULL, sizeof(OSL::Vec3) }
+ { \
+ TypeDesc::TypeVector, (int)reckless_offsetof(st, fld), NULL, sizeof(OSL::Vec3) \
+ }
#define TO_VEC3(v) OSL::Vec3(v.x, v.y, v.z)
#define TO_COLOR3(v) OSL::Color3(v.x, v.y, v.z)
@@ -100,50 +110,50 @@ void name(RendererServices *, int id, void *data) \
/* Closure */
class CClosurePrimitive {
-public:
- virtual void setup(ShaderData *sd, int path_flag, float3 weight) = 0;
+ public:
+ virtual void setup(ShaderData *sd, int path_flag, float3 weight) = 0;
- OSL::ustring label;
+ OSL::ustring label;
};
/* BSDF */
class CBSDFClosure : public CClosurePrimitive {
-public:
- bool skip(const ShaderData *sd, int path_flag, int scattering);
+ public:
+ bool skip(const ShaderData *sd, int path_flag, int scattering);
};
#define BSDF_CLOSURE_CLASS_BEGIN(Upper, lower, structname, TYPE) \
\
-class Upper##Closure : public CBSDFClosure { \
-public: \
- structname params; \
- float3 unused; \
+ class Upper##Closure : public CBSDFClosure { \
+ public: \
+ structname params; \
+ float3 unused; \
\
- void setup(ShaderData *sd, int path_flag, float3 weight) \
- { \
- if(!skip(sd, path_flag, TYPE)) { \
- structname *bsdf = (structname*)bsdf_alloc_osl(sd, sizeof(structname), weight, &params); \
- sd->flag |= (bsdf) ? bsdf_##lower##_setup(bsdf) : 0; \
- } \
- } \
-}; \
+ void setup(ShaderData *sd, int path_flag, float3 weight) \
+ { \
+ if (!skip(sd, path_flag, TYPE)) { \
+ structname *bsdf = (structname *)bsdf_alloc_osl(sd, sizeof(structname), weight, &params); \
+ sd->flag |= (bsdf) ? bsdf_##lower##_setup(bsdf) : 0; \
+ } \
+ } \
+ }; \
\
-static ClosureParam *bsdf_##lower##_params() \
-{ \
- static ClosureParam params[] = {
+ static ClosureParam *bsdf_##lower##_params() \
+ { \
+ static ClosureParam params[] = {
/* parameters */
#define BSDF_CLOSURE_CLASS_END(Upper, lower) \
- CLOSURE_STRING_KEYPARAM(Upper##Closure, label, "label"), \
- CLOSURE_FINISH_PARAM(Upper##Closure) \
- }; \
- return params; \
-} \
+ CLOSURE_STRING_KEYPARAM(Upper##Closure, label, "label"), CLOSURE_FINISH_PARAM(Upper##Closure) \
+ } \
+ ; \
+ return params; \
+ } \
\
-CCLOSURE_PREPARE_STATIC(bsdf_##lower##_prepare, Upper##Closure)
+ CCLOSURE_PREPARE_STATIC(bsdf_##lower##_prepare, Upper##Closure)
CCL_NAMESPACE_END
-#endif /* __OSL_CLOSURES_H__ */
+#endif /* __OSL_CLOSURES_H__ */
diff --git a/intern/cycles/kernel/osl/osl_globals.h b/intern/cycles/kernel/osl/osl_globals.h
index 88192fbcccb..641c9967586 100644
--- a/intern/cycles/kernel/osl/osl_globals.h
+++ b/intern/cycles/kernel/osl/osl_globals.h
@@ -19,79 +19,79 @@
#ifdef WITH_OSL
-#include <OSL/oslexec.h>
+# include <OSL/oslexec.h>
-#include "util/util_map.h"
-#include "util/util_param.h"
-#include "util/util_thread.h"
-#include "util/util_vector.h"
+# include "util/util_map.h"
+# include "util/util_param.h"
+# include "util/util_thread.h"
+# include "util/util_vector.h"
-#ifndef WIN32
+# ifndef WIN32
using std::isfinite;
-#endif
+# endif
CCL_NAMESPACE_BEGIN
class OSLRenderServices;
struct OSLGlobals {
- OSLGlobals()
- {
- ss = NULL;
- ts = NULL;
- services = NULL;
- use = false;
- }
-
- bool use;
-
- /* shading system */
- OSL::ShadingSystem *ss;
- OSL::TextureSystem *ts;
- OSLRenderServices *services;
-
- /* shader states */
- vector<OSL::ShaderGroupRef> surface_state;
- vector<OSL::ShaderGroupRef> volume_state;
- vector<OSL::ShaderGroupRef> displacement_state;
- vector<OSL::ShaderGroupRef> bump_state;
- OSL::ShaderGroupRef background_state;
-
- /* attributes */
- struct Attribute {
- TypeDesc type;
- AttributeDescriptor desc;
- ParamValue value;
- };
-
- typedef unordered_map<ustring, Attribute, ustringHash> AttributeMap;
- typedef unordered_map<ustring, int, ustringHash> ObjectNameMap;
-
- vector<AttributeMap> attribute_map;
- ObjectNameMap object_name_map;
- vector<ustring> object_names;
+ OSLGlobals()
+ {
+ ss = NULL;
+ ts = NULL;
+ services = NULL;
+ use = false;
+ }
+
+ bool use;
+
+ /* shading system */
+ OSL::ShadingSystem *ss;
+ OSL::TextureSystem *ts;
+ OSLRenderServices *services;
+
+ /* shader states */
+ vector<OSL::ShaderGroupRef> surface_state;
+ vector<OSL::ShaderGroupRef> volume_state;
+ vector<OSL::ShaderGroupRef> displacement_state;
+ vector<OSL::ShaderGroupRef> bump_state;
+ OSL::ShaderGroupRef background_state;
+
+ /* attributes */
+ struct Attribute {
+ TypeDesc type;
+ AttributeDescriptor desc;
+ ParamValue value;
+ };
+
+ typedef unordered_map<ustring, Attribute, ustringHash> AttributeMap;
+ typedef unordered_map<ustring, int, ustringHash> ObjectNameMap;
+
+ vector<AttributeMap> attribute_map;
+ ObjectNameMap object_name_map;
+ vector<ustring> object_names;
};
/* trace() call result */
struct OSLTraceData {
- Ray ray;
- Intersection isect;
- ShaderData sd;
- bool setup;
- bool init;
+ Ray ray;
+ Intersection isect;
+ ShaderData sd;
+ bool setup;
+ bool init;
};
/* thread key for thread specific data lookup */
struct OSLThreadData {
- OSL::ShaderGlobals globals;
- OSL::PerThreadInfo *osl_thread_info;
- OSLTraceData tracedata;
- OSL::ShadingContext *context;
- OIIO::TextureSystem::Perthread *oiio_thread_info;
+ OSL::ShaderGlobals globals;
+ OSL::PerThreadInfo *osl_thread_info;
+ OSLTraceData tracedata;
+ OSL::ShadingContext *context;
+ OIIO::TextureSystem::Perthread *oiio_thread_info;
};
CCL_NAMESPACE_END
#endif
-#endif /* __OSL_GLOBALS_H__ */
+#endif /* __OSL_GLOBALS_H__ */
diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp
index 6464d382634..eb9f672fd8a 100644
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -63,16 +63,16 @@ CCL_NAMESPACE_BEGIN
/* RenderServices implementation */
-static void copy_matrix(OSL::Matrix44& m, const Transform& tfm)
+static void copy_matrix(OSL::Matrix44 &m, const Transform &tfm)
{
- ProjectionTransform t = projection_transpose(ProjectionTransform(tfm));
- memcpy((void *)&m, &t, sizeof(m));
+ ProjectionTransform t = projection_transpose(ProjectionTransform(tfm));
+ memcpy((void *)&m, &t, sizeof(m));
}
-static void copy_matrix(OSL::Matrix44& m, const ProjectionTransform& tfm)
+static void copy_matrix(OSL::Matrix44 &m, const ProjectionTransform &tfm)
{
- ProjectionTransform t = projection_transpose(tfm);
- memcpy((void *)&m, &t, sizeof(m));
+ ProjectionTransform t = projection_transpose(tfm);
+ memcpy((void *)&m, &t, sizeof(m));
}
/* static ustrings */
@@ -129,815 +129,846 @@ ustring OSLRenderServices::u_at_ao("@ao");
OSLRenderServices::OSLRenderServices()
{
- kernel_globals = NULL;
- osl_ts = NULL;
+ kernel_globals = NULL;
+ osl_ts = NULL;
#ifdef WITH_PTEX
- size_t maxmem = 16384 * 1024;
- ptex_cache = PtexCache::create(0, maxmem);
+ size_t maxmem = 16384 * 1024;
+ ptex_cache = PtexCache::create(0, maxmem);
#endif
}
OSLRenderServices::~OSLRenderServices()
{
- if(osl_ts) {
- VLOG(2) << "OSL texture system stats:\n"
- << osl_ts->getstats();
- }
+ if (osl_ts) {
+ VLOG(2) << "OSL texture system stats:\n" << osl_ts->getstats();
+ }
#ifdef WITH_PTEX
- ptex_cache->release();
+ ptex_cache->release();
#endif
}
void OSLRenderServices::thread_init(KernelGlobals *kernel_globals_, OSL::TextureSystem *osl_ts_)
{
- kernel_globals = kernel_globals_;
- osl_ts = osl_ts_;
+ kernel_globals = kernel_globals_;
+ osl_ts = osl_ts_;
}
-bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time)
+bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg,
+ OSL::Matrix44 &result,
+ OSL::TransformationPtr xform,
+ float time)
{
- /* this is only used for shader and object space, we don't really have
- * a concept of shader space, so we just use object space for both. */
- if(xform) {
- const ShaderData *sd = (const ShaderData *)xform;
- KernelGlobals *kg = sd->osl_globals;
- int object = sd->object;
-
- if(object != OBJECT_NONE) {
+ /* this is only used for shader and object space, we don't really have
+ * a concept of shader space, so we just use object space for both. */
+ if (xform) {
+ const ShaderData *sd = (const ShaderData *)xform;
+ KernelGlobals *kg = sd->osl_globals;
+ int object = sd->object;
+
+ if (object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm;
+ Transform tfm;
- if(time == sd->time)
- tfm = sd->ob_tfm;
- else
- tfm = object_fetch_transform_motion_test(kg, object, time, NULL);
+ if (time == sd->time)
+ tfm = sd->ob_tfm;
+ else
+ tfm = object_fetch_transform_motion_test(kg, object, time, NULL);
#else
- Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
#endif
- copy_matrix(result, tfm);
+ copy_matrix(result, tfm);
- return true;
- }
- else if(sd->type == PRIMITIVE_LAMP) {
- copy_matrix(result, sd->ob_tfm);
+ return true;
+ }
+ else if (sd->type == PRIMITIVE_LAMP) {
+ copy_matrix(result, sd->ob_tfm);
- return true;
- }
- }
+ return true;
+ }
+ }
- return false;
+ return false;
}
-bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time)
+bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg,
+ OSL::Matrix44 &result,
+ OSL::TransformationPtr xform,
+ float time)
{
- /* this is only used for shader and object space, we don't really have
- * a concept of shader space, so we just use object space for both. */
- if(xform) {
- const ShaderData *sd = (const ShaderData *)xform;
- KernelGlobals *kg = sd->osl_globals;
- int object = sd->object;
-
- if(object != OBJECT_NONE) {
+ /* this is only used for shader and object space, we don't really have
+ * a concept of shader space, so we just use object space for both. */
+ if (xform) {
+ const ShaderData *sd = (const ShaderData *)xform;
+ KernelGlobals *kg = sd->osl_globals;
+ int object = sd->object;
+
+ if (object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform itfm;
+ Transform itfm;
- if(time == sd->time)
- itfm = sd->ob_itfm;
- else
- object_fetch_transform_motion_test(kg, object, time, &itfm);
+ if (time == sd->time)
+ itfm = sd->ob_itfm;
+ else
+ object_fetch_transform_motion_test(kg, object, time, &itfm);
#else
- Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+ Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
#endif
- copy_matrix(result, itfm);
+ copy_matrix(result, itfm);
- return true;
- }
- else if(sd->type == PRIMITIVE_LAMP) {
- copy_matrix(result, sd->ob_itfm);
+ return true;
+ }
+ else if (sd->type == PRIMITIVE_LAMP) {
+ copy_matrix(result, sd->ob_itfm);
- return true;
- }
- }
+ return true;
+ }
+ }
- return false;
+ return false;
}
-bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from, float time)
+bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg,
+ OSL::Matrix44 &result,
+ ustring from,
+ float time)
{
- KernelGlobals *kg = kernel_globals;
-
- if(from == u_ndc) {
- copy_matrix(result, kernel_data.cam.ndctoworld);
- return true;
- }
- else if(from == u_raster) {
- copy_matrix(result, kernel_data.cam.rastertoworld);
- return true;
- }
- else if(from == u_screen) {
- copy_matrix(result, kernel_data.cam.screentoworld);
- return true;
- }
- else if(from == u_camera) {
- copy_matrix(result, kernel_data.cam.cameratoworld);
- return true;
- }
- else if(from == u_world) {
- result.makeIdentity();
- return true;
- }
-
- return false;
+ KernelGlobals *kg = kernel_globals;
+
+ if (from == u_ndc) {
+ copy_matrix(result, kernel_data.cam.ndctoworld);
+ return true;
+ }
+ else if (from == u_raster) {
+ copy_matrix(result, kernel_data.cam.rastertoworld);
+ return true;
+ }
+ else if (from == u_screen) {
+ copy_matrix(result, kernel_data.cam.screentoworld);
+ return true;
+ }
+ else if (from == u_camera) {
+ copy_matrix(result, kernel_data.cam.cameratoworld);
+ return true;
+ }
+ else if (from == u_world) {
+ result.makeIdentity();
+ return true;
+ }
+
+ return false;
}
-bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring to, float time)
+bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg,
+ OSL::Matrix44 &result,
+ ustring to,
+ float time)
{
- KernelGlobals *kg = kernel_globals;
-
- if(to == u_ndc) {
- copy_matrix(result, kernel_data.cam.worldtondc);
- return true;
- }
- else if(to == u_raster) {
- copy_matrix(result, kernel_data.cam.worldtoraster);
- return true;
- }
- else if(to == u_screen) {
- copy_matrix(result, kernel_data.cam.worldtoscreen);
- return true;
- }
- else if(to == u_camera) {
- copy_matrix(result, kernel_data.cam.worldtocamera);
- return true;
- }
- else if(to == u_world) {
- result.makeIdentity();
- return true;
- }
-
- return false;
+ KernelGlobals *kg = kernel_globals;
+
+ if (to == u_ndc) {
+ copy_matrix(result, kernel_data.cam.worldtondc);
+ return true;
+ }
+ else if (to == u_raster) {
+ copy_matrix(result, kernel_data.cam.worldtoraster);
+ return true;
+ }
+ else if (to == u_screen) {
+ copy_matrix(result, kernel_data.cam.worldtoscreen);
+ return true;
+ }
+ else if (to == u_camera) {
+ copy_matrix(result, kernel_data.cam.worldtocamera);
+ return true;
+ }
+ else if (to == u_world) {
+ result.makeIdentity();
+ return true;
+ }
+
+ return false;
}
-bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform)
+bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg,
+ OSL::Matrix44 &result,
+ OSL::TransformationPtr xform)
{
- /* this is only used for shader and object space, we don't really have
- * a concept of shader space, so we just use object space for both. */
- if(xform) {
- const ShaderData *sd = (const ShaderData *)xform;
- int object = sd->object;
+ /* this is only used for shader and object space, we don't really have
+ * a concept of shader space, so we just use object space for both. */
+ if (xform) {
+ const ShaderData *sd = (const ShaderData *)xform;
+ int object = sd->object;
- if(object != OBJECT_NONE) {
+ if (object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_tfm;
+ Transform tfm = sd->ob_tfm;
#else
- KernelGlobals *kg = sd->osl_globals;
- Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+ KernelGlobals *kg = sd->osl_globals;
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
#endif
- copy_matrix(result, tfm);
+ copy_matrix(result, tfm);
- return true;
- }
- else if(sd->type == PRIMITIVE_LAMP) {
- copy_matrix(result, sd->ob_tfm);
+ return true;
+ }
+ else if (sd->type == PRIMITIVE_LAMP) {
+ copy_matrix(result, sd->ob_tfm);
- return true;
- }
- }
+ return true;
+ }
+ }
- return false;
+ return false;
}
-bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform)
+bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg,
+ OSL::Matrix44 &result,
+ OSL::TransformationPtr xform)
{
- /* this is only used for shader and object space, we don't really have
- * a concept of shader space, so we just use object space for both. */
- if(xform) {
- const ShaderData *sd = (const ShaderData *)xform;
- int object = sd->object;
+ /* this is only used for shader and object space, we don't really have
+ * a concept of shader space, so we just use object space for both. */
+ if (xform) {
+ const ShaderData *sd = (const ShaderData *)xform;
+ int object = sd->object;
- if(object != OBJECT_NONE) {
+ if (object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_itfm;
+ Transform tfm = sd->ob_itfm;
#else
- KernelGlobals *kg = sd->osl_globals;
- Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+ KernelGlobals *kg = sd->osl_globals;
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
#endif
- copy_matrix(result, tfm);
+ copy_matrix(result, tfm);
- return true;
- }
- else if(sd->type == PRIMITIVE_LAMP) {
- copy_matrix(result, sd->ob_itfm);
+ return true;
+ }
+ else if (sd->type == PRIMITIVE_LAMP) {
+ copy_matrix(result, sd->ob_itfm);
- return true;
- }
- }
+ return true;
+ }
+ }
- return false;
+ return false;
}
bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from)
{
- KernelGlobals *kg = kernel_globals;
-
- if(from == u_ndc) {
- copy_matrix(result, kernel_data.cam.ndctoworld);
- return true;
- }
- else if(from == u_raster) {
- copy_matrix(result, kernel_data.cam.rastertoworld);
- return true;
- }
- else if(from == u_screen) {
- copy_matrix(result, kernel_data.cam.screentoworld);
- return true;
- }
- else if(from == u_camera) {
- copy_matrix(result, kernel_data.cam.cameratoworld);
- return true;
- }
-
- return false;
+ KernelGlobals *kg = kernel_globals;
+
+ if (from == u_ndc) {
+ copy_matrix(result, kernel_data.cam.ndctoworld);
+ return true;
+ }
+ else if (from == u_raster) {
+ copy_matrix(result, kernel_data.cam.rastertoworld);
+ return true;
+ }
+ else if (from == u_screen) {
+ copy_matrix(result, kernel_data.cam.screentoworld);
+ return true;
+ }
+ else if (from == u_camera) {
+ copy_matrix(result, kernel_data.cam.cameratoworld);
+ return true;
+ }
+
+ return false;
}
-bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring to)
+bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg,
+ OSL::Matrix44 &result,
+ ustring to)
{
- KernelGlobals *kg = kernel_globals;
-
- if(to == u_ndc) {
- copy_matrix(result, kernel_data.cam.worldtondc);
- return true;
- }
- else if(to == u_raster) {
- copy_matrix(result, kernel_data.cam.worldtoraster);
- return true;
- }
- else if(to == u_screen) {
- copy_matrix(result, kernel_data.cam.worldtoscreen);
- return true;
- }
- else if(to == u_camera) {
- copy_matrix(result, kernel_data.cam.worldtocamera);
- return true;
- }
-
- return false;
+ KernelGlobals *kg = kernel_globals;
+
+ if (to == u_ndc) {
+ copy_matrix(result, kernel_data.cam.worldtondc);
+ return true;
+ }
+ else if (to == u_raster) {
+ copy_matrix(result, kernel_data.cam.worldtoraster);
+ return true;
+ }
+ else if (to == u_screen) {
+ copy_matrix(result, kernel_data.cam.worldtoscreen);
+ return true;
+ }
+ else if (to == u_camera) {
+ copy_matrix(result, kernel_data.cam.worldtocamera);
+ return true;
+ }
+
+ return false;
}
-bool OSLRenderServices::get_array_attribute(OSL::ShaderGlobals *sg, bool derivatives,
- ustring object, TypeDesc type, ustring name,
- int index, void *val)
+bool OSLRenderServices::get_array_attribute(OSL::ShaderGlobals *sg,
+ bool derivatives,
+ ustring object,
+ TypeDesc type,
+ ustring name,
+ int index,
+ void *val)
{
- return false;
+ return false;
}
static bool set_attribute_float2(float2 f[3], TypeDesc type, bool derivatives, void *val)
{
- if(type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
- type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor)
- {
- float *fval = (float *)val;
-
- fval[0] = f[0].x;
- fval[1] = f[0].y;
- fval[2] = 0.0f;
-
- if(derivatives) {
- fval[3] = f[1].x;
- fval[4] = f[1].y;
- fval[5] = 0.0f;
-
- fval[6] = f[2].x;
- fval[7] = f[2].y;
- fval[8] = 0.0f;
- }
-
- return true;
- }
- else if(type == TypeDesc::TypeFloat) {
- float *fval = (float *)val;
- fval[0] = average(f[0]);
-
- if(derivatives) {
- fval[1] = average(f[1]);
- fval[2] = average(f[2]);
- }
-
- return true;
- }
-
- return false;
+ if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
+ type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) {
+ float *fval = (float *)val;
+
+ fval[0] = f[0].x;
+ fval[1] = f[0].y;
+ fval[2] = 0.0f;
+
+ if (derivatives) {
+ fval[3] = f[1].x;
+ fval[4] = f[1].y;
+ fval[5] = 0.0f;
+
+ fval[6] = f[2].x;
+ fval[7] = f[2].y;
+ fval[8] = 0.0f;
+ }
+
+ return true;
+ }
+ else if (type == TypeDesc::TypeFloat) {
+ float *fval = (float *)val;
+ fval[0] = average(f[0]);
+
+ if (derivatives) {
+ fval[1] = average(f[1]);
+ fval[2] = average(f[2]);
+ }
+
+ return true;
+ }
+
+ return false;
}
static bool set_attribute_float3(float3 f[3], TypeDesc type, bool derivatives, void *val)
{
- if(type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
- type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor)
- {
- float *fval = (float *)val;
-
- fval[0] = f[0].x;
- fval[1] = f[0].y;
- fval[2] = f[0].z;
-
- if(derivatives) {
- fval[3] = f[1].x;
- fval[4] = f[1].y;
- fval[5] = f[1].z;
-
- fval[6] = f[2].x;
- fval[7] = f[2].y;
- fval[8] = f[2].z;
- }
-
- return true;
- }
- else if(type == TypeDesc::TypeFloat) {
- float *fval = (float *)val;
- fval[0] = average(f[0]);
-
- if(derivatives) {
- fval[1] = average(f[1]);
- fval[2] = average(f[2]);
- }
-
- return true;
- }
-
- return false;
+ if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
+ type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) {
+ float *fval = (float *)val;
+
+ fval[0] = f[0].x;
+ fval[1] = f[0].y;
+ fval[2] = f[0].z;
+
+ if (derivatives) {
+ fval[3] = f[1].x;
+ fval[4] = f[1].y;
+ fval[5] = f[1].z;
+
+ fval[6] = f[2].x;
+ fval[7] = f[2].y;
+ fval[8] = f[2].z;
+ }
+
+ return true;
+ }
+ else if (type == TypeDesc::TypeFloat) {
+ float *fval = (float *)val;
+ fval[0] = average(f[0]);
+
+ if (derivatives) {
+ fval[1] = average(f[1]);
+ fval[2] = average(f[2]);
+ }
+
+ return true;
+ }
+
+ return false;
}
static bool set_attribute_float3(float3 f, TypeDesc type, bool derivatives, void *val)
{
- float3 fv[3];
+ float3 fv[3];
- fv[0] = f;
- fv[1] = make_float3(0.0f, 0.0f, 0.0f);
- fv[2] = make_float3(0.0f, 0.0f, 0.0f);
+ fv[0] = f;
+ fv[1] = make_float3(0.0f, 0.0f, 0.0f);
+ fv[2] = make_float3(0.0f, 0.0f, 0.0f);
- return set_attribute_float3(fv, type, derivatives, val);
+ return set_attribute_float3(fv, type, derivatives, val);
}
static bool set_attribute_float(float f[3], TypeDesc type, bool derivatives, void *val)
{
- if(type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
- type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor)
- {
- float *fval = (float *)val;
- fval[0] = f[0];
- fval[1] = f[1];
- fval[2] = f[2];
-
- if(derivatives) {
- fval[3] = f[1];
- fval[4] = f[1];
- fval[5] = f[1];
-
- fval[6] = f[2];
- fval[7] = f[2];
- fval[8] = f[2];
- }
-
- return true;
- }
- else if(type == TypeDesc::TypeFloat) {
- float *fval = (float *)val;
- fval[0] = f[0];
-
- if(derivatives) {
- fval[1] = f[1];
- fval[2] = f[2];
- }
-
- return true;
- }
-
- return false;
+ if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
+ type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) {
+ float *fval = (float *)val;
+ fval[0] = f[0];
+ fval[1] = f[1];
+ fval[2] = f[2];
+
+ if (derivatives) {
+ fval[3] = f[1];
+ fval[4] = f[1];
+ fval[5] = f[1];
+
+ fval[6] = f[2];
+ fval[7] = f[2];
+ fval[8] = f[2];
+ }
+
+ return true;
+ }
+ else if (type == TypeDesc::TypeFloat) {
+ float *fval = (float *)val;
+ fval[0] = f[0];
+
+ if (derivatives) {
+ fval[1] = f[1];
+ fval[2] = f[2];
+ }
+
+ return true;
+ }
+
+ return false;
}
static bool set_attribute_float(float f, TypeDesc type, bool derivatives, void *val)
{
- float fv[3];
+ float fv[3];
- fv[0] = f;
- fv[1] = 0.0f;
- fv[2] = 0.0f;
+ fv[0] = f;
+ fv[1] = 0.0f;
+ fv[2] = 0.0f;
- return set_attribute_float(fv, type, derivatives, val);
+ return set_attribute_float(fv, type, derivatives, val);
}
static bool set_attribute_int(int i, TypeDesc type, bool derivatives, void *val)
{
- if(type.basetype == TypeDesc::INT && type.aggregate == TypeDesc::SCALAR && type.arraylen == 0) {
- int *ival = (int *)val;
- ival[0] = i;
+ if (type.basetype == TypeDesc::INT && type.aggregate == TypeDesc::SCALAR && type.arraylen == 0) {
+ int *ival = (int *)val;
+ ival[0] = i;
- if(derivatives) {
- ival[1] = 0;
- ival[2] = 0;
- }
+ if (derivatives) {
+ ival[1] = 0;
+ ival[2] = 0;
+ }
- return true;
- }
+ return true;
+ }
- return false;
+ return false;
}
static bool set_attribute_string(ustring str, TypeDesc type, bool derivatives, void *val)
{
- if(type.basetype == TypeDesc::STRING && type.aggregate == TypeDesc::SCALAR && type.arraylen == 0) {
- ustring *sval = (ustring *)val;
- sval[0] = str;
+ if (type.basetype == TypeDesc::STRING && type.aggregate == TypeDesc::SCALAR &&
+ type.arraylen == 0) {
+ ustring *sval = (ustring *)val;
+ sval[0] = str;
- if(derivatives) {
- sval[1] = OSLRenderServices::u_empty;
- sval[2] = OSLRenderServices::u_empty;
- }
+ if (derivatives) {
+ sval[1] = OSLRenderServices::u_empty;
+ sval[2] = OSLRenderServices::u_empty;
+ }
- return true;
- }
+ return true;
+ }
- return false;
+ return false;
}
static bool set_attribute_float3_3(float3 P[3], TypeDesc type, bool derivatives, void *val)
{
- if(type.vecsemantics == TypeDesc::POINT && type.arraylen >= 3) {
- float *fval = (float *)val;
+ if (type.vecsemantics == TypeDesc::POINT && type.arraylen >= 3) {
+ float *fval = (float *)val;
- fval[0] = P[0].x;
- fval[1] = P[0].y;
- fval[2] = P[0].z;
+ fval[0] = P[0].x;
+ fval[1] = P[0].y;
+ fval[2] = P[0].z;
- fval[3] = P[1].x;
- fval[4] = P[1].y;
- fval[5] = P[1].z;
+ fval[3] = P[1].x;
+ fval[4] = P[1].y;
+ fval[5] = P[1].z;
- fval[6] = P[2].x;
- fval[7] = P[2].y;
- fval[8] = P[2].z;
+ fval[6] = P[2].x;
+ fval[7] = P[2].y;
+ fval[8] = P[2].z;
- if(type.arraylen > 3)
- memset(fval + 3*3, 0, sizeof(float)*3*(type.arraylen - 3));
- if(derivatives)
- memset(fval + type.arraylen*3, 0, sizeof(float)*2*3*type.arraylen);
+ if (type.arraylen > 3)
+ memset(fval + 3 * 3, 0, sizeof(float) * 3 * (type.arraylen - 3));
+ if (derivatives)
+ memset(fval + type.arraylen * 3, 0, sizeof(float) * 2 * 3 * type.arraylen);
- return true;
- }
+ return true;
+ }
- return false;
+ return false;
}
-static bool set_attribute_matrix(const Transform& tfm, TypeDesc type, void *val)
+static bool set_attribute_matrix(const Transform &tfm, TypeDesc type, void *val)
{
- if(type == TypeDesc::TypeMatrix) {
- copy_matrix(*(OSL::Matrix44*)val, tfm);
- return true;
- }
+ if (type == TypeDesc::TypeMatrix) {
+ copy_matrix(*(OSL::Matrix44 *)val, tfm);
+ return true;
+ }
- return false;
+ return false;
}
-static bool get_primitive_attribute(KernelGlobals *kg, const ShaderData *sd, const OSLGlobals::Attribute& attr,
- const TypeDesc& type, bool derivatives, void *val)
+static bool get_primitive_attribute(KernelGlobals *kg,
+ const ShaderData *sd,
+ const OSLGlobals::Attribute &attr,
+ const TypeDesc &type,
+ bool derivatives,
+ void *val)
{
- if(attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector ||
- attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor)
- {
- float3 fval[3];
- fval[0] = primitive_attribute_float3(kg, sd, attr.desc,
- (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
- return set_attribute_float3(fval, type, derivatives, val);
- }
- else if(attr.type == TypeFloat2) {
- float2 fval[2];
- fval[0] = primitive_attribute_float2(kg, sd, attr.desc,
- (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
- return set_attribute_float2(fval, type, derivatives, val);
- }
- else if(attr.type == TypeDesc::TypeFloat) {
- float fval[3];
- fval[0] = primitive_attribute_float(kg, sd, attr.desc,
- (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
- return set_attribute_float(fval, type, derivatives, val);
- }
- else {
- return false;
- }
+ if (attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector ||
+ attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) {
+ float3 fval[3];
+ fval[0] = primitive_attribute_float3(
+ kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
+ return set_attribute_float3(fval, type, derivatives, val);
+ }
+ else if (attr.type == TypeFloat2) {
+ float2 fval[2];
+ fval[0] = primitive_attribute_float2(
+ kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
+ return set_attribute_float2(fval, type, derivatives, val);
+ }
+ else if (attr.type == TypeDesc::TypeFloat) {
+ float fval[3];
+ fval[0] = primitive_attribute_float(
+ kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
+ return set_attribute_float(fval, type, derivatives, val);
+ }
+ else {
+ return false;
+ }
}
-static bool get_mesh_attribute(KernelGlobals *kg, const ShaderData *sd, const OSLGlobals::Attribute& attr,
- const TypeDesc& type, bool derivatives, void *val)
+static bool get_mesh_attribute(KernelGlobals *kg,
+ const ShaderData *sd,
+ const OSLGlobals::Attribute &attr,
+ const TypeDesc &type,
+ bool derivatives,
+ void *val)
{
- if(attr.type == TypeDesc::TypeMatrix) {
- Transform tfm = primitive_attribute_matrix(kg, sd, attr.desc);
- return set_attribute_matrix(tfm, type, val);
- }
- else {
- return false;
- }
+ if (attr.type == TypeDesc::TypeMatrix) {
+ Transform tfm = primitive_attribute_matrix(kg, sd, attr.desc);
+ return set_attribute_matrix(tfm, type, val);
+ }
+ else {
+ return false;
+ }
}
-static void get_object_attribute(const OSLGlobals::Attribute& attr, bool derivatives, void *val)
+static void get_object_attribute(const OSLGlobals::Attribute &attr, bool derivatives, void *val)
{
- size_t datasize = attr.value.datasize();
+ size_t datasize = attr.value.datasize();
- memcpy(val, attr.value.data(), datasize);
- if(derivatives)
- memset((char *)val + datasize, 0, datasize * 2);
+ memcpy(val, attr.value.data(), datasize);
+ if (derivatives)
+ memset((char *)val + datasize, 0, datasize * 2);
}
-bool OSLRenderServices::get_object_standard_attribute(KernelGlobals *kg, ShaderData *sd, ustring name,
- TypeDesc type, bool derivatives, void *val)
+bool OSLRenderServices::get_object_standard_attribute(
+ KernelGlobals *kg, ShaderData *sd, ustring name, TypeDesc type, bool derivatives, void *val)
{
- /* todo: turn this into hash table? */
-
- /* Object Attributes */
- if(name == u_object_location) {
- float3 f = object_location(kg, sd);
- return set_attribute_float3(f, type, derivatives, val);
- }
- else if(name == u_object_index) {
- float f = object_pass_id(kg, sd->object);
- return set_attribute_float(f, type, derivatives, val);
- }
- else if(name == u_geom_dupli_generated) {
- float3 f = object_dupli_generated(kg, sd->object);
- return set_attribute_float3(f, type, derivatives, val);
- }
- else if(name == u_geom_dupli_uv) {
- float3 f = object_dupli_uv(kg, sd->object);
- return set_attribute_float3(f, type, derivatives, val);
- }
- else if(name == u_material_index) {
- float f = shader_pass_id(kg, sd);
- return set_attribute_float(f, type, derivatives, val);
- }
- else if(name == u_object_random) {
- float f = object_random_number(kg, sd->object);
- return set_attribute_float(f, type, derivatives, val);
- }
-
- /* Particle Attributes */
- else if(name == u_particle_index) {
- int particle_id = object_particle_id(kg, sd->object);
- float f = particle_index(kg, particle_id);
- return set_attribute_float(f, type, derivatives, val);
- }
- else if(name == u_particle_random) {
- int particle_id = object_particle_id(kg, sd->object);
- float f = hash_int_01(particle_index(kg, particle_id));
- return set_attribute_float(f, type, derivatives, val);
- }
-
- else if(name == u_particle_age) {
- int particle_id = object_particle_id(kg, sd->object);
- float f = particle_age(kg, particle_id);
- return set_attribute_float(f, type, derivatives, val);
- }
- else if(name == u_particle_lifetime) {
- int particle_id = object_particle_id(kg, sd->object);
- float f = particle_lifetime(kg, particle_id);
- return set_attribute_float(f, type, derivatives, val);
- }
- else if(name == u_particle_location) {
- int particle_id = object_particle_id(kg, sd->object);
- float3 f = particle_location(kg, particle_id);
- return set_attribute_float3(f, type, derivatives, val);
- }
-#if 0 /* unsupported */
- else if(name == u_particle_rotation) {
- int particle_id = object_particle_id(kg, sd->object);
- float4 f = particle_rotation(kg, particle_id);
- return set_attribute_float4(f, type, derivatives, val);
- }
+ /* todo: turn this into hash table? */
+
+ /* Object Attributes */
+ if (name == u_object_location) {
+ float3 f = object_location(kg, sd);
+ return set_attribute_float3(f, type, derivatives, val);
+ }
+ else if (name == u_object_index) {
+ float f = object_pass_id(kg, sd->object);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else if (name == u_geom_dupli_generated) {
+ float3 f = object_dupli_generated(kg, sd->object);
+ return set_attribute_float3(f, type, derivatives, val);
+ }
+ else if (name == u_geom_dupli_uv) {
+ float3 f = object_dupli_uv(kg, sd->object);
+ return set_attribute_float3(f, type, derivatives, val);
+ }
+ else if (name == u_material_index) {
+ float f = shader_pass_id(kg, sd);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else if (name == u_object_random) {
+ float f = object_random_number(kg, sd->object);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+
+ /* Particle Attributes */
+ else if (name == u_particle_index) {
+ int particle_id = object_particle_id(kg, sd->object);
+ float f = particle_index(kg, particle_id);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else if (name == u_particle_random) {
+ int particle_id = object_particle_id(kg, sd->object);
+ float f = hash_int_01(particle_index(kg, particle_id));
+ return set_attribute_float(f, type, derivatives, val);
+ }
+
+ else if (name == u_particle_age) {
+ int particle_id = object_particle_id(kg, sd->object);
+ float f = particle_age(kg, particle_id);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else if (name == u_particle_lifetime) {
+ int particle_id = object_particle_id(kg, sd->object);
+ float f = particle_lifetime(kg, particle_id);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else if (name == u_particle_location) {
+ int particle_id = object_particle_id(kg, sd->object);
+ float3 f = particle_location(kg, particle_id);
+ return set_attribute_float3(f, type, derivatives, val);
+ }
+#if 0 /* unsupported */
+ else if(name == u_particle_rotation) {
+ int particle_id = object_particle_id(kg, sd->object);
+ float4 f = particle_rotation(kg, particle_id);
+ return set_attribute_float4(f, type, derivatives, val);
+ }
#endif
- else if(name == u_particle_size) {
- int particle_id = object_particle_id(kg, sd->object);
- float f = particle_size(kg, particle_id);
- return set_attribute_float(f, type, derivatives, val);
- }
- else if(name == u_particle_velocity) {
- int particle_id = object_particle_id(kg, sd->object);
- float3 f = particle_velocity(kg, particle_id);
- return set_attribute_float3(f, type, derivatives, val);
- }
- else if(name == u_particle_angular_velocity) {
- int particle_id = object_particle_id(kg, sd->object);
- float3 f = particle_angular_velocity(kg, particle_id);
- return set_attribute_float3(f, type, derivatives, val);
- }
-
- /* Geometry Attributes */
- else if(name == u_geom_numpolyvertices) {
- return set_attribute_int(3, type, derivatives, val);
- }
- else if((name == u_geom_trianglevertices || name == u_geom_polyvertices)
- && sd->type & PRIMITIVE_ALL_TRIANGLE)
- {
- float3 P[3];
-
- if(sd->type & PRIMITIVE_TRIANGLE)
- triangle_vertices(kg, sd->prim, P);
- else
- motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, P);
-
- if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
- object_position_transform(kg, sd, &P[0]);
- object_position_transform(kg, sd, &P[1]);
- object_position_transform(kg, sd, &P[2]);
- }
-
- return set_attribute_float3_3(P, type, derivatives, val);
- }
- else if(name == u_geom_name) {
- ustring object_name = kg->osl->object_names[sd->object];
- return set_attribute_string(object_name, type, derivatives, val);
- }
- else if(name == u_is_smooth) {
- float f = ((sd->shader & SHADER_SMOOTH_NORMAL) != 0);
- return set_attribute_float(f, type, derivatives, val);
- }
- /* Hair Attributes */
- else if(name == u_is_curve) {
- float f = (sd->type & PRIMITIVE_ALL_CURVE) != 0;
- return set_attribute_float(f, type, derivatives, val);
- }
- else if(name == u_curve_thickness) {
- float f = curve_thickness(kg, sd);
- return set_attribute_float(f, type, derivatives, val);
- }
- else if(name == u_curve_tangent_normal) {
- float3 f = curve_tangent_normal(kg, sd);
- return set_attribute_float3(f, type, derivatives, val);
- }
- else
- return false;
+ else if (name == u_particle_size) {
+ int particle_id = object_particle_id(kg, sd->object);
+ float f = particle_size(kg, particle_id);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else if (name == u_particle_velocity) {
+ int particle_id = object_particle_id(kg, sd->object);
+ float3 f = particle_velocity(kg, particle_id);
+ return set_attribute_float3(f, type, derivatives, val);
+ }
+ else if (name == u_particle_angular_velocity) {
+ int particle_id = object_particle_id(kg, sd->object);
+ float3 f = particle_angular_velocity(kg, particle_id);
+ return set_attribute_float3(f, type, derivatives, val);
+ }
+
+ /* Geometry Attributes */
+ else if (name == u_geom_numpolyvertices) {
+ return set_attribute_int(3, type, derivatives, val);
+ }
+ else if ((name == u_geom_trianglevertices || name == u_geom_polyvertices) &&
+ sd->type & PRIMITIVE_ALL_TRIANGLE) {
+ float3 P[3];
+
+ if (sd->type & PRIMITIVE_TRIANGLE)
+ triangle_vertices(kg, sd->prim, P);
+ else
+ motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, P);
+
+ if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+ object_position_transform(kg, sd, &P[0]);
+ object_position_transform(kg, sd, &P[1]);
+ object_position_transform(kg, sd, &P[2]);
+ }
+
+ return set_attribute_float3_3(P, type, derivatives, val);
+ }
+ else if (name == u_geom_name) {
+ ustring object_name = kg->osl->object_names[sd->object];
+ return set_attribute_string(object_name, type, derivatives, val);
+ }
+ else if (name == u_is_smooth) {
+ float f = ((sd->shader & SHADER_SMOOTH_NORMAL) != 0);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ /* Hair Attributes */
+ else if (name == u_is_curve) {
+ float f = (sd->type & PRIMITIVE_ALL_CURVE) != 0;
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else if (name == u_curve_thickness) {
+ float f = curve_thickness(kg, sd);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else if (name == u_curve_tangent_normal) {
+ float3 f = curve_tangent_normal(kg, sd);
+ return set_attribute_float3(f, type, derivatives, val);
+ }
+ else
+ return false;
}
-bool OSLRenderServices::get_background_attribute(KernelGlobals *kg, ShaderData *sd, ustring name,
- TypeDesc type, bool derivatives, void *val)
+bool OSLRenderServices::get_background_attribute(
+ KernelGlobals *kg, ShaderData *sd, ustring name, TypeDesc type, bool derivatives, void *val)
{
- if(name == u_path_ray_length) {
- /* Ray Length */
- float f = sd->ray_length;
- return set_attribute_float(f, type, derivatives, val);
- }
- else if(name == u_path_ray_depth) {
- /* Ray Depth */
- PathState *state = sd->osl_path_state;
- int f = state->bounce;
- return set_attribute_int(f, type, derivatives, val);
- }
- else if(name == u_path_diffuse_depth) {
- /* Diffuse Ray Depth */
- PathState *state = sd->osl_path_state;
- int f = state->diffuse_bounce;
- return set_attribute_int(f, type, derivatives, val);
- }
- else if(name == u_path_glossy_depth) {
- /* Glossy Ray Depth */
- PathState *state = sd->osl_path_state;
- int f = state->glossy_bounce;
- return set_attribute_int(f, type, derivatives, val);
- }
- else if(name == u_path_transmission_depth) {
- /* Transmission Ray Depth */
- PathState *state = sd->osl_path_state;
- int f = state->transmission_bounce;
- return set_attribute_int(f, type, derivatives, val);
- }
- else if(name == u_path_transparent_depth) {
- /* Transparent Ray Depth */
- PathState *state = sd->osl_path_state;
- int f = state->transparent_bounce;
- return set_attribute_int(f, type, derivatives, val);
- }
- else if(name == u_path_transmission_depth) {
- /* Transmission Ray Depth */
- PathState *state = sd->osl_path_state;
- int f = state->transmission_bounce;
- return set_attribute_int(f, type, derivatives, val);
- }
- else if(name == u_ndc) {
- /* NDC coordinates with special exception for otho */
- OSLThreadData *tdata = kg->osl_tdata;
- OSL::ShaderGlobals *globals = &tdata->globals;
- float3 ndc[3];
-
- if((globals->raytype & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) {
- ndc[0] = camera_world_to_ndc(kg, sd, sd->ray_P);
-
- if(derivatives) {
- ndc[1] = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dx) - ndc[0];
- ndc[2] = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dy) - ndc[0];
- }
- }
- else {
- ndc[0] = camera_world_to_ndc(kg, sd, sd->P);
-
- if(derivatives) {
- ndc[1] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx) - ndc[0];
- ndc[2] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy) - ndc[0];
- }
- }
-
- return set_attribute_float3(ndc, type, derivatives, val);
- }
- else
- return false;
+ if (name == u_path_ray_length) {
+ /* Ray Length */
+ float f = sd->ray_length;
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else if (name == u_path_ray_depth) {
+ /* Ray Depth */
+ PathState *state = sd->osl_path_state;
+ int f = state->bounce;
+ return set_attribute_int(f, type, derivatives, val);
+ }
+ else if (name == u_path_diffuse_depth) {
+ /* Diffuse Ray Depth */
+ PathState *state = sd->osl_path_state;
+ int f = state->diffuse_bounce;
+ return set_attribute_int(f, type, derivatives, val);
+ }
+ else if (name == u_path_glossy_depth) {
+ /* Glossy Ray Depth */
+ PathState *state = sd->osl_path_state;
+ int f = state->glossy_bounce;
+ return set_attribute_int(f, type, derivatives, val);
+ }
+ else if (name == u_path_transmission_depth) {
+ /* Transmission Ray Depth */
+ PathState *state = sd->osl_path_state;
+ int f = state->transmission_bounce;
+ return set_attribute_int(f, type, derivatives, val);
+ }
+ else if (name == u_path_transparent_depth) {
+ /* Transparent Ray Depth */
+ PathState *state = sd->osl_path_state;
+ int f = state->transparent_bounce;
+ return set_attribute_int(f, type, derivatives, val);
+ }
+ else if (name == u_path_transmission_depth) {
+ /* Transmission Ray Depth */
+ PathState *state = sd->osl_path_state;
+ int f = state->transmission_bounce;
+ return set_attribute_int(f, type, derivatives, val);
+ }
+ else if (name == u_ndc) {
+ /* NDC coordinates with special exception for otho */
+ OSLThreadData *tdata = kg->osl_tdata;
+ OSL::ShaderGlobals *globals = &tdata->globals;
+ float3 ndc[3];
+
+ if ((globals->raytype & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE &&
+ kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) {
+ ndc[0] = camera_world_to_ndc(kg, sd, sd->ray_P);
+
+ if (derivatives) {
+ ndc[1] = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dx) - ndc[0];
+ ndc[2] = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dy) - ndc[0];
+ }
+ }
+ else {
+ ndc[0] = camera_world_to_ndc(kg, sd, sd->P);
+
+ if (derivatives) {
+ ndc[1] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx) - ndc[0];
+ ndc[2] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy) - ndc[0];
+ }
+ }
+
+ return set_attribute_float3(ndc, type, derivatives, val);
+ }
+ else
+ return false;
}
-bool OSLRenderServices::get_attribute(OSL::ShaderGlobals *sg, bool derivatives, ustring object_name,
- TypeDesc type, ustring name, void *val)
+bool OSLRenderServices::get_attribute(OSL::ShaderGlobals *sg,
+ bool derivatives,
+ ustring object_name,
+ TypeDesc type,
+ ustring name,
+ void *val)
{
- if(sg == NULL || sg->renderstate == NULL)
- return false;
+ if (sg == NULL || sg->renderstate == NULL)
+ return false;
- ShaderData *sd = (ShaderData *)(sg->renderstate);
- return get_attribute(sd, derivatives, object_name, type, name, val);
+ ShaderData *sd = (ShaderData *)(sg->renderstate);
+ return get_attribute(sd, derivatives, object_name, type, name, val);
}
-bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring object_name,
- TypeDesc type, ustring name, void *val)
+bool OSLRenderServices::get_attribute(
+ ShaderData *sd, bool derivatives, ustring object_name, TypeDesc type, ustring name, void *val)
{
- KernelGlobals *kg = sd->osl_globals;
- int prim_type = 0;
- int object;
-
- /* lookup of attribute on another object */
- if(object_name != u_empty) {
- OSLGlobals::ObjectNameMap::iterator it = kg->osl->object_name_map.find(object_name);
-
- if(it == kg->osl->object_name_map.end())
- return false;
-
- object = it->second;
- }
- else {
- object = sd->object;
- prim_type = attribute_primitive_type(kg, sd);
-
- if(object == OBJECT_NONE)
- return get_background_attribute(kg, sd, name, type, derivatives, val);
- }
-
- /* find attribute on object */
- object = object*ATTR_PRIM_TYPES + prim_type;
- OSLGlobals::AttributeMap& attribute_map = kg->osl->attribute_map[object];
- OSLGlobals::AttributeMap::iterator it = attribute_map.find(name);
-
- if(it != attribute_map.end()) {
- const OSLGlobals::Attribute& attr = it->second;
-
- if(attr.desc.element != ATTR_ELEMENT_OBJECT) {
- /* triangle and vertex attributes */
- if(get_primitive_attribute(kg, sd, attr, type, derivatives, val))
- return true;
- else
- return get_mesh_attribute(kg, sd, attr, type, derivatives, val);
- }
- else {
- /* object attribute */
- get_object_attribute(attr, derivatives, val);
- return true;
- }
- }
- else {
- /* not found in attribute, check standard object info */
- bool is_std_object_attribute = get_object_standard_attribute(kg, sd, name, type, derivatives, val);
-
- if(is_std_object_attribute)
- return true;
-
- return get_background_attribute(kg, sd, name, type, derivatives, val);
- }
-
- return false;
+ KernelGlobals *kg = sd->osl_globals;
+ int prim_type = 0;
+ int object;
+
+ /* lookup of attribute on another object */
+ if (object_name != u_empty) {
+ OSLGlobals::ObjectNameMap::iterator it = kg->osl->object_name_map.find(object_name);
+
+ if (it == kg->osl->object_name_map.end())
+ return false;
+
+ object = it->second;
+ }
+ else {
+ object = sd->object;
+ prim_type = attribute_primitive_type(kg, sd);
+
+ if (object == OBJECT_NONE)
+ return get_background_attribute(kg, sd, name, type, derivatives, val);
+ }
+
+ /* find attribute on object */
+ object = object * ATTR_PRIM_TYPES + prim_type;
+ OSLGlobals::AttributeMap &attribute_map = kg->osl->attribute_map[object];
+ OSLGlobals::AttributeMap::iterator it = attribute_map.find(name);
+
+ if (it != attribute_map.end()) {
+ const OSLGlobals::Attribute &attr = it->second;
+
+ if (attr.desc.element != ATTR_ELEMENT_OBJECT) {
+ /* triangle and vertex attributes */
+ if (get_primitive_attribute(kg, sd, attr, type, derivatives, val))
+ return true;
+ else
+ return get_mesh_attribute(kg, sd, attr, type, derivatives, val);
+ }
+ else {
+ /* object attribute */
+ get_object_attribute(attr, derivatives, val);
+ return true;
+ }
+ }
+ else {
+ /* not found in attribute, check standard object info */
+ bool is_std_object_attribute = get_object_standard_attribute(
+ kg, sd, name, type, derivatives, val);
+
+ if (is_std_object_attribute)
+ return true;
+
+ return get_background_attribute(kg, sd, name, type, derivatives, val);
+ }
+
+ return false;
}
-bool OSLRenderServices::get_userdata(bool derivatives, ustring name, TypeDesc type,
- OSL::ShaderGlobals *sg, void *val)
+bool OSLRenderServices::get_userdata(
+ bool derivatives, ustring name, TypeDesc type, OSL::ShaderGlobals *sg, void *val)
{
- return false; /* disabled by lockgeom */
+ return false; /* disabled by lockgeom */
}
TextureSystem::TextureHandle *OSLRenderServices::get_texture_handle(ustring filename)
{
- if(filename.length() && filename[0] == '@') {
- /* Dummy, we don't use texture handles for builtin textures but need
- * to tell the OSL runtime optimizer that this is a valid texture. */
- return NULL;
- }
- else {
- return texturesys()->get_texture_handle(filename);
- }
+ if (filename.length() && filename[0] == '@') {
+ /* Dummy, we don't use texture handles for builtin textures but need
+ * to tell the OSL runtime optimizer that this is a valid texture. */
+ return NULL;
+ }
+ else {
+ return texturesys()->get_texture_handle(filename);
+ }
}
bool OSLRenderServices::good(TextureSystem::TextureHandle *texture_handle)
{
- return texturesys()->good(texture_handle);
+ return texturesys()->good(texture_handle);
}
bool OSLRenderServices::texture(ustring filename,
@@ -945,157 +976,169 @@ bool OSLRenderServices::texture(ustring filename,
TexturePerthread *texture_thread_info,
TextureOpt &options,
OSL::ShaderGlobals *sg,
- float s, float t,
- float dsdx, float dtdx, float dsdy, float dtdy,
+ float s,
+ float t,
+ float dsdx,
+ float dtdx,
+ float dsdy,
+ float dtdy,
int nchannels,
float *result,
float *dresultds,
float *dresultdt,
ustring *errormessage)
{
- OSL::TextureSystem *ts = osl_ts;
- ShaderData *sd = (ShaderData *)(sg->renderstate);
- KernelGlobals *kg = sd->osl_globals;
+ OSL::TextureSystem *ts = osl_ts;
+ ShaderData *sd = (ShaderData *)(sg->renderstate);
+ KernelGlobals *kg = sd->osl_globals;
- if(texture_thread_info == NULL) {
- OSLThreadData *tdata = kg->osl_tdata;
- texture_thread_info = tdata->oiio_thread_info;
- }
+ if (texture_thread_info == NULL) {
+ OSLThreadData *tdata = kg->osl_tdata;
+ texture_thread_info = tdata->oiio_thread_info;
+ }
#ifdef WITH_PTEX
- /* todo: this is just a quick hack, only works with particular files and options */
- if(string_endswith(filename.string(), ".ptx")) {
- float2 uv;
- int faceid;
+ /* todo: this is just a quick hack, only works with particular files and options */
+ if (string_endswith(filename.string(), ".ptx")) {
+ float2 uv;
+ int faceid;
- if(!primitive_ptex(kg, sd, &uv, &faceid))
- return false;
+ if (!primitive_ptex(kg, sd, &uv, &faceid))
+ return false;
- float u = uv.x;
- float v = uv.y;
- float dudx = 0.0f;
- float dvdx = 0.0f;
- float dudy = 0.0f;
- float dvdy = 0.0f;
+ float u = uv.x;
+ float v = uv.y;
+ float dudx = 0.0f;
+ float dvdx = 0.0f;
+ float dudy = 0.0f;
+ float dvdy = 0.0f;
- Ptex::String error;
- PtexPtr<PtexTexture> r(ptex_cache->get(filename.c_str(), error));
+ Ptex::String error;
+ PtexPtr<PtexTexture> r(ptex_cache->get(filename.c_str(), error));
- if(!r) {
- //std::cerr << error.c_str() << std::endl;
- return false;
- }
+ if (!r) {
+ //std::cerr << error.c_str() << std::endl;
+ return false;
+ }
- bool mipmaplerp = false;
- float sharpness = 1.0f;
- PtexFilter::Options opts(PtexFilter::f_bicubic, mipmaplerp, sharpness);
- PtexPtr<PtexFilter> f(PtexFilter::getFilter(r, opts));
+ bool mipmaplerp = false;
+ float sharpness = 1.0f;
+ PtexFilter::Options opts(PtexFilter::f_bicubic, mipmaplerp, sharpness);
+ PtexPtr<PtexFilter> f(PtexFilter::getFilter(r, opts));
- f->eval(result, options.firstchannel, nchannels, faceid, u, v, dudx, dvdx, dudy, dvdy);
+ f->eval(result, options.firstchannel, nchannels, faceid, u, v, dudx, dvdx, dudy, dvdy);
- for(int c = r->numChannels(); c < nchannels; c++)
- result[c] = result[0];
+ for (int c = r->numChannels(); c < nchannels; c++)
+ result[c] = result[0];
- return true;
- }
+ return true;
+ }
#endif
- bool status = false;
-
- if(filename.length() && filename[0] == '@') {
- if(filename == u_at_bevel) {
- /* Bevel shader hack. */
- if(nchannels >= 3) {
- PathState *state = sd->osl_path_state;
- int num_samples = (int)s;
- float radius = t;
- float3 N = svm_bevel(kg, sd, state, radius, num_samples);
- result[0] = N.x;
- result[1] = N.y;
- result[2] = N.z;
- status = true;
- }
- }
- else if(filename == u_at_ao) {
- /* AO shader hack. */
- PathState *state = sd->osl_path_state;
- int num_samples = (int)s;
- float radius = t;
- float3 N = make_float3(dsdx, dtdx, dsdy);
- int flags = 0;
- if((int)dtdy) {
- flags |= NODE_AO_INSIDE;
- }
- if((int)options.sblur) {
- flags |= NODE_AO_ONLY_LOCAL;
- }
- if((int)options.tblur) {
- flags |= NODE_AO_GLOBAL_RADIUS;
- }
- result[0] = svm_ao(kg, sd, N, state, radius, num_samples, flags);
- status = true;
- }
- else if(filename[1] == 'l') {
- /* IES light. */
- int slot = atoi(filename.c_str() + 2);
- result[0] = kernel_ies_interp(kg, slot, s, t);
- status = true;
- }
- else {
- /* Packed texture. */
- int slot = atoi(filename.c_str() + 2);
- float4 rgba = kernel_tex_image_interp(kg, slot, s, 1.0f - t);
-
- result[0] = rgba[0];
- if(nchannels > 1)
- result[1] = rgba[1];
- if(nchannels > 2)
- result[2] = rgba[2];
- if(nchannels > 3)
- result[3] = rgba[3];
- status = true;
- }
- }
- else {
- if(texture_handle != NULL) {
- status = ts->texture(texture_handle,
- texture_thread_info,
- options,
- s, t,
- dsdx, dtdx,
- dsdy, dtdy,
- nchannels,
- result,
- dresultds, dresultdt);
- }
- else {
- status = ts->texture(filename,
- options,
- s, t,
- dsdx, dtdx,
- dsdy, dtdy,
- nchannels,
- result,
- dresultds, dresultdt);
- }
- }
-
- if(!status) {
- if(nchannels == 3 || nchannels == 4) {
- result[0] = 1.0f;
- result[1] = 0.0f;
- result[2] = 1.0f;
-
- if(nchannels == 4)
- result[3] = 1.0f;
- }
- /* This might be slow, but prevents error messages leak and
- * other nasty stuff happening.
- */
- string err = ts->geterror();
- (void) err;
- }
-
- return status;
+ bool status = false;
+
+ if (filename.length() && filename[0] == '@') {
+ if (filename == u_at_bevel) {
+ /* Bevel shader hack. */
+ if (nchannels >= 3) {
+ PathState *state = sd->osl_path_state;
+ int num_samples = (int)s;
+ float radius = t;
+ float3 N = svm_bevel(kg, sd, state, radius, num_samples);
+ result[0] = N.x;
+ result[1] = N.y;
+ result[2] = N.z;
+ status = true;
+ }
+ }
+ else if (filename == u_at_ao) {
+ /* AO shader hack. */
+ PathState *state = sd->osl_path_state;
+ int num_samples = (int)s;
+ float radius = t;
+ float3 N = make_float3(dsdx, dtdx, dsdy);
+ int flags = 0;
+ if ((int)dtdy) {
+ flags |= NODE_AO_INSIDE;
+ }
+ if ((int)options.sblur) {
+ flags |= NODE_AO_ONLY_LOCAL;
+ }
+ if ((int)options.tblur) {
+ flags |= NODE_AO_GLOBAL_RADIUS;
+ }
+ result[0] = svm_ao(kg, sd, N, state, radius, num_samples, flags);
+ status = true;
+ }
+ else if (filename[1] == 'l') {
+ /* IES light. */
+ int slot = atoi(filename.c_str() + 2);
+ result[0] = kernel_ies_interp(kg, slot, s, t);
+ status = true;
+ }
+ else {
+ /* Packed texture. */
+ int slot = atoi(filename.c_str() + 2);
+ float4 rgba = kernel_tex_image_interp(kg, slot, s, 1.0f - t);
+
+ result[0] = rgba[0];
+ if (nchannels > 1)
+ result[1] = rgba[1];
+ if (nchannels > 2)
+ result[2] = rgba[2];
+ if (nchannels > 3)
+ result[3] = rgba[3];
+ status = true;
+ }
+ }
+ else {
+ if (texture_handle != NULL) {
+ status = ts->texture(texture_handle,
+ texture_thread_info,
+ options,
+ s,
+ t,
+ dsdx,
+ dtdx,
+ dsdy,
+ dtdy,
+ nchannels,
+ result,
+ dresultds,
+ dresultdt);
+ }
+ else {
+ status = ts->texture(filename,
+ options,
+ s,
+ t,
+ dsdx,
+ dtdx,
+ dsdy,
+ dtdy,
+ nchannels,
+ result,
+ dresultds,
+ dresultdt);
+ }
+ }
+
+ if (!status) {
+ if (nchannels == 3 || nchannels == 4) {
+ result[0] = 1.0f;
+ result[1] = 0.0f;
+ result[2] = 1.0f;
+
+ if (nchannels == 4)
+ result[3] = 1.0f;
+ }
+ /* This might be slow, but prevents error messages leak and
+ * other nasty stuff happening.
+ */
+ string err = ts->geterror();
+ (void)err;
+ }
+
+ return status;
}
bool OSLRenderServices::texture3d(ustring filename,
@@ -1114,68 +1157,76 @@ bool OSLRenderServices::texture3d(ustring filename,
float *dresultdr,
ustring *errormessage)
{
- OSL::TextureSystem *ts = osl_ts;
- ShaderData *sd = (ShaderData *)(sg->renderstate);
- KernelGlobals *kg = sd->osl_globals;
-
- if(texture_thread_info == NULL) {
- OSLThreadData *tdata = kg->osl_tdata;
- texture_thread_info = tdata->oiio_thread_info;
- }
-
- bool status;
- if(filename.length() && filename[0] == '@') {
- int slot = atoi(filename.c_str() + 1);
- float4 rgba = kernel_tex_image_interp_3d(kg, slot, P.x, P.y, P.z, INTERPOLATION_NONE);
-
- result[0] = rgba[0];
- if(nchannels > 1)
- result[1] = rgba[1];
- if(nchannels > 2)
- result[2] = rgba[2];
- if(nchannels > 3)
- result[3] = rgba[3];
- status = true;
- }
- else {
- if(texture_handle != NULL) {
- status = ts->texture3d(texture_handle,
- texture_thread_info,
- options,
- P,
- dPdx, dPdy, dPdz,
- nchannels,
- result,
- dresultds, dresultdt, dresultdr);
- }
- else {
- status = ts->texture3d(filename,
- options,
- P,
- dPdx, dPdy, dPdz,
- nchannels,
- result,
- dresultds, dresultdt, dresultdr);
- }
- }
-
- if(!status) {
- if(nchannels == 3 || nchannels == 4) {
- result[0] = 1.0f;
- result[1] = 0.0f;
- result[2] = 1.0f;
-
- if(nchannels == 4)
- result[3] = 1.0f;
- }
- /* This might be slow, but prevents error messages leak and
- * other nasty stuff happening.
- */
- string err = ts->geterror();
- (void) err;
- }
-
- return status;
+ OSL::TextureSystem *ts = osl_ts;
+ ShaderData *sd = (ShaderData *)(sg->renderstate);
+ KernelGlobals *kg = sd->osl_globals;
+
+ if (texture_thread_info == NULL) {
+ OSLThreadData *tdata = kg->osl_tdata;
+ texture_thread_info = tdata->oiio_thread_info;
+ }
+
+ bool status;
+ if (filename.length() && filename[0] == '@') {
+ int slot = atoi(filename.c_str() + 1);
+ float4 rgba = kernel_tex_image_interp_3d(kg, slot, P.x, P.y, P.z, INTERPOLATION_NONE);
+
+ result[0] = rgba[0];
+ if (nchannels > 1)
+ result[1] = rgba[1];
+ if (nchannels > 2)
+ result[2] = rgba[2];
+ if (nchannels > 3)
+ result[3] = rgba[3];
+ status = true;
+ }
+ else {
+ if (texture_handle != NULL) {
+ status = ts->texture3d(texture_handle,
+ texture_thread_info,
+ options,
+ P,
+ dPdx,
+ dPdy,
+ dPdz,
+ nchannels,
+ result,
+ dresultds,
+ dresultdt,
+ dresultdr);
+ }
+ else {
+ status = ts->texture3d(filename,
+ options,
+ P,
+ dPdx,
+ dPdy,
+ dPdz,
+ nchannels,
+ result,
+ dresultds,
+ dresultdt,
+ dresultdr);
+ }
+ }
+
+ if (!status) {
+ if (nchannels == 3 || nchannels == 4) {
+ result[0] = 1.0f;
+ result[1] = 0.0f;
+ result[2] = 1.0f;
+
+ if (nchannels == 4)
+ result[3] = 1.0f;
+ }
+ /* This might be slow, but prevents error messages leak and
+ * other nasty stuff happening.
+ */
+ string err = ts->geterror();
+ (void)err;
+ }
+
+ return status;
}
bool OSLRenderServices::environment(ustring filename,
@@ -1192,35 +1243,34 @@ bool OSLRenderServices::environment(ustring filename,
float *dresultdt,
ustring *errormessage)
{
- OSL::TextureSystem *ts = osl_ts;
-
- if (thread_info == NULL) {
- ShaderData *sd = (ShaderData *)(sg->renderstate);
- KernelGlobals *kg = sd->osl_globals;
- OSLThreadData *tdata = kg->osl_tdata;
- thread_info = tdata->oiio_thread_info;
- }
-
- if (th == NULL) {
- th = ts->get_texture_handle(filename, thread_info);
- }
-
- bool status = ts->environment(th, thread_info,
- options, R, dRdx, dRdy,
- nchannels, result, dresultds, dresultdt);
-
- if(!status) {
- if(nchannels == 3 || nchannels == 4) {
- result[0] = 1.0f;
- result[1] = 0.0f;
- result[2] = 1.0f;
-
- if(nchannels == 4)
- result[3] = 1.0f;
- }
- }
-
- return status;
+ OSL::TextureSystem *ts = osl_ts;
+
+ if (thread_info == NULL) {
+ ShaderData *sd = (ShaderData *)(sg->renderstate);
+ KernelGlobals *kg = sd->osl_globals;
+ OSLThreadData *tdata = kg->osl_tdata;
+ thread_info = tdata->oiio_thread_info;
+ }
+
+ if (th == NULL) {
+ th = ts->get_texture_handle(filename, thread_info);
+ }
+
+ bool status = ts->environment(
+ th, thread_info, options, R, dRdx, dRdy, nchannels, result, dresultds, dresultdt);
+
+ if (!status) {
+ if (nchannels == 3 || nchannels == 4) {
+ result[0] = 1.0f;
+ result[1] = 0.0f;
+ result[2] = 1.0f;
+
+ if (nchannels == 4)
+ result[3] = 1.0f;
+ }
+ }
+
+ return status;
}
bool OSLRenderServices::get_texture_info(OSL::ShaderGlobals *sg,
@@ -1231,138 +1281,158 @@ bool OSLRenderServices::get_texture_info(OSL::ShaderGlobals *sg,
TypeDesc datatype,
void *data)
{
- OSL::TextureSystem *ts = osl_ts;
- if(filename.length() && filename[0] == '@') {
- /* Special builtin textures. */
- return false;
- }
- else {
- return ts->get_texture_info(filename, subimage, dataname, datatype, data);
- }
+ OSL::TextureSystem *ts = osl_ts;
+ if (filename.length() && filename[0] == '@') {
+ /* Special builtin textures. */
+ return false;
+ }
+ else {
+ return ts->get_texture_info(filename, subimage, dataname, datatype, data);
+ }
}
-int OSLRenderServices::pointcloud_search(OSL::ShaderGlobals *sg, ustring filename, const OSL::Vec3 &center,
- float radius, int max_points, bool sort,
- size_t *out_indices, float *out_distances, int derivs_offset)
+int OSLRenderServices::pointcloud_search(OSL::ShaderGlobals *sg,
+ ustring filename,
+ const OSL::Vec3 &center,
+ float radius,
+ int max_points,
+ bool sort,
+ size_t *out_indices,
+ float *out_distances,
+ int derivs_offset)
{
- return 0;
+ return 0;
}
-int OSLRenderServices::pointcloud_get(OSL::ShaderGlobals *sg, ustring filename, size_t *indices, int count,
- ustring attr_name, TypeDesc attr_type, void *out_data)
+int OSLRenderServices::pointcloud_get(OSL::ShaderGlobals *sg,
+ ustring filename,
+ size_t *indices,
+ int count,
+ ustring attr_name,
+ TypeDesc attr_type,
+ void *out_data)
{
- return 0;
+ return 0;
}
bool OSLRenderServices::pointcloud_write(OSL::ShaderGlobals *sg,
- ustring filename, const OSL::Vec3 &pos,
- int nattribs, const ustring *names,
+ ustring filename,
+ const OSL::Vec3 &pos,
+ int nattribs,
+ const ustring *names,
const TypeDesc *types,
const void **data)
{
- return false;
+ return false;
}
-bool OSLRenderServices::trace(TraceOpt &options, OSL::ShaderGlobals *sg,
- const OSL::Vec3 &P, const OSL::Vec3 &dPdx,
- const OSL::Vec3 &dPdy, const OSL::Vec3 &R,
- const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy)
+bool OSLRenderServices::trace(TraceOpt &options,
+ OSL::ShaderGlobals *sg,
+ const OSL::Vec3 &P,
+ const OSL::Vec3 &dPdx,
+ const OSL::Vec3 &dPdy,
+ const OSL::Vec3 &R,
+ const OSL::Vec3 &dRdx,
+ const OSL::Vec3 &dRdy)
{
- /* todo: options.shader support, maybe options.traceset */
- ShaderData *sd = (ShaderData *)(sg->renderstate);
-
- /* setup ray */
- Ray ray;
-
- ray.P = TO_FLOAT3(P);
- ray.D = TO_FLOAT3(R);
- ray.t = (options.maxdist == 1.0e30f)? FLT_MAX: options.maxdist - options.mindist;
- ray.time = sd->time;
-
- if(options.mindist == 0.0f) {
- /* avoid self-intersections */
- if(ray.P == sd->P) {
- bool transmit = (dot(sd->Ng, ray.D) < 0.0f);
- ray.P = ray_offset(sd->P, (transmit)? -sd->Ng: sd->Ng);
- }
- }
- else {
- /* offset for minimum distance */
- ray.P += options.mindist*ray.D;
- }
-
- /* ray differentials */
- ray.dP.dx = TO_FLOAT3(dPdx);
- ray.dP.dy = TO_FLOAT3(dPdy);
- ray.dD.dx = TO_FLOAT3(dRdx);
- ray.dD.dy = TO_FLOAT3(dRdy);
-
- /* allocate trace data */
- OSLTraceData *tracedata = (OSLTraceData*)sg->tracedata;
- tracedata->ray = ray;
- tracedata->setup = false;
- tracedata->init = true;
- tracedata->sd.osl_globals = sd->osl_globals;
-
- /* Raytrace, leaving out shadow opaque to avoid early exit. */
- uint visibility = PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE;
- return scene_intersect(sd->osl_globals, ray, visibility, &tracedata->isect, NULL, 0.0f, 0.0f);
+ /* todo: options.shader support, maybe options.traceset */
+ ShaderData *sd = (ShaderData *)(sg->renderstate);
+
+ /* setup ray */
+ Ray ray;
+
+ ray.P = TO_FLOAT3(P);
+ ray.D = TO_FLOAT3(R);
+ ray.t = (options.maxdist == 1.0e30f) ? FLT_MAX : options.maxdist - options.mindist;
+ ray.time = sd->time;
+
+ if (options.mindist == 0.0f) {
+ /* avoid self-intersections */
+ if (ray.P == sd->P) {
+ bool transmit = (dot(sd->Ng, ray.D) < 0.0f);
+ ray.P = ray_offset(sd->P, (transmit) ? -sd->Ng : sd->Ng);
+ }
+ }
+ else {
+ /* offset for minimum distance */
+ ray.P += options.mindist * ray.D;
+ }
+
+ /* ray differentials */
+ ray.dP.dx = TO_FLOAT3(dPdx);
+ ray.dP.dy = TO_FLOAT3(dPdy);
+ ray.dD.dx = TO_FLOAT3(dRdx);
+ ray.dD.dy = TO_FLOAT3(dRdy);
+
+ /* allocate trace data */
+ OSLTraceData *tracedata = (OSLTraceData *)sg->tracedata;
+ tracedata->ray = ray;
+ tracedata->setup = false;
+ tracedata->init = true;
+ tracedata->sd.osl_globals = sd->osl_globals;
+
+ /* Raytrace, leaving out shadow opaque to avoid early exit. */
+ uint visibility = PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE;
+ return scene_intersect(sd->osl_globals, ray, visibility, &tracedata->isect, NULL, 0.0f, 0.0f);
}
-
-bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg, ustring source, ustring name,
- TypeDesc type, void *val, bool derivatives)
+bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg,
+ ustring source,
+ ustring name,
+ TypeDesc type,
+ void *val,
+ bool derivatives)
{
- OSLTraceData *tracedata = (OSLTraceData*)sg->tracedata;
-
- if(source == u_trace && tracedata->init) {
- if(name == u_hit) {
- return set_attribute_int((tracedata->isect.prim != PRIM_NONE), type, derivatives, val);
- }
- else if(tracedata->isect.prim != PRIM_NONE) {
- if(name == u_hitdist) {
- float f[3] = {tracedata->isect.t, 0.0f, 0.0f};
- return set_attribute_float(f, type, derivatives, val);
- }
- else {
- ShaderData *sd = &tracedata->sd;
- KernelGlobals *kg = sd->osl_globals;
-
- if(!tracedata->setup) {
- /* lazy shader data setup */
- shader_setup_from_ray(kg, sd, &tracedata->isect, &tracedata->ray);
- tracedata->setup = true;
- }
-
- if(name == u_N) {
- return set_attribute_float3(sd->N, type, derivatives, val);
- }
- else if(name == u_Ng) {
- return set_attribute_float3(sd->Ng, type, derivatives, val);
- }
- else if(name == u_P) {
- float3 f[3] = {sd->P, sd->dP.dx, sd->dP.dy};
- return set_attribute_float3(f, type, derivatives, val);
- }
- else if(name == u_I) {
- float3 f[3] = {sd->I, sd->dI.dx, sd->dI.dy};
- return set_attribute_float3(f, type, derivatives, val);
- }
- else if(name == u_u) {
- float f[3] = {sd->u, sd->du.dx, sd->du.dy};
- return set_attribute_float(f, type, derivatives, val);
- }
- else if(name == u_v) {
- float f[3] = {sd->v, sd->dv.dx, sd->dv.dy};
- return set_attribute_float(f, type, derivatives, val);
- }
-
- return get_attribute(sd, derivatives, u_empty, type, name, val);
- }
- }
- }
-
- return false;
+ OSLTraceData *tracedata = (OSLTraceData *)sg->tracedata;
+
+ if (source == u_trace && tracedata->init) {
+ if (name == u_hit) {
+ return set_attribute_int((tracedata->isect.prim != PRIM_NONE), type, derivatives, val);
+ }
+ else if (tracedata->isect.prim != PRIM_NONE) {
+ if (name == u_hitdist) {
+ float f[3] = {tracedata->isect.t, 0.0f, 0.0f};
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else {
+ ShaderData *sd = &tracedata->sd;
+ KernelGlobals *kg = sd->osl_globals;
+
+ if (!tracedata->setup) {
+ /* lazy shader data setup */
+ shader_setup_from_ray(kg, sd, &tracedata->isect, &tracedata->ray);
+ tracedata->setup = true;
+ }
+
+ if (name == u_N) {
+ return set_attribute_float3(sd->N, type, derivatives, val);
+ }
+ else if (name == u_Ng) {
+ return set_attribute_float3(sd->Ng, type, derivatives, val);
+ }
+ else if (name == u_P) {
+ float3 f[3] = {sd->P, sd->dP.dx, sd->dP.dy};
+ return set_attribute_float3(f, type, derivatives, val);
+ }
+ else if (name == u_I) {
+ float3 f[3] = {sd->I, sd->dI.dx, sd->dI.dy};
+ return set_attribute_float3(f, type, derivatives, val);
+ }
+ else if (name == u_u) {
+ float f[3] = {sd->u, sd->du.dx, sd->du.dy};
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else if (name == u_v) {
+ float f[3] = {sd->v, sd->dv.dx, sd->dv.dy};
+ return set_attribute_float(f, type, derivatives, val);
+ }
+
+ return get_attribute(sd, derivatives, u_empty, type, name, val);
+ }
+ }
+ }
+
+ return false;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h
index 3990a22aefd..2fad5833fc9 100644
--- a/intern/cycles/kernel/osl/osl_services.h
+++ b/intern/cycles/kernel/osl/osl_services.h
@@ -40,177 +40,229 @@ class Shader;
struct ShaderData;
struct float3;
struct KernelGlobals;
-class OSLRenderServices : public OSL::RendererServices
-{
-public:
- OSLRenderServices();
- ~OSLRenderServices();
-
- void thread_init(KernelGlobals *kernel_globals, OSL::TextureSystem *ts);
-
- bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) override;
- bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) override;
-
- bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from, float time) override;
- bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring to, float time) override;
-
- bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform) override;
- bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform) override;
-
- bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from) override;
- bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from) override;
-
- bool get_array_attribute(OSL::ShaderGlobals *sg, bool derivatives,
- ustring object, TypeDesc type, ustring name,
- int index, void *val) override;
- bool get_attribute(OSL::ShaderGlobals *sg, bool derivatives, ustring object,
- TypeDesc type, ustring name, void *val) override;
- bool get_attribute(ShaderData *sd, bool derivatives, ustring object_name,
- TypeDesc type, ustring name, void *val);
-
- bool get_userdata(bool derivatives, ustring name, TypeDesc type,
- OSL::ShaderGlobals *sg, void *val) override;
-
- int pointcloud_search(OSL::ShaderGlobals *sg, ustring filename, const OSL::Vec3 &center,
- float radius, int max_points, bool sort, size_t *out_indices,
- float *out_distances, int derivs_offset) override;
-
- int pointcloud_get(OSL::ShaderGlobals *sg, ustring filename, size_t *indices, int count,
- ustring attr_name, TypeDesc attr_type, void *out_data) override;
-
- bool pointcloud_write(OSL::ShaderGlobals *sg,
- ustring filename, const OSL::Vec3 &pos,
- int nattribs, const ustring *names,
- const TypeDesc *types,
- const void **data) override;
-
- bool trace(TraceOpt &options, OSL::ShaderGlobals *sg,
- const OSL::Vec3 &P, const OSL::Vec3 &dPdx,
- const OSL::Vec3 &dPdy, const OSL::Vec3 &R,
- const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy) override;
-
- bool getmessage(OSL::ShaderGlobals *sg, ustring source, ustring name,
- TypeDesc type, void *val, bool derivatives) override;
-
- TextureSystem::TextureHandle *get_texture_handle(ustring filename) override;
-
- bool good(TextureSystem::TextureHandle *texture_handle) override;
-
- bool texture(ustring filename,
- TextureSystem::TextureHandle *texture_handle,
- TexturePerthread *texture_thread_info,
- TextureOpt &options,
- OSL::ShaderGlobals *sg,
- float s, float t,
- float dsdx, float dtdx, float dsdy, float dtdy,
- int nchannels,
- float *result,
- float *dresultds,
- float *dresultdt,
- ustring *errormessage) override;
-
- bool texture3d(ustring filename,
- TextureHandle *texture_handle,
- TexturePerthread *texture_thread_info,
- TextureOpt &options,
- OSL::ShaderGlobals *sg,
- const OSL::Vec3 &P,
- const OSL::Vec3 &dPdx,
- const OSL::Vec3 &dPdy,
- const OSL::Vec3 &dPdz,
- int nchannels,
- float *result,
- float *dresultds,
- float *dresultdt,
- float *dresultdr,
- ustring *errormessage) override;
-
- bool environment(ustring filename,
- TextureHandle *texture_handle,
- TexturePerthread *texture_thread_info,
- TextureOpt &options,
- OSL::ShaderGlobals *sg,
- const OSL::Vec3 &R,
- const OSL::Vec3 &dRdx,
- const OSL::Vec3 &dRdy,
- int nchannels,
- float *result,
- float *dresultds,
- float *dresultdt,
- ustring *errormessage) override;
-
- bool get_texture_info(OSL::ShaderGlobals *sg,
- ustring filename,
- TextureHandle *texture_handle,
- int subimage,
- ustring dataname,
- TypeDesc datatype,
- void *data) override;
-
- static bool get_background_attribute(KernelGlobals *kg, ShaderData *sd, ustring name,
- TypeDesc type, bool derivatives, void *val);
- static bool get_object_standard_attribute(KernelGlobals *kg, ShaderData *sd, ustring name,
- TypeDesc type, bool derivatives, void *val);
-
- static ustring u_distance;
- static ustring u_index;
- static ustring u_world;
- static ustring u_camera;
- static ustring u_screen;
- static ustring u_raster;
- static ustring u_ndc;
- static ustring u_object_location;
- static ustring u_object_index;
- static ustring u_geom_dupli_generated;
- static ustring u_geom_dupli_uv;
- static ustring u_material_index;
- static ustring u_object_random;
- static ustring u_particle_index;
- static ustring u_particle_random;
- static ustring u_particle_age;
- static ustring u_particle_lifetime;
- static ustring u_particle_location;
- static ustring u_particle_rotation;
- static ustring u_particle_size;
- static ustring u_particle_velocity;
- static ustring u_particle_angular_velocity;
- static ustring u_geom_numpolyvertices;
- static ustring u_geom_trianglevertices;
- static ustring u_geom_polyvertices;
- static ustring u_geom_name;
- static ustring u_geom_undisplaced;
- static ustring u_is_smooth;
- static ustring u_is_curve;
- static ustring u_curve_thickness;
- static ustring u_curve_tangent_normal;
- static ustring u_curve_random;
- static ustring u_path_ray_length;
- static ustring u_path_ray_depth;
- static ustring u_path_diffuse_depth;
- static ustring u_path_glossy_depth;
- static ustring u_path_transparent_depth;
- static ustring u_path_transmission_depth;
- static ustring u_trace;
- static ustring u_hit;
- static ustring u_hitdist;
- static ustring u_N;
- static ustring u_Ng;
- static ustring u_P;
- static ustring u_I;
- static ustring u_u;
- static ustring u_v;
- static ustring u_empty;
- static ustring u_at_bevel;
- static ustring u_at_ao;
-
-private:
- KernelGlobals *kernel_globals;
- OSL::TextureSystem *osl_ts;
+class OSLRenderServices : public OSL::RendererServices {
+ public:
+ OSLRenderServices();
+ ~OSLRenderServices();
+
+ void thread_init(KernelGlobals *kernel_globals, OSL::TextureSystem *ts);
+
+ bool get_matrix(OSL::ShaderGlobals *sg,
+ OSL::Matrix44 &result,
+ OSL::TransformationPtr xform,
+ float time) override;
+ bool get_inverse_matrix(OSL::ShaderGlobals *sg,
+ OSL::Matrix44 &result,
+ OSL::TransformationPtr xform,
+ float time) override;
+
+ bool get_matrix(OSL::ShaderGlobals *sg,
+ OSL::Matrix44 &result,
+ ustring from,
+ float time) override;
+ bool get_inverse_matrix(OSL::ShaderGlobals *sg,
+ OSL::Matrix44 &result,
+ ustring to,
+ float time) override;
+
+ bool get_matrix(OSL::ShaderGlobals *sg,
+ OSL::Matrix44 &result,
+ OSL::TransformationPtr xform) override;
+ bool get_inverse_matrix(OSL::ShaderGlobals *sg,
+ OSL::Matrix44 &result,
+ OSL::TransformationPtr xform) override;
+
+ bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from) override;
+ bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from) override;
+
+ bool get_array_attribute(OSL::ShaderGlobals *sg,
+ bool derivatives,
+ ustring object,
+ TypeDesc type,
+ ustring name,
+ int index,
+ void *val) override;
+ bool get_attribute(OSL::ShaderGlobals *sg,
+ bool derivatives,
+ ustring object,
+ TypeDesc type,
+ ustring name,
+ void *val) override;
+ bool get_attribute(ShaderData *sd,
+ bool derivatives,
+ ustring object_name,
+ TypeDesc type,
+ ustring name,
+ void *val);
+
+ bool get_userdata(
+ bool derivatives, ustring name, TypeDesc type, OSL::ShaderGlobals *sg, void *val) override;
+
+ int pointcloud_search(OSL::ShaderGlobals *sg,
+ ustring filename,
+ const OSL::Vec3 &center,
+ float radius,
+ int max_points,
+ bool sort,
+ size_t *out_indices,
+ float *out_distances,
+ int derivs_offset) override;
+
+ int pointcloud_get(OSL::ShaderGlobals *sg,
+ ustring filename,
+ size_t *indices,
+ int count,
+ ustring attr_name,
+ TypeDesc attr_type,
+ void *out_data) override;
+
+ bool pointcloud_write(OSL::ShaderGlobals *sg,
+ ustring filename,
+ const OSL::Vec3 &pos,
+ int nattribs,
+ const ustring *names,
+ const TypeDesc *types,
+ const void **data) override;
+
+ bool trace(TraceOpt &options,
+ OSL::ShaderGlobals *sg,
+ const OSL::Vec3 &P,
+ const OSL::Vec3 &dPdx,
+ const OSL::Vec3 &dPdy,
+ const OSL::Vec3 &R,
+ const OSL::Vec3 &dRdx,
+ const OSL::Vec3 &dRdy) override;
+
+ bool getmessage(OSL::ShaderGlobals *sg,
+ ustring source,
+ ustring name,
+ TypeDesc type,
+ void *val,
+ bool derivatives) override;
+
+ TextureSystem::TextureHandle *get_texture_handle(ustring filename) override;
+
+ bool good(TextureSystem::TextureHandle *texture_handle) override;
+
+ bool texture(ustring filename,
+ TextureSystem::TextureHandle *texture_handle,
+ TexturePerthread *texture_thread_info,
+ TextureOpt &options,
+ OSL::ShaderGlobals *sg,
+ float s,
+ float t,
+ float dsdx,
+ float dtdx,
+ float dsdy,
+ float dtdy,
+ int nchannels,
+ float *result,
+ float *dresultds,
+ float *dresultdt,
+ ustring *errormessage) override;
+
+ bool texture3d(ustring filename,
+ TextureHandle *texture_handle,
+ TexturePerthread *texture_thread_info,
+ TextureOpt &options,
+ OSL::ShaderGlobals *sg,
+ const OSL::Vec3 &P,
+ const OSL::Vec3 &dPdx,
+ const OSL::Vec3 &dPdy,
+ const OSL::Vec3 &dPdz,
+ int nchannels,
+ float *result,
+ float *dresultds,
+ float *dresultdt,
+ float *dresultdr,
+ ustring *errormessage) override;
+
+ bool environment(ustring filename,
+ TextureHandle *texture_handle,
+ TexturePerthread *texture_thread_info,
+ TextureOpt &options,
+ OSL::ShaderGlobals *sg,
+ const OSL::Vec3 &R,
+ const OSL::Vec3 &dRdx,
+ const OSL::Vec3 &dRdy,
+ int nchannels,
+ float *result,
+ float *dresultds,
+ float *dresultdt,
+ ustring *errormessage) override;
+
+ bool get_texture_info(OSL::ShaderGlobals *sg,
+ ustring filename,
+ TextureHandle *texture_handle,
+ int subimage,
+ ustring dataname,
+ TypeDesc datatype,
+ void *data) override;
+
+ static bool get_background_attribute(
+ KernelGlobals *kg, ShaderData *sd, ustring name, TypeDesc type, bool derivatives, void *val);
+ static bool get_object_standard_attribute(
+ KernelGlobals *kg, ShaderData *sd, ustring name, TypeDesc type, bool derivatives, void *val);
+
+ static ustring u_distance;
+ static ustring u_index;
+ static ustring u_world;
+ static ustring u_camera;
+ static ustring u_screen;
+ static ustring u_raster;
+ static ustring u_ndc;
+ static ustring u_object_location;
+ static ustring u_object_index;
+ static ustring u_geom_dupli_generated;
+ static ustring u_geom_dupli_uv;
+ static ustring u_material_index;
+ static ustring u_object_random;
+ static ustring u_particle_index;
+ static ustring u_particle_random;
+ static ustring u_particle_age;
+ static ustring u_particle_lifetime;
+ static ustring u_particle_location;
+ static ustring u_particle_rotation;
+ static ustring u_particle_size;
+ static ustring u_particle_velocity;
+ static ustring u_particle_angular_velocity;
+ static ustring u_geom_numpolyvertices;
+ static ustring u_geom_trianglevertices;
+ static ustring u_geom_polyvertices;
+ static ustring u_geom_name;
+ static ustring u_geom_undisplaced;
+ static ustring u_is_smooth;
+ static ustring u_is_curve;
+ static ustring u_curve_thickness;
+ static ustring u_curve_tangent_normal;
+ static ustring u_curve_random;
+ static ustring u_path_ray_length;
+ static ustring u_path_ray_depth;
+ static ustring u_path_diffuse_depth;
+ static ustring u_path_glossy_depth;
+ static ustring u_path_transparent_depth;
+ static ustring u_path_transmission_depth;
+ static ustring u_trace;
+ static ustring u_hit;
+ static ustring u_hitdist;
+ static ustring u_N;
+ static ustring u_Ng;
+ static ustring u_P;
+ static ustring u_I;
+ static ustring u_u;
+ static ustring u_v;
+ static ustring u_empty;
+ static ustring u_at_bevel;
+ static ustring u_at_ao;
+
+ private:
+ KernelGlobals *kernel_globals;
+ OSL::TextureSystem *osl_ts;
#ifdef WITH_PTEX
- PtexCache *ptex_cache;
+ PtexCache *ptex_cache;
#endif
};
CCL_NAMESPACE_END
-#endif /* __OSL_SERVICES_H__ */
+#endif /* __OSL_SERVICES_H__ */
diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp
index a89bb3fd1a3..3d9c579c9ff 100644
--- a/intern/cycles/kernel/osl/osl_shader.cpp
+++ b/intern/cycles/kernel/osl/osl_shader.cpp
@@ -33,103 +33,104 @@
#include "render/attribute.h"
-
CCL_NAMESPACE_BEGIN
/* Threads */
-void OSLShader::thread_init(KernelGlobals *kg, KernelGlobals *kernel_globals, OSLGlobals *osl_globals)
+void OSLShader::thread_init(KernelGlobals *kg,
+ KernelGlobals *kernel_globals,
+ OSLGlobals *osl_globals)
{
- /* no osl used? */
- if(!osl_globals->use) {
- kg->osl = NULL;
- return;
- }
+ /* no osl used? */
+ if (!osl_globals->use) {
+ kg->osl = NULL;
+ return;
+ }
- /* per thread kernel data init*/
- kg->osl = osl_globals;
- kg->osl->services->thread_init(kernel_globals, osl_globals->ts);
+ /* per thread kernel data init*/
+ kg->osl = osl_globals;
+ kg->osl->services->thread_init(kernel_globals, osl_globals->ts);
- OSL::ShadingSystem *ss = kg->osl->ss;
- OSLThreadData *tdata = new OSLThreadData();
+ OSL::ShadingSystem *ss = kg->osl->ss;
+ OSLThreadData *tdata = new OSLThreadData();
- memset((void *)&tdata->globals, 0, sizeof(OSL::ShaderGlobals));
- tdata->globals.tracedata = &tdata->tracedata;
- tdata->globals.flipHandedness = false;
- tdata->osl_thread_info = ss->create_thread_info();
- tdata->context = ss->get_context(tdata->osl_thread_info);
+ memset((void *)&tdata->globals, 0, sizeof(OSL::ShaderGlobals));
+ tdata->globals.tracedata = &tdata->tracedata;
+ tdata->globals.flipHandedness = false;
+ tdata->osl_thread_info = ss->create_thread_info();
+ tdata->context = ss->get_context(tdata->osl_thread_info);
- tdata->oiio_thread_info = osl_globals->ts->get_perthread_info();
+ tdata->oiio_thread_info = osl_globals->ts->get_perthread_info();
- kg->osl_ss = (OSLShadingSystem*)ss;
- kg->osl_tdata = tdata;
+ kg->osl_ss = (OSLShadingSystem *)ss;
+ kg->osl_tdata = tdata;
}
void OSLShader::thread_free(KernelGlobals *kg)
{
- if(!kg->osl)
- return;
+ if (!kg->osl)
+ return;
- OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss;
- OSLThreadData *tdata = kg->osl_tdata;
- ss->release_context(tdata->context);
+ OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
+ OSLThreadData *tdata = kg->osl_tdata;
+ ss->release_context(tdata->context);
- ss->destroy_thread_info(tdata->osl_thread_info);
+ ss->destroy_thread_info(tdata->osl_thread_info);
- delete tdata;
+ delete tdata;
- kg->osl = NULL;
- kg->osl_ss = NULL;
- kg->osl_tdata = NULL;
+ kg->osl = NULL;
+ kg->osl_ss = NULL;
+ kg->osl_tdata = NULL;
}
/* Globals */
-static void shaderdata_to_shaderglobals(KernelGlobals *kg, ShaderData *sd, PathState *state,
- int path_flag, OSLThreadData *tdata)
+static void shaderdata_to_shaderglobals(
+ KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag, OSLThreadData *tdata)
{
- OSL::ShaderGlobals *globals = &tdata->globals;
-
- /* copy from shader data to shader globals */
- globals->P = TO_VEC3(sd->P);
- globals->dPdx = TO_VEC3(sd->dP.dx);
- globals->dPdy = TO_VEC3(sd->dP.dy);
- globals->I = TO_VEC3(sd->I);
- globals->dIdx = TO_VEC3(sd->dI.dx);
- globals->dIdy = TO_VEC3(sd->dI.dy);
- globals->N = TO_VEC3(sd->N);
- globals->Ng = TO_VEC3(sd->Ng);
- globals->u = sd->u;
- globals->dudx = sd->du.dx;
- globals->dudy = sd->du.dy;
- globals->v = sd->v;
- globals->dvdx = sd->dv.dx;
- globals->dvdy = sd->dv.dy;
- globals->dPdu = TO_VEC3(sd->dPdu);
- globals->dPdv = TO_VEC3(sd->dPdv);
- globals->surfacearea = (sd->object == OBJECT_NONE) ? 1.0f : object_surface_area(kg, sd->object);
- globals->time = sd->time;
-
- /* booleans */
- globals->raytype = path_flag;
- globals->backfacing = (sd->flag & SD_BACKFACING);
-
- /* shader data to be used in services callbacks */
- globals->renderstate = sd;
-
- /* hacky, we leave it to services to fetch actual object matrix */
- globals->shader2common = sd;
- globals->object2common = sd;
-
- /* must be set to NULL before execute */
- globals->Ci = NULL;
-
- /* clear trace data */
- tdata->tracedata.init = false;
-
- /* used by renderservices */
- sd->osl_globals = kg;
- sd->osl_path_state = state;
+ OSL::ShaderGlobals *globals = &tdata->globals;
+
+ /* copy from shader data to shader globals */
+ globals->P = TO_VEC3(sd->P);
+ globals->dPdx = TO_VEC3(sd->dP.dx);
+ globals->dPdy = TO_VEC3(sd->dP.dy);
+ globals->I = TO_VEC3(sd->I);
+ globals->dIdx = TO_VEC3(sd->dI.dx);
+ globals->dIdy = TO_VEC3(sd->dI.dy);
+ globals->N = TO_VEC3(sd->N);
+ globals->Ng = TO_VEC3(sd->Ng);
+ globals->u = sd->u;
+ globals->dudx = sd->du.dx;
+ globals->dudy = sd->du.dy;
+ globals->v = sd->v;
+ globals->dvdx = sd->dv.dx;
+ globals->dvdy = sd->dv.dy;
+ globals->dPdu = TO_VEC3(sd->dPdu);
+ globals->dPdv = TO_VEC3(sd->dPdv);
+ globals->surfacearea = (sd->object == OBJECT_NONE) ? 1.0f : object_surface_area(kg, sd->object);
+ globals->time = sd->time;
+
+ /* booleans */
+ globals->raytype = path_flag;
+ globals->backfacing = (sd->flag & SD_BACKFACING);
+
+ /* shader data to be used in services callbacks */
+ globals->renderstate = sd;
+
+ /* hacky, we leave it to services to fetch actual object matrix */
+ globals->shader2common = sd;
+ globals->object2common = sd;
+
+ /* must be set to NULL before execute */
+ globals->Ci = NULL;
+
+ /* clear trace data */
+ tdata->tracedata.init = false;
+
+ /* used by renderservices */
+ sd->osl_globals = kg;
+ sd->osl_path_state = state;
}
/* Surface */
@@ -139,97 +140,101 @@ static void flatten_surface_closure_tree(ShaderData *sd,
const OSL::ClosureColor *closure,
float3 weight = make_float3(1.0f, 1.0f, 1.0f))
{
- /* OSL gives us a closure tree, we flatten it into arrays per
- * closure type, for evaluation, sampling, etc later on. */
-
- switch(closure->id) {
- case OSL::ClosureColor::MUL: {
- OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
- flatten_surface_closure_tree(sd, path_flag, mul->closure, TO_FLOAT3(mul->weight) * weight);
- break;
- }
- case OSL::ClosureColor::ADD: {
- OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
- flatten_surface_closure_tree(sd, path_flag, add->closureA, weight);
- flatten_surface_closure_tree(sd, path_flag, add->closureB, weight);
- break;
- }
- default: {
- OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure;
- CClosurePrimitive *prim = (CClosurePrimitive *)comp->data();
-
- if(prim) {
+ /* OSL gives us a closure tree, we flatten it into arrays per
+ * closure type, for evaluation, sampling, etc later on. */
+
+ switch (closure->id) {
+ case OSL::ClosureColor::MUL: {
+ OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
+ flatten_surface_closure_tree(sd, path_flag, mul->closure, TO_FLOAT3(mul->weight) * weight);
+ break;
+ }
+ case OSL::ClosureColor::ADD: {
+ OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
+ flatten_surface_closure_tree(sd, path_flag, add->closureA, weight);
+ flatten_surface_closure_tree(sd, path_flag, add->closureB, weight);
+ break;
+ }
+ default: {
+ OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure;
+ CClosurePrimitive *prim = (CClosurePrimitive *)comp->data();
+
+ if (prim) {
#ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS
- weight = weight*TO_FLOAT3(comp->w);
+ weight = weight * TO_FLOAT3(comp->w);
#endif
- prim->setup(sd, path_flag, weight);
- }
- break;
- }
- }
+ prim->setup(sd, path_flag, weight);
+ }
+ break;
+ }
+ }
}
void OSLShader::eval_surface(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag)
{
- /* setup shader globals from shader data */
- OSLThreadData *tdata = kg->osl_tdata;
- shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
-
- /* execute shader for this point */
- OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss;
- OSL::ShaderGlobals *globals = &tdata->globals;
- OSL::ShadingContext *octx = tdata->context;
- int shader = sd->shader & SHADER_MASK;
-
- /* automatic bump shader */
- if(kg->osl->bump_state[shader]) {
- /* save state */
- float3 P = sd->P;
- float3 dPdx = sd->dP.dx;
- float3 dPdy = sd->dP.dy;
-
- /* set state as if undisplaced */
- if(sd->flag & SD_HAS_DISPLACEMENT) {
- float data[9];
- bool found = kg->osl->services->get_attribute(sd, true, OSLRenderServices::u_empty, TypeDesc::TypeVector,
- OSLRenderServices::u_geom_undisplaced, data);
- (void) found;
- assert(found);
-
- memcpy(&sd->P, data, sizeof(float)*3);
- memcpy(&sd->dP.dx, data+3, sizeof(float)*3);
- memcpy(&sd->dP.dy, data+6, sizeof(float)*3);
-
- object_position_transform(kg, sd, &sd->P);
- object_dir_transform(kg, sd, &sd->dP.dx);
- object_dir_transform(kg, sd, &sd->dP.dy);
-
- globals->P = TO_VEC3(sd->P);
- globals->dPdx = TO_VEC3(sd->dP.dx);
- globals->dPdy = TO_VEC3(sd->dP.dy);
- }
-
- /* execute bump shader */
- ss->execute(octx, *(kg->osl->bump_state[shader]), *globals);
-
- /* reset state */
- sd->P = P;
- sd->dP.dx = dPdx;
- sd->dP.dy = dPdy;
-
- globals->P = TO_VEC3(P);
- globals->dPdx = TO_VEC3(dPdx);
- globals->dPdy = TO_VEC3(dPdy);
- }
-
- /* surface shader */
- if(kg->osl->surface_state[shader]) {
- ss->execute(octx, *(kg->osl->surface_state[shader]), *globals);
- }
-
- /* flatten closure tree */
- if(globals->Ci)
- flatten_surface_closure_tree(sd, path_flag, globals->Ci);
+ /* setup shader globals from shader data */
+ OSLThreadData *tdata = kg->osl_tdata;
+ shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
+
+ /* execute shader for this point */
+ OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
+ OSL::ShaderGlobals *globals = &tdata->globals;
+ OSL::ShadingContext *octx = tdata->context;
+ int shader = sd->shader & SHADER_MASK;
+
+ /* automatic bump shader */
+ if (kg->osl->bump_state[shader]) {
+ /* save state */
+ float3 P = sd->P;
+ float3 dPdx = sd->dP.dx;
+ float3 dPdy = sd->dP.dy;
+
+ /* set state as if undisplaced */
+ if (sd->flag & SD_HAS_DISPLACEMENT) {
+ float data[9];
+ bool found = kg->osl->services->get_attribute(sd,
+ true,
+ OSLRenderServices::u_empty,
+ TypeDesc::TypeVector,
+ OSLRenderServices::u_geom_undisplaced,
+ data);
+ (void)found;
+ assert(found);
+
+ memcpy(&sd->P, data, sizeof(float) * 3);
+ memcpy(&sd->dP.dx, data + 3, sizeof(float) * 3);
+ memcpy(&sd->dP.dy, data + 6, sizeof(float) * 3);
+
+ object_position_transform(kg, sd, &sd->P);
+ object_dir_transform(kg, sd, &sd->dP.dx);
+ object_dir_transform(kg, sd, &sd->dP.dy);
+
+ globals->P = TO_VEC3(sd->P);
+ globals->dPdx = TO_VEC3(sd->dP.dx);
+ globals->dPdy = TO_VEC3(sd->dP.dy);
+ }
+
+ /* execute bump shader */
+ ss->execute(octx, *(kg->osl->bump_state[shader]), *globals);
+
+ /* reset state */
+ sd->P = P;
+ sd->dP.dx = dPdx;
+ sd->dP.dy = dPdy;
+
+ globals->P = TO_VEC3(P);
+ globals->dPdx = TO_VEC3(dPdx);
+ globals->dPdy = TO_VEC3(dPdy);
+ }
+
+ /* surface shader */
+ if (kg->osl->surface_state[shader]) {
+ ss->execute(octx, *(kg->osl->surface_state[shader]), *globals);
+ }
+
+ /* flatten closure tree */
+ if (globals->Ci)
+ flatten_surface_closure_tree(sd, path_flag, globals->Ci);
}
/* Background */
@@ -238,56 +243,56 @@ static void flatten_background_closure_tree(ShaderData *sd,
const OSL::ClosureColor *closure,
float3 weight = make_float3(1.0f, 1.0f, 1.0f))
{
- /* OSL gives us a closure tree, if we are shading for background there
- * is only one supported closure type at the moment, which has no evaluation
- * functions, so we just sum the weights */
-
- switch(closure->id) {
- case OSL::ClosureColor::MUL: {
- OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
- flatten_background_closure_tree(sd, mul->closure, weight * TO_FLOAT3(mul->weight));
- break;
- }
- case OSL::ClosureColor::ADD: {
- OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
-
- flatten_background_closure_tree(sd, add->closureA, weight);
- flatten_background_closure_tree(sd, add->closureB, weight);
- break;
- }
- default: {
- OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure;
- CClosurePrimitive *prim = (CClosurePrimitive *)comp->data();
-
- if(prim) {
+ /* OSL gives us a closure tree, if we are shading for background there
+ * is only one supported closure type at the moment, which has no evaluation
+ * functions, so we just sum the weights */
+
+ switch (closure->id) {
+ case OSL::ClosureColor::MUL: {
+ OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
+ flatten_background_closure_tree(sd, mul->closure, weight * TO_FLOAT3(mul->weight));
+ break;
+ }
+ case OSL::ClosureColor::ADD: {
+ OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
+
+ flatten_background_closure_tree(sd, add->closureA, weight);
+ flatten_background_closure_tree(sd, add->closureB, weight);
+ break;
+ }
+ default: {
+ OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure;
+ CClosurePrimitive *prim = (CClosurePrimitive *)comp->data();
+
+ if (prim) {
#ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS
- weight = weight*TO_FLOAT3(comp->w);
+ weight = weight * TO_FLOAT3(comp->w);
#endif
- prim->setup(sd, 0, weight);
- }
- break;
- }
- }
+ prim->setup(sd, 0, weight);
+ }
+ break;
+ }
+ }
}
void OSLShader::eval_background(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag)
{
- /* setup shader globals from shader data */
- OSLThreadData *tdata = kg->osl_tdata;
- shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
-
- /* execute shader for this point */
- OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss;
- OSL::ShaderGlobals *globals = &tdata->globals;
- OSL::ShadingContext *octx = tdata->context;
-
- if(kg->osl->background_state) {
- ss->execute(octx, *(kg->osl->background_state), *globals);
- }
-
- /* return background color immediately */
- if(globals->Ci)
- flatten_background_closure_tree(sd, globals->Ci);
+ /* setup shader globals from shader data */
+ OSLThreadData *tdata = kg->osl_tdata;
+ shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
+
+ /* execute shader for this point */
+ OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
+ OSL::ShaderGlobals *globals = &tdata->globals;
+ OSL::ShadingContext *octx = tdata->context;
+
+ if (kg->osl->background_state) {
+ ss->execute(octx, *(kg->osl->background_state), *globals);
+ }
+
+ /* return background color immediately */
+ if (globals->Ci)
+ flatten_background_closure_tree(sd, globals->Ci);
}
/* Volume */
@@ -296,112 +301,117 @@ static void flatten_volume_closure_tree(ShaderData *sd,
const OSL::ClosureColor *closure,
float3 weight = make_float3(1.0f, 1.0f, 1.0f))
{
- /* OSL gives us a closure tree, we flatten it into arrays per
- * closure type, for evaluation, sampling, etc later on. */
-
- switch(closure->id) {
- case OSL::ClosureColor::MUL: {
- OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
- flatten_volume_closure_tree(sd, mul->closure, TO_FLOAT3(mul->weight) * weight);
- break;
- }
- case OSL::ClosureColor::ADD: {
- OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
- flatten_volume_closure_tree(sd, add->closureA, weight);
- flatten_volume_closure_tree(sd, add->closureB, weight);
- break;
- }
- default: {
- OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure;
- CClosurePrimitive *prim = (CClosurePrimitive *)comp->data();
-
- if(prim) {
+ /* OSL gives us a closure tree, we flatten it into arrays per
+ * closure type, for evaluation, sampling, etc later on. */
+
+ switch (closure->id) {
+ case OSL::ClosureColor::MUL: {
+ OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
+ flatten_volume_closure_tree(sd, mul->closure, TO_FLOAT3(mul->weight) * weight);
+ break;
+ }
+ case OSL::ClosureColor::ADD: {
+ OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
+ flatten_volume_closure_tree(sd, add->closureA, weight);
+ flatten_volume_closure_tree(sd, add->closureB, weight);
+ break;
+ }
+ default: {
+ OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure;
+ CClosurePrimitive *prim = (CClosurePrimitive *)comp->data();
+
+ if (prim) {
#ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS
- weight = weight*TO_FLOAT3(comp->w);
+ weight = weight * TO_FLOAT3(comp->w);
#endif
- prim->setup(sd, 0, weight);
- }
- }
- }
+ prim->setup(sd, 0, weight);
+ }
+ }
+ }
}
void OSLShader::eval_volume(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag)
{
- /* setup shader globals from shader data */
- OSLThreadData *tdata = kg->osl_tdata;
- shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
-
- /* execute shader */
- OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss;
- OSL::ShaderGlobals *globals = &tdata->globals;
- OSL::ShadingContext *octx = tdata->context;
- int shader = sd->shader & SHADER_MASK;
-
- if(kg->osl->volume_state[shader]) {
- ss->execute(octx, *(kg->osl->volume_state[shader]), *globals);
- }
-
- /* flatten closure tree */
- if(globals->Ci)
- flatten_volume_closure_tree(sd, globals->Ci);
+ /* setup shader globals from shader data */
+ OSLThreadData *tdata = kg->osl_tdata;
+ shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
+
+ /* execute shader */
+ OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
+ OSL::ShaderGlobals *globals = &tdata->globals;
+ OSL::ShadingContext *octx = tdata->context;
+ int shader = sd->shader & SHADER_MASK;
+
+ if (kg->osl->volume_state[shader]) {
+ ss->execute(octx, *(kg->osl->volume_state[shader]), *globals);
+ }
+
+ /* flatten closure tree */
+ if (globals->Ci)
+ flatten_volume_closure_tree(sd, globals->Ci);
}
/* Displacement */
void OSLShader::eval_displacement(KernelGlobals *kg, ShaderData *sd, PathState *state)
{
- /* setup shader globals from shader data */
- OSLThreadData *tdata = kg->osl_tdata;
+ /* setup shader globals from shader data */
+ OSLThreadData *tdata = kg->osl_tdata;
- shaderdata_to_shaderglobals(kg, sd, state, 0, tdata);
+ shaderdata_to_shaderglobals(kg, sd, state, 0, tdata);
- /* execute shader */
- OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss;
- OSL::ShaderGlobals *globals = &tdata->globals;
- OSL::ShadingContext *octx = tdata->context;
- int shader = sd->shader & SHADER_MASK;
+ /* execute shader */
+ OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
+ OSL::ShaderGlobals *globals = &tdata->globals;
+ OSL::ShadingContext *octx = tdata->context;
+ int shader = sd->shader & SHADER_MASK;
- if(kg->osl->displacement_state[shader]) {
- ss->execute(octx, *(kg->osl->displacement_state[shader]), *globals);
- }
+ if (kg->osl->displacement_state[shader]) {
+ ss->execute(octx, *(kg->osl->displacement_state[shader]), *globals);
+ }
- /* get back position */
- sd->P = TO_FLOAT3(globals->P);
+ /* get back position */
+ sd->P = TO_FLOAT3(globals->P);
}
/* Attributes */
-int OSLShader::find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeDescriptor *desc)
+int OSLShader::find_attribute(KernelGlobals *kg,
+ const ShaderData *sd,
+ uint id,
+ AttributeDescriptor *desc)
{
- /* for OSL, a hash map is used to lookup the attribute by name. */
- int object = sd->object*ATTR_PRIM_TYPES;
+ /* for OSL, a hash map is used to lookup the attribute by name. */
+ int object = sd->object * ATTR_PRIM_TYPES;
#ifdef __HAIR__
- if(sd->type & PRIMITIVE_ALL_CURVE) object += ATTR_PRIM_CURVE;
+ if (sd->type & PRIMITIVE_ALL_CURVE)
+ object += ATTR_PRIM_CURVE;
#endif
- OSLGlobals::AttributeMap &attr_map = kg->osl->attribute_map[object];
- ustring stdname(std::string("geom:") + std::string(Attribute::standard_name((AttributeStandard)id)));
- OSLGlobals::AttributeMap::const_iterator it = attr_map.find(stdname);
-
- if(it != attr_map.end()) {
- const OSLGlobals::Attribute &osl_attr = it->second;
- *desc = osl_attr.desc;
-
- if(sd->prim == PRIM_NONE && (AttributeElement)osl_attr.desc.element != ATTR_ELEMENT_MESH) {
- desc->offset = ATTR_STD_NOT_FOUND;
- return ATTR_STD_NOT_FOUND;
- }
-
- /* return result */
- if(osl_attr.desc.element == ATTR_ELEMENT_NONE) {
- desc->offset = ATTR_STD_NOT_FOUND;
- }
- return desc->offset;
- }
- else {
- desc->offset = ATTR_STD_NOT_FOUND;
- return (int)ATTR_STD_NOT_FOUND;
- }
+ OSLGlobals::AttributeMap &attr_map = kg->osl->attribute_map[object];
+ ustring stdname(std::string("geom:") +
+ std::string(Attribute::standard_name((AttributeStandard)id)));
+ OSLGlobals::AttributeMap::const_iterator it = attr_map.find(stdname);
+
+ if (it != attr_map.end()) {
+ const OSLGlobals::Attribute &osl_attr = it->second;
+ *desc = osl_attr.desc;
+
+ if (sd->prim == PRIM_NONE && (AttributeElement)osl_attr.desc.element != ATTR_ELEMENT_MESH) {
+ desc->offset = ATTR_STD_NOT_FOUND;
+ return ATTR_STD_NOT_FOUND;
+ }
+
+ /* return result */
+ if (osl_attr.desc.element == ATTR_ELEMENT_NONE) {
+ desc->offset = ATTR_STD_NOT_FOUND;
+ }
+ return desc->offset;
+ }
+ else {
+ desc->offset = ATTR_STD_NOT_FOUND;
+ return (int)ATTR_STD_NOT_FOUND;
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/osl_shader.h b/intern/cycles/kernel/osl/osl_shader.h
index 9824f966a44..a4fa24d0a90 100644
--- a/intern/cycles/kernel/osl/osl_shader.h
+++ b/intern/cycles/kernel/osl/osl_shader.h
@@ -29,7 +29,7 @@
* This means no thread state must be passed along in the kernel itself.
*/
-#include "kernel/kernel_types.h"
+# include "kernel/kernel_types.h"
CCL_NAMESPACE_BEGIN
@@ -44,26 +44,31 @@ struct OSLGlobals;
struct OSLShadingSystem;
class OSLShader {
-public:
- /* init */
- static void register_closures(OSLShadingSystem *ss);
+ public:
+ /* init */
+ static void register_closures(OSLShadingSystem *ss);
- /* per thread data */
- static void thread_init(KernelGlobals *kg, KernelGlobals *kernel_globals, OSLGlobals *osl_globals);
- static void thread_free(KernelGlobals *kg);
+ /* per thread data */
+ static void thread_init(KernelGlobals *kg,
+ KernelGlobals *kernel_globals,
+ OSLGlobals *osl_globals);
+ static void thread_free(KernelGlobals *kg);
- /* eval */
- static void eval_surface(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag);
- static void eval_background(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag);
- static void eval_volume(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag);
- static void eval_displacement(KernelGlobals *kg, ShaderData *sd, PathState *state);
+ /* eval */
+ static void eval_surface(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag);
+ static void eval_background(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag);
+ static void eval_volume(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag);
+ static void eval_displacement(KernelGlobals *kg, ShaderData *sd, PathState *state);
- /* attributes */
- static int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeDescriptor *desc);
+ /* attributes */
+ static int find_attribute(KernelGlobals *kg,
+ const ShaderData *sd,
+ uint id,
+ AttributeDescriptor *desc);
};
CCL_NAMESPACE_END
#endif
-#endif /* __OSL_SHADER_H__ */
+#endif /* __OSL_SHADER_H__ */
diff --git a/intern/cycles/kernel/shaders/CMakeLists.txt b/intern/cycles/kernel/shaders/CMakeLists.txt
index 4740db27d4e..b42b9b2fe64 100644
--- a/intern/cycles/kernel/shaders/CMakeLists.txt
+++ b/intern/cycles/kernel/shaders/CMakeLists.txt
@@ -2,102 +2,102 @@
# OSL node shaders
set(SRC_OSL
- node_add_closure.osl
- node_ambient_occlusion.osl
- node_anisotropic_bsdf.osl
- node_attribute.osl
- node_background.osl
- node_bevel.osl
- node_brick_texture.osl
- node_brightness.osl
- node_bump.osl
- node_camera.osl
- node_checker_texture.osl
- node_combine_rgb.osl
- node_combine_hsv.osl
- node_combine_xyz.osl
- node_convert_from_color.osl
- node_convert_from_float.osl
- node_convert_from_int.osl
- node_convert_from_normal.osl
- node_convert_from_point.osl
- node_convert_from_vector.osl
- node_diffuse_bsdf.osl
- node_displacement.osl
- node_vector_displacement.osl
- node_emission.osl
- node_environment_texture.osl
- node_fresnel.osl
- node_gamma.osl
- node_geometry.osl
- node_glass_bsdf.osl
- node_glossy_bsdf.osl
- node_gradient_texture.osl
- node_hair_info.osl
- node_scatter_volume.osl
- node_absorption_volume.osl
- node_principled_volume.osl
- node_holdout.osl
- node_hsv.osl
- node_ies_light.osl
- node_image_texture.osl
- node_invert.osl
- node_layer_weight.osl
- node_light_falloff.osl
- node_light_path.osl
- node_magic_texture.osl
- node_mapping.osl
- node_math.osl
- node_mix.osl
- node_mix_closure.osl
- node_musgrave_texture.osl
- node_noise_texture.osl
- node_normal.osl
- node_normal_map.osl
- node_object_info.osl
- node_output_displacement.osl
- node_output_surface.osl
- node_output_volume.osl
- node_particle_info.osl
- node_refraction_bsdf.osl
- node_rgb_curves.osl
- node_rgb_ramp.osl
- node_separate_rgb.osl
- node_separate_hsv.osl
- node_separate_xyz.osl
- node_set_normal.osl
- node_sky_texture.osl
- node_subsurface_scattering.osl
- node_tangent.osl
- node_texture_coordinate.osl
- node_toon_bsdf.osl
- node_translucent_bsdf.osl
- node_transparent_bsdf.osl
- node_value.osl
- node_vector_curves.osl
- node_vector_math.osl
- node_vector_transform.osl
- node_velvet_bsdf.osl
- node_voronoi_texture.osl
- node_voxel_texture.osl
- node_wavelength.osl
- node_blackbody.osl
- node_wave_texture.osl
- node_wireframe.osl
- node_hair_bsdf.osl
- node_principled_hair_bsdf.osl
- node_uv_map.osl
- node_principled_bsdf.osl
- node_rgb_to_bw.osl
+ node_add_closure.osl
+ node_ambient_occlusion.osl
+ node_anisotropic_bsdf.osl
+ node_attribute.osl
+ node_background.osl
+ node_bevel.osl
+ node_brick_texture.osl
+ node_brightness.osl
+ node_bump.osl
+ node_camera.osl
+ node_checker_texture.osl
+ node_combine_rgb.osl
+ node_combine_hsv.osl
+ node_combine_xyz.osl
+ node_convert_from_color.osl
+ node_convert_from_float.osl
+ node_convert_from_int.osl
+ node_convert_from_normal.osl
+ node_convert_from_point.osl
+ node_convert_from_vector.osl
+ node_diffuse_bsdf.osl
+ node_displacement.osl
+ node_vector_displacement.osl
+ node_emission.osl
+ node_environment_texture.osl
+ node_fresnel.osl
+ node_gamma.osl
+ node_geometry.osl
+ node_glass_bsdf.osl
+ node_glossy_bsdf.osl
+ node_gradient_texture.osl
+ node_hair_info.osl
+ node_scatter_volume.osl
+ node_absorption_volume.osl
+ node_principled_volume.osl
+ node_holdout.osl
+ node_hsv.osl
+ node_ies_light.osl
+ node_image_texture.osl
+ node_invert.osl
+ node_layer_weight.osl
+ node_light_falloff.osl
+ node_light_path.osl
+ node_magic_texture.osl
+ node_mapping.osl
+ node_math.osl
+ node_mix.osl
+ node_mix_closure.osl
+ node_musgrave_texture.osl
+ node_noise_texture.osl
+ node_normal.osl
+ node_normal_map.osl
+ node_object_info.osl
+ node_output_displacement.osl
+ node_output_surface.osl
+ node_output_volume.osl
+ node_particle_info.osl
+ node_refraction_bsdf.osl
+ node_rgb_curves.osl
+ node_rgb_ramp.osl
+ node_separate_rgb.osl
+ node_separate_hsv.osl
+ node_separate_xyz.osl
+ node_set_normal.osl
+ node_sky_texture.osl
+ node_subsurface_scattering.osl
+ node_tangent.osl
+ node_texture_coordinate.osl
+ node_toon_bsdf.osl
+ node_translucent_bsdf.osl
+ node_transparent_bsdf.osl
+ node_value.osl
+ node_vector_curves.osl
+ node_vector_math.osl
+ node_vector_transform.osl
+ node_velvet_bsdf.osl
+ node_voronoi_texture.osl
+ node_voxel_texture.osl
+ node_wavelength.osl
+ node_blackbody.osl
+ node_wave_texture.osl
+ node_wireframe.osl
+ node_hair_bsdf.osl
+ node_principled_hair_bsdf.osl
+ node_uv_map.osl
+ node_principled_bsdf.osl
+ node_rgb_to_bw.osl
)
set(SRC_OSL_HEADERS
- node_color.h
- node_fresnel.h
- node_ramp_util.h
- node_texture.h
- stdosl.h
- oslutil.h
+ node_color.h
+ node_fresnel.h
+ node_ramp_util.h
+ node_texture.h
+ stdosl.h
+ oslutil.h
)
set(SRC_OSO
@@ -106,20 +106,20 @@ set(SRC_OSO
# TODO, add a module to compile OSL
foreach(_file ${SRC_OSL})
- set(_OSL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${_file})
- set_source_files_properties(${_file} PROPERTIES HEADER_FILE_ONLY TRUE)
- string(REPLACE ".osl" ".oso" _OSO_FILE ${_OSL_FILE})
- string(REPLACE ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} _OSO_FILE ${_OSO_FILE})
- add_custom_command(
- OUTPUT ${_OSO_FILE}
- COMMAND ${OSL_COMPILER} -q -O2 -I"${CMAKE_CURRENT_SOURCE_DIR}" -o ${_OSO_FILE} ${_OSL_FILE}
- DEPENDS ${_OSL_FILE} ${SRC_OSL_HEADERS} ${OSL_COMPILER})
- list(APPEND SRC_OSO
- ${_OSO_FILE}
- )
+ set(_OSL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${_file})
+ set_source_files_properties(${_file} PROPERTIES HEADER_FILE_ONLY TRUE)
+ string(REPLACE ".osl" ".oso" _OSO_FILE ${_OSL_FILE})
+ string(REPLACE ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} _OSO_FILE ${_OSO_FILE})
+ add_custom_command(
+ OUTPUT ${_OSO_FILE}
+ COMMAND ${OSL_COMPILER} -q -O2 -I"${CMAKE_CURRENT_SOURCE_DIR}" -o ${_OSO_FILE} ${_OSL_FILE}
+ DEPENDS ${_OSL_FILE} ${SRC_OSL_HEADERS} ${OSL_COMPILER})
+ list(APPEND SRC_OSO
+ ${_OSO_FILE}
+ )
- unset(_OSL_FILE)
- unset(_OSO_FILE)
+ unset(_OSL_FILE)
+ unset(_OSO_FILE)
endforeach()
add_custom_target(cycles_osl_shaders ALL DEPENDS ${SRC_OSO} ${SRC_OSL_HEADERS} ${OSL_COMPILER} SOURCES ${SRC_OSL})
diff --git a/intern/cycles/kernel/shaders/node_absorption_volume.osl b/intern/cycles/kernel/shaders/node_absorption_volume.osl
index 18f662ebbbd..e99bd254666 100644
--- a/intern/cycles/kernel/shaders/node_absorption_volume.osl
+++ b/intern/cycles/kernel/shaders/node_absorption_volume.osl
@@ -16,11 +16,9 @@
#include "stdosl.h"
-shader node_absorption_volume(
- color Color = color(0.8, 0.8, 0.8),
- float Density = 1.0,
- output closure color Volume = 0)
+shader node_absorption_volume(color Color = color(0.8, 0.8, 0.8),
+ float Density = 1.0,
+ output closure color Volume = 0)
{
- Volume = ((color(1.0, 1.0, 1.0) - Color) * max(Density, 0.0)) * absorption();
+ Volume = ((color(1.0, 1.0, 1.0) - Color) * max(Density, 0.0)) * absorption();
}
-
diff --git a/intern/cycles/kernel/shaders/node_add_closure.osl b/intern/cycles/kernel/shaders/node_add_closure.osl
index b6596e0b6bd..077e2735e61 100644
--- a/intern/cycles/kernel/shaders/node_add_closure.osl
+++ b/intern/cycles/kernel/shaders/node_add_closure.osl
@@ -16,11 +16,9 @@
#include "stdosl.h"
-shader node_add_closure(
- closure color Closure1 = 0,
- closure color Closure2 = 0,
- output closure color Closure = 0)
+shader node_add_closure(closure color Closure1 = 0,
+ closure color Closure2 = 0,
+ output closure color Closure = 0)
{
- Closure = Closure1 + Closure2;
+ Closure = Closure1 + Closure2;
}
-
diff --git a/intern/cycles/kernel/shaders/node_ambient_occlusion.osl b/intern/cycles/kernel/shaders/node_ambient_occlusion.osl
index 825cccd59ce..7bf28719e78 100644
--- a/intern/cycles/kernel/shaders/node_ambient_occlusion.osl
+++ b/intern/cycles/kernel/shaders/node_ambient_occlusion.osl
@@ -16,20 +16,28 @@
#include "stdosl.h"
-shader node_ambient_occlusion(
- color ColorIn = color(1.0, 1.0, 1.0),
- int samples = 16,
- float Distance = 1.0,
- normal Normal = N,
- int inside = 0,
- int only_local = 0,
- output color ColorOut = color(1.0, 1.0, 1.0),
- output float AO = 1.0)
+shader node_ambient_occlusion(color ColorIn = color(1.0, 1.0, 1.0),
+ int samples = 16,
+ float Distance = 1.0,
+ normal Normal = N,
+ int inside = 0,
+ int only_local = 0,
+ output color ColorOut = color(1.0, 1.0, 1.0),
+ output float AO = 1.0)
{
- int global_radius = (Distance == 0.0 && !isconnected(Distance));
+ int global_radius = (Distance == 0.0 && !isconnected(Distance));
- /* Abuse texture call with special @ao token. */
- AO = texture("@ao", samples, Distance, Normal[0], Normal[1], Normal[2], inside, "sblur", only_local, "tblur", global_radius);
- ColorOut = ColorIn * AO;
+ /* Abuse texture call with special @ao token. */
+ AO = texture("@ao",
+ samples,
+ Distance,
+ Normal[0],
+ Normal[1],
+ Normal[2],
+ inside,
+ "sblur",
+ only_local,
+ "tblur",
+ global_radius);
+ ColorOut = ColorIn * AO;
}
-
diff --git a/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl b/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl
index 21e28ece65d..165c09eb8e0 100644
--- a/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl
@@ -16,45 +16,43 @@
#include "stdosl.h"
-shader node_anisotropic_bsdf(
- color Color = 0.0,
- string distribution = "GGX",
- float Roughness = 0.0,
- float Anisotropy = 0.0,
- float Rotation = 0.0,
- normal Normal = N,
- normal Tangent = normalize(dPdu),
- output closure color BSDF = 0)
+shader node_anisotropic_bsdf(color Color = 0.0,
+ string distribution = "GGX",
+ float Roughness = 0.0,
+ float Anisotropy = 0.0,
+ float Rotation = 0.0,
+ normal Normal = N,
+ normal Tangent = normalize(dPdu),
+ output closure color BSDF = 0)
{
- /* rotate tangent around normal */
- vector T = Tangent;
+ /* rotate tangent around normal */
+ vector T = Tangent;
- if (Rotation != 0.0)
- T = rotate(T, Rotation * M_2PI, point(0.0, 0.0, 0.0), Normal);
+ if (Rotation != 0.0)
+ T = rotate(T, Rotation * M_2PI, point(0.0, 0.0, 0.0), Normal);
- /* compute roughness */
- float roughness = Roughness * Roughness;
- float roughness_u, roughness_v;
- float aniso = clamp(Anisotropy, -0.99, 0.99);
+ /* compute roughness */
+ float roughness = Roughness * Roughness;
+ float roughness_u, roughness_v;
+ float aniso = clamp(Anisotropy, -0.99, 0.99);
- if (aniso < 0.0) {
- roughness_u = roughness / (1.0 + aniso);
- roughness_v = roughness * (1.0 + aniso);
- }
- else {
- roughness_u = roughness * (1.0 - aniso);
- roughness_v = roughness / (1.0 - aniso);
- }
+ if (aniso < 0.0) {
+ roughness_u = roughness / (1.0 + aniso);
+ roughness_v = roughness * (1.0 + aniso);
+ }
+ else {
+ roughness_u = roughness * (1.0 - aniso);
+ roughness_v = roughness / (1.0 - aniso);
+ }
- if (distribution == "sharp")
- BSDF = Color * reflection(Normal);
- else if (distribution == "beckmann")
- BSDF = Color * microfacet_beckmann_aniso(Normal, T, roughness_u, roughness_v);
- else if (distribution == "GGX")
- BSDF = Color * microfacet_ggx_aniso(Normal, T, roughness_u, roughness_v);
- else if (distribution == "Multiscatter GGX")
- BSDF = Color * microfacet_multi_ggx_aniso(Normal, T, roughness_u, roughness_v, Color);
- else
- BSDF = Color * ashikhmin_shirley(Normal, T, roughness_u, roughness_v);
+ if (distribution == "sharp")
+ BSDF = Color * reflection(Normal);
+ else if (distribution == "beckmann")
+ BSDF = Color * microfacet_beckmann_aniso(Normal, T, roughness_u, roughness_v);
+ else if (distribution == "GGX")
+ BSDF = Color * microfacet_ggx_aniso(Normal, T, roughness_u, roughness_v);
+ else if (distribution == "Multiscatter GGX")
+ BSDF = Color * microfacet_multi_ggx_aniso(Normal, T, roughness_u, roughness_v, Color);
+ else
+ BSDF = Color * ashikhmin_shirley(Normal, T, roughness_u, roughness_v);
}
-
diff --git a/intern/cycles/kernel/shaders/node_attribute.osl b/intern/cycles/kernel/shaders/node_attribute.osl
index 67183e9ffe0..336543cc130 100644
--- a/intern/cycles/kernel/shaders/node_attribute.osl
+++ b/intern/cycles/kernel/shaders/node_attribute.osl
@@ -16,26 +16,24 @@
#include "stdosl.h"
-shader node_attribute(
- string bump_offset = "center",
- string name = "",
- output point Vector = point(0.0, 0.0, 0.0),
- output color Color = 0.0,
- output float Fac = 0.0)
+shader node_attribute(string bump_offset = "center",
+ string name = "",
+ output point Vector = point(0.0, 0.0, 0.0),
+ output color Color = 0.0,
+ output float Fac = 0.0)
{
- getattribute(name, Color);
- Vector = point(Color);
- getattribute(name, Fac);
+ getattribute(name, Color);
+ Vector = point(Color);
+ getattribute(name, Fac);
- if (bump_offset == "dx") {
- Color += Dx(Color);
- Vector += Dx(Vector);
- Fac += Dx(Fac);
- }
- else if (bump_offset == "dy") {
- Color += Dy(Color);
- Vector += Dy(Vector);
- Fac += Dy(Fac);
- }
+ if (bump_offset == "dx") {
+ Color += Dx(Color);
+ Vector += Dx(Vector);
+ Fac += Dx(Fac);
+ }
+ else if (bump_offset == "dy") {
+ Color += Dy(Color);
+ Vector += Dy(Vector);
+ Fac += Dy(Fac);
+ }
}
-
diff --git a/intern/cycles/kernel/shaders/node_background.osl b/intern/cycles/kernel/shaders/node_background.osl
index 613d4e360fa..6ded0d2c65c 100644
--- a/intern/cycles/kernel/shaders/node_background.osl
+++ b/intern/cycles/kernel/shaders/node_background.osl
@@ -16,11 +16,9 @@
#include "stdosl.h"
-shader node_background(
- color Color = 0.8,
- float Strength = 1.0,
- output closure color Background = 0)
+shader node_background(color Color = 0.8,
+ float Strength = 1.0,
+ output closure color Background = 0)
{
- Background = Color * Strength * background();
+ Background = Color * Strength * background();
}
-
diff --git a/intern/cycles/kernel/shaders/node_bevel.osl b/intern/cycles/kernel/shaders/node_bevel.osl
index 9c4ca15be17..189c20c52e7 100644
--- a/intern/cycles/kernel/shaders/node_bevel.osl
+++ b/intern/cycles/kernel/shaders/node_bevel.osl
@@ -16,16 +16,14 @@
#include "stdosl.h"
-shader node_bevel(
- int samples = 4,
- float Radius = 0.05,
- normal NormalIn = N,
- output normal NormalOut = N)
+shader node_bevel(int samples = 4,
+ float Radius = 0.05,
+ normal NormalIn = N,
+ output normal NormalOut = N)
{
- /* Abuse texture call with special @bevel token. */
- vector bevel_N = (normal)(color)texture("@bevel", samples, Radius);
+ /* Abuse texture call with special @bevel token. */
+ vector bevel_N = (normal)(color)texture("@bevel", samples, Radius);
- /* Preserve input normal. */
- NormalOut = normalize(NormalIn + (bevel_N - N));
+ /* Preserve input normal. */
+ NormalOut = normalize(NormalIn + (bevel_N - N));
}
-
diff --git a/intern/cycles/kernel/shaders/node_blackbody.osl b/intern/cycles/kernel/shaders/node_blackbody.osl
index 1da6894d0f0..8a24bf1e28b 100644
--- a/intern/cycles/kernel/shaders/node_blackbody.osl
+++ b/intern/cycles/kernel/shaders/node_blackbody.osl
@@ -16,16 +16,13 @@
#include "stdosl.h"
-shader node_blackbody(
- float Temperature = 1200.0,
- output color Color = 0.0)
+shader node_blackbody(float Temperature = 1200.0, output color Color = 0.0)
{
- color rgb = blackbody(Temperature);
-
- /* Scale by luminance */
- float l = luminance(rgb);
- if (l != 0.0)
- rgb /= l;
- Color = rgb;
-}
+ color rgb = blackbody(Temperature);
+ /* Scale by luminance */
+ float l = luminance(rgb);
+ if (l != 0.0)
+ rgb /= l;
+ Color = rgb;
+}
diff --git a/intern/cycles/kernel/shaders/node_brick_texture.osl b/intern/cycles/kernel/shaders/node_brick_texture.osl
index 9d2e5b74ce6..0abc3574c48 100644
--- a/intern/cycles/kernel/shaders/node_brick_texture.osl
+++ b/intern/cycles/kernel/shaders/node_brick_texture.osl
@@ -21,85 +21,100 @@
float brick_noise(int ns) /* fast integer noise */
{
- int nn;
- int n = (ns + 1013) & 2147483647;
- n = (n >> 13) ^ n;
- nn = (n * (n * n * 60493 + 19990303) + 1376312589) & 2147483647;
- return 0.5 * ((float)nn / 1073741824.0);
+ int nn;
+ int n = (ns + 1013) & 2147483647;
+ n = (n >> 13) ^ n;
+ nn = (n * (n * n * 60493 + 19990303) + 1376312589) & 2147483647;
+ return 0.5 * ((float)nn / 1073741824.0);
}
-float brick(point p, float mortar_size, float mortar_smooth, float bias,
- float BrickWidth, float row_height, float offset_amount, int offset_frequency,
- float squash_amount, int squash_frequency, output float tint)
+float brick(point p,
+ float mortar_size,
+ float mortar_smooth,
+ float bias,
+ float BrickWidth,
+ float row_height,
+ float offset_amount,
+ int offset_frequency,
+ float squash_amount,
+ int squash_frequency,
+ output float tint)
{
- int bricknum, rownum;
- float offset = 0.0;
- float brick_width = BrickWidth;
- float x, y;
-
- rownum = (int)floor(p[1] / row_height);
-
- if (offset_frequency && squash_frequency) {
- brick_width *= (rownum % squash_frequency) ? 1.0 : squash_amount; /* squash */
- offset = (rownum % offset_frequency) ? 0.0 : (brick_width * offset_amount); /* offset */
- }
-
- bricknum = (int)floor((p[0] + offset) / brick_width);
-
- x = (p[0] + offset) - brick_width * bricknum;
- y = p[1] - row_height * rownum;
-
- tint = clamp((brick_noise((rownum << 16) + (bricknum & 65535)) + bias), 0.0, 1.0);
-
- float min_dist = min(min(x, y), min(brick_width - x, row_height - y));
- if(min_dist >= mortar_size) {
- return 0.0;
- }
- else if(mortar_smooth == 0.0) {
- return 1.0;
- }
- else {
- min_dist = 1.0 - min_dist/mortar_size;
- return smoothstep(0.0, mortar_smooth, min_dist);
- }
+ int bricknum, rownum;
+ float offset = 0.0;
+ float brick_width = BrickWidth;
+ float x, y;
+
+ rownum = (int)floor(p[1] / row_height);
+
+ if (offset_frequency && squash_frequency) {
+ brick_width *= (rownum % squash_frequency) ? 1.0 : squash_amount; /* squash */
+ offset = (rownum % offset_frequency) ? 0.0 : (brick_width * offset_amount); /* offset */
+ }
+
+ bricknum = (int)floor((p[0] + offset) / brick_width);
+
+ x = (p[0] + offset) - brick_width * bricknum;
+ y = p[1] - row_height * rownum;
+
+ tint = clamp((brick_noise((rownum << 16) + (bricknum & 65535)) + bias), 0.0, 1.0);
+
+ float min_dist = min(min(x, y), min(brick_width - x, row_height - y));
+ if (min_dist >= mortar_size) {
+ return 0.0;
+ }
+ else if (mortar_smooth == 0.0) {
+ return 1.0;
+ }
+ else {
+ min_dist = 1.0 - min_dist / mortar_size;
+ return smoothstep(0.0, mortar_smooth, min_dist);
+ }
}
-shader node_brick_texture(
- int use_mapping = 0,
- matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
- float offset = 0.5,
- int offset_frequency = 2,
- float squash = 1.0,
- int squash_frequency = 1,
- point Vector = P,
- color Color1 = 0.2,
- color Color2 = 0.8,
- color Mortar = 0.0,
- float Scale = 5.0,
- float MortarSize = 0.02,
- float MortarSmooth = 0.0,
- float Bias = 0.0,
- float BrickWidth = 0.5,
- float RowHeight = 0.25,
- output float Fac = 0.0,
- output color Color = 0.2)
+shader node_brick_texture(int use_mapping = 0,
+ matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+ float offset = 0.5,
+ int offset_frequency = 2,
+ float squash = 1.0,
+ int squash_frequency = 1,
+ point Vector = P,
+ color Color1 = 0.2,
+ color Color2 = 0.8,
+ color Mortar = 0.0,
+ float Scale = 5.0,
+ float MortarSize = 0.02,
+ float MortarSmooth = 0.0,
+ float Bias = 0.0,
+ float BrickWidth = 0.5,
+ float RowHeight = 0.25,
+ output float Fac = 0.0,
+ output color Color = 0.2)
{
- point p = Vector;
-
- if (use_mapping)
- p = transform(mapping, p);
-
- float tint = 0.0;
- color Col = Color1;
-
- Fac = brick(p * Scale, MortarSize, MortarSmooth, Bias, BrickWidth, RowHeight,
- offset, offset_frequency, squash, squash_frequency, tint);
-
- if (Fac != 1.0) {
- float facm = 1.0 - tint;
- Col = facm * Color1 + tint * Color2;
- }
-
- Color = mix(Col, Mortar, Fac);
-}
+ point p = Vector;
+
+ if (use_mapping)
+ p = transform(mapping, p);
+
+ float tint = 0.0;
+ color Col = Color1;
+ Fac = brick(p * Scale,
+ MortarSize,
+ MortarSmooth,
+ Bias,
+ BrickWidth,
+ RowHeight,
+ offset,
+ offset_frequency,
+ squash,
+ squash_frequency,
+ tint);
+
+ if (Fac != 1.0) {
+ float facm = 1.0 - tint;
+ Col = facm * Color1 + tint * Color2;
+ }
+
+ Color = mix(Col, Mortar, Fac);
+}
diff --git a/intern/cycles/kernel/shaders/node_brightness.osl b/intern/cycles/kernel/shaders/node_brightness.osl
index 00cfb167885..2defbc4b1db 100644
--- a/intern/cycles/kernel/shaders/node_brightness.osl
+++ b/intern/cycles/kernel/shaders/node_brightness.osl
@@ -16,17 +16,15 @@
#include "stdosl.h"
-shader node_brightness(
- color ColorIn = 0.8,
- float Bright = 0.0,
- float Contrast = 0.0,
- output color ColorOut = 0.8)
+shader node_brightness(color ColorIn = 0.8,
+ float Bright = 0.0,
+ float Contrast = 0.0,
+ output color ColorOut = 0.8)
{
- float a = 1.0 + Contrast;
- float b = Bright - Contrast * 0.5;
+ float a = 1.0 + Contrast;
+ float b = Bright - Contrast * 0.5;
- ColorOut[0] = max(a * ColorIn[0] + b, 0.0);
- ColorOut[1] = max(a * ColorIn[1] + b, 0.0);
- ColorOut[2] = max(a * ColorIn[2] + b, 0.0);
+ ColorOut[0] = max(a * ColorIn[0] + b, 0.0);
+ ColorOut[1] = max(a * ColorIn[1] + b, 0.0);
+ ColorOut[2] = max(a * ColorIn[2] + b, 0.0);
}
-
diff --git a/intern/cycles/kernel/shaders/node_bump.osl b/intern/cycles/kernel/shaders/node_bump.osl
index a2a4468d5f3..3697bb37fd9 100644
--- a/intern/cycles/kernel/shaders/node_bump.osl
+++ b/intern/cycles/kernel/shaders/node_bump.osl
@@ -19,52 +19,50 @@
/* "Bump Mapping Unparameterized Surfaces on the GPU"
* Morten S. Mikkelsen, 2010 */
-surface node_bump(
- int invert = 0,
- int use_object_space = 0,
- normal NormalIn = N,
- float Strength = 0.1,
- float Distance = 1.0,
- float SampleCenter = 0.0,
- float SampleX = 0.0,
- float SampleY = 0.0,
- output normal NormalOut = N)
+surface node_bump(int invert = 0,
+ int use_object_space = 0,
+ normal NormalIn = N,
+ float Strength = 0.1,
+ float Distance = 1.0,
+ float SampleCenter = 0.0,
+ float SampleX = 0.0,
+ float SampleY = 0.0,
+ output normal NormalOut = N)
{
- point Ptmp = P;
- normal Normal = NormalIn;
+ point Ptmp = P;
+ normal Normal = NormalIn;
- if (use_object_space) {
- Ptmp = transform("object", Ptmp);
- Normal = normalize(transform("object", Normal));
- }
+ if (use_object_space) {
+ Ptmp = transform("object", Ptmp);
+ Normal = normalize(transform("object", Normal));
+ }
- /* get surface tangents from normal */
- vector dPdx = Dx(Ptmp);
- vector dPdy = Dy(Ptmp);
+ /* get surface tangents from normal */
+ vector dPdx = Dx(Ptmp);
+ vector dPdy = Dy(Ptmp);
- vector Rx = cross(dPdy, Normal);
- vector Ry = cross(Normal, dPdx);
+ vector Rx = cross(dPdy, Normal);
+ vector Ry = cross(Normal, dPdx);
- /* compute surface gradient and determinant */
- float det = dot(dPdx, Rx);
- vector surfgrad = (SampleX - SampleCenter) * Rx + (SampleY - SampleCenter) * Ry;
+ /* compute surface gradient and determinant */
+ float det = dot(dPdx, Rx);
+ vector surfgrad = (SampleX - SampleCenter) * Rx + (SampleY - SampleCenter) * Ry;
- float absdet = fabs(det);
+ float absdet = fabs(det);
- float strength = max(Strength, 0.0);
- float dist = Distance;
+ float strength = max(Strength, 0.0);
+ float dist = Distance;
- if (invert)
- dist *= -1.0;
-
- /* compute and output perturbed normal */
- NormalOut = normalize(absdet * Normal - dist * sign(det) * surfgrad);
- NormalOut = normalize(strength * NormalOut + (1.0 - strength) * Normal);
+ if (invert)
+ dist *= -1.0;
- if (use_object_space) {
- NormalOut = normalize(transform("object", "world", NormalOut));
- }
+ /* compute and output perturbed normal */
+ NormalOut = normalize(absdet * Normal - dist * sign(det) * surfgrad);
+ NormalOut = normalize(strength * NormalOut + (1.0 - strength) * Normal);
- NormalOut = ensure_valid_reflection(Ng, I, NormalOut);
-}
+ if (use_object_space) {
+ NormalOut = normalize(transform("object", "world", NormalOut));
+ }
+ NormalOut = ensure_valid_reflection(Ng, I, NormalOut);
+}
diff --git a/intern/cycles/kernel/shaders/node_camera.osl b/intern/cycles/kernel/shaders/node_camera.osl
index 5e90cb8b8ee..833e9e775fe 100644
--- a/intern/cycles/kernel/shaders/node_camera.osl
+++ b/intern/cycles/kernel/shaders/node_camera.osl
@@ -16,16 +16,14 @@
#include "stdosl.h"
-shader node_camera(
- output vector ViewVector = vector(0.0, 0.0, 0.0),
- output float ViewZDepth = 0.0,
- output float ViewDistance = 0.0)
+shader node_camera(output vector ViewVector = vector(0.0, 0.0, 0.0),
+ output float ViewZDepth = 0.0,
+ output float ViewDistance = 0.0)
{
- ViewVector = (vector)transform("world", "camera", P);
+ ViewVector = (vector)transform("world", "camera", P);
- ViewZDepth = fabs(ViewVector[2]);
- ViewDistance = length(ViewVector);
+ ViewZDepth = fabs(ViewVector[2]);
+ ViewDistance = length(ViewVector);
- ViewVector = normalize(ViewVector);
+ ViewVector = normalize(ViewVector);
}
-
diff --git a/intern/cycles/kernel/shaders/node_checker_texture.osl b/intern/cycles/kernel/shaders/node_checker_texture.osl
index e745cfaee06..e068f7952ed 100644
--- a/intern/cycles/kernel/shaders/node_checker_texture.osl
+++ b/intern/cycles/kernel/shaders/node_checker_texture.osl
@@ -21,44 +21,43 @@
float checker(point ip)
{
- point p;
- p[0] = (ip[0] + 0.000001) * 0.999999;
- p[1] = (ip[1] + 0.000001) * 0.999999;
- p[2] = (ip[2] + 0.000001) * 0.999999;
-
- int xi = (int)fabs(floor(p[0]));
- int yi = (int)fabs(floor(p[1]));
- int zi = (int)fabs(floor(p[2]));
-
- if ((xi % 2 == yi % 2) == (zi % 2)) {
- return 1.0;
- }
- else {
- return 0.0;
- }
+ point p;
+ p[0] = (ip[0] + 0.000001) * 0.999999;
+ p[1] = (ip[1] + 0.000001) * 0.999999;
+ p[2] = (ip[2] + 0.000001) * 0.999999;
+
+ int xi = (int)fabs(floor(p[0]));
+ int yi = (int)fabs(floor(p[1]));
+ int zi = (int)fabs(floor(p[2]));
+
+ if ((xi % 2 == yi % 2) == (zi % 2)) {
+ return 1.0;
+ }
+ else {
+ return 0.0;
+ }
}
shader node_checker_texture(
- int use_mapping = 0,
- matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
- float Scale = 5.0,
- point Vector = P,
- color Color1 = 0.8,
- color Color2 = 0.2,
- output float Fac = 0.0,
- output color Color = 0.0)
+ int use_mapping = 0,
+ matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+ float Scale = 5.0,
+ point Vector = P,
+ color Color1 = 0.8,
+ color Color2 = 0.2,
+ output float Fac = 0.0,
+ output color Color = 0.0)
{
- point p = Vector;
-
- if (use_mapping)
- p = transform(mapping, p);
-
- Fac = checker(p * Scale);
- if (Fac == 1.0) {
- Color = Color1;
- }
- else {
- Color = Color2;
- }
+ point p = Vector;
+
+ if (use_mapping)
+ p = transform(mapping, p);
+
+ Fac = checker(p * Scale);
+ if (Fac == 1.0) {
+ Color = Color1;
+ }
+ else {
+ Color = Color2;
+ }
}
-
diff --git a/intern/cycles/kernel/shaders/node_color.h b/intern/cycles/kernel/shaders/node_color.h
index fc758bef1fa..276c91843e8 100644
--- a/intern/cycles/kernel/shaders/node_color.h
+++ b/intern/cycles/kernel/shaders/node_color.h
@@ -18,135 +18,146 @@
float color_srgb_to_scene_linear(float c)
{
- if (c < 0.04045)
- return (c < 0.0) ? 0.0 : c * (1.0 / 12.92);
- else
- return pow((c + 0.055) * (1.0 / 1.055), 2.4);
+ if (c < 0.04045)
+ return (c < 0.0) ? 0.0 : c * (1.0 / 12.92);
+ else
+ return pow((c + 0.055) * (1.0 / 1.055), 2.4);
}
float color_scene_linear_to_srgb(float c)
{
- if (c < 0.0031308)
- return (c < 0.0) ? 0.0 : c * 12.92;
- else
- return 1.055 * pow(c, 1.0 / 2.4) - 0.055;
+ if (c < 0.0031308)
+ return (c < 0.0) ? 0.0 : c * 12.92;
+ else
+ return 1.055 * pow(c, 1.0 / 2.4) - 0.055;
}
color color_srgb_to_scene_linear(color c)
{
- return color(
- color_srgb_to_scene_linear(c[0]),
- color_srgb_to_scene_linear(c[1]),
- color_srgb_to_scene_linear(c[2]));
+ return color(color_srgb_to_scene_linear(c[0]),
+ color_srgb_to_scene_linear(c[1]),
+ color_srgb_to_scene_linear(c[2]));
}
color color_scene_linear_to_srgb(color c)
{
- return color(
- color_scene_linear_to_srgb(c[0]),
- color_scene_linear_to_srgb(c[1]),
- color_scene_linear_to_srgb(c[2]));
+ return color(color_scene_linear_to_srgb(c[0]),
+ color_scene_linear_to_srgb(c[1]),
+ color_scene_linear_to_srgb(c[2]));
}
color color_unpremultiply(color c, float alpha)
{
- if (alpha != 1.0 && alpha != 0.0)
- return c / alpha;
+ if (alpha != 1.0 && alpha != 0.0)
+ return c / alpha;
- return c;
+ return c;
}
/* Color Operations */
color xyY_to_xyz(float x, float y, float Y)
{
- float X, Z;
+ float X, Z;
- if (y != 0.0) X = (x / y) * Y;
- else X = 0.0;
+ if (y != 0.0)
+ X = (x / y) * Y;
+ else
+ X = 0.0;
- if (y != 0.0 && Y != 0.0) Z = ((1.0 - x - y) / y) * Y;
- else Z = 0.0;
+ if (y != 0.0 && Y != 0.0)
+ Z = ((1.0 - x - y) / y) * Y;
+ else
+ Z = 0.0;
- return color(X, Y, Z);
+ return color(X, Y, Z);
}
color xyz_to_rgb(float x, float y, float z)
{
- return color( 3.240479 * x + -1.537150 * y + -0.498535 * z,
- -0.969256 * x + 1.875991 * y + 0.041556 * z,
- 0.055648 * x + -0.204043 * y + 1.057311 * z);
+ return color(3.240479 * x + -1.537150 * y + -0.498535 * z,
+ -0.969256 * x + 1.875991 * y + 0.041556 * z,
+ 0.055648 * x + -0.204043 * y + 1.057311 * z);
}
color rgb_to_hsv(color rgb)
{
- float cmax, cmin, h, s, v, cdelta;
- color c;
-
- cmax = max(rgb[0], max(rgb[1], rgb[2]));
- cmin = min(rgb[0], min(rgb[1], rgb[2]));
- cdelta = cmax - cmin;
-
- v = cmax;
-
- if (cmax != 0.0) {
- s = cdelta / cmax;
- }
- else {
- s = 0.0;
- h = 0.0;
- }
-
- if (s == 0.0) {
- h = 0.0;
- }
- else {
- c = (color(cmax, cmax, cmax) - rgb) / cdelta;
-
- if (rgb[0] == cmax) h = c[2] - c[1];
- else if (rgb[1] == cmax) h = 2.0 + c[0] - c[2];
- else h = 4.0 + c[1] - c[0];
-
- h /= 6.0;
-
- if (h < 0.0)
- h += 1.0;
- }
-
- return color(h, s, v);
+ float cmax, cmin, h, s, v, cdelta;
+ color c;
+
+ cmax = max(rgb[0], max(rgb[1], rgb[2]));
+ cmin = min(rgb[0], min(rgb[1], rgb[2]));
+ cdelta = cmax - cmin;
+
+ v = cmax;
+
+ if (cmax != 0.0) {
+ s = cdelta / cmax;
+ }
+ else {
+ s = 0.0;
+ h = 0.0;
+ }
+
+ if (s == 0.0) {
+ h = 0.0;
+ }
+ else {
+ c = (color(cmax, cmax, cmax) - rgb) / cdelta;
+
+ if (rgb[0] == cmax)
+ h = c[2] - c[1];
+ else if (rgb[1] == cmax)
+ h = 2.0 + c[0] - c[2];
+ else
+ h = 4.0 + c[1] - c[0];
+
+ h /= 6.0;
+
+ if (h < 0.0)
+ h += 1.0;
+ }
+
+ return color(h, s, v);
}
color hsv_to_rgb(color hsv)
{
- float i, f, p, q, t, h, s, v;
- color rgb;
-
- h = hsv[0];
- s = hsv[1];
- v = hsv[2];
-
- if (s == 0.0) {
- rgb = color(v, v, v);
- }
- else {
- if (h == 1.0)
- h = 0.0;
-
- h *= 6.0;
- i = floor(h);
- f = h - i;
- rgb = color(f, f, f);
- p = v * (1.0 - s);
- q = v * (1.0 - (s * f));
- t = v * (1.0 - (s * (1.0 - f)));
-
- if (i == 0.0) rgb = color(v, t, p);
- else if (i == 1.0) rgb = color(q, v, p);
- else if (i == 2.0) rgb = color(p, v, t);
- else if (i == 3.0) rgb = color(p, q, v);
- else if (i == 4.0) rgb = color(t, p, v);
- else rgb = color(v, p, q);
- }
-
- return rgb;
+ float i, f, p, q, t, h, s, v;
+ color rgb;
+
+ h = hsv[0];
+ s = hsv[1];
+ v = hsv[2];
+
+ if (s == 0.0) {
+ rgb = color(v, v, v);
+ }
+ else {
+ if (h == 1.0)
+ h = 0.0;
+
+ h *= 6.0;
+ i = floor(h);
+ f = h - i;
+ rgb = color(f, f, f);
+ p = v * (1.0 - s);
+ q = v * (1.0 - (s * f));
+ t = v * (1.0 - (s * (1.0 - f)));
+
+ if (i == 0.0)
+ rgb = color(v, t, p);
+ else if (i == 1.0)
+ rgb = color(q, v, p);
+ else if (i == 2.0)
+ rgb = color(p, v, t);
+ else if (i == 3.0)
+ rgb = color(p, q, v);
+ else if (i == 4.0)
+ rgb = color(t, p, v);
+ else
+ rgb = color(v, p, q);
+ }
+
+ return rgb;
}
diff --git a/intern/cycles/kernel/shaders/node_combine_hsv.osl b/intern/cycles/kernel/shaders/node_combine_hsv.osl
index 6b922bf4e6b..1658cf3d774 100644
--- a/intern/cycles/kernel/shaders/node_combine_hsv.osl
+++ b/intern/cycles/kernel/shaders/node_combine_hsv.osl
@@ -16,12 +16,7 @@
#include "stdosl.h"
-shader node_combine_hsv(
- float H = 0.0,
- float S = 0.0,
- float V = 0.0,
- output color Color = 0.8)
+shader node_combine_hsv(float H = 0.0, float S = 0.0, float V = 0.0, output color Color = 0.8)
{
- Color = color("hsv", H, S, V);
+ Color = color("hsv", H, S, V);
}
-
diff --git a/intern/cycles/kernel/shaders/node_combine_rgb.osl b/intern/cycles/kernel/shaders/node_combine_rgb.osl
index f343fdefd84..aaa95e9c5af 100644
--- a/intern/cycles/kernel/shaders/node_combine_rgb.osl
+++ b/intern/cycles/kernel/shaders/node_combine_rgb.osl
@@ -16,12 +16,7 @@
#include "stdosl.h"
-shader node_combine_rgb(
- float R = 0.0,
- float G = 0.0,
- float B = 0.0,
- output color Image = 0.8)
+shader node_combine_rgb(float R = 0.0, float G = 0.0, float B = 0.0, output color Image = 0.8)
{
- Image = color(R, G, B);
+ Image = color(R, G, B);
}
-
diff --git a/intern/cycles/kernel/shaders/node_combine_xyz.osl b/intern/cycles/kernel/shaders/node_combine_xyz.osl
index 86182056b09..4ab49168704 100644
--- a/intern/cycles/kernel/shaders/node_combine_xyz.osl
+++ b/intern/cycles/kernel/shaders/node_combine_xyz.osl
@@ -16,12 +16,7 @@
#include "stdosl.h"
-shader node_combine_xyz(
- float X = 0.0,
- float Y = 0.0,
- float Z = 0.0,
- output vector Vector = 0.8)
+shader node_combine_xyz(float X = 0.0, float Y = 0.0, float Z = 0.0, output vector Vector = 0.8)
{
- Vector = vector(X, Y, Z);
+ Vector = vector(X, Y, Z);
}
-
diff --git a/intern/cycles/kernel/shaders/node_convert_from_color.osl b/intern/cycles/kernel/shaders/node_convert_from_color.osl
index e95a17f6fa1..7ea9a1e4fb3 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_color.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_color.osl
@@ -16,19 +16,17 @@
#include "stdosl.h"
-shader node_convert_from_color(
- color value_color = 0.0,
- output string value_string = "",
- output float value_float = 0.0,
- output int value_int = 0,
- output vector value_vector = vector(0.0, 0.0, 0.0),
- output point value_point = point(0.0, 0.0, 0.0),
- output normal value_normal = normal(0.0, 0.0, 0.0))
+shader node_convert_from_color(color value_color = 0.0,
+ output string value_string = "",
+ output float value_float = 0.0,
+ output int value_int = 0,
+ output vector value_vector = vector(0.0, 0.0, 0.0),
+ output point value_point = point(0.0, 0.0, 0.0),
+ output normal value_normal = normal(0.0, 0.0, 0.0))
{
- value_float = value_color[0] * 0.2126 + value_color[1] * 0.7152 + value_color[2] * 0.0722;
- value_int = (int)(value_color[0] * 0.2126 + value_color[1] * 0.7152 + value_color[2] * 0.0722);
- value_vector = vector(value_color[0], value_color[1], value_color[2]);
- value_point = point(value_color[0], value_color[1], value_color[2]);
- value_normal = normal(value_color[0], value_color[1], value_color[2]);
+ value_float = value_color[0] * 0.2126 + value_color[1] * 0.7152 + value_color[2] * 0.0722;
+ value_int = (int)(value_color[0] * 0.2126 + value_color[1] * 0.7152 + value_color[2] * 0.0722);
+ value_vector = vector(value_color[0], value_color[1], value_color[2]);
+ value_point = point(value_color[0], value_color[1], value_color[2]);
+ value_normal = normal(value_color[0], value_color[1], value_color[2]);
}
-
diff --git a/intern/cycles/kernel/shaders/node_convert_from_float.osl b/intern/cycles/kernel/shaders/node_convert_from_float.osl
index a5c2e3b26ad..13b5dea0838 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_float.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_float.osl
@@ -16,19 +16,17 @@
#include "stdosl.h"
-shader node_convert_from_float(
- float value_float = 0.0,
- output string value_string = "",
- output int value_int = 0,
- output color value_color = 0.0,
- output vector value_vector = vector(0.0, 0.0, 0.0),
- output point value_point = point(0.0, 0.0, 0.0),
- output normal value_normal = normal(0.0, 0.0, 0.0))
+shader node_convert_from_float(float value_float = 0.0,
+ output string value_string = "",
+ output int value_int = 0,
+ output color value_color = 0.0,
+ output vector value_vector = vector(0.0, 0.0, 0.0),
+ output point value_point = point(0.0, 0.0, 0.0),
+ output normal value_normal = normal(0.0, 0.0, 0.0))
{
- value_int = (int)value_float;
- value_color = color(value_float, value_float, value_float);
- value_vector = vector(value_float, value_float, value_float);
- value_point = point(value_float, value_float, value_float);
- value_normal = normal(value_float, value_float, value_float);
+ value_int = (int)value_float;
+ value_color = color(value_float, value_float, value_float);
+ value_vector = vector(value_float, value_float, value_float);
+ value_point = point(value_float, value_float, value_float);
+ value_normal = normal(value_float, value_float, value_float);
}
-
diff --git a/intern/cycles/kernel/shaders/node_convert_from_int.osl b/intern/cycles/kernel/shaders/node_convert_from_int.osl
index 0e6ae711210..a59e025d822 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_int.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_int.osl
@@ -16,20 +16,18 @@
#include "stdosl.h"
-shader node_convert_from_int(
- int value_int = 0,
- output string value_string = "",
- output float value_float = 0.0,
- output color value_color = 0.0,
- output vector value_vector = vector(0.0, 0.0, 0.0),
- output point value_point = point(0.0, 0.0, 0.0),
- output normal value_normal = normal(0.0, 0.0, 0.0))
+shader node_convert_from_int(int value_int = 0,
+ output string value_string = "",
+ output float value_float = 0.0,
+ output color value_color = 0.0,
+ output vector value_vector = vector(0.0, 0.0, 0.0),
+ output point value_point = point(0.0, 0.0, 0.0),
+ output normal value_normal = normal(0.0, 0.0, 0.0))
{
- float f = (float)value_int;
- value_float = f;
- value_color = color(f, f, f);
- value_vector = vector(f, f, f);
- value_point = point(f, f, f);
- value_normal = normal(f, f, f);
+ float f = (float)value_int;
+ value_float = f;
+ value_color = color(f, f, f);
+ value_vector = vector(f, f, f);
+ value_point = point(f, f, f);
+ value_normal = normal(f, f, f);
}
-
diff --git a/intern/cycles/kernel/shaders/node_convert_from_normal.osl b/intern/cycles/kernel/shaders/node_convert_from_normal.osl
index 7fffa7f6169..7bdd94d1941 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_normal.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_normal.osl
@@ -16,19 +16,17 @@
#include "stdosl.h"
-shader node_convert_from_normal(
- normal value_normal = normal(0.0, 0.0, 0.0),
- output string value_string = "",
- output float value_float = 0.0,
- output int value_int = 0,
- output vector value_vector = vector(0.0, 0.0, 0.0),
- output color value_color = 0.0,
- output point value_point = point(0.0, 0.0, 0.0))
+shader node_convert_from_normal(normal value_normal = normal(0.0, 0.0, 0.0),
+ output string value_string = "",
+ output float value_float = 0.0,
+ output int value_int = 0,
+ output vector value_vector = vector(0.0, 0.0, 0.0),
+ output color value_color = 0.0,
+ output point value_point = point(0.0, 0.0, 0.0))
{
- value_float = (value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0);
- value_int = (int)((value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0));
- value_vector = vector(value_normal[0], value_normal[1], value_normal[2]);
- value_color = color(value_normal[0], value_normal[1], value_normal[2]);
- value_point = point(value_normal[0], value_normal[1], value_normal[2]);
+ value_float = (value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0);
+ value_int = (int)((value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0));
+ value_vector = vector(value_normal[0], value_normal[1], value_normal[2]);
+ value_color = color(value_normal[0], value_normal[1], value_normal[2]);
+ value_point = point(value_normal[0], value_normal[1], value_normal[2]);
}
-
diff --git a/intern/cycles/kernel/shaders/node_convert_from_point.osl b/intern/cycles/kernel/shaders/node_convert_from_point.osl
index 9e4930296bb..79c1719e7a7 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_point.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_point.osl
@@ -16,19 +16,17 @@
#include "stdosl.h"
-shader node_convert_from_point(
- point value_point = point(0.0, 0.0, 0.0),
- output string value_string = "",
- output float value_float = 0.0,
- output int value_int = 0,
- output vector value_vector = vector(0.0, 0.0, 0.0),
- output color value_color = 0.0,
- output normal value_normal = normal(0.0, 0.0, 0.0))
+shader node_convert_from_point(point value_point = point(0.0, 0.0, 0.0),
+ output string value_string = "",
+ output float value_float = 0.0,
+ output int value_int = 0,
+ output vector value_vector = vector(0.0, 0.0, 0.0),
+ output color value_color = 0.0,
+ output normal value_normal = normal(0.0, 0.0, 0.0))
{
- value_float = (value_point[0] + value_point[1] + value_point[2]) * (1.0 / 3.0);
- value_int = (int)((value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0));
- value_vector = vector(value_point[0], value_point[1], value_point[2]);
- value_color = color(value_point[0], value_point[1], value_point[2]);
- value_normal = normal(value_point[0], value_point[1], value_point[2]);
+ value_float = (value_point[0] + value_point[1] + value_point[2]) * (1.0 / 3.0);
+ value_int = (int)((value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0));
+ value_vector = vector(value_point[0], value_point[1], value_point[2]);
+ value_color = color(value_point[0], value_point[1], value_point[2]);
+ value_normal = normal(value_point[0], value_point[1], value_point[2]);
}
-
diff --git a/intern/cycles/kernel/shaders/node_convert_from_string.osl b/intern/cycles/kernel/shaders/node_convert_from_string.osl
index cbc6653eada..48d894a6b3e 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_string.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_string.osl
@@ -16,14 +16,12 @@
#include "stdosl.h"
-shader node_convert_from_string(
- string value_string = "",
- output color value_color = color(0.0, 0.0, 0.0),
- output float value_float = 0.0,
- output int value_int = 0,
- output vector value_vector = vector(0.0, 0.0, 0.0),
- output point value_point = point(0.0, 0.0, 0.0),
- output normal value_normal = normal(0.0, 0.0, 0.0))
+shader node_convert_from_string(string value_string = "",
+ output color value_color = color(0.0, 0.0, 0.0),
+ output float value_float = 0.0,
+ output int value_int = 0,
+ output vector value_vector = vector(0.0, 0.0, 0.0),
+ output point value_point = point(0.0, 0.0, 0.0),
+ output normal value_normal = normal(0.0, 0.0, 0.0))
{
}
-
diff --git a/intern/cycles/kernel/shaders/node_convert_from_vector.osl b/intern/cycles/kernel/shaders/node_convert_from_vector.osl
index 8bdca469b90..92ab2313bcb 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_vector.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_vector.osl
@@ -16,19 +16,17 @@
#include "stdosl.h"
-shader node_convert_from_vector(
- vector value_vector = vector(0.0, 0.0, 0.0),
- output string value_string = "",
- output float value_float = 0.0,
- output int value_int = 0,
- output color value_color = color(0.0, 0.0, 0.0),
- output point value_point = point(0.0, 0.0, 0.0),
- output normal value_normal = normal(0.0, 0.0, 0.0))
+shader node_convert_from_vector(vector value_vector = vector(0.0, 0.0, 0.0),
+ output string value_string = "",
+ output float value_float = 0.0,
+ output int value_int = 0,
+ output color value_color = color(0.0, 0.0, 0.0),
+ output point value_point = point(0.0, 0.0, 0.0),
+ output normal value_normal = normal(0.0, 0.0, 0.0))
{
- value_float = (value_vector[0] + value_vector[1] + value_vector[2]) * (1.0 / 3.0);
- value_int = (int)((value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0));
- value_color = color(value_vector[0], value_vector[1], value_vector[2]);
- value_point = point(value_vector[0], value_vector[1], value_vector[2]);
- value_normal = normal(value_vector[0], value_vector[1], value_vector[2]);
+ value_float = (value_vector[0] + value_vector[1] + value_vector[2]) * (1.0 / 3.0);
+ value_int = (int)((value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0));
+ value_color = color(value_vector[0], value_vector[1], value_vector[2]);
+ value_point = point(value_vector[0], value_vector[1], value_vector[2]);
+ value_normal = normal(value_vector[0], value_vector[1], value_vector[2]);
}
-
diff --git a/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl b/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl
index 2bef2d65baa..bd5554b838a 100644
--- a/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl
@@ -16,15 +16,13 @@
#include "stdosl.h"
-shader node_diffuse_bsdf(
- color Color = 0.8,
- float Roughness = 0.0,
- normal Normal = N,
- output closure color BSDF = 0)
+shader node_diffuse_bsdf(color Color = 0.8,
+ float Roughness = 0.0,
+ normal Normal = N,
+ output closure color BSDF = 0)
{
- if (Roughness == 0.0)
- BSDF = Color * diffuse(Normal);
- else
- BSDF = Color * oren_nayar(Normal, Roughness);
+ if (Roughness == 0.0)
+ BSDF = Color * diffuse(Normal);
+ else
+ BSDF = Color * oren_nayar(Normal, Roughness);
}
-
diff --git a/intern/cycles/kernel/shaders/node_displacement.osl b/intern/cycles/kernel/shaders/node_displacement.osl
index 89f35841527..a1f3b7b7737 100644
--- a/intern/cycles/kernel/shaders/node_displacement.osl
+++ b/intern/cycles/kernel/shaders/node_displacement.osl
@@ -16,23 +16,21 @@
#include "stdosl.h"
-shader node_displacement(
- string space = "object",
- float Height = 0.0,
- float Midlevel = 0.5,
- float Scale = 1.0,
- normal Normal = N,
- output vector Displacement = vector(0.0, 0.0, 0.0))
+shader node_displacement(string space = "object",
+ float Height = 0.0,
+ float Midlevel = 0.5,
+ float Scale = 1.0,
+ normal Normal = N,
+ output vector Displacement = vector(0.0, 0.0, 0.0))
{
- Displacement = Normal;
- if(space == "object") {
- Displacement = transform("object", Displacement);
- }
+ Displacement = Normal;
+ if (space == "object") {
+ Displacement = transform("object", Displacement);
+ }
- Displacement = normalize(Displacement) * (Height - Midlevel) * Scale;
+ Displacement = normalize(Displacement) * (Height - Midlevel) * Scale;
- if(space == "object") {
- Displacement = transform("object", "world", Displacement);
- }
+ if (space == "object") {
+ Displacement = transform("object", "world", Displacement);
+ }
}
-
diff --git a/intern/cycles/kernel/shaders/node_emission.osl b/intern/cycles/kernel/shaders/node_emission.osl
index c36e2a4c0f3..57973f57ac6 100644
--- a/intern/cycles/kernel/shaders/node_emission.osl
+++ b/intern/cycles/kernel/shaders/node_emission.osl
@@ -16,11 +16,7 @@
#include "stdosl.h"
-shader node_emission(
- color Color = 0.8,
- float Strength = 1.0,
- output closure color Emission = 0)
+shader node_emission(color Color = 0.8, float Strength = 1.0, output closure color Emission = 0)
{
- Emission = (Strength * Color) * emission();
+ Emission = (Strength * Color) * emission();
}
-
diff --git a/intern/cycles/kernel/shaders/node_environment_texture.osl b/intern/cycles/kernel/shaders/node_environment_texture.osl
index 95d9d813969..eb32dad392f 100644
--- a/intern/cycles/kernel/shaders/node_environment_texture.osl
+++ b/intern/cycles/kernel/shaders/node_environment_texture.osl
@@ -19,63 +19,63 @@
vector environment_texture_direction_to_equirectangular(vector dir)
{
- float u = -atan2(dir[1], dir[0]) / (M_2PI) + 0.5;
- float v = atan2(dir[2], hypot(dir[0], dir[1])) / M_PI + 0.5;
+ float u = -atan2(dir[1], dir[0]) / (M_2PI) + 0.5;
+ float v = atan2(dir[2], hypot(dir[0], dir[1])) / M_PI + 0.5;
- return vector(u, v, 0.0);
+ return vector(u, v, 0.0);
}
vector environment_texture_direction_to_mirrorball(vector idir)
{
- vector dir = idir;
- dir[1] -= 1.0;
+ vector dir = idir;
+ dir[1] -= 1.0;
- float div = 2.0 * sqrt(max(-0.5 * dir[1], 0.0));
- if (div > 0.0)
- dir /= div;
+ float div = 2.0 * sqrt(max(-0.5 * dir[1], 0.0));
+ if (div > 0.0)
+ dir /= div;
- float u = 0.5 * (dir[0] + 1.0);
- float v = 0.5 * (dir[2] + 1.0);
+ float u = 0.5 * (dir[0] + 1.0);
+ float v = 0.5 * (dir[2] + 1.0);
- return vector(u, v, 0.0);
+ return vector(u, v, 0.0);
}
shader node_environment_texture(
- int use_mapping = 0,
- matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
- vector Vector = P,
- string filename = "",
- string projection = "equirectangular",
- string interpolation = "linear",
- string color_space = "sRGB",
- int is_float = 1,
- int use_alpha = 1,
- output color Color = 0.0,
- output float Alpha = 1.0)
+ int use_mapping = 0,
+ matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+ vector Vector = P,
+ string filename = "",
+ string projection = "equirectangular",
+ string interpolation = "linear",
+ string color_space = "sRGB",
+ int is_float = 1,
+ int use_alpha = 1,
+ output color Color = 0.0,
+ output float Alpha = 1.0)
{
- vector p = Vector;
+ vector p = Vector;
- if (use_mapping)
- p = transform(mapping, p);
-
- p = normalize(p);
+ if (use_mapping)
+ p = transform(mapping, p);
- if (projection == "equirectangular")
- p = environment_texture_direction_to_equirectangular(p);
- else
- p = environment_texture_direction_to_mirrorball(p);
+ p = normalize(p);
- /* todo: use environment for better texture filtering of equirectangular */
- Color = (color)texture(filename, p[0], 1.0 - p[1], "wrap", "periodic", "interp", interpolation, "alpha", Alpha);
+ if (projection == "equirectangular")
+ p = environment_texture_direction_to_equirectangular(p);
+ else
+ p = environment_texture_direction_to_mirrorball(p);
- if (use_alpha) {
- Color = color_unpremultiply(Color, Alpha);
+ /* todo: use environment for better texture filtering of equirectangular */
+ Color = (color)texture(
+ filename, p[0], 1.0 - p[1], "wrap", "periodic", "interp", interpolation, "alpha", Alpha);
- if (!is_float)
- Color = min(Color, 1.0);
- }
+ if (use_alpha) {
+ Color = color_unpremultiply(Color, Alpha);
- if (color_space == "sRGB")
- Color = color_srgb_to_scene_linear(Color);
-}
+ if (!is_float)
+ Color = min(Color, 1.0);
+ }
+ if (color_space == "sRGB")
+ Color = color_srgb_to_scene_linear(Color);
+}
diff --git a/intern/cycles/kernel/shaders/node_fresnel.h b/intern/cycles/kernel/shaders/node_fresnel.h
index 40793479d8a..ade1d4c6207 100644
--- a/intern/cycles/kernel/shaders/node_fresnel.h
+++ b/intern/cycles/kernel/shaders/node_fresnel.h
@@ -32,33 +32,31 @@
float fresnel_dielectric_cos(float cosi, float eta)
{
- /* compute fresnel reflectance without explicitly computing
- * the refracted direction */
- float c = fabs(cosi);
- float g = eta * eta - 1 + c * c;
- float result;
+ /* compute fresnel reflectance without explicitly computing
+ * the refracted direction */
+ float c = fabs(cosi);
+ float g = eta * eta - 1 + c * c;
+ float result;
- if (g > 0) {
- g = sqrt(g);
- float A = (g - c) / (g + c);
- float B = (c * (g + c) - 1) / (c * (g - c) + 1);
- result = 0.5 * A * A * (1 + B * B);
- }
- else
- result = 1.0; /* TIR (no refracted component) */
+ if (g > 0) {
+ g = sqrt(g);
+ float A = (g - c) / (g + c);
+ float B = (c * (g + c) - 1) / (c * (g - c) + 1);
+ result = 0.5 * A * A * (1 + B * B);
+ }
+ else
+ result = 1.0; /* TIR (no refracted component) */
- return result;
+ return result;
}
color fresnel_conductor(float cosi, color eta, color k)
{
- color cosi2 = color(cosi * cosi);
- color one = color(1, 1, 1);
- color tmp_f = eta * eta + k * k;
- color tmp = tmp_f * cosi2;
- color Rparl2 = (tmp - (2.0 * eta * cosi) + one) /
- (tmp + (2.0 * eta * cosi) + one);
- color Rperp2 = (tmp_f - (2.0 * eta * cosi) + cosi2) /
- (tmp_f + (2.0 * eta * cosi) + cosi2);
- return (Rparl2 + Rperp2) * 0.5;
+ color cosi2 = color(cosi * cosi);
+ color one = color(1, 1, 1);
+ color tmp_f = eta * eta + k * k;
+ color tmp = tmp_f * cosi2;
+ color Rparl2 = (tmp - (2.0 * eta * cosi) + one) / (tmp + (2.0 * eta * cosi) + one);
+ color Rperp2 = (tmp_f - (2.0 * eta * cosi) + cosi2) / (tmp_f + (2.0 * eta * cosi) + cosi2);
+ return (Rparl2 + Rperp2) * 0.5;
}
diff --git a/intern/cycles/kernel/shaders/node_fresnel.osl b/intern/cycles/kernel/shaders/node_fresnel.osl
index 8bec7b432f5..89250db40f3 100644
--- a/intern/cycles/kernel/shaders/node_fresnel.osl
+++ b/intern/cycles/kernel/shaders/node_fresnel.osl
@@ -17,14 +17,10 @@
#include "stdosl.h"
#include "node_fresnel.h"
-shader node_fresnel(
- float IOR = 1.45,
- normal Normal = N,
- output float Fac = 0.0)
+shader node_fresnel(float IOR = 1.45, normal Normal = N, output float Fac = 0.0)
{
- float f = max(IOR, 1e-5);
- float eta = backfacing() ? 1.0 / f : f;
- float cosi = dot(I, Normal);
- Fac = fresnel_dielectric_cos(cosi, eta);
+ float f = max(IOR, 1e-5);
+ float eta = backfacing() ? 1.0 / f : f;
+ float cosi = dot(I, Normal);
+ Fac = fresnel_dielectric_cos(cosi, eta);
}
-
diff --git a/intern/cycles/kernel/shaders/node_gamma.osl b/intern/cycles/kernel/shaders/node_gamma.osl
index bc4c1b34266..9b9c17dc8af 100644
--- a/intern/cycles/kernel/shaders/node_gamma.osl
+++ b/intern/cycles/kernel/shaders/node_gamma.osl
@@ -16,10 +16,7 @@
#include "stdosl.h"
-shader node_gamma(
- color ColorIn = 0.8,
- float Gamma = 1.0,
- output color ColorOut = 0.0)
+shader node_gamma(color ColorIn = 0.8, float Gamma = 1.0, output color ColorOut = 0.0)
{
- ColorOut = pow(ColorIn, Gamma);
+ ColorOut = pow(ColorIn, Gamma);
}
diff --git a/intern/cycles/kernel/shaders/node_geometry.osl b/intern/cycles/kernel/shaders/node_geometry.osl
index b0bd7692489..b5c1c6611c1 100644
--- a/intern/cycles/kernel/shaders/node_geometry.osl
+++ b/intern/cycles/kernel/shaders/node_geometry.osl
@@ -16,55 +16,53 @@
#include "stdosl.h"
-shader node_geometry(
- normal NormalIn = N,
- string bump_offset = "center",
+shader node_geometry(normal NormalIn = N,
+ string bump_offset = "center",
- output point Position = point(0.0, 0.0, 0.0),
- output normal Normal = normal(0.0, 0.0, 0.0),
- output normal Tangent = normal(0.0, 0.0, 0.0),
- output normal TrueNormal = normal(0.0, 0.0, 0.0),
- output vector Incoming = vector(0.0, 0.0, 0.0),
- output point Parametric = point(0.0, 0.0, 0.0),
- output float Backfacing = 0.0,
- output float Pointiness = 0.0)
+ output point Position = point(0.0, 0.0, 0.0),
+ output normal Normal = normal(0.0, 0.0, 0.0),
+ output normal Tangent = normal(0.0, 0.0, 0.0),
+ output normal TrueNormal = normal(0.0, 0.0, 0.0),
+ output vector Incoming = vector(0.0, 0.0, 0.0),
+ output point Parametric = point(0.0, 0.0, 0.0),
+ output float Backfacing = 0.0,
+ output float Pointiness = 0.0)
{
- Position = P;
- Normal = NormalIn;
- TrueNormal = Ng;
- Incoming = I;
- Parametric = point(u, v, 0.0);
- Backfacing = backfacing();
+ Position = P;
+ Normal = NormalIn;
+ TrueNormal = Ng;
+ Incoming = I;
+ Parametric = point(u, v, 0.0);
+ Backfacing = backfacing();
- if (bump_offset == "dx") {
- Position += Dx(Position);
- Parametric += Dx(Parametric);
- }
- else if (bump_offset == "dy") {
- Position += Dy(Position);
- Parametric += Dy(Parametric);
- }
+ if (bump_offset == "dx") {
+ Position += Dx(Position);
+ Parametric += Dx(Parametric);
+ }
+ else if (bump_offset == "dy") {
+ Position += Dy(Position);
+ Parametric += Dy(Parametric);
+ }
- /* first try to get tangent attribute */
- point generated;
+ /* first try to get tangent attribute */
+ point generated;
- /* try to create spherical tangent from generated coordinates */
- if (getattribute("geom:generated", generated)) {
- normal data = normal(-(generated[1] - 0.5), (generated[0] - 0.5), 0.0);
- vector T = transform("object", "world", data);
- Tangent = cross(Normal, normalize(cross(T, Normal)));
- }
- else {
- /* otherwise use surface derivatives */
- Tangent = normalize(dPdu);
- }
+ /* try to create spherical tangent from generated coordinates */
+ if (getattribute("geom:generated", generated)) {
+ normal data = normal(-(generated[1] - 0.5), (generated[0] - 0.5), 0.0);
+ vector T = transform("object", "world", data);
+ Tangent = cross(Normal, normalize(cross(T, Normal)));
+ }
+ else {
+ /* otherwise use surface derivatives */
+ Tangent = normalize(dPdu);
+ }
- getattribute("geom:pointiness", Pointiness);
- if (bump_offset == "dx") {
- Pointiness += Dx(Pointiness);
- }
- else if (bump_offset == "dy") {
- Pointiness += Dy(Pointiness);
- }
+ getattribute("geom:pointiness", Pointiness);
+ if (bump_offset == "dx") {
+ Pointiness += Dx(Pointiness);
+ }
+ else if (bump_offset == "dy") {
+ Pointiness += Dy(Pointiness);
+ }
}
-
diff --git a/intern/cycles/kernel/shaders/node_glass_bsdf.osl b/intern/cycles/kernel/shaders/node_glass_bsdf.osl
index 2e713861c58..c0b8a002536 100644
--- a/intern/cycles/kernel/shaders/node_glass_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_glass_bsdf.osl
@@ -17,29 +17,27 @@
#include "stdosl.h"
#include "node_fresnel.h"
-shader node_glass_bsdf(
- color Color = 0.8,
- string distribution = "sharp",
- float Roughness = 0.2,
- float IOR = 1.45,
- normal Normal = N,
- output closure color BSDF = 0)
+shader node_glass_bsdf(color Color = 0.8,
+ string distribution = "sharp",
+ float Roughness = 0.2,
+ float IOR = 1.45,
+ normal Normal = N,
+ output closure color BSDF = 0)
{
- float f = max(IOR, 1e-5);
- float eta = backfacing() ? 1.0 / f : f;
- float cosi = dot(I, Normal);
- float Fr = fresnel_dielectric_cos(cosi, eta);
- float roughness = Roughness * Roughness;
+ float f = max(IOR, 1e-5);
+ float eta = backfacing() ? 1.0 / f : f;
+ float cosi = dot(I, Normal);
+ float Fr = fresnel_dielectric_cos(cosi, eta);
+ float roughness = Roughness * Roughness;
- if (distribution == "sharp")
- BSDF = Color * (Fr * reflection(Normal) + (1.0 - Fr) * refraction(Normal, eta));
- else if (distribution == "beckmann")
- BSDF = Color * (Fr * microfacet_beckmann(Normal, roughness) +
- (1.0 - Fr) * microfacet_beckmann_refraction(Normal, roughness, eta));
- else if (distribution == "Multiscatter GGX")
- BSDF = Color * microfacet_multi_ggx_glass(Normal, roughness, eta, Color);
- else if (distribution == "GGX")
- BSDF = Color * (Fr * microfacet_ggx(Normal, roughness) +
- (1.0 - Fr) * microfacet_ggx_refraction(Normal, roughness, eta));
+ if (distribution == "sharp")
+ BSDF = Color * (Fr * reflection(Normal) + (1.0 - Fr) * refraction(Normal, eta));
+ else if (distribution == "beckmann")
+ BSDF = Color * (Fr * microfacet_beckmann(Normal, roughness) +
+ (1.0 - Fr) * microfacet_beckmann_refraction(Normal, roughness, eta));
+ else if (distribution == "Multiscatter GGX")
+ BSDF = Color * microfacet_multi_ggx_glass(Normal, roughness, eta, Color);
+ else if (distribution == "GGX")
+ BSDF = Color * (Fr * microfacet_ggx(Normal, roughness) +
+ (1.0 - Fr) * microfacet_ggx_refraction(Normal, roughness, eta));
}
-
diff --git a/intern/cycles/kernel/shaders/node_glossy_bsdf.osl b/intern/cycles/kernel/shaders/node_glossy_bsdf.osl
index 7415211b56d..2d40ee8d3f6 100644
--- a/intern/cycles/kernel/shaders/node_glossy_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_glossy_bsdf.osl
@@ -17,25 +17,22 @@
#include "stdosl.h"
#include "node_fresnel.h"
-shader node_glossy_bsdf(
- color Color = 0.8,
- string distribution = "GGX",
- float Roughness = 0.2,
- normal Normal = N,
- output closure color BSDF = 0)
+shader node_glossy_bsdf(color Color = 0.8,
+ string distribution = "GGX",
+ float Roughness = 0.2,
+ normal Normal = N,
+ output closure color BSDF = 0)
{
- float roughness = Roughness * Roughness;
-
- if (distribution == "sharp")
- BSDF = Color * reflection(Normal);
- else if (distribution == "beckmann")
- BSDF = Color * microfacet_beckmann(Normal, roughness);
- else if (distribution == "GGX")
- BSDF = Color * microfacet_ggx(Normal, roughness);
- else if (distribution == "Multiscatter GGX")
- BSDF = Color * microfacet_multi_ggx(Normal, roughness, Color);
- else
- BSDF = Color * ashikhmin_shirley(Normal, vector(0, 0, 0), roughness, roughness);
+ float roughness = Roughness * Roughness;
+ if (distribution == "sharp")
+ BSDF = Color * reflection(Normal);
+ else if (distribution == "beckmann")
+ BSDF = Color * microfacet_beckmann(Normal, roughness);
+ else if (distribution == "GGX")
+ BSDF = Color * microfacet_ggx(Normal, roughness);
+ else if (distribution == "Multiscatter GGX")
+ BSDF = Color * microfacet_multi_ggx(Normal, roughness, Color);
+ else
+ BSDF = Color * ashikhmin_shirley(Normal, vector(0, 0, 0), roughness, roughness);
}
-
diff --git a/intern/cycles/kernel/shaders/node_gradient_texture.osl b/intern/cycles/kernel/shaders/node_gradient_texture.osl
index f458937a18f..52bf466673d 100644
--- a/intern/cycles/kernel/shaders/node_gradient_texture.osl
+++ b/intern/cycles/kernel/shaders/node_gradient_texture.osl
@@ -21,59 +21,58 @@
float gradient(point p, string type)
{
- float x, y, z;
-
- x = p[0];
- y = p[1];
- z = p[2];
+ float x, y, z;
- float result = 0.0;
+ x = p[0];
+ y = p[1];
+ z = p[2];
- if (type == "linear") {
- result = x;
- }
- else if (type == "quadratic") {
- float r = max(x, 0.0);
- result = r * r;
- }
- else if (type == "easing") {
- float r = min(max(x, 0.0), 1.0);
- float t = r * r;
-
- result = (3.0 * t - 2.0 * t * r);
- }
- else if (type == "diagonal") {
- result = (x + y) * 0.5;
- }
- else if (type == "radial") {
- result = atan2(y, x) / M_2PI + 0.5;
- }
- else {
- float r = max(1.0 - sqrt(x * x + y * y + z * z), 0.0);
+ float result = 0.0;
- if (type == "quadratic_sphere")
- result = r * r;
- else if (type == "spherical")
- result = r;
- }
+ if (type == "linear") {
+ result = x;
+ }
+ else if (type == "quadratic") {
+ float r = max(x, 0.0);
+ result = r * r;
+ }
+ else if (type == "easing") {
+ float r = min(max(x, 0.0), 1.0);
+ float t = r * r;
- return result;
+ result = (3.0 * t - 2.0 * t * r);
+ }
+ else if (type == "diagonal") {
+ result = (x + y) * 0.5;
+ }
+ else if (type == "radial") {
+ result = atan2(y, x) / M_2PI + 0.5;
+ }
+ else {
+ float r = max(1.0 - sqrt(x * x + y * y + z * z), 0.0);
+
+ if (type == "quadratic_sphere")
+ result = r * r;
+ else if (type == "spherical")
+ result = r;
+ }
+
+ return result;
}
shader node_gradient_texture(
- int use_mapping = 0,
- matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
- string type = "linear",
- point Vector = P,
- output float Fac = 0.0,
- output color Color = 0.0)
+ int use_mapping = 0,
+ matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+ string type = "linear",
+ point Vector = P,
+ output float Fac = 0.0,
+ output color Color = 0.0)
{
- point p = Vector;
+ point p = Vector;
- if (use_mapping)
- p = transform(mapping, p);
+ if (use_mapping)
+ p = transform(mapping, p);
- Fac = gradient(p, type);
- Color = color(Fac, Fac, Fac);
+ Fac = gradient(p, type);
+ Color = color(Fac, Fac, Fac);
}
-
diff --git a/intern/cycles/kernel/shaders/node_hair_bsdf.osl b/intern/cycles/kernel/shaders/node_hair_bsdf.osl
index ef8f2fae894..bc912087666 100644
--- a/intern/cycles/kernel/shaders/node_hair_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_hair_bsdf.osl
@@ -18,41 +18,40 @@
#include "stdosl.h"
-shader node_hair_bsdf(
- color Color = 0.8,
- string component = "reflection",
- float Offset = 0.0,
- float RoughnessU = 0.1,
- float RoughnessV = 1.0,
- normal Tangent = normal(0, 0, 0),
- output closure color BSDF = 0)
+shader node_hair_bsdf(color Color = 0.8,
+ string component = "reflection",
+ float Offset = 0.0,
+ float RoughnessU = 0.1,
+ float RoughnessV = 1.0,
+ normal Tangent = normal(0, 0, 0),
+ output closure color BSDF = 0)
{
- float roughnessh = clamp(RoughnessU, 0.001, 1.0);
- float roughnessv = clamp(RoughnessV, 0.001, 1.0);
- float offset = -Offset;
+ float roughnessh = clamp(RoughnessU, 0.001, 1.0);
+ float roughnessv = clamp(RoughnessV, 0.001, 1.0);
+ float offset = -Offset;
- normal T;
- float IsCurve = 0;
- getattribute("geom:is_curve", IsCurve);
+ normal T;
+ float IsCurve = 0;
+ getattribute("geom:is_curve", IsCurve);
- if (isconnected(Tangent)) {
- T = Tangent;
- }
- else if(!IsCurve) {
- T = normalize(dPdv);
- offset = 0.0;
- }
- else {
- T = normalize(dPdu);
- }
+ if (isconnected(Tangent)) {
+ T = Tangent;
+ }
+ else if (!IsCurve) {
+ T = normalize(dPdv);
+ offset = 0.0;
+ }
+ else {
+ T = normalize(dPdu);
+ }
- if (backfacing() && IsCurve) {
- BSDF = transparent();
- }
- else {
- if (component == "reflection")
- BSDF = Color * hair_reflection(Ng, roughnessh, roughnessv, T, offset);
- else
- BSDF = Color * hair_transmission(Ng, roughnessh, roughnessv, T, offset);
- }
+ if (backfacing() && IsCurve) {
+ BSDF = transparent();
+ }
+ else {
+ if (component == "reflection")
+ BSDF = Color * hair_reflection(Ng, roughnessh, roughnessv, T, offset);
+ else
+ BSDF = Color * hair_transmission(Ng, roughnessh, roughnessv, T, offset);
+ }
}
diff --git a/intern/cycles/kernel/shaders/node_hair_info.osl b/intern/cycles/kernel/shaders/node_hair_info.osl
index 19216f67579..991a27c4103 100644
--- a/intern/cycles/kernel/shaders/node_hair_info.osl
+++ b/intern/cycles/kernel/shaders/node_hair_info.osl
@@ -16,17 +16,15 @@
#include "stdosl.h"
-shader node_hair_info(
- output float IsStrand = 0.0,
- output float Intercept = 0.0,
- output float Thickness = 0.0,
- output normal TangentNormal = N,
- output float Random = 0)
+shader node_hair_info(output float IsStrand = 0.0,
+ output float Intercept = 0.0,
+ output float Thickness = 0.0,
+ output normal TangentNormal = N,
+ output float Random = 0)
{
- getattribute("geom:is_curve", IsStrand);
- getattribute("geom:curve_intercept", Intercept);
- getattribute("geom:curve_thickness", Thickness);
- getattribute("geom:curve_tangent_normal", TangentNormal);
- getattribute("geom:curve_random", Random);
+ getattribute("geom:is_curve", IsStrand);
+ getattribute("geom:curve_intercept", Intercept);
+ getattribute("geom:curve_thickness", Thickness);
+ getattribute("geom:curve_tangent_normal", TangentNormal);
+ getattribute("geom:curve_random", Random);
}
-
diff --git a/intern/cycles/kernel/shaders/node_holdout.osl b/intern/cycles/kernel/shaders/node_holdout.osl
index 78a9f46fd15..b51bc0543a5 100644
--- a/intern/cycles/kernel/shaders/node_holdout.osl
+++ b/intern/cycles/kernel/shaders/node_holdout.osl
@@ -16,9 +16,6 @@
#include "stdosl.h"
-shader node_holdout(
- output closure color Holdout = holdout())
+shader node_holdout(output closure color Holdout = holdout())
{
-
}
-
diff --git a/intern/cycles/kernel/shaders/node_hsv.osl b/intern/cycles/kernel/shaders/node_hsv.osl
index d72a87a951f..30c56a20a92 100644
--- a/intern/cycles/kernel/shaders/node_hsv.osl
+++ b/intern/cycles/kernel/shaders/node_hsv.osl
@@ -17,28 +17,26 @@
#include "stdosl.h"
#include "node_color.h"
-shader node_hsv(
- float Hue = 0.5,
- float Saturation = 1.0,
- float Value = 1.0,
- float Fac = 0.5,
- color ColorIn = 0.0,
- output color ColorOut = 0.0)
+shader node_hsv(float Hue = 0.5,
+ float Saturation = 1.0,
+ float Value = 1.0,
+ float Fac = 0.5,
+ color ColorIn = 0.0,
+ output color ColorOut = 0.0)
{
- color Color = rgb_to_hsv(ColorIn);
+ color Color = rgb_to_hsv(ColorIn);
- // remember: fmod doesn't work for negative numbers
- Color[0] = fmod(Color[0] + Hue + 0.5, 1.0);
- Color[1] = clamp(Color[1] * Saturation, 0.0, 1.0);
- Color[2] *= Value;
+ // remember: fmod doesn't work for negative numbers
+ Color[0] = fmod(Color[0] + Hue + 0.5, 1.0);
+ Color[1] = clamp(Color[1] * Saturation, 0.0, 1.0);
+ Color[2] *= Value;
- Color = hsv_to_rgb(Color);
+ Color = hsv_to_rgb(Color);
- // Clamp color to prevent negative values cauzed by oversaturation.
- Color[0] = max(Color[0], 0.0);
- Color[1] = max(Color[1], 0.0);
- Color[2] = max(Color[2], 0.0);
+ // Clamp color to prevent negative values cauzed by oversaturation.
+ Color[0] = max(Color[0], 0.0);
+ Color[1] = max(Color[1], 0.0);
+ Color[2] = max(Color[2], 0.0);
- ColorOut = mix(ColorIn, Color, Fac);
+ ColorOut = mix(ColorIn, Color, Fac);
}
-
diff --git a/intern/cycles/kernel/shaders/node_ies_light.osl b/intern/cycles/kernel/shaders/node_ies_light.osl
index a0954e3a444..ea8c44e09de 100644
--- a/intern/cycles/kernel/shaders/node_ies_light.osl
+++ b/intern/cycles/kernel/shaders/node_ies_light.osl
@@ -19,24 +19,23 @@
/* IES Light */
-shader node_ies_light(
- int use_mapping = 0,
- matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
- int slot = 0,
- float Strength = 1.0,
- point Vector = I,
- output float Fac = 0.0)
+shader node_ies_light(int use_mapping = 0,
+ matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+ int slot = 0,
+ float Strength = 1.0,
+ point Vector = I,
+ output float Fac = 0.0)
{
- point p = Vector;
+ point p = Vector;
- if (use_mapping) {
- p = transform(mapping, p);
- }
+ if (use_mapping) {
+ p = transform(mapping, p);
+ }
- p = normalize(p);
+ p = normalize(p);
- float v_angle = acos(-p[2]);
- float h_angle = atan2(p[0], p[1]) + M_PI;
+ float v_angle = acos(-p[2]);
+ float h_angle = atan2(p[0], p[1]) + M_PI;
- Fac = Strength * texture(format("@l%d", slot), h_angle, v_angle);
+ Fac = Strength * texture(format("@l%d", slot), h_angle, v_angle);
}
diff --git a/intern/cycles/kernel/shaders/node_image_texture.osl b/intern/cycles/kernel/shaders/node_image_texture.osl
index 7cd2922dd4f..df5eda39985 100644
--- a/intern/cycles/kernel/shaders/node_image_texture.osl
+++ b/intern/cycles/kernel/shaders/node_image_texture.osl
@@ -19,217 +19,217 @@
point texco_remap_square(point co)
{
- return (co - point(0.5, 0.5, 0.5)) * 2.0;
+ return (co - point(0.5, 0.5, 0.5)) * 2.0;
}
point map_to_tube(vector dir)
{
- float u, v;
- v = (dir[2] + 1.0) * 0.5;
- float len = sqrt(dir[0] * dir[0] + dir[1] * dir[1]);
- if (len > 0.0) {
- u = (1.0 - (atan2(dir[0] / len, dir[1] / len) / M_PI)) * 0.5;
- }
- else {
- v = u = 0.0; /* To avoid un-initialized variables. */
- }
- return point(u, v, 0.0);
+ float u, v;
+ v = (dir[2] + 1.0) * 0.5;
+ float len = sqrt(dir[0] * dir[0] + dir[1] * dir[1]);
+ if (len > 0.0) {
+ u = (1.0 - (atan2(dir[0] / len, dir[1] / len) / M_PI)) * 0.5;
+ }
+ else {
+ v = u = 0.0; /* To avoid un-initialized variables. */
+ }
+ return point(u, v, 0.0);
}
point map_to_sphere(vector dir)
{
- float len = length(dir);
- float v, u;
- if (len > 0.0) {
- if (dir[0] == 0.0 && dir[1] == 0.0) {
- u = 0.0; /* Othwise domain error. */
- }
- else {
- u = (1.0 - atan2(dir[0], dir[1]) / M_PI) / 2.0;
- }
- v = 1.0 - acos(dir[2] / len) / M_PI;
- }
- else {
- v = u = 0.0; /* To avoid un-initialized variables. */
- }
- return point(u, v, 0.0);
+ float len = length(dir);
+ float v, u;
+ if (len > 0.0) {
+ if (dir[0] == 0.0 && dir[1] == 0.0) {
+ u = 0.0; /* Othwise domain error. */
+ }
+ else {
+ u = (1.0 - atan2(dir[0], dir[1]) / M_PI) / 2.0;
+ }
+ v = 1.0 - acos(dir[2] / len) / M_PI;
+ }
+ else {
+ v = u = 0.0; /* To avoid un-initialized variables. */
+ }
+ return point(u, v, 0.0);
}
color image_texture_lookup(string filename,
string color_space,
- float u, float v,
+ float u,
+ float v,
output float Alpha,
int use_alpha,
int is_float,
string interpolation,
string extension)
{
- color rgb = (color)texture(filename, u, 1.0 - v, "wrap", extension, "interp", interpolation, "alpha", Alpha);
+ color rgb = (color)texture(
+ filename, u, 1.0 - v, "wrap", extension, "interp", interpolation, "alpha", Alpha);
- if (use_alpha) {
- rgb = color_unpremultiply(rgb, Alpha);
-
- if (!is_float)
- rgb = min(rgb, 1.0);
- }
+ if (use_alpha) {
+ rgb = color_unpremultiply(rgb, Alpha);
- if (color_space == "sRGB") {
- rgb = color_srgb_to_scene_linear(rgb);
- }
+ if (!is_float)
+ rgb = min(rgb, 1.0);
+ }
- return rgb;
+ if (color_space == "sRGB") {
+ rgb = color_srgb_to_scene_linear(rgb);
+ }
+
+ return rgb;
}
-shader node_image_texture(
- int use_mapping = 0,
- matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
- point Vector = P,
- string filename = "",
- string color_space = "sRGB",
- string projection = "flat",
- string interpolation = "smartcubic",
- string extension = "periodic",
- float projection_blend = 0.0,
- int is_float = 1,
- int use_alpha = 1,
- output color Color = 0.0,
- output float Alpha = 1.0)
+shader node_image_texture(int use_mapping = 0,
+ matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+ point Vector = P,
+ string filename = "",
+ string color_space = "sRGB",
+ string projection = "flat",
+ string interpolation = "smartcubic",
+ string extension = "periodic",
+ float projection_blend = 0.0,
+ int is_float = 1,
+ int use_alpha = 1,
+ output color Color = 0.0,
+ output float Alpha = 1.0)
{
- point p = Vector;
-
- if (use_mapping)
- p = transform(mapping, p);
-
- if (projection == "flat") {
- Color = image_texture_lookup(filename,
- color_space,
- p[0], p[1],
- Alpha,
- use_alpha,
- is_float,
- interpolation,
- extension);
- }
- else if (projection == "box") {
- /* object space normal */
- vector Nob = transform("world", "object", N);
-
- /* project from direction vector to barycentric coordinates in triangles */
- Nob = vector(fabs(Nob[0]), fabs(Nob[1]), fabs(Nob[2]));
- Nob /= (Nob[0] + Nob[1] + Nob[2]);
-
- /* basic idea is to think of this as a triangle, each corner representing
- * one of the 3 faces of the cube. in the corners we have single textures,
- * in between we blend between two textures, and in the middle we a blend
- * between three textures.
- *
- * the Nxyz values are the barycentric coordinates in an equilateral
- * triangle, which in case of blending, in the middle has a smaller
- * equilateral triangle where 3 textures blend. this divides things into
- * 7 zones, with an if () test for each zone */
-
- vector weight = vector(0.0, 0.0, 0.0);
- float blend = projection_blend;
- float limit = 0.5 * (1.0 + blend);
-
- /* first test for corners with single texture */
- if (Nob[0] > limit * (Nob[0] + Nob[1]) && Nob[0] > limit * (Nob[0] + Nob[2])) {
- weight[0] = 1.0;
- }
- else if (Nob[1] > limit * (Nob[0] + Nob[1]) && Nob[1] > limit * (Nob[1] + Nob[2])) {
- weight[1] = 1.0;
- }
- else if (Nob[2] > limit * (Nob[0] + Nob[2]) && Nob[2] > limit * (Nob[1] + Nob[2])) {
- weight[2] = 1.0;
- }
- else if (blend > 0.0) {
- /* in case of blending, test for mixes between two textures */
- if (Nob[2] < (1.0 - limit) * (Nob[1] + Nob[0])) {
- weight[0] = Nob[0] / (Nob[0] + Nob[1]);
- weight[0] = clamp((weight[0] - 0.5 * (1.0 - blend)) / blend, 0.0, 1.0);
- weight[1] = 1.0 - weight[0];
- }
- else if (Nob[0] < (1.0 - limit) * (Nob[1] + Nob[2])) {
- weight[1] = Nob[1] / (Nob[1] + Nob[2]);
- weight[1] = clamp((weight[1] - 0.5 * (1.0 - blend)) / blend, 0.0, 1.0);
- weight[2] = 1.0 - weight[1];
- }
- else if (Nob[1] < (1.0 - limit) * (Nob[0] + Nob[2])) {
- weight[0] = Nob[0] / (Nob[0] + Nob[2]);
- weight[0] = clamp((weight[0] - 0.5 * (1.0 - blend)) / blend, 0.0, 1.0);
- weight[2] = 1.0 - weight[0];
- }
- else {
- /* last case, we have a mix between three */
- weight[0] = ((2.0 - limit) * Nob[0] + (limit - 1.0)) / (2.0 * limit - 1.0);
- weight[1] = ((2.0 - limit) * Nob[1] + (limit - 1.0)) / (2.0 * limit - 1.0);
- weight[2] = ((2.0 - limit) * Nob[2] + (limit - 1.0)) / (2.0 * limit - 1.0);
- }
- }
- else {
- /* Desperate mode, no valid choice anyway, fallback to one side.*/
- weight[0] = 1.0;
- }
-
- Color = color(0.0, 0.0, 0.0);
- Alpha = 0.0;
-
- float tmp_alpha;
-
- if (weight[0] > 0.0) {
- Color += weight[0] * image_texture_lookup(filename,
- color_space,
- p[1], p[2],
- tmp_alpha,
- use_alpha,
- is_float,
- interpolation,
- extension);
- Alpha += weight[0] * tmp_alpha;
- }
- if (weight[1] > 0.0) {
- Color += weight[1] * image_texture_lookup(filename,
- color_space,
- p[0], p[2],
- tmp_alpha,
- use_alpha,
- is_float,
- interpolation,
- extension);
- Alpha += weight[1] * tmp_alpha;
- }
- if (weight[2] > 0.0) {
- Color += weight[2] * image_texture_lookup(filename,
- color_space,
- p[1], p[0],
- tmp_alpha,
- use_alpha,
- is_float,
- interpolation,
- extension);
- Alpha += weight[2] * tmp_alpha;
- }
- }
- else if (projection == "sphere") {
- point projected = map_to_sphere(texco_remap_square(p));
- Color = image_texture_lookup(filename,
- color_space,
- projected[0], projected[1],
- Alpha,
- use_alpha,
- is_float,
- interpolation,
- extension);
- }
- else if (projection == "tube") {
- point projected = map_to_tube(texco_remap_square(p));
- Color = image_texture_lookup(filename,
- color_space,
- projected[0], projected[1],
- Alpha,
- use_alpha,
- is_float,
- interpolation,
- extension);
- }
+ point p = Vector;
+
+ if (use_mapping)
+ p = transform(mapping, p);
+
+ if (projection == "flat") {
+ Color = image_texture_lookup(
+ filename, color_space, p[0], p[1], Alpha, use_alpha, is_float, interpolation, extension);
+ }
+ else if (projection == "box") {
+ /* object space normal */
+ vector Nob = transform("world", "object", N);
+
+ /* project from direction vector to barycentric coordinates in triangles */
+ Nob = vector(fabs(Nob[0]), fabs(Nob[1]), fabs(Nob[2]));
+ Nob /= (Nob[0] + Nob[1] + Nob[2]);
+
+ /* basic idea is to think of this as a triangle, each corner representing
+ * one of the 3 faces of the cube. in the corners we have single textures,
+ * in between we blend between two textures, and in the middle we a blend
+ * between three textures.
+ *
+ * the Nxyz values are the barycentric coordinates in an equilateral
+ * triangle, which in case of blending, in the middle has a smaller
+ * equilateral triangle where 3 textures blend. this divides things into
+ * 7 zones, with an if () test for each zone */
+
+ vector weight = vector(0.0, 0.0, 0.0);
+ float blend = projection_blend;
+ float limit = 0.5 * (1.0 + blend);
+
+ /* first test for corners with single texture */
+ if (Nob[0] > limit * (Nob[0] + Nob[1]) && Nob[0] > limit * (Nob[0] + Nob[2])) {
+ weight[0] = 1.0;
+ }
+ else if (Nob[1] > limit * (Nob[0] + Nob[1]) && Nob[1] > limit * (Nob[1] + Nob[2])) {
+ weight[1] = 1.0;
+ }
+ else if (Nob[2] > limit * (Nob[0] + Nob[2]) && Nob[2] > limit * (Nob[1] + Nob[2])) {
+ weight[2] = 1.0;
+ }
+ else if (blend > 0.0) {
+ /* in case of blending, test for mixes between two textures */
+ if (Nob[2] < (1.0 - limit) * (Nob[1] + Nob[0])) {
+ weight[0] = Nob[0] / (Nob[0] + Nob[1]);
+ weight[0] = clamp((weight[0] - 0.5 * (1.0 - blend)) / blend, 0.0, 1.0);
+ weight[1] = 1.0 - weight[0];
+ }
+ else if (Nob[0] < (1.0 - limit) * (Nob[1] + Nob[2])) {
+ weight[1] = Nob[1] / (Nob[1] + Nob[2]);
+ weight[1] = clamp((weight[1] - 0.5 * (1.0 - blend)) / blend, 0.0, 1.0);
+ weight[2] = 1.0 - weight[1];
+ }
+ else if (Nob[1] < (1.0 - limit) * (Nob[0] + Nob[2])) {
+ weight[0] = Nob[0] / (Nob[0] + Nob[2]);
+ weight[0] = clamp((weight[0] - 0.5 * (1.0 - blend)) / blend, 0.0, 1.0);
+ weight[2] = 1.0 - weight[0];
+ }
+ else {
+ /* last case, we have a mix between three */
+ weight[0] = ((2.0 - limit) * Nob[0] + (limit - 1.0)) / (2.0 * limit - 1.0);
+ weight[1] = ((2.0 - limit) * Nob[1] + (limit - 1.0)) / (2.0 * limit - 1.0);
+ weight[2] = ((2.0 - limit) * Nob[2] + (limit - 1.0)) / (2.0 * limit - 1.0);
+ }
+ }
+ else {
+ /* Desperate mode, no valid choice anyway, fallback to one side.*/
+ weight[0] = 1.0;
+ }
+
+ Color = color(0.0, 0.0, 0.0);
+ Alpha = 0.0;
+
+ float tmp_alpha;
+
+ if (weight[0] > 0.0) {
+ Color += weight[0] * image_texture_lookup(filename,
+ color_space,
+ p[1],
+ p[2],
+ tmp_alpha,
+ use_alpha,
+ is_float,
+ interpolation,
+ extension);
+ Alpha += weight[0] * tmp_alpha;
+ }
+ if (weight[1] > 0.0) {
+ Color += weight[1] * image_texture_lookup(filename,
+ color_space,
+ p[0],
+ p[2],
+ tmp_alpha,
+ use_alpha,
+ is_float,
+ interpolation,
+ extension);
+ Alpha += weight[1] * tmp_alpha;
+ }
+ if (weight[2] > 0.0) {
+ Color += weight[2] * image_texture_lookup(filename,
+ color_space,
+ p[1],
+ p[0],
+ tmp_alpha,
+ use_alpha,
+ is_float,
+ interpolation,
+ extension);
+ Alpha += weight[2] * tmp_alpha;
+ }
+ }
+ else if (projection == "sphere") {
+ point projected = map_to_sphere(texco_remap_square(p));
+ Color = image_texture_lookup(filename,
+ color_space,
+ projected[0],
+ projected[1],
+ Alpha,
+ use_alpha,
+ is_float,
+ interpolation,
+ extension);
+ }
+ else if (projection == "tube") {
+ point projected = map_to_tube(texco_remap_square(p));
+ Color = image_texture_lookup(filename,
+ color_space,
+ projected[0],
+ projected[1],
+ Alpha,
+ use_alpha,
+ is_float,
+ interpolation,
+ extension);
+ }
}
diff --git a/intern/cycles/kernel/shaders/node_invert.osl b/intern/cycles/kernel/shaders/node_invert.osl
index b33b0a43d63..c7d41e4e129 100644
--- a/intern/cycles/kernel/shaders/node_invert.osl
+++ b/intern/cycles/kernel/shaders/node_invert.osl
@@ -16,12 +16,8 @@
#include "stdosl.h"
-shader node_invert(
- float Fac = 1.0,
- color ColorIn = 0.8,
- output color ColorOut = 0.8)
+shader node_invert(float Fac = 1.0, color ColorIn = 0.8, output color ColorOut = 0.8)
{
- color ColorInv = color(1.0) - ColorIn;
- ColorOut = mix(ColorIn, ColorInv, Fac);
+ color ColorInv = color(1.0) - ColorIn;
+ ColorOut = mix(ColorIn, ColorInv, Fac);
}
-
diff --git a/intern/cycles/kernel/shaders/node_layer_weight.osl b/intern/cycles/kernel/shaders/node_layer_weight.osl
index f583df25773..7c46f28b41b 100644
--- a/intern/cycles/kernel/shaders/node_layer_weight.osl
+++ b/intern/cycles/kernel/shaders/node_layer_weight.osl
@@ -17,29 +17,28 @@
#include "stdosl.h"
#include "node_fresnel.h"
-shader node_layer_weight(
- float Blend = 0.5,
- normal Normal = N,
- output float Fresnel = 0.0,
- output float Facing = 0.0)
+shader node_layer_weight(float Blend = 0.5,
+ normal Normal = N,
+ output float Fresnel = 0.0,
+ output float Facing = 0.0)
{
- float blend = Blend;
- float cosi = dot(I, Normal);
+ float blend = Blend;
+ float cosi = dot(I, Normal);
- /* Fresnel */
- float eta = max(1.0 - Blend, 1e-5);
- eta = backfacing() ? eta : 1.0 / eta;
- Fresnel = fresnel_dielectric_cos(cosi, eta);
+ /* Fresnel */
+ float eta = max(1.0 - Blend, 1e-5);
+ eta = backfacing() ? eta : 1.0 / eta;
+ Fresnel = fresnel_dielectric_cos(cosi, eta);
- /* Facing */
- Facing = fabs(cosi);
+ /* Facing */
+ Facing = fabs(cosi);
- if (blend != 0.5) {
- blend = clamp(blend, 0.0, 1.0 - 1e-5);
- blend = (blend < 0.5) ? 2.0 * blend : 0.5 / (1.0 - blend);
+ if (blend != 0.5) {
+ blend = clamp(blend, 0.0, 1.0 - 1e-5);
+ blend = (blend < 0.5) ? 2.0 * blend : 0.5 / (1.0 - blend);
- Facing = pow(Facing, blend);
- }
+ Facing = pow(Facing, blend);
+ }
- Facing = 1.0 - Facing;
+ Facing = 1.0 - Facing;
}
diff --git a/intern/cycles/kernel/shaders/node_light_falloff.osl b/intern/cycles/kernel/shaders/node_light_falloff.osl
index a594e33d643..d0d7dd9c5aa 100644
--- a/intern/cycles/kernel/shaders/node_light_falloff.osl
+++ b/intern/cycles/kernel/shaders/node_light_falloff.osl
@@ -16,29 +16,27 @@
#include "stdosl.h"
-shader node_light_falloff(
- float Strength = 0.0,
- float Smooth = 0.0,
- output float Quadratic = 0.0,
- output float Linear = 0.0,
- output float Constant = 0.0)
+shader node_light_falloff(float Strength = 0.0,
+ float Smooth = 0.0,
+ output float Quadratic = 0.0,
+ output float Linear = 0.0,
+ output float Constant = 0.0)
{
- float ray_length = 0.0;
- float strength = Strength;
- getattribute("path:ray_length", ray_length);
+ float ray_length = 0.0;
+ float strength = Strength;
+ getattribute("path:ray_length", ray_length);
- if (Smooth > 0.0) {
- float squared = ray_length * ray_length;
- strength *= squared / (Smooth + squared);
- }
+ if (Smooth > 0.0) {
+ float squared = ray_length * ray_length;
+ strength *= squared / (Smooth + squared);
+ }
- /* Quadratic */
- Quadratic = strength;
-
- /* Linear */
- Linear = (strength * ray_length);
+ /* Quadratic */
+ Quadratic = strength;
- /* Constant */
- Constant = (strength * ray_length * ray_length);
-}
+ /* Linear */
+ Linear = (strength * ray_length);
+ /* Constant */
+ Constant = (strength * ray_length * ray_length);
+}
diff --git a/intern/cycles/kernel/shaders/node_light_path.osl b/intern/cycles/kernel/shaders/node_light_path.osl
index 64fe4c20132..c4a3624a67f 100644
--- a/intern/cycles/kernel/shaders/node_light_path.osl
+++ b/intern/cycles/kernel/shaders/node_light_path.osl
@@ -16,51 +16,49 @@
#include "stdosl.h"
-shader node_light_path(
- output float IsCameraRay = 0.0,
- output float IsShadowRay = 0.0,
- output float IsDiffuseRay = 0.0,
- output float IsGlossyRay = 0.0,
- output float IsSingularRay = 0.0,
- output float IsReflectionRay = 0.0,
- output float IsTransmissionRay = 0.0,
- output float IsVolumeScatterRay = 0.0,
- output float RayLength = 0.0,
- output float RayDepth = 0.0,
- output float DiffuseDepth = 0.0,
- output float GlossyDepth = 0.0,
- output float TransparentDepth = 0.0,
- output float TransmissionDepth = 0.0)
+shader node_light_path(output float IsCameraRay = 0.0,
+ output float IsShadowRay = 0.0,
+ output float IsDiffuseRay = 0.0,
+ output float IsGlossyRay = 0.0,
+ output float IsSingularRay = 0.0,
+ output float IsReflectionRay = 0.0,
+ output float IsTransmissionRay = 0.0,
+ output float IsVolumeScatterRay = 0.0,
+ output float RayLength = 0.0,
+ output float RayDepth = 0.0,
+ output float DiffuseDepth = 0.0,
+ output float GlossyDepth = 0.0,
+ output float TransparentDepth = 0.0,
+ output float TransmissionDepth = 0.0)
{
- IsCameraRay = raytype("camera");
- IsShadowRay = raytype("shadow");
- IsDiffuseRay = raytype("diffuse");
- IsGlossyRay = raytype("glossy");
- IsSingularRay = raytype("singular");
- IsReflectionRay = raytype("reflection");
- IsTransmissionRay = raytype("refraction");
- IsVolumeScatterRay = raytype("volume_scatter");
+ IsCameraRay = raytype("camera");
+ IsShadowRay = raytype("shadow");
+ IsDiffuseRay = raytype("diffuse");
+ IsGlossyRay = raytype("glossy");
+ IsSingularRay = raytype("singular");
+ IsReflectionRay = raytype("reflection");
+ IsTransmissionRay = raytype("refraction");
+ IsVolumeScatterRay = raytype("volume_scatter");
- getattribute("path:ray_length", RayLength);
+ getattribute("path:ray_length", RayLength);
- int ray_depth;
- getattribute("path:ray_depth", ray_depth);
- RayDepth = (float)ray_depth;
+ int ray_depth;
+ getattribute("path:ray_depth", ray_depth);
+ RayDepth = (float)ray_depth;
- int diffuse_depth;
- getattribute("path:diffuse_depth", diffuse_depth);
- DiffuseDepth = (float)diffuse_depth;
+ int diffuse_depth;
+ getattribute("path:diffuse_depth", diffuse_depth);
+ DiffuseDepth = (float)diffuse_depth;
- int glossy_depth;
- getattribute("path:glossy_depth", glossy_depth);
- GlossyDepth = (float)glossy_depth;
+ int glossy_depth;
+ getattribute("path:glossy_depth", glossy_depth);
+ GlossyDepth = (float)glossy_depth;
- int transparent_depth;
- getattribute("path:transparent_depth", transparent_depth);
- TransparentDepth = (float)transparent_depth;
+ int transparent_depth;
+ getattribute("path:transparent_depth", transparent_depth);
+ TransparentDepth = (float)transparent_depth;
- int transmission_depth;
- getattribute("path:transmission_depth", transmission_depth);
- TransmissionDepth = (float)transmission_depth;
+ int transmission_depth;
+ getattribute("path:transmission_depth", transmission_depth);
+ TransmissionDepth = (float)transmission_depth;
}
-
diff --git a/intern/cycles/kernel/shaders/node_magic_texture.osl b/intern/cycles/kernel/shaders/node_magic_texture.osl
index 8d6af391e04..aa700e575ef 100644
--- a/intern/cycles/kernel/shaders/node_magic_texture.osl
+++ b/intern/cycles/kernel/shaders/node_magic_texture.osl
@@ -21,91 +21,89 @@
color magic(point p, int n, float distortion)
{
- float dist = distortion;
-
- float x = sin(( p[0] + p[1] + p[2]) * 5.0);
- float y = cos((-p[0] + p[1] - p[2]) * 5.0);
- float z = -cos((-p[0] - p[1] + p[2]) * 5.0);
-
- if (n > 0) {
- x *= dist;
- y *= dist;
- z *= dist;
- y = -cos(x - y + z);
- y *= dist;
-
- if (n > 1) {
- x = cos(x - y - z);
- x *= dist;
-
- if (n > 2) {
- z = sin(-x - y - z);
- z *= dist;
-
- if (n > 3) {
- x = -cos(-x + y - z);
- x *= dist;
-
- if (n > 4) {
- y = -sin(-x + y + z);
- y *= dist;
-
- if (n > 5) {
- y = -cos(-x + y + z);
- y *= dist;
-
- if (n > 6) {
- x = cos(x + y + z);
- x *= dist;
-
- if (n > 7) {
- z = sin(x + y - z);
- z *= dist;
-
- if (n > 8) {
- x = -cos(-x - y + z);
- x *= dist;
-
- if (n > 9) {
- y = -sin(x - y + z);
- y *= dist;
- }
- }
- }
- }
- }
- }
- }
- }
- }
- }
-
- if (dist != 0.0) {
- dist *= 2.0;
- x /= dist;
- y /= dist;
- z /= dist;
- }
-
- return color(0.5 - x, 0.5 - y, 0.5 - z);
+ float dist = distortion;
+
+ float x = sin((p[0] + p[1] + p[2]) * 5.0);
+ float y = cos((-p[0] + p[1] - p[2]) * 5.0);
+ float z = -cos((-p[0] - p[1] + p[2]) * 5.0);
+
+ if (n > 0) {
+ x *= dist;
+ y *= dist;
+ z *= dist;
+ y = -cos(x - y + z);
+ y *= dist;
+
+ if (n > 1) {
+ x = cos(x - y - z);
+ x *= dist;
+
+ if (n > 2) {
+ z = sin(-x - y - z);
+ z *= dist;
+
+ if (n > 3) {
+ x = -cos(-x + y - z);
+ x *= dist;
+
+ if (n > 4) {
+ y = -sin(-x + y + z);
+ y *= dist;
+
+ if (n > 5) {
+ y = -cos(-x + y + z);
+ y *= dist;
+
+ if (n > 6) {
+ x = cos(x + y + z);
+ x *= dist;
+
+ if (n > 7) {
+ z = sin(x + y - z);
+ z *= dist;
+
+ if (n > 8) {
+ x = -cos(-x - y + z);
+ x *= dist;
+
+ if (n > 9) {
+ y = -sin(x - y + z);
+ y *= dist;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (dist != 0.0) {
+ dist *= 2.0;
+ x /= dist;
+ y /= dist;
+ z /= dist;
+ }
+
+ return color(0.5 - x, 0.5 - y, 0.5 - z);
}
-shader node_magic_texture(
- int use_mapping = 0,
- matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
- int depth = 2,
- float Distortion = 5.0,
- float Scale = 5.0,
- point Vector = P,
- output float Fac = 0.0,
- output color Color = 0.0)
+shader node_magic_texture(int use_mapping = 0,
+ matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+ int depth = 2,
+ float Distortion = 5.0,
+ float Scale = 5.0,
+ point Vector = P,
+ output float Fac = 0.0,
+ output color Color = 0.0)
{
- point p = Vector;
+ point p = Vector;
- if (use_mapping)
- p = transform(mapping, p);
+ if (use_mapping)
+ p = transform(mapping, p);
- Color = magic(p * Scale, depth, Distortion);
- Fac = (Color[0] + Color[1] + Color[2]) * (1.0 / 3.0);
+ Color = magic(p * Scale, depth, Distortion);
+ Fac = (Color[0] + Color[1] + Color[2]) * (1.0 / 3.0);
}
-
diff --git a/intern/cycles/kernel/shaders/node_mapping.osl b/intern/cycles/kernel/shaders/node_mapping.osl
index 69106957ee4..f5cc2d1c5dd 100644
--- a/intern/cycles/kernel/shaders/node_mapping.osl
+++ b/intern/cycles/kernel/shaders/node_mapping.osl
@@ -16,18 +16,17 @@
#include "stdosl.h"
-shader node_mapping(
- matrix Matrix = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
- point mapping_min = point(0.0, 0.0, 0.0),
- point mapping_max = point(0.0, 0.0, 0.0),
- int use_minmax = 0,
- point VectorIn = point(0.0, 0.0, 0.0),
- output point VectorOut = point(0.0, 0.0, 0.0))
+shader node_mapping(matrix Matrix = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+ point mapping_min = point(0.0, 0.0, 0.0),
+ point mapping_max = point(0.0, 0.0, 0.0),
+ int use_minmax = 0,
+ point VectorIn = point(0.0, 0.0, 0.0),
+ output point VectorOut = point(0.0, 0.0, 0.0))
{
- point p = transform(Matrix, VectorIn);
+ point p = transform(Matrix, VectorIn);
- if (use_minmax)
- p = min(max(mapping_min, p), mapping_max);
-
- VectorOut = p;
+ if (use_minmax)
+ p = min(max(mapping_min, p), mapping_max);
+
+ VectorOut = p;
}
diff --git a/intern/cycles/kernel/shaders/node_math.osl b/intern/cycles/kernel/shaders/node_math.osl
index aa9f6e671c3..8830339e05f 100644
--- a/intern/cycles/kernel/shaders/node_math.osl
+++ b/intern/cycles/kernel/shaders/node_math.osl
@@ -18,107 +18,105 @@
float safe_divide(float a, float b)
{
- float result;
+ float result;
- if (b == 0.0)
- result = 0.0;
- else
- result = a / b;
-
- return result;
+ if (b == 0.0)
+ result = 0.0;
+ else
+ result = a / b;
+
+ return result;
}
float safe_modulo(float a, float b)
{
- float result;
+ float result;
+
+ if (b == 0.0)
+ result = 0.0;
+ else
+ result = fmod(a, b);
- if (b == 0.0)
- result = 0.0;
- else
- result = fmod(a, b);
-
- return result;
+ return result;
}
float safe_sqrt(float a)
{
- float result;
+ float result;
- if (a > 0.0)
- result = sqrt(a);
- else
- result = 0.0;
+ if (a > 0.0)
+ result = sqrt(a);
+ else
+ result = 0.0;
- return result;
+ return result;
}
float safe_log(float a, float b)
{
- if (a < 0.0 || b < 0.0)
- return 0.0;
-
- return log(a) / log(b);
+ if (a < 0.0 || b < 0.0)
+ return 0.0;
+
+ return log(a) / log(b);
}
-shader node_math(
- string type = "add",
- int use_clamp = 0,
- float Value1 = 0.0,
- float Value2 = 0.0,
- output float Value = 0.0)
+shader node_math(string type = "add",
+ int use_clamp = 0,
+ float Value1 = 0.0,
+ float Value2 = 0.0,
+ output float Value = 0.0)
{
- /* OSL asin, acos, pow check for values that could give rise to nan */
+ /* OSL asin, acos, pow check for values that could give rise to nan */
- if (type == "add")
- Value = Value1 + Value2;
- else if (type == "subtract")
- Value = Value1 - Value2;
- else if (type == "multiply")
- Value = Value1 * Value2;
- else if (type == "divide")
- Value = safe_divide(Value1, Value2);
- else if (type == "sine")
- Value = sin(Value1);
- else if (type == "cosine")
- Value = cos(Value1);
- else if (type == "tangent")
- Value = tan(Value1);
- else if (type == "arcsine")
- Value = asin(Value1);
- else if (type == "arccosine")
- Value = acos(Value1);
- else if (type == "arctangent")
- Value = atan(Value1);
- else if (type == "power")
- Value = pow(Value1, Value2);
- else if (type == "logarithm")
- Value = safe_log(Value1, Value2);
- else if (type == "minimum")
- Value = min(Value1, Value2);
- else if (type == "maximum")
- Value = max(Value1, Value2);
- else if (type == "round")
- Value = floor(Value1 + 0.5);
- else if (type == "less_than")
- Value = Value1 < Value2;
- else if (type == "greater_than")
- Value = Value1 > Value2;
- else if (type == "modulo")
- Value = safe_modulo(Value1, Value2);
- else if (type == "absolute")
- Value = fabs(Value1);
- else if (type == "arctan2")
- Value = atan2(Value1, Value2);
- else if (type == "floor")
- Value = floor(Value1);
- else if (type == "ceil")
- Value = ceil(Value1);
- else if (type == "fract")
- Value = Value1 - floor(Value1);
- else if (type == "sqrt")
- Value = safe_sqrt(Value1);
+ if (type == "add")
+ Value = Value1 + Value2;
+ else if (type == "subtract")
+ Value = Value1 - Value2;
+ else if (type == "multiply")
+ Value = Value1 * Value2;
+ else if (type == "divide")
+ Value = safe_divide(Value1, Value2);
+ else if (type == "sine")
+ Value = sin(Value1);
+ else if (type == "cosine")
+ Value = cos(Value1);
+ else if (type == "tangent")
+ Value = tan(Value1);
+ else if (type == "arcsine")
+ Value = asin(Value1);
+ else if (type == "arccosine")
+ Value = acos(Value1);
+ else if (type == "arctangent")
+ Value = atan(Value1);
+ else if (type == "power")
+ Value = pow(Value1, Value2);
+ else if (type == "logarithm")
+ Value = safe_log(Value1, Value2);
+ else if (type == "minimum")
+ Value = min(Value1, Value2);
+ else if (type == "maximum")
+ Value = max(Value1, Value2);
+ else if (type == "round")
+ Value = floor(Value1 + 0.5);
+ else if (type == "less_than")
+ Value = Value1 < Value2;
+ else if (type == "greater_than")
+ Value = Value1 > Value2;
+ else if (type == "modulo")
+ Value = safe_modulo(Value1, Value2);
+ else if (type == "absolute")
+ Value = fabs(Value1);
+ else if (type == "arctan2")
+ Value = atan2(Value1, Value2);
+ else if (type == "floor")
+ Value = floor(Value1);
+ else if (type == "ceil")
+ Value = ceil(Value1);
+ else if (type == "fract")
+ Value = Value1 - floor(Value1);
+ else if (type == "sqrt")
+ Value = safe_sqrt(Value1);
- if (use_clamp)
- Value = clamp(Value, 0.0, 1.0);
+ if (use_clamp)
+ Value = clamp(Value, 0.0, 1.0);
}
-
diff --git a/intern/cycles/kernel/shaders/node_mix.osl b/intern/cycles/kernel/shaders/node_mix.osl
index 0862c34b6e1..8caea6803ed 100644
--- a/intern/cycles/kernel/shaders/node_mix.osl
+++ b/intern/cycles/kernel/shaders/node_mix.osl
@@ -19,311 +19,312 @@
color node_mix_blend(float t, color col1, color col2)
{
- return mix(col1, col2, t);
+ return mix(col1, col2, t);
}
color node_mix_add(float t, color col1, color col2)
{
- return mix(col1, col1 + col2, t);
+ return mix(col1, col1 + col2, t);
}
color node_mix_mul(float t, color col1, color col2)
{
- return mix(col1, col1 * col2, t);
+ return mix(col1, col1 * col2, t);
}
color node_mix_screen(float t, color col1, color col2)
{
- float tm = 1.0 - t;
+ float tm = 1.0 - t;
- return color(1.0) - (color(tm) + t * (color(1.0) - col2)) * (color(1.0) - col1);
+ return color(1.0) - (color(tm) + t * (color(1.0) - col2)) * (color(1.0) - col1);
}
color node_mix_overlay(float t, color col1, color col2)
{
- float tm = 1.0 - t;
-
- color outcol = col1;
-
- if (outcol[0] < 0.5)
- outcol[0] *= tm + 2.0 * t * col2[0];
- else
- outcol[0] = 1.0 - (tm + 2.0 * t * (1.0 - col2[0])) * (1.0 - outcol[0]);
-
- if (outcol[1] < 0.5)
- outcol[1] *= tm + 2.0 * t * col2[1];
- else
- outcol[1] = 1.0 - (tm + 2.0 * t * (1.0 - col2[1])) * (1.0 - outcol[1]);
-
- if (outcol[2] < 0.5)
- outcol[2] *= tm + 2.0 * t * col2[2];
- else
- outcol[2] = 1.0 - (tm + 2.0 * t * (1.0 - col2[2])) * (1.0 - outcol[2]);
-
- return outcol;
+ float tm = 1.0 - t;
+
+ color outcol = col1;
+
+ if (outcol[0] < 0.5)
+ outcol[0] *= tm + 2.0 * t * col2[0];
+ else
+ outcol[0] = 1.0 - (tm + 2.0 * t * (1.0 - col2[0])) * (1.0 - outcol[0]);
+
+ if (outcol[1] < 0.5)
+ outcol[1] *= tm + 2.0 * t * col2[1];
+ else
+ outcol[1] = 1.0 - (tm + 2.0 * t * (1.0 - col2[1])) * (1.0 - outcol[1]);
+
+ if (outcol[2] < 0.5)
+ outcol[2] *= tm + 2.0 * t * col2[2];
+ else
+ outcol[2] = 1.0 - (tm + 2.0 * t * (1.0 - col2[2])) * (1.0 - outcol[2]);
+
+ return outcol;
}
color node_mix_sub(float t, color col1, color col2)
{
- return mix(col1, col1 - col2, t);
+ return mix(col1, col1 - col2, t);
}
color node_mix_div(float t, color col1, color col2)
{
- float tm = 1.0 - t;
+ float tm = 1.0 - t;
- color outcol = col1;
+ color outcol = col1;
- if (col2[0] != 0.0) outcol[0] = tm * outcol[0] + t * outcol[0] / col2[0];
- if (col2[1] != 0.0) outcol[1] = tm * outcol[1] + t * outcol[1] / col2[1];
- if (col2[2] != 0.0) outcol[2] = tm * outcol[2] + t * outcol[2] / col2[2];
+ if (col2[0] != 0.0)
+ outcol[0] = tm * outcol[0] + t * outcol[0] / col2[0];
+ if (col2[1] != 0.0)
+ outcol[1] = tm * outcol[1] + t * outcol[1] / col2[1];
+ if (col2[2] != 0.0)
+ outcol[2] = tm * outcol[2] + t * outcol[2] / col2[2];
- return outcol;
+ return outcol;
}
color node_mix_diff(float t, color col1, color col2)
{
- return mix(col1, abs(col1 - col2), t);
+ return mix(col1, abs(col1 - col2), t);
}
color node_mix_dark(float t, color col1, color col2)
{
- return min(col1, col2) * t + col1 * (1.0 - t);
+ return min(col1, col2) * t + col1 * (1.0 - t);
}
color node_mix_light(float t, color col1, color col2)
{
- return max(col1, col2 * t);
+ return max(col1, col2 * t);
}
color node_mix_dodge(float t, color col1, color col2)
{
- color outcol = col1;
-
- if (outcol[0] != 0.0) {
- float tmp = 1.0 - t * col2[0];
- if (tmp <= 0.0)
- outcol[0] = 1.0;
- else if ((tmp = outcol[0] / tmp) > 1.0)
- outcol[0] = 1.0;
- else
- outcol[0] = tmp;
- }
- if (outcol[1] != 0.0) {
- float tmp = 1.0 - t * col2[1];
- if (tmp <= 0.0)
- outcol[1] = 1.0;
- else if ((tmp = outcol[1] / tmp) > 1.0)
- outcol[1] = 1.0;
- else
- outcol[1] = tmp;
- }
- if (outcol[2] != 0.0) {
- float tmp = 1.0 - t * col2[2];
- if (tmp <= 0.0)
- outcol[2] = 1.0;
- else if ((tmp = outcol[2] / tmp) > 1.0)
- outcol[2] = 1.0;
- else
- outcol[2] = tmp;
- }
-
- return outcol;
+ color outcol = col1;
+
+ if (outcol[0] != 0.0) {
+ float tmp = 1.0 - t * col2[0];
+ if (tmp <= 0.0)
+ outcol[0] = 1.0;
+ else if ((tmp = outcol[0] / tmp) > 1.0)
+ outcol[0] = 1.0;
+ else
+ outcol[0] = tmp;
+ }
+ if (outcol[1] != 0.0) {
+ float tmp = 1.0 - t * col2[1];
+ if (tmp <= 0.0)
+ outcol[1] = 1.0;
+ else if ((tmp = outcol[1] / tmp) > 1.0)
+ outcol[1] = 1.0;
+ else
+ outcol[1] = tmp;
+ }
+ if (outcol[2] != 0.0) {
+ float tmp = 1.0 - t * col2[2];
+ if (tmp <= 0.0)
+ outcol[2] = 1.0;
+ else if ((tmp = outcol[2] / tmp) > 1.0)
+ outcol[2] = 1.0;
+ else
+ outcol[2] = tmp;
+ }
+
+ return outcol;
}
color node_mix_burn(float t, color col1, color col2)
{
- float tmp, tm = 1.0 - t;
-
- color outcol = col1;
-
- tmp = tm + t * col2[0];
- if (tmp <= 0.0)
- outcol[0] = 0.0;
- else if ((tmp = (1.0 - (1.0 - outcol[0]) / tmp)) < 0.0)
- outcol[0] = 0.0;
- else if (tmp > 1.0)
- outcol[0] = 1.0;
- else
- outcol[0] = tmp;
-
- tmp = tm + t * col2[1];
- if (tmp <= 0.0)
- outcol[1] = 0.0;
- else if ((tmp = (1.0 - (1.0 - outcol[1]) / tmp)) < 0.0)
- outcol[1] = 0.0;
- else if (tmp > 1.0)
- outcol[1] = 1.0;
- else
- outcol[1] = tmp;
-
- tmp = tm + t * col2[2];
- if (tmp <= 0.0)
- outcol[2] = 0.0;
- else if ((tmp = (1.0 - (1.0 - outcol[2]) / tmp)) < 0.0)
- outcol[2] = 0.0;
- else if (tmp > 1.0)
- outcol[2] = 1.0;
- else
- outcol[2] = tmp;
-
- return outcol;
+ float tmp, tm = 1.0 - t;
+
+ color outcol = col1;
+
+ tmp = tm + t * col2[0];
+ if (tmp <= 0.0)
+ outcol[0] = 0.0;
+ else if ((tmp = (1.0 - (1.0 - outcol[0]) / tmp)) < 0.0)
+ outcol[0] = 0.0;
+ else if (tmp > 1.0)
+ outcol[0] = 1.0;
+ else
+ outcol[0] = tmp;
+
+ tmp = tm + t * col2[1];
+ if (tmp <= 0.0)
+ outcol[1] = 0.0;
+ else if ((tmp = (1.0 - (1.0 - outcol[1]) / tmp)) < 0.0)
+ outcol[1] = 0.0;
+ else if (tmp > 1.0)
+ outcol[1] = 1.0;
+ else
+ outcol[1] = tmp;
+
+ tmp = tm + t * col2[2];
+ if (tmp <= 0.0)
+ outcol[2] = 0.0;
+ else if ((tmp = (1.0 - (1.0 - outcol[2]) / tmp)) < 0.0)
+ outcol[2] = 0.0;
+ else if (tmp > 1.0)
+ outcol[2] = 1.0;
+ else
+ outcol[2] = tmp;
+
+ return outcol;
}
color node_mix_hue(float t, color col1, color col2)
{
- color outcol = col1;
- color hsv2 = rgb_to_hsv(col2);
+ color outcol = col1;
+ color hsv2 = rgb_to_hsv(col2);
- if (hsv2[1] != 0.0) {
- color hsv = rgb_to_hsv(outcol);
- hsv[0] = hsv2[0];
- color tmp = hsv_to_rgb(hsv);
+ if (hsv2[1] != 0.0) {
+ color hsv = rgb_to_hsv(outcol);
+ hsv[0] = hsv2[0];
+ color tmp = hsv_to_rgb(hsv);
- outcol = mix(outcol, tmp, t);
- }
+ outcol = mix(outcol, tmp, t);
+ }
- return outcol;
+ return outcol;
}
color node_mix_sat(float t, color col1, color col2)
{
- float tm = 1.0 - t;
+ float tm = 1.0 - t;
- color outcol = col1;
+ color outcol = col1;
- color hsv = rgb_to_hsv(outcol);
+ color hsv = rgb_to_hsv(outcol);
- if (hsv[1] != 0.0) {
- color hsv2 = rgb_to_hsv(col2);
+ if (hsv[1] != 0.0) {
+ color hsv2 = rgb_to_hsv(col2);
- hsv[1] = tm * hsv[1] + t * hsv2[1];
- outcol = hsv_to_rgb(hsv);
- }
+ hsv[1] = tm * hsv[1] + t * hsv2[1];
+ outcol = hsv_to_rgb(hsv);
+ }
- return outcol;
+ return outcol;
}
color node_mix_val(float t, color col1, color col2)
{
- float tm = 1.0 - t;
+ float tm = 1.0 - t;
- color hsv = rgb_to_hsv(col1);
- color hsv2 = rgb_to_hsv(col2);
+ color hsv = rgb_to_hsv(col1);
+ color hsv2 = rgb_to_hsv(col2);
- hsv[2] = tm * hsv[2] + t * hsv2[2];
+ hsv[2] = tm * hsv[2] + t * hsv2[2];
- return hsv_to_rgb(hsv);
+ return hsv_to_rgb(hsv);
}
color node_mix_color(float t, color col1, color col2)
{
- color outcol = col1;
- color hsv2 = rgb_to_hsv(col2);
+ color outcol = col1;
+ color hsv2 = rgb_to_hsv(col2);
- if (hsv2[1] != 0.0) {
- color hsv = rgb_to_hsv(outcol);
- hsv[0] = hsv2[0];
- hsv[1] = hsv2[1];
- color tmp = hsv_to_rgb(hsv);
+ if (hsv2[1] != 0.0) {
+ color hsv = rgb_to_hsv(outcol);
+ hsv[0] = hsv2[0];
+ hsv[1] = hsv2[1];
+ color tmp = hsv_to_rgb(hsv);
- outcol = mix(outcol, tmp, t);
- }
+ outcol = mix(outcol, tmp, t);
+ }
- return outcol;
+ return outcol;
}
color node_mix_soft(float t, color col1, color col2)
{
- float tm = 1.0 - t;
+ float tm = 1.0 - t;
- color one = color(1.0);
- color scr = one - (one - col2) * (one - col1);
+ color one = color(1.0);
+ color scr = one - (one - col2) * (one - col1);
- return tm * col1 + t * ((one - col1) * col2 * col1 + col1 * scr);
+ return tm * col1 + t * ((one - col1) * col2 * col1 + col1 * scr);
}
color node_mix_linear(float t, color col1, color col2)
{
- color outcol = col1;
-
- if (col2[0] > 0.5)
- outcol[0] = col1[0] + t * (2.0 * (col2[0] - 0.5));
- else
- outcol[0] = col1[0] + t * (2.0 * (col2[0]) - 1.0);
-
- if (col2[1] > 0.5)
- outcol[1] = col1[1] + t * (2.0 * (col2[1] - 0.5));
- else
- outcol[1] = col1[1] + t * (2.0 * (col2[1]) - 1.0);
-
- if (col2[2] > 0.5)
- outcol[2] = col1[2] + t * (2.0 * (col2[2] - 0.5));
- else
- outcol[2] = col1[2] + t * (2.0 * (col2[2]) - 1.0);
-
- return outcol;
+ color outcol = col1;
+
+ if (col2[0] > 0.5)
+ outcol[0] = col1[0] + t * (2.0 * (col2[0] - 0.5));
+ else
+ outcol[0] = col1[0] + t * (2.0 * (col2[0]) - 1.0);
+
+ if (col2[1] > 0.5)
+ outcol[1] = col1[1] + t * (2.0 * (col2[1] - 0.5));
+ else
+ outcol[1] = col1[1] + t * (2.0 * (col2[1]) - 1.0);
+
+ if (col2[2] > 0.5)
+ outcol[2] = col1[2] + t * (2.0 * (col2[2] - 0.5));
+ else
+ outcol[2] = col1[2] + t * (2.0 * (col2[2]) - 1.0);
+
+ return outcol;
}
color node_mix_clamp(color col)
{
- color outcol = col;
+ color outcol = col;
- outcol[0] = clamp(col[0], 0.0, 1.0);
- outcol[1] = clamp(col[1], 0.0, 1.0);
- outcol[2] = clamp(col[2], 0.0, 1.0);
+ outcol[0] = clamp(col[0], 0.0, 1.0);
+ outcol[1] = clamp(col[1], 0.0, 1.0);
+ outcol[2] = clamp(col[2], 0.0, 1.0);
- return outcol;
+ return outcol;
}
-shader node_mix(
- string type = "mix",
- int use_clamp = 0,
- float Fac = 0.5,
- color Color1 = 0.0,
- color Color2 = 0.0,
- output color Color = 0.0)
+shader node_mix(string type = "mix",
+ int use_clamp = 0,
+ float Fac = 0.5,
+ color Color1 = 0.0,
+ color Color2 = 0.0,
+ output color Color = 0.0)
{
- float t = clamp(Fac, 0.0, 1.0);
-
- if (type == "mix")
- Color = node_mix_blend(t, Color1, Color2);
- if (type == "add")
- Color = node_mix_add(t, Color1, Color2);
- if (type == "multiply")
- Color = node_mix_mul(t, Color1, Color2);
- if (type == "screen")
- Color = node_mix_screen(t, Color1, Color2);
- if (type == "overlay")
- Color = node_mix_overlay(t, Color1, Color2);
- if (type == "subtract")
- Color = node_mix_sub(t, Color1, Color2);
- if (type == "divide")
- Color = node_mix_div(t, Color1, Color2);
- if (type == "difference")
- Color = node_mix_diff(t, Color1, Color2);
- if (type == "darken")
- Color = node_mix_dark(t, Color1, Color2);
- if (type == "lighten")
- Color = node_mix_light(t, Color1, Color2);
- if (type == "dodge")
- Color = node_mix_dodge(t, Color1, Color2);
- if (type == "burn")
- Color = node_mix_burn(t, Color1, Color2);
- if (type == "hue")
- Color = node_mix_hue(t, Color1, Color2);
- if (type == "saturation")
- Color = node_mix_sat(t, Color1, Color2);
- if (type == "value")
- Color = node_mix_val (t, Color1, Color2);
- if (type == "color")
- Color = node_mix_color(t, Color1, Color2);
- if (type == "soft_light")
- Color = node_mix_soft(t, Color1, Color2);
- if (type == "linear_light")
- Color = node_mix_linear(t, Color1, Color2);
-
- if (use_clamp)
- Color = node_mix_clamp(Color);
+ float t = clamp(Fac, 0.0, 1.0);
+
+ if (type == "mix")
+ Color = node_mix_blend(t, Color1, Color2);
+ if (type == "add")
+ Color = node_mix_add(t, Color1, Color2);
+ if (type == "multiply")
+ Color = node_mix_mul(t, Color1, Color2);
+ if (type == "screen")
+ Color = node_mix_screen(t, Color1, Color2);
+ if (type == "overlay")
+ Color = node_mix_overlay(t, Color1, Color2);
+ if (type == "subtract")
+ Color = node_mix_sub(t, Color1, Color2);
+ if (type == "divide")
+ Color = node_mix_div(t, Color1, Color2);
+ if (type == "difference")
+ Color = node_mix_diff(t, Color1, Color2);
+ if (type == "darken")
+ Color = node_mix_dark(t, Color1, Color2);
+ if (type == "lighten")
+ Color = node_mix_light(t, Color1, Color2);
+ if (type == "dodge")
+ Color = node_mix_dodge(t, Color1, Color2);
+ if (type == "burn")
+ Color = node_mix_burn(t, Color1, Color2);
+ if (type == "hue")
+ Color = node_mix_hue(t, Color1, Color2);
+ if (type == "saturation")
+ Color = node_mix_sat(t, Color1, Color2);
+ if (type == "value")
+ Color = node_mix_val(t, Color1, Color2);
+ if (type == "color")
+ Color = node_mix_color(t, Color1, Color2);
+ if (type == "soft_light")
+ Color = node_mix_soft(t, Color1, Color2);
+ if (type == "linear_light")
+ Color = node_mix_linear(t, Color1, Color2);
+
+ if (use_clamp)
+ Color = node_mix_clamp(Color);
}
-
diff --git a/intern/cycles/kernel/shaders/node_mix_closure.osl b/intern/cycles/kernel/shaders/node_mix_closure.osl
index 5946dfdaaba..517c59c8786 100644
--- a/intern/cycles/kernel/shaders/node_mix_closure.osl
+++ b/intern/cycles/kernel/shaders/node_mix_closure.osl
@@ -16,13 +16,11 @@
#include "stdosl.h"
-shader node_mix_closure(
- float Fac = 0.5,
- closure color Closure1 = 0,
- closure color Closure2 = 0,
- output closure color Closure = 0)
+shader node_mix_closure(float Fac = 0.5,
+ closure color Closure1 = 0,
+ closure color Closure2 = 0,
+ output closure color Closure = 0)
{
- float t = clamp(Fac, 0.0, 1.0);
- Closure = (1.0 - t) * Closure1 + t * Closure2;
+ float t = clamp(Fac, 0.0, 1.0);
+ Closure = (1.0 - t) * Closure1 + t * Closure2;
}
-
diff --git a/intern/cycles/kernel/shaders/node_musgrave_texture.osl b/intern/cycles/kernel/shaders/node_musgrave_texture.osl
index 454b3834081..a7877c43d46 100644
--- a/intern/cycles/kernel/shaders/node_musgrave_texture.osl
+++ b/intern/cycles/kernel/shaders/node_musgrave_texture.osl
@@ -28,24 +28,24 @@
float noise_musgrave_fBm(point ip, float H, float lacunarity, float octaves)
{
- float rmd;
- float value = 0.0;
- float pwr = 1.0;
- float pwHL = pow(lacunarity, -H);
- int i;
- point p = ip;
-
- for (i = 0; i < (int)octaves; i++) {
- value += safe_noise(p, "signed") * pwr;
- pwr *= pwHL;
- p *= lacunarity;
- }
-
- rmd = octaves - floor(octaves);
- if (rmd != 0.0)
- value += rmd * safe_noise(p, "signed") * pwr;
-
- return value;
+ float rmd;
+ float value = 0.0;
+ float pwr = 1.0;
+ float pwHL = pow(lacunarity, -H);
+ int i;
+ point p = ip;
+
+ for (i = 0; i < (int)octaves; i++) {
+ value += safe_noise(p, "signed") * pwr;
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ rmd = octaves - floor(octaves);
+ if (rmd != 0.0)
+ value += rmd * safe_noise(p, "signed") * pwr;
+
+ return value;
}
/* Musgrave Multifractal
@@ -57,24 +57,24 @@ float noise_musgrave_fBm(point ip, float H, float lacunarity, float octaves)
float noise_musgrave_multi_fractal(point ip, float H, float lacunarity, float octaves)
{
- float rmd;
- float value = 1.0;
- float pwr = 1.0;
- float pwHL = pow(lacunarity, -H);
- int i;
- point p = ip;
-
- for (i = 0; i < (int)octaves; i++) {
- value *= (pwr * safe_noise(p, "signed") + 1.0);
- pwr *= pwHL;
- p *= lacunarity;
- }
-
- rmd = octaves - floor(octaves);
- if (rmd != 0.0)
- value *= (rmd * pwr * safe_noise(p, "signed") + 1.0); /* correct? */
-
- return value;
+ float rmd;
+ float value = 1.0;
+ float pwr = 1.0;
+ float pwHL = pow(lacunarity, -H);
+ int i;
+ point p = ip;
+
+ for (i = 0; i < (int)octaves; i++) {
+ value *= (pwr * safe_noise(p, "signed") + 1.0);
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ rmd = octaves - floor(octaves);
+ if (rmd != 0.0)
+ value *= (rmd * pwr * safe_noise(p, "signed") + 1.0); /* correct? */
+
+ return value;
}
/* Musgrave Heterogeneous Terrain
@@ -85,32 +85,33 @@ float noise_musgrave_multi_fractal(point ip, float H, float lacunarity, float oc
* offset: raises the terrain from `sea level'
*/
-float noise_musgrave_hetero_terrain(point ip, float H, float lacunarity, float octaves, float offset)
+float noise_musgrave_hetero_terrain(
+ point ip, float H, float lacunarity, float octaves, float offset)
{
- float value, increment, rmd;
- float pwHL = pow(lacunarity, -H);
- float pwr = pwHL;
- int i;
- point p = ip;
-
- /* first unscaled octave of function; later octaves are scaled */
- value = offset + safe_noise(p, "signed");
- p *= lacunarity;
-
- for (i = 1; i < (int)octaves; i++) {
- increment = (safe_noise(p, "signed") + offset) * pwr * value;
- value += increment;
- pwr *= pwHL;
- p *= lacunarity;
- }
-
- rmd = octaves - floor(octaves);
- if (rmd != 0.0) {
- increment = (safe_noise(p, "signed") + offset) * pwr * value;
- value += rmd * increment;
- }
-
- return value;
+ float value, increment, rmd;
+ float pwHL = pow(lacunarity, -H);
+ float pwr = pwHL;
+ int i;
+ point p = ip;
+
+ /* first unscaled octave of function; later octaves are scaled */
+ value = offset + safe_noise(p, "signed");
+ p *= lacunarity;
+
+ for (i = 1; i < (int)octaves; i++) {
+ increment = (safe_noise(p, "signed") + offset) * pwr * value;
+ value += increment;
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ rmd = octaves - floor(octaves);
+ if (rmd != 0.0) {
+ increment = (safe_noise(p, "signed") + offset) * pwr * value;
+ value += rmd * increment;
+ }
+
+ return value;
}
/* Hybrid Additive/Multiplicative Multifractal Terrain
@@ -121,35 +122,35 @@ float noise_musgrave_hetero_terrain(point ip, float H, float lacunarity, float o
* offset: raises the terrain from `sea level'
*/
-float noise_musgrave_hybrid_multi_fractal(point ip, float H, float lacunarity,
- float octaves, float offset, float gain)
+float noise_musgrave_hybrid_multi_fractal(
+ point ip, float H, float lacunarity, float octaves, float offset, float gain)
{
- float result, signal, weight, rmd;
- float pwHL = pow(lacunarity, -H);
- float pwr = pwHL;
- int i;
- point p = ip;
-
- result = safe_noise(p, "signed") + offset;
- weight = gain * result;
- p *= lacunarity;
-
- for (i = 1; (weight > 0.001) && (i < (int)octaves); i++) {
- if (weight > 1.0)
- weight = 1.0;
-
- signal = (safe_noise(p, "signed") + offset) * pwr;
- pwr *= pwHL;
- result += weight * signal;
- weight *= gain * signal;
- p *= lacunarity;
- }
-
- rmd = octaves - floor(octaves);
- if (rmd != 0.0)
- result += rmd * ((safe_noise(p, "signed") + offset) * pwr);
-
- return result;
+ float result, signal, weight, rmd;
+ float pwHL = pow(lacunarity, -H);
+ float pwr = pwHL;
+ int i;
+ point p = ip;
+
+ result = safe_noise(p, "signed") + offset;
+ weight = gain * result;
+ p *= lacunarity;
+
+ for (i = 1; (weight > 0.001) && (i < (int)octaves); i++) {
+ if (weight > 1.0)
+ weight = 1.0;
+
+ signal = (safe_noise(p, "signed") + offset) * pwr;
+ pwr *= pwHL;
+ result += weight * signal;
+ weight *= gain * signal;
+ p *= lacunarity;
+ }
+
+ rmd = octaves - floor(octaves);
+ if (rmd != 0.0)
+ result += rmd * ((safe_noise(p, "signed") + offset) * pwr);
+
+ return result;
}
/* Ridged Multifractal Terrain
@@ -160,72 +161,73 @@ float noise_musgrave_hybrid_multi_fractal(point ip, float H, float lacunarity,
* offset: raises the terrain from `sea level'
*/
-float noise_musgrave_ridged_multi_fractal(point ip, float H, float lacunarity,
- float octaves, float offset, float gain)
+float noise_musgrave_ridged_multi_fractal(
+ point ip, float H, float lacunarity, float octaves, float offset, float gain)
{
- float result, signal, weight;
- float pwHL = pow(lacunarity, -H);
- float pwr = pwHL;
- int i;
- point p = ip;
-
- signal = offset - fabs(safe_noise(p, "signed"));
- signal *= signal;
- result = signal;
- weight = 1.0;
-
- for (i = 1; i < (int)octaves; i++) {
- p *= lacunarity;
- weight = clamp(signal * gain, 0.0, 1.0);
- signal = offset - fabs(safe_noise(p, "signed"));
- signal *= signal;
- signal *= weight;
- result += signal * pwr;
- pwr *= pwHL;
- }
-
- return result;
+ float result, signal, weight;
+ float pwHL = pow(lacunarity, -H);
+ float pwr = pwHL;
+ int i;
+ point p = ip;
+
+ signal = offset - fabs(safe_noise(p, "signed"));
+ signal *= signal;
+ result = signal;
+ weight = 1.0;
+
+ for (i = 1; i < (int)octaves; i++) {
+ p *= lacunarity;
+ weight = clamp(signal * gain, 0.0, 1.0);
+ signal = offset - fabs(safe_noise(p, "signed"));
+ signal *= signal;
+ signal *= weight;
+ result += signal * pwr;
+ pwr *= pwHL;
+ }
+
+ return result;
}
/* Shader */
shader node_musgrave_texture(
- int use_mapping = 0,
- matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
- string type = "fBM",
- float Dimension = 2.0,
- float Lacunarity = 1.0,
- float Detail = 2.0,
- float Offset = 0.0,
- float Gain = 1.0,
- float Scale = 5.0,
- point Vector = P,
- output float Fac = 0.0,
- output color Color = 0.0)
+ int use_mapping = 0,
+ matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+ string type = "fBM",
+ float Dimension = 2.0,
+ float Lacunarity = 1.0,
+ float Detail = 2.0,
+ float Offset = 0.0,
+ float Gain = 1.0,
+ float Scale = 5.0,
+ point Vector = P,
+ output float Fac = 0.0,
+ output color Color = 0.0)
{
- float dimension = max(Dimension, 1e-5);
- float octaves = clamp(Detail, 0.0, 16.0);
- float lacunarity = max(Lacunarity, 1e-5);
- float intensity = 1.0;
-
- point p = Vector;
-
- if (use_mapping)
- p = transform(mapping, p);
-
- p = p * Scale;
-
- if (type == "multifractal")
- Fac = intensity * noise_musgrave_multi_fractal(p, dimension, lacunarity, octaves);
- else if (type == "fBM")
- Fac = intensity * noise_musgrave_fBm(p, dimension, lacunarity, octaves);
- else if (type == "hybrid_multifractal")
- Fac = intensity * noise_musgrave_hybrid_multi_fractal(p, dimension, lacunarity, octaves, Offset, Gain);
- else if (type == "ridged_multifractal")
- Fac = intensity * noise_musgrave_ridged_multi_fractal(p, dimension, lacunarity, octaves, Offset, Gain);
- else if (type == "hetero_terrain")
- Fac = intensity * noise_musgrave_hetero_terrain(p, dimension, lacunarity, octaves, Offset);
-
- Color = color(Fac, Fac, Fac);
+ float dimension = max(Dimension, 1e-5);
+ float octaves = clamp(Detail, 0.0, 16.0);
+ float lacunarity = max(Lacunarity, 1e-5);
+ float intensity = 1.0;
+
+ point p = Vector;
+
+ if (use_mapping)
+ p = transform(mapping, p);
+
+ p = p * Scale;
+
+ if (type == "multifractal")
+ Fac = intensity * noise_musgrave_multi_fractal(p, dimension, lacunarity, octaves);
+ else if (type == "fBM")
+ Fac = intensity * noise_musgrave_fBm(p, dimension, lacunarity, octaves);
+ else if (type == "hybrid_multifractal")
+ Fac = intensity *
+ noise_musgrave_hybrid_multi_fractal(p, dimension, lacunarity, octaves, Offset, Gain);
+ else if (type == "ridged_multifractal")
+ Fac = intensity *
+ noise_musgrave_ridged_multi_fractal(p, dimension, lacunarity, octaves, Offset, Gain);
+ else if (type == "hetero_terrain")
+ Fac = intensity * noise_musgrave_hetero_terrain(p, dimension, lacunarity, octaves, Offset);
+
+ Color = color(Fac, Fac, Fac);
}
-
diff --git a/intern/cycles/kernel/shaders/node_noise_texture.osl b/intern/cycles/kernel/shaders/node_noise_texture.osl
index 42a30897341..2cbd571e206 100644
--- a/intern/cycles/kernel/shaders/node_noise_texture.osl
+++ b/intern/cycles/kernel/shaders/node_noise_texture.osl
@@ -21,41 +21,40 @@
float noise(point ip, float distortion, float detail, output color Color)
{
- point r;
- point p = ip;
- int hard = 0;
-
- if (distortion != 0.0) {
- r[0] = safe_noise(p + point(13.5), "unsigned") * distortion;
- r[1] = safe_noise(p, "unsigned") * distortion;
- r[2] = safe_noise(p - point(13.5), "unsigned") * distortion;
-
- p += r;
- }
-
- float fac = noise_turbulence(p, detail, hard);
-
- Color = color(fac, noise_turbulence(point(p[1], p[0], p[2]), detail, hard),
- noise_turbulence(point(p[1], p[2], p[0]), detail, hard));
-
- return fac;
+ point r;
+ point p = ip;
+ int hard = 0;
+
+ if (distortion != 0.0) {
+ r[0] = safe_noise(p + point(13.5), "unsigned") * distortion;
+ r[1] = safe_noise(p, "unsigned") * distortion;
+ r[2] = safe_noise(p - point(13.5), "unsigned") * distortion;
+
+ p += r;
+ }
+
+ float fac = noise_turbulence(p, detail, hard);
+
+ Color = color(fac,
+ noise_turbulence(point(p[1], p[0], p[2]), detail, hard),
+ noise_turbulence(point(p[1], p[2], p[0]), detail, hard));
+
+ return fac;
}
-shader node_noise_texture(
- int use_mapping = 0,
- matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
- float Distortion = 0.0,
- float Scale = 5.0,
- float Detail = 2.0,
- point Vector = P,
- output float Fac = 0.0,
- output color Color = 0.0)
+shader node_noise_texture(int use_mapping = 0,
+ matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+ float Distortion = 0.0,
+ float Scale = 5.0,
+ float Detail = 2.0,
+ point Vector = P,
+ output float Fac = 0.0,
+ output color Color = 0.0)
{
- point p = Vector;
+ point p = Vector;
- if (use_mapping)
- p = transform(mapping, p);
+ if (use_mapping)
+ p = transform(mapping, p);
- Fac = noise(p * Scale, Distortion, Detail, Color);
+ Fac = noise(p * Scale, Distortion, Detail, Color);
}
-
diff --git a/intern/cycles/kernel/shaders/node_normal.osl b/intern/cycles/kernel/shaders/node_normal.osl
index 7307971eddd..1d20c3e7cac 100644
--- a/intern/cycles/kernel/shaders/node_normal.osl
+++ b/intern/cycles/kernel/shaders/node_normal.osl
@@ -16,13 +16,11 @@
#include "stdosl.h"
-shader node_normal(
- normal direction = normal(0.0, 0.0, 0.0),
- normal NormalIn = normal(0.0, 0.0, 0.0),
- output normal NormalOut = normal(0.0, 0.0, 0.0),
- output float Dot = 1.0)
+shader node_normal(normal direction = normal(0.0, 0.0, 0.0),
+ normal NormalIn = normal(0.0, 0.0, 0.0),
+ output normal NormalOut = normal(0.0, 0.0, 0.0),
+ output float Dot = 1.0)
{
- NormalOut = normalize(direction);
- Dot = dot(NormalOut, normalize(NormalIn));
+ NormalOut = normalize(direction);
+ Dot = dot(NormalOut, normalize(NormalIn));
}
-
diff --git a/intern/cycles/kernel/shaders/node_normal_map.osl b/intern/cycles/kernel/shaders/node_normal_map.osl
index fda6f12a5da..90b593d00bc 100644
--- a/intern/cycles/kernel/shaders/node_normal_map.osl
+++ b/intern/cycles/kernel/shaders/node_normal_map.osl
@@ -16,79 +16,75 @@
#include "stdosl.h"
-shader node_normal_map(
- normal NormalIn = N,
- float Strength = 1.0,
- color Color = color(0.5, 0.5, 1.0),
- string space = "tangent",
- string attr_name = "geom:tangent",
- string attr_sign_name = "geom:tangent_sign",
- output normal Normal = NormalIn)
+shader node_normal_map(normal NormalIn = N,
+ float Strength = 1.0,
+ color Color = color(0.5, 0.5, 1.0),
+ string space = "tangent",
+ string attr_name = "geom:tangent",
+ string attr_sign_name = "geom:tangent_sign",
+ output normal Normal = NormalIn)
{
- color mcolor = 2.0 * color(Color[0] - 0.5, Color[1] - 0.5, Color[2] - 0.5);
- int is_backfacing = backfacing();
+ color mcolor = 2.0 * color(Color[0] - 0.5, Color[1] - 0.5, Color[2] - 0.5);
+ int is_backfacing = backfacing();
- if (space == "tangent") {
- vector tangent;
- vector ninterp;
- float tangent_sign;
- float is_smooth;
+ if (space == "tangent") {
+ vector tangent;
+ vector ninterp;
+ float tangent_sign;
+ float is_smooth;
- getattribute("geom:is_smooth", is_smooth);
- if (!is_smooth) {
- ninterp = normalize(transform("world", "object", Ng));
+ getattribute("geom:is_smooth", is_smooth);
+ if (!is_smooth) {
+ ninterp = normalize(transform("world", "object", Ng));
- /* the normal is already inverted, which is too soon for the math here */
- if (is_backfacing) {
- ninterp = -ninterp;
- }
- }
+ /* the normal is already inverted, which is too soon for the math here */
+ if (is_backfacing) {
+ ninterp = -ninterp;
+ }
+ }
- // get _unnormalized_ interpolated normal and tangent
- if (getattribute(attr_name, tangent) &&
- getattribute(attr_sign_name, tangent_sign) &&
- (!is_smooth || getattribute("geom:N", ninterp)))
- {
- // apply normal map
- vector B = tangent_sign * cross(ninterp, tangent);
- Normal = normalize(mcolor[0] * tangent + mcolor[1] * B + mcolor[2] * ninterp);
+ // get _unnormalized_ interpolated normal and tangent
+ if (getattribute(attr_name, tangent) && getattribute(attr_sign_name, tangent_sign) &&
+ (!is_smooth || getattribute("geom:N", ninterp))) {
+ // apply normal map
+ vector B = tangent_sign * cross(ninterp, tangent);
+ Normal = normalize(mcolor[0] * tangent + mcolor[1] * B + mcolor[2] * ninterp);
- // transform to world space
- Normal = normalize(transform("object", "world", Normal));
- }
- else {
- Normal = normal(0, 0, 0);
- }
- }
- else if (space == "object") {
- Normal = normalize(transform("object", "world", vector(mcolor)));
- }
- else if (space == "world") {
- Normal = normalize(vector(mcolor));
- }
- else if (space == "blender_object") {
- /* strange blender convention */
- mcolor[1] = -mcolor[1];
- mcolor[2] = -mcolor[2];
-
- Normal = normalize(transform("object", "world", vector(mcolor)));
- }
- else if (space == "blender_world") {
- /* strange blender convention */
- mcolor[1] = -mcolor[1];
- mcolor[2] = -mcolor[2];
-
- Normal = normalize(vector(mcolor));
- }
+ // transform to world space
+ Normal = normalize(transform("object", "world", Normal));
+ }
+ else {
+ Normal = normal(0, 0, 0);
+ }
+ }
+ else if (space == "object") {
+ Normal = normalize(transform("object", "world", vector(mcolor)));
+ }
+ else if (space == "world") {
+ Normal = normalize(vector(mcolor));
+ }
+ else if (space == "blender_object") {
+ /* strange blender convention */
+ mcolor[1] = -mcolor[1];
+ mcolor[2] = -mcolor[2];
- /* invert normal for backfacing polygons */
- if (is_backfacing) {
- Normal = -Normal;
- }
+ Normal = normalize(transform("object", "world", vector(mcolor)));
+ }
+ else if (space == "blender_world") {
+ /* strange blender convention */
+ mcolor[1] = -mcolor[1];
+ mcolor[2] = -mcolor[2];
- if (Strength != 1.0)
- Normal = normalize(NormalIn + (Normal - NormalIn) * max(Strength, 0.0));
+ Normal = normalize(vector(mcolor));
+ }
- Normal = ensure_valid_reflection(Ng, I, Normal);
-}
+ /* invert normal for backfacing polygons */
+ if (is_backfacing) {
+ Normal = -Normal;
+ }
+
+ if (Strength != 1.0)
+ Normal = normalize(NormalIn + (Normal - NormalIn) * max(Strength, 0.0));
+ Normal = ensure_valid_reflection(Ng, I, Normal);
+}
diff --git a/intern/cycles/kernel/shaders/node_object_info.osl b/intern/cycles/kernel/shaders/node_object_info.osl
index dd7c663b8d8..0904a30a53f 100644
--- a/intern/cycles/kernel/shaders/node_object_info.osl
+++ b/intern/cycles/kernel/shaders/node_object_info.osl
@@ -16,15 +16,13 @@
#include "stdosl.h"
-shader node_object_info(
- output point Location = point(0.0, 0.0, 0.0),
- output float ObjectIndex = 0.0,
- output float MaterialIndex = 0.0,
- output float Random = 0.0)
+shader node_object_info(output point Location = point(0.0, 0.0, 0.0),
+ output float ObjectIndex = 0.0,
+ output float MaterialIndex = 0.0,
+ output float Random = 0.0)
{
- getattribute("object:location", Location);
- getattribute("object:index", ObjectIndex);
- getattribute("material:index", MaterialIndex);
- getattribute("object:random", Random);
+ getattribute("object:location", Location);
+ getattribute("object:index", ObjectIndex);
+ getattribute("material:index", MaterialIndex);
+ getattribute("object:random", Random);
}
-
diff --git a/intern/cycles/kernel/shaders/node_output_displacement.osl b/intern/cycles/kernel/shaders/node_output_displacement.osl
index 5dbef0244fe..fa7f603980b 100644
--- a/intern/cycles/kernel/shaders/node_output_displacement.osl
+++ b/intern/cycles/kernel/shaders/node_output_displacement.osl
@@ -18,6 +18,5 @@
displacement node_output_displacement(vector Displacement = 0.0)
{
- P += Displacement;
+ P += Displacement;
}
-
diff --git a/intern/cycles/kernel/shaders/node_output_surface.osl b/intern/cycles/kernel/shaders/node_output_surface.osl
index 2cc4575a8c8..013666145da 100644
--- a/intern/cycles/kernel/shaders/node_output_surface.osl
+++ b/intern/cycles/kernel/shaders/node_output_surface.osl
@@ -18,6 +18,5 @@
surface node_output_surface(closure color Surface = 0)
{
- Ci = Surface;
+ Ci = Surface;
}
-
diff --git a/intern/cycles/kernel/shaders/node_output_volume.osl b/intern/cycles/kernel/shaders/node_output_volume.osl
index f220ba866e3..dd479e751b3 100644
--- a/intern/cycles/kernel/shaders/node_output_volume.osl
+++ b/intern/cycles/kernel/shaders/node_output_volume.osl
@@ -18,6 +18,5 @@
volume node_output_volume(closure color Volume = 0)
{
- Ci = Volume;
+ Ci = Volume;
}
-
diff --git a/intern/cycles/kernel/shaders/node_particle_info.osl b/intern/cycles/kernel/shaders/node_particle_info.osl
index 2a0252d5e45..e286c33a1ff 100644
--- a/intern/cycles/kernel/shaders/node_particle_info.osl
+++ b/intern/cycles/kernel/shaders/node_particle_info.osl
@@ -16,23 +16,21 @@
#include "stdosl.h"
-shader node_particle_info(
- output float Index = 0.0,
- output float Random = 0.0,
- output float Age = 0.0,
- output float Lifetime = 0.0,
- output point Location = point(0.0, 0.0, 0.0),
- output float Size = 0.0,
- output vector Velocity = point(0.0, 0.0, 0.0),
- output vector AngularVelocity = point(0.0, 0.0, 0.0))
+shader node_particle_info(output float Index = 0.0,
+ output float Random = 0.0,
+ output float Age = 0.0,
+ output float Lifetime = 0.0,
+ output point Location = point(0.0, 0.0, 0.0),
+ output float Size = 0.0,
+ output vector Velocity = point(0.0, 0.0, 0.0),
+ output vector AngularVelocity = point(0.0, 0.0, 0.0))
{
- getattribute("particle:index", Index);
- getattribute("particle:random", Random);
- getattribute("particle:age", Age);
- getattribute("particle:lifetime", Lifetime);
- getattribute("particle:location", Location);
- getattribute("particle:size", Size);
- getattribute("particle:velocity", Velocity);
- getattribute("particle:angular_velocity", AngularVelocity);
+ getattribute("particle:index", Index);
+ getattribute("particle:random", Random);
+ getattribute("particle:age", Age);
+ getattribute("particle:lifetime", Lifetime);
+ getattribute("particle:location", Location);
+ getattribute("particle:size", Size);
+ getattribute("particle:velocity", Velocity);
+ getattribute("particle:angular_velocity", AngularVelocity);
}
-
diff --git a/intern/cycles/kernel/shaders/node_principled_bsdf.osl b/intern/cycles/kernel/shaders/node_principled_bsdf.osl
index 6f54ba3a462..657ced9b6e6 100644
--- a/intern/cycles/kernel/shaders/node_principled_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_principled_bsdf.osl
@@ -17,111 +17,144 @@
#include "stdosl.h"
#include "node_fresnel.h"
-shader node_principled_bsdf(
- string distribution = "Multiscatter GGX",
- string subsurface_method = "burley",
- color BaseColor = color(0.8, 0.8, 0.8),
- float Subsurface = 0.0,
- vector SubsurfaceRadius = vector(1.0, 1.0, 1.0),
- color SubsurfaceColor = color(0.7, 0.1, 0.1),
- float Metallic = 0.0,
- float Specular = 0.5,
- float SpecularTint = 0.0,
- float Roughness = 0.5,
- float Anisotropic = 0.0,
- float AnisotropicRotation = 0.0,
- float Sheen = 0.0,
- float SheenTint = 0.5,
- float Clearcoat = 0.0,
- float ClearcoatRoughness = 0.03,
- float IOR = 1.45,
- float Transmission = 0.0,
- float TransmissionRoughness = 0.0,
- normal Normal = N,
- normal ClearcoatNormal = N,
- normal Tangent = normalize(dPdu),
- output closure color BSDF = 0)
+shader node_principled_bsdf(string distribution = "Multiscatter GGX",
+ string subsurface_method = "burley",
+ color BaseColor = color(0.8, 0.8, 0.8),
+ float Subsurface = 0.0,
+ vector SubsurfaceRadius = vector(1.0, 1.0, 1.0),
+ color SubsurfaceColor = color(0.7, 0.1, 0.1),
+ float Metallic = 0.0,
+ float Specular = 0.5,
+ float SpecularTint = 0.0,
+ float Roughness = 0.5,
+ float Anisotropic = 0.0,
+ float AnisotropicRotation = 0.0,
+ float Sheen = 0.0,
+ float SheenTint = 0.5,
+ float Clearcoat = 0.0,
+ float ClearcoatRoughness = 0.03,
+ float IOR = 1.45,
+ float Transmission = 0.0,
+ float TransmissionRoughness = 0.0,
+ normal Normal = N,
+ normal ClearcoatNormal = N,
+ normal Tangent = normalize(dPdu),
+ output closure color BSDF = 0)
{
- float f = max(IOR, 1e-5);
- float diffuse_weight = (1.0 - clamp(Metallic, 0.0, 1.0)) * (1.0 - clamp(Transmission, 0.0, 1.0));
- float final_transmission = clamp(Transmission, 0.0, 1.0) * (1.0 - clamp(Metallic, 0.0, 1.0));
- float specular_weight = (1.0 - final_transmission);
-
- vector T = Tangent;
-
- float m_cdlum = luminance(BaseColor);
- color m_ctint = m_cdlum > 0.0 ? BaseColor / m_cdlum : color(0.0, 0.0, 0.0); // normalize lum. to isolate hue+sat
-
- /* rotate tangent */
- if (AnisotropicRotation != 0.0)
- T = rotate(T, AnisotropicRotation * M_2PI, point(0.0, 0.0, 0.0), Normal);
-
- if (diffuse_weight > 1e-5) {
- if (Subsurface > 1e-5) {
- color mixed_ss_base_color = SubsurfaceColor * Subsurface + BaseColor * (1.0 - Subsurface);
- if (subsurface_method == "burley") {
- BSDF = mixed_ss_base_color * bssrdf("principled", Normal, Subsurface * SubsurfaceRadius, SubsurfaceColor, "roughness", Roughness);
- }
- else {
- BSDF = mixed_ss_base_color * bssrdf("principled_random_walk", Normal, Subsurface * SubsurfaceRadius, mixed_ss_base_color, "roughness", Roughness);
- }
- }
- else {
- BSDF = BaseColor * principled_diffuse(Normal, Roughness);
- }
-
- if (Sheen > 1e-5) {
- color sheen_color = color(1.0, 1.0, 1.0) * (1.0 - SheenTint) + m_ctint * SheenTint;
-
- BSDF = BSDF + sheen_color * Sheen * principled_sheen(Normal);
- }
-
- BSDF = BSDF * diffuse_weight;
- }
-
- if (specular_weight > 1e-5) {
- float aspect = sqrt(1.0 - Anisotropic * 0.9);
- float r2 = Roughness * Roughness;
-
- float alpha_x = r2 / aspect;
- float alpha_y = r2 * aspect;
-
- color tmp_col = color(1.0, 1.0, 1.0) * (1.0 - SpecularTint) + m_ctint * SpecularTint;
-
- color Cspec0 = (Specular * 0.08 * tmp_col) * (1.0 - Metallic) + BaseColor * Metallic;
-
- if (distribution == "GGX" || Roughness <= 0.075) {
- BSDF = BSDF + specular_weight * microfacet_ggx_aniso_fresnel(Normal, T, alpha_x, alpha_y, (2.0 / (1.0 - sqrt(0.08 * Specular))) - 1.0, BaseColor, Cspec0);
- } else {
- BSDF = BSDF + specular_weight * microfacet_multi_ggx_aniso_fresnel(Normal, T, alpha_x, alpha_y, (2.0 / (1.0 - sqrt(0.08 * Specular))) - 1.0, BaseColor, Cspec0);
- }
- }
-
- if (final_transmission > 1e-5) {
- color Cspec0 = BaseColor * SpecularTint + color(1.0, 1.0, 1.0) * (1.0 - SpecularTint);
- float eta = backfacing() ? 1.0 / f : f;
-
- if (distribution == "GGX" || Roughness <= 5e-2) {
- float cosNO = dot(Normal, I);
- float Fr = fresnel_dielectric_cos(cosNO, eta);
-
- float refl_roughness = Roughness;
- if (Roughness <= 1e-2)
- refl_roughness = 0.0;
-
- float transmission_roughness = refl_roughness;
- if (distribution == "GGX")
- transmission_roughness = 1.0 - (1.0 - refl_roughness) * (1.0 - TransmissionRoughness);
-
- BSDF = BSDF + final_transmission * (Fr * microfacet_ggx_fresnel(Normal, refl_roughness * refl_roughness, eta, BaseColor, Cspec0) +
- (1.0 - Fr) * BaseColor * microfacet_ggx_refraction(Normal, transmission_roughness * transmission_roughness, eta));
- } else {
- BSDF = BSDF + final_transmission * microfacet_multi_ggx_glass_fresnel(Normal, Roughness * Roughness, eta, BaseColor, Cspec0);
- }
- }
-
- if (Clearcoat > 1e-5) {
- BSDF = BSDF + principled_clearcoat(ClearcoatNormal, Clearcoat, ClearcoatRoughness * ClearcoatRoughness);
- }
+ float f = max(IOR, 1e-5);
+ float diffuse_weight = (1.0 - clamp(Metallic, 0.0, 1.0)) * (1.0 - clamp(Transmission, 0.0, 1.0));
+ float final_transmission = clamp(Transmission, 0.0, 1.0) * (1.0 - clamp(Metallic, 0.0, 1.0));
+ float specular_weight = (1.0 - final_transmission);
+
+ vector T = Tangent;
+
+ float m_cdlum = luminance(BaseColor);
+ color m_ctint = m_cdlum > 0.0 ? BaseColor / m_cdlum :
+ color(0.0, 0.0, 0.0); // normalize lum. to isolate hue+sat
+
+ /* rotate tangent */
+ if (AnisotropicRotation != 0.0)
+ T = rotate(T, AnisotropicRotation * M_2PI, point(0.0, 0.0, 0.0), Normal);
+
+ if (diffuse_weight > 1e-5) {
+ if (Subsurface > 1e-5) {
+ color mixed_ss_base_color = SubsurfaceColor * Subsurface + BaseColor * (1.0 - Subsurface);
+ if (subsurface_method == "burley") {
+ BSDF = mixed_ss_base_color * bssrdf("principled",
+ Normal,
+ Subsurface * SubsurfaceRadius,
+ SubsurfaceColor,
+ "roughness",
+ Roughness);
+ }
+ else {
+ BSDF = mixed_ss_base_color * bssrdf("principled_random_walk",
+ Normal,
+ Subsurface * SubsurfaceRadius,
+ mixed_ss_base_color,
+ "roughness",
+ Roughness);
+ }
+ }
+ else {
+ BSDF = BaseColor * principled_diffuse(Normal, Roughness);
+ }
+
+ if (Sheen > 1e-5) {
+ color sheen_color = color(1.0, 1.0, 1.0) * (1.0 - SheenTint) + m_ctint * SheenTint;
+
+ BSDF = BSDF + sheen_color * Sheen * principled_sheen(Normal);
+ }
+
+ BSDF = BSDF * diffuse_weight;
+ }
+
+ if (specular_weight > 1e-5) {
+ float aspect = sqrt(1.0 - Anisotropic * 0.9);
+ float r2 = Roughness * Roughness;
+
+ float alpha_x = r2 / aspect;
+ float alpha_y = r2 * aspect;
+
+ color tmp_col = color(1.0, 1.0, 1.0) * (1.0 - SpecularTint) + m_ctint * SpecularTint;
+
+ color Cspec0 = (Specular * 0.08 * tmp_col) * (1.0 - Metallic) + BaseColor * Metallic;
+
+ if (distribution == "GGX" || Roughness <= 0.075) {
+ BSDF = BSDF + specular_weight *
+ microfacet_ggx_aniso_fresnel(Normal,
+ T,
+ alpha_x,
+ alpha_y,
+ (2.0 / (1.0 - sqrt(0.08 * Specular))) - 1.0,
+ BaseColor,
+ Cspec0);
+ }
+ else {
+ BSDF = BSDF + specular_weight * microfacet_multi_ggx_aniso_fresnel(
+ Normal,
+ T,
+ alpha_x,
+ alpha_y,
+ (2.0 / (1.0 - sqrt(0.08 * Specular))) - 1.0,
+ BaseColor,
+ Cspec0);
+ }
+ }
+
+ if (final_transmission > 1e-5) {
+ color Cspec0 = BaseColor * SpecularTint + color(1.0, 1.0, 1.0) * (1.0 - SpecularTint);
+ float eta = backfacing() ? 1.0 / f : f;
+
+ if (distribution == "GGX" || Roughness <= 5e-2) {
+ float cosNO = dot(Normal, I);
+ float Fr = fresnel_dielectric_cos(cosNO, eta);
+
+ float refl_roughness = Roughness;
+ if (Roughness <= 1e-2)
+ refl_roughness = 0.0;
+
+ float transmission_roughness = refl_roughness;
+ if (distribution == "GGX")
+ transmission_roughness = 1.0 - (1.0 - refl_roughness) * (1.0 - TransmissionRoughness);
+
+ BSDF = BSDF +
+ final_transmission *
+ (Fr * microfacet_ggx_fresnel(
+ Normal, refl_roughness * refl_roughness, eta, BaseColor, Cspec0) +
+ (1.0 - Fr) * BaseColor *
+ microfacet_ggx_refraction(
+ Normal, transmission_roughness * transmission_roughness, eta));
+ }
+ else {
+ BSDF = BSDF +
+ final_transmission * microfacet_multi_ggx_glass_fresnel(
+ Normal, Roughness * Roughness, eta, BaseColor, Cspec0);
+ }
+ }
+
+ if (Clearcoat > 1e-5) {
+ BSDF = BSDF + principled_clearcoat(
+ ClearcoatNormal, Clearcoat, ClearcoatRoughness * ClearcoatRoughness);
+ }
}
-
diff --git a/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl b/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl
index 757a88f8ece..bf986438fca 100644
--- a/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl
@@ -18,88 +18,88 @@
color log3(color a)
{
- return color(log(a[0]), log(a[1]), log(a[2]));
+ return color(log(a[0]), log(a[1]), log(a[2]));
}
color sigma_from_concentration(float eumelanin, float pheomelanin)
{
- return eumelanin*color(0.506, 0.841, 1.653) + pheomelanin*color(0.343, 0.733, 1.924);
+ return eumelanin * color(0.506, 0.841, 1.653) + pheomelanin * color(0.343, 0.733, 1.924);
}
color sigma_from_reflectance(color c, float azimuthal_roughness)
{
- float x = azimuthal_roughness;
- float roughness_fac = (((((0.245*x) + 5.574)*x - 10.73)*x + 2.532)*x - 0.215)*x + 5.969;
- color sigma = log3(c) / roughness_fac;
- return sigma * sigma;
+ float x = azimuthal_roughness;
+ float roughness_fac = (((((0.245 * x) + 5.574) * x - 10.73) * x + 2.532) * x - 0.215) * x +
+ 5.969;
+ color sigma = log3(c) / roughness_fac;
+ return sigma * sigma;
}
-shader node_principled_hair_bsdf(
- color Color = color(0.017513, 0.005763, 0.002059),
- float Melanin = 0.8,
- float MelaninRedness = 1.0,
- float RandomColor = 0.0,
- color Tint = 1.0,
- color AbsorptionCoefficient = color(0.245531, 0.52, 1.365),
- normal Normal = Ng,
- string parametrization = "Absorption coefficient",
- float Offset = radians(2),
- float Roughness = 0.3,
- float RadialRoughness = 0.3,
- float RandomRoughness = 0.0,
- float Coat = 0.0,
- float IOR = 1.55,
- string AttrRandom = "geom:curve_random",
- float Random = 0.0,
+shader node_principled_hair_bsdf(color Color = color(0.017513, 0.005763, 0.002059),
+ float Melanin = 0.8,
+ float MelaninRedness = 1.0,
+ float RandomColor = 0.0,
+ color Tint = 1.0,
+ color AbsorptionCoefficient = color(0.245531, 0.52, 1.365),
+ normal Normal = Ng,
+ string parametrization = "Absorption coefficient",
+ float Offset = radians(2),
+ float Roughness = 0.3,
+ float RadialRoughness = 0.3,
+ float RandomRoughness = 0.0,
+ float Coat = 0.0,
+ float IOR = 1.55,
+ string AttrRandom = "geom:curve_random",
+ float Random = 0.0,
- output closure color BSDF = 0)
+ output closure color BSDF = 0)
{
- /* Get random value from curve in none is specified. */
- float random_value = 0.0;
+ /* Get random value from curve in none is specified. */
+ float random_value = 0.0;
- if (isconnected(Random)) {
- random_value = Random;
- }
- else {
- getattribute(AttrRandom, random_value);
- }
+ if (isconnected(Random)) {
+ random_value = Random;
+ }
+ else {
+ getattribute(AttrRandom, random_value);
+ }
- /* Compute roughness. */
- float factor_random_roughness = 1.0 + 2.0*(random_value - 0.5)*RandomRoughness;
- float m0_roughness = 1.0 - clamp(Coat, 0.0, 1.0);
- float roughness = Roughness*factor_random_roughness;
- float radial_roughness = RadialRoughness*factor_random_roughness;
+ /* Compute roughness. */
+ float factor_random_roughness = 1.0 + 2.0 * (random_value - 0.5) * RandomRoughness;
+ float m0_roughness = 1.0 - clamp(Coat, 0.0, 1.0);
+ float roughness = Roughness * factor_random_roughness;
+ float radial_roughness = RadialRoughness * factor_random_roughness;
- /* Compute absorption. */
- color sigma;
+ /* Compute absorption. */
+ color sigma;
- if (parametrization == "Absorption coefficient") {
- sigma = AbsorptionCoefficient;
- }
- else if (parametrization == "Melanin concentration") {
- /* Randomize melanin. */
- float factor_random_color = 1.0 + 2.0*(random_value - 0.5) * RandomColor;
- float melanin = Melanin * factor_random_color;
+ if (parametrization == "Absorption coefficient") {
+ sigma = AbsorptionCoefficient;
+ }
+ else if (parametrization == "Melanin concentration") {
+ /* Randomize melanin. */
+ float factor_random_color = 1.0 + 2.0 * (random_value - 0.5) * RandomColor;
+ float melanin = Melanin * factor_random_color;
- /* Map melanin 0..inf from more perceptually linear 0..1. */
- melanin = -log(max(1.0 - melanin, 0.0001));
+ /* Map melanin 0..inf from more perceptually linear 0..1. */
+ melanin = -log(max(1.0 - melanin, 0.0001));
- /* Benedikt Bitterli's melanin ratio remapping. */
- float eumelanin = melanin * (1.0 - MelaninRedness);
- float pheomelanin = melanin * MelaninRedness;
- color melanin_sigma = sigma_from_concentration(eumelanin, pheomelanin);
+ /* Benedikt Bitterli's melanin ratio remapping. */
+ float eumelanin = melanin * (1.0 - MelaninRedness);
+ float pheomelanin = melanin * MelaninRedness;
+ color melanin_sigma = sigma_from_concentration(eumelanin, pheomelanin);
- /* Optional tint. */
- color tint_sigma = sigma_from_reflectance(Tint, radial_roughness);
- sigma = melanin_sigma + tint_sigma;
- }
- else if (parametrization == "Direct coloring"){
- sigma = sigma_from_reflectance(Color, radial_roughness);
- }
- else {
- /* Fallback to brownish hair, same as defaults for melanin. */
- sigma = sigma_from_concentration(0.0, 0.8054375);
- }
+ /* Optional tint. */
+ color tint_sigma = sigma_from_reflectance(Tint, radial_roughness);
+ sigma = melanin_sigma + tint_sigma;
+ }
+ else if (parametrization == "Direct coloring") {
+ sigma = sigma_from_reflectance(Color, radial_roughness);
+ }
+ else {
+ /* Fallback to brownish hair, same as defaults for melanin. */
+ sigma = sigma_from_concentration(0.0, 0.8054375);
+ }
- BSDF = principled_hair(Normal, sigma, roughness, radial_roughness, m0_roughness, Offset, IOR);
+ BSDF = principled_hair(Normal, sigma, roughness, radial_roughness, m0_roughness, Offset, IOR);
}
diff --git a/intern/cycles/kernel/shaders/node_principled_volume.osl b/intern/cycles/kernel/shaders/node_principled_volume.osl
index ea8d6ab12c5..39cf6837eb2 100644
--- a/intern/cycles/kernel/shaders/node_principled_volume.osl
+++ b/intern/cycles/kernel/shaders/node_principled_volume.osl
@@ -16,80 +16,78 @@
#include "stdosl.h"
-shader node_principled_volume(
- color Color = color(0.5, 0.5, 0.5),
- float Density = 1.0,
- float Anisotropy = 0.0,
- color AbsorptionColor = color(0.0, 0.0, 0.0),
- float EmissionStrength = 0.0,
- color EmissionColor = color(1.0, 1.0, 1.0),
- float BlackbodyIntensity = 0.0,
- color BlackbodyTint = color(1.0, 1.0, 1.0),
- float Temperature = 1500.0,
- string DensityAttribute = "geom:density",
- string ColorAttribute = "geom:color",
- string TemperatureAttribute = "geom:temperature",
- output closure color Volume = 0)
+shader node_principled_volume(color Color = color(0.5, 0.5, 0.5),
+ float Density = 1.0,
+ float Anisotropy = 0.0,
+ color AbsorptionColor = color(0.0, 0.0, 0.0),
+ float EmissionStrength = 0.0,
+ color EmissionColor = color(1.0, 1.0, 1.0),
+ float BlackbodyIntensity = 0.0,
+ color BlackbodyTint = color(1.0, 1.0, 1.0),
+ float Temperature = 1500.0,
+ string DensityAttribute = "geom:density",
+ string ColorAttribute = "geom:color",
+ string TemperatureAttribute = "geom:temperature",
+ output closure color Volume = 0)
{
- /* Compute density. */
- float primitive_density = 1.0;
- float density = max(Density, 0.0);
+ /* Compute density. */
+ float primitive_density = 1.0;
+ float density = max(Density, 0.0);
- if(density > 1e-5) {
- if(getattribute(DensityAttribute, primitive_density)) {
- density = max(density * primitive_density, 0.0);
- }
- }
+ if (density > 1e-5) {
+ if (getattribute(DensityAttribute, primitive_density)) {
+ density = max(density * primitive_density, 0.0);
+ }
+ }
- if(density > 1e-5) {
- /* Compute scattering color. */
- color scatter_color = Color;
- color primitive_color;
- if(getattribute(ColorAttribute, primitive_color)) {
- scatter_color *= primitive_color;
- }
+ if (density > 1e-5) {
+ /* Compute scattering color. */
+ color scatter_color = Color;
+ color primitive_color;
+ if (getattribute(ColorAttribute, primitive_color)) {
+ scatter_color *= primitive_color;
+ }
- /* Add scattering and absorption closures. */
- color scatter_coeff = scatter_color;
- color absorption_color = sqrt(max(AbsorptionColor, 0.0));
- color absorption_coeff = max(1.0 - scatter_color, 0.0) * max(1.0 - absorption_color, 0.0);
- Volume = scatter_coeff * density * henyey_greenstein(Anisotropy) +
- absorption_coeff * density * absorption();
- }
+ /* Add scattering and absorption closures. */
+ color scatter_coeff = scatter_color;
+ color absorption_color = sqrt(max(AbsorptionColor, 0.0));
+ color absorption_coeff = max(1.0 - scatter_color, 0.0) * max(1.0 - absorption_color, 0.0);
+ Volume = scatter_coeff * density * henyey_greenstein(Anisotropy) +
+ absorption_coeff * density * absorption();
+ }
- /* Compute emission. */
- float emission_strength = max(EmissionStrength, 0.0);
- float blackbody_intensity = BlackbodyIntensity;
+ /* Compute emission. */
+ float emission_strength = max(EmissionStrength, 0.0);
+ float blackbody_intensity = BlackbodyIntensity;
- if(emission_strength > 1e-5) {
- Volume += emission_strength * EmissionColor * emission();
- }
+ if (emission_strength > 1e-5) {
+ Volume += emission_strength * EmissionColor * emission();
+ }
- if(blackbody_intensity > 1e-3) {
- float T = Temperature;
+ if (blackbody_intensity > 1e-3) {
+ float T = Temperature;
- /* Add temperature from attribute if available. */
- float temperature;
- if(getattribute(TemperatureAttribute, temperature)) {
- T *= max(temperature, 0.0);
- }
+ /* Add temperature from attribute if available. */
+ float temperature;
+ if (getattribute(TemperatureAttribute, temperature)) {
+ T *= max(temperature, 0.0);
+ }
- T = max(T, 0.0);
+ T = max(T, 0.0);
- /* Stefan-Boltzman law. */
- float T4 = (T * T) * (T * T);
- float sigma = 5.670373e-8 * 1e-6 / M_PI;
- float intensity = sigma * mix(1.0, T4, blackbody_intensity);
+ /* Stefan-Boltzman law. */
+ float T4 = (T * T) * (T * T);
+ float sigma = 5.670373e-8 * 1e-6 / M_PI;
+ float intensity = sigma * mix(1.0, T4, blackbody_intensity);
- if(intensity > 1e-5) {
- color bb = blackbody(T);
- float l = luminance(bb);
+ if (intensity > 1e-5) {
+ color bb = blackbody(T);
+ float l = luminance(bb);
- if(l != 0.0) {
- bb *= BlackbodyTint * intensity / l;
- Volume += bb * emission();
- }
- }
- }
+ if (l != 0.0) {
+ bb *= BlackbodyTint * intensity / l;
+ Volume += bb * emission();
+ }
+ }
+ }
}
-
diff --git a/intern/cycles/kernel/shaders/node_ramp_util.h b/intern/cycles/kernel/shaders/node_ramp_util.h
index d07d5a98316..f7fb07b257d 100644
--- a/intern/cycles/kernel/shaders/node_ramp_util.h
+++ b/intern/cycles/kernel/shaders/node_ramp_util.h
@@ -18,72 +18,76 @@
color rgb_ramp_lookup(color ramp[], float at, int interpolate, int extrapolate)
{
- float f = at;
- int table_size = arraylength(ramp);
+ float f = at;
+ int table_size = arraylength(ramp);
- if ((f < 0.0 || f > 1.0) && extrapolate) {
- color t0, dy;
- if (f < 0.0) {
- t0 = ramp[0];
- dy = t0 - ramp[1];
- f = -f;
- }
- else {
- t0 = ramp[table_size - 1];
- dy = t0 - ramp[table_size - 2];
- f = f - 1.0;
- }
- return t0 + dy * f * (table_size - 1);
- }
+ if ((f < 0.0 || f > 1.0) && extrapolate) {
+ color t0, dy;
+ if (f < 0.0) {
+ t0 = ramp[0];
+ dy = t0 - ramp[1];
+ f = -f;
+ }
+ else {
+ t0 = ramp[table_size - 1];
+ dy = t0 - ramp[table_size - 2];
+ f = f - 1.0;
+ }
+ return t0 + dy * f * (table_size - 1);
+ }
- f = clamp(at, 0.0, 1.0) * (table_size - 1);
+ f = clamp(at, 0.0, 1.0) * (table_size - 1);
- /* clamp int as well in case of NaN */
- int i = (int)f;
- if (i < 0) i = 0;
- if (i >= table_size) i = table_size - 1;
- float t = f - (float)i;
+ /* clamp int as well in case of NaN */
+ int i = (int)f;
+ if (i < 0)
+ i = 0;
+ if (i >= table_size)
+ i = table_size - 1;
+ float t = f - (float)i;
- color result = ramp[i];
+ color result = ramp[i];
- if (interpolate && t > 0.0)
- result = (1.0 - t) * result + t * ramp[i + 1];
+ if (interpolate && t > 0.0)
+ result = (1.0 - t) * result + t * ramp[i + 1];
- return result;
+ return result;
}
float rgb_ramp_lookup(float ramp[], float at, int interpolate, int extrapolate)
{
- float f = at;
- int table_size = arraylength(ramp);
+ float f = at;
+ int table_size = arraylength(ramp);
- if ((f < 0.0 || f > 1.0) && extrapolate) {
- float t0, dy;
- if (f < 0.0) {
- t0 = ramp[0];
- dy = t0 - ramp[1];
- f = -f;
- }
- else {
- t0 = ramp[table_size - 1];
- dy = t0 - ramp[table_size - 2];
- f = f - 1.0;
- }
- return t0 + dy * f * (table_size - 1);
- }
+ if ((f < 0.0 || f > 1.0) && extrapolate) {
+ float t0, dy;
+ if (f < 0.0) {
+ t0 = ramp[0];
+ dy = t0 - ramp[1];
+ f = -f;
+ }
+ else {
+ t0 = ramp[table_size - 1];
+ dy = t0 - ramp[table_size - 2];
+ f = f - 1.0;
+ }
+ return t0 + dy * f * (table_size - 1);
+ }
- f = clamp(at, 0.0, 1.0) * (table_size - 1);
+ f = clamp(at, 0.0, 1.0) * (table_size - 1);
- /* clamp int as well in case of NaN */
- int i = (int)f;
- if (i < 0) i = 0;
- if (i >= table_size) i = table_size - 1;
- float t = f - (float)i;
+ /* clamp int as well in case of NaN */
+ int i = (int)f;
+ if (i < 0)
+ i = 0;
+ if (i >= table_size)
+ i = table_size - 1;
+ float t = f - (float)i;
- float result = ramp[i];
+ float result = ramp[i];
- if (interpolate && t > 0.0)
- result = (1.0 - t) * result + t * ramp[i + 1];
+ if (interpolate && t > 0.0)
+ result = (1.0 - t) * result + t * ramp[i + 1];
- return result;
+ return result;
}
diff --git a/intern/cycles/kernel/shaders/node_refraction_bsdf.osl b/intern/cycles/kernel/shaders/node_refraction_bsdf.osl
index eaab7282243..941d99dd44d 100644
--- a/intern/cycles/kernel/shaders/node_refraction_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_refraction_bsdf.osl
@@ -16,23 +16,21 @@
#include "stdosl.h"
-shader node_refraction_bsdf(
- color Color = 0.8,
- string distribution = "sharp",
- float Roughness = 0.2,
- float IOR = 1.45,
- normal Normal = N,
- output closure color BSDF = 0)
+shader node_refraction_bsdf(color Color = 0.8,
+ string distribution = "sharp",
+ float Roughness = 0.2,
+ float IOR = 1.45,
+ normal Normal = N,
+ output closure color BSDF = 0)
{
- float f = max(IOR, 1e-5);
- float eta = backfacing() ? 1.0 / f : f;
- float roughness = Roughness * Roughness;
+ float f = max(IOR, 1e-5);
+ float eta = backfacing() ? 1.0 / f : f;
+ float roughness = Roughness * Roughness;
- if (distribution == "sharp")
- BSDF = Color * refraction(Normal, eta);
- else if (distribution == "beckmann")
- BSDF = Color * microfacet_beckmann_refraction(Normal, roughness, eta);
- else if (distribution == "GGX")
- BSDF = Color * microfacet_ggx_refraction(Normal, roughness, eta);
+ if (distribution == "sharp")
+ BSDF = Color * refraction(Normal, eta);
+ else if (distribution == "beckmann")
+ BSDF = Color * microfacet_beckmann_refraction(Normal, roughness, eta);
+ else if (distribution == "GGX")
+ BSDF = Color * microfacet_ggx_refraction(Normal, roughness, eta);
}
-
diff --git a/intern/cycles/kernel/shaders/node_rgb_curves.osl b/intern/cycles/kernel/shaders/node_rgb_curves.osl
index 0d5eeea5c43..e34eb027cc3 100644
--- a/intern/cycles/kernel/shaders/node_rgb_curves.osl
+++ b/intern/cycles/kernel/shaders/node_rgb_curves.osl
@@ -17,25 +17,23 @@
#include "stdosl.h"
#include "node_ramp_util.h"
-shader node_rgb_curves(
- color ramp[] = {0.0},
- float min_x = 0.0,
- float max_x = 1.0,
+shader node_rgb_curves(color ramp[] = {0.0},
+ float min_x = 0.0,
+ float max_x = 1.0,
- color ColorIn = 0.0,
- float Fac = 0.0,
- output color ColorOut = 0.0)
+ color ColorIn = 0.0,
+ float Fac = 0.0,
+ output color ColorOut = 0.0)
{
- color c = (ColorIn - color(min_x, min_x, min_x)) / (max_x - min_x);
+ color c = (ColorIn - color(min_x, min_x, min_x)) / (max_x - min_x);
- color r = rgb_ramp_lookup(ramp, c[0], 1, 1);
- color g = rgb_ramp_lookup(ramp, c[1], 1, 1);
- color b = rgb_ramp_lookup(ramp, c[2], 1, 1);
+ color r = rgb_ramp_lookup(ramp, c[0], 1, 1);
+ color g = rgb_ramp_lookup(ramp, c[1], 1, 1);
+ color b = rgb_ramp_lookup(ramp, c[2], 1, 1);
- ColorOut[0] = r[0];
- ColorOut[1] = g[1];
- ColorOut[2] = b[2];
+ ColorOut[0] = r[0];
+ ColorOut[1] = g[1];
+ ColorOut[2] = b[2];
- ColorOut = mix(ColorIn, ColorOut, Fac);
+ ColorOut = mix(ColorIn, ColorOut, Fac);
}
-
diff --git a/intern/cycles/kernel/shaders/node_rgb_ramp.osl b/intern/cycles/kernel/shaders/node_rgb_ramp.osl
index 4e7d8fdcf65..c9f9746a4fb 100644
--- a/intern/cycles/kernel/shaders/node_rgb_ramp.osl
+++ b/intern/cycles/kernel/shaders/node_rgb_ramp.osl
@@ -17,16 +17,14 @@
#include "stdosl.h"
#include "node_ramp_util.h"
-shader node_rgb_ramp(
- color ramp_color[] = {0.0},
- float ramp_alpha[] = {0.0},
- int interpolate = 1,
+shader node_rgb_ramp(color ramp_color[] = {0.0},
+ float ramp_alpha[] = {0.0},
+ int interpolate = 1,
- float Fac = 0.0,
- output color Color = 0.0,
- output float Alpha = 1.0)
+ float Fac = 0.0,
+ output color Color = 0.0,
+ output float Alpha = 1.0)
{
- Color = rgb_ramp_lookup(ramp_color, Fac, interpolate, 0);
- Alpha = rgb_ramp_lookup(ramp_alpha, Fac, interpolate, 0);
+ Color = rgb_ramp_lookup(ramp_color, Fac, interpolate, 0);
+ Alpha = rgb_ramp_lookup(ramp_alpha, Fac, interpolate, 0);
}
-
diff --git a/intern/cycles/kernel/shaders/node_rgb_to_bw.osl b/intern/cycles/kernel/shaders/node_rgb_to_bw.osl
index 903dfcdc881..837d6caf5fc 100644
--- a/intern/cycles/kernel/shaders/node_rgb_to_bw.osl
+++ b/intern/cycles/kernel/shaders/node_rgb_to_bw.osl
@@ -16,10 +16,7 @@
#include "stdosl.h"
-shader node_rgb_to_bw(
- color Color = 0.0,
- output float Val = 0.0)
+shader node_rgb_to_bw(color Color = 0.0, output float Val = 0.0)
{
- Val = Color[0] * 0.2126 + Color[1] * 0.7152 + Color[2] * 0.0722;
+ Val = Color[0] * 0.2126 + Color[1] * 0.7152 + Color[2] * 0.0722;
}
-
diff --git a/intern/cycles/kernel/shaders/node_scatter_volume.osl b/intern/cycles/kernel/shaders/node_scatter_volume.osl
index 002e2750fca..fce5716f372 100644
--- a/intern/cycles/kernel/shaders/node_scatter_volume.osl
+++ b/intern/cycles/kernel/shaders/node_scatter_volume.osl
@@ -16,12 +16,10 @@
#include "stdosl.h"
-shader node_scatter_volume(
- color Color = color(0.8, 0.8, 0.8),
- float Density = 1.0,
- float Anisotropy = 0.0,
- output closure color Volume = 0)
+shader node_scatter_volume(color Color = color(0.8, 0.8, 0.8),
+ float Density = 1.0,
+ float Anisotropy = 0.0,
+ output closure color Volume = 0)
{
- Volume = (Color * max(Density, 0.0)) * henyey_greenstein(Anisotropy);
+ Volume = (Color * max(Density, 0.0)) * henyey_greenstein(Anisotropy);
}
-
diff --git a/intern/cycles/kernel/shaders/node_separate_hsv.osl b/intern/cycles/kernel/shaders/node_separate_hsv.osl
index 2a804040294..c77ed1f3755 100644
--- a/intern/cycles/kernel/shaders/node_separate_hsv.osl
+++ b/intern/cycles/kernel/shaders/node_separate_hsv.osl
@@ -17,15 +17,14 @@
#include "stdosl.h"
#include "node_color.h"
-shader node_separate_hsv(
- color Color = 0.8,
- output float H = 0.0,
- output float S = 0.0,
- output float V = 0.0)
+shader node_separate_hsv(color Color = 0.8,
+ output float H = 0.0,
+ output float S = 0.0,
+ output float V = 0.0)
{
- color col = rgb_to_hsv(Color);
-
- H = col[0];
- S = col[1];
- V = col[2];
+ color col = rgb_to_hsv(Color);
+
+ H = col[0];
+ S = col[1];
+ V = col[2];
}
diff --git a/intern/cycles/kernel/shaders/node_separate_rgb.osl b/intern/cycles/kernel/shaders/node_separate_rgb.osl
index 43d9e3aa4b1..ee64add27e2 100644
--- a/intern/cycles/kernel/shaders/node_separate_rgb.osl
+++ b/intern/cycles/kernel/shaders/node_separate_rgb.osl
@@ -16,13 +16,12 @@
#include "stdosl.h"
-shader node_separate_rgb(
- color Image = 0.8,
- output float R = 0.0,
- output float G = 0.0,
- output float B = 0.0)
+shader node_separate_rgb(color Image = 0.8,
+ output float R = 0.0,
+ output float G = 0.0,
+ output float B = 0.0)
{
- R = Image[0];
- G = Image[1];
- B = Image[2];
+ R = Image[0];
+ G = Image[1];
+ B = Image[2];
}
diff --git a/intern/cycles/kernel/shaders/node_separate_xyz.osl b/intern/cycles/kernel/shaders/node_separate_xyz.osl
index e1963a1902f..8a563f5e920 100644
--- a/intern/cycles/kernel/shaders/node_separate_xyz.osl
+++ b/intern/cycles/kernel/shaders/node_separate_xyz.osl
@@ -16,13 +16,12 @@
#include "stdosl.h"
-shader node_separate_xyz(
- vector Vector = 0.8,
- output float X = 0.0,
- output float Y = 0.0,
- output float Z = 0.0)
+shader node_separate_xyz(vector Vector = 0.8,
+ output float X = 0.0,
+ output float Y = 0.0,
+ output float Z = 0.0)
{
- X = Vector[0];
- Y = Vector[1];
- Z = Vector[2];
+ X = Vector[0];
+ Y = Vector[1];
+ Z = Vector[2];
}
diff --git a/intern/cycles/kernel/shaders/node_set_normal.osl b/intern/cycles/kernel/shaders/node_set_normal.osl
index 7ca7ac9350c..9541b829ef7 100644
--- a/intern/cycles/kernel/shaders/node_set_normal.osl
+++ b/intern/cycles/kernel/shaders/node_set_normal.osl
@@ -16,11 +16,8 @@
#include "stdosl.h"
-surface node_set_normal(
- normal Direction = N,
- output normal Normal = N)
+surface node_set_normal(normal Direction = N, output normal Normal = N)
{
- N = Direction;
- Normal = Direction;
+ N = Direction;
+ Normal = Direction;
}
-
diff --git a/intern/cycles/kernel/shaders/node_sky_texture.osl b/intern/cycles/kernel/shaders/node_sky_texture.osl
index a6c187d15f2..9b29e5489c2 100644
--- a/intern/cycles/kernel/shaders/node_sky_texture.osl
+++ b/intern/cycles/kernel/shaders/node_sky_texture.osl
@@ -19,115 +19,122 @@
float sky_angle_between(float thetav, float phiv, float theta, float phi)
{
- float cospsi = sin(thetav) * sin(theta) * cos(phi - phiv) + cos(thetav) * cos(theta);
+ float cospsi = sin(thetav) * sin(theta) * cos(phi - phiv) + cos(thetav) * cos(theta);
- if (cospsi > 1.0)
- return 0.0;
- if (cospsi < -1.0)
- return M_PI;
+ if (cospsi > 1.0)
+ return 0.0;
+ if (cospsi < -1.0)
+ return M_PI;
- return acos(cospsi);
+ return acos(cospsi);
}
vector sky_spherical_coordinates(vector dir)
{
- return vector(acos(dir[2]), atan2(dir[0], dir[1]), 0);
+ return vector(acos(dir[2]), atan2(dir[0], dir[1]), 0);
}
/* Preetham */
float sky_perez_function(float lam[9], float theta, float gamma)
{
- float ctheta = cos(theta);
- float cgamma = cos(gamma);
+ float ctheta = cos(theta);
+ float cgamma = cos(gamma);
- return (1.0 + lam[0] * exp(lam[1] / ctheta)) * (1.0 + lam[2] * exp(lam[3] * gamma) + lam[4] * cgamma * cgamma);
+ return (1.0 + lam[0] * exp(lam[1] / ctheta)) *
+ (1.0 + lam[2] * exp(lam[3] * gamma) + lam[4] * cgamma * cgamma);
}
color sky_radiance_old(normal dir,
- float sunphi, float suntheta, color radiance,
- float config_x[9], float config_y[9], float config_z[9])
+ float sunphi,
+ float suntheta,
+ color radiance,
+ float config_x[9],
+ float config_y[9],
+ float config_z[9])
{
- /* convert vector to spherical coordinates */
- vector spherical = sky_spherical_coordinates(dir);
- float theta = spherical[0];
- float phi = spherical[1];
+ /* convert vector to spherical coordinates */
+ vector spherical = sky_spherical_coordinates(dir);
+ float theta = spherical[0];
+ float phi = spherical[1];
- /* angle between sun direction and dir */
- float gamma = sky_angle_between(theta, phi, suntheta, sunphi);
+ /* angle between sun direction and dir */
+ float gamma = sky_angle_between(theta, phi, suntheta, sunphi);
- /* clamp theta to horizon */
- theta = min(theta, M_PI_2 - 0.001);
+ /* clamp theta to horizon */
+ theta = min(theta, M_PI_2 - 0.001);
- /* compute xyY color space values */
- float x = radiance[1] * sky_perez_function(config_y, theta, gamma);
- float y = radiance[2] * sky_perez_function(config_z, theta, gamma);
- float Y = radiance[0] * sky_perez_function(config_x, theta, gamma);
+ /* compute xyY color space values */
+ float x = radiance[1] * sky_perez_function(config_y, theta, gamma);
+ float y = radiance[2] * sky_perez_function(config_z, theta, gamma);
+ float Y = radiance[0] * sky_perez_function(config_x, theta, gamma);
- /* convert to RGB */
- color xyz = xyY_to_xyz(x, y, Y);
- return xyz_to_rgb(xyz[0], xyz[1], xyz[2]);
+ /* convert to RGB */
+ color xyz = xyY_to_xyz(x, y, Y);
+ return xyz_to_rgb(xyz[0], xyz[1], xyz[2]);
}
/* Hosek / Wilkie */
float sky_radiance_internal(float config[9], float theta, float gamma)
{
- float ctheta = cos(theta);
- float cgamma = cos(gamma);
-
- float expM = exp(config[4] * gamma);
- float rayM = cgamma * cgamma;
- float mieM = (1.0 + rayM) / pow((1.0 + config[8] * config[8] - 2.0 * config[8] * cgamma), 1.5);
- float zenith = sqrt(ctheta);
-
- return (1.0 + config[0] * exp(config[1] / (ctheta + 0.01))) *
- (config[2] + config[3] * expM + config[5] * rayM + config[6] * mieM + config[7] * zenith);
+ float ctheta = cos(theta);
+ float cgamma = cos(gamma);
+
+ float expM = exp(config[4] * gamma);
+ float rayM = cgamma * cgamma;
+ float mieM = (1.0 + rayM) / pow((1.0 + config[8] * config[8] - 2.0 * config[8] * cgamma), 1.5);
+ float zenith = sqrt(ctheta);
+
+ return (1.0 + config[0] * exp(config[1] / (ctheta + 0.01))) *
+ (config[2] + config[3] * expM + config[5] * rayM + config[6] * mieM + config[7] * zenith);
}
color sky_radiance_new(normal dir,
- float sunphi, float suntheta, color radiance,
- float config_x[9], float config_y[9], float config_z[9])
+ float sunphi,
+ float suntheta,
+ color radiance,
+ float config_x[9],
+ float config_y[9],
+ float config_z[9])
{
- /* convert vector to spherical coordinates */
- vector spherical = sky_spherical_coordinates(dir);
- float theta = spherical[0];
- float phi = spherical[1];
+ /* convert vector to spherical coordinates */
+ vector spherical = sky_spherical_coordinates(dir);
+ float theta = spherical[0];
+ float phi = spherical[1];
- /* angle between sun direction and dir */
- float gamma = sky_angle_between(theta, phi, suntheta, sunphi);
+ /* angle between sun direction and dir */
+ float gamma = sky_angle_between(theta, phi, suntheta, sunphi);
- /* clamp theta to horizon */
- theta = min(theta, M_PI_2 - 0.001);
+ /* clamp theta to horizon */
+ theta = min(theta, M_PI_2 - 0.001);
- /* compute xyz color space values */
- float x = sky_radiance_internal(config_x, theta, gamma) * radiance[0];
- float y = sky_radiance_internal(config_y, theta, gamma) * radiance[1];
- float z = sky_radiance_internal(config_z, theta, gamma) * radiance[2];
+ /* compute xyz color space values */
+ float x = sky_radiance_internal(config_x, theta, gamma) * radiance[0];
+ float y = sky_radiance_internal(config_y, theta, gamma) * radiance[1];
+ float z = sky_radiance_internal(config_z, theta, gamma) * radiance[2];
- /* convert to RGB and adjust strength */
- return xyz_to_rgb(x, y, z) * (M_2PI / 683);
+ /* convert to RGB and adjust strength */
+ return xyz_to_rgb(x, y, z) * (M_2PI / 683);
}
-shader node_sky_texture(
- int use_mapping = 0,
- matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
- vector Vector = P,
- string type = "hosek_wilkie",
- float theta = 0.0,
- float phi = 0.0,
- color radiance = color(0.0, 0.0, 0.0),
- float config_x[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
- float config_y[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
- float config_z[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
- output color Color = color(0.0, 0.0, 0.0))
+shader node_sky_texture(int use_mapping = 0,
+ matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+ vector Vector = P,
+ string type = "hosek_wilkie",
+ float theta = 0.0,
+ float phi = 0.0,
+ color radiance = color(0.0, 0.0, 0.0),
+ float config_x[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
+ float config_y[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
+ float config_z[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
+ output color Color = color(0.0, 0.0, 0.0))
{
- vector p = Vector;
-
- if (use_mapping)
- p = transform(mapping, p);
-
- if (type == "hosek_wilkie")
- Color = sky_radiance_new(p, phi, theta, radiance, config_x, config_y, config_z);
- else
- Color = sky_radiance_old(p, phi, theta, radiance, config_x, config_y, config_z);
-}
+ vector p = Vector;
+ if (use_mapping)
+ p = transform(mapping, p);
+
+ if (type == "hosek_wilkie")
+ Color = sky_radiance_new(p, phi, theta, radiance, config_x, config_y, config_z);
+ else
+ Color = sky_radiance_old(p, phi, theta, radiance, config_x, config_y, config_z);
+}
diff --git a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl
index 0df3256e1fd..e12199d8c3d 100644
--- a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl
+++ b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl
@@ -16,23 +16,30 @@
#include "stdosl.h"
-shader node_subsurface_scattering(
- color Color = 0.8,
- float Scale = 1.0,
- vector Radius = vector(0.1, 0.1, 0.1),
- float TextureBlur = 0.0,
- float Sharpness = 0.0,
- string falloff = "cubic",
- normal Normal = N,
- output closure color BSSRDF = 0)
+shader node_subsurface_scattering(color Color = 0.8,
+ float Scale = 1.0,
+ vector Radius = vector(0.1, 0.1, 0.1),
+ float TextureBlur = 0.0,
+ float Sharpness = 0.0,
+ string falloff = "cubic",
+ normal Normal = N,
+ output closure color BSSRDF = 0)
{
- if (falloff == "gaussian")
- BSSRDF = Color * bssrdf("gaussian", Normal, Scale * Radius, Color, "texture_blur", TextureBlur);
- else if (falloff == "cubic")
- BSSRDF = Color * bssrdf("cubic", Normal, Scale * Radius, Color, "texture_blur", TextureBlur, "sharpness", Sharpness);
- else if (falloff == "burley")
- BSSRDF = Color * bssrdf("burley", Normal, Scale * Radius, Color, "texture_blur", TextureBlur);
- else
- BSSRDF = Color * bssrdf("random_walk", Normal, Scale * Radius, Color, "texture_blur", TextureBlur);
+ if (falloff == "gaussian")
+ BSSRDF = Color *
+ bssrdf("gaussian", Normal, Scale * Radius, Color, "texture_blur", TextureBlur);
+ else if (falloff == "cubic")
+ BSSRDF = Color * bssrdf("cubic",
+ Normal,
+ Scale * Radius,
+ Color,
+ "texture_blur",
+ TextureBlur,
+ "sharpness",
+ Sharpness);
+ else if (falloff == "burley")
+ BSSRDF = Color * bssrdf("burley", Normal, Scale * Radius, Color, "texture_blur", TextureBlur);
+ else
+ BSSRDF = Color *
+ bssrdf("random_walk", Normal, Scale * Radius, Color, "texture_blur", TextureBlur);
}
-
diff --git a/intern/cycles/kernel/shaders/node_tangent.osl b/intern/cycles/kernel/shaders/node_tangent.osl
index c527070a2c8..44eb9973f3d 100644
--- a/intern/cycles/kernel/shaders/node_tangent.osl
+++ b/intern/cycles/kernel/shaders/node_tangent.osl
@@ -16,33 +16,31 @@
#include "stdosl.h"
-shader node_tangent(
- normal NormalIn = N,
- string attr_name = "geom:tangent",
- string direction_type = "radial",
- string axis = "z",
- output normal Tangent = normalize(dPdu))
+shader node_tangent(normal NormalIn = N,
+ string attr_name = "geom:tangent",
+ string direction_type = "radial",
+ string axis = "z",
+ output normal Tangent = normalize(dPdu))
{
- vector T;
+ vector T;
- if (direction_type == "uv_map") {
- getattribute(attr_name, T);
- }
- else if (direction_type == "radial") {
- point generated;
+ if (direction_type == "uv_map") {
+ getattribute(attr_name, T);
+ }
+ else if (direction_type == "radial") {
+ point generated;
- if (!getattribute("geom:generated", generated))
- generated = P;
+ if (!getattribute("geom:generated", generated))
+ generated = P;
- if (axis == "x")
- T = vector(0.0, -(generated[2] - 0.5), (generated[1] - 0.5));
- else if (axis == "y")
- T = vector(-(generated[2] - 0.5), 0.0, (generated[0] - 0.5));
- else
- T = vector(-(generated[1] - 0.5), (generated[0] - 0.5), 0.0);
- }
+ if (axis == "x")
+ T = vector(0.0, -(generated[2] - 0.5), (generated[1] - 0.5));
+ else if (axis == "y")
+ T = vector(-(generated[2] - 0.5), 0.0, (generated[0] - 0.5));
+ else
+ T = vector(-(generated[1] - 0.5), (generated[0] - 0.5), 0.0);
+ }
- T = transform("object", "world", T);
- Tangent = cross(NormalIn, normalize(cross(T, NormalIn)));
+ T = transform("object", "world", T);
+ Tangent = cross(NormalIn, normalize(cross(T, NormalIn)));
}
-
diff --git a/intern/cycles/kernel/shaders/node_texture.h b/intern/cycles/kernel/shaders/node_texture.h
index 88305fb320f..e1f3b900ee5 100644
--- a/intern/cycles/kernel/shaders/node_texture.h
+++ b/intern/cycles/kernel/shaders/node_texture.h
@@ -18,148 +18,148 @@
color cellnoise_color(point p)
{
- float r = cellnoise(p);
- float g = cellnoise(point(p[1], p[0], p[2]));
- float b = cellnoise(point(p[1], p[2], p[0]));
+ float r = cellnoise(p);
+ float g = cellnoise(point(p[1], p[0], p[2]));
+ float b = cellnoise(point(p[1], p[2], p[0]));
- return color(r, g, b);
+ return color(r, g, b);
}
void voronoi(point p, float e, float da[4], point pa[4])
{
- /* returns distances in da and point coords in pa */
- int xx, yy, zz, xi, yi, zi;
-
- xi = (int)floor(p[0]);
- yi = (int)floor(p[1]);
- zi = (int)floor(p[2]);
-
- da[0] = 1e10;
- da[1] = 1e10;
- da[2] = 1e10;
- da[3] = 1e10;
-
- for (xx = xi - 1; xx <= xi + 1; xx++) {
- for (yy = yi - 1; yy <= yi + 1; yy++) {
- for (zz = zi - 1; zz <= zi + 1; zz++) {
- point ip = point(xx, yy, zz);
- point vp = (point)cellnoise_color(ip);
- point pd = p - (vp + ip);
- float d = dot(pd, pd);
-
- vp += point(xx, yy, zz);
-
- if (d < da[0]) {
- da[3] = da[2];
- da[2] = da[1];
- da[1] = da[0];
- da[0] = d;
-
- pa[3] = pa[2];
- pa[2] = pa[1];
- pa[1] = pa[0];
- pa[0] = vp;
- }
- else if (d < da[1]) {
- da[3] = da[2];
- da[2] = da[1];
- da[1] = d;
-
- pa[3] = pa[2];
- pa[2] = pa[1];
- pa[1] = vp;
- }
- else if (d < da[2]) {
- da[3] = da[2];
- da[2] = d;
-
- pa[3] = pa[2];
- pa[2] = vp;
- }
- else if (d < da[3]) {
- da[3] = d;
- pa[3] = vp;
- }
- }
- }
- }
+ /* returns distances in da and point coords in pa */
+ int xx, yy, zz, xi, yi, zi;
+
+ xi = (int)floor(p[0]);
+ yi = (int)floor(p[1]);
+ zi = (int)floor(p[2]);
+
+ da[0] = 1e10;
+ da[1] = 1e10;
+ da[2] = 1e10;
+ da[3] = 1e10;
+
+ for (xx = xi - 1; xx <= xi + 1; xx++) {
+ for (yy = yi - 1; yy <= yi + 1; yy++) {
+ for (zz = zi - 1; zz <= zi + 1; zz++) {
+ point ip = point(xx, yy, zz);
+ point vp = (point)cellnoise_color(ip);
+ point pd = p - (vp + ip);
+ float d = dot(pd, pd);
+
+ vp += point(xx, yy, zz);
+
+ if (d < da[0]) {
+ da[3] = da[2];
+ da[2] = da[1];
+ da[1] = da[0];
+ da[0] = d;
+
+ pa[3] = pa[2];
+ pa[2] = pa[1];
+ pa[1] = pa[0];
+ pa[0] = vp;
+ }
+ else if (d < da[1]) {
+ da[3] = da[2];
+ da[2] = da[1];
+ da[1] = d;
+
+ pa[3] = pa[2];
+ pa[2] = pa[1];
+ pa[1] = vp;
+ }
+ else if (d < da[2]) {
+ da[3] = da[2];
+ da[2] = d;
+
+ pa[3] = pa[2];
+ pa[2] = vp;
+ }
+ else if (d < da[3]) {
+ da[3] = d;
+ pa[3] = vp;
+ }
+ }
+ }
+ }
}
/* Noise Bases */
float safe_noise(point p, string type)
{
- float f = 0.0;
+ float f = 0.0;
- /* Perlin noise in range -1..1 */
- if (type == "signed")
- f = noise("perlin", p);
+ /* Perlin noise in range -1..1 */
+ if (type == "signed")
+ f = noise("perlin", p);
- /* Perlin noise in range 0..1 */
- else
- f = noise(p);
+ /* Perlin noise in range 0..1 */
+ else
+ f = noise(p);
- /* can happen for big coordinates, things even out to 0.5 then anyway */
- if (!isfinite(f))
- return 0.5;
+ /* can happen for big coordinates, things even out to 0.5 then anyway */
+ if (!isfinite(f))
+ return 0.5;
- return f;
+ return f;
}
/* Turbulence */
float noise_turbulence(point p, float details, int hard)
{
- float fscale = 1.0;
- float amp = 1.0;
- float sum = 0.0;
- int i, n;
+ float fscale = 1.0;
+ float amp = 1.0;
+ float sum = 0.0;
+ int i, n;
- float octaves = clamp(details, 0.0, 16.0);
- n = (int)octaves;
+ float octaves = clamp(details, 0.0, 16.0);
+ n = (int)octaves;
- for (i = 0; i <= n; i++) {
- float t = safe_noise(fscale * p, "unsigned");
+ for (i = 0; i <= n; i++) {
+ float t = safe_noise(fscale * p, "unsigned");
- if (hard)
- t = fabs(2.0 * t - 1.0);
+ if (hard)
+ t = fabs(2.0 * t - 1.0);
- sum += t * amp;
- amp *= 0.5;
- fscale *= 2.0;
- }
+ sum += t * amp;
+ amp *= 0.5;
+ fscale *= 2.0;
+ }
- float rmd = octaves - floor(octaves);
+ float rmd = octaves - floor(octaves);
- if (rmd != 0.0) {
- float t = safe_noise(fscale * p, "unsigned");
+ if (rmd != 0.0) {
+ float t = safe_noise(fscale * p, "unsigned");
- if (hard)
- t = fabs(2.0 * t - 1.0);
+ if (hard)
+ t = fabs(2.0 * t - 1.0);
- float sum2 = sum + t * amp;
+ float sum2 = sum + t * amp;
- sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
- sum2 *= ((float)(1 << (n + 1)) / (float)((1 << (n + 2)) - 1));
+ sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
+ sum2 *= ((float)(1 << (n + 1)) / (float)((1 << (n + 2)) - 1));
- return (1.0 - rmd) * sum + rmd * sum2;
- }
- else {
- sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
- return sum;
- }
+ return (1.0 - rmd) * sum + rmd * sum2;
+ }
+ else {
+ sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
+ return sum;
+ }
}
/* Utility */
float nonzero(float f, float eps)
{
- float r;
+ float r;
- if (abs(f) < eps)
- r = sign(f) * eps;
- else
- r = f;
+ if (abs(f) < eps)
+ r = sign(f) * eps;
+ else
+ r = f;
- return r;
+ return r;
}
diff --git a/intern/cycles/kernel/shaders/node_texture_coordinate.osl b/intern/cycles/kernel/shaders/node_texture_coordinate.osl
index 9e2109fa082..13861653d04 100644
--- a/intern/cycles/kernel/shaders/node_texture_coordinate.osl
+++ b/intern/cycles/kernel/shaders/node_texture_coordinate.osl
@@ -17,82 +17,81 @@
#include "stdosl.h"
shader node_texture_coordinate(
- normal NormalIn = N,
- int is_background = 0,
- int is_volume = 0,
- int from_dupli = 0,
- int use_transform = 0,
- string bump_offset = "center",
- matrix object_itfm = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+ normal NormalIn = N,
+ int is_background = 0,
+ int is_volume = 0,
+ int from_dupli = 0,
+ int use_transform = 0,
+ string bump_offset = "center",
+ matrix object_itfm = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
- output point Generated = point(0.0, 0.0, 0.0),
- output point UV = point(0.0, 0.0, 0.0),
- output point Object = point(0.0, 0.0, 0.0),
- output point Camera = point(0.0, 0.0, 0.0),
- output point Window = point(0.0, 0.0, 0.0),
- output normal Normal = normal(0.0, 0.0, 0.0),
- output point Reflection = point(0.0, 0.0, 0.0))
+ output point Generated = point(0.0, 0.0, 0.0),
+ output point UV = point(0.0, 0.0, 0.0),
+ output point Object = point(0.0, 0.0, 0.0),
+ output point Camera = point(0.0, 0.0, 0.0),
+ output point Window = point(0.0, 0.0, 0.0),
+ output normal Normal = normal(0.0, 0.0, 0.0),
+ output point Reflection = point(0.0, 0.0, 0.0))
{
- if (is_background) {
- Generated = P;
- UV = point(0.0, 0.0, 0.0);
- Object = P;
- point Pcam = transform("camera", "world", point(0, 0, 0));
- Camera = transform("camera", P + Pcam);
- getattribute("NDC", Window);
- Normal = NormalIn;
- Reflection = I;
- }
- else {
- if (from_dupli) {
- getattribute("geom:dupli_generated", Generated);
- getattribute("geom:dupli_uv", UV);
- }
- else if (is_volume) {
- Generated = transform("object", P);
+ if (is_background) {
+ Generated = P;
+ UV = point(0.0, 0.0, 0.0);
+ Object = P;
+ point Pcam = transform("camera", "world", point(0, 0, 0));
+ Camera = transform("camera", P + Pcam);
+ getattribute("NDC", Window);
+ Normal = NormalIn;
+ Reflection = I;
+ }
+ else {
+ if (from_dupli) {
+ getattribute("geom:dupli_generated", Generated);
+ getattribute("geom:dupli_uv", UV);
+ }
+ else if (is_volume) {
+ Generated = transform("object", P);
- matrix tfm;
- if (getattribute("geom:generated_transform", tfm))
- Generated = transform(tfm, Generated);
+ matrix tfm;
+ if (getattribute("geom:generated_transform", tfm))
+ Generated = transform(tfm, Generated);
- getattribute("geom:uv", UV);
- }
- else {
- getattribute("geom:generated", Generated);
- getattribute("geom:uv", UV);
- }
+ getattribute("geom:uv", UV);
+ }
+ else {
+ getattribute("geom:generated", Generated);
+ getattribute("geom:uv", UV);
+ }
- if (use_transform) {
- Object = transform(object_itfm, P);
- }
- else {
- Object = transform("object", P);
- }
- Camera = transform("camera", P);
- Window = transform("NDC", P);
- Normal = transform("world", "object", NormalIn);
- Reflection = -reflect(I, NormalIn);
- }
+ if (use_transform) {
+ Object = transform(object_itfm, P);
+ }
+ else {
+ Object = transform("object", P);
+ }
+ Camera = transform("camera", P);
+ Window = transform("NDC", P);
+ Normal = transform("world", "object", NormalIn);
+ Reflection = -reflect(I, NormalIn);
+ }
- if (bump_offset == "dx") {
- if (!from_dupli) {
- Generated += Dx(Generated);
- UV += Dx(UV);
- }
- Object += Dx(Object);
- Camera += Dx(Camera);
- Window += Dx(Window);
- }
- else if (bump_offset == "dy") {
- if (!from_dupli) {
- Generated += Dy(Generated);
- UV += Dy(UV);
- }
- Object += Dy(Object);
- Camera += Dy(Camera);
- Window += Dy(Window);
- }
+ if (bump_offset == "dx") {
+ if (!from_dupli) {
+ Generated += Dx(Generated);
+ UV += Dx(UV);
+ }
+ Object += Dx(Object);
+ Camera += Dx(Camera);
+ Window += Dx(Window);
+ }
+ else if (bump_offset == "dy") {
+ if (!from_dupli) {
+ Generated += Dy(Generated);
+ UV += Dy(UV);
+ }
+ Object += Dy(Object);
+ Camera += Dy(Camera);
+ Window += Dy(Window);
+ }
- Window[2] = 0.0;
+ Window[2] = 0.0;
}
-
diff --git a/intern/cycles/kernel/shaders/node_toon_bsdf.osl b/intern/cycles/kernel/shaders/node_toon_bsdf.osl
index ae68a463e46..ed3a0b25c60 100644
--- a/intern/cycles/kernel/shaders/node_toon_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_toon_bsdf.osl
@@ -16,17 +16,15 @@
#include "stdosl.h"
-shader node_toon_bsdf(
- color Color = 0.8,
- string component = "diffuse",
- float Size = 0.5,
- float Smooth = 0.0,
- normal Normal = N,
- output closure color BSDF = 0)
+shader node_toon_bsdf(color Color = 0.8,
+ string component = "diffuse",
+ float Size = 0.5,
+ float Smooth = 0.0,
+ normal Normal = N,
+ output closure color BSDF = 0)
{
- if (component == "diffuse")
- BSDF = Color * diffuse_toon(Normal, Size, Smooth);
- else if (component == "glossy")
- BSDF = Color * glossy_toon(Normal, Size, Smooth);
+ if (component == "diffuse")
+ BSDF = Color * diffuse_toon(Normal, Size, Smooth);
+ else if (component == "glossy")
+ BSDF = Color * glossy_toon(Normal, Size, Smooth);
}
-
diff --git a/intern/cycles/kernel/shaders/node_translucent_bsdf.osl b/intern/cycles/kernel/shaders/node_translucent_bsdf.osl
index 94d23d35326..7ce1ab08c59 100644
--- a/intern/cycles/kernel/shaders/node_translucent_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_translucent_bsdf.osl
@@ -16,11 +16,7 @@
#include "stdosl.h"
-shader node_translucent_bsdf(
- color Color = 0.8,
- normal Normal = N,
- output closure color BSDF = 0)
+shader node_translucent_bsdf(color Color = 0.8, normal Normal = N, output closure color BSDF = 0)
{
- BSDF = Color * translucent(Normal);
+ BSDF = Color * translucent(Normal);
}
-
diff --git a/intern/cycles/kernel/shaders/node_transparent_bsdf.osl b/intern/cycles/kernel/shaders/node_transparent_bsdf.osl
index 5d6798f19a6..a735513ba89 100644
--- a/intern/cycles/kernel/shaders/node_transparent_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_transparent_bsdf.osl
@@ -16,11 +16,7 @@
#include "stdosl.h"
-shader node_transparent_bsdf(
- color Color = 0.8,
- normal Normal = N,
- output closure color BSDF = 0)
+shader node_transparent_bsdf(color Color = 0.8, normal Normal = N, output closure color BSDF = 0)
{
- BSDF = Color * transparent();
+ BSDF = Color * transparent();
}
-
diff --git a/intern/cycles/kernel/shaders/node_uv_map.osl b/intern/cycles/kernel/shaders/node_uv_map.osl
index b46b2e73457..6f2887be63c 100644
--- a/intern/cycles/kernel/shaders/node_uv_map.osl
+++ b/intern/cycles/kernel/shaders/node_uv_map.osl
@@ -16,30 +16,29 @@
#include "stdosl.h"
-shader node_uv_map(
- int from_dupli = 0,
- string attribute = "",
- string bump_offset = "center",
- output point UV = point(0.0, 0.0, 0.0))
+shader node_uv_map(int from_dupli = 0,
+ string attribute = "",
+ string bump_offset = "center",
+ output point UV = point(0.0, 0.0, 0.0))
{
- if (from_dupli) {
- getattribute("geom:dupli_uv", UV);
- }
- else {
- if (attribute == "")
- getattribute("geom:uv", UV);
- else
- getattribute(attribute, UV);
- }
+ if (from_dupli) {
+ getattribute("geom:dupli_uv", UV);
+ }
+ else {
+ if (attribute == "")
+ getattribute("geom:uv", UV);
+ else
+ getattribute(attribute, UV);
+ }
- if (bump_offset == "dx") {
- if (!from_dupli) {
- UV += Dx(UV);
- }
- }
- else if (bump_offset == "dy") {
- if (!from_dupli) {
- UV += Dy(UV);
- }
- }
+ if (bump_offset == "dx") {
+ if (!from_dupli) {
+ UV += Dx(UV);
+ }
+ }
+ else if (bump_offset == "dy") {
+ if (!from_dupli) {
+ UV += Dy(UV);
+ }
+ }
}
diff --git a/intern/cycles/kernel/shaders/node_value.osl b/intern/cycles/kernel/shaders/node_value.osl
index f75388d1f76..398e2c0e392 100644
--- a/intern/cycles/kernel/shaders/node_value.osl
+++ b/intern/cycles/kernel/shaders/node_value.osl
@@ -16,16 +16,14 @@
#include "stdosl.h"
-shader node_value(
- float value_value = 0.0,
- vector vector_value = vector(0.0, 0.0, 0.0),
- color color_value = 0.0,
- output float Value = 0.0,
- output vector Vector = vector(0.0, 0.0, 0.0),
- output color Color = 0.0)
+shader node_value(float value_value = 0.0,
+ vector vector_value = vector(0.0, 0.0, 0.0),
+ color color_value = 0.0,
+ output float Value = 0.0,
+ output vector Vector = vector(0.0, 0.0, 0.0),
+ output color Color = 0.0)
{
- Value = value_value;
- Vector = vector_value;
- Color = color_value;
+ Value = value_value;
+ Vector = vector_value;
+ Color = color_value;
}
-
diff --git a/intern/cycles/kernel/shaders/node_vector_curves.osl b/intern/cycles/kernel/shaders/node_vector_curves.osl
index ff284c48e0a..e8c8036b550 100644
--- a/intern/cycles/kernel/shaders/node_vector_curves.osl
+++ b/intern/cycles/kernel/shaders/node_vector_curves.osl
@@ -17,25 +17,23 @@
#include "stdosl.h"
#include "node_ramp_util.h"
-shader node_vector_curves(
- color ramp[] = {0.0},
- float min_x = 0.0,
- float max_x = 1.0,
+shader node_vector_curves(color ramp[] = {0.0},
+ float min_x = 0.0,
+ float max_x = 1.0,
- vector VectorIn = vector(0.0, 0.0, 0.0),
- float Fac = 0.0,
- output vector VectorOut = vector(0.0, 0.0, 0.0))
+ vector VectorIn = vector(0.0, 0.0, 0.0),
+ float Fac = 0.0,
+ output vector VectorOut = vector(0.0, 0.0, 0.0))
{
- vector c = (VectorIn - vector(min_x, min_x, min_x)) / (max_x - min_x);
+ vector c = (VectorIn - vector(min_x, min_x, min_x)) / (max_x - min_x);
- color r = rgb_ramp_lookup(ramp, c[0], 1, 1);
- color g = rgb_ramp_lookup(ramp, c[0], 1, 1);
- color b = rgb_ramp_lookup(ramp, c[0], 1, 1);
+ color r = rgb_ramp_lookup(ramp, c[0], 1, 1);
+ color g = rgb_ramp_lookup(ramp, c[0], 1, 1);
+ color b = rgb_ramp_lookup(ramp, c[0], 1, 1);
- VectorOut[0] = r[0];
- VectorOut[1] = g[1];
- VectorOut[2] = b[2];
+ VectorOut[0] = r[0];
+ VectorOut[1] = g[1];
+ VectorOut[2] = b[2];
- VectorOut = mix(VectorIn, VectorOut, Fac);
+ VectorOut = mix(VectorIn, VectorOut, Fac);
}
-
diff --git a/intern/cycles/kernel/shaders/node_vector_displacement.osl b/intern/cycles/kernel/shaders/node_vector_displacement.osl
index b19bc228e37..e9bd336347f 100644
--- a/intern/cycles/kernel/shaders/node_vector_displacement.osl
+++ b/intern/cycles/kernel/shaders/node_vector_displacement.osl
@@ -16,45 +16,43 @@
#include "stdosl.h"
-shader node_vector_displacement(
- color Vector = color(0.0, 0.0, 0.0),
- float Midlevel = 0.0,
- float Scale = 1.0,
- string space = "tangent",
- string attr_name = "geom:tangent",
- string attr_sign_name = "geom:tangent_sign",
- output vector Displacement = vector(0.0, 0.0, 0.0))
+shader node_vector_displacement(color Vector = color(0.0, 0.0, 0.0),
+ float Midlevel = 0.0,
+ float Scale = 1.0,
+ string space = "tangent",
+ string attr_name = "geom:tangent",
+ string attr_sign_name = "geom:tangent_sign",
+ output vector Displacement = vector(0.0, 0.0, 0.0))
{
- vector offset = (Vector - vector(Midlevel)) * Scale;
-
- if(space == "tangent") {
- /* Tangent space. */
- vector N_object = normalize(transform("world", "object", N));
-
- vector T_object;
- if(getattribute(attr_name, T_object)) {
- T_object = normalize(T_object);
- }
- else {
- T_object = normalize(dPdu);
- }
-
- vector B_object = normalize(cross(N_object, T_object));
- float tangent_sign;
- if(getattribute(attr_sign_name, tangent_sign)) {
- B_object *= tangent_sign;
- }
-
- Displacement = T_object*offset[0] + N_object*offset[1] + B_object*offset[2];
- }
- else {
- /* Object or world space. */
- Displacement = offset;
- }
-
- if(space != "world") {
- /* Tangent or object space. */
- Displacement = transform("object", "world", Displacement);
- }
+ vector offset = (Vector - vector(Midlevel)) * Scale;
+
+ if (space == "tangent") {
+ /* Tangent space. */
+ vector N_object = normalize(transform("world", "object", N));
+
+ vector T_object;
+ if (getattribute(attr_name, T_object)) {
+ T_object = normalize(T_object);
+ }
+ else {
+ T_object = normalize(dPdu);
+ }
+
+ vector B_object = normalize(cross(N_object, T_object));
+ float tangent_sign;
+ if (getattribute(attr_sign_name, tangent_sign)) {
+ B_object *= tangent_sign;
+ }
+
+ Displacement = T_object * offset[0] + N_object * offset[1] + B_object * offset[2];
+ }
+ else {
+ /* Object or world space. */
+ Displacement = offset;
+ }
+
+ if (space != "world") {
+ /* Tangent or object space. */
+ Displacement = transform("object", "world", Displacement);
+ }
}
-
diff --git a/intern/cycles/kernel/shaders/node_vector_math.osl b/intern/cycles/kernel/shaders/node_vector_math.osl
index a7e3637402e..10bb0c7283c 100644
--- a/intern/cycles/kernel/shaders/node_vector_math.osl
+++ b/intern/cycles/kernel/shaders/node_vector_math.osl
@@ -16,36 +16,34 @@
#include "stdosl.h"
-shader node_vector_math(
- string type = "add",
- vector Vector1 = vector(0.0, 0.0, 0.0),
- vector Vector2 = vector(0.0, 0.0, 0.0),
- output float Value = 0.0,
- output vector Vector = vector(0.0, 0.0, 0.0))
+shader node_vector_math(string type = "add",
+ vector Vector1 = vector(0.0, 0.0, 0.0),
+ vector Vector2 = vector(0.0, 0.0, 0.0),
+ output float Value = 0.0,
+ output vector Vector = vector(0.0, 0.0, 0.0))
{
- if (type == "add") {
- Vector = Vector1 + Vector2;
- Value = (abs(Vector[0]) + abs(Vector[1]) + abs(Vector[2])) / 3.0;
- }
- else if (type == "subtract") {
- Vector = Vector1 - Vector2;
- Value = (abs(Vector[0]) + abs(Vector[1]) + abs(Vector[2])) / 3.0;
- }
- else if (type == "average") {
- Value = length(Vector1 + Vector2);
- Vector = normalize(Vector1 + Vector2);
- }
- else if (type == "dot_product") {
- Value = dot(Vector1, Vector2);
- }
- else if (type == "cross_product") {
- vector c = cross(Vector1, Vector2);
- Value = length(c);
- Vector = normalize(c);
- }
- else if (type == "normalize") {
- Value = length(Vector1);
- Vector = normalize(Vector1);
- }
+ if (type == "add") {
+ Vector = Vector1 + Vector2;
+ Value = (abs(Vector[0]) + abs(Vector[1]) + abs(Vector[2])) / 3.0;
+ }
+ else if (type == "subtract") {
+ Vector = Vector1 - Vector2;
+ Value = (abs(Vector[0]) + abs(Vector[1]) + abs(Vector[2])) / 3.0;
+ }
+ else if (type == "average") {
+ Value = length(Vector1 + Vector2);
+ Vector = normalize(Vector1 + Vector2);
+ }
+ else if (type == "dot_product") {
+ Value = dot(Vector1, Vector2);
+ }
+ else if (type == "cross_product") {
+ vector c = cross(Vector1, Vector2);
+ Value = length(c);
+ Vector = normalize(c);
+ }
+ else if (type == "normalize") {
+ Value = length(Vector1);
+ Vector = normalize(Vector1);
+ }
}
-
diff --git a/intern/cycles/kernel/shaders/node_vector_transform.osl b/intern/cycles/kernel/shaders/node_vector_transform.osl
index afb95b340d1..22939577be0 100644
--- a/intern/cycles/kernel/shaders/node_vector_transform.osl
+++ b/intern/cycles/kernel/shaders/node_vector_transform.osl
@@ -16,21 +16,19 @@
#include "stdosl.h"
-shader node_vector_transform(
- string type = "vector",
- string convert_from = "world",
- string convert_to = "object",
- vector VectorIn = vector(0.0, 0.0, 0.0),
- output vector VectorOut = vector(0.0, 0.0, 0.0))
+shader node_vector_transform(string type = "vector",
+ string convert_from = "world",
+ string convert_to = "object",
+ vector VectorIn = vector(0.0, 0.0, 0.0),
+ output vector VectorOut = vector(0.0, 0.0, 0.0))
{
- if (type == "vector" || type == "normal") {
- VectorOut = transform(convert_from, convert_to, VectorIn);
- if (type == "normal")
- VectorOut = normalize(VectorOut);
- }
- else if (type == "point") {
- point Point = (point)VectorIn;
- VectorOut = transform(convert_from, convert_to, Point);
- }
+ if (type == "vector" || type == "normal") {
+ VectorOut = transform(convert_from, convert_to, VectorIn);
+ if (type == "normal")
+ VectorOut = normalize(VectorOut);
+ }
+ else if (type == "point") {
+ point Point = (point)VectorIn;
+ VectorOut = transform(convert_from, convert_to, Point);
+ }
}
-
diff --git a/intern/cycles/kernel/shaders/node_velvet_bsdf.osl b/intern/cycles/kernel/shaders/node_velvet_bsdf.osl
index 456c26998c8..9290b845325 100644
--- a/intern/cycles/kernel/shaders/node_velvet_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_velvet_bsdf.osl
@@ -17,14 +17,12 @@
#include "stdosl.h"
#include "node_fresnel.h"
-shader node_velvet_bsdf(
- color Color = 0.8,
- float Sigma = 0.0,
- normal Normal = N,
- output closure color BSDF = 0)
+shader node_velvet_bsdf(color Color = 0.8,
+ float Sigma = 0.0,
+ normal Normal = N,
+ output closure color BSDF = 0)
{
- float sigma = clamp(Sigma, 0.0, 1.0);
+ float sigma = clamp(Sigma, 0.0, 1.0);
- BSDF = Color * ashikhmin_velvet(Normal, sigma);
+ BSDF = Color * ashikhmin_velvet(Normal, sigma);
}
-
diff --git a/intern/cycles/kernel/shaders/node_voronoi_texture.osl b/intern/cycles/kernel/shaders/node_voronoi_texture.osl
index 2e47d74a414..34c86d5b98d 100644
--- a/intern/cycles/kernel/shaders/node_voronoi_texture.osl
+++ b/intern/cycles/kernel/shaders/node_voronoi_texture.osl
@@ -19,147 +19,146 @@
void voronoi_m(point p, string metric, float e, float da[4], point pa[4])
{
- /* Compute the distance to and the position of the four closest neighbors to p.
- *
- * The neighbors are randomly placed, 1 each in a 3x3x3 grid (Worley pattern).
- * The distances and points are returned in ascending order, i.e. da[0] and pa[0] will
- * contain the distance to the closest point and its coordinates respectively.
- */
- int xx, yy, zz, xi, yi, zi;
-
- xi = (int)floor(p[0]);
- yi = (int)floor(p[1]);
- zi = (int)floor(p[2]);
-
- da[0] = 1e10;
- da[1] = 1e10;
- da[2] = 1e10;
- da[3] = 1e10;
-
- for (xx = xi - 1; xx <= xi + 1; xx++) {
- for (yy = yi - 1; yy <= yi + 1; yy++) {
- for (zz = zi - 1; zz <= zi + 1; zz++) {
- point ip = point(xx, yy, zz);
- point vp = (point)cellnoise_color(ip);
- point pd = p - (vp + ip);
-
- float d = 0.0;
- if (metric == "distance") {
- d = dot(pd, pd);
- }
- else if (metric == "manhattan") {
- d = fabs(pd[0]) + fabs(pd[1]) + fabs(pd[2]);
- }
- else if (metric == "chebychev") {
- d = max(fabs(pd[0]), max(fabs(pd[1]), fabs(pd[2])));
- }
- else if (metric == "minkowski") {
- d = pow(pow(fabs(pd[0]), e) + pow(fabs(pd[1]), e) + pow(fabs(pd[2]), e), 1.0/e);
- }
-
- vp += point(xx, yy, zz);
-
- if (d < da[0]) {
- da[3] = da[2];
- da[2] = da[1];
- da[1] = da[0];
- da[0] = d;
-
- pa[3] = pa[2];
- pa[2] = pa[1];
- pa[1] = pa[0];
- pa[0] = vp;
- }
- else if (d < da[1]) {
- da[3] = da[2];
- da[2] = da[1];
- da[1] = d;
-
- pa[3] = pa[2];
- pa[2] = pa[1];
- pa[1] = vp;
- }
- else if (d < da[2]) {
- da[3] = da[2];
- da[2] = d;
-
- pa[3] = pa[2];
- pa[2] = vp;
- }
- else if (d < da[3]) {
- da[3] = d;
- pa[3] = vp;
- }
- }
- }
- }
+ /* Compute the distance to and the position of the four closest neighbors to p.
+ *
+ * The neighbors are randomly placed, 1 each in a 3x3x3 grid (Worley pattern).
+ * The distances and points are returned in ascending order, i.e. da[0] and pa[0] will
+ * contain the distance to the closest point and its coordinates respectively.
+ */
+ int xx, yy, zz, xi, yi, zi;
+
+ xi = (int)floor(p[0]);
+ yi = (int)floor(p[1]);
+ zi = (int)floor(p[2]);
+
+ da[0] = 1e10;
+ da[1] = 1e10;
+ da[2] = 1e10;
+ da[3] = 1e10;
+
+ for (xx = xi - 1; xx <= xi + 1; xx++) {
+ for (yy = yi - 1; yy <= yi + 1; yy++) {
+ for (zz = zi - 1; zz <= zi + 1; zz++) {
+ point ip = point(xx, yy, zz);
+ point vp = (point)cellnoise_color(ip);
+ point pd = p - (vp + ip);
+
+ float d = 0.0;
+ if (metric == "distance") {
+ d = dot(pd, pd);
+ }
+ else if (metric == "manhattan") {
+ d = fabs(pd[0]) + fabs(pd[1]) + fabs(pd[2]);
+ }
+ else if (metric == "chebychev") {
+ d = max(fabs(pd[0]), max(fabs(pd[1]), fabs(pd[2])));
+ }
+ else if (metric == "minkowski") {
+ d = pow(pow(fabs(pd[0]), e) + pow(fabs(pd[1]), e) + pow(fabs(pd[2]), e), 1.0 / e);
+ }
+
+ vp += point(xx, yy, zz);
+
+ if (d < da[0]) {
+ da[3] = da[2];
+ da[2] = da[1];
+ da[1] = da[0];
+ da[0] = d;
+
+ pa[3] = pa[2];
+ pa[2] = pa[1];
+ pa[1] = pa[0];
+ pa[0] = vp;
+ }
+ else if (d < da[1]) {
+ da[3] = da[2];
+ da[2] = da[1];
+ da[1] = d;
+
+ pa[3] = pa[2];
+ pa[2] = pa[1];
+ pa[1] = vp;
+ }
+ else if (d < da[2]) {
+ da[3] = da[2];
+ da[2] = d;
+
+ pa[3] = pa[2];
+ pa[2] = vp;
+ }
+ else if (d < da[3]) {
+ da[3] = d;
+ pa[3] = vp;
+ }
+ }
+ }
+ }
}
/* Voronoi */
shader node_voronoi_texture(
- int use_mapping = 0,
- matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
- string coloring = "intensity",
- string metric = "distance",
- string feature = "F1",
- float Exponent = 1.0,
- float Scale = 5.0,
- point Vector = P,
- output float Fac = 0.0,
- output color Color = 0.0)
+ int use_mapping = 0,
+ matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+ string coloring = "intensity",
+ string metric = "distance",
+ string feature = "F1",
+ float Exponent = 1.0,
+ float Scale = 5.0,
+ point Vector = P,
+ output float Fac = 0.0,
+ output color Color = 0.0)
{
- point p = Vector;
-
- if (use_mapping)
- p = transform(mapping, p);
-
- /* compute distance and point coordinate of 4 nearest neighbours */
- float da[4];
- point pa[4];
-
- /* compute distance and point coordinate of 4 nearest neighbours */
- voronoi_m(p * Scale, metric, Exponent, da, pa);
-
- if (coloring == "intensity") {
- /* Intensity output */
- if (feature == "F1") {
- Fac = fabs(da[0]);
- }
- else if (feature == "F2") {
- Fac = fabs(da[1]);
- }
- else if (feature == "F3") {
- Fac = fabs(da[2]);
- }
- else if (feature == "F4") {
- Fac = fabs(da[3]);
- }
- else if (feature == "F2F1") {
- Fac = fabs(da[1] - da[0]);
- }
- Color = color(Fac);
- }
- else {
- /* Color output */
- if (feature == "F1") {
- Color = pa[0];
- }
- else if (feature == "F2") {
- Color = pa[1];
- }
- else if (feature == "F3") {
- Color = pa[2];
- }
- else if (feature == "F4") {
- Color = pa[3];
- }
- else if (feature == "F2F1") {
- Color = fabs(pa[1] - pa[0]);
- }
-
- Color = cellnoise_color(Color);
- Fac = (Color[0] + Color[1] + Color[2]) * (1.0 / 3.0);
- }
+ point p = Vector;
+
+ if (use_mapping)
+ p = transform(mapping, p);
+
+ /* compute distance and point coordinate of 4 nearest neighbours */
+ float da[4];
+ point pa[4];
+
+ /* compute distance and point coordinate of 4 nearest neighbours */
+ voronoi_m(p * Scale, metric, Exponent, da, pa);
+
+ if (coloring == "intensity") {
+ /* Intensity output */
+ if (feature == "F1") {
+ Fac = fabs(da[0]);
+ }
+ else if (feature == "F2") {
+ Fac = fabs(da[1]);
+ }
+ else if (feature == "F3") {
+ Fac = fabs(da[2]);
+ }
+ else if (feature == "F4") {
+ Fac = fabs(da[3]);
+ }
+ else if (feature == "F2F1") {
+ Fac = fabs(da[1] - da[0]);
+ }
+ Color = color(Fac);
+ }
+ else {
+ /* Color output */
+ if (feature == "F1") {
+ Color = pa[0];
+ }
+ else if (feature == "F2") {
+ Color = pa[1];
+ }
+ else if (feature == "F3") {
+ Color = pa[2];
+ }
+ else if (feature == "F4") {
+ Color = pa[3];
+ }
+ else if (feature == "F2F1") {
+ Color = fabs(pa[1] - pa[0]);
+ }
+
+ Color = cellnoise_color(Color);
+ Fac = (Color[0] + Color[1] + Color[2]) * (1.0 / 3.0);
+ }
}
-
diff --git a/intern/cycles/kernel/shaders/node_voxel_texture.osl b/intern/cycles/kernel/shaders/node_voxel_texture.osl
index 9253febd64a..0e4484561d8 100644
--- a/intern/cycles/kernel/shaders/node_voxel_texture.osl
+++ b/intern/cycles/kernel/shaders/node_voxel_texture.osl
@@ -16,32 +16,30 @@
#include "stdosl.h"
-shader node_voxel_texture(
- string filename = "",
- string interpolation = "linear",
- int use_mapping = 0,
- matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
- point Vector = P,
- output float Density = 0,
- output color Color = 0)
+shader node_voxel_texture(string filename = "",
+ string interpolation = "linear",
+ int use_mapping = 0,
+ matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+ point Vector = P,
+ output float Density = 0,
+ output color Color = 0)
{
- point p = Vector;
- if (use_mapping) {
- p = transform(mapping, p);
- }
- else {
- p = transform("object", Vector);
- matrix tfm;
- if (getattribute("geom:generated_transform", tfm))
- p = transform(tfm, p);
- }
- if (p[0] < 0.0 || p[1] < 0.0 || p[2] < 0.0 ||
- p[0] > 1.0 || p[1] > 1.0 || p[2] > 1.0)
- {
- Density = 0;
- Color = color(0, 0, 0);
- }
- else {
- Color = (color)texture3d(filename, p, "wrap", "periodic", "interp", interpolation, "alpha", Density);
- }
+ point p = Vector;
+ if (use_mapping) {
+ p = transform(mapping, p);
+ }
+ else {
+ p = transform("object", Vector);
+ matrix tfm;
+ if (getattribute("geom:generated_transform", tfm))
+ p = transform(tfm, p);
+ }
+ if (p[0] < 0.0 || p[1] < 0.0 || p[2] < 0.0 || p[0] > 1.0 || p[1] > 1.0 || p[2] > 1.0) {
+ Density = 0;
+ Color = color(0, 0, 0);
+ }
+ else {
+ Color = (color)texture3d(
+ filename, p, "wrap", "periodic", "interp", interpolation, "alpha", Density);
+ }
}
diff --git a/intern/cycles/kernel/shaders/node_wave_texture.osl b/intern/cycles/kernel/shaders/node_wave_texture.osl
index 71bc9324705..dfc2dbfb800 100644
--- a/intern/cycles/kernel/shaders/node_wave_texture.osl
+++ b/intern/cycles/kernel/shaders/node_wave_texture.osl
@@ -21,49 +21,47 @@
float wave(point p, string type, string profile, float detail, float distortion, float dscale)
{
- float n = 0.0;
+ float n = 0.0;
- if (type == "bands") {
- n = (p[0] + p[1] + p[2]) * 10.0;
- }
- else if (type == "rings") {
- n = length(p) * 20.0;
- }
+ if (type == "bands") {
+ n = (p[0] + p[1] + p[2]) * 10.0;
+ }
+ else if (type == "rings") {
+ n = length(p) * 20.0;
+ }
- if (distortion != 0.0) {
- n = n + (distortion * noise_turbulence(p * dscale, detail, 0));
- }
+ if (distortion != 0.0) {
+ n = n + (distortion * noise_turbulence(p * dscale, detail, 0));
+ }
- if (profile == "sine") {
- return 0.5 + 0.5 * sin(n);
- }
- else {
- /* Saw profile */
- n /= M_2PI;
- n -= (int) n;
- return (n < 0.0) ? n + 1.0 : n;
- }
+ if (profile == "sine") {
+ return 0.5 + 0.5 * sin(n);
+ }
+ else {
+ /* Saw profile */
+ n /= M_2PI;
+ n -= (int)n;
+ return (n < 0.0) ? n + 1.0 : n;
+ }
}
-shader node_wave_texture(
- int use_mapping = 0,
- matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
- string type = "bands",
- string profile = "sine",
- float Scale = 5.0,
- float Distortion = 0.0,
- float Detail = 2.0,
- float DetailScale = 1.0,
- point Vector = P,
- output float Fac = 0.0,
- output color Color = 0.0)
+shader node_wave_texture(int use_mapping = 0,
+ matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+ string type = "bands",
+ string profile = "sine",
+ float Scale = 5.0,
+ float Distortion = 0.0,
+ float Detail = 2.0,
+ float DetailScale = 1.0,
+ point Vector = P,
+ output float Fac = 0.0,
+ output color Color = 0.0)
{
- point p = Vector;
+ point p = Vector;
- if (use_mapping)
- p = transform(mapping, p);
+ if (use_mapping)
+ p = transform(mapping, p);
- Fac = wave(p * Scale, type, profile, Detail, Distortion, DetailScale);
- Color = Fac;
+ Fac = wave(p * Scale, type, profile, Detail, Distortion, DetailScale);
+ Color = Fac;
}
-
diff --git a/intern/cycles/kernel/shaders/node_wavelength.osl b/intern/cycles/kernel/shaders/node_wavelength.osl
index 79e7043d4bf..c8c6eecb171 100644
--- a/intern/cycles/kernel/shaders/node_wavelength.osl
+++ b/intern/cycles/kernel/shaders/node_wavelength.osl
@@ -16,10 +16,7 @@
#include "stdosl.h"
-shader node_wavelength(
- float Wavelength = 500.0,
- output color Color = 0.0)
+shader node_wavelength(float Wavelength = 500.0, output color Color = 0.0)
{
- Color = wavelength_color(Wavelength);
+ Color = wavelength_color(Wavelength);
}
-
diff --git a/intern/cycles/kernel/shaders/node_wireframe.osl b/intern/cycles/kernel/shaders/node_wireframe.osl
index 5cc214495dd..ea4bd3a4c87 100644
--- a/intern/cycles/kernel/shaders/node_wireframe.osl
+++ b/intern/cycles/kernel/shaders/node_wireframe.osl
@@ -17,25 +17,24 @@
#include "stdosl.h"
#include "oslutil.h"
-shader node_wireframe(
- string bump_offset = "center",
- int use_pixel_size = 0,
- float Size = 0.01,
- output float Fac = 0.0)
+shader node_wireframe(string bump_offset = "center",
+ int use_pixel_size = 0,
+ float Size = 0.01,
+ output float Fac = 0.0)
{
- Fac = wireframe("triangles", Size, use_pixel_size);
- /* TODO(sergey): Since we can't use autodiff here we do algebraic
- * calculation of derivatives by definition. We could probably
- * optimize this a bit by doing some extra calculation in wireframe().
- */
- if (bump_offset == "dx") {
- point dx = Dx(P);
- P -= dx;
- Fac += (Fac - wireframe("triangles", Size, use_pixel_size)) / length(dx);
- }
- else if (bump_offset == "dy") {
- point dy = Dy(P);
- P -= dy;
- Fac += (Fac - wireframe("triangles", Size, use_pixel_size)) / length(dy);
- }
+ Fac = wireframe("triangles", Size, use_pixel_size);
+ /* TODO(sergey): Since we can't use autodiff here we do algebraic
+ * calculation of derivatives by definition. We could probably
+ * optimize this a bit by doing some extra calculation in wireframe().
+ */
+ if (bump_offset == "dx") {
+ point dx = Dx(P);
+ P -= dx;
+ Fac += (Fac - wireframe("triangles", Size, use_pixel_size)) / length(dx);
+ }
+ else if (bump_offset == "dy") {
+ point dy = Dy(P);
+ P -= dy;
+ Fac += (Fac - wireframe("triangles", Size, use_pixel_size)) / length(dy);
+ }
}
diff --git a/intern/cycles/kernel/shaders/oslutil.h b/intern/cycles/kernel/shaders/oslutil.h
index 592a8ad12d9..d48bfa4a665 100644
--- a/intern/cycles/kernel/shaders/oslutil.h
+++ b/intern/cycles/kernel/shaders/oslutil.h
@@ -39,57 +39,63 @@
//
float wireframe(string edge_type, float line_width, int raster)
{
- // ray differentials are so big in diffuse context that this function would always return "wire"
- if (raytype("path:diffuse")) return 0.0;
+ // ray differentials are so big in diffuse context that this function would always return "wire"
+ if (raytype("path:diffuse"))
+ return 0.0;
- int np = 0;
- point p[64];
- float pixelWidth = 1;
+ int np = 0;
+ point p[64];
+ float pixelWidth = 1;
- if (edge_type == "triangles")
- {
- np = 3;
- if (!getattribute("geom:trianglevertices", p))
- return 0.0;
- }
- else if (edge_type == "polygons" || edge_type == "patches")
- {
- getattribute("geom:numpolyvertices", np);
- if (np < 3 || !getattribute("geom:polyvertices", p))
- return 0.0;
- }
+ if (edge_type == "triangles") {
+ np = 3;
+ if (!getattribute("geom:trianglevertices", p))
+ return 0.0;
+ }
+ else if (edge_type == "polygons" || edge_type == "patches") {
+ getattribute("geom:numpolyvertices", np);
+ if (np < 3 || !getattribute("geom:polyvertices", p))
+ return 0.0;
+ }
- if (raster)
- {
- // Project the derivatives of P to the viewing plane defined
- // by I so we have a measure of how big is a pixel at this point
- float pixelWidthX = length(Dx(P) - dot(Dx(P), I) * I);
- float pixelWidthY = length(Dy(P) - dot(Dy(P), I) * I);
- // Take the average of both axis' length
- pixelWidth = (pixelWidthX + pixelWidthY) / 2;
- }
+ if (raster) {
+ // Project the derivatives of P to the viewing plane defined
+ // by I so we have a measure of how big is a pixel at this point
+ float pixelWidthX = length(Dx(P) - dot(Dx(P), I) * I);
+ float pixelWidthY = length(Dy(P) - dot(Dy(P), I) * I);
+ // Take the average of both axis' length
+ pixelWidth = (pixelWidthX + pixelWidthY) / 2;
+ }
- // Use half the width as the neighbor face will render the
- // other half. And take the square for fast comparison
- pixelWidth *= 0.5 * line_width;
- pixelWidth *= pixelWidth;
- for (int i = 0; i < np; i++)
- {
- int i2 = i ? i - 1 : np - 1;
- vector dir = P - p[i];
- vector edge = p[i] - p[i2];
- vector crs = cross(edge, dir);
- // At this point dot(crs, crs) / dot(edge, edge) is
- // the square of area / length(edge) == square of the
- // distance to the edge.
- if (dot(crs, crs) < (dot(edge, edge) * pixelWidth))
- return 1;
- }
- return 0;
+ // Use half the width as the neighbor face will render the
+ // other half. And take the square for fast comparison
+ pixelWidth *= 0.5 * line_width;
+ pixelWidth *= pixelWidth;
+ for (int i = 0; i < np; i++) {
+ int i2 = i ? i - 1 : np - 1;
+ vector dir = P - p[i];
+ vector edge = p[i] - p[i2];
+ vector crs = cross(edge, dir);
+ // At this point dot(crs, crs) / dot(edge, edge) is
+ // the square of area / length(edge) == square of the
+ // distance to the edge.
+ if (dot(crs, crs) < (dot(edge, edge) * pixelWidth))
+ return 1;
+ }
+ return 0;
}
-float wireframe(string edge_type, float line_width) { return wireframe(edge_type, line_width, 1); }
-float wireframe(string edge_type) { return wireframe(edge_type, 1.0, 1); }
-float wireframe() { return wireframe("polygons", 1.0, 1); }
+float wireframe(string edge_type, float line_width)
+{
+ return wireframe(edge_type, line_width, 1);
+}
+float wireframe(string edge_type)
+{
+ return wireframe(edge_type, 1.0, 1);
+}
+float wireframe()
+{
+ return wireframe("polygons", 1.0, 1);
+}
-#endif /* CCL_OSLUTIL_H */
+#endif /* CCL_OSLUTIL_H */
diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h
index 7136c746321..9b9720ffff9 100644
--- a/intern/cycles/kernel/shaders/stdosl.h
+++ b/intern/cycles/kernel/shaders/stdosl.h
@@ -25,124 +25,215 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/////////////////////////////////////////////////////////////////////////////
-
#ifndef CCL_STDOSL_H
#define CCL_STDOSL_H
-
#ifndef M_PI
-#define M_PI 3.1415926535897932 /* pi */
-#define M_PI_2 1.5707963267948966 /* pi/2 */
-#define M_PI_4 0.7853981633974483 /* pi/4 */
-#define M_2_PI 0.6366197723675813 /* 2/pi */
-#define M_2PI 6.2831853071795865 /* 2*pi */
-#define M_4PI 12.566370614359173 /* 4*pi */
-#define M_2_SQRTPI 1.1283791670955126 /* 2/sqrt(pi) */
-#define M_E 2.7182818284590452 /* e (Euler's number) */
-#define M_LN2 0.6931471805599453 /* ln(2) */
-#define M_LN10 2.3025850929940457 /* ln(10) */
-#define M_LOG2E 1.4426950408889634 /* log_2(e) */
-#define M_LOG10E 0.4342944819032518 /* log_10(e) */
-#define M_SQRT2 1.4142135623730950 /* sqrt(2) */
-#define M_SQRT1_2 0.7071067811865475 /* 1/sqrt(2) */
+# define M_PI 3.1415926535897932 /* pi */
+# define M_PI_2 1.5707963267948966 /* pi/2 */
+# define M_PI_4 0.7853981633974483 /* pi/4 */
+# define M_2_PI 0.6366197723675813 /* 2/pi */
+# define M_2PI 6.2831853071795865 /* 2*pi */
+# define M_4PI 12.566370614359173 /* 4*pi */
+# define M_2_SQRTPI 1.1283791670955126 /* 2/sqrt(pi) */
+# define M_E 2.7182818284590452 /* e (Euler's number) */
+# define M_LN2 0.6931471805599453 /* ln(2) */
+# define M_LN10 2.3025850929940457 /* ln(10) */
+# define M_LOG2E 1.4426950408889634 /* log_2(e) */
+# define M_LOG10E 0.4342944819032518 /* log_10(e) */
+# define M_SQRT2 1.4142135623730950 /* sqrt(2) */
+# define M_SQRT1_2 0.7071067811865475 /* 1/sqrt(2) */
#endif
-
-
// Declaration of built-in functions and closures
-#define BUILTIN [[ int builtin = 1 ]]
+#define BUILTIN [[int builtin = 1]]
#define BUILTIN_DERIV [[ int builtin = 1, int deriv = 1 ]]
-#define PERCOMP1(name) \
- normal name (normal x) BUILTIN; \
- vector name (vector x) BUILTIN; \
- point name (point x) BUILTIN; \
- color name (color x) BUILTIN; \
- float name (float x) BUILTIN;
-
-#define PERCOMP2(name) \
- normal name (normal x, normal y) BUILTIN; \
- vector name (vector x, vector y) BUILTIN; \
- point name (point x, point y) BUILTIN; \
- color name (color x, color y) BUILTIN; \
- float name (float x, float y) BUILTIN;
-
-#define PERCOMP2F(name) \
- normal name (normal x, float y) BUILTIN; \
- vector name (vector x, float y) BUILTIN; \
- point name (point x, float y) BUILTIN; \
- color name (color x, float y) BUILTIN; \
- float name (float x, float y) BUILTIN;
-
+#define PERCOMP1(name) \
+ normal name(normal x) BUILTIN; \
+ vector name(vector x) BUILTIN; \
+ point name(point x) BUILTIN; \
+ color name(color x) BUILTIN; \
+ float name(float x) BUILTIN;
+
+#define PERCOMP2(name) \
+ normal name(normal x, normal y) BUILTIN; \
+ vector name(vector x, vector y) BUILTIN; \
+ point name(point x, point y) BUILTIN; \
+ color name(color x, color y) BUILTIN; \
+ float name(float x, float y) BUILTIN;
+
+#define PERCOMP2F(name) \
+ normal name(normal x, float y) BUILTIN; \
+ vector name(vector x, float y) BUILTIN; \
+ point name(point x, float y) BUILTIN; \
+ color name(color x, float y) BUILTIN; \
+ float name(float x, float y) BUILTIN;
// Basic math
-normal degrees (normal x) { return x*(180.0/M_PI); }
-vector degrees (vector x) { return x*(180.0/M_PI); }
-point degrees (point x) { return x*(180.0/M_PI); }
-color degrees (color x) { return x*(180.0/M_PI); }
-float degrees (float x) { return x*(180.0/M_PI); }
-normal radians (normal x) { return x*(M_PI/180.0); }
-vector radians (vector x) { return x*(M_PI/180.0); }
-point radians (point x) { return x*(M_PI/180.0); }
-color radians (color x) { return x*(M_PI/180.0); }
-float radians (float x) { return x*(M_PI/180.0); }
-PERCOMP1 (cos)
-PERCOMP1 (sin)
-PERCOMP1 (tan)
-PERCOMP1 (acos)
-PERCOMP1 (asin)
-PERCOMP1 (atan)
-PERCOMP2 (atan2)
-PERCOMP1 (cosh)
-PERCOMP1 (sinh)
-PERCOMP1 (tanh)
-PERCOMP2F (pow)
-PERCOMP1 (exp)
-PERCOMP1 (exp2)
-PERCOMP1 (expm1)
-PERCOMP1 (log)
-point log (point a, float b) { return log(a)/log(b); }
-vector log (vector a, float b) { return log(a)/log(b); }
-color log (color a, float b) { return log(a)/log(b); }
-float log (float a, float b) { return log(a)/log(b); }
-PERCOMP1 (log2)
-PERCOMP1 (log10)
-PERCOMP1 (logb)
-PERCOMP1 (sqrt)
-PERCOMP1 (inversesqrt)
-float hypot (float a, float b) { return sqrt (a*a + b*b); }
-float hypot (float a, float b, float c) { return sqrt (a*a + b*b + c*c); }
-PERCOMP1 (abs)
-int abs (int x) BUILTIN;
-PERCOMP1 (fabs)
-int fabs (int x) BUILTIN;
-PERCOMP1 (sign)
-PERCOMP1 (floor)
-PERCOMP1 (ceil)
-PERCOMP1 (round)
-PERCOMP1 (trunc)
-PERCOMP2 (fmod)
-PERCOMP2F (fmod)
-int mod (int a, int b) { return a - b*(int)floor(a/b); }
-point mod (point a, point b) { return a - b*floor(a/b); }
-vector mod (vector a, vector b) { return a - b*floor(a/b); }
-normal mod (normal a, normal b) { return a - b*floor(a/b); }
-color mod (color a, color b) { return a - b*floor(a/b); }
-point mod (point a, float b) { return a - b*floor(a/b); }
-vector mod (vector a, float b) { return a - b*floor(a/b); }
-normal mod (normal a, float b) { return a - b*floor(a/b); }
-color mod (color a, float b) { return a - b*floor(a/b); }
-float mod (float a, float b) { return a - b*floor(a/b); }
-PERCOMP2 (min)
-int min (int a, int b) BUILTIN;
-PERCOMP2 (max)
-int max (int a, int b) BUILTIN;
-normal clamp (normal x, normal minval, normal maxval) { return max(min(x,maxval),minval); }
-vector clamp (vector x, vector minval, vector maxval) { return max(min(x,maxval),minval); }
-point clamp (point x, point minval, point maxval) { return max(min(x,maxval),minval); }
-color clamp (color x, color minval, color maxval) { return max(min(x,maxval),minval); }
-float clamp (float x, float minval, float maxval) { return max(min(x,maxval),minval); }
-int clamp (int x, int minval, int maxval) { return max(min(x,maxval),minval); }
+normal degrees(normal x)
+{
+ return x * (180.0 / M_PI);
+}
+vector degrees(vector x)
+{
+ return x * (180.0 / M_PI);
+}
+point degrees(point x)
+{
+ return x * (180.0 / M_PI);
+}
+color degrees(color x)
+{
+ return x * (180.0 / M_PI);
+}
+float degrees(float x)
+{
+ return x * (180.0 / M_PI);
+}
+normal radians(normal x)
+{
+ return x * (M_PI / 180.0);
+}
+vector radians(vector x)
+{
+ return x * (M_PI / 180.0);
+}
+point radians(point x)
+{
+ return x * (M_PI / 180.0);
+}
+color radians(color x)
+{
+ return x * (M_PI / 180.0);
+}
+float radians(float x)
+{
+ return x * (M_PI / 180.0);
+}
+PERCOMP1(cos)
+PERCOMP1(sin)
+PERCOMP1(tan)
+PERCOMP1(acos)
+PERCOMP1(asin)
+PERCOMP1(atan)
+PERCOMP2(atan2)
+PERCOMP1(cosh)
+PERCOMP1(sinh)
+PERCOMP1(tanh)
+PERCOMP2F(pow)
+PERCOMP1(exp)
+PERCOMP1(exp2)
+PERCOMP1(expm1)
+PERCOMP1(log)
+point log(point a, float b)
+{
+ return log(a) / log(b);
+}
+vector log(vector a, float b)
+{
+ return log(a) / log(b);
+}
+color log(color a, float b)
+{
+ return log(a) / log(b);
+}
+float log(float a, float b)
+{
+ return log(a) / log(b);
+}
+PERCOMP1(log2)
+PERCOMP1(log10)
+PERCOMP1(logb)
+PERCOMP1(sqrt)
+PERCOMP1(inversesqrt)
+float hypot(float a, float b)
+{
+ return sqrt(a * a + b * b);
+}
+float hypot(float a, float b, float c)
+{
+ return sqrt(a * a + b * b + c * c);
+}
+PERCOMP1(abs)
+int abs(int x) BUILTIN;
+PERCOMP1(fabs)
+int fabs(int x) BUILTIN;
+PERCOMP1(sign)
+PERCOMP1(floor)
+PERCOMP1(ceil)
+PERCOMP1(round)
+PERCOMP1(trunc)
+PERCOMP2(fmod)
+PERCOMP2F(fmod)
+int mod(int a, int b)
+{
+ return a - b * (int)floor(a / b);
+}
+point mod(point a, point b)
+{
+ return a - b * floor(a / b);
+}
+vector mod(vector a, vector b)
+{
+ return a - b * floor(a / b);
+}
+normal mod(normal a, normal b)
+{
+ return a - b * floor(a / b);
+}
+color mod(color a, color b)
+{
+ return a - b * floor(a / b);
+}
+point mod(point a, float b)
+{
+ return a - b * floor(a / b);
+}
+vector mod(vector a, float b)
+{
+ return a - b * floor(a / b);
+}
+normal mod(normal a, float b)
+{
+ return a - b * floor(a / b);
+}
+color mod(color a, float b)
+{
+ return a - b * floor(a / b);
+}
+float mod(float a, float b)
+{
+ return a - b * floor(a / b);
+}
+PERCOMP2(min)
+int min(int a, int b) BUILTIN;
+PERCOMP2(max)
+int max(int a, int b) BUILTIN;
+normal clamp(normal x, normal minval, normal maxval)
+{
+ return max(min(x, maxval), minval);
+}
+vector clamp(vector x, vector minval, vector maxval)
+{
+ return max(min(x, maxval), minval);
+}
+point clamp(point x, point minval, point maxval)
+{
+ return max(min(x, maxval), minval);
+}
+color clamp(color x, color minval, color maxval)
+{
+ return max(min(x, maxval), minval);
+}
+float clamp(float x, float minval, float maxval)
+{
+ return max(min(x, maxval), minval);
+}
+int clamp(int x, int minval, int maxval)
+{
+ return max(min(x, maxval), minval);
+}
#if 0
normal mix (normal x, normal y, normal a) { return x*(1-a) + y*a; }
normal mix (normal x, normal y, float a) { return x*(1-a) + y*a; }
@@ -154,102 +245,121 @@ color mix (color x, color y, color a) { return x*(1-a) + y*a; }
color mix (color x, color y, float a) { return x*(1-a) + y*a; }
float mix (float x, float y, float a) { return x*(1-a) + y*a; }
#else
-normal mix (normal x, normal y, normal a) BUILTIN;
-normal mix (normal x, normal y, float a) BUILTIN;
-vector mix (vector x, vector y, vector a) BUILTIN;
-vector mix (vector x, vector y, float a) BUILTIN;
-point mix (point x, point y, point a) BUILTIN;
-point mix (point x, point y, float a) BUILTIN;
-color mix (color x, color y, color a) BUILTIN;
-color mix (color x, color y, float a) BUILTIN;
-float mix (float x, float y, float a) BUILTIN;
+normal mix(normal x, normal y, normal a) BUILTIN;
+normal mix(normal x, normal y, float a) BUILTIN;
+vector mix(vector x, vector y, vector a) BUILTIN;
+vector mix(vector x, vector y, float a) BUILTIN;
+point mix(point x, point y, point a) BUILTIN;
+point mix(point x, point y, float a) BUILTIN;
+color mix(color x, color y, color a) BUILTIN;
+color mix(color x, color y, float a) BUILTIN;
+float mix(float x, float y, float a) BUILTIN;
#endif
-int isnan (float x) BUILTIN;
-int isinf (float x) BUILTIN;
-int isfinite (float x) BUILTIN;
-float erf (float x) BUILTIN;
-float erfc (float x) BUILTIN;
+int isnan(float x) BUILTIN;
+int isinf(float x) BUILTIN;
+int isfinite(float x) BUILTIN;
+float erf(float x) BUILTIN;
+float erfc(float x) BUILTIN;
// Vector functions
-vector cross (vector a, vector b) BUILTIN;
-float dot (vector a, vector b) BUILTIN;
-float length (vector v) BUILTIN;
-float distance (point a, point b) BUILTIN;
-float distance (point a, point b, point q)
-{
- vector d = b - a;
- float dd = dot(d, d);
- if(dd == 0.0)
- return distance(q, a);
- float t = dot(q - a, d)/dd;
- return distance(q, a + clamp(t, 0.0, 1.0)*d);
-}
-normal normalize (normal v) BUILTIN;
-vector normalize (vector v) BUILTIN;
-vector faceforward (vector N, vector I, vector Nref) BUILTIN;
-vector faceforward (vector N, vector I) BUILTIN;
-vector reflect (vector I, vector N) { return I - 2*dot(N,I)*N; }
-vector refract (vector I, vector N, float eta) {
- float IdotN = dot (I, N);
- float k = 1 - eta*eta * (1 - IdotN*IdotN);
- return (k < 0) ? vector(0,0,0) : (eta*I - N * (eta*IdotN + sqrt(k)));
-}
-void fresnel (vector I, normal N, float eta,
- output float Kr, output float Kt,
- output vector R, output vector T)
-{
- float sqr(float x) { return x*x; }
- float c = dot(I, N);
- if (c < 0)
- c = -c;
- R = reflect(I, N);
- float g = 1.0 / sqr(eta) - 1.0 + c * c;
- if (g >= 0.0) {
- g = sqrt (g);
- float beta = g - c;
- float F = (c * (g+c) - 1.0) / (c * beta + 1.0);
- F = 0.5 * (1.0 + sqr(F));
- F *= sqr (beta / (g+c));
- Kr = F;
- Kt = (1.0 - Kr) * eta*eta;
- // OPT: the following recomputes some of the above values, but it
- // gives us the same result as if the shader-writer called refract()
- T = refract(I, N, eta);
- } else {
- // total internal reflection
- Kr = 1.0;
- Kt = 0.0;
- T = vector (0,0,0);
- }
+vector cross(vector a, vector b) BUILTIN;
+float dot(vector a, vector b) BUILTIN;
+float length(vector v) BUILTIN;
+float distance(point a, point b) BUILTIN;
+float distance(point a, point b, point q)
+{
+ vector d = b - a;
+ float dd = dot(d, d);
+ if (dd == 0.0)
+ return distance(q, a);
+ float t = dot(q - a, d) / dd;
+ return distance(q, a + clamp(t, 0.0, 1.0) * d);
}
-
-void fresnel (vector I, normal N, float eta,
- output float Kr, output float Kt)
+normal normalize(normal v) BUILTIN;
+vector normalize(vector v) BUILTIN;
+vector faceforward(vector N, vector I, vector Nref) BUILTIN;
+vector faceforward(vector N, vector I) BUILTIN;
+vector reflect(vector I, vector N)
{
- vector R, T;
- fresnel(I, N, eta, Kr, Kt, R, T);
+ return I - 2 * dot(N, I) * N;
+}
+vector refract(vector I, vector N, float eta)
+{
+ float IdotN = dot(I, N);
+ float k = 1 - eta * eta * (1 - IdotN * IdotN);
+ return (k < 0) ? vector(0, 0, 0) : (eta * I - N * (eta * IdotN + sqrt(k)));
+}
+void fresnel(vector I,
+ normal N,
+ float eta,
+ output float Kr,
+ output float Kt,
+ output vector R,
+ output vector T)
+{
+ float sqr(float x)
+ {
+ return x * x;
+ }
+ float c = dot(I, N);
+ if (c < 0)
+ c = -c;
+ R = reflect(I, N);
+ float g = 1.0 / sqr(eta) - 1.0 + c * c;
+ if (g >= 0.0) {
+ g = sqrt(g);
+ float beta = g - c;
+ float F = (c * (g + c) - 1.0) / (c * beta + 1.0);
+ F = 0.5 * (1.0 + sqr(F));
+ F *= sqr(beta / (g + c));
+ Kr = F;
+ Kt = (1.0 - Kr) * eta * eta;
+ // OPT: the following recomputes some of the above values, but it
+ // gives us the same result as if the shader-writer called refract()
+ T = refract(I, N, eta);
+ }
+ else {
+ // total internal reflection
+ Kr = 1.0;
+ Kt = 0.0;
+ T = vector(0, 0, 0);
+ }
}
+void fresnel(vector I, normal N, float eta, output float Kr, output float Kt)
+{
+ vector R, T;
+ fresnel(I, N, eta, Kr, Kt, R, T);
+}
-normal transform (matrix Mto, normal p) BUILTIN;
-vector transform (matrix Mto, vector p) BUILTIN;
-point transform (matrix Mto, point p) BUILTIN;
-normal transform (string from, string to, normal p) BUILTIN;
-vector transform (string from, string to, vector p) BUILTIN;
-point transform (string from, string to, point p) BUILTIN;
-normal transform (string to, normal p) { return transform("common",to,p); }
-vector transform (string to, vector p) { return transform("common",to,p); }
-point transform (string to, point p) { return transform("common",to,p); }
+normal transform(matrix Mto, normal p) BUILTIN;
+vector transform(matrix Mto, vector p) BUILTIN;
+point transform(matrix Mto, point p) BUILTIN;
+normal transform(string from, string to, normal p) BUILTIN;
+vector transform(string from, string to, vector p) BUILTIN;
+point transform(string from, string to, point p) BUILTIN;
+normal transform(string to, normal p)
+{
+ return transform("common", to, p);
+}
+vector transform(string to, vector p)
+{
+ return transform("common", to, p);
+}
+point transform(string to, point p)
+{
+ return transform("common", to, p);
+}
-float transformu (string tounits, float x) BUILTIN;
-float transformu (string fromunits, string tounits, float x) BUILTIN;
+float transformu(string tounits, float x) BUILTIN;
+float transformu(string fromunits, string tounits, float x) BUILTIN;
-point rotate (point p, float angle, point a, point b)
+point rotate(point p, float angle, point a, point b)
{
- vector axis = normalize (b - a);
- float cosang, sinang;
- /* Older OSX has major issues with sincos() function,
+ vector axis = normalize(b - a);
+ float cosang, sinang;
+ /* Older OSX has major issues with sincos() function,
* it's likely a big in OSL or LLVM. For until we've
* updated to new versions of this libraries we'll
* use a workaround to prevent possible crashes on all
@@ -261,317 +371,348 @@ point rotate (point p, float angle, point a, point b)
#if 0
sincos (angle, sinang, cosang);
#else
- sinang = sin (angle);
- cosang = cos (angle);
+ sinang = sin(angle);
+ cosang = cos(angle);
#endif
- float cosang1 = 1.0 - cosang;
- float x = axis[0], y = axis[1], z = axis[2];
- matrix M = matrix (x * x + (1.0 - x * x) * cosang,
- x * y * cosang1 + z * sinang,
- x * z * cosang1 - y * sinang,
- 0.0,
- x * y * cosang1 - z * sinang,
- y * y + (1.0 - y * y) * cosang,
- y * z * cosang1 + x * sinang,
- 0.0,
- x * z * cosang1 + y * sinang,
- y * z * cosang1 - x * sinang,
- z * z + (1.0 - z * z) * cosang,
- 0.0,
- 0.0, 0.0, 0.0, 1.0);
- return transform (M, p-a) + a;
+ float cosang1 = 1.0 - cosang;
+ float x = axis[0], y = axis[1], z = axis[2];
+ matrix M = matrix(x * x + (1.0 - x * x) * cosang,
+ x * y * cosang1 + z * sinang,
+ x * z * cosang1 - y * sinang,
+ 0.0,
+ x * y * cosang1 - z * sinang,
+ y * y + (1.0 - y * y) * cosang,
+ y * z * cosang1 + x * sinang,
+ 0.0,
+ x * z * cosang1 + y * sinang,
+ y * z * cosang1 - x * sinang,
+ z * z + (1.0 - z * z) * cosang,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 1.0);
+ return transform(M, p - a) + a;
}
normal ensure_valid_reflection(normal Ng, vector I, normal N)
{
- /* The implementation here mirrors the one in kernel_montecarlo.h,
+ /* The implementation here mirrors the one in kernel_montecarlo.h,
* check there for an explanation of the algorithm. */
- float sqr(float x) { return x*x; }
-
- vector R = 2*dot(N, I)*N - I;
+ float sqr(float x)
+ {
+ return x * x;
+ }
- float threshold = min(0.9*dot(Ng, I), 0.01);
- if(dot(Ng, R) >= threshold) {
- return N;
- }
+ vector R = 2 * dot(N, I) * N - I;
- float NdotNg = dot(N, Ng);
- vector X = normalize(N - NdotNg*Ng);
+ float threshold = min(0.9 * dot(Ng, I), 0.01);
+ if (dot(Ng, R) >= threshold) {
+ return N;
+ }
- float Ix = dot(I, X), Iz = dot(I, Ng);
- float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
- float a = Ix2 + Iz2;
+ float NdotNg = dot(N, Ng);
+ vector X = normalize(N - NdotNg * Ng);
- float b = sqrt(Ix2*(a - sqr(threshold)));
- float c = Iz*threshold + a;
+ float Ix = dot(I, X), Iz = dot(I, Ng);
+ float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
+ float a = Ix2 + Iz2;
- float fac = 0.5/a;
- float N1_z2 = fac*(b+c), N2_z2 = fac*(-b+c);
- int valid1 = (N1_z2 > 1e-5) && (N1_z2 <= (1.0 + 1e-5));
- int valid2 = (N2_z2 > 1e-5) && (N2_z2 <= (1.0 + 1e-5));
+ float b = sqrt(Ix2 * (a - sqr(threshold)));
+ float c = Iz * threshold + a;
- float N_new_x, N_new_z;
- if(valid1 && valid2) {
- float N1_x = sqrt(1.0 - N1_z2), N1_z = sqrt(N1_z2);
- float N2_x = sqrt(1.0 - N2_z2), N2_z = sqrt(N2_z2);
+ float fac = 0.5 / a;
+ float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c);
+ int valid1 = (N1_z2 > 1e-5) && (N1_z2 <= (1.0 + 1e-5));
+ int valid2 = (N2_z2 > 1e-5) && (N2_z2 <= (1.0 + 1e-5));
- float R1 = 2*(N1_x*Ix + N1_z*Iz)*N1_z - Iz;
- float R2 = 2*(N2_x*Ix + N2_z*Iz)*N2_z - Iz;
+ float N_new_x, N_new_z;
+ if (valid1 && valid2) {
+ float N1_x = sqrt(1.0 - N1_z2), N1_z = sqrt(N1_z2);
+ float N2_x = sqrt(1.0 - N2_z2), N2_z = sqrt(N2_z2);
- valid1 = (R1 >= 1e-5);
- valid2 = (R2 >= 1e-5);
- if(valid1 && valid2) {
- N_new_x = (R1 < R2)? N1_x : N2_x;
- N_new_z = (R1 < R2)? N1_z : N2_z;
- }
- else {
- N_new_x = (R1 > R2)? N1_x : N2_x;
- N_new_z = (R1 > R2)? N1_z : N2_z;
- }
+ float R1 = 2 * (N1_x * Ix + N1_z * Iz) * N1_z - Iz;
+ float R2 = 2 * (N2_x * Ix + N2_z * Iz) * N2_z - Iz;
- }
- else if(valid1 || valid2) {
- float Nz2 = valid1? N1_z2 : N2_z2;
- N_new_x = sqrt(1.0 - Nz2);
- N_new_z = sqrt(Nz2);
+ valid1 = (R1 >= 1e-5);
+ valid2 = (R2 >= 1e-5);
+ if (valid1 && valid2) {
+ N_new_x = (R1 < R2) ? N1_x : N2_x;
+ N_new_z = (R1 < R2) ? N1_z : N2_z;
}
else {
- return Ng;
+ N_new_x = (R1 > R2) ? N1_x : N2_x;
+ N_new_z = (R1 > R2) ? N1_z : N2_z;
}
-
- return N_new_x*X + N_new_z*Ng;
+ }
+ else if (valid1 || valid2) {
+ float Nz2 = valid1 ? N1_z2 : N2_z2;
+ N_new_x = sqrt(1.0 - Nz2);
+ N_new_z = sqrt(Nz2);
+ }
+ else {
+ return Ng;
+ }
+
+ return N_new_x * X + N_new_z * Ng;
}
-
// Color functions
-float luminance (color c) BUILTIN;
-color blackbody (float temperatureK) BUILTIN;
-color wavelength_color (float wavelength_nm) BUILTIN;
-
-
-color transformc (string to, color x)
-{
- color rgb_to_hsv (color rgb) { // See Foley & van Dam
- float r = rgb[0], g = rgb[1], b = rgb[2];
- float mincomp = min (r, min (g, b));
- float maxcomp = max (r, max (g, b));
- float delta = maxcomp - mincomp; // chroma
- float h, s, v;
- v = maxcomp;
- if (maxcomp > 0)
- s = delta / maxcomp;
- else s = 0;
- if (s <= 0)
- h = 0;
- else {
- if (r >= maxcomp) h = (g-b) / delta;
- else if (g >= maxcomp) h = 2 + (b-r) / delta;
- else h = 4 + (r-g) / delta;
- h /= 6;
- if (h < 0)
- h += 1;
- }
- return color (h, s, v);
- }
-
- color rgb_to_hsl (color rgb) { // See Foley & van Dam
- // First convert rgb to hsv, then to hsl
- float minval = min (rgb[0], min (rgb[1], rgb[2]));
- color hsv = rgb_to_hsv (rgb);
- float maxval = hsv[2]; // v == maxval
- float h = hsv[0], s, l = (minval+maxval) / 2;
- if (minval == maxval)
- s = 0; // special 'achromatic' case, hue is 0
- else if (l <= 0.5)
- s = (maxval - minval) / (maxval + minval);
- else
- s = (maxval - minval) / (2 - maxval - minval);
- return color (h, s, l);
- }
+float luminance(color c) BUILTIN;
+color blackbody(float temperatureK) BUILTIN;
+color wavelength_color(float wavelength_nm) BUILTIN;
- color r;
- if (to == "rgb" || to == "RGB")
- r = x;
- else if (to == "hsv")
- r = rgb_to_hsv (x);
- else if (to == "hsl")
- r = rgb_to_hsl (x);
- else if (to == "YIQ")
- r = color (dot (vector(0.299, 0.587, 0.114), (vector)x),
- dot (vector(0.596, -0.275, -0.321), (vector)x),
- dot (vector(0.212, -0.523, 0.311), (vector)x));
- else if (to == "XYZ")
- r = color (dot (vector(0.412453, 0.357580, 0.180423), (vector)x),
- dot (vector(0.212671, 0.715160, 0.072169), (vector)x),
- dot (vector(0.019334, 0.119193, 0.950227), (vector)x));
+color transformc(string to, color x)
+{
+ color rgb_to_hsv(color rgb)
+ { // See Foley & van Dam
+ float r = rgb[0], g = rgb[1], b = rgb[2];
+ float mincomp = min(r, min(g, b));
+ float maxcomp = max(r, max(g, b));
+ float delta = maxcomp - mincomp; // chroma
+ float h, s, v;
+ v = maxcomp;
+ if (maxcomp > 0)
+ s = delta / maxcomp;
+ else
+ s = 0;
+ if (s <= 0)
+ h = 0;
else {
- error ("Unknown color space \"%s\"", to);
- r = x;
+ if (r >= maxcomp)
+ h = (g - b) / delta;
+ else if (g >= maxcomp)
+ h = 2 + (b - r) / delta;
+ else
+ h = 4 + (r - g) / delta;
+ h /= 6;
+ if (h < 0)
+ h += 1;
}
- return r;
+ return color(h, s, v);
+ }
+
+ color rgb_to_hsl(color rgb)
+ { // See Foley & van Dam
+ // First convert rgb to hsv, then to hsl
+ float minval = min(rgb[0], min(rgb[1], rgb[2]));
+ color hsv = rgb_to_hsv(rgb);
+ float maxval = hsv[2]; // v == maxval
+ float h = hsv[0], s, l = (minval + maxval) / 2;
+ if (minval == maxval)
+ s = 0; // special 'achromatic' case, hue is 0
+ else if (l <= 0.5)
+ s = (maxval - minval) / (maxval + minval);
+ else
+ s = (maxval - minval) / (2 - maxval - minval);
+ return color(h, s, l);
+ }
+
+ color r;
+ if (to == "rgb" || to == "RGB")
+ r = x;
+ else if (to == "hsv")
+ r = rgb_to_hsv(x);
+ else if (to == "hsl")
+ r = rgb_to_hsl(x);
+ else if (to == "YIQ")
+ r = color(dot(vector(0.299, 0.587, 0.114), (vector)x),
+ dot(vector(0.596, -0.275, -0.321), (vector)x),
+ dot(vector(0.212, -0.523, 0.311), (vector)x));
+ else if (to == "XYZ")
+ r = color(dot(vector(0.412453, 0.357580, 0.180423), (vector)x),
+ dot(vector(0.212671, 0.715160, 0.072169), (vector)x),
+ dot(vector(0.019334, 0.119193, 0.950227), (vector)x));
+ else {
+ error("Unknown color space \"%s\"", to);
+ r = x;
+ }
+ return r;
}
-
-color transformc (string from, string to, color x)
-{
- color hsv_to_rgb (color c) { // Reference: Foley & van Dam
- float h = c[0], s = c[1], v = c[2];
- color r;
- if (s < 0.0001) {
- r = v;
- } else {
- h = 6 * (h - floor(h)); // expand to [0..6)
- int hi = (int)h;
- float f = h - hi;
- float p = v * (1-s);
- float q = v * (1-s*f);
- float t = v * (1-s*(1-f));
- if (hi == 0) r = color (v, t, p);
- else if (hi == 1) r = color (q, v, p);
- else if (hi == 2) r = color (p, v, t);
- else if (hi == 3) r = color (p, q, v);
- else if (hi == 4) r = color (t, p, v);
- else r = color (v, p, q);
- }
- return r;
+color transformc(string from, string to, color x)
+{
+ color hsv_to_rgb(color c)
+ { // Reference: Foley & van Dam
+ float h = c[0], s = c[1], v = c[2];
+ color r;
+ if (s < 0.0001) {
+ r = v;
}
-
- color hsl_to_rgb (color c) {
- float h = c[0], s = c[1], l = c[2];
- // Easiest to convert hsl -> hsv, then hsv -> RGB (per Foley & van Dam)
- float v = (l <= 0.5) ? (l * (1 + s)) : (l * (1 - s) + s);
- color r;
- if (v <= 0) {
- r = 0;
- } else {
- float min = 2 * l - v;
- s = (v - min) / v;
- r = hsv_to_rgb (color (h, s, v));
- }
- return r;
+ else {
+ h = 6 * (h - floor(h)); // expand to [0..6)
+ int hi = (int)h;
+ float f = h - hi;
+ float p = v * (1 - s);
+ float q = v * (1 - s * f);
+ float t = v * (1 - s * (1 - f));
+ if (hi == 0)
+ r = color(v, t, p);
+ else if (hi == 1)
+ r = color(q, v, p);
+ else if (hi == 2)
+ r = color(p, v, t);
+ else if (hi == 3)
+ r = color(p, q, v);
+ else if (hi == 4)
+ r = color(t, p, v);
+ else
+ r = color(v, p, q);
}
+ return r;
+ }
+ color hsl_to_rgb(color c)
+ {
+ float h = c[0], s = c[1], l = c[2];
+ // Easiest to convert hsl -> hsv, then hsv -> RGB (per Foley & van Dam)
+ float v = (l <= 0.5) ? (l * (1 + s)) : (l * (1 - s) + s);
color r;
- if (from == "rgb" || from == "RGB")
- r = x;
- else if (from == "hsv")
- r = hsv_to_rgb (x);
- else if (from == "hsl")
- r = hsl_to_rgb (x);
- else if (from == "YIQ")
- r = color (dot (vector(1, 0.9557, 0.6199), (vector)x),
- dot (vector(1, -0.2716, -0.6469), (vector)x),
- dot (vector(1, -1.1082, 1.7051), (vector)x));
- else if (from == "XYZ")
- r = color (dot (vector( 3.240479, -1.537150, -0.498535), (vector)x),
- dot (vector(-0.969256, 1.875991, 0.041556), (vector)x),
- dot (vector( 0.055648, -0.204043, 1.057311), (vector)x));
+ if (v <= 0) {
+ r = 0;
+ }
else {
- error ("Unknown color space \"%s\"", to);
- r = x;
+ float min = 2 * l - v;
+ s = (v - min) / v;
+ r = hsv_to_rgb(color(h, s, v));
}
- return transformc (to, r);
+ return r;
+ }
+
+ color r;
+ if (from == "rgb" || from == "RGB")
+ r = x;
+ else if (from == "hsv")
+ r = hsv_to_rgb(x);
+ else if (from == "hsl")
+ r = hsl_to_rgb(x);
+ else if (from == "YIQ")
+ r = color(dot(vector(1, 0.9557, 0.6199), (vector)x),
+ dot(vector(1, -0.2716, -0.6469), (vector)x),
+ dot(vector(1, -1.1082, 1.7051), (vector)x));
+ else if (from == "XYZ")
+ r = color(dot(vector(3.240479, -1.537150, -0.498535), (vector)x),
+ dot(vector(-0.969256, 1.875991, 0.041556), (vector)x),
+ dot(vector(0.055648, -0.204043, 1.057311), (vector)x));
+ else {
+ error("Unknown color space \"%s\"", to);
+ r = x;
+ }
+ return transformc(to, r);
}
-
-
// Matrix functions
-float determinant (matrix m) BUILTIN;
-matrix transpose (matrix m) BUILTIN;
+float determinant(matrix m) BUILTIN;
+matrix transpose(matrix m) BUILTIN;
+// Pattern generation
+color step(color edge, color x) BUILTIN;
+point step(point edge, point x) BUILTIN;
+vector step(vector edge, vector x) BUILTIN;
+normal step(normal edge, normal x) BUILTIN;
+float step(float edge, float x) BUILTIN;
+float smoothstep(float edge0, float edge1, float x) BUILTIN;
-// Pattern generation
+float linearstep(float edge0, float edge1, float x)
+{
+ float result;
+ if (edge0 != edge1) {
+ float xclamped = clamp(x, edge0, edge1);
+ result = (xclamped - edge0) / (edge1 - edge0);
+ }
+ else { // special case: edges coincide
+ result = step(edge0, x);
+ }
+ return result;
+}
-color step (color edge, color x) BUILTIN;
-point step (point edge, point x) BUILTIN;
-vector step (vector edge, vector x) BUILTIN;
-normal step (normal edge, normal x) BUILTIN;
-float step (float edge, float x) BUILTIN;
-float smoothstep (float edge0, float edge1, float x) BUILTIN;
-
-float linearstep (float edge0, float edge1, float x) {
- float result;
- if (edge0 != edge1) {
- float xclamped = clamp (x, edge0, edge1);
- result = (xclamped - edge0) / (edge1 - edge0);
- } else { // special case: edges coincide
- result = step (edge0, x);
- }
- return result;
-}
-
-float smooth_linearstep (float edge0, float edge1, float x_, float eps_) {
- float result;
- if (edge0 != edge1) {
- float rampup (float x, float r) { return 0.5/r * x*x; }
- float width_inv = 1.0 / (edge1 - edge0);
- float eps = eps_ * width_inv;
- float x = (x_ - edge0) * width_inv;
- if (x <= -eps) result = 0;
- else if (x >= eps && x <= 1.0-eps) result = x;
- else if (x >= 1.0+eps) result = 1;
- else if (x < eps) result = rampup (x+eps, 2.0*eps);
- else /* if (x < 1.0+eps) */ result = 1.0 - rampup (1.0+eps - x, 2.0*eps);
- } else {
- result = step (edge0, x_);
+float smooth_linearstep(float edge0, float edge1, float x_, float eps_)
+{
+ float result;
+ if (edge0 != edge1) {
+ float rampup(float x, float r)
+ {
+ return 0.5 / r * x * x;
}
- return result;
+ float width_inv = 1.0 / (edge1 - edge0);
+ float eps = eps_ * width_inv;
+ float x = (x_ - edge0) * width_inv;
+ if (x <= -eps)
+ result = 0;
+ else if (x >= eps && x <= 1.0 - eps)
+ result = x;
+ else if (x >= 1.0 + eps)
+ result = 1;
+ else if (x < eps)
+ result = rampup(x + eps, 2.0 * eps);
+ else /* if (x < 1.0+eps) */
+ result = 1.0 - rampup(1.0 + eps - x, 2.0 * eps);
+ }
+ else {
+ result = step(edge0, x_);
+ }
+ return result;
}
-float aastep (float edge, float s, float dedge, float ds) {
- // Box filtered AA step
- float width = fabs(dedge) + fabs(ds);
- float halfwidth = 0.5*width;
- float e1 = edge-halfwidth;
- return (s <= e1) ? 0.0 : ((s >= (edge+halfwidth)) ? 1.0 : (s-e1)/width);
+float aastep(float edge, float s, float dedge, float ds)
+{
+ // Box filtered AA step
+ float width = fabs(dedge) + fabs(ds);
+ float halfwidth = 0.5 * width;
+ float e1 = edge - halfwidth;
+ return (s <= e1) ? 0.0 : ((s >= (edge + halfwidth)) ? 1.0 : (s - e1) / width);
}
-float aastep (float edge, float s, float ds) {
- return aastep (edge, s, filterwidth(edge), ds);
+float aastep(float edge, float s, float ds)
+{
+ return aastep(edge, s, filterwidth(edge), ds);
}
-float aastep (float edge, float s) {
- return aastep (edge, s, filterwidth(edge), filterwidth(s));
+float aastep(float edge, float s)
+{
+ return aastep(edge, s, filterwidth(edge), filterwidth(s));
}
-
// Derivatives and area operators
-
// Displacement functions
-
// String functions
-int strlen (string s) BUILTIN;
-int hash (string s) BUILTIN;
-int getchar (string s, int index) BUILTIN;
-int startswith (string s, string prefix) BUILTIN;
-int endswith (string s, string suffix) BUILTIN;
-string substr (string s, int start, int len) BUILTIN;
-string substr (string s, int start) { return substr (s, start, strlen(s)); }
-float stof (string str) BUILTIN;
-int stoi (string str) BUILTIN;
+int strlen(string s) BUILTIN;
+int hash(string s) BUILTIN;
+int getchar(string s, int index) BUILTIN;
+int startswith(string s, string prefix) BUILTIN;
+int endswith(string s, string suffix) BUILTIN;
+string substr(string s, int start, int len) BUILTIN;
+string substr(string s, int start)
+{
+ return substr(s, start, strlen(s));
+}
+float stof(string str) BUILTIN;
+int stoi(string str) BUILTIN;
// Define concat in terms of shorter concat
-string concat (string a, string b, string c) {
- return concat(concat(a,b), c);
+string concat(string a, string b, string c)
+{
+ return concat(concat(a, b), c);
}
-string concat (string a, string b, string c, string d) {
- return concat(concat(a,b,c), d);
+string concat(string a, string b, string c, string d)
+{
+ return concat(concat(a, b, c), d);
}
-string concat (string a, string b, string c, string d, string e) {
- return concat(concat(a,b,c,d), e);
+string concat(string a, string b, string c, string d, string e)
+{
+ return concat(concat(a, b, c, d), e);
}
-string concat (string a, string b, string c, string d, string e, string f) {
- return concat(concat(a,b,c,d,e), f);
+string concat(string a, string b, string c, string d, string e, string f)
+{
+ return concat(concat(a, b, c, d, e), f);
}
-
// Texture
-
// Closures
closure color diffuse(normal N) BUILTIN;
@@ -591,14 +732,18 @@ closure color microfacet_multi_ggx(normal N, float ag, color C) BUILTIN;
closure color microfacet_multi_ggx_aniso(normal N, vector T, float ax, float ay, color C) BUILTIN;
closure color microfacet_multi_ggx_glass(normal N, float ag, float eta, color C) BUILTIN;
closure color microfacet_ggx_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
-closure color microfacet_ggx_aniso_fresnel(normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN;
-closure color microfacet_multi_ggx_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
-closure color microfacet_multi_ggx_aniso_fresnel(normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN;
-closure color microfacet_multi_ggx_glass_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
+closure color microfacet_ggx_aniso_fresnel(
+ normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN;
+closure color
+microfacet_multi_ggx_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
+closure color microfacet_multi_ggx_aniso_fresnel(
+ normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN;
+closure color
+microfacet_multi_ggx_glass_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
closure color microfacet_beckmann(normal N, float ab) BUILTIN;
closure color microfacet_beckmann_aniso(normal N, vector T, float ax, float ay) BUILTIN;
closure color microfacet_beckmann_refraction(normal N, float ab, float eta) BUILTIN;
-closure color ashikhmin_shirley(normal N, vector T,float ax, float ay) BUILTIN;
+closure color ashikhmin_shirley(normal N, vector T, float ax, float ay) BUILTIN;
closure color ashikhmin_velvet(normal N, float sigma) BUILTIN;
closure color emission() BUILTIN;
closure color background() BUILTIN;
@@ -612,78 +757,97 @@ closure color principled_clearcoat(normal N, float clearcoat, float clearcoat_ro
closure color bssrdf(string method, normal N, vector radius, color albedo) BUILTIN;
// Hair
-closure color hair_reflection(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN;
-closure color hair_transmission(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN;
-closure color principled_hair(normal N, color sigma, float roughnessu, float roughnessv, float coat, float alpha, float eta) BUILTIN;
+closure color
+hair_reflection(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN;
+closure color
+hair_transmission(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN;
+closure color principled_hair(normal N,
+ color sigma,
+ float roughnessu,
+ float roughnessv,
+ float coat,
+ float alpha,
+ float eta) BUILTIN;
// Volume
closure color henyey_greenstein(float g) BUILTIN;
closure color absorption() BUILTIN;
// OSL 1.5 Microfacet functions
-closure color microfacet(string distribution, normal N, vector U, float xalpha, float yalpha, float eta, int refract) {
- /* GGX */
- if (distribution == "ggx" || distribution == "default") {
- if (!refract) {
- if (xalpha == yalpha) {
- /* Isotropic */
- return microfacet_ggx(N, xalpha);
- }
- else {
- /* Anisotropic */
- return microfacet_ggx_aniso(N, U, xalpha, yalpha);
- }
- }
- else {
- return microfacet_ggx_refraction(N, xalpha, eta);
- }
- }
- /* Beckmann */
- else {
- if (!refract) {
- if (xalpha == yalpha) {
- /* Isotropic */
- return microfacet_beckmann(N, xalpha);
- }
- else {
- /* Anisotropic */
- return microfacet_beckmann_aniso(N, U, xalpha, yalpha);
- }
- }
- else {
- return microfacet_beckmann_refraction(N, xalpha, eta);
- }
- }
-}
-
-closure color microfacet (string distribution, normal N, float alpha, float eta, int refract) {
- return microfacet(distribution, N, vector(0), alpha, alpha, eta, refract);
+closure color microfacet(
+ string distribution, normal N, vector U, float xalpha, float yalpha, float eta, int refract)
+{
+ /* GGX */
+ if (distribution == "ggx" || distribution == "default") {
+ if (!refract) {
+ if (xalpha == yalpha) {
+ /* Isotropic */
+ return microfacet_ggx(N, xalpha);
+ }
+ else {
+ /* Anisotropic */
+ return microfacet_ggx_aniso(N, U, xalpha, yalpha);
+ }
+ }
+ else {
+ return microfacet_ggx_refraction(N, xalpha, eta);
+ }
+ }
+ /* Beckmann */
+ else {
+ if (!refract) {
+ if (xalpha == yalpha) {
+ /* Isotropic */
+ return microfacet_beckmann(N, xalpha);
+ }
+ else {
+ /* Anisotropic */
+ return microfacet_beckmann_aniso(N, U, xalpha, yalpha);
+ }
+ }
+ else {
+ return microfacet_beckmann_refraction(N, xalpha, eta);
+ }
+ }
}
+closure color microfacet(string distribution, normal N, float alpha, float eta, int refract)
+{
+ return microfacet(distribution, N, vector(0), alpha, alpha, eta, refract);
+}
// Renderer state
-int backfacing () BUILTIN;
-int raytype (string typename) BUILTIN;
+int backfacing() BUILTIN;
+int raytype(string typename) BUILTIN;
// the individual 'isFOOray' functions are deprecated
-int iscameraray () { return raytype("camera"); }
-int isdiffuseray () { return raytype("diffuse"); }
-int isglossyray () { return raytype("glossy"); }
-int isshadowray () { return raytype("shadow"); }
-int getmatrix (string fromspace, string tospace, output matrix M) BUILTIN;
-int getmatrix (string fromspace, output matrix M) {
- return getmatrix (fromspace, "common", M);
+int iscameraray()
+{
+ return raytype("camera");
+}
+int isdiffuseray()
+{
+ return raytype("diffuse");
+}
+int isglossyray()
+{
+ return raytype("glossy");
+}
+int isshadowray()
+{
+ return raytype("shadow");
+}
+int getmatrix(string fromspace, string tospace, output matrix M) BUILTIN;
+int getmatrix(string fromspace, output matrix M)
+{
+ return getmatrix(fromspace, "common", M);
}
-
// Miscellaneous
-
-
-
#undef BUILTIN
#undef BUILTIN_DERIV
#undef PERCOMP1
#undef PERCOMP2
#undef PERCOMP2F
-#endif /* CCL_STDOSL_H */
+#endif /* CCL_STDOSL_H */
diff --git a/intern/cycles/kernel/split/kernel_branched.h b/intern/cycles/kernel/split/kernel_branched.h
index ed0a82067f1..e08d87ab618 100644
--- a/intern/cycles/kernel/split/kernel_branched.h
+++ b/intern/cycles/kernel/split/kernel_branched.h
@@ -19,215 +19,213 @@ CCL_NAMESPACE_BEGIN
#ifdef __BRANCHED_PATH__
/* sets up the various state needed to do an indirect loop */
-ccl_device_inline void kernel_split_branched_path_indirect_loop_init(KernelGlobals *kg, int ray_index)
+ccl_device_inline void kernel_split_branched_path_indirect_loop_init(KernelGlobals *kg,
+ int ray_index)
{
- SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
- /* save a copy of the state to restore later */
-#define BRANCHED_STORE(name) \
- branched_state->name = kernel_split_state.name[ray_index];
+ /* save a copy of the state to restore later */
+# define BRANCHED_STORE(name) branched_state->name = kernel_split_state.name[ray_index];
- BRANCHED_STORE(path_state);
- BRANCHED_STORE(throughput);
- BRANCHED_STORE(ray);
- BRANCHED_STORE(isect);
- BRANCHED_STORE(ray_state);
+ BRANCHED_STORE(path_state);
+ BRANCHED_STORE(throughput);
+ BRANCHED_STORE(ray);
+ BRANCHED_STORE(isect);
+ BRANCHED_STORE(ray_state);
- *kernel_split_sd(branched_state_sd, ray_index) = *kernel_split_sd(sd, ray_index);
- for(int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) {
- kernel_split_sd(branched_state_sd, ray_index)->closure[i] = kernel_split_sd(sd, ray_index)->closure[i];
- }
+ *kernel_split_sd(branched_state_sd, ray_index) = *kernel_split_sd(sd, ray_index);
+ for (int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) {
+ kernel_split_sd(branched_state_sd, ray_index)->closure[i] =
+ kernel_split_sd(sd, ray_index)->closure[i];
+ }
-#undef BRANCHED_STORE
+# undef BRANCHED_STORE
- /* set loop counters to initial position */
- branched_state->next_closure = 0;
- branched_state->next_sample = 0;
+ /* set loop counters to initial position */
+ branched_state->next_closure = 0;
+ branched_state->next_sample = 0;
}
/* ends an indirect loop and restores the previous state */
-ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobals *kg, int ray_index)
+ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobals *kg,
+ int ray_index)
{
- SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
- /* restore state */
-#define BRANCHED_RESTORE(name) \
- kernel_split_state.name[ray_index] = branched_state->name;
+ /* restore state */
+# define BRANCHED_RESTORE(name) kernel_split_state.name[ray_index] = branched_state->name;
- BRANCHED_RESTORE(path_state);
- BRANCHED_RESTORE(throughput);
- BRANCHED_RESTORE(ray);
- BRANCHED_RESTORE(isect);
- BRANCHED_RESTORE(ray_state);
+ BRANCHED_RESTORE(path_state);
+ BRANCHED_RESTORE(throughput);
+ BRANCHED_RESTORE(ray);
+ BRANCHED_RESTORE(isect);
+ BRANCHED_RESTORE(ray_state);
- *kernel_split_sd(sd, ray_index) = *kernel_split_sd(branched_state_sd, ray_index);
- for(int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) {
- kernel_split_sd(sd, ray_index)->closure[i] = kernel_split_sd(branched_state_sd, ray_index)->closure[i];
- }
+ *kernel_split_sd(sd, ray_index) = *kernel_split_sd(branched_state_sd, ray_index);
+ for (int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) {
+ kernel_split_sd(sd, ray_index)->closure[i] =
+ kernel_split_sd(branched_state_sd, ray_index)->closure[i];
+ }
-#undef BRANCHED_RESTORE
+# undef BRANCHED_RESTORE
- /* leave indirect loop */
- REMOVE_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT);
+ /* leave indirect loop */
+ REMOVE_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT);
}
-ccl_device_inline bool kernel_split_branched_indirect_start_shared(KernelGlobals *kg, int ray_index)
+ccl_device_inline bool kernel_split_branched_indirect_start_shared(KernelGlobals *kg,
+ int ray_index)
{
- ccl_global char *ray_state = kernel_split_state.ray_state;
+ ccl_global char *ray_state = kernel_split_state.ray_state;
- int inactive_ray = dequeue_ray_index(QUEUE_INACTIVE_RAYS,
- kernel_split_state.queue_data, kernel_split_params.queue_size, kernel_split_params.queue_index);
+ int inactive_ray = dequeue_ray_index(QUEUE_INACTIVE_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ kernel_split_params.queue_index);
- if(!IS_STATE(ray_state, inactive_ray, RAY_INACTIVE)) {
- return false;
- }
+ if (!IS_STATE(ray_state, inactive_ray, RAY_INACTIVE)) {
+ return false;
+ }
-#define SPLIT_DATA_ENTRY(type, name, num) \
- if(num) { \
- kernel_split_state.name[inactive_ray] = kernel_split_state.name[ray_index]; \
- }
- SPLIT_DATA_ENTRIES_BRANCHED_SHARED
-#undef SPLIT_DATA_ENTRY
+# define SPLIT_DATA_ENTRY(type, name, num) \
+ if (num) { \
+ kernel_split_state.name[inactive_ray] = kernel_split_state.name[ray_index]; \
+ }
+ SPLIT_DATA_ENTRIES_BRANCHED_SHARED
+# undef SPLIT_DATA_ENTRY
- *kernel_split_sd(sd, inactive_ray) = *kernel_split_sd(sd, ray_index);
- for(int i = 0; i < kernel_split_sd(sd, ray_index)->num_closure; i++) {
- kernel_split_sd(sd, inactive_ray)->closure[i] = kernel_split_sd(sd, ray_index)->closure[i];
- }
+ *kernel_split_sd(sd, inactive_ray) = *kernel_split_sd(sd, ray_index);
+ for (int i = 0; i < kernel_split_sd(sd, ray_index)->num_closure; i++) {
+ kernel_split_sd(sd, inactive_ray)->closure[i] = kernel_split_sd(sd, ray_index)->closure[i];
+ }
- kernel_split_state.branched_state[inactive_ray].shared_sample_count = 0;
- kernel_split_state.branched_state[inactive_ray].original_ray = ray_index;
- kernel_split_state.branched_state[inactive_ray].waiting_on_shared_samples = false;
+ kernel_split_state.branched_state[inactive_ray].shared_sample_count = 0;
+ kernel_split_state.branched_state[inactive_ray].original_ray = ray_index;
+ kernel_split_state.branched_state[inactive_ray].waiting_on_shared_samples = false;
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- PathRadiance *inactive_L = &kernel_split_state.path_radiance[inactive_ray];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ PathRadiance *inactive_L = &kernel_split_state.path_radiance[inactive_ray];
- path_radiance_init(inactive_L, kernel_data.film.use_light_pass);
- path_radiance_copy_indirect(inactive_L, L);
+ path_radiance_init(inactive_L, kernel_data.film.use_light_pass);
+ path_radiance_copy_indirect(inactive_L, L);
- ray_state[inactive_ray] = RAY_REGENERATED;
- ADD_RAY_FLAG(ray_state, inactive_ray, RAY_BRANCHED_INDIRECT_SHARED);
- ADD_RAY_FLAG(ray_state, inactive_ray, IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT));
+ ray_state[inactive_ray] = RAY_REGENERATED;
+ ADD_RAY_FLAG(ray_state, inactive_ray, RAY_BRANCHED_INDIRECT_SHARED);
+ ADD_RAY_FLAG(ray_state, inactive_ray, IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT));
- atomic_fetch_and_inc_uint32((ccl_global uint*)&kernel_split_state.branched_state[ray_index].shared_sample_count);
+ atomic_fetch_and_inc_uint32(
+ (ccl_global uint *)&kernel_split_state.branched_state[ray_index].shared_sample_count);
- return true;
+ return true;
}
/* bounce off surface and integrate indirect light */
-ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(KernelGlobals *kg,
- int ray_index,
- float num_samples_adjust,
- ShaderData *saved_sd,
- bool reset_path_state,
- bool wait_for_shared)
+ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(
+ KernelGlobals *kg,
+ int ray_index,
+ float num_samples_adjust,
+ ShaderData *saved_sd,
+ bool reset_path_state,
+ bool wait_for_shared)
{
- SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
-
- ShaderData *sd = saved_sd;
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- float3 throughput = branched_state->throughput;
- ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
-
- float sum_sample_weight = 0.0f;
-#ifdef __DENOISING_FEATURES__
- if(ps->denoising_feature_weight > 0.0f) {
- for(int i = 0; i < sd->num_closure; i++) {
- const ShaderClosure *sc = &sd->closure[i];
-
- /* transparency is not handled here, but in outer loop */
- if(!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
- continue;
- }
-
- sum_sample_weight += sc->sample_weight;
- }
- }
- else {
- sum_sample_weight = 1.0f;
- }
-#endif /* __DENOISING_FEATURES__ */
-
- for(int i = branched_state->next_closure; i < sd->num_closure; i++) {
- const ShaderClosure *sc = &sd->closure[i];
-
- if(!CLOSURE_IS_BSDF(sc->type))
- continue;
- /* transparency is not handled here, but in outer loop */
- if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID)
- continue;
-
- int num_samples;
-
- if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
- num_samples = kernel_data.integrator.diffuse_samples;
- else if(CLOSURE_IS_BSDF_BSSRDF(sc->type))
- num_samples = 1;
- else if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
- num_samples = kernel_data.integrator.glossy_samples;
- else
- num_samples = kernel_data.integrator.transmission_samples;
-
- num_samples = ceil_to_int(num_samples_adjust*num_samples);
-
- float num_samples_inv = num_samples_adjust/num_samples;
-
- for(int j = branched_state->next_sample; j < num_samples; j++) {
- if(reset_path_state) {
- *ps = branched_state->path_state;
- }
-
- ps->rng_hash = cmj_hash(branched_state->path_state.rng_hash, i);
-
- ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
- *tp = throughput;
-
- ccl_global Ray *bsdf_ray = &kernel_split_state.ray[ray_index];
-
- if(!kernel_branched_path_surface_bounce(kg,
- sd,
- sc,
- j,
- num_samples,
- tp,
- ps,
- &L->state,
- bsdf_ray,
- sum_sample_weight))
- {
- continue;
- }
-
- ps->rng_hash = branched_state->path_state.rng_hash;
-
- /* update state for next iteration */
- branched_state->next_closure = i;
- branched_state->next_sample = j+1;
-
- /* start the indirect path */
- *tp *= num_samples_inv;
-
- if(kernel_split_branched_indirect_start_shared(kg, ray_index)) {
- continue;
- }
-
- return true;
- }
-
- branched_state->next_sample = 0;
- }
-
- branched_state->next_closure = sd->num_closure;
-
- if(wait_for_shared) {
- branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
- if(branched_state->waiting_on_shared_samples) {
- return true;
- }
- }
-
- return false;
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ ShaderData *sd = saved_sd;
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ float3 throughput = branched_state->throughput;
+ ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
+
+ float sum_sample_weight = 0.0f;
+# ifdef __DENOISING_FEATURES__
+ if (ps->denoising_feature_weight > 0.0f) {
+ for (int i = 0; i < sd->num_closure; i++) {
+ const ShaderClosure *sc = &sd->closure[i];
+
+ /* transparency is not handled here, but in outer loop */
+ if (!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
+ continue;
+ }
+
+ sum_sample_weight += sc->sample_weight;
+ }
+ }
+ else {
+ sum_sample_weight = 1.0f;
+ }
+# endif /* __DENOISING_FEATURES__ */
+
+ for (int i = branched_state->next_closure; i < sd->num_closure; i++) {
+ const ShaderClosure *sc = &sd->closure[i];
+
+ if (!CLOSURE_IS_BSDF(sc->type))
+ continue;
+ /* transparency is not handled here, but in outer loop */
+ if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID)
+ continue;
+
+ int num_samples;
+
+ if (CLOSURE_IS_BSDF_DIFFUSE(sc->type))
+ num_samples = kernel_data.integrator.diffuse_samples;
+ else if (CLOSURE_IS_BSDF_BSSRDF(sc->type))
+ num_samples = 1;
+ else if (CLOSURE_IS_BSDF_GLOSSY(sc->type))
+ num_samples = kernel_data.integrator.glossy_samples;
+ else
+ num_samples = kernel_data.integrator.transmission_samples;
+
+ num_samples = ceil_to_int(num_samples_adjust * num_samples);
+
+ float num_samples_inv = num_samples_adjust / num_samples;
+
+ for (int j = branched_state->next_sample; j < num_samples; j++) {
+ if (reset_path_state) {
+ *ps = branched_state->path_state;
+ }
+
+ ps->rng_hash = cmj_hash(branched_state->path_state.rng_hash, i);
+
+ ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
+ *tp = throughput;
+
+ ccl_global Ray *bsdf_ray = &kernel_split_state.ray[ray_index];
+
+ if (!kernel_branched_path_surface_bounce(
+ kg, sd, sc, j, num_samples, tp, ps, &L->state, bsdf_ray, sum_sample_weight)) {
+ continue;
+ }
+
+ ps->rng_hash = branched_state->path_state.rng_hash;
+
+ /* update state for next iteration */
+ branched_state->next_closure = i;
+ branched_state->next_sample = j + 1;
+
+ /* start the indirect path */
+ *tp *= num_samples_inv;
+
+ if (kernel_split_branched_indirect_start_shared(kg, ray_index)) {
+ continue;
+ }
+
+ return true;
+ }
+
+ branched_state->next_sample = 0;
+ }
+
+ branched_state->next_closure = sd->num_closure;
+
+ if (wait_for_shared) {
+ branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+ if (branched_state->waiting_on_shared_samples) {
+ return true;
+ }
+ }
+
+ return false;
}
-#endif /* __BRANCHED_PATH__ */
+#endif /* __BRANCHED_PATH__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h
index 18eec6372f1..e77743350dc 100644
--- a/intern/cycles/kernel/split/kernel_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_buffer_update.h
@@ -41,132 +41,133 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_buffer_update(KernelGlobals *kg,
ccl_local_param unsigned int *local_queue_atomics)
{
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(ray_index == 0) {
- /* We will empty this queue in this kernel. */
- kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
- }
- char enqueue_flag = 0;
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (ray_index == 0) {
+ /* We will empty this queue in this kernel. */
+ kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
+ }
+ char enqueue_flag = 0;
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
#ifdef __COMPUTE_DEVICE_GPU__
- /* If we are executing on a GPU device, we exit all threads that are not
- * required.
- *
- * If we are executing on a CPU device, then we need to keep all threads
- * active since we have barrier() calls later in the kernel. CPU devices,
- * expect all threads to execute barrier statement.
- */
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
+ /* If we are executing on a GPU device, we exit all threads that are not
+ * required.
+ *
+ * If we are executing on a CPU device, then we need to keep all threads
+ * active since we have barrier() calls later in the kernel. CPU devices
+ * expect all threads to execute the barrier statement.
+ */
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
#endif
#ifndef __COMPUTE_DEVICE_GPU__
- if(ray_index != QUEUE_EMPTY_SLOT) {
+ if (ray_index != QUEUE_EMPTY_SLOT) {
#endif
- ccl_global char *ray_state = kernel_split_state.ray_state;
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- bool ray_was_updated = false;
-
- if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
- ray_was_updated = true;
- uint sample = state->sample;
- uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
- ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
-
- /* accumulate result in output buffer */
- kernel_write_result(kg, buffer, sample, L);
-
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
- }
-
- if(kernel_data.film.cryptomatte_passes) {
- /* Make sure no thread is writing to the buffers. */
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
- if(ray_was_updated && state->sample - 1 == kernel_data.integrator.aa_samples) {
- uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
- ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
- ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte;
- kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
- }
- }
-
- if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
- /* We have completed current work; So get next work */
- ccl_global uint *work_pools = kernel_split_params.work_pools;
- uint total_work_size = kernel_split_params.total_work_size;
- uint work_index;
-
- if(!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
- /* If work is invalid, this means no more work is available and the thread may exit */
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
- }
-
- if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
- ccl_global WorkTile *tile = &kernel_split_params.tile;
- uint x, y, sample;
- get_work_pixel(tile, work_index, &x, &y, &sample);
-
- /* Store buffer offset for writing to passes. */
- uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride;
- kernel_split_state.buffer_offset[ray_index] = buffer_offset;
-
- /* Initialize random numbers and ray. */
- uint rng_hash;
- kernel_path_trace_setup(kg, sample, x, y, &rng_hash, ray);
-
- if(ray->t != 0.0f) {
- /* Initialize throughput, path radiance, Ray, PathState;
- * These rays proceed with path-iteration.
- */
- *throughput = make_float3(1.0f, 1.0f, 1.0f);
- path_radiance_init(L, kernel_data.film.use_light_pass);
- path_state_init(kg,
- AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
- state,
- rng_hash,
- sample,
- ray);
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ bool ray_was_updated = false;
+
+ if (IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+ ray_was_updated = true;
+ uint sample = state->sample;
+ uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+
+ /* accumulate result in output buffer */
+ kernel_write_result(kg, buffer, sample, L);
+
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
+ }
+
+ if (kernel_data.film.cryptomatte_passes) {
+ /* Make sure no thread is writing to the buffers. */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (ray_was_updated && state->sample - 1 == kernel_data.integrator.aa_samples) {
+ uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+ ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte;
+ kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
+ }
+ }
+
+ if (IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
+ /* We have completed current work; So get next work */
+ ccl_global uint *work_pools = kernel_split_params.work_pools;
+ uint total_work_size = kernel_split_params.total_work_size;
+ uint work_index;
+
+ if (!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
+ /* If work is invalid, this means no more work is available and the thread may exit */
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
+ }
+
+ if (IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
+ ccl_global WorkTile *tile = &kernel_split_params.tile;
+ uint x, y, sample;
+ get_work_pixel(tile, work_index, &x, &y, &sample);
+
+ /* Store buffer offset for writing to passes. */
+ uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride;
+ kernel_split_state.buffer_offset[ray_index] = buffer_offset;
+
+ /* Initialize random numbers and ray. */
+ uint rng_hash;
+ kernel_path_trace_setup(kg, sample, x, y, &rng_hash, ray);
+
+ if (ray->t != 0.0f) {
+ /* Initialize throughput, path radiance, Ray, PathState;
+ * These rays proceed with path-iteration.
+ */
+ *throughput = make_float3(1.0f, 1.0f, 1.0f);
+ path_radiance_init(L, kernel_data.film.use_light_pass);
+ path_state_init(kg,
+ AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
+ state,
+ rng_hash,
+ sample,
+ ray);
#ifdef __SUBSURFACE__
- kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
+ kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
#endif
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- enqueue_flag = 1;
- }
- else {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
- }
- }
- }
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ enqueue_flag = 1;
+ }
+ else {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
+ }
+ }
+ }
#ifndef __COMPUTE_DEVICE_GPU__
- }
+ }
#endif
- /* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS;
- * These rays will be made active during next SceneIntersectkernel.
- */
- enqueue_ray_index_local(ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
+ /* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS;
+ * These rays will be made active during the next scene-intersect kernel.
+ */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ enqueue_flag,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 77fb61b80a8..52930843f56 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -28,82 +28,88 @@ ccl_device void kernel_data_init(
#else
void KERNEL_FUNCTION_FULL_NAME(data_init)(
#endif
- KernelGlobals *kg,
- ccl_constant KernelData *data,
- ccl_global void *split_data_buffer,
- int num_elements,
- ccl_global char *ray_state,
+ KernelGlobals *kg,
+ ccl_constant KernelData *data,
+ ccl_global void *split_data_buffer,
+ int num_elements,
+ ccl_global char *ray_state,
#ifdef __KERNEL_OPENCL__
- KERNEL_BUFFER_PARAMS,
+ KERNEL_BUFFER_PARAMS,
#endif
- int start_sample,
- int end_sample,
- int sx, int sy, int sw, int sh, int offset, int stride,
- ccl_global int *Queue_index, /* Tracks the number of elements in queues */
- int queuesize, /* size (capacity) of the queue */
- ccl_global char *use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */
- ccl_global unsigned int *work_pools, /* Work pool for each work group */
- unsigned int num_samples,
- ccl_global float *buffer)
+ int start_sample,
+ int end_sample,
+ int sx,
+ int sy,
+ int sw,
+ int sh,
+ int offset,
+ int stride,
+ ccl_global int *Queue_index, /* Tracks the number of elements in queues */
+ int queuesize, /* size (capacity) of the queue */
+ ccl_global char *
+ use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */
+ ccl_global unsigned int *work_pools, /* Work pool for each work group */
+ unsigned int num_samples,
+ ccl_global float *buffer)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, data_init);
+ STUB_ASSERT(KERNEL_ARCH, data_init);
#else
-#ifdef __KERNEL_OPENCL__
- kg->data = data;
-#endif
+# ifdef __KERNEL_OPENCL__
+ kg->data = data;
+# endif
- kernel_split_params.tile.x = sx;
- kernel_split_params.tile.y = sy;
- kernel_split_params.tile.w = sw;
- kernel_split_params.tile.h = sh;
+ kernel_split_params.tile.x = sx;
+ kernel_split_params.tile.y = sy;
+ kernel_split_params.tile.w = sw;
+ kernel_split_params.tile.h = sh;
- kernel_split_params.tile.start_sample = start_sample;
- kernel_split_params.tile.num_samples = num_samples;
+ kernel_split_params.tile.start_sample = start_sample;
+ kernel_split_params.tile.num_samples = num_samples;
- kernel_split_params.tile.offset = offset;
- kernel_split_params.tile.stride = stride;
+ kernel_split_params.tile.offset = offset;
+ kernel_split_params.tile.stride = stride;
- kernel_split_params.tile.buffer = buffer;
+ kernel_split_params.tile.buffer = buffer;
- kernel_split_params.total_work_size = sw * sh * num_samples;
+ kernel_split_params.total_work_size = sw * sh * num_samples;
- kernel_split_params.work_pools = work_pools;
+ kernel_split_params.work_pools = work_pools;
- kernel_split_params.queue_index = Queue_index;
- kernel_split_params.queue_size = queuesize;
- kernel_split_params.use_queues_flag = use_queues_flag;
+ kernel_split_params.queue_index = Queue_index;
+ kernel_split_params.queue_size = queuesize;
+ kernel_split_params.use_queues_flag = use_queues_flag;
- split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state);
+ split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state);
-#ifdef __KERNEL_OPENCL__
- kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
- kernel_set_buffer_info(kg);
-#endif
+# ifdef __KERNEL_OPENCL__
+ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
+ kernel_set_buffer_info(kg);
+# endif
+
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+
+ /* Initialize queue data and queue index. */
+ if (thread_index < queuesize) {
+ for (int i = 0; i < NUM_QUEUES; i++) {
+ kernel_split_state.queue_data[i * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
+ }
+ }
+
+ if (thread_index == 0) {
+ for (int i = 0; i < NUM_QUEUES; i++) {
+ Queue_index[i] = 0;
+ }
- int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-
- /* Initialize queue data and queue index. */
- if(thread_index < queuesize) {
- for(int i = 0; i < NUM_QUEUES; i++) {
- kernel_split_state.queue_data[i * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
- }
- }
-
- if(thread_index == 0) {
- for(int i = 0; i < NUM_QUEUES; i++) {
- Queue_index[i] = 0;
- }
-
- /* The scene-intersect kernel should not use the queues very first time.
- * since the queue would be empty.
- */
- *use_queues_flag = 0;
- }
-#endif /* KERENL_STUB */
+ /* The scene-intersect kernel should not use the queues the very first time,
+ * since the queues would be empty.
+ */
+ *use_queues_flag = 0;
+ }
+#endif /* KERNEL_STUB */
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
index ca79602c565..b2ca59d60cc 100644
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -43,116 +43,111 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_direct_lighting(KernelGlobals *kg,
ccl_local_param unsigned int *local_queue_atomics)
{
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- char enqueue_flag = 0;
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
-
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
-
- /* direct lighting */
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ char enqueue_flag = 0;
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
+
+ if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+
+ /* direct lighting */
#ifdef __EMISSION__
- bool flag = (kernel_data.integrator.use_direct_light &&
- (sd->flag & SD_BSDF_HAS_EVAL));
+ bool flag = (kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL));
# ifdef __BRANCHED_PATH__
- if(flag && kernel_data.integrator.branched) {
- flag = false;
- enqueue_flag = 1;
- }
-# endif /* __BRANCHED_PATH__ */
+ if (flag && kernel_data.integrator.branched) {
+ flag = false;
+ enqueue_flag = 1;
+ }
+# endif /* __BRANCHED_PATH__ */
# ifdef __SHADOW_TRICKS__
- if(flag && state->flag & PATH_RAY_SHADOW_CATCHER) {
- flag = false;
- enqueue_flag = 1;
- }
-# endif /* __SHADOW_TRICKS__ */
-
- if(flag) {
- /* Sample illumination from lights to find path contribution. */
- float light_u, light_v;
- path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
- float terminate = path_state_rng_light_termination(kg, state);
-
- LightSample ls;
- if(light_sample(kg,
- light_u, light_v,
- sd->time,
- sd->P,
- state->bounce,
- &ls)) {
-
- Ray light_ray;
- light_ray.time = sd->time;
-
- BsdfEval L_light;
- bool is_lamp;
- if(direct_emission(kg,
- sd,
- AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
- &ls,
- state,
- &light_ray,
- &L_light,
- &is_lamp,
- terminate))
- {
- /* Write intermediate data to global memory to access from
- * the next kernel.
- */
- kernel_split_state.light_ray[ray_index] = light_ray;
- kernel_split_state.bsdf_eval[ray_index] = L_light;
- kernel_split_state.is_lamp[ray_index] = is_lamp;
- /* Mark ray state for next shadow kernel. */
- enqueue_flag = 1;
- }
- }
- }
-#endif /* __EMISSION__ */
- }
+ if (flag && state->flag & PATH_RAY_SHADOW_CATCHER) {
+ flag = false;
+ enqueue_flag = 1;
+ }
+# endif /* __SHADOW_TRICKS__ */
+
+ if (flag) {
+ /* Sample illumination from lights to find path contribution. */
+ float light_u, light_v;
+ path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
+ float terminate = path_state_rng_light_termination(kg, state);
+
+ LightSample ls;
+ if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+
+ Ray light_ray;
+ light_ray.time = sd->time;
+
+ BsdfEval L_light;
+ bool is_lamp;
+ if (direct_emission(kg,
+ sd,
+ AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
+ &ls,
+ state,
+ &light_ray,
+ &L_light,
+ &is_lamp,
+ terminate)) {
+ /* Write intermediate data to global memory to access from
+ * the next kernel.
+ */
+ kernel_split_state.light_ray[ray_index] = light_ray;
+ kernel_split_state.bsdf_eval[ray_index] = L_light;
+ kernel_split_state.is_lamp[ray_index] = is_lamp;
+ /* Mark ray state for next shadow kernel. */
+ enqueue_flag = 1;
+ }
+ }
+ }
+#endif /* __EMISSION__ */
+ }
#ifdef __EMISSION__
- /* Enqueue RAY_SHADOW_RAY_CAST_DL rays. */
- enqueue_ray_index_local(ray_index,
- QUEUE_SHADOW_RAY_CAST_DL_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
+ /* Enqueue RAY_SHADOW_RAY_CAST_DL rays. */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_SHADOW_RAY_CAST_DL_RAYS,
+ enqueue_flag,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
#endif
#ifdef __BRANCHED_PATH__
- /* Enqueue RAY_LIGHT_INDIRECT_NEXT_ITER rays
- * this is the last kernel before next_iteration_setup that uses local atomics so we do this here
- */
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- enqueue_ray_index_local(ray_index,
- QUEUE_LIGHT_INDIRECT_ITER,
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER),
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
-
-#endif /* __BRANCHED_PATH__ */
+ /* Enqueue RAY_LIGHT_INDIRECT_NEXT_ITER rays
+ * this is the last kernel before next_iteration_setup that uses local atomics so we do this here
+ */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ enqueue_ray_index_local(
+ ray_index,
+ QUEUE_LIGHT_INDIRECT_ITER,
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER),
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+
+#endif /* __BRANCHED_PATH__ */
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
index fb5bd3d48dd..45b839db05f 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -18,203 +18,210 @@ CCL_NAMESPACE_BEGIN
#if defined(__BRANCHED_PATH__) && defined(__VOLUME__)
-ccl_device_inline void kernel_split_branched_path_volume_indirect_light_init(KernelGlobals *kg, int ray_index)
+ccl_device_inline void kernel_split_branched_path_volume_indirect_light_init(KernelGlobals *kg,
+ int ray_index)
{
- kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+ kernel_split_branched_path_indirect_loop_init(kg, ray_index);
- ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT);
+ ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT);
}
-ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(KernelGlobals *kg, int ray_index)
+ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(KernelGlobals *kg,
+ int ray_index)
{
- SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
- /* GPU: no decoupled ray marching, scatter probalistically */
- int num_samples = kernel_data.integrator.volume_samples;
- float num_samples_inv = 1.0f/num_samples;
+ /* GPU: no decoupled ray marching, scatter probabilistically */
+ int num_samples = kernel_data.integrator.volume_samples;
+ float num_samples_inv = 1.0f / num_samples;
- Ray volume_ray = branched_state->ray;
- volume_ray.t = (!IS_STATE(&branched_state->ray_state, 0, RAY_HIT_BACKGROUND)) ? branched_state->isect.t : FLT_MAX;
+ Ray volume_ray = branched_state->ray;
+ volume_ray.t = (!IS_STATE(&branched_state->ray_state, 0, RAY_HIT_BACKGROUND)) ?
+ branched_state->isect.t :
+ FLT_MAX;
- bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack);
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack);
- for(int j = branched_state->next_sample; j < num_samples; j++) {
- ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
- *ps = branched_state->path_state;
+ for (int j = branched_state->next_sample; j < num_samples; j++) {
+ ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
+ *ps = branched_state->path_state;
- ccl_global Ray *pray = &kernel_split_state.ray[ray_index];
- *pray = branched_state->ray;
+ ccl_global Ray *pray = &kernel_split_state.ray[ray_index];
+ *pray = branched_state->ray;
- ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
- *tp = branched_state->throughput * num_samples_inv;
+ ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
+ *tp = branched_state->throughput * num_samples_inv;
- /* branch RNG state */
- path_state_branch(ps, j, num_samples);
+ /* branch RNG state */
+ path_state_branch(ps, j, num_samples);
- /* integrate along volume segment with distance sampling */
- VolumeIntegrateResult result = kernel_volume_integrate(
- kg, ps, sd, &volume_ray, L, tp, heterogeneous);
+ /* integrate along volume segment with distance sampling */
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, ps, sd, &volume_ray, L, tp, heterogeneous);
# ifdef __VOLUME_SCATTER__
- if(result == VOLUME_PATH_SCATTERED) {
- /* direct lighting */
- kernel_path_volume_connect_light(kg, sd, emission_sd, *tp, &branched_state->path_state, L);
-
- /* indirect light bounce */
- if(!kernel_path_volume_bounce(kg, sd, tp, ps, &L->state, pray)) {
- continue;
- }
-
- /* start the indirect path */
- branched_state->next_closure = 0;
- branched_state->next_sample = j+1;
-
- /* Attempting to share too many samples is slow for volumes as it causes us to
- * loop here more and have many calls to kernel_volume_integrate which evaluates
- * shaders. The many expensive shader evaluations cause the work load to become
- * unbalanced and many threads to become idle in this kernel. Limiting the
- * number of shared samples here helps quite a lot.
- */
- if(branched_state->shared_sample_count < 2) {
- if(kernel_split_branched_indirect_start_shared(kg, ray_index)) {
- continue;
- }
- }
-
- return true;
- }
+ if (result == VOLUME_PATH_SCATTERED) {
+ /* direct lighting */
+ kernel_path_volume_connect_light(kg, sd, emission_sd, *tp, &branched_state->path_state, L);
+
+ /* indirect light bounce */
+ if (!kernel_path_volume_bounce(kg, sd, tp, ps, &L->state, pray)) {
+ continue;
+ }
+
+ /* start the indirect path */
+ branched_state->next_closure = 0;
+ branched_state->next_sample = j + 1;
+
+ /* Attempting to share too many samples is slow for volumes as it causes us to
+ * loop here more and have many calls to kernel_volume_integrate which evaluates
+ * shaders. The many expensive shader evaluations cause the work load to become
+ * unbalanced and many threads to become idle in this kernel. Limiting the
+ * number of shared samples here helps quite a lot.
+ */
+ if (branched_state->shared_sample_count < 2) {
+ if (kernel_split_branched_indirect_start_shared(kg, ray_index)) {
+ continue;
+ }
+ }
+
+ return true;
+ }
# endif
- }
+ }
- branched_state->next_sample = num_samples;
+ branched_state->next_sample = num_samples;
- branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
- if(branched_state->waiting_on_shared_samples) {
- return true;
- }
+ branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+ if (branched_state->waiting_on_shared_samples) {
+ return true;
+ }
- kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
- /* todo: avoid this calculation using decoupled ray marching */
- float3 throughput = kernel_split_state.throughput[ray_index];
- kernel_volume_shadow(kg, emission_sd, &kernel_split_state.path_state[ray_index], &volume_ray, &throughput);
- kernel_split_state.throughput[ray_index] = throughput;
+ /* todo: avoid this calculation using decoupled ray marching */
+ float3 throughput = kernel_split_state.throughput[ray_index];
+ kernel_volume_shadow(
+ kg, emission_sd, &kernel_split_state.path_state[ray_index], &volume_ray, &throughput);
+ kernel_split_state.throughput[ray_index] = throughput;
- return false;
+ return false;
}
-#endif /* __BRANCHED_PATH__ && __VOLUME__ */
+#endif /* __BRANCHED_PATH__ && __VOLUME__ */
ccl_device void kernel_do_volume(KernelGlobals *kg)
{
#ifdef __VOLUME__
- /* We will empty this queue in this kernel. */
- if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
- kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ /* We will empty this queue in this kernel. */
+ if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
# ifdef __BRANCHED_PATH__
- kernel_split_params.queue_index[QUEUE_VOLUME_INDIRECT_ITER] = 0;
-# endif /* __BRANCHED_PATH__ */
- }
-
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-
- if(*kernel_split_params.use_queues_flag) {
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
- }
-
- ccl_global char *ray_state = kernel_split_state.ray_state;
-
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
-
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
- IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
- ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
-
- bool hit = ! IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
-
- /* Sanitize volume stack. */
- if(!hit) {
- kernel_volume_clean_stack(kg, state->volume_stack);
- }
- /* volume attenuation, emission, scatter */
- if(state->volume_stack[0].shader != SHADER_NONE) {
- Ray volume_ray = *ray;
- volume_ray.t = (hit)? isect->t: FLT_MAX;
+ kernel_split_params.queue_index[QUEUE_VOLUME_INDIRECT_ITER] = 0;
+# endif /* __BRANCHED_PATH__ */
+ }
+
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+
+ if (*kernel_split_params.use_queues_flag) {
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+ }
+
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
+ IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+
+ bool hit = !IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
+
+ /* Sanitize volume stack. */
+ if (!hit) {
+ kernel_volume_clean_stack(kg, state->volume_stack);
+ }
+ /* volume attenuation, emission, scatter */
+ if (state->volume_stack[0].shader != SHADER_NONE) {
+ Ray volume_ray = *ray;
+ volume_ray.t = (hit) ? isect->t : FLT_MAX;
# ifdef __BRANCHED_PATH__
- if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
-# endif /* __BRANCHED_PATH__ */
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+ if (!kernel_data.integrator.branched ||
+ IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+# endif /* __BRANCHED_PATH__ */
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
- {
- /* integrate along volume segment with distance sampling */
- VolumeIntegrateResult result = kernel_volume_integrate(
- kg, state, sd, &volume_ray, L, throughput, heterogeneous);
+ {
+ /* integrate along volume segment with distance sampling */
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, state, sd, &volume_ray, L, throughput, heterogeneous);
# ifdef __VOLUME_SCATTER__
- if(result == VOLUME_PATH_SCATTERED) {
- /* direct lighting */
- kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
-
- /* indirect light bounce */
- if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- else {
- kernel_split_path_end(kg, ray_index);
- }
- }
-# endif /* __VOLUME_SCATTER__ */
- }
+ if (result == VOLUME_PATH_SCATTERED) {
+ /* direct lighting */
+ kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
+
+ /* indirect light bounce */
+ if (kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ else {
+ kernel_split_path_end(kg, ray_index);
+ }
+ }
+# endif /* __VOLUME_SCATTER__ */
+ }
# ifdef __BRANCHED_PATH__
- }
- else {
- kernel_split_branched_path_volume_indirect_light_init(kg, ray_index);
-
- if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- }
-# endif /* __BRANCHED_PATH__ */
- }
- }
+ }
+ else {
+ kernel_split_branched_path_volume_indirect_light_init(kg, ray_index);
+
+ if (kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
+ }
+ }
# ifdef __BRANCHED_PATH__
- /* iter loop */
- ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
- QUEUE_VOLUME_INDIRECT_ITER,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
-
- if(IS_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER)) {
- /* for render passes, sum and reset indirect light pass variables
- * for the next samples */
- path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
- path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
-
- if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- }
-# endif /* __BRANCHED_PATH__ */
-
-#endif /* __VOLUME__ */
+ /* iter loop */
+ ray_index = get_ray_index(kg,
+ ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+ QUEUE_VOLUME_INDIRECT_ITER,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+ if (IS_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER)) {
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
+ path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
+
+ if (kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
+
+#endif /* __VOLUME__ */
}
-
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_enqueue_inactive.h b/intern/cycles/kernel/split/kernel_enqueue_inactive.h
index 496355bbc3a..31d2daef616 100644
--- a/intern/cycles/kernel/split/kernel_enqueue_inactive.h
+++ b/intern/cycles/kernel/split/kernel_enqueue_inactive.h
@@ -20,27 +20,27 @@ ccl_device void kernel_enqueue_inactive(KernelGlobals *kg,
ccl_local_param unsigned int *local_queue_atomics)
{
#ifdef __BRANCHED_PATH__
- /* Enqeueue RAY_INACTIVE rays into QUEUE_INACTIVE_RAYS queue. */
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ /* Enqueue RAY_INACTIVE rays into QUEUE_INACTIVE_RAYS queue. */
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- char enqueue_flag = 0;
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_INACTIVE)) {
- enqueue_flag = 1;
- }
+ char enqueue_flag = 0;
+ if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_INACTIVE)) {
+ enqueue_flag = 1;
+ }
- enqueue_ray_index_local(ray_index,
- QUEUE_INACTIVE_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
-#endif /* __BRANCHED_PATH__ */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_INACTIVE_RAYS,
+ enqueue_flag,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+#endif /* __BRANCHED_PATH__ */
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
index f14eecec2f2..63bc5a8e0ce 100644
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -54,120 +54,112 @@ CCL_NAMESPACE_BEGIN
*/
ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
- KernelGlobals *kg,
- ccl_local_param BackgroundAOLocals *locals)
+ KernelGlobals *kg, ccl_local_param BackgroundAOLocals *locals)
{
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- locals->queue_atomics_bg = 0;
- locals->queue_atomics_ao = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ locals->queue_atomics_bg = 0;
+ locals->queue_atomics_ao = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
#ifdef __AO__
- char enqueue_flag = 0;
+ char enqueue_flag = 0;
#endif
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
#ifdef __COMPUTE_DEVICE_GPU__
- /* If we are executing on a GPU device, we exit all threads that are not
- * required.
- *
- * If we are executing on a CPU device, then we need to keep all threads
- * active since we have barrier() calls later in the kernel. CPU devices,
- * expect all threads to execute barrier statement.
- */
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
-#endif /* __COMPUTE_DEVICE_GPU__ */
+ /* If we are executing on a GPU device, we exit all threads that are not
+ * required.
+ *
+ * If we are executing on a CPU device, then we need to keep all threads
+ * active since we have barrier() calls later in the kernel. CPU devices
+ * expect all threads to execute the barrier statement.
+ */
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
+#endif /* __COMPUTE_DEVICE_GPU__ */
#ifndef __COMPUTE_DEVICE_GPU__
- if(ray_index != QUEUE_EMPTY_SLOT) {
+ if (ray_index != QUEUE_EMPTY_SLOT) {
#endif
- ccl_global PathState *state = 0x0;
- float3 throughput;
-
- ccl_global char *ray_state = kernel_split_state.ray_state;
- ShaderData *sd = kernel_split_sd(sd, ray_index);
-
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
- ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
-
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-
- throughput = kernel_split_state.throughput[ray_index];
- state = &kernel_split_state.path_state[ray_index];
-
- if(!kernel_path_shader_apply(kg,
- sd,
- state,
- ray,
- throughput,
- emission_sd,
- L,
- buffer))
- {
- kernel_split_path_end(kg, ray_index);
- }
- }
-
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- /* Path termination. this is a strange place to put the termination, it's
- * mainly due to the mixed in MIS that we use. gives too many unneeded
- * shader evaluations, only need emission if we are going to terminate.
- */
- float probability = path_state_continuation_probability(kg, state, throughput);
-
- if(probability == 0.0f) {
- kernel_split_path_end(kg, ray_index);
- }
- else if(probability < 1.0f) {
- float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
- if(terminate >= probability) {
- kernel_split_path_end(kg, ray_index);
- }
- else {
- kernel_split_state.throughput[ray_index] = throughput/probability;
- }
- }
-
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- kernel_update_denoising_features(kg, sd, state, L);
- }
- }
+ ccl_global PathState *state = 0x0;
+ float3 throughput;
+
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+
+ throughput = kernel_split_state.throughput[ray_index];
+ state = &kernel_split_state.path_state[ray_index];
+
+ if (!kernel_path_shader_apply(kg, sd, state, ray, throughput, emission_sd, L, buffer)) {
+ kernel_split_path_end(kg, ray_index);
+ }
+ }
+
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ /* Path termination. this is a strange place to put the termination, it's
+ * mainly due to the mixed in MIS that we use. gives too many unneeded
+ * shader evaluations, only need emission if we are going to terminate.
+ */
+ float probability = path_state_continuation_probability(kg, state, throughput);
+
+ if (probability == 0.0f) {
+ kernel_split_path_end(kg, ray_index);
+ }
+ else if (probability < 1.0f) {
+ float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
+ if (terminate >= probability) {
+ kernel_split_path_end(kg, ray_index);
+ }
+ else {
+ kernel_split_state.throughput[ray_index] = throughput / probability;
+ }
+ }
+
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ kernel_update_denoising_features(kg, sd, state, L);
+ }
+ }
#ifdef __AO__
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- /* ambient occlusion */
- if(kernel_data.integrator.use_ambient_occlusion) {
- enqueue_flag = 1;
- }
- }
-#endif /* __AO__ */
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ /* ambient occlusion */
+ if (kernel_data.integrator.use_ambient_occlusion) {
+ enqueue_flag = 1;
+ }
+ }
+#endif /* __AO__ */
#ifndef __COMPUTE_DEVICE_GPU__
- }
+ }
#endif
#ifdef __AO__
- /* Enqueue to-shadow-ray-cast rays. */
- enqueue_ray_index_local(ray_index,
- QUEUE_SHADOW_RAY_CAST_AO_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- &locals->queue_atomics_ao,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
+ /* Enqueue to-shadow-ray-cast rays. */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_SHADOW_RAY_CAST_AO_RAYS,
+ enqueue_flag,
+ kernel_split_params.queue_size,
+ &locals->queue_atomics_ao,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
#endif
}
diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h
index 4cf88a02590..b1c65f61e2c 100644
--- a/intern/cycles/kernel/split/kernel_indirect_background.h
+++ b/intern/cycles/kernel/split/kernel_indirect_background.h
@@ -18,48 +18,50 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_indirect_background(KernelGlobals *kg)
{
- ccl_global char *ray_state = kernel_split_state.ray_state;
+ ccl_global char *ray_state = kernel_split_state.ray_state;
- int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- int ray_index;
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ int ray_index;
- if(kernel_data.integrator.ao_bounces != INT_MAX) {
- ray_index = get_ray_index(kg, thread_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
+ if (kernel_data.integrator.ao_bounces != INT_MAX) {
+ ray_index = get_ray_index(kg,
+ thread_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
- if(ray_index != QUEUE_EMPTY_SLOT) {
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- if(path_state_ao_bounce(kg, state)) {
- kernel_split_path_end(kg, ray_index);
- }
- }
- }
- }
+ if (ray_index != QUEUE_EMPTY_SLOT) {
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ if (path_state_ao_bounce(kg, state)) {
+ kernel_split_path_end(kg, ray_index);
+ }
+ }
+ }
+ }
- ray_index = get_ray_index(kg, thread_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
+ ray_index = get_ray_index(kg,
+ thread_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
- if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- float3 throughput = kernel_split_state.throughput[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
+ if (IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ float3 throughput = kernel_split_state.throughput[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
- kernel_path_background(kg, state, ray, throughput, sd, L);
- kernel_split_path_end(kg, ray_index);
- }
+ kernel_path_background(kg, state, ray, throughput, sd, L);
+ kernel_split_path_end(kg, ray_index);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_indirect_subsurface.h b/intern/cycles/kernel/split/kernel_indirect_subsurface.h
index 236c94e983c..3f48f8d6f56 100644
--- a/intern/cycles/kernel/split/kernel_indirect_subsurface.h
+++ b/intern/cycles/kernel/split/kernel_indirect_subsurface.h
@@ -18,53 +18,50 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_indirect_subsurface(KernelGlobals *kg)
{
- int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(thread_index == 0) {
- /* We will empty both queues in this kernel. */
- kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
- kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
- }
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (thread_index == 0) {
+ /* We will empty both queues in this kernel. */
+ kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
+ }
- int ray_index;
- get_ray_index(kg, thread_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
- ray_index = get_ray_index(kg, thread_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
+ int ray_index;
+ get_ray_index(kg,
+ thread_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+ ray_index = get_ray_index(kg,
+ thread_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
#ifdef __SUBSURFACE__
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
- ccl_global char *ray_state = kernel_split_state.ray_state;
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
- ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+ if (IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+ ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
- /* Trace indirect subsurface rays by restarting the loop. this uses less
- * stack memory than invoking kernel_path_indirect.
- */
- if(ss_indirect->num_rays) {
- kernel_path_subsurface_setup_indirect(kg,
- ss_indirect,
- state,
- ray,
- L,
- throughput);
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- }
-#endif /* __SUBSURFACE__ */
+ /* Trace indirect subsurface rays by restarting the loop. This uses less
+ * stack memory than invoking kernel_path_indirect.
+ */
+ if (ss_indirect->num_rays) {
+ kernel_path_subsurface_setup_indirect(kg, ss_indirect, state, ray, L, throughput);
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+#endif /* __SUBSURFACE__ */
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h
index 5b2c554b922..7ecb099208d 100644
--- a/intern/cycles/kernel/split/kernel_lamp_emission.h
+++ b/intern/cycles/kernel/split/kernel_lamp_emission.h
@@ -23,45 +23,45 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_lamp_emission(KernelGlobals *kg)
{
#ifndef __VOLUME__
- /* We will empty this queue in this kernel. */
- if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
- kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
- }
+ /* We will empty this queue in this kernel. */
+ if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ }
#endif
- /* Fetch use_queues_flag. */
- char local_use_queues_flag = *kernel_split_params.use_queues_flag;
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ /* Fetch use_queues_flag. */
+ char local_use_queues_flag = *kernel_split_params.use_queues_flag;
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(local_use_queues_flag) {
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (local_use_queues_flag) {
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
#ifndef __VOLUME__
- 1
+ 1
#else
- 0
+ 0
#endif
- );
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
- }
+ );
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
+ }
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND))
- {
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- float3 throughput = kernel_split_state.throughput[ray_index];
- Ray ray = kernel_split_state.ray[ray_index];
- ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
+ float3 throughput = kernel_split_state.throughput[ray_index];
+ Ray ray = kernel_split_state.ray[ray_index];
+ ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
- kernel_path_lamp_emission(kg, state, &ray, throughput, isect, sd, L);
- }
+ kernel_path_lamp_emission(kg, state, &ray, throughput, isect, sd, L);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
index e388955f1af..781ce869374 100644
--- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h
+++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
@@ -48,217 +48,211 @@ CCL_NAMESPACE_BEGIN
#ifdef __BRANCHED_PATH__
ccl_device_inline void kernel_split_branched_indirect_light_init(KernelGlobals *kg, int ray_index)
{
- kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+ kernel_split_branched_path_indirect_loop_init(kg, ray_index);
- ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT);
+ ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT);
}
ccl_device void kernel_split_branched_transparent_bounce(KernelGlobals *kg, int ray_index)
{
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
# ifdef __VOLUME__
- if(!(sd->flag & SD_HAS_ONLY_VOLUME)) {
+ if (!(sd->flag & SD_HAS_ONLY_VOLUME)) {
# endif
- /* continue in case of transparency */
- *throughput *= shader_bsdf_transparency(kg, sd);
+ /* continue in case of transparency */
+ *throughput *= shader_bsdf_transparency(kg, sd);
- if(is_zero(*throughput)) {
- kernel_split_path_end(kg, ray_index);
- return;
- }
+ if (is_zero(*throughput)) {
+ kernel_split_path_end(kg, ray_index);
+ return;
+ }
- /* Update Path State */
- path_state_next(kg, state, LABEL_TRANSPARENT);
+ /* Update Path State */
+ path_state_next(kg, state, LABEL_TRANSPARENT);
# ifdef __VOLUME__
- }
- else {
- if(!path_state_volume_next(kg, state)) {
- kernel_split_path_end(kg, ray_index);
- return;
- }
- }
+ }
+ else {
+ if (!path_state_volume_next(kg, state)) {
+ kernel_split_path_end(kg, ray_index);
+ return;
+ }
+ }
# endif
- ray->P = ray_offset(sd->P, -sd->Ng);
- ray->t -= sd->ray_length; /* clipping works through transparent */
+ ray->P = ray_offset(sd->P, -sd->Ng);
+ ray->t -= sd->ray_length; /* clipping works through transparent */
# ifdef __RAY_DIFFERENTIALS__
- ray->dP = sd->dP;
- ray->dD.dx = -sd->dI.dx;
- ray->dD.dy = -sd->dI.dy;
-# endif /* __RAY_DIFFERENTIALS__ */
+ ray->dP = sd->dP;
+ ray->dD.dx = -sd->dI.dx;
+ ray->dD.dy = -sd->dI.dy;
+# endif /* __RAY_DIFFERENTIALS__ */
# ifdef __VOLUME__
- /* enter/exit volume */
- kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
-# endif /* __VOLUME__ */
+ /* enter/exit volume */
+ kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
+# endif /* __VOLUME__ */
}
-#endif /* __BRANCHED_PATH__ */
+#endif /* __BRANCHED_PATH__ */
ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
ccl_local_param unsigned int *local_queue_atomics)
{
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
- /* If we are here, then it means that scene-intersect kernel
- * has already been executed atleast once. From the next time,
- * scene-intersect kernel may operate on queues to fetch ray index
- */
- *kernel_split_params.use_queues_flag = 1;
-
- /* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and
- * QUEUE_SHADOW_RAY_CAST_DL_RAYS queues that were made empty during the
- * previous kernel.
- */
- kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0;
- kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0;
- }
-
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
-
- ccl_global char *ray_state = kernel_split_state.ray_state;
-
-# ifdef __VOLUME__
- /* Reactivate only volume rays here, most surface work was skipped. */
- if(IS_STATE(ray_state, ray_index, RAY_HAS_ONLY_VOLUME)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_ACTIVE);
- }
-# endif
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ /* If we are here, then it means that scene-intersect kernel
+ * has already been executed at least once. From the next time,
+ * scene-intersect kernel may operate on queues to fetch ray index
+ */
+ *kernel_split_params.use_queues_flag = 1;
+
+ /* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and
+ * QUEUE_SHADOW_RAY_CAST_DL_RAYS queues that were made empty during the
+ * previous kernel.
+ */
+ kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0;
+ kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0;
+ }
+
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
+
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+
+#ifdef __VOLUME__
+ /* Reactivate only volume rays here, most surface work was skipped. */
+ if (IS_STATE(ray_state, ray_index, RAY_HAS_ONLY_VOLUME)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_ACTIVE);
+ }
+#endif
- bool active = IS_STATE(ray_state, ray_index, RAY_ACTIVE);
- if(active) {
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ bool active = IS_STATE(ray_state, ray_index, RAY_ACTIVE);
+ if (active) {
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
#ifdef __BRANCHED_PATH__
- if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ if (!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
#endif
- /* Compute direct lighting and next bounce. */
- if(!kernel_path_surface_bounce(kg, sd, throughput, state, &L->state, ray)) {
- kernel_split_path_end(kg, ray_index);
- }
+ /* Compute direct lighting and next bounce. */
+ if (!kernel_path_surface_bounce(kg, sd, throughput, state, &L->state, ray)) {
+ kernel_split_path_end(kg, ray_index);
+ }
#ifdef __BRANCHED_PATH__
- }
- else if(sd->flag & SD_HAS_ONLY_VOLUME) {
- kernel_split_branched_transparent_bounce(kg, ray_index);
- }
- else {
- kernel_split_branched_indirect_light_init(kg, ray_index);
-
- if(kernel_split_branched_path_surface_indirect_light_iter(kg,
- ray_index,
- 1.0f,
- kernel_split_sd(branched_state_sd, ray_index),
- true,
- true))
- {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- else {
- kernel_split_branched_path_indirect_loop_end(kg, ray_index);
- kernel_split_branched_transparent_bounce(kg, ray_index);
- }
- }
-#endif /* __BRANCHED_PATH__ */
- }
-
- /* Enqueue RAY_UPDATE_BUFFER rays. */
- enqueue_ray_index_local(ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER) && active,
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
+ }
+ else if (sd->flag & SD_HAS_ONLY_VOLUME) {
+ kernel_split_branched_transparent_bounce(kg, ray_index);
+ }
+ else {
+ kernel_split_branched_indirect_light_init(kg, ray_index);
+
+ if (kernel_split_branched_path_surface_indirect_light_iter(
+ kg, ray_index, 1.0f, kernel_split_sd(branched_state_sd, ray_index), true, true)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ else {
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+ kernel_split_branched_transparent_bounce(kg, ray_index);
+ }
+ }
+#endif /* __BRANCHED_PATH__ */
+ }
+
+ /* Enqueue RAY_UPDATE_BUFFER rays. */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER) && active,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
#ifdef __BRANCHED_PATH__
- /* iter loop */
- if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
- kernel_split_params.queue_index[QUEUE_LIGHT_INDIRECT_ITER] = 0;
- }
-
- ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
- QUEUE_LIGHT_INDIRECT_ITER,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
-
- if(IS_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER)) {
- /* for render passes, sum and reset indirect light pass variables
- * for the next samples */
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-
- path_radiance_sum_indirect(L);
- path_radiance_reset_indirect(L);
-
- if(kernel_split_branched_path_surface_indirect_light_iter(kg,
- ray_index,
- 1.0f,
- kernel_split_sd(branched_state_sd, ray_index),
- true,
- true))
- {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- else {
- kernel_split_branched_path_indirect_loop_end(kg, ray_index);
- kernel_split_branched_transparent_bounce(kg, ray_index);
- }
- }
+ /* iter loop */
+ if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.queue_index[QUEUE_LIGHT_INDIRECT_ITER] = 0;
+ }
+
+ ray_index = get_ray_index(kg,
+ ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+ QUEUE_LIGHT_INDIRECT_ITER,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+ if (IS_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER)) {
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+
+ path_radiance_sum_indirect(L);
+ path_radiance_reset_indirect(L);
+
+ if (kernel_split_branched_path_surface_indirect_light_iter(
+ kg, ray_index, 1.0f, kernel_split_sd(branched_state_sd, ray_index), true, true)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ else {
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+ kernel_split_branched_transparent_bounce(kg, ray_index);
+ }
+ }
# ifdef __VOLUME__
- /* Enqueue RAY_VOLUME_INDIRECT_NEXT_ITER rays */
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- enqueue_ray_index_local(ray_index,
- QUEUE_VOLUME_INDIRECT_ITER,
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER),
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
-
-# endif /* __VOLUME__ */
+ /* Enqueue RAY_VOLUME_INDIRECT_NEXT_ITER rays */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ enqueue_ray_index_local(
+ ray_index,
+ QUEUE_VOLUME_INDIRECT_ITER,
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER),
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+
+# endif /* __VOLUME__ */
# ifdef __SUBSURFACE__
- /* Enqueue RAY_SUBSURFACE_INDIRECT_NEXT_ITER rays */
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- enqueue_ray_index_local(ray_index,
- QUEUE_SUBSURFACE_INDIRECT_ITER,
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER),
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
-# endif /* __SUBSURFACE__ */
-#endif /* __BRANCHED_PATH__ */
+ /* Enqueue RAY_SUBSURFACE_INDIRECT_NEXT_ITER rays */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ enqueue_ray_index_local(
+ ray_index,
+ QUEUE_SUBSURFACE_INDIRECT_ITER,
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER),
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+# endif /* __SUBSURFACE__ */
+#endif /* __BRANCHED_PATH__ */
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h
index fdd54225b07..3faa3208341 100644
--- a/intern/cycles/kernel/split/kernel_path_init.h
+++ b/intern/cycles/kernel/split/kernel_path_init.h
@@ -21,61 +21,59 @@ CCL_NAMESPACE_BEGIN
*
* Ray state of rays outside the tile-boundary will be marked RAY_INACTIVE
*/
-ccl_device void kernel_path_init(KernelGlobals *kg) {
- int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0);
+ccl_device void kernel_path_init(KernelGlobals *kg)
+{
+ int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0);
- /* This is the first assignment to ray_state;
- * So we dont use ASSIGN_RAY_STATE macro.
- */
- kernel_split_state.ray_state[ray_index] = RAY_ACTIVE;
+ /* This is the first assignment to ray_state;
+ * So we don't use ASSIGN_RAY_STATE macro.
+ */
+ kernel_split_state.ray_state[ray_index] = RAY_ACTIVE;
- /* Get work. */
- ccl_global uint *work_pools = kernel_split_params.work_pools;
- uint total_work_size = kernel_split_params.total_work_size;
- uint work_index;
+ /* Get work. */
+ ccl_global uint *work_pools = kernel_split_params.work_pools;
+ uint total_work_size = kernel_split_params.total_work_size;
+ uint work_index;
- if(!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
- /* No more work, mark ray as inactive */
- kernel_split_state.ray_state[ray_index] = RAY_INACTIVE;
+ if (!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
+ /* No more work, mark ray as inactive */
+ kernel_split_state.ray_state[ray_index] = RAY_INACTIVE;
- return;
- }
+ return;
+ }
- ccl_global WorkTile *tile = &kernel_split_params.tile;
- uint x, y, sample;
- get_work_pixel(tile, work_index, &x, &y, &sample);
+ ccl_global WorkTile *tile = &kernel_split_params.tile;
+ uint x, y, sample;
+ get_work_pixel(tile, work_index, &x, &y, &sample);
- /* Store buffer offset for writing to passes. */
- uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride;
- kernel_split_state.buffer_offset[ray_index] = buffer_offset;
+ /* Store buffer offset for writing to passes. */
+ uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride;
+ kernel_split_state.buffer_offset[ray_index] = buffer_offset;
- /* Initialize random numbers and ray. */
- uint rng_hash;
- kernel_path_trace_setup(kg,
- sample,
- x, y,
- &rng_hash,
- &kernel_split_state.ray[ray_index]);
+ /* Initialize random numbers and ray. */
+ uint rng_hash;
+ kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &kernel_split_state.ray[ray_index]);
- if(kernel_split_state.ray[ray_index].t != 0.0f) {
- /* Initialize throughput, path radiance, Ray, PathState;
- * These rays proceed with path-iteration.
- */
- kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
- path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass);
- path_state_init(kg,
- AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
- &kernel_split_state.path_state[ray_index],
- rng_hash,
- sample,
- &kernel_split_state.ray[ray_index]);
+ if (kernel_split_state.ray[ray_index].t != 0.0f) {
+ /* Initialize throughput, path radiance, Ray, PathState;
+ * These rays proceed with path-iteration.
+ */
+ kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
+ path_radiance_init(&kernel_split_state.path_radiance[ray_index],
+ kernel_data.film.use_light_pass);
+ path_state_init(kg,
+ AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
+ &kernel_split_state.path_state[ray_index],
+ rng_hash,
+ sample,
+ &kernel_split_state.ray[ray_index]);
#ifdef __SUBSURFACE__
- kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
+ kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
#endif
- }
- else {
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE);
- }
+ }
+ else {
+ ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h
index df67fabab19..2db87f7a671 100644
--- a/intern/cycles/kernel/split/kernel_queue_enqueue.h
+++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h
@@ -35,58 +35,53 @@ CCL_NAMESPACE_BEGIN
* - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with
* RAY_TO_REGENERATE, RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND rays.
*/
-ccl_device void kernel_queue_enqueue(KernelGlobals *kg,
- ccl_local_param QueueEnqueueLocals *locals)
+ccl_device void kernel_queue_enqueue(KernelGlobals *kg, ccl_local_param QueueEnqueueLocals *locals)
{
- /* We have only 2 cases (Hit/Not-Hit) */
- int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ /* We have only 2 cases (Hit/Not-Hit) */
+ int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(lidx == 0) {
- locals->queue_atomics[0] = 0;
- locals->queue_atomics[1] = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (lidx == 0) {
+ locals->queue_atomics[0] = 0;
+ locals->queue_atomics[1] = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- int queue_number = -1;
+ int queue_number = -1;
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) {
- queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS;
- }
- else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
- queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS;
- }
+ if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) {
+ queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS;
+ }
+ else if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
+ queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS;
+ }
- unsigned int my_lqidx;
- if(queue_number != -1) {
- my_lqidx = get_local_queue_index(queue_number, locals->queue_atomics);
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ unsigned int my_lqidx;
+ if (queue_number != -1) {
+ my_lqidx = get_local_queue_index(queue_number, locals->queue_atomics);
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- if(lidx == 0) {
- locals->queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] =
- get_global_per_queue_offset(QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- locals->queue_atomics,
- kernel_split_params.queue_index);
- locals->queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] =
- get_global_per_queue_offset(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- locals->queue_atomics,
- kernel_split_params.queue_index);
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (lidx == 0) {
+ locals->queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = get_global_per_queue_offset(
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS, locals->queue_atomics, kernel_split_params.queue_index);
+ locals->queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = get_global_per_queue_offset(
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ locals->queue_atomics,
+ kernel_split_params.queue_index);
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- unsigned int my_gqidx;
- if(queue_number != -1) {
- my_gqidx = get_global_queue_index(queue_number,
- kernel_split_params.queue_size,
- my_lqidx,
- locals->queue_atomics);
- kernel_split_state.queue_data[my_gqidx] = ray_index;
- }
+ unsigned int my_gqidx;
+ if (queue_number != -1) {
+ my_gqidx = get_global_queue_index(
+ queue_number, kernel_split_params.queue_size, my_lqidx, locals->queue_atomics);
+ kernel_split_state.queue_data[my_gqidx] = ray_index;
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h
index f5378bc172b..5fef3e045f8 100644
--- a/intern/cycles/kernel/split/kernel_scene_intersect.h
+++ b/intern/cycles/kernel/split/kernel_scene_intersect.h
@@ -25,55 +25,56 @@ CCL_NAMESPACE_BEGIN
*/
ccl_device void kernel_scene_intersect(KernelGlobals *kg)
{
- /* Fetch use_queues_flag */
- char local_use_queues_flag = *kernel_split_params.use_queues_flag;
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ /* Fetch use_queues_flag */
+ char local_use_queues_flag = *kernel_split_params.use_queues_flag;
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(local_use_queues_flag) {
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (local_use_queues_flag) {
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
- }
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
+ }
- /* All regenerated rays become active here */
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
+ /* All regenerated rays become active here */
+ if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
#ifdef __BRANCHED_PATH__
- if(kernel_split_state.branched_state[ray_index].waiting_on_shared_samples) {
- kernel_split_path_end(kg, ray_index);
- }
- else
-#endif /* __BRANCHED_PATH__ */
- {
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
- }
- }
+ if (kernel_split_state.branched_state[ray_index].waiting_on_shared_samples) {
+ kernel_split_path_end(kg, ray_index);
+ }
+ else
+#endif /* __BRANCHED_PATH__ */
+ {
+ ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
+ }
+ }
- if(!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
- return;
- }
+ if (!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
+ return;
+ }
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- Ray ray = kernel_split_state.ray[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ Ray ray = kernel_split_state.ray[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- Intersection isect;
- bool hit = kernel_path_scene_intersect(kg, state, &ray, &isect, L);
- kernel_split_state.isect[ray_index] = isect;
+ Intersection isect;
+ bool hit = kernel_path_scene_intersect(kg, state, &ray, &isect, L);
+ kernel_split_state.isect[ray_index] = isect;
- if(!hit) {
- /* Change the state of rays that hit the background;
- * These rays undergo special processing in the
- * background_bufferUpdate kernel.
- */
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND);
- }
+ if (!hit) {
+ /* Change the state of rays that hit the background;
+ * These rays undergo special processing in the
+ * background_bufferUpdate kernel.
+ */
+ ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h
index 2bc2d300699..8e39c9797e5 100644
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -22,45 +22,46 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_shader_eval(KernelGlobals *kg)
{
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- /* Sorting on cuda split is not implemented */
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ /* Sorting on cuda split is not implemented */
#ifdef __KERNEL_CUDA__
- int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
+ int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
#else
- int queue_index = kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS];
+ int queue_index = kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS];
#endif
- if(ray_index >= queue_index) {
- return;
- }
- ray_index = get_ray_index(kg, ray_index,
+ if (ray_index >= queue_index) {
+ return;
+ }
+ ray_index = get_ray_index(kg,
+ ray_index,
#ifdef __KERNEL_CUDA__
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
#else
- QUEUE_SHADER_SORTED_RAYS,
+ QUEUE_SHADER_SORTED_RAYS,
#endif
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
- ccl_global char *ray_state = kernel_split_state.ray_state;
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- shader_eval_surface(kg, kernel_split_sd(sd, ray_index), state, state->flag);
+ shader_eval_surface(kg, kernel_split_sd(sd, ray_index), state, state->flag);
#ifdef __BRANCHED_PATH__
- if(kernel_data.integrator.branched) {
- shader_merge_closures(kernel_split_sd(sd, ray_index));
- }
- else
+ if (kernel_data.integrator.branched) {
+ shader_merge_closures(kernel_split_sd(sd, ray_index));
+ }
+ else
#endif
- {
- shader_prepare_closures(kernel_split_sd(sd, ray_index), state);
- }
- }
+ {
+ shader_prepare_closures(kernel_split_sd(sd, ray_index), state);
+ }
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shader_setup.h b/intern/cycles/kernel/split/kernel_shader_setup.h
index ea3ec2ec83f..da332db2c98 100644
--- a/intern/cycles/kernel/split/kernel_shader_setup.h
+++ b/intern/cycles/kernel/split/kernel_shader_setup.h
@@ -25,54 +25,52 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_shader_setup(KernelGlobals *kg,
ccl_local_param unsigned int *local_queue_atomics)
{
- /* Enqeueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ /* Enqueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
- if(ray_index >= queue_index) {
- return;
- }
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
+ if (ray_index >= queue_index) {
+ return;
+ }
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
- char enqueue_flag = (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) ? 1 : 0;
- enqueue_ray_index_local(ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
+ char enqueue_flag = (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) ? 1 :
+ 0;
+ enqueue_ray_index_local(ray_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ enqueue_flag,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
- /* Continue on with shader evaluation. */
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
- Intersection isect = kernel_split_state.isect[ray_index];
- Ray ray = kernel_split_state.ray[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
+ /* Continue on with shader evaluation. */
+ if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
+ Intersection isect = kernel_split_state.isect[ray_index];
+ Ray ray = kernel_split_state.ray[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
- shader_setup_from_ray(kg,
- sd,
- &isect,
- &ray);
+ shader_setup_from_ray(kg, sd, &isect, &ray);
#ifdef __VOLUME__
- if(sd->flag & SD_HAS_ONLY_VOLUME) {
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME);
- }
+ if (sd->flag & SD_HAS_ONLY_VOLUME) {
+ ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME);
+ }
#endif
- }
-
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shader_sort.h b/intern/cycles/kernel/split/kernel_shader_sort.h
index 666355de334..95d33a42014 100644
--- a/intern/cycles/kernel/split/kernel_shader_sort.h
+++ b/intern/cycles/kernel/split/kernel_shader_sort.h
@@ -16,82 +16,82 @@
CCL_NAMESPACE_BEGIN
-
-ccl_device void kernel_shader_sort(KernelGlobals *kg,
- ccl_local_param ShaderSortLocals *locals)
+ccl_device void kernel_shader_sort(KernelGlobals *kg, ccl_local_param ShaderSortLocals *locals)
{
#ifndef __KERNEL_CUDA__
- int tid = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- uint qsize = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
- if(tid == 0) {
- kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS] = qsize;
- }
+ int tid = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ uint qsize = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
+ if (tid == 0) {
+ kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS] = qsize;
+ }
- uint offset = (tid/SHADER_SORT_LOCAL_SIZE)*SHADER_SORT_BLOCK_SIZE;
- if(offset >= qsize) {
- return;
- }
+ uint offset = (tid / SHADER_SORT_LOCAL_SIZE) * SHADER_SORT_BLOCK_SIZE;
+ if (offset >= qsize) {
+ return;
+ }
- int lid = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
- uint input = QUEUE_ACTIVE_AND_REGENERATED_RAYS * (kernel_split_params.queue_size);
- uint output = QUEUE_SHADER_SORTED_RAYS * (kernel_split_params.queue_size);
- ccl_local uint *local_value = &locals->local_value[0];
- ccl_local ushort *local_index = &locals->local_index[0];
+ int lid = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
+ uint input = QUEUE_ACTIVE_AND_REGENERATED_RAYS * (kernel_split_params.queue_size);
+ uint output = QUEUE_SHADER_SORTED_RAYS * (kernel_split_params.queue_size);
+ ccl_local uint *local_value = &locals->local_value[0];
+ ccl_local ushort *local_index = &locals->local_index[0];
- /* copy to local memory */
- for(uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
- uint idx = offset + i + lid;
- uint add = input + idx;
- uint value = (~0);
- if(idx < qsize) {
- int ray_index = kernel_split_state.queue_data[add];
- bool valid = (ray_index != QUEUE_EMPTY_SLOT) && IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
- if(valid) {
- value = kernel_split_sd(sd, ray_index)->shader & SHADER_MASK;
- }
- }
- local_value[i + lid] = value;
- local_index[i + lid] = i + lid;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ /* copy to local memory */
+ for (uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
+ uint idx = offset + i + lid;
+ uint add = input + idx;
+ uint value = (~0);
+ if (idx < qsize) {
+ int ray_index = kernel_split_state.queue_data[add];
+ bool valid = (ray_index != QUEUE_EMPTY_SLOT) &&
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
+ if (valid) {
+ value = kernel_split_sd(sd, ray_index)->shader & SHADER_MASK;
+ }
+ }
+ local_value[i + lid] = value;
+ local_index[i + lid] = i + lid;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- /* skip sorting for cpu split kernel */
+ /* skip sorting for cpu split kernel */
# ifdef __KERNEL_OPENCL__
- /* bitonic sort */
- for(uint length = 1; length < SHADER_SORT_BLOCK_SIZE; length <<= 1) {
- for(uint inc = length; inc > 0; inc >>= 1) {
- for(uint ii = 0; ii < SHADER_SORT_BLOCK_SIZE; ii += SHADER_SORT_LOCAL_SIZE) {
- uint i = lid + ii;
- bool direction = ((i & (length << 1)) != 0);
- uint j = i ^ inc;
- ushort ioff = local_index[i];
- ushort joff = local_index[j];
- uint iKey = local_value[ioff];
- uint jKey = local_value[joff];
- bool smaller = (jKey < iKey) || (jKey == iKey && j < i);
- bool swap = smaller ^ (j < i) ^ direction;
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
- local_index[i] = (swap) ? joff : ioff;
- local_index[j] = (swap) ? ioff : joff;
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
- }
- }
- }
-# endif /* __KERNEL_OPENCL__ */
+ /* bitonic sort */
+ for (uint length = 1; length < SHADER_SORT_BLOCK_SIZE; length <<= 1) {
+ for (uint inc = length; inc > 0; inc >>= 1) {
+ for (uint ii = 0; ii < SHADER_SORT_BLOCK_SIZE; ii += SHADER_SORT_LOCAL_SIZE) {
+ uint i = lid + ii;
+ bool direction = ((i & (length << 1)) != 0);
+ uint j = i ^ inc;
+ ushort ioff = local_index[i];
+ ushort joff = local_index[j];
+ uint iKey = local_value[ioff];
+ uint jKey = local_value[joff];
+ bool smaller = (jKey < iKey) || (jKey == iKey && j < i);
+ bool swap = smaller ^ (j < i) ^ direction;
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ local_index[i] = (swap) ? joff : ioff;
+ local_index[j] = (swap) ? ioff : joff;
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ }
+ }
+ }
+# endif /* __KERNEL_OPENCL__ */
- /* copy to destination */
- for(uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
- uint idx = offset + i + lid;
- uint lidx = local_index[i + lid];
- uint outi = output + idx;
- uint ini = input + offset + lidx;
- uint value = local_value[lidx];
- if(idx < qsize) {
- kernel_split_state.queue_data[outi] = (value == (~0)) ? QUEUE_EMPTY_SLOT : kernel_split_state.queue_data[ini];
- }
- }
-#endif /* __KERNEL_CUDA__ */
+ /* copy to destination */
+ for (uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
+ uint idx = offset + i + lid;
+ uint lidx = local_index[i + lid];
+ uint outi = output + idx;
+ uint ini = input + offset + lidx;
+ uint value = local_value[lidx];
+ if (idx < qsize) {
+ kernel_split_state.queue_data[outi] = (value == (~0)) ? QUEUE_EMPTY_SLOT :
+ kernel_split_state.queue_data[ini];
+ }
+ }
+#endif /* __KERNEL_CUDA__ */
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
index fb08112503a..5d772fc597b 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
@@ -19,35 +19,40 @@ CCL_NAMESPACE_BEGIN
/* Shadow ray cast for AO. */
ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg)
{
- unsigned int ao_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS];
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ unsigned int ao_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS];
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- int ray_index = QUEUE_EMPTY_SLOT;
- int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(thread_index < ao_queue_length) {
- ray_index = get_ray_index(kg, thread_index, QUEUE_SHADOW_RAY_CAST_AO_RAYS,
- kernel_split_state.queue_data, kernel_split_params.queue_size, 1);
- }
+ int ray_index = QUEUE_EMPTY_SLOT;
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (thread_index < ao_queue_length) {
+ ray_index = get_ray_index(kg,
+ thread_index,
+ QUEUE_SHADOW_RAY_CAST_AO_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+ }
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
- ShaderData *sd = kernel_split_sd(sd, ray_index);
- ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- float3 throughput = kernel_split_state.throughput[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ float3 throughput = kernel_split_state.throughput[ray_index];
#ifdef __BRANCHED_PATH__
- if(!kernel_data.integrator.branched || IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ if (!kernel_data.integrator.branched ||
+ IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
#endif
- kernel_path_ao(kg, sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, sd));
+ kernel_path_ao(kg, sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, sd));
#ifdef __BRANCHED_PATH__
- }
- else {
- kernel_branched_path_ao(kg, sd, emission_sd, L, state, throughput);
- }
+ }
+ else {
+ kernel_branched_path_ao(kg, sd, emission_sd, L, state, throughput);
+ }
#endif
}
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
index da072fd5f1a..82990ce9fae 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
@@ -19,89 +19,80 @@ CCL_NAMESPACE_BEGIN
/* Shadow ray cast for direct visible light. */
ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
{
- unsigned int dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS];
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ unsigned int dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS];
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- int ray_index = QUEUE_EMPTY_SLOT;
- int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(thread_index < dl_queue_length) {
- ray_index = get_ray_index(kg, thread_index, QUEUE_SHADOW_RAY_CAST_DL_RAYS,
- kernel_split_state.queue_data, kernel_split_params.queue_size, 1);
- }
+ int ray_index = QUEUE_EMPTY_SLOT;
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (thread_index < dl_queue_length) {
+ ray_index = get_ray_index(kg,
+ thread_index,
+ QUEUE_SHADOW_RAY_CAST_DL_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+ }
#ifdef __BRANCHED_PATH__
- /* TODO(mai): move this somewhere else? */
- if(thread_index == 0) {
- /* Clear QUEUE_INACTIVE_RAYS before next kernel. */
- kernel_split_params.queue_index[QUEUE_INACTIVE_RAYS] = 0;
- }
-#endif /* __BRANCHED_PATH__ */
+ /* TODO(mai): move this somewhere else? */
+ if (thread_index == 0) {
+ /* Clear QUEUE_INACTIVE_RAYS before next kernel. */
+ kernel_split_params.queue_index[QUEUE_INACTIVE_RAYS] = 0;
+ }
+#endif /* __BRANCHED_PATH__ */
- if(ray_index == QUEUE_EMPTY_SLOT)
- return;
+ if (ray_index == QUEUE_EMPTY_SLOT)
+ return;
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- Ray ray = kernel_split_state.light_ray[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
- float3 throughput = kernel_split_state.throughput[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ Ray ray = kernel_split_state.light_ray[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ float3 throughput = kernel_split_state.throughput[ray_index];
- BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index];
- ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
- bool is_lamp = kernel_split_state.is_lamp[ray_index];
+ BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index];
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+ bool is_lamp = kernel_split_state.is_lamp[ray_index];
-# if defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)
- bool use_branched = false;
- int all = 0;
+#if defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)
+ bool use_branched = false;
+ int all = 0;
- if(state->flag & PATH_RAY_SHADOW_CATCHER) {
- use_branched = true;
- all = 1;
- }
-# if defined(__BRANCHED_PATH__)
- else if(kernel_data.integrator.branched) {
- use_branched = true;
+ if (state->flag & PATH_RAY_SHADOW_CATCHER) {
+ use_branched = true;
+ all = 1;
+ }
+# if defined(__BRANCHED_PATH__)
+ else if (kernel_data.integrator.branched) {
+ use_branched = true;
- if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
- all = (kernel_data.integrator.sample_all_lights_indirect);
- }
- else
- {
- all = (kernel_data.integrator.sample_all_lights_direct);
- }
- }
-# endif /* __BRANCHED_PATH__ */
+ if (IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ all = (kernel_data.integrator.sample_all_lights_indirect);
+ }
+ else {
+ all = (kernel_data.integrator.sample_all_lights_direct);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
- if(use_branched) {
- kernel_branched_path_surface_connect_light(kg,
- sd,
- emission_sd,
- state,
- throughput,
- 1.0f,
- L,
- all);
- }
- else
-# endif /* defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)*/
- {
- /* trace shadow ray */
- float3 shadow;
+ if (use_branched) {
+ kernel_branched_path_surface_connect_light(
+ kg, sd, emission_sd, state, throughput, 1.0f, L, all);
+ }
+ else
+#endif /* defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)*/
+ {
+ /* trace shadow ray */
+ float3 shadow;
- if(!shadow_blocked(kg,
- sd,
- emission_sd,
- state,
- &ray,
- &shadow))
- {
- /* accumulate */
- path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
- }
- else {
- path_radiance_accum_total_light(L, state, throughput, &L_light);
- }
- }
+ if (!shadow_blocked(kg, sd, emission_sd, state, &ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
+ }
+ else {
+ path_radiance_accum_total_light(L, state, throughput, &L_light);
+ }
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h
index 4b86696691a..384bc952460 100644
--- a/intern/cycles/kernel/split/kernel_split_common.h
+++ b/intern/cycles/kernel/split/kernel_split_common.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __KERNEL_SPLIT_H__
-#define __KERNEL_SPLIT_H__
+#ifndef __KERNEL_SPLIT_H__
+#define __KERNEL_SPLIT_H__
#include "kernel/kernel_math.h"
#include "kernel/kernel_types.h"
@@ -57,47 +57,48 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline void kernel_split_path_end(KernelGlobals *kg, int ray_index)
{
- ccl_global char *ray_state = kernel_split_state.ray_state;
+ ccl_global char *ray_state = kernel_split_state.ray_state;
#ifdef __BRANCHED_PATH__
# ifdef __SUBSURFACE__
- ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
-
- if(ss_indirect->num_rays) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- }
- else
-# endif /* __SUBSURFACE__ */
- if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT_SHARED)) {
- int orig_ray = kernel_split_state.branched_state[ray_index].original_ray;
-
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- PathRadiance *orig_ray_L = &kernel_split_state.path_radiance[orig_ray];
-
- path_radiance_sum_indirect(L);
- path_radiance_accum_sample(orig_ray_L, L);
-
- atomic_fetch_and_dec_uint32((ccl_global uint*)&kernel_split_state.branched_state[orig_ray].shared_sample_count);
-
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
- }
- else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER);
- }
- else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER);
- }
- else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER);
- }
- else {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- }
+ ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+
+ if (ss_indirect->num_rays) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ }
+ else
+# endif /* __SUBSURFACE__ */
+ if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT_SHARED)) {
+ int orig_ray = kernel_split_state.branched_state[ray_index].original_ray;
+
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ PathRadiance *orig_ray_L = &kernel_split_state.path_radiance[orig_ray];
+
+ path_radiance_sum_indirect(L);
+ path_radiance_accum_sample(orig_ray_L, L);
+
+ atomic_fetch_and_dec_uint32(
+ (ccl_global uint *)&kernel_split_state.branched_state[orig_ray].shared_sample_count);
+
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
+ }
+ else if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER);
+ }
+ else if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER);
+ }
+ else if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER);
+ }
+ else {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ }
#else
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
#endif
}
CCL_NAMESPACE_END
-#endif /* __KERNEL_SPLIT_H__ */
+#endif /* __KERNEL_SPLIT_H__ */
diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h
index 3f6b3977d79..433b1221a37 100644
--- a/intern/cycles/kernel/split/kernel_split_data.h
+++ b/intern/cycles/kernel/split/kernel_split_data.h
@@ -24,22 +24,22 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline uint64_t split_data_buffer_size(KernelGlobals *kg, size_t num_elements)
{
- (void) kg; /* Unused on CPU. */
+ (void)kg; /* Unused on CPU. */
- uint64_t size = 0;
-#define SPLIT_DATA_ENTRY(type, name, num) + align_up(num_elements * num * sizeof(type), 16)
- size = size SPLIT_DATA_ENTRIES;
+ uint64_t size = 0;
+#define SPLIT_DATA_ENTRY(type, name, num) +align_up(num_elements *num * sizeof(type), 16)
+ size = size SPLIT_DATA_ENTRIES;
#undef SPLIT_DATA_ENTRY
- uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures-1);
+ uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures - 1);
#ifdef __BRANCHED_PATH__
- size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
+ size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
#endif
- size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
+ size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
- return size;
+ return size;
}
ccl_device_inline void split_data_init(KernelGlobals *kg,
@@ -48,28 +48,29 @@ ccl_device_inline void split_data_init(KernelGlobals *kg,
ccl_global void *data,
ccl_global char *ray_state)
{
- (void) kg; /* Unused on CPU. */
+ (void)kg; /* Unused on CPU. */
- ccl_global char *p = (ccl_global char*)data;
+ ccl_global char *p = (ccl_global char *)data;
#define SPLIT_DATA_ENTRY(type, name, num) \
- split_data->name = (type*)p; p += align_up(num_elements * num * sizeof(type), 16);
- SPLIT_DATA_ENTRIES;
+ split_data->name = (type *)p; \
+ p += align_up(num_elements * num * sizeof(type), 16);
+ SPLIT_DATA_ENTRIES;
#undef SPLIT_DATA_ENTRY
- uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures-1);
+ uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures - 1);
#ifdef __BRANCHED_PATH__
- split_data->_branched_state_sd = (ShaderData*)p;
- p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
+ split_data->_branched_state_sd = (ShaderData *)p;
+ p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
#endif
- split_data->_sd = (ShaderData*)p;
- p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
+ split_data->_sd = (ShaderData *)p;
+ p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
- split_data->ray_state = ray_state;
+ split_data->ray_state = ray_state;
}
CCL_NAMESPACE_END
-#endif /* __KERNEL_SPLIT_DATA_H__ */
+#endif /* __KERNEL_SPLIT_DATA_H__ */
diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h
index 83df1e2a0a6..6ff3f5bdb55 100644
--- a/intern/cycles/kernel/split/kernel_split_data_types.h
+++ b/intern/cycles/kernel/split/kernel_split_data_types.h
@@ -22,17 +22,17 @@ CCL_NAMESPACE_BEGIN
/* parameters used by the split kernels, we use a single struct to avoid passing these to each kernel */
typedef struct SplitParams {
- WorkTile tile;
- uint total_work_size;
+ WorkTile tile;
+ uint total_work_size;
- ccl_global unsigned int *work_pools;
+ ccl_global unsigned int *work_pools;
- ccl_global int *queue_index;
- int queue_size;
- ccl_global char *use_queues_flag;
+ ccl_global int *queue_index;
+ int queue_size;
+ ccl_global char *use_queues_flag;
- /* Place for storing sd->flag. AMD GPU OpenCL compiler workaround */
- int dummy_sd_flag;
+ /* Place for storing sd->flag. AMD GPU OpenCL compiler workaround */
+ int dummy_sd_flag;
} SplitParams;
/* Global memory variables [porting]; These memory is used for
@@ -46,98 +46,98 @@ typedef struct SplitParams {
#ifdef __BRANCHED_PATH__
typedef ccl_global struct SplitBranchedState {
- /* various state that must be kept and restored after an indirect loop */
- PathState path_state;
- float3 throughput;
- Ray ray;
+ /* various state that must be kept and restored after an indirect loop */
+ PathState path_state;
+ float3 throughput;
+ Ray ray;
- Intersection isect;
+ Intersection isect;
- char ray_state;
+ char ray_state;
- /* indirect loop state */
- int next_closure;
- int next_sample;
+ /* indirect loop state */
+ int next_closure;
+ int next_sample;
-#ifdef __SUBSURFACE__
- int ss_next_closure;
- int ss_next_sample;
- int next_hit;
- int num_hits;
-
- uint lcg_state;
- LocalIntersection ss_isect;
-#endif /*__SUBSURFACE__ */
-
- int shared_sample_count; /* number of branched samples shared with other threads */
- int original_ray; /* index of original ray when sharing branched samples */
- bool waiting_on_shared_samples;
+# ifdef __SUBSURFACE__
+ int ss_next_closure;
+ int ss_next_sample;
+ int next_hit;
+ int num_hits;
+
+ uint lcg_state;
+ LocalIntersection ss_isect;
+# endif /*__SUBSURFACE__ */
+
+ int shared_sample_count; /* number of branched samples shared with other threads */
+ int original_ray; /* index of original ray when sharing branched samples */
+ bool waiting_on_shared_samples;
} SplitBranchedState;
-#define SPLIT_DATA_BRANCHED_ENTRIES \
- SPLIT_DATA_ENTRY( SplitBranchedState, branched_state, 1) \
- SPLIT_DATA_ENTRY(ShaderData, _branched_state_sd, 0)
+# define SPLIT_DATA_BRANCHED_ENTRIES \
+ SPLIT_DATA_ENTRY(SplitBranchedState, branched_state, 1) \
+ SPLIT_DATA_ENTRY(ShaderData, _branched_state_sd, 0)
#else
-#define SPLIT_DATA_BRANCHED_ENTRIES
-#endif /* __BRANCHED_PATH__ */
+# define SPLIT_DATA_BRANCHED_ENTRIES
+#endif /* __BRANCHED_PATH__ */
#ifdef __SUBSURFACE__
# define SPLIT_DATA_SUBSURFACE_ENTRIES \
- SPLIT_DATA_ENTRY(ccl_global SubsurfaceIndirectRays, ss_rays, 1)
+ SPLIT_DATA_ENTRY(ccl_global SubsurfaceIndirectRays, ss_rays, 1)
#else
# define SPLIT_DATA_SUBSURFACE_ENTRIES
-#endif /* __SUBSURFACE__ */
+#endif /* __SUBSURFACE__ */
#ifdef __VOLUME__
-# define SPLIT_DATA_VOLUME_ENTRIES \
- SPLIT_DATA_ENTRY(ccl_global PathState, state_shadow, 1)
+# define SPLIT_DATA_VOLUME_ENTRIES SPLIT_DATA_ENTRY(ccl_global PathState, state_shadow, 1)
#else
# define SPLIT_DATA_VOLUME_ENTRIES
-#endif /* __VOLUME__ */
+#endif /* __VOLUME__ */
#define SPLIT_DATA_ENTRIES \
- SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
- SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
- SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
- SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
- SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
- SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
- SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
- SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
- SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \
- SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \
- SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
- SPLIT_DATA_SUBSURFACE_ENTRIES \
- SPLIT_DATA_VOLUME_ENTRIES \
- SPLIT_DATA_BRANCHED_ENTRIES \
- SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
+ SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
+ SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
+ SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
+ SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
+ SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
+ SPLIT_DATA_ENTRY( \
+ ccl_global int, queue_data, (NUM_QUEUES * 2)) /* TODO(mai): this is too large? */ \
+ SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \
+ SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
+ SPLIT_DATA_SUBSURFACE_ENTRIES \
+ SPLIT_DATA_VOLUME_ENTRIES \
+ SPLIT_DATA_BRANCHED_ENTRIES \
+ SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
/* entries to be copied to inactive rays when sharing branched samples (TODO: which are actually needed?) */
#define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \
- SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
- SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
- SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
- SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
- SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
- SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
- SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
- SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
- SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
- SPLIT_DATA_SUBSURFACE_ENTRIES \
- SPLIT_DATA_VOLUME_ENTRIES \
- SPLIT_DATA_BRANCHED_ENTRIES \
- SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
+ SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
+ SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
+ SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
+ SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
+ SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
+ SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
+ SPLIT_DATA_SUBSURFACE_ENTRIES \
+ SPLIT_DATA_VOLUME_ENTRIES \
+ SPLIT_DATA_BRANCHED_ENTRIES \
+ SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
/* struct that holds pointers to data in the shared state buffer */
typedef struct SplitData {
#define SPLIT_DATA_ENTRY(type, name, num) type *name;
- SPLIT_DATA_ENTRIES
+ SPLIT_DATA_ENTRIES
#undef SPLIT_DATA_ENTRY
- /* this is actually in a separate buffer from the rest of the split state data (so it can be read back from
- * the host easily) but is still used the same as the other data so we have it here in this struct as well
- */
- ccl_global char *ray_state;
+ /* this is actually in a separate buffer from the rest of the split state data (so it can be read back from
+ * the host easily) but is still used the same as the other data so we have it here in this struct as well
+ */
+ ccl_global char *ray_state;
} SplitData;
#ifndef __KERNEL_CUDA__
@@ -148,30 +148,30 @@ __device__ SplitData __split_data;
# define kernel_split_state (__split_data)
__device__ SplitParams __split_param_data;
# define kernel_split_params (__split_param_data)
-#endif /* __KERNEL_CUDA__ */
+#endif /* __KERNEL_CUDA__ */
-#define kernel_split_sd(sd, ray_index) ((ShaderData*) \
- ( \
- ((ccl_global char*)kernel_split_state._##sd) + \
- (sizeof(ShaderData) + sizeof(ShaderClosure)*(kernel_data.integrator.max_closures-1)) * (ray_index) \
- ))
+#define kernel_split_sd(sd, ray_index) \
+ ((ShaderData *)(((ccl_global char *)kernel_split_state._##sd) + \
+ (sizeof(ShaderData) + \
+ sizeof(ShaderClosure) * (kernel_data.integrator.max_closures - 1)) * \
+ (ray_index)))
/* Local storage for queue_enqueue kernel. */
typedef struct QueueEnqueueLocals {
- uint queue_atomics[2];
+ uint queue_atomics[2];
} QueueEnqueueLocals;
/* Local storage for holdout_emission_blurring_pathtermination_ao kernel. */
typedef struct BackgroundAOLocals {
- uint queue_atomics_bg;
- uint queue_atomics_ao;
+ uint queue_atomics_bg;
+ uint queue_atomics_ao;
} BackgroundAOLocals;
typedef struct ShaderSortLocals {
- uint local_value[SHADER_SORT_BLOCK_SIZE];
- ushort local_index[SHADER_SORT_BLOCK_SIZE];
+ uint local_value[SHADER_SORT_BLOCK_SIZE];
+ ushort local_index[SHADER_SORT_BLOCK_SIZE];
} ShaderSortLocals;
CCL_NAMESPACE_END
-#endif /* __KERNEL_SPLIT_DATA_TYPES_H__ */
+#endif /* __KERNEL_SPLIT_DATA_TYPES_H__ */
diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
index 08769fe303b..ba06ae3bc53 100644
--- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h
+++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
@@ -18,276 +18,247 @@ CCL_NAMESPACE_BEGIN
#if defined(__BRANCHED_PATH__) && defined(__SUBSURFACE__)
-ccl_device_inline void kernel_split_branched_path_subsurface_indirect_light_init(KernelGlobals *kg, int ray_index)
+ccl_device_inline void kernel_split_branched_path_subsurface_indirect_light_init(KernelGlobals *kg,
+ int ray_index)
{
- kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+ kernel_split_branched_path_indirect_loop_init(kg, ray_index);
- SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
- branched_state->ss_next_closure = 0;
- branched_state->ss_next_sample = 0;
+ branched_state->ss_next_closure = 0;
+ branched_state->ss_next_sample = 0;
- branched_state->num_hits = 0;
- branched_state->next_hit = 0;
+ branched_state->num_hits = 0;
+ branched_state->next_hit = 0;
- ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT);
+ ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT);
}
-ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_iter(KernelGlobals *kg, int ray_index)
+ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_iter(
+ KernelGlobals *kg, int ray_index)
{
- SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
-
- ShaderData *sd = kernel_split_sd(branched_state_sd, ray_index);
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
-
- for(int i = branched_state->ss_next_closure; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
-
- if(!CLOSURE_IS_BSSRDF(sc->type))
- continue;
-
- /* Closure memory will be overwritten, so read required variables now. */
- Bssrdf *bssrdf = (Bssrdf *)sc;
- ClosureType bssrdf_type = sc->type;
- float bssrdf_roughness = bssrdf->roughness;
-
- /* set up random number generator */
- if(branched_state->ss_next_sample == 0 && branched_state->next_hit == 0 &&
- branched_state->next_closure == 0 && branched_state->next_sample == 0)
- {
- branched_state->lcg_state = lcg_state_init_addrspace(&branched_state->path_state,
- 0x68bc21eb);
- }
- int num_samples = kernel_data.integrator.subsurface_samples * 3;
- float num_samples_inv = 1.0f/num_samples;
- uint bssrdf_rng_hash = cmj_hash(branched_state->path_state.rng_hash, i);
-
- /* do subsurface scatter step with copy of shader data, this will
- * replace the BSSRDF with a diffuse BSDF closure */
- for(int j = branched_state->ss_next_sample; j < num_samples; j++) {
- ccl_global PathState *hit_state = &kernel_split_state.path_state[ray_index];
- *hit_state = branched_state->path_state;
- hit_state->rng_hash = bssrdf_rng_hash;
- path_state_branch(hit_state, j, num_samples);
-
- ccl_global LocalIntersection *ss_isect = &branched_state->ss_isect;
- float bssrdf_u, bssrdf_v;
- path_branched_rng_2D(kg,
- bssrdf_rng_hash,
- hit_state,
- j,
- num_samples,
- PRNG_BSDF_U,
- &bssrdf_u,
- &bssrdf_v);
-
- /* intersection is expensive so avoid doing multiple times for the same input */
- if(branched_state->next_hit == 0 && branched_state->next_closure == 0 && branched_state->next_sample == 0) {
- uint lcg_state = branched_state->lcg_state;
- LocalIntersection ss_isect_private;
-
- branched_state->num_hits = subsurface_scatter_multi_intersect(kg,
- &ss_isect_private,
- sd,
- hit_state,
- sc,
- &lcg_state,
- bssrdf_u, bssrdf_v,
- true);
-
- branched_state->lcg_state = lcg_state;
- *ss_isect = ss_isect_private;
- }
-
- hit_state->rng_offset += PRNG_BOUNCE_NUM;
-
-#ifdef __VOLUME__
- Ray volume_ray = branched_state->ray;
- bool need_update_volume_stack =
- kernel_data.integrator.use_volumes &&
- sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
-#endif /* __VOLUME__ */
-
- /* compute lighting with the BSDF closure */
- for(int hit = branched_state->next_hit; hit < branched_state->num_hits; hit++) {
- ShaderData *bssrdf_sd = kernel_split_sd(sd, ray_index);
- *bssrdf_sd = *sd; /* note: copy happens each iteration of inner loop, this is
- * important as the indirect path will write into bssrdf_sd */
-
- LocalIntersection ss_isect_private = *ss_isect;
- subsurface_scatter_multi_setup(kg,
- &ss_isect_private,
- hit,
- bssrdf_sd,
- hit_state,
- bssrdf_type,
- bssrdf_roughness);
- *ss_isect = ss_isect_private;
-
-#ifdef __VOLUME__
- if(need_update_volume_stack) {
- /* Setup ray from previous surface point to the new one. */
- float3 P = ray_offset(bssrdf_sd->P, -bssrdf_sd->Ng);
- volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t);
-
- for(int k = 0; k < VOLUME_STACK_SIZE; k++) {
- hit_state->volume_stack[k] = branched_state->path_state.volume_stack[k];
- }
-
- kernel_volume_stack_update_for_subsurface(kg,
- emission_sd,
- &volume_ray,
- hit_state->volume_stack);
- }
-#endif /* __VOLUME__ */
-
-#ifdef __EMISSION__
- if(branched_state->next_closure == 0 && branched_state->next_sample == 0) {
- /* direct light */
- if(kernel_data.integrator.use_direct_light) {
- int all = (kernel_data.integrator.sample_all_lights_direct) ||
- (hit_state->flag & PATH_RAY_SHADOW_CATCHER);
- kernel_branched_path_surface_connect_light(kg,
- bssrdf_sd,
- emission_sd,
- hit_state,
- branched_state->throughput,
- num_samples_inv,
- L,
- all);
- }
- }
-#endif /* __EMISSION__ */
-
- /* indirect light */
- if(kernel_split_branched_path_surface_indirect_light_iter(kg,
- ray_index,
- num_samples_inv,
- bssrdf_sd,
- false,
- false))
- {
- branched_state->ss_next_closure = i;
- branched_state->ss_next_sample = j;
- branched_state->next_hit = hit;
-
- return true;
- }
-
- branched_state->next_closure = 0;
- }
-
- branched_state->next_hit = 0;
- }
-
- branched_state->ss_next_sample = 0;
- }
-
- branched_state->ss_next_closure = sd->num_closure;
-
- branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
- if(branched_state->waiting_on_shared_samples) {
- return true;
- }
-
- kernel_split_branched_path_indirect_loop_end(kg, ray_index);
-
- return false;
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ ShaderData *sd = kernel_split_sd(branched_state_sd, ray_index);
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+
+ for (int i = branched_state->ss_next_closure; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
+
+ if (!CLOSURE_IS_BSSRDF(sc->type))
+ continue;
+
+ /* Closure memory will be overwritten, so read required variables now. */
+ Bssrdf *bssrdf = (Bssrdf *)sc;
+ ClosureType bssrdf_type = sc->type;
+ float bssrdf_roughness = bssrdf->roughness;
+
+ /* set up random number generator */
+ if (branched_state->ss_next_sample == 0 && branched_state->next_hit == 0 &&
+ branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+ branched_state->lcg_state = lcg_state_init_addrspace(&branched_state->path_state,
+ 0x68bc21eb);
+ }
+ int num_samples = kernel_data.integrator.subsurface_samples * 3;
+ float num_samples_inv = 1.0f / num_samples;
+ uint bssrdf_rng_hash = cmj_hash(branched_state->path_state.rng_hash, i);
+
+ /* do subsurface scatter step with copy of shader data, this will
+ * replace the BSSRDF with a diffuse BSDF closure */
+ for (int j = branched_state->ss_next_sample; j < num_samples; j++) {
+ ccl_global PathState *hit_state = &kernel_split_state.path_state[ray_index];
+ *hit_state = branched_state->path_state;
+ hit_state->rng_hash = bssrdf_rng_hash;
+ path_state_branch(hit_state, j, num_samples);
+
+ ccl_global LocalIntersection *ss_isect = &branched_state->ss_isect;
+ float bssrdf_u, bssrdf_v;
+ path_branched_rng_2D(
+ kg, bssrdf_rng_hash, hit_state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+
+ /* intersection is expensive so avoid doing multiple times for the same input */
+ if (branched_state->next_hit == 0 && branched_state->next_closure == 0 &&
+ branched_state->next_sample == 0) {
+ uint lcg_state = branched_state->lcg_state;
+ LocalIntersection ss_isect_private;
+
+ branched_state->num_hits = subsurface_scatter_multi_intersect(
+ kg, &ss_isect_private, sd, hit_state, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
+
+ branched_state->lcg_state = lcg_state;
+ *ss_isect = ss_isect_private;
+ }
+
+ hit_state->rng_offset += PRNG_BOUNCE_NUM;
+
+# ifdef __VOLUME__
+ Ray volume_ray = branched_state->ray;
+ bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
+ sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
+# endif /* __VOLUME__ */
+
+ /* compute lighting with the BSDF closure */
+ for (int hit = branched_state->next_hit; hit < branched_state->num_hits; hit++) {
+ ShaderData *bssrdf_sd = kernel_split_sd(sd, ray_index);
+ *bssrdf_sd = *sd; /* note: copy happens each iteration of inner loop, this is
+ * important as the indirect path will write into bssrdf_sd */
+
+ LocalIntersection ss_isect_private = *ss_isect;
+ subsurface_scatter_multi_setup(
+ kg, &ss_isect_private, hit, bssrdf_sd, hit_state, bssrdf_type, bssrdf_roughness);
+ *ss_isect = ss_isect_private;
+
+# ifdef __VOLUME__
+ if (need_update_volume_stack) {
+ /* Setup ray from previous surface point to the new one. */
+ float3 P = ray_offset(bssrdf_sd->P, -bssrdf_sd->Ng);
+ volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t);
+
+ for (int k = 0; k < VOLUME_STACK_SIZE; k++) {
+ hit_state->volume_stack[k] = branched_state->path_state.volume_stack[k];
+ }
+
+ kernel_volume_stack_update_for_subsurface(
+ kg, emission_sd, &volume_ray, hit_state->volume_stack);
+ }
+# endif /* __VOLUME__ */
+
+# ifdef __EMISSION__
+ if (branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+ /* direct light */
+ if (kernel_data.integrator.use_direct_light) {
+ int all = (kernel_data.integrator.sample_all_lights_direct) ||
+ (hit_state->flag & PATH_RAY_SHADOW_CATCHER);
+ kernel_branched_path_surface_connect_light(kg,
+ bssrdf_sd,
+ emission_sd,
+ hit_state,
+ branched_state->throughput,
+ num_samples_inv,
+ L,
+ all);
+ }
+ }
+# endif /* __EMISSION__ */
+
+ /* indirect light */
+ if (kernel_split_branched_path_surface_indirect_light_iter(
+ kg, ray_index, num_samples_inv, bssrdf_sd, false, false)) {
+ branched_state->ss_next_closure = i;
+ branched_state->ss_next_sample = j;
+ branched_state->next_hit = hit;
+
+ return true;
+ }
+
+ branched_state->next_closure = 0;
+ }
+
+ branched_state->next_hit = 0;
+ }
+
+ branched_state->ss_next_sample = 0;
+ }
+
+ branched_state->ss_next_closure = sd->num_closure;
+
+ branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+ if (branched_state->waiting_on_shared_samples) {
+ return true;
+ }
+
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+
+ return false;
}
-#endif /* __BRANCHED_PATH__ && __SUBSURFACE__ */
+#endif /* __BRANCHED_PATH__ && __SUBSURFACE__ */
ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
{
- int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(thread_index == 0) {
- /* We will empty both queues in this kernel. */
- kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
- kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
- }
-
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
- get_ray_index(kg, thread_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (thread_index == 0) {
+ /* We will empty both queues in this kernel. */
+ kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
+ }
+
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+ get_ray_index(kg,
+ thread_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
#ifdef __SUBSURFACE__
- ccl_global char *ray_state = kernel_split_state.ray_state;
-
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
- ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
-
- if(sd->flag & SD_BSSRDF) {
-
-#ifdef __BRANCHED_PATH__
- if(!kernel_data.integrator.branched ||
- IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT))
- {
-#endif
- if(kernel_path_subsurface_scatter(kg,
- sd,
- emission_sd,
- L,
- state,
- ray,
- throughput,
- ss_indirect))
- {
- kernel_split_path_end(kg, ray_index);
- }
-#ifdef __BRANCHED_PATH__
- }
- else {
- kernel_split_branched_path_subsurface_indirect_light_init(kg, ray_index);
-
- if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- }
-#endif
- }
- }
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+
+ if (sd->flag & SD_BSSRDF) {
# ifdef __BRANCHED_PATH__
- if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
- kernel_split_params.queue_index[QUEUE_SUBSURFACE_INDIRECT_ITER] = 0;
- }
-
- /* iter loop */
- ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
- QUEUE_SUBSURFACE_INDIRECT_ITER,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
-
- if(IS_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER)) {
- /* for render passes, sum and reset indirect light pass variables
- * for the next samples */
- path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
- path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
-
- if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- }
-# endif /* __BRANCHED_PATH__ */
-
-#endif /* __SUBSURFACE__ */
+ if (!kernel_data.integrator.branched ||
+ IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+# endif
+ if (kernel_path_subsurface_scatter(
+ kg, sd, emission_sd, L, state, ray, throughput, ss_indirect)) {
+ kernel_split_path_end(kg, ray_index);
+ }
+# ifdef __BRANCHED_PATH__
+ }
+ else {
+ kernel_split_branched_path_subsurface_indirect_light_init(kg, ray_index);
+
+ if (kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+# endif
+ }
+ }
+# ifdef __BRANCHED_PATH__
+ if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.queue_index[QUEUE_SUBSURFACE_INDIRECT_ITER] = 0;
+ }
+
+ /* iter loop */
+ ray_index = get_ray_index(kg,
+ ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+ QUEUE_SUBSURFACE_INDIRECT_ITER,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+ if (IS_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER)) {
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
+ path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
+
+ if (kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
+
+#endif /* __SUBSURFACE__ */
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index ccb9aef7a5b..4a386afa5de 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -46,92 +46,102 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline float3 stack_load_float3(float *stack, uint a)
{
- kernel_assert(a+2 < SVM_STACK_SIZE);
+ kernel_assert(a + 2 < SVM_STACK_SIZE);
- return make_float3(stack[a+0], stack[a+1], stack[a+2]);
+ return make_float3(stack[a + 0], stack[a + 1], stack[a + 2]);
}
ccl_device_inline void stack_store_float3(float *stack, uint a, float3 f)
{
- kernel_assert(a+2 < SVM_STACK_SIZE);
+ kernel_assert(a + 2 < SVM_STACK_SIZE);
- stack[a+0] = f.x;
- stack[a+1] = f.y;
- stack[a+2] = f.z;
+ stack[a + 0] = f.x;
+ stack[a + 1] = f.y;
+ stack[a + 2] = f.z;
}
ccl_device_inline float stack_load_float(float *stack, uint a)
{
- kernel_assert(a < SVM_STACK_SIZE);
+ kernel_assert(a < SVM_STACK_SIZE);
- return stack[a];
+ return stack[a];
}
ccl_device_inline float stack_load_float_default(float *stack, uint a, uint value)
{
- return (a == (uint)SVM_STACK_INVALID)? __uint_as_float(value): stack_load_float(stack, a);
+ return (a == (uint)SVM_STACK_INVALID) ? __uint_as_float(value) : stack_load_float(stack, a);
}
ccl_device_inline void stack_store_float(float *stack, uint a, float f)
{
- kernel_assert(a < SVM_STACK_SIZE);
+ kernel_assert(a < SVM_STACK_SIZE);
- stack[a] = f;
+ stack[a] = f;
}
ccl_device_inline int stack_load_int(float *stack, uint a)
{
- kernel_assert(a < SVM_STACK_SIZE);
+ kernel_assert(a < SVM_STACK_SIZE);
- return __float_as_int(stack[a]);
+ return __float_as_int(stack[a]);
}
ccl_device_inline int stack_load_int_default(float *stack, uint a, uint value)
{
- return (a == (uint)SVM_STACK_INVALID)? (int)value: stack_load_int(stack, a);
+ return (a == (uint)SVM_STACK_INVALID) ? (int)value : stack_load_int(stack, a);
}
ccl_device_inline void stack_store_int(float *stack, uint a, int i)
{
- kernel_assert(a < SVM_STACK_SIZE);
+ kernel_assert(a < SVM_STACK_SIZE);
- stack[a] = __int_as_float(i);
+ stack[a] = __int_as_float(i);
}
ccl_device_inline bool stack_valid(uint a)
{
- return a != (uint)SVM_STACK_INVALID;
+ return a != (uint)SVM_STACK_INVALID;
}
/* Reading Nodes */
ccl_device_inline uint4 read_node(KernelGlobals *kg, int *offset)
{
- uint4 node = kernel_tex_fetch(__svm_nodes, *offset);
- (*offset)++;
- return node;
+ uint4 node = kernel_tex_fetch(__svm_nodes, *offset);
+ (*offset)++;
+ return node;
}
ccl_device_inline float4 read_node_float(KernelGlobals *kg, int *offset)
{
- uint4 node = kernel_tex_fetch(__svm_nodes, *offset);
- float4 f = make_float4(__uint_as_float(node.x), __uint_as_float(node.y), __uint_as_float(node.z), __uint_as_float(node.w));
- (*offset)++;
- return f;
+ uint4 node = kernel_tex_fetch(__svm_nodes, *offset);
+ float4 f = make_float4(__uint_as_float(node.x),
+ __uint_as_float(node.y),
+ __uint_as_float(node.z),
+ __uint_as_float(node.w));
+ (*offset)++;
+ return f;
}
ccl_device_inline float4 fetch_node_float(KernelGlobals *kg, int offset)
{
- uint4 node = kernel_tex_fetch(__svm_nodes, offset);
- return make_float4(__uint_as_float(node.x), __uint_as_float(node.y), __uint_as_float(node.z), __uint_as_float(node.w));
+ uint4 node = kernel_tex_fetch(__svm_nodes, offset);
+ return make_float4(__uint_as_float(node.x),
+ __uint_as_float(node.y),
+ __uint_as_float(node.z),
+ __uint_as_float(node.w));
}
ccl_device_inline void decode_node_uchar4(uint i, uint *x, uint *y, uint *z, uint *w)
{
- if(x) *x = (i & 0xFF);
- if(y) *y = ((i >> 8) & 0xFF);
- if(z) *z = ((i >> 16) & 0xFF);
- if(w) *w = ((i >> 24) & 0xFF);
+ if (x)
+ *x = (i & 0xFF);
+ if (y)
+ *y = ((i >> 8) & 0xFF);
+ if (z)
+ *z = ((i >> 16) & 0xFF);
+ if (w)
+ *w = ((i >> 24) & 0xFF);
}
CCL_NAMESPACE_END
@@ -194,302 +204,310 @@ CCL_NAMESPACE_BEGIN
#define NODES_FEATURE(feature) ((__NODES_FEATURES__ & (feature)) != 0)
/* Main Interpreter Loop */
-ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, ShaderType type, int path_flag)
+ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
+ ShaderData *sd,
+ ccl_addr_space PathState *state,
+ ShaderType type,
+ int path_flag)
{
- float stack[SVM_STACK_SIZE];
- int offset = sd->shader & SHADER_MASK;
+ float stack[SVM_STACK_SIZE];
+ int offset = sd->shader & SHADER_MASK;
- while(1) {
- uint4 node = read_node(kg, &offset);
+ while (1) {
+ uint4 node = read_node(kg, &offset);
- switch(node.x) {
+ switch (node.x) {
#if NODES_GROUP(NODE_GROUP_LEVEL_0)
- case NODE_SHADER_JUMP: {
- if(type == SHADER_TYPE_SURFACE) offset = node.y;
- else if(type == SHADER_TYPE_VOLUME) offset = node.z;
- else if(type == SHADER_TYPE_DISPLACEMENT) offset = node.w;
- else return;
- break;
- }
- case NODE_CLOSURE_BSDF:
- svm_node_closure_bsdf(kg, sd, stack, node, type, path_flag, &offset);
- break;
- case NODE_CLOSURE_EMISSION:
- svm_node_closure_emission(sd, stack, node);
- break;
- case NODE_CLOSURE_BACKGROUND:
- svm_node_closure_background(sd, stack, node);
- break;
- case NODE_CLOSURE_SET_WEIGHT:
- svm_node_closure_set_weight(sd, node.y, node.z, node.w);
- break;
- case NODE_CLOSURE_WEIGHT:
- svm_node_closure_weight(sd, stack, node.y);
- break;
- case NODE_EMISSION_WEIGHT:
- svm_node_emission_weight(kg, sd, stack, node);
- break;
- case NODE_MIX_CLOSURE:
- svm_node_mix_closure(sd, stack, node);
- break;
- case NODE_JUMP_IF_ZERO:
- if(stack_load_float(stack, node.z) == 0.0f)
- offset += node.y;
- break;
- case NODE_JUMP_IF_ONE:
- if(stack_load_float(stack, node.z) == 1.0f)
- offset += node.y;
- break;
- case NODE_GEOMETRY:
- svm_node_geometry(kg, sd, stack, node.y, node.z);
- break;
- case NODE_CONVERT:
- svm_node_convert(kg, sd, stack, node.y, node.z, node.w);
- break;
- case NODE_TEX_COORD:
- svm_node_tex_coord(kg, sd, path_flag, stack, node, &offset);
- break;
- case NODE_VALUE_F:
- svm_node_value_f(kg, sd, stack, node.y, node.z);
- break;
- case NODE_VALUE_V:
- svm_node_value_v(kg, sd, stack, node.y, &offset);
- break;
- case NODE_ATTR:
- svm_node_attr(kg, sd, stack, node);
- break;
+ case NODE_SHADER_JUMP: {
+ if (type == SHADER_TYPE_SURFACE)
+ offset = node.y;
+ else if (type == SHADER_TYPE_VOLUME)
+ offset = node.z;
+ else if (type == SHADER_TYPE_DISPLACEMENT)
+ offset = node.w;
+ else
+ return;
+ break;
+ }
+ case NODE_CLOSURE_BSDF:
+ svm_node_closure_bsdf(kg, sd, stack, node, type, path_flag, &offset);
+ break;
+ case NODE_CLOSURE_EMISSION:
+ svm_node_closure_emission(sd, stack, node);
+ break;
+ case NODE_CLOSURE_BACKGROUND:
+ svm_node_closure_background(sd, stack, node);
+ break;
+ case NODE_CLOSURE_SET_WEIGHT:
+ svm_node_closure_set_weight(sd, node.y, node.z, node.w);
+ break;
+ case NODE_CLOSURE_WEIGHT:
+ svm_node_closure_weight(sd, stack, node.y);
+ break;
+ case NODE_EMISSION_WEIGHT:
+ svm_node_emission_weight(kg, sd, stack, node);
+ break;
+ case NODE_MIX_CLOSURE:
+ svm_node_mix_closure(sd, stack, node);
+ break;
+ case NODE_JUMP_IF_ZERO:
+ if (stack_load_float(stack, node.z) == 0.0f)
+ offset += node.y;
+ break;
+ case NODE_JUMP_IF_ONE:
+ if (stack_load_float(stack, node.z) == 1.0f)
+ offset += node.y;
+ break;
+ case NODE_GEOMETRY:
+ svm_node_geometry(kg, sd, stack, node.y, node.z);
+ break;
+ case NODE_CONVERT:
+ svm_node_convert(kg, sd, stack, node.y, node.z, node.w);
+ break;
+ case NODE_TEX_COORD:
+ svm_node_tex_coord(kg, sd, path_flag, stack, node, &offset);
+ break;
+ case NODE_VALUE_F:
+ svm_node_value_f(kg, sd, stack, node.y, node.z);
+ break;
+ case NODE_VALUE_V:
+ svm_node_value_v(kg, sd, stack, node.y, &offset);
+ break;
+ case NODE_ATTR:
+ svm_node_attr(kg, sd, stack, node);
+ break;
# if NODES_FEATURE(NODE_FEATURE_BUMP)
- case NODE_GEOMETRY_BUMP_DX:
- svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z);
- break;
- case NODE_GEOMETRY_BUMP_DY:
- svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z);
- break;
- case NODE_SET_DISPLACEMENT:
- svm_node_set_displacement(kg, sd, stack, node.y);
- break;
- case NODE_DISPLACEMENT:
- svm_node_displacement(kg, sd, stack, node);
- break;
- case NODE_VECTOR_DISPLACEMENT:
- svm_node_vector_displacement(kg, sd, stack, node, &offset);
- break;
-# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */
+ case NODE_GEOMETRY_BUMP_DX:
+ svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z);
+ break;
+ case NODE_GEOMETRY_BUMP_DY:
+ svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z);
+ break;
+ case NODE_SET_DISPLACEMENT:
+ svm_node_set_displacement(kg, sd, stack, node.y);
+ break;
+ case NODE_DISPLACEMENT:
+ svm_node_displacement(kg, sd, stack, node);
+ break;
+ case NODE_VECTOR_DISPLACEMENT:
+ svm_node_vector_displacement(kg, sd, stack, node, &offset);
+ break;
+# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */
# ifdef __TEXTURES__
- case NODE_TEX_IMAGE:
- svm_node_tex_image(kg, sd, stack, node);
- break;
- case NODE_TEX_IMAGE_BOX:
- svm_node_tex_image_box(kg, sd, stack, node);
- break;
- case NODE_TEX_NOISE:
- svm_node_tex_noise(kg, sd, stack, node, &offset);
- break;
-# endif /* __TEXTURES__ */
+ case NODE_TEX_IMAGE:
+ svm_node_tex_image(kg, sd, stack, node);
+ break;
+ case NODE_TEX_IMAGE_BOX:
+ svm_node_tex_image_box(kg, sd, stack, node);
+ break;
+ case NODE_TEX_NOISE:
+ svm_node_tex_noise(kg, sd, stack, node, &offset);
+ break;
+# endif /* __TEXTURES__ */
# ifdef __EXTRA_NODES__
# if NODES_FEATURE(NODE_FEATURE_BUMP)
- case NODE_SET_BUMP:
- svm_node_set_bump(kg, sd, stack, node);
- break;
- case NODE_ATTR_BUMP_DX:
- svm_node_attr_bump_dx(kg, sd, stack, node);
- break;
- case NODE_ATTR_BUMP_DY:
- svm_node_attr_bump_dy(kg, sd, stack, node);
- break;
- case NODE_TEX_COORD_BUMP_DX:
- svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, &offset);
- break;
- case NODE_TEX_COORD_BUMP_DY:
- svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, &offset);
- break;
- case NODE_CLOSURE_SET_NORMAL:
- svm_node_set_normal(kg, sd, stack, node.y, node.z);
- break;
+ case NODE_SET_BUMP:
+ svm_node_set_bump(kg, sd, stack, node);
+ break;
+ case NODE_ATTR_BUMP_DX:
+ svm_node_attr_bump_dx(kg, sd, stack, node);
+ break;
+ case NODE_ATTR_BUMP_DY:
+ svm_node_attr_bump_dy(kg, sd, stack, node);
+ break;
+ case NODE_TEX_COORD_BUMP_DX:
+ svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, &offset);
+ break;
+ case NODE_TEX_COORD_BUMP_DY:
+ svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, &offset);
+ break;
+ case NODE_CLOSURE_SET_NORMAL:
+ svm_node_set_normal(kg, sd, stack, node.y, node.z);
+ break;
# if NODES_FEATURE(NODE_FEATURE_BUMP_STATE)
- case NODE_ENTER_BUMP_EVAL:
- svm_node_enter_bump_eval(kg, sd, stack, node.y);
- break;
- case NODE_LEAVE_BUMP_EVAL:
- svm_node_leave_bump_eval(kg, sd, stack, node.y);
- break;
-# endif /* NODES_FEATURE(NODE_FEATURE_BUMP_STATE) */
-# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */
- case NODE_HSV:
- svm_node_hsv(kg, sd, stack, node, &offset);
- break;
-# endif /* __EXTRA_NODES__ */
-#endif /* NODES_GROUP(NODE_GROUP_LEVEL_0) */
+ case NODE_ENTER_BUMP_EVAL:
+ svm_node_enter_bump_eval(kg, sd, stack, node.y);
+ break;
+ case NODE_LEAVE_BUMP_EVAL:
+ svm_node_leave_bump_eval(kg, sd, stack, node.y);
+ break;
+# endif /* NODES_FEATURE(NODE_FEATURE_BUMP_STATE) */
+# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */
+ case NODE_HSV:
+ svm_node_hsv(kg, sd, stack, node, &offset);
+ break;
+# endif /* __EXTRA_NODES__ */
+#endif /* NODES_GROUP(NODE_GROUP_LEVEL_0) */
#if NODES_GROUP(NODE_GROUP_LEVEL_1)
- case NODE_CLOSURE_HOLDOUT:
- svm_node_closure_holdout(sd, stack, node);
- break;
- case NODE_FRESNEL:
- svm_node_fresnel(sd, stack, node.y, node.z, node.w);
- break;
- case NODE_LAYER_WEIGHT:
- svm_node_layer_weight(sd, stack, node);
- break;
+ case NODE_CLOSURE_HOLDOUT:
+ svm_node_closure_holdout(sd, stack, node);
+ break;
+ case NODE_FRESNEL:
+ svm_node_fresnel(sd, stack, node.y, node.z, node.w);
+ break;
+ case NODE_LAYER_WEIGHT:
+ svm_node_layer_weight(sd, stack, node);
+ break;
# if NODES_FEATURE(NODE_FEATURE_VOLUME)
- case NODE_CLOSURE_VOLUME:
- svm_node_closure_volume(kg, sd, stack, node, type);
- break;
- case NODE_PRINCIPLED_VOLUME:
- svm_node_principled_volume(kg, sd, stack, node, type, path_flag, &offset);
- break;
-# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */
+ case NODE_CLOSURE_VOLUME:
+ svm_node_closure_volume(kg, sd, stack, node, type);
+ break;
+ case NODE_PRINCIPLED_VOLUME:
+ svm_node_principled_volume(kg, sd, stack, node, type, path_flag, &offset);
+ break;
+# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */
# ifdef __EXTRA_NODES__
- case NODE_MATH:
- svm_node_math(kg, sd, stack, node.y, node.z, node.w, &offset);
- break;
- case NODE_VECTOR_MATH:
- svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, &offset);
- break;
- case NODE_RGB_RAMP:
- svm_node_rgb_ramp(kg, sd, stack, node, &offset);
- break;
- case NODE_GAMMA:
- svm_node_gamma(sd, stack, node.y, node.z, node.w);
- break;
- case NODE_BRIGHTCONTRAST:
- svm_node_brightness(sd, stack, node.y, node.z, node.w);
- break;
- case NODE_LIGHT_PATH:
- svm_node_light_path(sd, state, stack, node.y, node.z, path_flag);
- break;
- case NODE_OBJECT_INFO:
- svm_node_object_info(kg, sd, stack, node.y, node.z);
- break;
- case NODE_PARTICLE_INFO:
- svm_node_particle_info(kg, sd, stack, node.y, node.z);
- break;
+ case NODE_MATH:
+ svm_node_math(kg, sd, stack, node.y, node.z, node.w, &offset);
+ break;
+ case NODE_VECTOR_MATH:
+ svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, &offset);
+ break;
+ case NODE_RGB_RAMP:
+ svm_node_rgb_ramp(kg, sd, stack, node, &offset);
+ break;
+ case NODE_GAMMA:
+ svm_node_gamma(sd, stack, node.y, node.z, node.w);
+ break;
+ case NODE_BRIGHTCONTRAST:
+ svm_node_brightness(sd, stack, node.y, node.z, node.w);
+ break;
+ case NODE_LIGHT_PATH:
+ svm_node_light_path(sd, state, stack, node.y, node.z, path_flag);
+ break;
+ case NODE_OBJECT_INFO:
+ svm_node_object_info(kg, sd, stack, node.y, node.z);
+ break;
+ case NODE_PARTICLE_INFO:
+ svm_node_particle_info(kg, sd, stack, node.y, node.z);
+ break;
# ifdef __HAIR__
# if NODES_FEATURE(NODE_FEATURE_HAIR)
- case NODE_HAIR_INFO:
- svm_node_hair_info(kg, sd, stack, node.y, node.z);
- break;
-# endif /* NODES_FEATURE(NODE_FEATURE_HAIR) */
-# endif /* __HAIR__ */
-# endif /* __EXTRA_NODES__ */
-#endif /* NODES_GROUP(NODE_GROUP_LEVEL_1) */
+ case NODE_HAIR_INFO:
+ svm_node_hair_info(kg, sd, stack, node.y, node.z);
+ break;
+# endif /* NODES_FEATURE(NODE_FEATURE_HAIR) */
+# endif /* __HAIR__ */
+# endif /* __EXTRA_NODES__ */
+#endif /* NODES_GROUP(NODE_GROUP_LEVEL_1) */
#if NODES_GROUP(NODE_GROUP_LEVEL_2)
- case NODE_MAPPING:
- svm_node_mapping(kg, sd, stack, node.y, node.z, &offset);
- break;
- case NODE_MIN_MAX:
- svm_node_min_max(kg, sd, stack, node.y, node.z, &offset);
- break;
- case NODE_CAMERA:
- svm_node_camera(kg, sd, stack, node.y, node.z, node.w);
- break;
+ case NODE_MAPPING:
+ svm_node_mapping(kg, sd, stack, node.y, node.z, &offset);
+ break;
+ case NODE_MIN_MAX:
+ svm_node_min_max(kg, sd, stack, node.y, node.z, &offset);
+ break;
+ case NODE_CAMERA:
+ svm_node_camera(kg, sd, stack, node.y, node.z, node.w);
+ break;
# ifdef __TEXTURES__
- case NODE_TEX_ENVIRONMENT:
- svm_node_tex_environment(kg, sd, stack, node);
- break;
- case NODE_TEX_SKY:
- svm_node_tex_sky(kg, sd, stack, node, &offset);
- break;
- case NODE_TEX_GRADIENT:
- svm_node_tex_gradient(sd, stack, node);
- break;
- case NODE_TEX_VORONOI:
- svm_node_tex_voronoi(kg, sd, stack, node, &offset);
- break;
- case NODE_TEX_MUSGRAVE:
- svm_node_tex_musgrave(kg, sd, stack, node, &offset);
- break;
- case NODE_TEX_WAVE:
- svm_node_tex_wave(kg, sd, stack, node, &offset);
- break;
- case NODE_TEX_MAGIC:
- svm_node_tex_magic(kg, sd, stack, node, &offset);
- break;
- case NODE_TEX_CHECKER:
- svm_node_tex_checker(kg, sd, stack, node);
- break;
- case NODE_TEX_BRICK:
- svm_node_tex_brick(kg, sd, stack, node, &offset);
- break;
-# endif /* __TEXTURES__ */
+ case NODE_TEX_ENVIRONMENT:
+ svm_node_tex_environment(kg, sd, stack, node);
+ break;
+ case NODE_TEX_SKY:
+ svm_node_tex_sky(kg, sd, stack, node, &offset);
+ break;
+ case NODE_TEX_GRADIENT:
+ svm_node_tex_gradient(sd, stack, node);
+ break;
+ case NODE_TEX_VORONOI:
+ svm_node_tex_voronoi(kg, sd, stack, node, &offset);
+ break;
+ case NODE_TEX_MUSGRAVE:
+ svm_node_tex_musgrave(kg, sd, stack, node, &offset);
+ break;
+ case NODE_TEX_WAVE:
+ svm_node_tex_wave(kg, sd, stack, node, &offset);
+ break;
+ case NODE_TEX_MAGIC:
+ svm_node_tex_magic(kg, sd, stack, node, &offset);
+ break;
+ case NODE_TEX_CHECKER:
+ svm_node_tex_checker(kg, sd, stack, node);
+ break;
+ case NODE_TEX_BRICK:
+ svm_node_tex_brick(kg, sd, stack, node, &offset);
+ break;
+# endif /* __TEXTURES__ */
# ifdef __EXTRA_NODES__
- case NODE_NORMAL:
- svm_node_normal(kg, sd, stack, node.y, node.z, node.w, &offset);
- break;
- case NODE_LIGHT_FALLOFF:
- svm_node_light_falloff(sd, stack, node);
- break;
- case NODE_IES:
- svm_node_ies(kg, sd, stack, node, &offset);
- break;
-# endif /* __EXTRA_NODES__ */
-#endif /* NODES_GROUP(NODE_GROUP_LEVEL_2) */
+ case NODE_NORMAL:
+ svm_node_normal(kg, sd, stack, node.y, node.z, node.w, &offset);
+ break;
+ case NODE_LIGHT_FALLOFF:
+ svm_node_light_falloff(sd, stack, node);
+ break;
+ case NODE_IES:
+ svm_node_ies(kg, sd, stack, node, &offset);
+ break;
+# endif /* __EXTRA_NODES__ */
+#endif /* NODES_GROUP(NODE_GROUP_LEVEL_2) */
#if NODES_GROUP(NODE_GROUP_LEVEL_3)
- case NODE_RGB_CURVES:
- case NODE_VECTOR_CURVES:
- svm_node_curves(kg, sd, stack, node, &offset);
- break;
- case NODE_TANGENT:
- svm_node_tangent(kg, sd, stack, node);
- break;
- case NODE_NORMAL_MAP:
- svm_node_normal_map(kg, sd, stack, node);
- break;
+ case NODE_RGB_CURVES:
+ case NODE_VECTOR_CURVES:
+ svm_node_curves(kg, sd, stack, node, &offset);
+ break;
+ case NODE_TANGENT:
+ svm_node_tangent(kg, sd, stack, node);
+ break;
+ case NODE_NORMAL_MAP:
+ svm_node_normal_map(kg, sd, stack, node);
+ break;
# ifdef __EXTRA_NODES__
- case NODE_INVERT:
- svm_node_invert(sd, stack, node.y, node.z, node.w);
- break;
- case NODE_MIX:
- svm_node_mix(kg, sd, stack, node.y, node.z, node.w, &offset);
- break;
- case NODE_SEPARATE_VECTOR:
- svm_node_separate_vector(sd, stack, node.y, node.z, node.w);
- break;
- case NODE_COMBINE_VECTOR:
- svm_node_combine_vector(sd, stack, node.y, node.z, node.w);
- break;
- case NODE_SEPARATE_HSV:
- svm_node_separate_hsv(kg, sd, stack, node.y, node.z, node.w, &offset);
- break;
- case NODE_COMBINE_HSV:
- svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, &offset);
- break;
- case NODE_VECTOR_TRANSFORM:
- svm_node_vector_transform(kg, sd, stack, node);
- break;
- case NODE_WIREFRAME:
- svm_node_wireframe(kg, sd, stack, node);
- break;
- case NODE_WAVELENGTH:
- svm_node_wavelength(kg, sd, stack, node.y, node.z);
- break;
- case NODE_BLACKBODY:
- svm_node_blackbody(kg, sd, stack, node.y, node.z);
- break;
-# endif /* __EXTRA_NODES__ */
+ case NODE_INVERT:
+ svm_node_invert(sd, stack, node.y, node.z, node.w);
+ break;
+ case NODE_MIX:
+ svm_node_mix(kg, sd, stack, node.y, node.z, node.w, &offset);
+ break;
+ case NODE_SEPARATE_VECTOR:
+ svm_node_separate_vector(sd, stack, node.y, node.z, node.w);
+ break;
+ case NODE_COMBINE_VECTOR:
+ svm_node_combine_vector(sd, stack, node.y, node.z, node.w);
+ break;
+ case NODE_SEPARATE_HSV:
+ svm_node_separate_hsv(kg, sd, stack, node.y, node.z, node.w, &offset);
+ break;
+ case NODE_COMBINE_HSV:
+ svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, &offset);
+ break;
+ case NODE_VECTOR_TRANSFORM:
+ svm_node_vector_transform(kg, sd, stack, node);
+ break;
+ case NODE_WIREFRAME:
+ svm_node_wireframe(kg, sd, stack, node);
+ break;
+ case NODE_WAVELENGTH:
+ svm_node_wavelength(kg, sd, stack, node.y, node.z);
+ break;
+ case NODE_BLACKBODY:
+ svm_node_blackbody(kg, sd, stack, node.y, node.z);
+ break;
+# endif /* __EXTRA_NODES__ */
# if NODES_FEATURE(NODE_FEATURE_VOLUME)
- case NODE_TEX_VOXEL:
- svm_node_tex_voxel(kg, sd, stack, node, &offset);
- break;
-# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */
+ case NODE_TEX_VOXEL:
+ svm_node_tex_voxel(kg, sd, stack, node, &offset);
+ break;
+# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */
# ifdef __SHADER_RAYTRACE__
- case NODE_BEVEL:
- svm_node_bevel(kg, sd, state, stack, node);
- break;
- case NODE_AMBIENT_OCCLUSION:
- svm_node_ao(kg, sd, state, stack, node);
- break;
-# endif /* __SHADER_RAYTRACE__ */
-#endif /* NODES_GROUP(NODE_GROUP_LEVEL_3) */
- case NODE_END:
- return;
- default:
- kernel_assert(!"Unknown node type was passed to the SVM machine");
- return;
- }
- }
+ case NODE_BEVEL:
+ svm_node_bevel(kg, sd, state, stack, node);
+ break;
+ case NODE_AMBIENT_OCCLUSION:
+ svm_node_ao(kg, sd, state, stack, node);
+ break;
+# endif /* __SHADER_RAYTRACE__ */
+#endif /* NODES_GROUP(NODE_GROUP_LEVEL_3) */
+ case NODE_END:
+ return;
+ default:
+ kernel_assert(!"Unknown node type was passed to the SVM machine");
+ return;
+ }
+ }
}
#undef NODES_GROUP
@@ -497,4 +515,4 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ccl_a
CCL_NAMESPACE_END
-#endif /* __SVM_H__ */
+#endif /* __SVM_H__ */
diff --git a/intern/cycles/kernel/svm/svm_ao.h b/intern/cycles/kernel/svm/svm_ao.h
index 0744ec1768f..06076175c40 100644
--- a/intern/cycles/kernel/svm/svm_ao.h
+++ b/intern/cycles/kernel/svm/svm_ao.h
@@ -24,95 +24,82 @@ ccl_device_noinline float svm_ao(KernelGlobals *kg,
int num_samples,
int flags)
{
- if(flags & NODE_AO_GLOBAL_RADIUS) {
- max_dist = kernel_data.background.ao_distance;
- }
-
- /* Early out if no sampling needed. */
- if(max_dist <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) {
- return 1.0f;
- }
-
- /* Can't raytrace from shaders like displacement, before BVH exists. */
- if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) {
- return 1.0f;
- }
-
- if(flags & NODE_AO_INSIDE) {
- N = -N;
- }
-
- float3 T, B;
- make_orthonormals(N, &T, &B);
-
- int unoccluded = 0;
- for(int sample = 0; sample < num_samples; sample++) {
- float disk_u, disk_v;
- path_branched_rng_2D(kg, state->rng_hash, state, sample, num_samples,
- PRNG_BEVEL_U, &disk_u, &disk_v);
-
- float2 d = concentric_sample_disk(disk_u, disk_v);
- float3 D = make_float3(d.x, d.y, safe_sqrtf(1.0f - dot(d, d)));
-
- /* Create ray. */
- Ray ray;
- ray.P = ray_offset(sd->P, N);
- ray.D = D.x*T + D.y*B + D.z*N;
- ray.t = max_dist;
- ray.time = sd->time;
- ray.dP = sd->dP;
- ray.dD = differential3_zero();
-
- if(flags & NODE_AO_ONLY_LOCAL) {
- if(!scene_intersect_local(kg,
- ray,
- NULL,
- sd->object,
- NULL,
- 0)) {
- unoccluded++;
- }
- }
- else {
- Intersection isect;
- if(!scene_intersect(kg,
- ray,
- PATH_RAY_SHADOW_OPAQUE,
- &isect,
- NULL,
- 0.0f, 0.0f)) {
- unoccluded++;
- }
- }
- }
-
- return ((float) unoccluded) / num_samples;
+ if (flags & NODE_AO_GLOBAL_RADIUS) {
+ max_dist = kernel_data.background.ao_distance;
+ }
+
+ /* Early out if no sampling needed. */
+ if (max_dist <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) {
+ return 1.0f;
+ }
+
+ /* Can't raytrace from shaders like displacement, before BVH exists. */
+ if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) {
+ return 1.0f;
+ }
+
+ if (flags & NODE_AO_INSIDE) {
+ N = -N;
+ }
+
+ float3 T, B;
+ make_orthonormals(N, &T, &B);
+
+ int unoccluded = 0;
+ for (int sample = 0; sample < num_samples; sample++) {
+ float disk_u, disk_v;
+ path_branched_rng_2D(
+ kg, state->rng_hash, state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v);
+
+ float2 d = concentric_sample_disk(disk_u, disk_v);
+ float3 D = make_float3(d.x, d.y, safe_sqrtf(1.0f - dot(d, d)));
+
+ /* Create ray. */
+ Ray ray;
+ ray.P = ray_offset(sd->P, N);
+ ray.D = D.x * T + D.y * B + D.z * N;
+ ray.t = max_dist;
+ ray.time = sd->time;
+ ray.dP = sd->dP;
+ ray.dD = differential3_zero();
+
+ if (flags & NODE_AO_ONLY_LOCAL) {
+ if (!scene_intersect_local(kg, ray, NULL, sd->object, NULL, 0)) {
+ unoccluded++;
+ }
+ }
+ else {
+ Intersection isect;
+ if (!scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f)) {
+ unoccluded++;
+ }
+ }
+ }
+
+ return ((float)unoccluded) / num_samples;
}
-ccl_device void svm_node_ao(KernelGlobals *kg,
- ShaderData *sd,
- ccl_addr_space PathState *state,
- float *stack,
- uint4 node)
+ccl_device void svm_node_ao(
+ KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float *stack, uint4 node)
{
- uint flags, dist_offset, normal_offset, out_ao_offset;
- decode_node_uchar4(node.y, &flags, &dist_offset, &normal_offset, &out_ao_offset);
+ uint flags, dist_offset, normal_offset, out_ao_offset;
+ decode_node_uchar4(node.y, &flags, &dist_offset, &normal_offset, &out_ao_offset);
- uint color_offset, out_color_offset, samples;
- decode_node_uchar4(node.z, &color_offset, &out_color_offset, &samples, NULL);
+ uint color_offset, out_color_offset, samples;
+ decode_node_uchar4(node.z, &color_offset, &out_color_offset, &samples, NULL);
- float dist = stack_load_float_default(stack, dist_offset, node.w);
- float3 normal = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): sd->N;
- float ao = svm_ao(kg, sd, normal, state, dist, samples, flags);
+ float dist = stack_load_float_default(stack, dist_offset, node.w);
+ float3 normal = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N;
+ float ao = svm_ao(kg, sd, normal, state, dist, samples, flags);
- if(stack_valid(out_ao_offset)) {
- stack_store_float(stack, out_ao_offset, ao);
- }
+ if (stack_valid(out_ao_offset)) {
+ stack_store_float(stack, out_ao_offset, ao);
+ }
- if(stack_valid(out_color_offset)) {
- float3 color = stack_load_float3(stack, color_offset);
- stack_store_float3(stack, out_color_offset, ao * color);
- }
+ if (stack_valid(out_color_offset)) {
+ float3 color = stack_load_float3(stack, color_offset);
+ stack_store_float3(stack, out_color_offset, ao * color);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h
index c2366df71d0..a67cfe91a30 100644
--- a/intern/cycles/kernel/svm/svm_attribute.h
+++ b/intern/cycles/kernel/svm/svm_attribute.h
@@ -18,67 +18,66 @@ CCL_NAMESPACE_BEGIN
/* Attribute Node */
-ccl_device AttributeDescriptor svm_node_attr_init(KernelGlobals *kg, ShaderData *sd,
- uint4 node, NodeAttributeType *type,
- uint *out_offset)
+ccl_device AttributeDescriptor svm_node_attr_init(
+ KernelGlobals *kg, ShaderData *sd, uint4 node, NodeAttributeType *type, uint *out_offset)
{
- *out_offset = node.z;
- *type = (NodeAttributeType)node.w;
+ *out_offset = node.z;
+ *type = (NodeAttributeType)node.w;
- AttributeDescriptor desc;
+ AttributeDescriptor desc;
- if(sd->object != OBJECT_NONE) {
- desc = find_attribute(kg, sd, node.y);
- if(desc.offset == ATTR_STD_NOT_FOUND) {
- desc = attribute_not_found();
- desc.offset = 0;
- desc.type = (NodeAttributeType)node.w;
- }
- }
- else {
- /* background */
- desc = attribute_not_found();
- desc.offset = 0;
- desc.type = (NodeAttributeType)node.w;
- }
+ if (sd->object != OBJECT_NONE) {
+ desc = find_attribute(kg, sd, node.y);
+ if (desc.offset == ATTR_STD_NOT_FOUND) {
+ desc = attribute_not_found();
+ desc.offset = 0;
+ desc.type = (NodeAttributeType)node.w;
+ }
+ }
+ else {
+ /* background */
+ desc = attribute_not_found();
+ desc.offset = 0;
+ desc.type = (NodeAttributeType)node.w;
+ }
- return desc;
+ return desc;
}
ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
- NodeAttributeType type;
- uint out_offset;
- AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
+ NodeAttributeType type;
+ uint out_offset;
+ AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
- /* fetch and store attribute */
- if(desc.type == NODE_ATTR_FLOAT) {
- float f = primitive_attribute_float(kg, sd, desc, NULL, NULL);
- if(type == NODE_ATTR_FLOAT) {
- stack_store_float(stack, out_offset, f);
- }
- else {
- stack_store_float3(stack, out_offset, make_float3(f, f, f));
- }
- }
- else if(desc.type == NODE_ATTR_FLOAT2) {
- float2 f = primitive_attribute_float2(kg, sd, desc, NULL, NULL);
- if(type == NODE_ATTR_FLOAT) {
- stack_store_float(stack, out_offset, f.x);
- }
- else {
- stack_store_float3(stack, out_offset, make_float3(f.x, f.y, 0.0f));
- }
- }
- else {
- float3 f = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
- if(type == NODE_ATTR_FLOAT) {
- stack_store_float(stack, out_offset, average(f));
- }
- else {
- stack_store_float3(stack, out_offset, f);
- }
- }
+ /* fetch and store attribute */
+ if (desc.type == NODE_ATTR_FLOAT) {
+ float f = primitive_attribute_float(kg, sd, desc, NULL, NULL);
+ if (type == NODE_ATTR_FLOAT) {
+ stack_store_float(stack, out_offset, f);
+ }
+ else {
+ stack_store_float3(stack, out_offset, make_float3(f, f, f));
+ }
+ }
+ else if (desc.type == NODE_ATTR_FLOAT2) {
+ float2 f = primitive_attribute_float2(kg, sd, desc, NULL, NULL);
+ if (type == NODE_ATTR_FLOAT) {
+ stack_store_float(stack, out_offset, f.x);
+ }
+ else {
+ stack_store_float3(stack, out_offset, make_float3(f.x, f.y, 0.0f));
+ }
+ }
+ else {
+ float3 f = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
+ if (type == NODE_ATTR_FLOAT) {
+ stack_store_float(stack, out_offset, average(f));
+ }
+ else {
+ stack_store_float3(stack, out_offset, f);
+ }
+ }
}
#ifndef __KERNEL_CUDA__
@@ -86,43 +85,44 @@ ccl_device
#else
ccl_device_noinline
#endif
-void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+ void
+ svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
- NodeAttributeType type;
- uint out_offset;
- AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
+ NodeAttributeType type;
+ uint out_offset;
+ AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
- /* fetch and store attribute */
- if(desc.type == NODE_ATTR_FLOAT) {
- float dx;
- float f = primitive_surface_attribute_float(kg, sd, desc, &dx, NULL);
- if(type == NODE_ATTR_FLOAT) {
- stack_store_float(stack, out_offset, f+dx);
- }
- else {
- stack_store_float3(stack, out_offset, make_float3(f+dx, f+dx, f+dx));
- }
- }
- else if(desc.type == NODE_ATTR_FLOAT2) {
- float2 dx;
- float2 f = primitive_attribute_float2(kg, sd, desc, &dx, NULL);
- if (type == NODE_ATTR_FLOAT) {
- stack_store_float(stack, out_offset, f.x + dx.x);
- }
- else {
- stack_store_float3(stack, out_offset, make_float3(f.x+dx.x, f.y+dx.y, 0.0f));
- }
- }
- else {
- float3 dx;
- float3 f = primitive_surface_attribute_float3(kg, sd, desc, &dx, NULL);
- if(type == NODE_ATTR_FLOAT) {
- stack_store_float(stack, out_offset, average(f+dx));
- }
- else {
- stack_store_float3(stack, out_offset, f+dx);
- }
- }
+ /* fetch and store attribute */
+ if (desc.type == NODE_ATTR_FLOAT) {
+ float dx;
+ float f = primitive_surface_attribute_float(kg, sd, desc, &dx, NULL);
+ if (type == NODE_ATTR_FLOAT) {
+ stack_store_float(stack, out_offset, f + dx);
+ }
+ else {
+ stack_store_float3(stack, out_offset, make_float3(f + dx, f + dx, f + dx));
+ }
+ }
+ else if (desc.type == NODE_ATTR_FLOAT2) {
+ float2 dx;
+ float2 f = primitive_attribute_float2(kg, sd, desc, &dx, NULL);
+ if (type == NODE_ATTR_FLOAT) {
+ stack_store_float(stack, out_offset, f.x + dx.x);
+ }
+ else {
+ stack_store_float3(stack, out_offset, make_float3(f.x + dx.x, f.y + dx.y, 0.0f));
+ }
+ }
+ else {
+ float3 dx;
+ float3 f = primitive_surface_attribute_float3(kg, sd, desc, &dx, NULL);
+ if (type == NODE_ATTR_FLOAT) {
+ stack_store_float(stack, out_offset, average(f + dx));
+ }
+ else {
+ stack_store_float3(stack, out_offset, f + dx);
+ }
+ }
}
#ifndef __KERNEL_CUDA__
@@ -130,46 +130,44 @@ ccl_device
#else
ccl_device_noinline
#endif
-void svm_node_attr_bump_dy(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint4 node)
+ void
+ svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
- NodeAttributeType type;
- uint out_offset;
- AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
+ NodeAttributeType type;
+ uint out_offset;
+ AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
- /* fetch and store attribute */
- if(desc.type == NODE_ATTR_FLOAT) {
- float dy;
- float f = primitive_surface_attribute_float(kg, sd, desc, NULL, &dy);
- if(type == NODE_ATTR_FLOAT) {
- stack_store_float(stack, out_offset, f+dy);
- }
- else {
- stack_store_float3(stack, out_offset, make_float3(f+dy, f+dy, f+dy));
- }
- }
- else if(desc.type == NODE_ATTR_FLOAT2) {
- float2 dy;
- float2 f = primitive_attribute_float2(kg, sd, desc, NULL, &dy);
- if(type == NODE_ATTR_FLOAT) {
- stack_store_float(stack, out_offset, f.x + dy.x);
- }
- else {
- stack_store_float3(stack, out_offset, make_float3(f.x+dy.x, f.y+dy.y, 0.0f));
- }
- }
- else {
- float3 dy;
- float3 f = primitive_surface_attribute_float3(kg, sd, desc, NULL, &dy);
- if(type == NODE_ATTR_FLOAT) {
- stack_store_float(stack, out_offset, average(f+dy));
- }
- else {
- stack_store_float3(stack, out_offset, f+dy);
- }
- }
+ /* fetch and store attribute */
+ if (desc.type == NODE_ATTR_FLOAT) {
+ float dy;
+ float f = primitive_surface_attribute_float(kg, sd, desc, NULL, &dy);
+ if (type == NODE_ATTR_FLOAT) {
+ stack_store_float(stack, out_offset, f + dy);
+ }
+ else {
+ stack_store_float3(stack, out_offset, make_float3(f + dy, f + dy, f + dy));
+ }
+ }
+ else if (desc.type == NODE_ATTR_FLOAT2) {
+ float2 dy;
+ float2 f = primitive_attribute_float2(kg, sd, desc, NULL, &dy);
+ if (type == NODE_ATTR_FLOAT) {
+ stack_store_float(stack, out_offset, f.x + dy.x);
+ }
+ else {
+ stack_store_float3(stack, out_offset, make_float3(f.x + dy.x, f.y + dy.y, 0.0f));
+ }
+ }
+ else {
+ float3 dy;
+ float3 f = primitive_surface_attribute_float3(kg, sd, desc, NULL, &dy);
+ if (type == NODE_ATTR_FLOAT) {
+ stack_store_float(stack, out_offset, average(f + dy));
+ }
+ else {
+ stack_store_float3(stack, out_offset, f + dy);
+ }
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_bevel.h b/intern/cycles/kernel/svm/svm_bevel.h
index b5bb9df422b..fcf28e96e98 100644
--- a/intern/cycles/kernel/svm/svm_bevel.h
+++ b/intern/cycles/kernel/svm/svm_bevel.h
@@ -22,215 +22,196 @@ CCL_NAMESPACE_BEGIN
* http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf
*/
-ccl_device_noinline float3 svm_bevel(
- KernelGlobals *kg,
- ShaderData *sd,
- ccl_addr_space PathState *state,
- float radius,
- int num_samples)
+ccl_device_noinline float3 svm_bevel(KernelGlobals *kg,
+ ShaderData *sd,
+ ccl_addr_space PathState *state,
+ float radius,
+ int num_samples)
{
- /* Early out if no sampling needed. */
- if(radius <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) {
- return sd->N;
- }
-
- /* Can't raytrace from shaders like displacement, before BVH exists. */
- if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) {
- return sd->N;
- }
-
- /* Don't bevel for blurry indirect rays. */
- if(state->min_ray_pdf < 8.0f) {
- return sd->N;
- }
-
- /* Setup for multi intersection. */
- LocalIntersection isect;
- uint lcg_state = lcg_state_init_addrspace(state, 0x64c6a40e);
-
- /* Sample normals from surrounding points on surface. */
- float3 sum_N = make_float3(0.0f, 0.0f, 0.0f);
-
- for(int sample = 0; sample < num_samples; sample++) {
- float disk_u, disk_v;
- path_branched_rng_2D(kg, state->rng_hash, state, sample, num_samples,
- PRNG_BEVEL_U, &disk_u, &disk_v);
-
- /* Pick random axis in local frame and point on disk. */
- float3 disk_N, disk_T, disk_B;
- float pick_pdf_N, pick_pdf_T, pick_pdf_B;
-
- disk_N = sd->Ng;
- make_orthonormals(disk_N, &disk_T, &disk_B);
-
- float axisu = disk_u;
-
- if(axisu < 0.5f) {
- pick_pdf_N = 0.5f;
- pick_pdf_T = 0.25f;
- pick_pdf_B = 0.25f;
- disk_u *= 2.0f;
- }
- else if(axisu < 0.75f) {
- float3 tmp = disk_N;
- disk_N = disk_T;
- disk_T = tmp;
- pick_pdf_N = 0.25f;
- pick_pdf_T = 0.5f;
- pick_pdf_B = 0.25f;
- disk_u = (disk_u - 0.5f)*4.0f;
- }
- else {
- float3 tmp = disk_N;
- disk_N = disk_B;
- disk_B = tmp;
- pick_pdf_N = 0.25f;
- pick_pdf_T = 0.25f;
- pick_pdf_B = 0.5f;
- disk_u = (disk_u - 0.75f)*4.0f;
- }
-
- /* Sample point on disk. */
- float phi = M_2PI_F * disk_u;
- float disk_r = disk_v;
- float disk_height;
-
- /* Perhaps find something better than Cubic BSSRDF, but happens to work well. */
- bssrdf_cubic_sample(radius, 0.0f, disk_r, &disk_r, &disk_height);
-
- float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
-
- /* Create ray. */
- Ray *ray = &isect.ray;
- ray->P = sd->P + disk_N*disk_height + disk_P;
- ray->D = -disk_N;
- ray->t = 2.0f*disk_height;
- ray->dP = sd->dP;
- ray->dD = differential3_zero();
- ray->time = sd->time;
-
- /* Intersect with the same object. if multiple intersections are found it
- * will use at most LOCAL_MAX_HITS hits, a random subset of all hits. */
- scene_intersect_local(kg,
- *ray,
- &isect,
- sd->object,
- &lcg_state,
- LOCAL_MAX_HITS);
-
- int num_eval_hits = min(isect.num_hits, LOCAL_MAX_HITS);
-
- for(int hit = 0; hit < num_eval_hits; hit++) {
- /* Quickly retrieve P and Ng without setting up ShaderData. */
- float3 hit_P;
- if(sd->type & PRIMITIVE_TRIANGLE) {
- hit_P = triangle_refine_local(kg,
- sd,
- &isect.hits[hit],
- ray);
- }
+ /* Early out if no sampling needed. */
+ if (radius <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) {
+ return sd->N;
+ }
+
+ /* Can't raytrace from shaders like displacement, before BVH exists. */
+ if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) {
+ return sd->N;
+ }
+
+ /* Don't bevel for blurry indirect rays. */
+ if (state->min_ray_pdf < 8.0f) {
+ return sd->N;
+ }
+
+ /* Setup for multi intersection. */
+ LocalIntersection isect;
+ uint lcg_state = lcg_state_init_addrspace(state, 0x64c6a40e);
+
+ /* Sample normals from surrounding points on surface. */
+ float3 sum_N = make_float3(0.0f, 0.0f, 0.0f);
+
+ for (int sample = 0; sample < num_samples; sample++) {
+ float disk_u, disk_v;
+ path_branched_rng_2D(
+ kg, state->rng_hash, state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v);
+
+ /* Pick random axis in local frame and point on disk. */
+ float3 disk_N, disk_T, disk_B;
+ float pick_pdf_N, pick_pdf_T, pick_pdf_B;
+
+ disk_N = sd->Ng;
+ make_orthonormals(disk_N, &disk_T, &disk_B);
+
+ float axisu = disk_u;
+
+ if (axisu < 0.5f) {
+ pick_pdf_N = 0.5f;
+ pick_pdf_T = 0.25f;
+ pick_pdf_B = 0.25f;
+ disk_u *= 2.0f;
+ }
+ else if (axisu < 0.75f) {
+ float3 tmp = disk_N;
+ disk_N = disk_T;
+ disk_T = tmp;
+ pick_pdf_N = 0.25f;
+ pick_pdf_T = 0.5f;
+ pick_pdf_B = 0.25f;
+ disk_u = (disk_u - 0.5f) * 4.0f;
+ }
+ else {
+ float3 tmp = disk_N;
+ disk_N = disk_B;
+ disk_B = tmp;
+ pick_pdf_N = 0.25f;
+ pick_pdf_T = 0.25f;
+ pick_pdf_B = 0.5f;
+ disk_u = (disk_u - 0.75f) * 4.0f;
+ }
+
+ /* Sample point on disk. */
+ float phi = M_2PI_F * disk_u;
+ float disk_r = disk_v;
+ float disk_height;
+
+ /* Perhaps find something better than Cubic BSSRDF, but happens to work well. */
+ bssrdf_cubic_sample(radius, 0.0f, disk_r, &disk_r, &disk_height);
+
+ float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B;
+
+ /* Create ray. */
+ Ray *ray = &isect.ray;
+ ray->P = sd->P + disk_N * disk_height + disk_P;
+ ray->D = -disk_N;
+ ray->t = 2.0f * disk_height;
+ ray->dP = sd->dP;
+ ray->dD = differential3_zero();
+ ray->time = sd->time;
+
+ /* Intersect with the same object. if multiple intersections are found it
+ * will use at most LOCAL_MAX_HITS hits, a random subset of all hits. */
+ scene_intersect_local(kg, *ray, &isect, sd->object, &lcg_state, LOCAL_MAX_HITS);
+
+ int num_eval_hits = min(isect.num_hits, LOCAL_MAX_HITS);
+
+ for (int hit = 0; hit < num_eval_hits; hit++) {
+ /* Quickly retrieve P and Ng without setting up ShaderData. */
+ float3 hit_P;
+ if (sd->type & PRIMITIVE_TRIANGLE) {
+ hit_P = triangle_refine_local(kg, sd, &isect.hits[hit], ray);
+ }
#ifdef __OBJECT_MOTION__
- else if(sd->type & PRIMITIVE_MOTION_TRIANGLE) {
- float3 verts[3];
- motion_triangle_vertices(
- kg,
- sd->object,
- kernel_tex_fetch(__prim_index, isect.hits[hit].prim),
- sd->time,
- verts);
- hit_P = motion_triangle_refine_local(kg,
- sd,
- &isect.hits[hit],
- ray,
- verts);
- }
-#endif /* __OBJECT_MOTION__ */
-
- /* Get geometric normal. */
- float3 hit_Ng = isect.Ng[hit];
- int object = (isect.hits[hit].object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, isect.hits[hit].prim): isect.hits[hit].object;
- int object_flag = kernel_tex_fetch(__object_flag, object);
- if(object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
- hit_Ng = -hit_Ng;
- }
-
- /* Compute smooth normal. */
- float3 N = hit_Ng;
- int prim = kernel_tex_fetch(__prim_index, isect.hits[hit].prim);
- int shader = kernel_tex_fetch(__tri_shader, prim);
-
- if(shader & SHADER_SMOOTH_NORMAL) {
- float u = isect.hits[hit].u;
- float v = isect.hits[hit].v;
-
- if(sd->type & PRIMITIVE_TRIANGLE) {
- N = triangle_smooth_normal(kg, N, prim, u, v);
- }
+ else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) {
+ float3 verts[3];
+ motion_triangle_vertices(
+ kg, sd->object, kernel_tex_fetch(__prim_index, isect.hits[hit].prim), sd->time, verts);
+ hit_P = motion_triangle_refine_local(kg, sd, &isect.hits[hit], ray, verts);
+ }
+#endif /* __OBJECT_MOTION__ */
+
+ /* Get geometric normal. */
+ float3 hit_Ng = isect.Ng[hit];
+ int object = (isect.hits[hit].object == OBJECT_NONE) ?
+ kernel_tex_fetch(__prim_object, isect.hits[hit].prim) :
+ isect.hits[hit].object;
+ int object_flag = kernel_tex_fetch(__object_flag, object);
+ if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
+ hit_Ng = -hit_Ng;
+ }
+
+ /* Compute smooth normal. */
+ float3 N = hit_Ng;
+ int prim = kernel_tex_fetch(__prim_index, isect.hits[hit].prim);
+ int shader = kernel_tex_fetch(__tri_shader, prim);
+
+ if (shader & SHADER_SMOOTH_NORMAL) {
+ float u = isect.hits[hit].u;
+ float v = isect.hits[hit].v;
+
+ if (sd->type & PRIMITIVE_TRIANGLE) {
+ N = triangle_smooth_normal(kg, N, prim, u, v);
+ }
#ifdef __OBJECT_MOTION__
- else if(sd->type & PRIMITIVE_MOTION_TRIANGLE) {
- N = motion_triangle_smooth_normal(kg, N, sd->object, prim, u, v, sd->time);
- }
-#endif /* __OBJECT_MOTION__ */
- }
-
- /* Transform normals to world space. */
- if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
- object_normal_transform(kg, sd, &N);
- object_normal_transform(kg, sd, &hit_Ng);
- }
-
- /* Probability densities for local frame axes. */
- float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng));
- float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng));
- float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng));
-
- /* Multiple importance sample between 3 axes, power heuristic
- * found to be slightly better than balance heuristic. pdf_N
- * in the MIS weight and denominator cancelled out. */
- float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B));
- if(isect.num_hits > LOCAL_MAX_HITS) {
- w *= isect.num_hits/(float)LOCAL_MAX_HITS;
- }
-
- /* Real distance to sampled point. */
- float r = len(hit_P - sd->P);
-
- /* Compute weight. */
- float pdf = bssrdf_cubic_pdf(radius, 0.0f, r);
- float disk_pdf = bssrdf_cubic_pdf(radius, 0.0f, disk_r);
-
- w *= pdf / disk_pdf;
-
- /* Sum normal and weight. */
- sum_N += w * N;
- }
- }
-
- /* Normalize. */
- float3 N = safe_normalize(sum_N);
- return is_zero(N) ? sd->N : (sd->flag & SD_BACKFACING) ? -N : N;
+ else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) {
+ N = motion_triangle_smooth_normal(kg, N, sd->object, prim, u, v, sd->time);
+ }
+#endif /* __OBJECT_MOTION__ */
+ }
+
+ /* Transform normals to world space. */
+ if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+ object_normal_transform(kg, sd, &N);
+ object_normal_transform(kg, sd, &hit_Ng);
+ }
+
+ /* Probability densities for local frame axes. */
+ float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng));
+ float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng));
+ float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng));
+
+ /* Multiple importance sample between 3 axes, power heuristic
+ * found to be slightly better than balance heuristic. pdf_N
+ * in the MIS weight and denominator cancelled out. */
+ float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B));
+ if (isect.num_hits > LOCAL_MAX_HITS) {
+ w *= isect.num_hits / (float)LOCAL_MAX_HITS;
+ }
+
+ /* Real distance to sampled point. */
+ float r = len(hit_P - sd->P);
+
+ /* Compute weight. */
+ float pdf = bssrdf_cubic_pdf(radius, 0.0f, r);
+ float disk_pdf = bssrdf_cubic_pdf(radius, 0.0f, disk_r);
+
+ w *= pdf / disk_pdf;
+
+ /* Sum normal and weight. */
+ sum_N += w * N;
+ }
+ }
+
+ /* Normalize. */
+ float3 N = safe_normalize(sum_N);
+ return is_zero(N) ? sd->N : (sd->flag & SD_BACKFACING) ? -N : N;
}
ccl_device void svm_node_bevel(
- KernelGlobals *kg,
- ShaderData *sd,
- ccl_addr_space PathState *state,
- float *stack,
- uint4 node)
+ KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float *stack, uint4 node)
{
- uint num_samples, radius_offset, normal_offset, out_offset;
- decode_node_uchar4(node.y, &num_samples, &radius_offset, &normal_offset, &out_offset);
+ uint num_samples, radius_offset, normal_offset, out_offset;
+ decode_node_uchar4(node.y, &num_samples, &radius_offset, &normal_offset, &out_offset);
- float radius = stack_load_float(stack, radius_offset);
- float3 bevel_N = svm_bevel(kg, sd, state, radius, num_samples);
+ float radius = stack_load_float(stack, radius_offset);
+ float3 bevel_N = svm_bevel(kg, sd, state, radius, num_samples);
- if(stack_valid(normal_offset)) {
- /* Preserve input normal. */
- float3 ref_N = stack_load_float3(stack, normal_offset);
- bevel_N = normalize(ref_N + (bevel_N - sd->N));
- }
+ if (stack_valid(normal_offset)) {
+ /* Preserve input normal. */
+ float3 ref_N = stack_load_float3(stack, normal_offset);
+ bevel_N = normalize(ref_N + (bevel_N - sd->N));
+ }
- stack_store_float3(stack, out_offset, bevel_N);
+ stack_store_float3(stack, out_offset, bevel_N);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_blackbody.h b/intern/cycles/kernel/svm/svm_blackbody.h
index 51590b18505..adfc50d961e 100644
--- a/intern/cycles/kernel/svm/svm_blackbody.h
+++ b/intern/cycles/kernel/svm/svm_blackbody.h
@@ -34,14 +34,15 @@ CCL_NAMESPACE_BEGIN
/* Blackbody Node */
-ccl_device void svm_node_blackbody(KernelGlobals *kg, ShaderData *sd, float *stack, uint temperature_offset, uint col_offset)
+ccl_device void svm_node_blackbody(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint temperature_offset, uint col_offset)
{
- /* Input */
- float temperature = stack_load_float(stack, temperature_offset);
+ /* Input */
+ float temperature = stack_load_float(stack, temperature_offset);
- float3 color_rgb = svm_math_blackbody_color(temperature);
+ float3 color_rgb = svm_math_blackbody_color(temperature);
- stack_store_float3(stack, col_offset, color_rgb);
+ stack_store_float3(stack, col_offset, color_rgb);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_brick.h b/intern/cycles/kernel/svm/svm_brick.h
index 744d9ff16c5..b5cbfcc72df 100644
--- a/intern/cycles/kernel/svm/svm_brick.h
+++ b/intern/cycles/kernel/svm/svm_brick.h
@@ -20,101 +20,119 @@ CCL_NAMESPACE_BEGIN
ccl_device_noinline float brick_noise(uint n) /* fast integer noise */
{
- uint nn;
- n = (n + 1013) & 0x7fffffff;
- n = (n >> 13) ^ n;
- nn = (n * (n * n * 60493 + 19990303) + 1376312589) & 0x7fffffff;
- return 0.5f * ((float)nn / 1073741824.0f);
+ uint nn;
+ n = (n + 1013) & 0x7fffffff;
+ n = (n >> 13) ^ n;
+ nn = (n * (n * n * 60493 + 19990303) + 1376312589) & 0x7fffffff;
+ return 0.5f * ((float)nn / 1073741824.0f);
}
-ccl_device_noinline float2 svm_brick(float3 p, float mortar_size, float mortar_smooth, float bias,
- float brick_width, float row_height, float offset_amount, int offset_frequency,
- float squash_amount, int squash_frequency)
+ccl_device_noinline float2 svm_brick(float3 p,
+ float mortar_size,
+ float mortar_smooth,
+ float bias,
+ float brick_width,
+ float row_height,
+ float offset_amount,
+ int offset_frequency,
+ float squash_amount,
+ int squash_frequency)
{
- int bricknum, rownum;
- float offset = 0.0f;
- float x, y;
-
- rownum = floor_to_int(p.y / row_height);
-
- if(offset_frequency && squash_frequency) {
- brick_width *= (rownum % squash_frequency) ? 1.0f : squash_amount; /* squash */
- offset = (rownum % offset_frequency) ? 0.0f : (brick_width*offset_amount); /* offset */
- }
-
- bricknum = floor_to_int((p.x+offset) / brick_width);
-
- x = (p.x+offset) - brick_width*bricknum;
- y = p.y - row_height*rownum;
-
- float tint = saturate((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias));
- float min_dist = min(min(x, y), min(brick_width - x, row_height - y));
-
- float mortar;
- if(min_dist >= mortar_size) {
- mortar = 0.0f;
- }
- else if(mortar_smooth == 0.0f) {
- mortar = 1.0f;
- }
- else {
- min_dist = 1.0f - min_dist/mortar_size;
- mortar = (min_dist < mortar_smooth)? smoothstepf(min_dist / mortar_smooth) : 1.0f;
- }
-
- return make_float2(tint, mortar);
+ int bricknum, rownum;
+ float offset = 0.0f;
+ float x, y;
+
+ rownum = floor_to_int(p.y / row_height);
+
+ if (offset_frequency && squash_frequency) {
+ brick_width *= (rownum % squash_frequency) ? 1.0f : squash_amount; /* squash */
+ offset = (rownum % offset_frequency) ? 0.0f : (brick_width * offset_amount); /* offset */
+ }
+
+ bricknum = floor_to_int((p.x + offset) / brick_width);
+
+ x = (p.x + offset) - brick_width * bricknum;
+ y = p.y - row_height * rownum;
+
+ float tint = saturate((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias));
+ float min_dist = min(min(x, y), min(brick_width - x, row_height - y));
+
+ float mortar;
+ if (min_dist >= mortar_size) {
+ mortar = 0.0f;
+ }
+ else if (mortar_smooth == 0.0f) {
+ mortar = 1.0f;
+ }
+ else {
+ min_dist = 1.0f - min_dist / mortar_size;
+ mortar = (min_dist < mortar_smooth) ? smoothstepf(min_dist / mortar_smooth) : 1.0f;
+ }
+
+ return make_float2(tint, mortar);
}
-ccl_device void svm_node_tex_brick(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_tex_brick(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
- uint4 node2 = read_node(kg, offset);
- uint4 node3 = read_node(kg, offset);
- uint4 node4 = read_node(kg, offset);
-
- /* Input and Output Sockets */
- uint co_offset, color1_offset, color2_offset, mortar_offset, scale_offset;
- uint mortar_size_offset, bias_offset, brick_width_offset, row_height_offset;
- uint color_offset, fac_offset, mortar_smooth_offset;
-
- /* RNA properties */
- uint offset_frequency, squash_frequency;
-
- decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset);
- decode_node_uchar4(node.z, &scale_offset, &mortar_size_offset, &bias_offset, &brick_width_offset);
- decode_node_uchar4(node.w, &row_height_offset, &color_offset, &fac_offset, &mortar_smooth_offset);
-
- decode_node_uchar4(node2.x, &offset_frequency, &squash_frequency, NULL, NULL);
-
- float3 co = stack_load_float3(stack, co_offset);
-
- float3 color1 = stack_load_float3(stack, color1_offset);
- float3 color2 = stack_load_float3(stack, color2_offset);
- float3 mortar = stack_load_float3(stack, mortar_offset);
-
- float scale = stack_load_float_default(stack, scale_offset, node2.y);
- float mortar_size = stack_load_float_default(stack, mortar_size_offset, node2.z);
- float mortar_smooth = stack_load_float_default(stack, mortar_smooth_offset, node4.x);
- float bias = stack_load_float_default(stack, bias_offset, node2.w);
- float brick_width = stack_load_float_default(stack, brick_width_offset, node3.x);
- float row_height = stack_load_float_default(stack, row_height_offset, node3.y);
- float offset_amount = __int_as_float(node3.z);
- float squash_amount = __int_as_float(node3.w);
-
- float2 f2 = svm_brick(co*scale, mortar_size, mortar_smooth, bias, brick_width, row_height,
- offset_amount, offset_frequency, squash_amount, squash_frequency);
-
- float tint = f2.x;
- float f = f2.y;
-
- if(f != 1.0f) {
- float facm = 1.0f - tint;
- color1 = facm * color1 + tint * color2;
- }
-
- if(stack_valid(color_offset))
- stack_store_float3(stack, color_offset, color1*(1.0f-f) + mortar*f);
- if(stack_valid(fac_offset))
- stack_store_float(stack, fac_offset, f);
+ uint4 node2 = read_node(kg, offset);
+ uint4 node3 = read_node(kg, offset);
+ uint4 node4 = read_node(kg, offset);
+
+ /* Input and Output Sockets */
+ uint co_offset, color1_offset, color2_offset, mortar_offset, scale_offset;
+ uint mortar_size_offset, bias_offset, brick_width_offset, row_height_offset;
+ uint color_offset, fac_offset, mortar_smooth_offset;
+
+ /* RNA properties */
+ uint offset_frequency, squash_frequency;
+
+ decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset);
+ decode_node_uchar4(
+ node.z, &scale_offset, &mortar_size_offset, &bias_offset, &brick_width_offset);
+ decode_node_uchar4(
+ node.w, &row_height_offset, &color_offset, &fac_offset, &mortar_smooth_offset);
+
+ decode_node_uchar4(node2.x, &offset_frequency, &squash_frequency, NULL, NULL);
+
+ float3 co = stack_load_float3(stack, co_offset);
+
+ float3 color1 = stack_load_float3(stack, color1_offset);
+ float3 color2 = stack_load_float3(stack, color2_offset);
+ float3 mortar = stack_load_float3(stack, mortar_offset);
+
+ float scale = stack_load_float_default(stack, scale_offset, node2.y);
+ float mortar_size = stack_load_float_default(stack, mortar_size_offset, node2.z);
+ float mortar_smooth = stack_load_float_default(stack, mortar_smooth_offset, node4.x);
+ float bias = stack_load_float_default(stack, bias_offset, node2.w);
+ float brick_width = stack_load_float_default(stack, brick_width_offset, node3.x);
+ float row_height = stack_load_float_default(stack, row_height_offset, node3.y);
+ float offset_amount = __int_as_float(node3.z);
+ float squash_amount = __int_as_float(node3.w);
+
+ float2 f2 = svm_brick(co * scale,
+ mortar_size,
+ mortar_smooth,
+ bias,
+ brick_width,
+ row_height,
+ offset_amount,
+ offset_frequency,
+ squash_amount,
+ squash_frequency);
+
+ float tint = f2.x;
+ float f = f2.y;
+
+ if (f != 1.0f) {
+ float facm = 1.0f - tint;
+ color1 = facm * color1 + tint * color2;
+ }
+
+ if (stack_valid(color_offset))
+ stack_store_float3(stack, color_offset, color1 * (1.0f - f) + mortar * f);
+ if (stack_valid(fac_offset))
+ stack_store_float(stack, fac_offset, f);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_brightness.h b/intern/cycles/kernel/svm/svm_brightness.h
index d71b0ee0b61..dcd75a2fe8f 100644
--- a/intern/cycles/kernel/svm/svm_brightness.h
+++ b/intern/cycles/kernel/svm/svm_brightness.h
@@ -16,19 +16,20 @@
CCL_NAMESPACE_BEGIN
-ccl_device void svm_node_brightness(ShaderData *sd, float *stack, uint in_color, uint out_color, uint node)
+ccl_device void svm_node_brightness(
+ ShaderData *sd, float *stack, uint in_color, uint out_color, uint node)
{
- uint bright_offset, contrast_offset;
- float3 color = stack_load_float3(stack, in_color);
+ uint bright_offset, contrast_offset;
+ float3 color = stack_load_float3(stack, in_color);
- decode_node_uchar4(node, &bright_offset, &contrast_offset, NULL, NULL);
- float brightness = stack_load_float(stack, bright_offset);
- float contrast = stack_load_float(stack, contrast_offset);
+ decode_node_uchar4(node, &bright_offset, &contrast_offset, NULL, NULL);
+ float brightness = stack_load_float(stack, bright_offset);
+ float contrast = stack_load_float(stack, contrast_offset);
- color = svm_brightness_contrast(color, brightness, contrast);
+ color = svm_brightness_contrast(color, brightness, contrast);
- if(stack_valid(out_color))
- stack_store_float3(stack, out_color, color);
+ if (stack_valid(out_color))
+ stack_store_float3(stack, out_color, color);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_bump.h b/intern/cycles/kernel/svm/svm_bump.h
index 35aac174409..c9d430a2bba 100644
--- a/intern/cycles/kernel/svm/svm_bump.h
+++ b/intern/cycles/kernel/svm/svm_bump.h
@@ -18,36 +18,42 @@ CCL_NAMESPACE_BEGIN
/* Bump Eval Nodes */
-ccl_device void svm_node_enter_bump_eval(KernelGlobals *kg, ShaderData *sd, float *stack, uint offset)
+ccl_device void svm_node_enter_bump_eval(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint offset)
{
- /* save state */
- stack_store_float3(stack, offset+0, sd->P);
- stack_store_float3(stack, offset+3, sd->dP.dx);
- stack_store_float3(stack, offset+6, sd->dP.dy);
-
- /* set state as if undisplaced */
- const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_POSITION_UNDISPLACED);
-
- if(desc.offset != ATTR_STD_NOT_FOUND) {
- float3 P, dPdx, dPdy;
- P = primitive_surface_attribute_float3(kg, sd, desc, &dPdx, &dPdy);
-
- object_position_transform(kg, sd, &P);
- object_dir_transform(kg, sd, &dPdx);
- object_dir_transform(kg, sd, &dPdy);
-
- sd->P = P;
- sd->dP.dx = dPdx;
- sd->dP.dy = dPdy;
- }
+ /* save state */
+ stack_store_float3(stack, offset + 0, sd->P);
+ stack_store_float3(stack, offset + 3, sd->dP.dx);
+ stack_store_float3(stack, offset + 6, sd->dP.dy);
+
+ /* set state as if undisplaced */
+ const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_POSITION_UNDISPLACED);
+
+ if (desc.offset != ATTR_STD_NOT_FOUND) {
+ float3 P, dPdx, dPdy;
+ P = primitive_surface_attribute_float3(kg, sd, desc, &dPdx, &dPdy);
+
+ object_position_transform(kg, sd, &P);
+ object_dir_transform(kg, sd, &dPdx);
+ object_dir_transform(kg, sd, &dPdy);
+
+ sd->P = P;
+ sd->dP.dx = dPdx;
+ sd->dP.dy = dPdy;
+ }
}
-ccl_device void svm_node_leave_bump_eval(KernelGlobals *kg, ShaderData *sd, float *stack, uint offset)
+ccl_device void svm_node_leave_bump_eval(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint offset)
{
- /* restore state */
- sd->P = stack_load_float3(stack, offset+0);
- sd->dP.dx = stack_load_float3(stack, offset+3);
- sd->dP.dy = stack_load_float3(stack, offset+6);
+ /* restore state */
+ sd->P = stack_load_float3(stack, offset + 0);
+ sd->dP.dx = stack_load_float3(stack, offset + 3);
+ sd->dP.dy = stack_load_float3(stack, offset + 6);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_camera.h b/intern/cycles/kernel/svm/svm_camera.h
index cf90229b53b..21a17acf5f1 100644
--- a/intern/cycles/kernel/svm/svm_camera.h
+++ b/intern/cycles/kernel/svm/svm_camera.h
@@ -16,25 +16,30 @@
CCL_NAMESPACE_BEGIN
-ccl_device void svm_node_camera(KernelGlobals *kg, ShaderData *sd, float *stack, uint out_vector, uint out_zdepth, uint out_distance)
+ccl_device void svm_node_camera(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint out_vector,
+ uint out_zdepth,
+ uint out_distance)
{
- float distance;
- float zdepth;
- float3 vector;
+ float distance;
+ float zdepth;
+ float3 vector;
- Transform tfm = kernel_data.cam.worldtocamera;
- vector = transform_point(&tfm, sd->P);
- zdepth = vector.z;
- distance = len(vector);
+ Transform tfm = kernel_data.cam.worldtocamera;
+ vector = transform_point(&tfm, sd->P);
+ zdepth = vector.z;
+ distance = len(vector);
- if(stack_valid(out_vector))
- stack_store_float3(stack, out_vector, normalize(vector));
+ if (stack_valid(out_vector))
+ stack_store_float3(stack, out_vector, normalize(vector));
- if(stack_valid(out_zdepth))
- stack_store_float(stack, out_zdepth, zdepth);
+ if (stack_valid(out_zdepth))
+ stack_store_float(stack, out_zdepth, zdepth);
- if(stack_valid(out_distance))
- stack_store_float(stack, out_distance, distance);
+ if (stack_valid(out_distance))
+ stack_store_float(stack, out_distance, distance);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_checker.h b/intern/cycles/kernel/svm/svm_checker.h
index 45e6c181e9e..63b4d1e149b 100644
--- a/intern/cycles/kernel/svm/svm_checker.h
+++ b/intern/cycles/kernel/svm/svm_checker.h
@@ -20,37 +20,37 @@ CCL_NAMESPACE_BEGIN
ccl_device_noinline float svm_checker(float3 p)
{
- /* avoid precision issues on unit coordinates */
- p.x = (p.x + 0.000001f)*0.999999f;
- p.y = (p.y + 0.000001f)*0.999999f;
- p.z = (p.z + 0.000001f)*0.999999f;
+ /* avoid precision issues on unit coordinates */
+ p.x = (p.x + 0.000001f) * 0.999999f;
+ p.y = (p.y + 0.000001f) * 0.999999f;
+ p.z = (p.z + 0.000001f) * 0.999999f;
- int xi = abs(float_to_int(floorf(p.x)));
- int yi = abs(float_to_int(floorf(p.y)));
- int zi = abs(float_to_int(floorf(p.z)));
+ int xi = abs(float_to_int(floorf(p.x)));
+ int yi = abs(float_to_int(floorf(p.y)));
+ int zi = abs(float_to_int(floorf(p.z)));
- return ((xi % 2 == yi % 2) == (zi % 2))? 1.0f: 0.0f;
+ return ((xi % 2 == yi % 2) == (zi % 2)) ? 1.0f : 0.0f;
}
ccl_device void svm_node_tex_checker(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
- uint co_offset, color1_offset, color2_offset, scale_offset;
- uint color_offset, fac_offset;
+ uint co_offset, color1_offset, color2_offset, scale_offset;
+ uint color_offset, fac_offset;
- decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &scale_offset);
- decode_node_uchar4(node.z, &color_offset, &fac_offset, NULL, NULL);
+ decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &scale_offset);
+ decode_node_uchar4(node.z, &color_offset, &fac_offset, NULL, NULL);
- float3 co = stack_load_float3(stack, co_offset);
- float3 color1 = stack_load_float3(stack, color1_offset);
- float3 color2 = stack_load_float3(stack, color2_offset);
- float scale = stack_load_float_default(stack, scale_offset, node.w);
+ float3 co = stack_load_float3(stack, co_offset);
+ float3 color1 = stack_load_float3(stack, color1_offset);
+ float3 color2 = stack_load_float3(stack, color2_offset);
+ float scale = stack_load_float_default(stack, scale_offset, node.w);
- float f = svm_checker(co*scale);
+ float f = svm_checker(co * scale);
- if(stack_valid(color_offset))
- stack_store_float3(stack, color_offset, (f == 1.0f)? color1: color2);
- if(stack_valid(fac_offset))
- stack_store_float(stack, fac_offset, f);
+ if (stack_valid(color_offset))
+ stack_store_float3(stack, color_offset, (f == 1.0f) ? color1 : color2);
+ if (stack_valid(fac_offset))
+ stack_store_float(stack, fac_offset, f);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index a7e87715ed4..270fe4c8615 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -20,1140 +20,1237 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline float3 sigma_from_concentration(float eumelanin, float pheomelanin)
{
- return eumelanin*make_float3(0.506f, 0.841f, 1.653f) + pheomelanin*make_float3(0.343f, 0.733f, 1.924f);
+ return eumelanin * make_float3(0.506f, 0.841f, 1.653f) +
+ pheomelanin * make_float3(0.343f, 0.733f, 1.924f);
}
ccl_device_inline float3 sigma_from_reflectance(float3 color, float azimuthal_roughness)
{
- float x = azimuthal_roughness;
- float roughness_fac = (((((0.245f*x) + 5.574f)*x - 10.73f)*x + 2.532f)*x - 0.215f)*x + 5.969f;
- float3 sigma = log3(color) / roughness_fac;
- return sigma * sigma;
+ float x = azimuthal_roughness;
+ float roughness_fac = (((((0.245f * x) + 5.574f) * x - 10.73f) * x + 2.532f) * x - 0.215f) * x +
+ 5.969f;
+ float3 sigma = log3(color) / roughness_fac;
+ return sigma * sigma;
}
/* Closure Nodes */
-ccl_device void svm_node_glass_setup(ShaderData *sd, MicrofacetBsdf *bsdf, int type, float eta, float roughness, bool refract)
+ccl_device void svm_node_glass_setup(
+ ShaderData *sd, MicrofacetBsdf *bsdf, int type, float eta, float roughness, bool refract)
{
- if(type == CLOSURE_BSDF_SHARP_GLASS_ID) {
- if(refract) {
- bsdf->alpha_y = 0.0f;
- bsdf->alpha_x = 0.0f;
- bsdf->ior = eta;
- sd->flag |= bsdf_refraction_setup(bsdf);
- }
- else {
- bsdf->alpha_y = 0.0f;
- bsdf->alpha_x = 0.0f;
- bsdf->ior = 0.0f;
- sd->flag |= bsdf_reflection_setup(bsdf);
- }
- }
- else if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID) {
- bsdf->alpha_x = roughness;
- bsdf->alpha_y = roughness;
- bsdf->ior = eta;
-
- if(refract)
- sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf);
- else
- sd->flag |= bsdf_microfacet_beckmann_setup(bsdf);
- }
- else {
- bsdf->alpha_x = roughness;
- bsdf->alpha_y = roughness;
- bsdf->ior = eta;
-
- if(refract)
- sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf);
- else
- sd->flag |= bsdf_microfacet_ggx_setup(bsdf);
- }
+ if (type == CLOSURE_BSDF_SHARP_GLASS_ID) {
+ if (refract) {
+ bsdf->alpha_y = 0.0f;
+ bsdf->alpha_x = 0.0f;
+ bsdf->ior = eta;
+ sd->flag |= bsdf_refraction_setup(bsdf);
+ }
+ else {
+ bsdf->alpha_y = 0.0f;
+ bsdf->alpha_x = 0.0f;
+ bsdf->ior = 0.0f;
+ sd->flag |= bsdf_reflection_setup(bsdf);
+ }
+ }
+ else if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID) {
+ bsdf->alpha_x = roughness;
+ bsdf->alpha_y = roughness;
+ bsdf->ior = eta;
+
+ if (refract)
+ sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf);
+ else
+ sd->flag |= bsdf_microfacet_beckmann_setup(bsdf);
+ }
+ else {
+ bsdf->alpha_x = roughness;
+ bsdf->alpha_y = roughness;
+ bsdf->ior = eta;
+
+ if (refract)
+ sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf);
+ else
+ sd->flag |= bsdf_microfacet_ggx_setup(bsdf);
+ }
}
-ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type, int path_flag, int *offset)
+ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node,
+ ShaderType shader_type,
+ int path_flag,
+ int *offset)
{
- uint type, param1_offset, param2_offset;
+ uint type, param1_offset, param2_offset;
- uint mix_weight_offset;
- decode_node_uchar4(node.y, &type, &param1_offset, &param2_offset, &mix_weight_offset);
- float mix_weight = (stack_valid(mix_weight_offset)? stack_load_float(stack, mix_weight_offset): 1.0f);
+ uint mix_weight_offset;
+ decode_node_uchar4(node.y, &type, &param1_offset, &param2_offset, &mix_weight_offset);
+ float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) :
+ 1.0f);
- /* note we read this extra node before weight check, so offset is added */
- uint4 data_node = read_node(kg, offset);
+ /* note we read this extra node before weight check, so offset is added */
+ uint4 data_node = read_node(kg, offset);
- /* Only compute BSDF for surfaces, transparent variable is shared with volume extinction. */
- if(mix_weight == 0.0f || shader_type != SHADER_TYPE_SURFACE) {
- if(type == CLOSURE_BSDF_PRINCIPLED_ID) {
- /* Read all principled BSDF extra data to get the right offset. */
- read_node(kg, offset);
- read_node(kg, offset);
- read_node(kg, offset);
- read_node(kg, offset);
- }
+ /* Only compute BSDF for surfaces, transparent variable is shared with volume extinction. */
+ if (mix_weight == 0.0f || shader_type != SHADER_TYPE_SURFACE) {
+ if (type == CLOSURE_BSDF_PRINCIPLED_ID) {
+ /* Read all principled BSDF extra data to get the right offset. */
+ read_node(kg, offset);
+ read_node(kg, offset);
+ read_node(kg, offset);
+ read_node(kg, offset);
+ }
- return;
- }
+ return;
+ }
- float3 N = stack_valid(data_node.x)? stack_load_float3(stack, data_node.x): sd->N;
+ float3 N = stack_valid(data_node.x) ? stack_load_float3(stack, data_node.x) : sd->N;
- float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __uint_as_float(node.z);
- float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __uint_as_float(node.w);
+ float param1 = (stack_valid(param1_offset)) ? stack_load_float(stack, param1_offset) :
+ __uint_as_float(node.z);
+ float param2 = (stack_valid(param2_offset)) ? stack_load_float(stack, param2_offset) :
+ __uint_as_float(node.w);
- switch(type) {
+ switch (type) {
#ifdef __PRINCIPLED__
- case CLOSURE_BSDF_PRINCIPLED_ID: {
- uint specular_offset, roughness_offset, specular_tint_offset, anisotropic_offset, sheen_offset,
- sheen_tint_offset, clearcoat_offset, clearcoat_roughness_offset, eta_offset, transmission_offset,
- anisotropic_rotation_offset, transmission_roughness_offset;
- uint4 data_node2 = read_node(kg, offset);
-
- float3 T = stack_load_float3(stack, data_node.y);
- decode_node_uchar4(data_node.z, &specular_offset, &roughness_offset, &specular_tint_offset, &anisotropic_offset);
- decode_node_uchar4(data_node.w, &sheen_offset, &sheen_tint_offset, &clearcoat_offset, &clearcoat_roughness_offset);
- decode_node_uchar4(data_node2.x, &eta_offset, &transmission_offset, &anisotropic_rotation_offset, &transmission_roughness_offset);
-
- // get Disney principled parameters
- float metallic = param1;
- float subsurface = param2;
- float specular = stack_load_float(stack, specular_offset);
- float roughness = stack_load_float(stack, roughness_offset);
- float specular_tint = stack_load_float(stack, specular_tint_offset);
- float anisotropic = stack_load_float(stack, anisotropic_offset);
- float sheen = stack_load_float(stack, sheen_offset);
- float sheen_tint = stack_load_float(stack, sheen_tint_offset);
- float clearcoat = stack_load_float(stack, clearcoat_offset);
- float clearcoat_roughness = stack_load_float(stack, clearcoat_roughness_offset);
- float transmission = stack_load_float(stack, transmission_offset);
- float anisotropic_rotation = stack_load_float(stack, anisotropic_rotation_offset);
- float transmission_roughness = stack_load_float(stack, transmission_roughness_offset);
- float eta = fmaxf(stack_load_float(stack, eta_offset), 1e-5f);
-
- ClosureType distribution = (ClosureType) data_node2.y;
- ClosureType subsurface_method = (ClosureType) data_node2.z;
-
- /* rotate tangent */
- if(anisotropic_rotation != 0.0f)
- T = rotate_around_axis(T, N, anisotropic_rotation * M_2PI_F);
-
- /* calculate ior */
- float ior = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta;
-
- // calculate fresnel for refraction
- float cosNO = dot(N, sd->I);
- float fresnel = fresnel_dielectric_cos(cosNO, ior);
-
- // calculate weights of the diffuse and specular part
- float diffuse_weight = (1.0f - saturate(metallic)) * (1.0f - saturate(transmission));
-
- float final_transmission = saturate(transmission) * (1.0f - saturate(metallic));
- float specular_weight = (1.0f - final_transmission);
-
- // get the base color
- uint4 data_base_color = read_node(kg, offset);
- float3 base_color = stack_valid(data_base_color.x) ? stack_load_float3(stack, data_base_color.x) :
- make_float3(__uint_as_float(data_base_color.y), __uint_as_float(data_base_color.z), __uint_as_float(data_base_color.w));
-
- // get the additional clearcoat normal and subsurface scattering radius
- uint4 data_cn_ssr = read_node(kg, offset);
- float3 clearcoat_normal = stack_valid(data_cn_ssr.x) ? stack_load_float3(stack, data_cn_ssr.x) : sd->N;
- float3 subsurface_radius = stack_valid(data_cn_ssr.y) ? stack_load_float3(stack, data_cn_ssr.y) : make_float3(1.0f, 1.0f, 1.0f);
-
- // get the subsurface color
- uint4 data_subsurface_color = read_node(kg, offset);
- float3 subsurface_color = stack_valid(data_subsurface_color.x) ? stack_load_float3(stack, data_subsurface_color.x) :
- make_float3(__uint_as_float(data_subsurface_color.y), __uint_as_float(data_subsurface_color.z), __uint_as_float(data_subsurface_color.w));
-
- float3 weight = sd->svm_closure_weight * mix_weight;
-
-#ifdef __SUBSURFACE__
- float3 mixed_ss_base_color = subsurface_color * subsurface + base_color * (1.0f - subsurface);
- float3 subsurf_weight = weight * mixed_ss_base_color * diffuse_weight;
-
- /* disable in case of diffuse ancestor, can't see it well then and
- * adds considerably noise due to probabilities of continuing path
- * getting lower and lower */
- if(path_flag & PATH_RAY_DIFFUSE_ANCESTOR) {
- subsurface = 0.0f;
-
- /* need to set the base color in this case such that the
- * rays get the correctly mixed color after transmitting
- * the object */
- base_color = mixed_ss_base_color;
- }
-
- /* diffuse */
- if(fabsf(average(mixed_ss_base_color)) > CLOSURE_WEIGHT_CUTOFF) {
- if(subsurface <= CLOSURE_WEIGHT_CUTOFF && diffuse_weight > CLOSURE_WEIGHT_CUTOFF) {
- float3 diff_weight = weight * base_color * diffuse_weight;
-
- PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), diff_weight);
-
- if(bsdf) {
- bsdf->N = N;
- bsdf->roughness = roughness;
-
- /* setup bsdf */
- sd->flag |= bsdf_principled_diffuse_setup(bsdf);
- }
- }
- else if(subsurface > CLOSURE_WEIGHT_CUTOFF) {
- Bssrdf *bssrdf = bssrdf_alloc(sd, subsurf_weight);
-
- if(bssrdf) {
- bssrdf->radius = subsurface_radius * subsurface;
- bssrdf->albedo = (subsurface_method == CLOSURE_BSSRDF_PRINCIPLED_ID)? subsurface_color: mixed_ss_base_color;
- bssrdf->texture_blur = 0.0f;
- bssrdf->sharpness = 0.0f;
- bssrdf->N = N;
- bssrdf->roughness = roughness;
-
- /* setup bsdf */
- sd->flag |= bssrdf_setup(sd, bssrdf, subsurface_method);
- }
- }
- }
-#else
- /* diffuse */
- if(diffuse_weight > CLOSURE_WEIGHT_CUTOFF) {
- float3 diff_weight = weight * base_color * diffuse_weight;
-
- PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), diff_weight);
-
- if(bsdf) {
- bsdf->N = N;
- bsdf->roughness = roughness;
-
- /* setup bsdf */
- sd->flag |= bsdf_principled_diffuse_setup(bsdf);
- }
- }
-#endif
-
- /* sheen */
- if(diffuse_weight > CLOSURE_WEIGHT_CUTOFF && sheen > CLOSURE_WEIGHT_CUTOFF) {
- float m_cdlum = linear_rgb_to_gray(kg, base_color);
- float3 m_ctint = m_cdlum > 0.0f ? base_color / m_cdlum : make_float3(1.0f, 1.0f, 1.0f); // normalize lum. to isolate hue+sat
-
- /* color of the sheen component */
- float3 sheen_color = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - sheen_tint) + m_ctint * sheen_tint;
-
- float3 sheen_weight = weight * sheen * sheen_color * diffuse_weight;
-
- PrincipledSheenBsdf *bsdf = (PrincipledSheenBsdf*)bsdf_alloc(sd, sizeof(PrincipledSheenBsdf), sheen_weight);
-
- if(bsdf) {
- bsdf->N = N;
-
- /* setup bsdf */
- sd->flag |= bsdf_principled_sheen_setup(bsdf);
- }
- }
-
- /* specular reflection */
+ case CLOSURE_BSDF_PRINCIPLED_ID: {
+ uint specular_offset, roughness_offset, specular_tint_offset, anisotropic_offset,
+ sheen_offset, sheen_tint_offset, clearcoat_offset, clearcoat_roughness_offset,
+ eta_offset, transmission_offset, anisotropic_rotation_offset,
+ transmission_roughness_offset;
+ uint4 data_node2 = read_node(kg, offset);
+
+ float3 T = stack_load_float3(stack, data_node.y);
+ decode_node_uchar4(data_node.z,
+ &specular_offset,
+ &roughness_offset,
+ &specular_tint_offset,
+ &anisotropic_offset);
+ decode_node_uchar4(data_node.w,
+ &sheen_offset,
+ &sheen_tint_offset,
+ &clearcoat_offset,
+ &clearcoat_roughness_offset);
+ decode_node_uchar4(data_node2.x,
+ &eta_offset,
+ &transmission_offset,
+ &anisotropic_rotation_offset,
+ &transmission_roughness_offset);
+
+ // get Disney principled parameters
+ float metallic = param1;
+ float subsurface = param2;
+ float specular = stack_load_float(stack, specular_offset);
+ float roughness = stack_load_float(stack, roughness_offset);
+ float specular_tint = stack_load_float(stack, specular_tint_offset);
+ float anisotropic = stack_load_float(stack, anisotropic_offset);
+ float sheen = stack_load_float(stack, sheen_offset);
+ float sheen_tint = stack_load_float(stack, sheen_tint_offset);
+ float clearcoat = stack_load_float(stack, clearcoat_offset);
+ float clearcoat_roughness = stack_load_float(stack, clearcoat_roughness_offset);
+ float transmission = stack_load_float(stack, transmission_offset);
+ float anisotropic_rotation = stack_load_float(stack, anisotropic_rotation_offset);
+ float transmission_roughness = stack_load_float(stack, transmission_roughness_offset);
+ float eta = fmaxf(stack_load_float(stack, eta_offset), 1e-5f);
+
+ ClosureType distribution = (ClosureType)data_node2.y;
+ ClosureType subsurface_method = (ClosureType)data_node2.z;
+
+ /* rotate tangent */
+ if (anisotropic_rotation != 0.0f)
+ T = rotate_around_axis(T, N, anisotropic_rotation * M_2PI_F);
+
+ /* calculate ior */
+ float ior = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta;
+
+ // calculate fresnel for refraction
+ float cosNO = dot(N, sd->I);
+ float fresnel = fresnel_dielectric_cos(cosNO, ior);
+
+ // calculate weights of the diffuse and specular part
+ float diffuse_weight = (1.0f - saturate(metallic)) * (1.0f - saturate(transmission));
+
+ float final_transmission = saturate(transmission) * (1.0f - saturate(metallic));
+ float specular_weight = (1.0f - final_transmission);
+
+ // get the base color
+ uint4 data_base_color = read_node(kg, offset);
+ float3 base_color = stack_valid(data_base_color.x) ?
+ stack_load_float3(stack, data_base_color.x) :
+ make_float3(__uint_as_float(data_base_color.y),
+ __uint_as_float(data_base_color.z),
+ __uint_as_float(data_base_color.w));
+
+ // get the additional clearcoat normal and subsurface scattering radius
+ uint4 data_cn_ssr = read_node(kg, offset);
+ float3 clearcoat_normal = stack_valid(data_cn_ssr.x) ?
+ stack_load_float3(stack, data_cn_ssr.x) :
+ sd->N;
+ float3 subsurface_radius = stack_valid(data_cn_ssr.y) ?
+ stack_load_float3(stack, data_cn_ssr.y) :
+ make_float3(1.0f, 1.0f, 1.0f);
+
+ // get the subsurface color
+ uint4 data_subsurface_color = read_node(kg, offset);
+ float3 subsurface_color = stack_valid(data_subsurface_color.x) ?
+ stack_load_float3(stack, data_subsurface_color.x) :
+ make_float3(__uint_as_float(data_subsurface_color.y),
+ __uint_as_float(data_subsurface_color.z),
+ __uint_as_float(data_subsurface_color.w));
+
+ float3 weight = sd->svm_closure_weight * mix_weight;
+
+# ifdef __SUBSURFACE__
+ float3 mixed_ss_base_color = subsurface_color * subsurface +
+ base_color * (1.0f - subsurface);
+ float3 subsurf_weight = weight * mixed_ss_base_color * diffuse_weight;
+
+ /* disable in case of diffuse ancestor, can't see it well then and
+ * adds considerably noise due to probabilities of continuing path
+ * getting lower and lower */
+ if (path_flag & PATH_RAY_DIFFUSE_ANCESTOR) {
+ subsurface = 0.0f;
+
+ /* need to set the base color in this case such that the
+ * rays get the correctly mixed color after transmitting
+ * the object */
+ base_color = mixed_ss_base_color;
+ }
+
+ /* diffuse */
+ if (fabsf(average(mixed_ss_base_color)) > CLOSURE_WEIGHT_CUTOFF) {
+ if (subsurface <= CLOSURE_WEIGHT_CUTOFF && diffuse_weight > CLOSURE_WEIGHT_CUTOFF) {
+ float3 diff_weight = weight * base_color * diffuse_weight;
+
+ PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
+ sd, sizeof(PrincipledDiffuseBsdf), diff_weight);
+
+ if (bsdf) {
+ bsdf->N = N;
+ bsdf->roughness = roughness;
+
+ /* setup bsdf */
+ sd->flag |= bsdf_principled_diffuse_setup(bsdf);
+ }
+ }
+ else if (subsurface > CLOSURE_WEIGHT_CUTOFF) {
+ Bssrdf *bssrdf = bssrdf_alloc(sd, subsurf_weight);
+
+ if (bssrdf) {
+ bssrdf->radius = subsurface_radius * subsurface;
+ bssrdf->albedo = (subsurface_method == CLOSURE_BSSRDF_PRINCIPLED_ID) ?
+ subsurface_color :
+ mixed_ss_base_color;
+ bssrdf->texture_blur = 0.0f;
+ bssrdf->sharpness = 0.0f;
+ bssrdf->N = N;
+ bssrdf->roughness = roughness;
+
+ /* setup bsdf */
+ sd->flag |= bssrdf_setup(sd, bssrdf, subsurface_method);
+ }
+ }
+ }
+# else
+ /* diffuse */
+ if (diffuse_weight > CLOSURE_WEIGHT_CUTOFF) {
+ float3 diff_weight = weight * base_color * diffuse_weight;
+
+ PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
+ sd, sizeof(PrincipledDiffuseBsdf), diff_weight);
+
+ if (bsdf) {
+ bsdf->N = N;
+ bsdf->roughness = roughness;
+
+ /* setup bsdf */
+ sd->flag |= bsdf_principled_diffuse_setup(bsdf);
+ }
+ }
+# endif
+
+ /* sheen */
+ if (diffuse_weight > CLOSURE_WEIGHT_CUTOFF && sheen > CLOSURE_WEIGHT_CUTOFF) {
+ float m_cdlum = linear_rgb_to_gray(kg, base_color);
+ float3 m_ctint = m_cdlum > 0.0f ?
+ base_color / m_cdlum :
+ make_float3(1.0f, 1.0f, 1.0f); // normalize lum. to isolate hue+sat
+
+ /* color of the sheen component */
+ float3 sheen_color = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - sheen_tint) +
+ m_ctint * sheen_tint;
+
+ float3 sheen_weight = weight * sheen * sheen_color * diffuse_weight;
+
+ PrincipledSheenBsdf *bsdf = (PrincipledSheenBsdf *)bsdf_alloc(
+ sd, sizeof(PrincipledSheenBsdf), sheen_weight);
+
+ if (bsdf) {
+ bsdf->N = N;
+
+ /* setup bsdf */
+ sd->flag |= bsdf_principled_sheen_setup(bsdf);
+ }
+ }
+
+ /* specular reflection */
+# ifdef __CAUSTICS_TRICKS__
+ if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) {
+# endif
+ if (specular_weight > CLOSURE_WEIGHT_CUTOFF &&
+ (specular > CLOSURE_WEIGHT_CUTOFF || metallic > CLOSURE_WEIGHT_CUTOFF)) {
+ float3 spec_weight = weight * specular_weight;
+
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), spec_weight);
+ MicrofacetExtra *extra = (bsdf != NULL) ? (MicrofacetExtra *)closure_alloc_extra(
+ sd, sizeof(MicrofacetExtra)) :
+ NULL;
+
+ if (bsdf && extra) {
+ bsdf->N = N;
+ bsdf->ior = (2.0f / (1.0f - safe_sqrtf(0.08f * specular))) - 1.0f;
+ bsdf->T = T;
+ bsdf->extra = extra;
+
+ float aspect = safe_sqrtf(1.0f - anisotropic * 0.9f);
+ float r2 = roughness * roughness;
+
+ bsdf->alpha_x = r2 / aspect;
+ bsdf->alpha_y = r2 * aspect;
+
+ float m_cdlum = 0.3f * base_color.x + 0.6f * base_color.y +
+ 0.1f * base_color.z; // luminance approx.
+ float3 m_ctint = m_cdlum > 0.0f ?
+ base_color / m_cdlum :
+ make_float3(
+ 0.0f, 0.0f, 0.0f); // normalize lum. to isolate hue+sat
+ float3 tmp_col = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specular_tint) +
+ m_ctint * specular_tint;
+
+ bsdf->extra->cspec0 = (specular * 0.08f * tmp_col) * (1.0f - metallic) +
+ base_color * metallic;
+ bsdf->extra->color = base_color;
+ bsdf->extra->clearcoat = 0.0f;
+
+ /* setup bsdf */
+ if (distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID ||
+ roughness <= 0.075f) /* use single-scatter GGX */
+ sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd);
+ else /* use multi-scatter GGX */
+ sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd);
+ }
+ }
+# ifdef __CAUSTICS_TRICKS__
+ }
+# endif
+
+ /* BSDF */
+# ifdef __CAUSTICS_TRICKS__
+ if (kernel_data.integrator.caustics_reflective ||
+ kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) {
+# endif
+ if (final_transmission > CLOSURE_WEIGHT_CUTOFF) {
+ float3 glass_weight = weight * final_transmission;
+ float3 cspec0 = base_color * specular_tint +
+ make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specular_tint);
+
+ if (roughness <= 5e-2f ||
+ distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID) { /* use single-scatter GGX */
+ float refl_roughness = roughness;
+
+ /* reflection */
+# ifdef __CAUSTICS_TRICKS__
+ if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0)
+# endif
+ {
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), glass_weight * fresnel);
+ MicrofacetExtra *extra = (bsdf != NULL) ? (MicrofacetExtra *)closure_alloc_extra(
+ sd, sizeof(MicrofacetExtra)) :
+ NULL;
+
+ if (bsdf && extra) {
+ bsdf->N = N;
+ bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->extra = extra;
+
+ bsdf->alpha_x = refl_roughness * refl_roughness;
+ bsdf->alpha_y = refl_roughness * refl_roughness;
+ bsdf->ior = ior;
+
+ bsdf->extra->color = base_color;
+ bsdf->extra->cspec0 = cspec0;
+ bsdf->extra->clearcoat = 0.0f;
+
+ /* setup bsdf */
+ sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd);
+ }
+ }
+
+ /* refraction */
+# ifdef __CAUSTICS_TRICKS__
+ if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0)
+# endif
+ {
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), base_color * glass_weight * (1.0f - fresnel));
+ if (bsdf) {
+ bsdf->N = N;
+ bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->extra = NULL;
+
+ if (distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID)
+ transmission_roughness = 1.0f - (1.0f - refl_roughness) *
+ (1.0f - transmission_roughness);
+ else
+ transmission_roughness = refl_roughness;
+
+ bsdf->alpha_x = transmission_roughness * transmission_roughness;
+ bsdf->alpha_y = transmission_roughness * transmission_roughness;
+ bsdf->ior = ior;
+
+ /* setup bsdf */
+ sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf);
+ }
+ }
+ }
+ else { /* use multi-scatter GGX */
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), glass_weight);
+ MicrofacetExtra *extra = (bsdf != NULL) ? (MicrofacetExtra *)closure_alloc_extra(
+ sd, sizeof(MicrofacetExtra)) :
+ NULL;
+
+ if (bsdf && extra) {
+ bsdf->N = N;
+ bsdf->extra = extra;
+ bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+
+ bsdf->alpha_x = roughness * roughness;
+ bsdf->alpha_y = roughness * roughness;
+ bsdf->ior = ior;
+
+ bsdf->extra->color = base_color;
+ bsdf->extra->cspec0 = cspec0;
+ bsdf->extra->clearcoat = 0.0f;
+
+ /* setup bsdf */
+ sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd);
+ }
+ }
+ }
+# ifdef __CAUSTICS_TRICKS__
+ }
+# endif
+
+ /* clearcoat */
+# ifdef __CAUSTICS_TRICKS__
+ if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) {
+# endif
+ if (clearcoat > CLOSURE_WEIGHT_CUTOFF) {
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
+ MicrofacetExtra *extra = (bsdf != NULL) ? (MicrofacetExtra *)closure_alloc_extra(
+ sd, sizeof(MicrofacetExtra)) :
+ NULL;
+
+ if (bsdf && extra) {
+ bsdf->N = clearcoat_normal;
+ bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->ior = 1.5f;
+ bsdf->extra = extra;
+
+ bsdf->alpha_x = clearcoat_roughness * clearcoat_roughness;
+ bsdf->alpha_y = clearcoat_roughness * clearcoat_roughness;
+
+ bsdf->extra->color = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f);
+ bsdf->extra->clearcoat = clearcoat;
+
+ /* setup bsdf */
+ sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd);
+ }
+ }
+# ifdef __CAUSTICS_TRICKS__
+ }
+# endif
+
+ break;
+ }
+#endif /* __PRINCIPLED__ */
+ case CLOSURE_BSDF_DIFFUSE_ID: {
+ float3 weight = sd->svm_closure_weight * mix_weight;
+ OrenNayarBsdf *bsdf = (OrenNayarBsdf *)bsdf_alloc(sd, sizeof(OrenNayarBsdf), weight);
+
+ if (bsdf) {
+ bsdf->N = N;
+
+ float roughness = param1;
+
+ if (roughness == 0.0f) {
+ sd->flag |= bsdf_diffuse_setup((DiffuseBsdf *)bsdf);
+ }
+ else {
+ bsdf->roughness = roughness;
+ sd->flag |= bsdf_oren_nayar_setup(bsdf);
+ }
+ }
+ break;
+ }
+ case CLOSURE_BSDF_TRANSLUCENT_ID: {
+ float3 weight = sd->svm_closure_weight * mix_weight;
+ DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
+
+ if (bsdf) {
+ bsdf->N = N;
+ sd->flag |= bsdf_translucent_setup(bsdf);
+ }
+ break;
+ }
+ case CLOSURE_BSDF_TRANSPARENT_ID: {
+ float3 weight = sd->svm_closure_weight * mix_weight;
+ bsdf_transparent_setup(sd, weight, path_flag);
+ break;
+ }
+ case CLOSURE_BSDF_REFLECTION_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: {
#ifdef __CAUSTICS_TRICKS__
- if(kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) {
+ if (!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE))
+ break;
#endif
- if(specular_weight > CLOSURE_WEIGHT_CUTOFF && (specular > CLOSURE_WEIGHT_CUTOFF || metallic > CLOSURE_WEIGHT_CUTOFF)) {
- float3 spec_weight = weight * specular_weight;
-
- MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), spec_weight);
- MicrofacetExtra *extra = (bsdf != NULL)
- ? (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra))
- : NULL;
-
- if(bsdf && extra) {
- bsdf->N = N;
- bsdf->ior = (2.0f / (1.0f - safe_sqrtf(0.08f * specular))) - 1.0f;
- bsdf->T = T;
- bsdf->extra = extra;
-
- float aspect = safe_sqrtf(1.0f - anisotropic * 0.9f);
- float r2 = roughness * roughness;
-
- bsdf->alpha_x = r2 / aspect;
- bsdf->alpha_y = r2 * aspect;
-
- float m_cdlum = 0.3f * base_color.x + 0.6f * base_color.y + 0.1f * base_color.z; // luminance approx.
- float3 m_ctint = m_cdlum > 0.0f ? base_color / m_cdlum : make_float3(0.0f, 0.0f, 0.0f); // normalize lum. to isolate hue+sat
- float3 tmp_col = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specular_tint) + m_ctint * specular_tint;
-
- bsdf->extra->cspec0 = (specular * 0.08f * tmp_col) * (1.0f - metallic) + base_color * metallic;
- bsdf->extra->color = base_color;
- bsdf->extra->clearcoat = 0.0f;
-
- /* setup bsdf */
- if(distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID || roughness <= 0.075f) /* use single-scatter GGX */
- sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd);
- else /* use multi-scatter GGX */
- sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd);
- }
- }
+ float3 weight = sd->svm_closure_weight * mix_weight;
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
+
+ if (!bsdf) {
+ break;
+ }
+
+ float roughness = sqr(param1);
+
+ bsdf->N = N;
+ bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->alpha_x = roughness;
+ bsdf->alpha_y = roughness;
+ bsdf->ior = 0.0f;
+ bsdf->extra = NULL;
+
+ /* setup bsdf */
+ if (type == CLOSURE_BSDF_REFLECTION_ID)
+ sd->flag |= bsdf_reflection_setup(bsdf);
+ else if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_ID)
+ sd->flag |= bsdf_microfacet_beckmann_setup(bsdf);
+ else if (type == CLOSURE_BSDF_MICROFACET_GGX_ID)
+ sd->flag |= bsdf_microfacet_ggx_setup(bsdf);
+ else if (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID) {
+ kernel_assert(stack_valid(data_node.z));
+ bsdf->extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
+ if (bsdf->extra) {
+ bsdf->extra->color = stack_load_float3(stack, data_node.z);
+ bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->extra->clearcoat = 0.0f;
+ sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf);
+ }
+ }
+ else {
+ sd->flag |= bsdf_ashikhmin_shirley_setup(bsdf);
+ }
+
+ break;
+ }
+ case CLOSURE_BSDF_REFRACTION_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: {
#ifdef __CAUSTICS_TRICKS__
- }
+ if (!kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE))
+ break;
#endif
-
- /* BSDF */
+ float3 weight = sd->svm_closure_weight * mix_weight;
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
+
+ if (bsdf) {
+ bsdf->N = N;
+ bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->extra = NULL;
+
+ float eta = fmaxf(param2, 1e-5f);
+ eta = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta;
+
+ /* setup bsdf */
+ if (type == CLOSURE_BSDF_REFRACTION_ID) {
+ bsdf->alpha_x = 0.0f;
+ bsdf->alpha_y = 0.0f;
+ bsdf->ior = eta;
+
+ sd->flag |= bsdf_refraction_setup(bsdf);
+ }
+ else {
+ float roughness = sqr(param1);
+ bsdf->alpha_x = roughness;
+ bsdf->alpha_y = roughness;
+ bsdf->ior = eta;
+
+ if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID)
+ sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf);
+ else
+ sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf);
+ }
+ }
+
+ break;
+ }
+ case CLOSURE_BSDF_SHARP_GLASS_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID: {
#ifdef __CAUSTICS_TRICKS__
- if(kernel_data.integrator.caustics_reflective || kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) {
+ if (!kernel_data.integrator.caustics_reflective &&
+ !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) {
+ break;
+ }
#endif
- if(final_transmission > CLOSURE_WEIGHT_CUTOFF) {
- float3 glass_weight = weight * final_transmission;
- float3 cspec0 = base_color * specular_tint + make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specular_tint);
+ float3 weight = sd->svm_closure_weight * mix_weight;
- if(roughness <= 5e-2f || distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID) { /* use single-scatter GGX */
- float refl_roughness = roughness;
+ /* index of refraction */
+ float eta = fmaxf(param2, 1e-5f);
+ eta = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta;
- /* reflection */
-#ifdef __CAUSTICS_TRICKS__
- if(kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0)
-#endif
- {
- MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), glass_weight*fresnel);
- MicrofacetExtra *extra = (bsdf != NULL)
- ? (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra))
- : NULL;
-
- if(bsdf && extra) {
- bsdf->N = N;
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->extra = extra;
-
- bsdf->alpha_x = refl_roughness * refl_roughness;
- bsdf->alpha_y = refl_roughness * refl_roughness;
- bsdf->ior = ior;
-
- bsdf->extra->color = base_color;
- bsdf->extra->cspec0 = cspec0;
- bsdf->extra->clearcoat = 0.0f;
-
- /* setup bsdf */
- sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd);
- }
- }
-
- /* refraction */
-#ifdef __CAUSTICS_TRICKS__
- if(kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0)
-#endif
- {
- MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), base_color*glass_weight*(1.0f - fresnel));
- if(bsdf) {
- bsdf->N = N;
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->extra = NULL;
-
- if(distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID)
- transmission_roughness = 1.0f - (1.0f - refl_roughness) * (1.0f - transmission_roughness);
- else
- transmission_roughness = refl_roughness;
-
- bsdf->alpha_x = transmission_roughness * transmission_roughness;
- bsdf->alpha_y = transmission_roughness * transmission_roughness;
- bsdf->ior = ior;
-
- /* setup bsdf */
- sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf);
- }
- }
- }
- else { /* use multi-scatter GGX */
- MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), glass_weight);
- MicrofacetExtra *extra = (bsdf != NULL)
- ? (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra))
- : NULL;
-
- if(bsdf && extra) {
- bsdf->N = N;
- bsdf->extra = extra;
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
-
- bsdf->alpha_x = roughness * roughness;
- bsdf->alpha_y = roughness * roughness;
- bsdf->ior = ior;
-
- bsdf->extra->color = base_color;
- bsdf->extra->cspec0 = cspec0;
- bsdf->extra->clearcoat = 0.0f;
-
- /* setup bsdf */
- sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd);
- }
- }
- }
-#ifdef __CAUSTICS_TRICKS__
- }
-#endif
+ /* fresnel */
+ float cosNO = dot(N, sd->I);
+ float fresnel = fresnel_dielectric_cos(cosNO, eta);
+ float roughness = sqr(param1);
- /* clearcoat */
+ /* reflection */
#ifdef __CAUSTICS_TRICKS__
- if(kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) {
+ if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0)
#endif
- if(clearcoat > CLOSURE_WEIGHT_CUTOFF) {
- MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
- MicrofacetExtra *extra = (bsdf != NULL)
- ? (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra))
- : NULL;
-
- if(bsdf && extra) {
- bsdf->N = clearcoat_normal;
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->ior = 1.5f;
- bsdf->extra = extra;
-
- bsdf->alpha_x = clearcoat_roughness * clearcoat_roughness;
- bsdf->alpha_y = clearcoat_roughness * clearcoat_roughness;
-
- bsdf->extra->color = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f);
- bsdf->extra->clearcoat = clearcoat;
-
- /* setup bsdf */
- sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd);
- }
- }
+ {
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), weight * fresnel);
+
+ if (bsdf) {
+ bsdf->N = N;
+ bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->extra = NULL;
+ svm_node_glass_setup(sd, bsdf, type, eta, roughness, false);
+ }
+ }
+
+ /* refraction */
#ifdef __CAUSTICS_TRICKS__
- }
+ if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0)
#endif
-
- break;
- }
-#endif /* __PRINCIPLED__ */
- case CLOSURE_BSDF_DIFFUSE_ID: {
- float3 weight = sd->svm_closure_weight * mix_weight;
- OrenNayarBsdf *bsdf = (OrenNayarBsdf*)bsdf_alloc(sd, sizeof(OrenNayarBsdf), weight);
-
- if(bsdf) {
- bsdf->N = N;
-
- float roughness = param1;
-
- if(roughness == 0.0f) {
- sd->flag |= bsdf_diffuse_setup((DiffuseBsdf*)bsdf);
- }
- else {
- bsdf->roughness = roughness;
- sd->flag |= bsdf_oren_nayar_setup(bsdf);
- }
- }
- break;
- }
- case CLOSURE_BSDF_TRANSLUCENT_ID: {
- float3 weight = sd->svm_closure_weight * mix_weight;
- DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
-
- if(bsdf) {
- bsdf->N = N;
- sd->flag |= bsdf_translucent_setup(bsdf);
- }
- break;
- }
- case CLOSURE_BSDF_TRANSPARENT_ID: {
- float3 weight = sd->svm_closure_weight * mix_weight;
- bsdf_transparent_setup(sd, weight, path_flag);
- break;
- }
- case CLOSURE_BSDF_REFLECTION_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
- case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: {
+ {
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), weight * (1.0f - fresnel));
+
+ if (bsdf) {
+ bsdf->N = N;
+ bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->extra = NULL;
+ svm_node_glass_setup(sd, bsdf, type, eta, roughness, true);
+ }
+ }
+
+ break;
+ }
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: {
#ifdef __CAUSTICS_TRICKS__
- if(!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE))
- break;
+ if (!kernel_data.integrator.caustics_reflective &&
+ !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE))
+ break;
#endif
- float3 weight = sd->svm_closure_weight * mix_weight;
- MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
-
- if(!bsdf) {
- break;
- }
-
- float roughness = sqr(param1);
-
- bsdf->N = N;
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->alpha_x = roughness;
- bsdf->alpha_y = roughness;
- bsdf->ior = 0.0f;
- bsdf->extra = NULL;
-
- /* setup bsdf */
- if(type == CLOSURE_BSDF_REFLECTION_ID)
- sd->flag |= bsdf_reflection_setup(bsdf);
- else if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_ID)
- sd->flag |= bsdf_microfacet_beckmann_setup(bsdf);
- else if(type == CLOSURE_BSDF_MICROFACET_GGX_ID)
- sd->flag |= bsdf_microfacet_ggx_setup(bsdf);
- else if(type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID) {
- kernel_assert(stack_valid(data_node.z));
- bsdf->extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
- if(bsdf->extra) {
- bsdf->extra->color = stack_load_float3(stack, data_node.z);
- bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->extra->clearcoat = 0.0f;
- sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf);
- }
- }
- else {
- sd->flag |= bsdf_ashikhmin_shirley_setup(bsdf);
- }
-
- break;
- }
- case CLOSURE_BSDF_REFRACTION_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: {
+ float3 weight = sd->svm_closure_weight * mix_weight;
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
+ if (!bsdf) {
+ break;
+ }
+
+ MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
+ if (!extra) {
+ break;
+ }
+
+ bsdf->N = N;
+ bsdf->extra = extra;
+ bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+
+ float roughness = sqr(param1);
+ bsdf->alpha_x = roughness;
+ bsdf->alpha_y = roughness;
+ float eta = fmaxf(param2, 1e-5f);
+ bsdf->ior = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta;
+
+ kernel_assert(stack_valid(data_node.z));
+ bsdf->extra->color = stack_load_float3(stack, data_node.z);
+ bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->extra->clearcoat = 0.0f;
+
+ /* setup bsdf */
+ sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf);
+ break;
+ }
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID:
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: {
#ifdef __CAUSTICS_TRICKS__
- if(!kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE))
- break;
+ if (!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE))
+ break;
#endif
- float3 weight = sd->svm_closure_weight * mix_weight;
- MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
-
- if(bsdf) {
- bsdf->N = N;
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->extra = NULL;
-
- float eta = fmaxf(param2, 1e-5f);
- eta = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta;
-
- /* setup bsdf */
- if(type == CLOSURE_BSDF_REFRACTION_ID) {
- bsdf->alpha_x = 0.0f;
- bsdf->alpha_y = 0.0f;
- bsdf->ior = eta;
-
- sd->flag |= bsdf_refraction_setup(bsdf);
- }
- else {
- float roughness = sqr(param1);
- bsdf->alpha_x = roughness;
- bsdf->alpha_y = roughness;
- bsdf->ior = eta;
-
- if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID)
- sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf);
- else
- sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf);
- }
- }
-
- break;
- }
- case CLOSURE_BSDF_SHARP_GLASS_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID: {
+ float3 weight = sd->svm_closure_weight * mix_weight;
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
+
+ if (bsdf) {
+ bsdf->N = N;
+ bsdf->extra = NULL;
+ bsdf->T = stack_load_float3(stack, data_node.y);
+
+ /* rotate tangent */
+ float rotation = stack_load_float(stack, data_node.z);
+
+ if (rotation != 0.0f)
+ bsdf->T = rotate_around_axis(bsdf->T, bsdf->N, rotation * M_2PI_F);
+
+ /* compute roughness */
+ float roughness = sqr(param1);
+ float anisotropy = clamp(param2, -0.99f, 0.99f);
+
+ if (anisotropy < 0.0f) {
+ bsdf->alpha_x = roughness / (1.0f + anisotropy);
+ bsdf->alpha_y = roughness * (1.0f + anisotropy);
+ }
+ else {
+ bsdf->alpha_x = roughness * (1.0f - anisotropy);
+ bsdf->alpha_y = roughness / (1.0f - anisotropy);
+ }
+
+ bsdf->ior = 0.0f;
+
+ if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID) {
+ sd->flag |= bsdf_microfacet_beckmann_aniso_setup(bsdf);
+ }
+ else if (type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID) {
+ sd->flag |= bsdf_microfacet_ggx_aniso_setup(bsdf);
+ }
+ else if (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID) {
+ kernel_assert(stack_valid(data_node.w));
+ bsdf->extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
+ if (bsdf->extra) {
+ bsdf->extra->color = stack_load_float3(stack, data_node.w);
+ bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->extra->clearcoat = 0.0f;
+ sd->flag |= bsdf_microfacet_multi_ggx_aniso_setup(bsdf);
+ }
+ }
+ else
+ sd->flag |= bsdf_ashikhmin_shirley_aniso_setup(bsdf);
+ }
+ break;
+ }
+ case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: {
+ float3 weight = sd->svm_closure_weight * mix_weight;
+ VelvetBsdf *bsdf = (VelvetBsdf *)bsdf_alloc(sd, sizeof(VelvetBsdf), weight);
+
+ if (bsdf) {
+ bsdf->N = N;
+
+ bsdf->sigma = saturate(param1);
+ sd->flag |= bsdf_ashikhmin_velvet_setup(bsdf);
+ }
+ break;
+ }
+ case CLOSURE_BSDF_GLOSSY_TOON_ID:
#ifdef __CAUSTICS_TRICKS__
- if(!kernel_data.integrator.caustics_reflective &&
- !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE))
- {
- break;
- }
+ if (!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE))
+ break;
+ ATTR_FALLTHROUGH;
#endif
- float3 weight = sd->svm_closure_weight * mix_weight;
-
- /* index of refraction */
- float eta = fmaxf(param2, 1e-5f);
- eta = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta;
-
- /* fresnel */
- float cosNO = dot(N, sd->I);
- float fresnel = fresnel_dielectric_cos(cosNO, eta);
- float roughness = sqr(param1);
-
- /* reflection */
-#ifdef __CAUSTICS_TRICKS__
- if(kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0)
-#endif
- {
- MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight*fresnel);
-
- if(bsdf) {
- bsdf->N = N;
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->extra = NULL;
- svm_node_glass_setup(sd, bsdf, type, eta, roughness, false);
- }
- }
-
- /* refraction */
-#ifdef __CAUSTICS_TRICKS__
- if(kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0)
-#endif
- {
- MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight*(1.0f - fresnel));
-
- if(bsdf) {
- bsdf->N = N;
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->extra = NULL;
- svm_node_glass_setup(sd, bsdf, type, eta, roughness, true);
- }
- }
-
- break;
- }
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: {
-#ifdef __CAUSTICS_TRICKS__
- if(!kernel_data.integrator.caustics_reflective && !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE))
- break;
-#endif
- float3 weight = sd->svm_closure_weight * mix_weight;
- MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
- if(!bsdf) {
- break;
- }
-
- MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
- if(!extra) {
- break;
- }
-
- bsdf->N = N;
- bsdf->extra = extra;
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
-
- float roughness = sqr(param1);
- bsdf->alpha_x = roughness;
- bsdf->alpha_y = roughness;
- float eta = fmaxf(param2, 1e-5f);
- bsdf->ior = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta;
-
- kernel_assert(stack_valid(data_node.z));
- bsdf->extra->color = stack_load_float3(stack, data_node.z);
- bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->extra->clearcoat = 0.0f;
-
- /* setup bsdf */
- sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf);
- break;
- }
- case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID:
- case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: {
-#ifdef __CAUSTICS_TRICKS__
- if(!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE))
- break;
-#endif
- float3 weight = sd->svm_closure_weight * mix_weight;
- MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
-
- if(bsdf) {
- bsdf->N = N;
- bsdf->extra = NULL;
- bsdf->T = stack_load_float3(stack, data_node.y);
-
- /* rotate tangent */
- float rotation = stack_load_float(stack, data_node.z);
-
- if(rotation != 0.0f)
- bsdf->T = rotate_around_axis(bsdf->T, bsdf->N, rotation * M_2PI_F);
-
- /* compute roughness */
- float roughness = sqr(param1);
- float anisotropy = clamp(param2, -0.99f, 0.99f);
-
- if(anisotropy < 0.0f) {
- bsdf->alpha_x = roughness/(1.0f + anisotropy);
- bsdf->alpha_y = roughness*(1.0f + anisotropy);
- }
- else {
- bsdf->alpha_x = roughness*(1.0f - anisotropy);
- bsdf->alpha_y = roughness/(1.0f - anisotropy);
- }
-
- bsdf->ior = 0.0f;
-
- if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID) {
- sd->flag |= bsdf_microfacet_beckmann_aniso_setup(bsdf);
- }
- else if(type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID) {
- sd->flag |= bsdf_microfacet_ggx_aniso_setup(bsdf);
- }
- else if(type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID) {
- kernel_assert(stack_valid(data_node.w));
- bsdf->extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
- if(bsdf->extra) {
- bsdf->extra->color = stack_load_float3(stack, data_node.w);
- bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->extra->clearcoat = 0.0f;
- sd->flag |= bsdf_microfacet_multi_ggx_aniso_setup(bsdf);
- }
- }
- else
- sd->flag |= bsdf_ashikhmin_shirley_aniso_setup(bsdf);
- }
- break;
- }
- case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: {
- float3 weight = sd->svm_closure_weight * mix_weight;
- VelvetBsdf *bsdf = (VelvetBsdf*)bsdf_alloc(sd, sizeof(VelvetBsdf), weight);
-
- if(bsdf) {
- bsdf->N = N;
-
- bsdf->sigma = saturate(param1);
- sd->flag |= bsdf_ashikhmin_velvet_setup(bsdf);
- }
- break;
- }
- case CLOSURE_BSDF_GLOSSY_TOON_ID:
-#ifdef __CAUSTICS_TRICKS__
- if(!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE))
- break;
- ATTR_FALLTHROUGH;
-#endif
- case CLOSURE_BSDF_DIFFUSE_TOON_ID: {
- float3 weight = sd->svm_closure_weight * mix_weight;
- ToonBsdf *bsdf = (ToonBsdf*)bsdf_alloc(sd, sizeof(ToonBsdf), weight);
-
- if(bsdf) {
- bsdf->N = N;
- bsdf->size = param1;
- bsdf->smooth = param2;
-
- if(type == CLOSURE_BSDF_DIFFUSE_TOON_ID)
- sd->flag |= bsdf_diffuse_toon_setup(bsdf);
- else
- sd->flag |= bsdf_glossy_toon_setup(bsdf);
- }
- break;
- }
+ case CLOSURE_BSDF_DIFFUSE_TOON_ID: {
+ float3 weight = sd->svm_closure_weight * mix_weight;
+ ToonBsdf *bsdf = (ToonBsdf *)bsdf_alloc(sd, sizeof(ToonBsdf), weight);
+
+ if (bsdf) {
+ bsdf->N = N;
+ bsdf->size = param1;
+ bsdf->smooth = param2;
+
+ if (type == CLOSURE_BSDF_DIFFUSE_TOON_ID)
+ sd->flag |= bsdf_diffuse_toon_setup(bsdf);
+ else
+ sd->flag |= bsdf_glossy_toon_setup(bsdf);
+ }
+ break;
+ }
#ifdef __HAIR__
- case CLOSURE_BSDF_HAIR_PRINCIPLED_ID: {
- uint4 data_node2 = read_node(kg, offset);
- uint4 data_node3 = read_node(kg, offset);
- uint4 data_node4 = read_node(kg, offset);
-
- float3 weight = sd->svm_closure_weight * mix_weight;
-
- uint offset_ofs, ior_ofs, color_ofs, parametrization;
- decode_node_uchar4(data_node.y, &offset_ofs, &ior_ofs, &color_ofs, &parametrization);
- float alpha = stack_load_float_default(stack, offset_ofs, data_node.z);
- float ior = stack_load_float_default(stack, ior_ofs, data_node.w);
-
- uint coat_ofs, melanin_ofs, melanin_redness_ofs, absorption_coefficient_ofs;
- decode_node_uchar4(data_node2.x, &coat_ofs, &melanin_ofs, &melanin_redness_ofs, &absorption_coefficient_ofs);
-
- uint tint_ofs, random_ofs, random_color_ofs, random_roughness_ofs;
- decode_node_uchar4(data_node3.x, &tint_ofs, &random_ofs, &random_color_ofs, &random_roughness_ofs);
-
- const AttributeDescriptor attr_descr_random = find_attribute(kg, sd, data_node4.y);
- float random = 0.0f;
- if(attr_descr_random.offset != ATTR_STD_NOT_FOUND) {
- random = primitive_surface_attribute_float(kg, sd, attr_descr_random, NULL, NULL);
- }
- else {
- random = stack_load_float_default(stack, random_ofs, data_node3.y);
- }
-
-
- PrincipledHairBSDF *bsdf = (PrincipledHairBSDF*)bsdf_alloc(sd, sizeof(PrincipledHairBSDF), weight);
- if(bsdf) {
- PrincipledHairExtra *extra = (PrincipledHairExtra*)closure_alloc_extra(sd, sizeof(PrincipledHairExtra));
-
- if(!extra)
- break;
-
- /* Random factors range: [-randomization/2, +randomization/2]. */
- float random_roughness = stack_load_float_default(stack, random_roughness_ofs, data_node3.w);
- float factor_random_roughness = 1.0f + 2.0f*(random - 0.5f)*random_roughness;
- float roughness = param1 * factor_random_roughness;
- float radial_roughness = param2 * factor_random_roughness;
-
- /* Remap Coat value to [0, 100]% of Roughness. */
- float coat = stack_load_float_default(stack, coat_ofs, data_node2.y);
- float m0_roughness = 1.0f - clamp(coat, 0.0f, 1.0f);
-
- bsdf->N = N;
- bsdf->v = roughness;
- bsdf->s = radial_roughness;
- bsdf->m0_roughness = m0_roughness;
- bsdf->alpha = alpha;
- bsdf->eta = ior;
- bsdf->extra = extra;
-
- switch(parametrization) {
- case NODE_PRINCIPLED_HAIR_DIRECT_ABSORPTION: {
- float3 absorption_coefficient = stack_load_float3(stack, absorption_coefficient_ofs);
- bsdf->sigma = absorption_coefficient;
- break;
- }
- case NODE_PRINCIPLED_HAIR_PIGMENT_CONCENTRATION: {
- float melanin = stack_load_float_default(stack, melanin_ofs, data_node2.z);
- float melanin_redness = stack_load_float_default(stack, melanin_redness_ofs, data_node2.w);
-
- /* Randomize melanin. */
- float random_color = stack_load_float_default(stack, random_color_ofs, data_node3.z);
- random_color = clamp(random_color, 0.0f, 1.0f);
- float factor_random_color = 1.0f + 2.0f * (random - 0.5f) * random_color;
- melanin *= factor_random_color;
-
- /* Map melanin 0..inf from more perceptually linear 0..1. */
- melanin = -logf(fmaxf(1.0f - melanin, 0.0001f));
-
- /* Benedikt Bitterli's melanin ratio remapping. */
- float eumelanin = melanin * (1.0f - melanin_redness);
- float pheomelanin = melanin * melanin_redness;
- float3 melanin_sigma = sigma_from_concentration(eumelanin, pheomelanin);
-
- /* Optional tint. */
- float3 tint = stack_load_float3(stack, tint_ofs);
- float3 tint_sigma = sigma_from_reflectance(tint, radial_roughness);
-
- bsdf->sigma = melanin_sigma + tint_sigma;
- break;
- }
- case NODE_PRINCIPLED_HAIR_REFLECTANCE: {
- float3 color = stack_load_float3(stack, color_ofs);
- bsdf->sigma = sigma_from_reflectance(color, radial_roughness);
- break;
- }
- default: {
- /* Fallback to brownish hair, same as defaults for melanin. */
- kernel_assert(!"Invalid Principled Hair parametrization!");
- bsdf->sigma = sigma_from_concentration(0.0f, 0.8054375f);
- break;
- }
- }
-
- sd->flag |= bsdf_principled_hair_setup(sd, bsdf);
- }
- break;
- }
- case CLOSURE_BSDF_HAIR_REFLECTION_ID:
- case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: {
- float3 weight = sd->svm_closure_weight * mix_weight;
-
- if(sd->flag & SD_BACKFACING && sd->type & PRIMITIVE_ALL_CURVE) {
- /* todo: giving a fixed weight here will cause issues when
- * mixing multiple BSDFS. energy will not be conserved and
- * the throughput can blow up after multiple bounces. we
- * better figure out a way to skip backfaces from rays
- * spawned by transmission from the front */
- bsdf_transparent_setup(sd, make_float3(1.0f, 1.0f, 1.0f), path_flag);
- }
- else {
- HairBsdf *bsdf = (HairBsdf*)bsdf_alloc(sd, sizeof(HairBsdf), weight);
-
- if(bsdf) {
- bsdf->N = N;
- bsdf->roughness1 = param1;
- bsdf->roughness2 = param2;
- bsdf->offset = -stack_load_float(stack, data_node.z);
-
- if(stack_valid(data_node.y)) {
- bsdf->T = normalize(stack_load_float3(stack, data_node.y));
- }
- else if(!(sd->type & PRIMITIVE_ALL_CURVE)) {
- bsdf->T = normalize(sd->dPdv);
- bsdf->offset = 0.0f;
- }
- else
- bsdf->T = normalize(sd->dPdu);
-
- if(type == CLOSURE_BSDF_HAIR_REFLECTION_ID) {
- sd->flag |= bsdf_hair_reflection_setup(bsdf);
- }
- else {
- sd->flag |= bsdf_hair_transmission_setup(bsdf);
- }
- }
- }
-
- break;
- }
-#endif /* __HAIR__ */
+ case CLOSURE_BSDF_HAIR_PRINCIPLED_ID: {
+ uint4 data_node2 = read_node(kg, offset);
+ uint4 data_node3 = read_node(kg, offset);
+ uint4 data_node4 = read_node(kg, offset);
+
+ float3 weight = sd->svm_closure_weight * mix_weight;
+
+ uint offset_ofs, ior_ofs, color_ofs, parametrization;
+ decode_node_uchar4(data_node.y, &offset_ofs, &ior_ofs, &color_ofs, &parametrization);
+ float alpha = stack_load_float_default(stack, offset_ofs, data_node.z);
+ float ior = stack_load_float_default(stack, ior_ofs, data_node.w);
+
+ uint coat_ofs, melanin_ofs, melanin_redness_ofs, absorption_coefficient_ofs;
+ decode_node_uchar4(data_node2.x,
+ &coat_ofs,
+ &melanin_ofs,
+ &melanin_redness_ofs,
+ &absorption_coefficient_ofs);
+
+ uint tint_ofs, random_ofs, random_color_ofs, random_roughness_ofs;
+ decode_node_uchar4(
+ data_node3.x, &tint_ofs, &random_ofs, &random_color_ofs, &random_roughness_ofs);
+
+ const AttributeDescriptor attr_descr_random = find_attribute(kg, sd, data_node4.y);
+ float random = 0.0f;
+ if (attr_descr_random.offset != ATTR_STD_NOT_FOUND) {
+ random = primitive_surface_attribute_float(kg, sd, attr_descr_random, NULL, NULL);
+ }
+ else {
+ random = stack_load_float_default(stack, random_ofs, data_node3.y);
+ }
+
+ PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)bsdf_alloc(
+ sd, sizeof(PrincipledHairBSDF), weight);
+ if (bsdf) {
+ PrincipledHairExtra *extra = (PrincipledHairExtra *)closure_alloc_extra(
+ sd, sizeof(PrincipledHairExtra));
+
+ if (!extra)
+ break;
+
+ /* Random factors range: [-randomization/2, +randomization/2]. */
+ float random_roughness = stack_load_float_default(
+ stack, random_roughness_ofs, data_node3.w);
+ float factor_random_roughness = 1.0f + 2.0f * (random - 0.5f) * random_roughness;
+ float roughness = param1 * factor_random_roughness;
+ float radial_roughness = param2 * factor_random_roughness;
+
+ /* Remap Coat value to [0, 100]% of Roughness. */
+ float coat = stack_load_float_default(stack, coat_ofs, data_node2.y);
+ float m0_roughness = 1.0f - clamp(coat, 0.0f, 1.0f);
+
+ bsdf->N = N;
+ bsdf->v = roughness;
+ bsdf->s = radial_roughness;
+ bsdf->m0_roughness = m0_roughness;
+ bsdf->alpha = alpha;
+ bsdf->eta = ior;
+ bsdf->extra = extra;
+
+ switch (parametrization) {
+ case NODE_PRINCIPLED_HAIR_DIRECT_ABSORPTION: {
+ float3 absorption_coefficient = stack_load_float3(stack, absorption_coefficient_ofs);
+ bsdf->sigma = absorption_coefficient;
+ break;
+ }
+ case NODE_PRINCIPLED_HAIR_PIGMENT_CONCENTRATION: {
+ float melanin = stack_load_float_default(stack, melanin_ofs, data_node2.z);
+ float melanin_redness = stack_load_float_default(
+ stack, melanin_redness_ofs, data_node2.w);
+
+ /* Randomize melanin. */
+ float random_color = stack_load_float_default(stack, random_color_ofs, data_node3.z);
+ random_color = clamp(random_color, 0.0f, 1.0f);
+ float factor_random_color = 1.0f + 2.0f * (random - 0.5f) * random_color;
+ melanin *= factor_random_color;
+
+ /* Map melanin 0..inf from more perceptually linear 0..1. */
+ melanin = -logf(fmaxf(1.0f - melanin, 0.0001f));
+
+ /* Benedikt Bitterli's melanin ratio remapping. */
+ float eumelanin = melanin * (1.0f - melanin_redness);
+ float pheomelanin = melanin * melanin_redness;
+ float3 melanin_sigma = sigma_from_concentration(eumelanin, pheomelanin);
+
+ /* Optional tint. */
+ float3 tint = stack_load_float3(stack, tint_ofs);
+ float3 tint_sigma = sigma_from_reflectance(tint, radial_roughness);
+
+ bsdf->sigma = melanin_sigma + tint_sigma;
+ break;
+ }
+ case NODE_PRINCIPLED_HAIR_REFLECTANCE: {
+ float3 color = stack_load_float3(stack, color_ofs);
+ bsdf->sigma = sigma_from_reflectance(color, radial_roughness);
+ break;
+ }
+ default: {
+ /* Fallback to brownish hair, same as defaults for melanin. */
+ kernel_assert(!"Invalid Principled Hair parametrization!");
+ bsdf->sigma = sigma_from_concentration(0.0f, 0.8054375f);
+ break;
+ }
+ }
+
+ sd->flag |= bsdf_principled_hair_setup(sd, bsdf);
+ }
+ break;
+ }
+ case CLOSURE_BSDF_HAIR_REFLECTION_ID:
+ case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: {
+ float3 weight = sd->svm_closure_weight * mix_weight;
+
+ if (sd->flag & SD_BACKFACING && sd->type & PRIMITIVE_ALL_CURVE) {
+ /* todo: giving a fixed weight here will cause issues when
+ * mixing multiple BSDFS. energy will not be conserved and
+ * the throughput can blow up after multiple bounces. we
+ * better figure out a way to skip backfaces from rays
+ * spawned by transmission from the front */
+ bsdf_transparent_setup(sd, make_float3(1.0f, 1.0f, 1.0f), path_flag);
+ }
+ else {
+ HairBsdf *bsdf = (HairBsdf *)bsdf_alloc(sd, sizeof(HairBsdf), weight);
+
+ if (bsdf) {
+ bsdf->N = N;
+ bsdf->roughness1 = param1;
+ bsdf->roughness2 = param2;
+ bsdf->offset = -stack_load_float(stack, data_node.z);
+
+ if (stack_valid(data_node.y)) {
+ bsdf->T = normalize(stack_load_float3(stack, data_node.y));
+ }
+ else if (!(sd->type & PRIMITIVE_ALL_CURVE)) {
+ bsdf->T = normalize(sd->dPdv);
+ bsdf->offset = 0.0f;
+ }
+ else
+ bsdf->T = normalize(sd->dPdu);
+
+ if (type == CLOSURE_BSDF_HAIR_REFLECTION_ID) {
+ sd->flag |= bsdf_hair_reflection_setup(bsdf);
+ }
+ else {
+ sd->flag |= bsdf_hair_transmission_setup(bsdf);
+ }
+ }
+ }
+
+ break;
+ }
+#endif /* __HAIR__ */
#ifdef __SUBSURFACE__
- case CLOSURE_BSSRDF_CUBIC_ID:
- case CLOSURE_BSSRDF_GAUSSIAN_ID:
- case CLOSURE_BSSRDF_BURLEY_ID:
- case CLOSURE_BSSRDF_RANDOM_WALK_ID: {
- float3 weight = sd->svm_closure_weight * mix_weight;
- Bssrdf *bssrdf = bssrdf_alloc(sd, weight);
-
- if(bssrdf) {
- /* disable in case of diffuse ancestor, can't see it well then and
- * adds considerably noise due to probabilities of continuing path
- * getting lower and lower */
- if(path_flag & PATH_RAY_DIFFUSE_ANCESTOR)
- param1 = 0.0f;
-
- bssrdf->radius = stack_load_float3(stack, data_node.z)*param1;
- bssrdf->albedo = sd->svm_closure_weight;
- bssrdf->texture_blur = param2;
- bssrdf->sharpness = stack_load_float(stack, data_node.w);
- bssrdf->N = N;
- bssrdf->roughness = 0.0f;
- sd->flag |= bssrdf_setup(sd, bssrdf, (ClosureType)type);
- }
-
- break;
- }
+ case CLOSURE_BSSRDF_CUBIC_ID:
+ case CLOSURE_BSSRDF_GAUSSIAN_ID:
+ case CLOSURE_BSSRDF_BURLEY_ID:
+ case CLOSURE_BSSRDF_RANDOM_WALK_ID: {
+ float3 weight = sd->svm_closure_weight * mix_weight;
+ Bssrdf *bssrdf = bssrdf_alloc(sd, weight);
+
+ if (bssrdf) {
+ /* disable in case of diffuse ancestor, can't see it well then and
+ * adds considerably noise due to probabilities of continuing path
+ * getting lower and lower */
+ if (path_flag & PATH_RAY_DIFFUSE_ANCESTOR)
+ param1 = 0.0f;
+
+ bssrdf->radius = stack_load_float3(stack, data_node.z) * param1;
+ bssrdf->albedo = sd->svm_closure_weight;
+ bssrdf->texture_blur = param2;
+ bssrdf->sharpness = stack_load_float(stack, data_node.w);
+ bssrdf->N = N;
+ bssrdf->roughness = 0.0f;
+ sd->flag |= bssrdf_setup(sd, bssrdf, (ClosureType)type);
+ }
+
+ break;
+ }
#endif
- default:
- break;
- }
+ default:
+ break;
+ }
}
-ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type)
+ccl_device void svm_node_closure_volume(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type)
{
#ifdef __VOLUME__
- /* Only sum extinction for volumes, variable is shared with surface transparency. */
- if(shader_type != SHADER_TYPE_VOLUME) {
- return;
- }
-
- uint type, density_offset, anisotropy_offset;
-
- uint mix_weight_offset;
- decode_node_uchar4(node.y, &type, &density_offset, &anisotropy_offset, &mix_weight_offset);
- float mix_weight = (stack_valid(mix_weight_offset)? stack_load_float(stack, mix_weight_offset): 1.0f);
-
- if(mix_weight == 0.0f) {
- return;
- }
-
- float density = (stack_valid(density_offset))? stack_load_float(stack, density_offset): __uint_as_float(node.z);
- density = mix_weight * fmaxf(density, 0.0f);
-
- /* Compute scattering coefficient. */
- float3 weight = sd->svm_closure_weight;
-
- if(type == CLOSURE_VOLUME_ABSORPTION_ID) {
- weight = make_float3(1.0f, 1.0f, 1.0f) - weight;
- }
-
- weight *= density;
-
- /* Add closure for volume scattering. */
- if(type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) {
- HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume*)bsdf_alloc(sd, sizeof(HenyeyGreensteinVolume), weight);
-
- if(volume) {
- float anisotropy = (stack_valid(anisotropy_offset))? stack_load_float(stack, anisotropy_offset): __uint_as_float(node.w);
- volume->g = anisotropy; /* g */
- sd->flag |= volume_henyey_greenstein_setup(volume);
- }
- }
-
- /* Sum total extinction weight. */
- volume_extinction_setup(sd, weight);
+ /* Only sum extinction for volumes, variable is shared with surface transparency. */
+ if (shader_type != SHADER_TYPE_VOLUME) {
+ return;
+ }
+
+ uint type, density_offset, anisotropy_offset;
+
+ uint mix_weight_offset;
+ decode_node_uchar4(node.y, &type, &density_offset, &anisotropy_offset, &mix_weight_offset);
+ float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) :
+ 1.0f);
+
+ if (mix_weight == 0.0f) {
+ return;
+ }
+
+ float density = (stack_valid(density_offset)) ? stack_load_float(stack, density_offset) :
+ __uint_as_float(node.z);
+ density = mix_weight * fmaxf(density, 0.0f);
+
+ /* Compute scattering coefficient. */
+ float3 weight = sd->svm_closure_weight;
+
+ if (type == CLOSURE_VOLUME_ABSORPTION_ID) {
+ weight = make_float3(1.0f, 1.0f, 1.0f) - weight;
+ }
+
+ weight *= density;
+
+ /* Add closure for volume scattering. */
+ if (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) {
+ HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume *)bsdf_alloc(
+ sd, sizeof(HenyeyGreensteinVolume), weight);
+
+ if (volume) {
+ float anisotropy = (stack_valid(anisotropy_offset)) ?
+ stack_load_float(stack, anisotropy_offset) :
+ __uint_as_float(node.w);
+ volume->g = anisotropy; /* g */
+ sd->flag |= volume_henyey_greenstein_setup(volume);
+ }
+ }
+
+ /* Sum total extinction weight. */
+ volume_extinction_setup(sd, weight);
#endif
}
-ccl_device void svm_node_principled_volume(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type, int path_flag, int *offset)
+ccl_device void svm_node_principled_volume(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node,
+ ShaderType shader_type,
+ int path_flag,
+ int *offset)
{
#ifdef __VOLUME__
- uint4 value_node = read_node(kg, offset);
- uint4 attr_node = read_node(kg, offset);
-
- /* Only sum extinction for volumes, variable is shared with surface transparency. */
- if(shader_type != SHADER_TYPE_VOLUME) {
- return;
- }
-
- uint density_offset, anisotropy_offset, absorption_color_offset, mix_weight_offset;
- decode_node_uchar4(node.y, &density_offset, &anisotropy_offset, &absorption_color_offset, &mix_weight_offset);
- float mix_weight = (stack_valid(mix_weight_offset)? stack_load_float(stack, mix_weight_offset): 1.0f);
-
- if(mix_weight == 0.0f) {
- return;
- }
-
- /* Compute density. */
- float primitive_density = 1.0f;
- float density = (stack_valid(density_offset))? stack_load_float(stack, density_offset): __uint_as_float(value_node.x);
- density = mix_weight * fmaxf(density, 0.0f);
-
- if(density > CLOSURE_WEIGHT_CUTOFF) {
- /* Density and color attribute lookup if available. */
- const AttributeDescriptor attr_density = find_attribute(kg, sd, attr_node.x);
- if(attr_density.offset != ATTR_STD_NOT_FOUND) {
- primitive_density = primitive_volume_attribute_float(kg, sd, attr_density);
- density = fmaxf(density * primitive_density, 0.0f);
- }
- }
-
- if(density > CLOSURE_WEIGHT_CUTOFF) {
- /* Compute scattering color. */
- float3 color = sd->svm_closure_weight;
-
- const AttributeDescriptor attr_color = find_attribute(kg, sd, attr_node.y);
- if(attr_color.offset != ATTR_STD_NOT_FOUND) {
- color *= primitive_volume_attribute_float3(kg, sd, attr_color);
- }
-
- /* Add closure for volume scattering. */
- HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume*)bsdf_alloc(sd, sizeof(HenyeyGreensteinVolume), color * density);
- if(volume) {
- float anisotropy = (stack_valid(anisotropy_offset))? stack_load_float(stack, anisotropy_offset): __uint_as_float(value_node.y);
- volume->g = anisotropy;
- sd->flag |= volume_henyey_greenstein_setup(volume);
- }
-
- /* Add extinction weight. */
- float3 zero = make_float3(0.0f, 0.0f, 0.0f);
- float3 one = make_float3(1.0f, 1.0f, 1.0f);
- float3 absorption_color = max(sqrt(stack_load_float3(stack, absorption_color_offset)), zero);
- float3 absorption = max(one - color, zero) * max(one - absorption_color, zero);
- volume_extinction_setup(sd, (color + absorption) * density);
- }
-
- /* Compute emission. */
- if(path_flag & PATH_RAY_SHADOW) {
- /* Don't need emission for shadows. */
- return;
- }
-
- uint emission_offset, emission_color_offset, blackbody_offset, temperature_offset;
- decode_node_uchar4(node.z, &emission_offset, &emission_color_offset, &blackbody_offset, &temperature_offset);
- float emission = (stack_valid(emission_offset))? stack_load_float(stack, emission_offset): __uint_as_float(value_node.z);
- float blackbody = (stack_valid(blackbody_offset))? stack_load_float(stack, blackbody_offset): __uint_as_float(value_node.w);
-
- if(emission > CLOSURE_WEIGHT_CUTOFF) {
- float3 emission_color = stack_load_float3(stack, emission_color_offset);
- emission_setup(sd, emission * emission_color);
- }
-
- if(blackbody > CLOSURE_WEIGHT_CUTOFF) {
- float T = stack_load_float(stack, temperature_offset);
-
- /* Add flame temperature from attribute if available. */
- const AttributeDescriptor attr_temperature = find_attribute(kg, sd, attr_node.z);
- if(attr_temperature.offset != ATTR_STD_NOT_FOUND) {
- float temperature = primitive_volume_attribute_float(kg, sd, attr_temperature);
- T *= fmaxf(temperature, 0.0f);
- }
-
- T = fmaxf(T, 0.0f);
-
- /* Stefan-Boltzmann law. */
- float T4 = sqr(sqr(T));
- float sigma = 5.670373e-8f * 1e-6f / M_PI_F;
- float intensity = sigma * mix(1.0f, T4, blackbody);
-
- if(intensity > CLOSURE_WEIGHT_CUTOFF) {
- float3 blackbody_tint = stack_load_float3(stack, node.w);
- float3 bb = blackbody_tint * intensity * svm_math_blackbody_color(T);
- emission_setup(sd, bb);
- }
- }
+ uint4 value_node = read_node(kg, offset);
+ uint4 attr_node = read_node(kg, offset);
+
+ /* Only sum extinction for volumes, variable is shared with surface transparency. */
+ if (shader_type != SHADER_TYPE_VOLUME) {
+ return;
+ }
+
+ uint density_offset, anisotropy_offset, absorption_color_offset, mix_weight_offset;
+ decode_node_uchar4(
+ node.y, &density_offset, &anisotropy_offset, &absorption_color_offset, &mix_weight_offset);
+ float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) :
+ 1.0f);
+
+ if (mix_weight == 0.0f) {
+ return;
+ }
+
+ /* Compute density. */
+ float primitive_density = 1.0f;
+ float density = (stack_valid(density_offset)) ? stack_load_float(stack, density_offset) :
+ __uint_as_float(value_node.x);
+ density = mix_weight * fmaxf(density, 0.0f);
+
+ if (density > CLOSURE_WEIGHT_CUTOFF) {
+ /* Density and color attribute lookup if available. */
+ const AttributeDescriptor attr_density = find_attribute(kg, sd, attr_node.x);
+ if (attr_density.offset != ATTR_STD_NOT_FOUND) {
+ primitive_density = primitive_volume_attribute_float(kg, sd, attr_density);
+ density = fmaxf(density * primitive_density, 0.0f);
+ }
+ }
+
+ if (density > CLOSURE_WEIGHT_CUTOFF) {
+ /* Compute scattering color. */
+ float3 color = sd->svm_closure_weight;
+
+ const AttributeDescriptor attr_color = find_attribute(kg, sd, attr_node.y);
+ if (attr_color.offset != ATTR_STD_NOT_FOUND) {
+ color *= primitive_volume_attribute_float3(kg, sd, attr_color);
+ }
+
+ /* Add closure for volume scattering. */
+ HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume *)bsdf_alloc(
+ sd, sizeof(HenyeyGreensteinVolume), color * density);
+ if (volume) {
+ float anisotropy = (stack_valid(anisotropy_offset)) ?
+ stack_load_float(stack, anisotropy_offset) :
+ __uint_as_float(value_node.y);
+ volume->g = anisotropy;
+ sd->flag |= volume_henyey_greenstein_setup(volume);
+ }
+
+ /* Add extinction weight. */
+ float3 zero = make_float3(0.0f, 0.0f, 0.0f);
+ float3 one = make_float3(1.0f, 1.0f, 1.0f);
+ float3 absorption_color = max(sqrt(stack_load_float3(stack, absorption_color_offset)), zero);
+ float3 absorption = max(one - color, zero) * max(one - absorption_color, zero);
+ volume_extinction_setup(sd, (color + absorption) * density);
+ }
+
+ /* Compute emission. */
+ if (path_flag & PATH_RAY_SHADOW) {
+ /* Don't need emission for shadows. */
+ return;
+ }
+
+ uint emission_offset, emission_color_offset, blackbody_offset, temperature_offset;
+ decode_node_uchar4(
+ node.z, &emission_offset, &emission_color_offset, &blackbody_offset, &temperature_offset);
+ float emission = (stack_valid(emission_offset)) ? stack_load_float(stack, emission_offset) :
+ __uint_as_float(value_node.z);
+ float blackbody = (stack_valid(blackbody_offset)) ? stack_load_float(stack, blackbody_offset) :
+ __uint_as_float(value_node.w);
+
+ if (emission > CLOSURE_WEIGHT_CUTOFF) {
+ float3 emission_color = stack_load_float3(stack, emission_color_offset);
+ emission_setup(sd, emission * emission_color);
+ }
+
+ if (blackbody > CLOSURE_WEIGHT_CUTOFF) {
+ float T = stack_load_float(stack, temperature_offset);
+
+ /* Add flame temperature from attribute if available. */
+ const AttributeDescriptor attr_temperature = find_attribute(kg, sd, attr_node.z);
+ if (attr_temperature.offset != ATTR_STD_NOT_FOUND) {
+ float temperature = primitive_volume_attribute_float(kg, sd, attr_temperature);
+ T *= fmaxf(temperature, 0.0f);
+ }
+
+ T = fmaxf(T, 0.0f);
+
+ /* Stefan-Boltzmann law. */
+ float T4 = sqr(sqr(T));
+ float sigma = 5.670373e-8f * 1e-6f / M_PI_F;
+ float intensity = sigma * mix(1.0f, T4, blackbody);
+
+ if (intensity > CLOSURE_WEIGHT_CUTOFF) {
+ float3 blackbody_tint = stack_load_float3(stack, node.w);
+ float3 bb = blackbody_tint * intensity * svm_math_blackbody_color(T);
+ emission_setup(sd, bb);
+ }
+ }
#endif
}
ccl_device void svm_node_closure_emission(ShaderData *sd, float *stack, uint4 node)
{
- uint mix_weight_offset = node.y;
- float3 weight = sd->svm_closure_weight;
+ uint mix_weight_offset = node.y;
+ float3 weight = sd->svm_closure_weight;
- if(stack_valid(mix_weight_offset)) {
- float mix_weight = stack_load_float(stack, mix_weight_offset);
+ if (stack_valid(mix_weight_offset)) {
+ float mix_weight = stack_load_float(stack, mix_weight_offset);
- if(mix_weight == 0.0f)
- return;
+ if (mix_weight == 0.0f)
+ return;
- weight *= mix_weight;
- }
+ weight *= mix_weight;
+ }
- emission_setup(sd, weight);
+ emission_setup(sd, weight);
}
ccl_device void svm_node_closure_background(ShaderData *sd, float *stack, uint4 node)
{
- uint mix_weight_offset = node.y;
- float3 weight = sd->svm_closure_weight;
+ uint mix_weight_offset = node.y;
+ float3 weight = sd->svm_closure_weight;
- if(stack_valid(mix_weight_offset)) {
- float mix_weight = stack_load_float(stack, mix_weight_offset);
+ if (stack_valid(mix_weight_offset)) {
+ float mix_weight = stack_load_float(stack, mix_weight_offset);
- if(mix_weight == 0.0f)
- return;
+ if (mix_weight == 0.0f)
+ return;
- weight *= mix_weight;
- }
+ weight *= mix_weight;
+ }
- background_setup(sd, weight);
+ background_setup(sd, weight);
}
ccl_device void svm_node_closure_holdout(ShaderData *sd, float *stack, uint4 node)
{
- uint mix_weight_offset = node.y;
+ uint mix_weight_offset = node.y;
- if(stack_valid(mix_weight_offset)) {
- float mix_weight = stack_load_float(stack, mix_weight_offset);
+ if (stack_valid(mix_weight_offset)) {
+ float mix_weight = stack_load_float(stack, mix_weight_offset);
- if(mix_weight == 0.0f)
- return;
+ if (mix_weight == 0.0f)
+ return;
- closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, sd->svm_closure_weight * mix_weight);
- }
- else
- closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, sd->svm_closure_weight);
+ closure_alloc(
+ sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, sd->svm_closure_weight * mix_weight);
+ }
+ else
+ closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, sd->svm_closure_weight);
- sd->flag |= SD_HOLDOUT;
+ sd->flag |= SD_HOLDOUT;
}
/* Closure Nodes */
ccl_device_inline void svm_node_closure_store_weight(ShaderData *sd, float3 weight)
{
- sd->svm_closure_weight = weight;
+ sd->svm_closure_weight = weight;
}
ccl_device void svm_node_closure_set_weight(ShaderData *sd, uint r, uint g, uint b)
{
- float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b));
- svm_node_closure_store_weight(sd, weight);
+ float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b));
+ svm_node_closure_store_weight(sd, weight);
}
ccl_device void svm_node_closure_weight(ShaderData *sd, float *stack, uint weight_offset)
{
- float3 weight = stack_load_float3(stack, weight_offset);
+ float3 weight = stack_load_float3(stack, weight_offset);
- svm_node_closure_store_weight(sd, weight);
+ svm_node_closure_store_weight(sd, weight);
}
-ccl_device void svm_node_emission_weight(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+ccl_device void svm_node_emission_weight(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node)
{
- uint color_offset = node.y;
- uint strength_offset = node.z;
+ uint color_offset = node.y;
+ uint strength_offset = node.z;
- float strength = stack_load_float(stack, strength_offset);
- float3 weight = stack_load_float3(stack, color_offset)*strength;
+ float strength = stack_load_float(stack, strength_offset);
+ float3 weight = stack_load_float3(stack, color_offset) * strength;
- svm_node_closure_store_weight(sd, weight);
+ svm_node_closure_store_weight(sd, weight);
}
ccl_device void svm_node_mix_closure(ShaderData *sd, float *stack, uint4 node)
{
- /* fetch weight from blend input, previous mix closures,
- * and write to stack to be used by closure nodes later */
- uint weight_offset, in_weight_offset, weight1_offset, weight2_offset;
- decode_node_uchar4(node.y, &weight_offset, &in_weight_offset, &weight1_offset, &weight2_offset);
+ /* fetch weight from blend input, previous mix closures,
+ * and write to stack to be used by closure nodes later */
+ uint weight_offset, in_weight_offset, weight1_offset, weight2_offset;
+ decode_node_uchar4(node.y, &weight_offset, &in_weight_offset, &weight1_offset, &weight2_offset);
- float weight = stack_load_float(stack, weight_offset);
- weight = saturate(weight);
+ float weight = stack_load_float(stack, weight_offset);
+ weight = saturate(weight);
- float in_weight = (stack_valid(in_weight_offset))? stack_load_float(stack, in_weight_offset): 1.0f;
+ float in_weight = (stack_valid(in_weight_offset)) ? stack_load_float(stack, in_weight_offset) :
+ 1.0f;
- if(stack_valid(weight1_offset))
- stack_store_float(stack, weight1_offset, in_weight*(1.0f - weight));
- if(stack_valid(weight2_offset))
- stack_store_float(stack, weight2_offset, in_weight*weight);
+ if (stack_valid(weight1_offset))
+ stack_store_float(stack, weight1_offset, in_weight * (1.0f - weight));
+ if (stack_valid(weight2_offset))
+ stack_store_float(stack, weight2_offset, in_weight * weight);
}
/* (Bump) normal */
-ccl_device void svm_node_set_normal(KernelGlobals *kg, ShaderData *sd, float *stack, uint in_direction, uint out_normal)
+ccl_device void svm_node_set_normal(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint in_direction, uint out_normal)
{
- float3 normal = stack_load_float3(stack, in_direction);
- sd->N = normal;
- stack_store_float3(stack, out_normal, normal);
+ float3 normal = stack_load_float3(stack, in_direction);
+ sd->N = normal;
+ stack_store_float3(stack, out_normal, normal);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_color_util.h b/intern/cycles/kernel/svm/svm_color_util.h
index d5945f915c6..12b59d2616b 100644
--- a/intern/cycles/kernel/svm/svm_color_util.h
+++ b/intern/cycles/kernel/svm/svm_color_util.h
@@ -18,288 +18,310 @@ CCL_NAMESPACE_BEGIN
ccl_device float3 svm_mix_blend(float t, float3 col1, float3 col2)
{
- return interp(col1, col2, t);
+ return interp(col1, col2, t);
}
ccl_device float3 svm_mix_add(float t, float3 col1, float3 col2)
{
- return interp(col1, col1 + col2, t);
+ return interp(col1, col1 + col2, t);
}
ccl_device float3 svm_mix_mul(float t, float3 col1, float3 col2)
{
- return interp(col1, col1 * col2, t);
+ return interp(col1, col1 * col2, t);
}
ccl_device float3 svm_mix_screen(float t, float3 col1, float3 col2)
{
- float tm = 1.0f - t;
- float3 one = make_float3(1.0f, 1.0f, 1.0f);
- float3 tm3 = make_float3(tm, tm, tm);
+ float tm = 1.0f - t;
+ float3 one = make_float3(1.0f, 1.0f, 1.0f);
+ float3 tm3 = make_float3(tm, tm, tm);
- return one - (tm3 + t*(one - col2))*(one - col1);
+ return one - (tm3 + t * (one - col2)) * (one - col1);
}
ccl_device float3 svm_mix_overlay(float t, float3 col1, float3 col2)
{
- float tm = 1.0f - t;
+ float tm = 1.0f - t;
- float3 outcol = col1;
+ float3 outcol = col1;
- if(outcol.x < 0.5f)
- outcol.x *= tm + 2.0f*t*col2.x;
- else
- outcol.x = 1.0f - (tm + 2.0f*t*(1.0f - col2.x))*(1.0f - outcol.x);
+ if (outcol.x < 0.5f)
+ outcol.x *= tm + 2.0f * t * col2.x;
+ else
+ outcol.x = 1.0f - (tm + 2.0f * t * (1.0f - col2.x)) * (1.0f - outcol.x);
- if(outcol.y < 0.5f)
- outcol.y *= tm + 2.0f*t*col2.y;
- else
- outcol.y = 1.0f - (tm + 2.0f*t*(1.0f - col2.y))*(1.0f - outcol.y);
+ if (outcol.y < 0.5f)
+ outcol.y *= tm + 2.0f * t * col2.y;
+ else
+ outcol.y = 1.0f - (tm + 2.0f * t * (1.0f - col2.y)) * (1.0f - outcol.y);
- if(outcol.z < 0.5f)
- outcol.z *= tm + 2.0f*t*col2.z;
- else
- outcol.z = 1.0f - (tm + 2.0f*t*(1.0f - col2.z))*(1.0f - outcol.z);
+ if (outcol.z < 0.5f)
+ outcol.z *= tm + 2.0f * t * col2.z;
+ else
+ outcol.z = 1.0f - (tm + 2.0f * t * (1.0f - col2.z)) * (1.0f - outcol.z);
- return outcol;
+ return outcol;
}
ccl_device float3 svm_mix_sub(float t, float3 col1, float3 col2)
{
- return interp(col1, col1 - col2, t);
+ return interp(col1, col1 - col2, t);
}
ccl_device float3 svm_mix_div(float t, float3 col1, float3 col2)
{
- float tm = 1.0f - t;
+ float tm = 1.0f - t;
- float3 outcol = col1;
+ float3 outcol = col1;
- if(col2.x != 0.0f) outcol.x = tm*outcol.x + t*outcol.x/col2.x;
- if(col2.y != 0.0f) outcol.y = tm*outcol.y + t*outcol.y/col2.y;
- if(col2.z != 0.0f) outcol.z = tm*outcol.z + t*outcol.z/col2.z;
+ if (col2.x != 0.0f)
+ outcol.x = tm * outcol.x + t * outcol.x / col2.x;
+ if (col2.y != 0.0f)
+ outcol.y = tm * outcol.y + t * outcol.y / col2.y;
+ if (col2.z != 0.0f)
+ outcol.z = tm * outcol.z + t * outcol.z / col2.z;
- return outcol;
+ return outcol;
}
ccl_device float3 svm_mix_diff(float t, float3 col1, float3 col2)
{
- return interp(col1, fabs(col1 - col2), t);
+ return interp(col1, fabs(col1 - col2), t);
}
ccl_device float3 svm_mix_dark(float t, float3 col1, float3 col2)
{
- return min(col1, col2)*t + col1*(1.0f - t);
+ return min(col1, col2) * t + col1 * (1.0f - t);
}
ccl_device float3 svm_mix_light(float t, float3 col1, float3 col2)
{
- return max(col1, col2*t);
+ return max(col1, col2 * t);
}
ccl_device float3 svm_mix_dodge(float t, float3 col1, float3 col2)
{
- float3 outcol = col1;
-
- if(outcol.x != 0.0f) {
- float tmp = 1.0f - t*col2.x;
- if(tmp <= 0.0f)
- outcol.x = 1.0f;
- else if((tmp = outcol.x/tmp) > 1.0f)
- outcol.x = 1.0f;
- else
- outcol.x = tmp;
- }
- if(outcol.y != 0.0f) {
- float tmp = 1.0f - t*col2.y;
- if(tmp <= 0.0f)
- outcol.y = 1.0f;
- else if((tmp = outcol.y/tmp) > 1.0f)
- outcol.y = 1.0f;
- else
- outcol.y = tmp;
- }
- if(outcol.z != 0.0f) {
- float tmp = 1.0f - t*col2.z;
- if(tmp <= 0.0f)
- outcol.z = 1.0f;
- else if((tmp = outcol.z/tmp) > 1.0f)
- outcol.z = 1.0f;
- else
- outcol.z = tmp;
- }
-
- return outcol;
+ float3 outcol = col1;
+
+ if (outcol.x != 0.0f) {
+ float tmp = 1.0f - t * col2.x;
+ if (tmp <= 0.0f)
+ outcol.x = 1.0f;
+ else if ((tmp = outcol.x / tmp) > 1.0f)
+ outcol.x = 1.0f;
+ else
+ outcol.x = tmp;
+ }
+ if (outcol.y != 0.0f) {
+ float tmp = 1.0f - t * col2.y;
+ if (tmp <= 0.0f)
+ outcol.y = 1.0f;
+ else if ((tmp = outcol.y / tmp) > 1.0f)
+ outcol.y = 1.0f;
+ else
+ outcol.y = tmp;
+ }
+ if (outcol.z != 0.0f) {
+ float tmp = 1.0f - t * col2.z;
+ if (tmp <= 0.0f)
+ outcol.z = 1.0f;
+ else if ((tmp = outcol.z / tmp) > 1.0f)
+ outcol.z = 1.0f;
+ else
+ outcol.z = tmp;
+ }
+
+ return outcol;
}
ccl_device float3 svm_mix_burn(float t, float3 col1, float3 col2)
{
- float tmp, tm = 1.0f - t;
-
- float3 outcol = col1;
-
- tmp = tm + t*col2.x;
- if(tmp <= 0.0f)
- outcol.x = 0.0f;
- else if((tmp = (1.0f - (1.0f - outcol.x)/tmp)) < 0.0f)
- outcol.x = 0.0f;
- else if(tmp > 1.0f)
- outcol.x = 1.0f;
- else
- outcol.x = tmp;
-
- tmp = tm + t*col2.y;
- if(tmp <= 0.0f)
- outcol.y = 0.0f;
- else if((tmp = (1.0f - (1.0f - outcol.y)/tmp)) < 0.0f)
- outcol.y = 0.0f;
- else if(tmp > 1.0f)
- outcol.y = 1.0f;
- else
- outcol.y = tmp;
-
- tmp = tm + t*col2.z;
- if(tmp <= 0.0f)
- outcol.z = 0.0f;
- else if((tmp = (1.0f - (1.0f - outcol.z)/tmp)) < 0.0f)
- outcol.z = 0.0f;
- else if(tmp > 1.0f)
- outcol.z = 1.0f;
- else
- outcol.z = tmp;
-
- return outcol;
+ float tmp, tm = 1.0f - t;
+
+ float3 outcol = col1;
+
+ tmp = tm + t * col2.x;
+ if (tmp <= 0.0f)
+ outcol.x = 0.0f;
+ else if ((tmp = (1.0f - (1.0f - outcol.x) / tmp)) < 0.0f)
+ outcol.x = 0.0f;
+ else if (tmp > 1.0f)
+ outcol.x = 1.0f;
+ else
+ outcol.x = tmp;
+
+ tmp = tm + t * col2.y;
+ if (tmp <= 0.0f)
+ outcol.y = 0.0f;
+ else if ((tmp = (1.0f - (1.0f - outcol.y) / tmp)) < 0.0f)
+ outcol.y = 0.0f;
+ else if (tmp > 1.0f)
+ outcol.y = 1.0f;
+ else
+ outcol.y = tmp;
+
+ tmp = tm + t * col2.z;
+ if (tmp <= 0.0f)
+ outcol.z = 0.0f;
+ else if ((tmp = (1.0f - (1.0f - outcol.z) / tmp)) < 0.0f)
+ outcol.z = 0.0f;
+ else if (tmp > 1.0f)
+ outcol.z = 1.0f;
+ else
+ outcol.z = tmp;
+
+ return outcol;
}
ccl_device float3 svm_mix_hue(float t, float3 col1, float3 col2)
{
- float3 outcol = col1;
+ float3 outcol = col1;
- float3 hsv2 = rgb_to_hsv(col2);
+ float3 hsv2 = rgb_to_hsv(col2);
- if(hsv2.y != 0.0f) {
- float3 hsv = rgb_to_hsv(outcol);
- hsv.x = hsv2.x;
- float3 tmp = hsv_to_rgb(hsv);
+ if (hsv2.y != 0.0f) {
+ float3 hsv = rgb_to_hsv(outcol);
+ hsv.x = hsv2.x;
+ float3 tmp = hsv_to_rgb(hsv);
- outcol = interp(outcol, tmp, t);
- }
+ outcol = interp(outcol, tmp, t);
+ }
- return outcol;
+ return outcol;
}
ccl_device float3 svm_mix_sat(float t, float3 col1, float3 col2)
{
- float tm = 1.0f - t;
+ float tm = 1.0f - t;
- float3 outcol = col1;
+ float3 outcol = col1;
- float3 hsv = rgb_to_hsv(outcol);
+ float3 hsv = rgb_to_hsv(outcol);
- if(hsv.y != 0.0f) {
- float3 hsv2 = rgb_to_hsv(col2);
+ if (hsv.y != 0.0f) {
+ float3 hsv2 = rgb_to_hsv(col2);
- hsv.y = tm*hsv.y + t*hsv2.y;
- outcol = hsv_to_rgb(hsv);
- }
+ hsv.y = tm * hsv.y + t * hsv2.y;
+ outcol = hsv_to_rgb(hsv);
+ }
- return outcol;
+ return outcol;
}
ccl_device float3 svm_mix_val(float t, float3 col1, float3 col2)
{
- float tm = 1.0f - t;
+ float tm = 1.0f - t;
- float3 hsv = rgb_to_hsv(col1);
- float3 hsv2 = rgb_to_hsv(col2);
+ float3 hsv = rgb_to_hsv(col1);
+ float3 hsv2 = rgb_to_hsv(col2);
- hsv.z = tm*hsv.z + t*hsv2.z;
+ hsv.z = tm * hsv.z + t * hsv2.z;
- return hsv_to_rgb(hsv);
+ return hsv_to_rgb(hsv);
}
ccl_device float3 svm_mix_color(float t, float3 col1, float3 col2)
{
- float3 outcol = col1;
- float3 hsv2 = rgb_to_hsv(col2);
+ float3 outcol = col1;
+ float3 hsv2 = rgb_to_hsv(col2);
- if(hsv2.y != 0.0f) {
- float3 hsv = rgb_to_hsv(outcol);
- hsv.x = hsv2.x;
- hsv.y = hsv2.y;
- float3 tmp = hsv_to_rgb(hsv);
+ if (hsv2.y != 0.0f) {
+ float3 hsv = rgb_to_hsv(outcol);
+ hsv.x = hsv2.x;
+ hsv.y = hsv2.y;
+ float3 tmp = hsv_to_rgb(hsv);
- outcol = interp(outcol, tmp, t);
- }
+ outcol = interp(outcol, tmp, t);
+ }
- return outcol;
+ return outcol;
}
ccl_device float3 svm_mix_soft(float t, float3 col1, float3 col2)
{
- float tm = 1.0f - t;
+ float tm = 1.0f - t;
- float3 one = make_float3(1.0f, 1.0f, 1.0f);
- float3 scr = one - (one - col2)*(one - col1);
+ float3 one = make_float3(1.0f, 1.0f, 1.0f);
+ float3 scr = one - (one - col2) * (one - col1);
- return tm*col1 + t*((one - col1)*col2*col1 + col1*scr);
+ return tm * col1 + t * ((one - col1) * col2 * col1 + col1 * scr);
}
ccl_device float3 svm_mix_linear(float t, float3 col1, float3 col2)
{
- return col1 + t*(2.0f*col2 + make_float3(-1.0f, -1.0f, -1.0f));
+ return col1 + t * (2.0f * col2 + make_float3(-1.0f, -1.0f, -1.0f));
}
ccl_device float3 svm_mix_clamp(float3 col)
{
- float3 outcol = col;
+ float3 outcol = col;
- outcol.x = saturate(col.x);
- outcol.y = saturate(col.y);
- outcol.z = saturate(col.z);
+ outcol.x = saturate(col.x);
+ outcol.y = saturate(col.y);
+ outcol.z = saturate(col.z);
- return outcol;
+ return outcol;
}
ccl_device_noinline float3 svm_mix(NodeMix type, float fac, float3 c1, float3 c2)
{
- float t = saturate(fac);
-
- switch(type) {
- case NODE_MIX_BLEND: return svm_mix_blend(t, c1, c2);
- case NODE_MIX_ADD: return svm_mix_add(t, c1, c2);
- case NODE_MIX_MUL: return svm_mix_mul(t, c1, c2);
- case NODE_MIX_SCREEN: return svm_mix_screen(t, c1, c2);
- case NODE_MIX_OVERLAY: return svm_mix_overlay(t, c1, c2);
- case NODE_MIX_SUB: return svm_mix_sub(t, c1, c2);
- case NODE_MIX_DIV: return svm_mix_div(t, c1, c2);
- case NODE_MIX_DIFF: return svm_mix_diff(t, c1, c2);
- case NODE_MIX_DARK: return svm_mix_dark(t, c1, c2);
- case NODE_MIX_LIGHT: return svm_mix_light(t, c1, c2);
- case NODE_MIX_DODGE: return svm_mix_dodge(t, c1, c2);
- case NODE_MIX_BURN: return svm_mix_burn(t, c1, c2);
- case NODE_MIX_HUE: return svm_mix_hue(t, c1, c2);
- case NODE_MIX_SAT: return svm_mix_sat(t, c1, c2);
- case NODE_MIX_VAL: return svm_mix_val (t, c1, c2);
- case NODE_MIX_COLOR: return svm_mix_color(t, c1, c2);
- case NODE_MIX_SOFT: return svm_mix_soft(t, c1, c2);
- case NODE_MIX_LINEAR: return svm_mix_linear(t, c1, c2);
- case NODE_MIX_CLAMP: return svm_mix_clamp(c1);
- }
-
- return make_float3(0.0f, 0.0f, 0.0f);
+ float t = saturate(fac);
+
+ switch (type) {
+ case NODE_MIX_BLEND:
+ return svm_mix_blend(t, c1, c2);
+ case NODE_MIX_ADD:
+ return svm_mix_add(t, c1, c2);
+ case NODE_MIX_MUL:
+ return svm_mix_mul(t, c1, c2);
+ case NODE_MIX_SCREEN:
+ return svm_mix_screen(t, c1, c2);
+ case NODE_MIX_OVERLAY:
+ return svm_mix_overlay(t, c1, c2);
+ case NODE_MIX_SUB:
+ return svm_mix_sub(t, c1, c2);
+ case NODE_MIX_DIV:
+ return svm_mix_div(t, c1, c2);
+ case NODE_MIX_DIFF:
+ return svm_mix_diff(t, c1, c2);
+ case NODE_MIX_DARK:
+ return svm_mix_dark(t, c1, c2);
+ case NODE_MIX_LIGHT:
+ return svm_mix_light(t, c1, c2);
+ case NODE_MIX_DODGE:
+ return svm_mix_dodge(t, c1, c2);
+ case NODE_MIX_BURN:
+ return svm_mix_burn(t, c1, c2);
+ case NODE_MIX_HUE:
+ return svm_mix_hue(t, c1, c2);
+ case NODE_MIX_SAT:
+ return svm_mix_sat(t, c1, c2);
+ case NODE_MIX_VAL:
+ return svm_mix_val(t, c1, c2);
+ case NODE_MIX_COLOR:
+ return svm_mix_color(t, c1, c2);
+ case NODE_MIX_SOFT:
+ return svm_mix_soft(t, c1, c2);
+ case NODE_MIX_LINEAR:
+ return svm_mix_linear(t, c1, c2);
+ case NODE_MIX_CLAMP:
+ return svm_mix_clamp(c1);
+ }
+
+ return make_float3(0.0f, 0.0f, 0.0f);
}
ccl_device_inline float3 svm_brightness_contrast(float3 color, float brightness, float contrast)
{
- float a = 1.0f + contrast;
- float b = brightness - contrast*0.5f;
+ float a = 1.0f + contrast;
+ float b = brightness - contrast * 0.5f;
- color.x = max(a*color.x + b, 0.0f);
- color.y = max(a*color.y + b, 0.0f);
- color.z = max(a*color.z + b, 0.0f);
+ color.x = max(a * color.x + b, 0.0f);
+ color.y = max(a * color.y + b, 0.0f);
+ color.z = max(a * color.z + b, 0.0f);
- return color;
+ return color;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_convert.h b/intern/cycles/kernel/svm/svm_convert.h
index 63b1dc6865e..5df6c9fb755 100644
--- a/intern/cycles/kernel/svm/svm_convert.h
+++ b/intern/cycles/kernel/svm/svm_convert.h
@@ -18,54 +18,55 @@ CCL_NAMESPACE_BEGIN
/* Conversion Nodes */
-ccl_device void svm_node_convert(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint from, uint to)
+ccl_device void svm_node_convert(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint from, uint to)
{
- switch(type) {
- case NODE_CONVERT_FI: {
- float f = stack_load_float(stack, from);
- stack_store_int(stack, to, float_to_int(f));
- break;
- }
- case NODE_CONVERT_FV: {
- float f = stack_load_float(stack, from);
- stack_store_float3(stack, to, make_float3(f, f, f));
- break;
- }
- case NODE_CONVERT_CF: {
- float3 f = stack_load_float3(stack, from);
- float g = linear_rgb_to_gray(kg, f);
- stack_store_float(stack, to, g);
- break;
- }
- case NODE_CONVERT_CI: {
- float3 f = stack_load_float3(stack, from);
- int i = (int)linear_rgb_to_gray(kg, f);
- stack_store_int(stack, to, i);
- break;
- }
- case NODE_CONVERT_VF: {
- float3 f = stack_load_float3(stack, from);
- float g = average(f);
- stack_store_float(stack, to, g);
- break;
- }
- case NODE_CONVERT_VI: {
- float3 f = stack_load_float3(stack, from);
- int i = (int)average(f);
- stack_store_int(stack, to, i);
- break;
- }
- case NODE_CONVERT_IF: {
- float f = (float)stack_load_int(stack, from);
- stack_store_float(stack, to, f);
- break;
- }
- case NODE_CONVERT_IV: {
- float f = (float)stack_load_int(stack, from);
- stack_store_float3(stack, to, make_float3(f, f, f));
- break;
- }
- }
+ switch (type) {
+ case NODE_CONVERT_FI: {
+ float f = stack_load_float(stack, from);
+ stack_store_int(stack, to, float_to_int(f));
+ break;
+ }
+ case NODE_CONVERT_FV: {
+ float f = stack_load_float(stack, from);
+ stack_store_float3(stack, to, make_float3(f, f, f));
+ break;
+ }
+ case NODE_CONVERT_CF: {
+ float3 f = stack_load_float3(stack, from);
+ float g = linear_rgb_to_gray(kg, f);
+ stack_store_float(stack, to, g);
+ break;
+ }
+ case NODE_CONVERT_CI: {
+ float3 f = stack_load_float3(stack, from);
+ int i = (int)linear_rgb_to_gray(kg, f);
+ stack_store_int(stack, to, i);
+ break;
+ }
+ case NODE_CONVERT_VF: {
+ float3 f = stack_load_float3(stack, from);
+ float g = average(f);
+ stack_store_float(stack, to, g);
+ break;
+ }
+ case NODE_CONVERT_VI: {
+ float3 f = stack_load_float3(stack, from);
+ int i = (int)average(f);
+ stack_store_int(stack, to, i);
+ break;
+ }
+ case NODE_CONVERT_IF: {
+ float f = (float)stack_load_int(stack, from);
+ stack_store_float(stack, to, f);
+ break;
+ }
+ case NODE_CONVERT_IV: {
+ float f = (float)stack_load_int(stack, from);
+ stack_store_float3(stack, to, make_float3(f, f, f));
+ break;
+ }
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_displace.h b/intern/cycles/kernel/svm/svm_displace.h
index a69c9fe81f9..f16664a684c 100644
--- a/intern/cycles/kernel/svm/svm_displace.h
+++ b/intern/cycles/kernel/svm/svm_displace.h
@@ -21,144 +21,149 @@ CCL_NAMESPACE_BEGIN
ccl_device void svm_node_set_bump(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
#ifdef __RAY_DIFFERENTIALS__
- /* get normal input */
- uint normal_offset, scale_offset, invert, use_object_space;
- decode_node_uchar4(node.y, &normal_offset, &scale_offset, &invert, &use_object_space);
+ /* get normal input */
+ uint normal_offset, scale_offset, invert, use_object_space;
+ decode_node_uchar4(node.y, &normal_offset, &scale_offset, &invert, &use_object_space);
- float3 normal_in = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): sd->N;
+ float3 normal_in = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N;
- float3 dPdx = sd->dP.dx;
- float3 dPdy = sd->dP.dy;
+ float3 dPdx = sd->dP.dx;
+ float3 dPdy = sd->dP.dy;
- if(use_object_space) {
- object_inverse_normal_transform(kg, sd, &normal_in);
- object_inverse_dir_transform(kg, sd, &dPdx);
- object_inverse_dir_transform(kg, sd, &dPdy);
- }
+ if (use_object_space) {
+ object_inverse_normal_transform(kg, sd, &normal_in);
+ object_inverse_dir_transform(kg, sd, &dPdx);
+ object_inverse_dir_transform(kg, sd, &dPdy);
+ }
- /* get surface tangents from normal */
- float3 Rx = cross(dPdy, normal_in);
- float3 Ry = cross(normal_in, dPdx);
+ /* get surface tangents from normal */
+ float3 Rx = cross(dPdy, normal_in);
+ float3 Ry = cross(normal_in, dPdx);
- /* get bump values */
- uint c_offset, x_offset, y_offset, strength_offset;
- decode_node_uchar4(node.z, &c_offset, &x_offset, &y_offset, &strength_offset);
+ /* get bump values */
+ uint c_offset, x_offset, y_offset, strength_offset;
+ decode_node_uchar4(node.z, &c_offset, &x_offset, &y_offset, &strength_offset);
- float h_c = stack_load_float(stack, c_offset);
- float h_x = stack_load_float(stack, x_offset);
- float h_y = stack_load_float(stack, y_offset);
+ float h_c = stack_load_float(stack, c_offset);
+ float h_x = stack_load_float(stack, x_offset);
+ float h_y = stack_load_float(stack, y_offset);
- /* compute surface gradient and determinant */
- float det = dot(dPdx, Rx);
- float3 surfgrad = (h_x - h_c)*Rx + (h_y - h_c)*Ry;
+ /* compute surface gradient and determinant */
+ float det = dot(dPdx, Rx);
+ float3 surfgrad = (h_x - h_c) * Rx + (h_y - h_c) * Ry;
- float absdet = fabsf(det);
+ float absdet = fabsf(det);
- float strength = stack_load_float(stack, strength_offset);
- float scale = stack_load_float(stack, scale_offset);
+ float strength = stack_load_float(stack, strength_offset);
+ float scale = stack_load_float(stack, scale_offset);
- if(invert)
- scale *= -1.0f;
+ if (invert)
+ scale *= -1.0f;
- strength = max(strength, 0.0f);
+ strength = max(strength, 0.0f);
- /* compute and output perturbed normal */
- float3 normal_out = safe_normalize(absdet*normal_in - scale*signf(det)*surfgrad);
- if(is_zero(normal_out)) {
- normal_out = normal_in;
- }
- else {
- normal_out = normalize(strength*normal_out + (1.0f - strength)*normal_in);
- }
+ /* compute and output perturbed normal */
+ float3 normal_out = safe_normalize(absdet * normal_in - scale * signf(det) * surfgrad);
+ if (is_zero(normal_out)) {
+ normal_out = normal_in;
+ }
+ else {
+ normal_out = normalize(strength * normal_out + (1.0f - strength) * normal_in);
+ }
- if(use_object_space) {
- object_normal_transform(kg, sd, &normal_out);
- }
+ if (use_object_space) {
+ object_normal_transform(kg, sd, &normal_out);
+ }
- normal_out = ensure_valid_reflection(sd->Ng, sd->I, normal_out);
+ normal_out = ensure_valid_reflection(sd->Ng, sd->I, normal_out);
- stack_store_float3(stack, node.w, normal_out);
+ stack_store_float3(stack, node.w, normal_out);
#endif
}
/* Displacement Node */
-ccl_device void svm_node_set_displacement(KernelGlobals *kg, ShaderData *sd, float *stack, uint fac_offset)
+ccl_device void svm_node_set_displacement(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint fac_offset)
{
- float3 dP = stack_load_float3(stack, fac_offset);
- sd->P += dP;
+ float3 dP = stack_load_float3(stack, fac_offset);
+ sd->P += dP;
}
ccl_device void svm_node_displacement(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
- uint height_offset, midlevel_offset, scale_offset, normal_offset;
- decode_node_uchar4(node.y, &height_offset, &midlevel_offset, &scale_offset, &normal_offset);
-
- float height = stack_load_float(stack, height_offset);
- float midlevel = stack_load_float(stack, midlevel_offset);
- float scale = stack_load_float(stack, scale_offset);
- float3 normal = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): sd->N;
- uint space = node.w;
-
- float3 dP = normal;
-
- if(space == NODE_NORMAL_MAP_OBJECT) {
- /* Object space. */
- object_inverse_normal_transform(kg, sd, &dP);
- dP *= (height - midlevel) * scale;
- object_dir_transform(kg, sd, &dP);
- }
- else {
- /* World space. */
- dP *= (height - midlevel) * scale;
- }
-
- stack_store_float3(stack, node.z, dP);
+ uint height_offset, midlevel_offset, scale_offset, normal_offset;
+ decode_node_uchar4(node.y, &height_offset, &midlevel_offset, &scale_offset, &normal_offset);
+
+ float height = stack_load_float(stack, height_offset);
+ float midlevel = stack_load_float(stack, midlevel_offset);
+ float scale = stack_load_float(stack, scale_offset);
+ float3 normal = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N;
+ uint space = node.w;
+
+ float3 dP = normal;
+
+ if (space == NODE_NORMAL_MAP_OBJECT) {
+ /* Object space. */
+ object_inverse_normal_transform(kg, sd, &dP);
+ dP *= (height - midlevel) * scale;
+ object_dir_transform(kg, sd, &dP);
+ }
+ else {
+ /* World space. */
+ dP *= (height - midlevel) * scale;
+ }
+
+ stack_store_float3(stack, node.z, dP);
}
-ccl_device void svm_node_vector_displacement(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_vector_displacement(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
- uint4 data_node = read_node(kg, offset);
- uint space = data_node.x;
-
- uint vector_offset, midlevel_offset,scale_offset, displacement_offset;
- decode_node_uchar4(node.y, &vector_offset, &midlevel_offset, &scale_offset, &displacement_offset);
-
- float3 vector = stack_load_float3(stack, vector_offset);
- float midlevel = stack_load_float(stack, midlevel_offset);
- float scale = stack_load_float(stack, scale_offset);
- float3 dP = (vector - make_float3(midlevel, midlevel, midlevel)) * scale;
-
- if(space == NODE_NORMAL_MAP_TANGENT) {
- /* Tangent space. */
- float3 normal = sd->N;
- object_inverse_normal_transform(kg, sd, &normal);
-
- const AttributeDescriptor attr = find_attribute(kg, sd, node.z);
- float3 tangent;
- if(attr.offset != ATTR_STD_NOT_FOUND) {
- tangent = primitive_surface_attribute_float3(kg, sd, attr, NULL, NULL);
- }
- else {
- tangent = normalize(sd->dPdu);
- }
-
- float3 bitangent = normalize(cross(normal, tangent));
- const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w);
- if(attr_sign.offset != ATTR_STD_NOT_FOUND) {
- float sign = primitive_surface_attribute_float(kg, sd, attr_sign, NULL, NULL);
- bitangent *= sign;
- }
-
- dP = tangent*dP.x + normal*dP.y + bitangent*dP.z;
- }
-
- if(space != NODE_NORMAL_MAP_WORLD) {
- /* Tangent or object space. */
- object_dir_transform(kg, sd, &dP);
- }
-
- stack_store_float3(stack, displacement_offset, dP);
+ uint4 data_node = read_node(kg, offset);
+ uint space = data_node.x;
+
+ uint vector_offset, midlevel_offset, scale_offset, displacement_offset;
+ decode_node_uchar4(
+ node.y, &vector_offset, &midlevel_offset, &scale_offset, &displacement_offset);
+
+ float3 vector = stack_load_float3(stack, vector_offset);
+ float midlevel = stack_load_float(stack, midlevel_offset);
+ float scale = stack_load_float(stack, scale_offset);
+ float3 dP = (vector - make_float3(midlevel, midlevel, midlevel)) * scale;
+
+ if (space == NODE_NORMAL_MAP_TANGENT) {
+ /* Tangent space. */
+ float3 normal = sd->N;
+ object_inverse_normal_transform(kg, sd, &normal);
+
+ const AttributeDescriptor attr = find_attribute(kg, sd, node.z);
+ float3 tangent;
+ if (attr.offset != ATTR_STD_NOT_FOUND) {
+ tangent = primitive_surface_attribute_float3(kg, sd, attr, NULL, NULL);
+ }
+ else {
+ tangent = normalize(sd->dPdu);
+ }
+
+ float3 bitangent = normalize(cross(normal, tangent));
+ const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w);
+ if (attr_sign.offset != ATTR_STD_NOT_FOUND) {
+ float sign = primitive_surface_attribute_float(kg, sd, attr_sign, NULL, NULL);
+ bitangent *= sign;
+ }
+
+ dP = tangent * dP.x + normal * dP.y + bitangent * dP.z;
+ }
+
+ if (space != NODE_NORMAL_MAP_WORLD) {
+ /* Tangent or object space. */
+ object_dir_transform(kg, sd, &dP);
+ }
+
+ stack_store_float3(stack, displacement_offset, dP);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_fresnel.h b/intern/cycles/kernel/svm/svm_fresnel.h
index 99dda5fb170..03119991597 100644
--- a/intern/cycles/kernel/svm/svm_fresnel.h
+++ b/intern/cycles/kernel/svm/svm_fresnel.h
@@ -18,56 +18,60 @@ CCL_NAMESPACE_BEGIN
/* Fresnel Node */
-ccl_device void svm_node_fresnel(ShaderData *sd, float *stack, uint ior_offset, uint ior_value, uint node)
+ccl_device void svm_node_fresnel(
+ ShaderData *sd, float *stack, uint ior_offset, uint ior_value, uint node)
{
- uint normal_offset, out_offset;
- decode_node_uchar4(node, &normal_offset, &out_offset, NULL, NULL);
- float eta = (stack_valid(ior_offset))? stack_load_float(stack, ior_offset): __uint_as_float(ior_value);
- float3 normal_in = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): sd->N;
+ uint normal_offset, out_offset;
+ decode_node_uchar4(node, &normal_offset, &out_offset, NULL, NULL);
+ float eta = (stack_valid(ior_offset)) ? stack_load_float(stack, ior_offset) :
+ __uint_as_float(ior_value);
+ float3 normal_in = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N;
- eta = fmaxf(eta, 1e-5f);
- eta = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta;
+ eta = fmaxf(eta, 1e-5f);
+ eta = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta;
- float f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta);
+ float f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta);
- stack_store_float(stack, out_offset, f);
+ stack_store_float(stack, out_offset, f);
}
/* Layer Weight Node */
ccl_device void svm_node_layer_weight(ShaderData *sd, float *stack, uint4 node)
{
- uint blend_offset = node.y;
- uint blend_value = node.z;
+ uint blend_offset = node.y;
+ uint blend_value = node.z;
- uint type, normal_offset, out_offset;
- decode_node_uchar4(node.w, &type, &normal_offset, &out_offset, NULL);
+ uint type, normal_offset, out_offset;
+ decode_node_uchar4(node.w, &type, &normal_offset, &out_offset, NULL);
- float blend = (stack_valid(blend_offset))? stack_load_float(stack, blend_offset): __uint_as_float(blend_value);
- float3 normal_in = (stack_valid(normal_offset))? stack_load_float3(stack, normal_offset): sd->N;
+ float blend = (stack_valid(blend_offset)) ? stack_load_float(stack, blend_offset) :
+ __uint_as_float(blend_value);
+ float3 normal_in = (stack_valid(normal_offset)) ? stack_load_float3(stack, normal_offset) :
+ sd->N;
- float f;
+ float f;
- if(type == NODE_LAYER_WEIGHT_FRESNEL) {
- float eta = fmaxf(1.0f - blend, 1e-5f);
- eta = (sd->flag & SD_BACKFACING)? eta: 1.0f/eta;
+ if (type == NODE_LAYER_WEIGHT_FRESNEL) {
+ float eta = fmaxf(1.0f - blend, 1e-5f);
+ eta = (sd->flag & SD_BACKFACING) ? eta : 1.0f / eta;
- f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta);
- }
- else {
- f = fabsf(dot(sd->I, normal_in));
+ f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta);
+ }
+ else {
+ f = fabsf(dot(sd->I, normal_in));
- if(blend != 0.5f) {
- blend = clamp(blend, 0.0f, 1.0f-1e-5f);
- blend = (blend < 0.5f)? 2.0f*blend: 0.5f/(1.0f - blend);
+ if (blend != 0.5f) {
+ blend = clamp(blend, 0.0f, 1.0f - 1e-5f);
+ blend = (blend < 0.5f) ? 2.0f * blend : 0.5f / (1.0f - blend);
- f = powf(f, blend);
- }
+ f = powf(f, blend);
+ }
- f = 1.0f - f;
- }
+ f = 1.0f - f;
+ }
- stack_store_float(stack, out_offset, f);
+ stack_store_float(stack, out_offset, f);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_gamma.h b/intern/cycles/kernel/svm/svm_gamma.h
index 171945a60bc..65eb08eb0eb 100644
--- a/intern/cycles/kernel/svm/svm_gamma.h
+++ b/intern/cycles/kernel/svm/svm_gamma.h
@@ -16,15 +16,16 @@
CCL_NAMESPACE_BEGIN
-ccl_device void svm_node_gamma(ShaderData *sd, float *stack, uint in_gamma, uint in_color, uint out_color)
+ccl_device void svm_node_gamma(
+ ShaderData *sd, float *stack, uint in_gamma, uint in_color, uint out_color)
{
- float3 color = stack_load_float3(stack, in_color);
- float gamma = stack_load_float(stack, in_gamma);
+ float3 color = stack_load_float3(stack, in_color);
+ float gamma = stack_load_float(stack, in_gamma);
- color = svm_math_gamma_color(color, gamma);
+ color = svm_math_gamma_color(color, gamma);
- if(stack_valid(out_color))
- stack_store_float3(stack, out_color, color);
+ if (stack_valid(out_color))
+ stack_store_float3(stack, out_color, color);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h
index 05443772505..a9104643299 100644
--- a/intern/cycles/kernel/svm/svm_geometry.h
+++ b/intern/cycles/kernel/svm/svm_geometry.h
@@ -18,192 +18,217 @@ CCL_NAMESPACE_BEGIN
/* Geometry Node */
-ccl_device_inline void svm_node_geometry(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint type,
- uint out_offset)
+ccl_device_inline void svm_node_geometry(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
{
- float3 data;
-
- switch(type) {
- case NODE_GEOM_P: data = sd->P; break;
- case NODE_GEOM_N: data = sd->N; break;
+ float3 data;
+
+ switch (type) {
+ case NODE_GEOM_P:
+ data = sd->P;
+ break;
+ case NODE_GEOM_N:
+ data = sd->N;
+ break;
#ifdef __DPDU__
- case NODE_GEOM_T: data = primitive_tangent(kg, sd); break;
+ case NODE_GEOM_T:
+ data = primitive_tangent(kg, sd);
+ break;
#endif
- case NODE_GEOM_I: data = sd->I; break;
- case NODE_GEOM_Ng: data = sd->Ng; break;
+ case NODE_GEOM_I:
+ data = sd->I;
+ break;
+ case NODE_GEOM_Ng:
+ data = sd->Ng;
+ break;
#ifdef __UV__
- case NODE_GEOM_uv: data = make_float3(sd->u, sd->v, 0.0f); break;
+ case NODE_GEOM_uv:
+ data = make_float3(sd->u, sd->v, 0.0f);
+ break;
#endif
- default: data = make_float3(0.0f, 0.0f, 0.0f);
- }
+ default:
+ data = make_float3(0.0f, 0.0f, 0.0f);
+ }
- stack_store_float3(stack, out_offset, data);
+ stack_store_float3(stack, out_offset, data);
}
-ccl_device void svm_node_geometry_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device void svm_node_geometry_bump_dx(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
{
#ifdef __RAY_DIFFERENTIALS__
- float3 data;
-
- switch(type) {
- case NODE_GEOM_P: data = sd->P + sd->dP.dx; break;
- case NODE_GEOM_uv: data = make_float3(sd->u + sd->du.dx, sd->v + sd->dv.dx, 0.0f); break;
- default: svm_node_geometry(kg, sd, stack, type, out_offset); return;
- }
-
- stack_store_float3(stack, out_offset, data);
+ float3 data;
+
+ switch (type) {
+ case NODE_GEOM_P:
+ data = sd->P + sd->dP.dx;
+ break;
+ case NODE_GEOM_uv:
+ data = make_float3(sd->u + sd->du.dx, sd->v + sd->dv.dx, 0.0f);
+ break;
+ default:
+ svm_node_geometry(kg, sd, stack, type, out_offset);
+ return;
+ }
+
+ stack_store_float3(stack, out_offset, data);
#else
- svm_node_geometry(kg, sd, stack, type, out_offset);
+ svm_node_geometry(kg, sd, stack, type, out_offset);
#endif
}
-ccl_device void svm_node_geometry_bump_dy(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device void svm_node_geometry_bump_dy(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
{
#ifdef __RAY_DIFFERENTIALS__
- float3 data;
-
- switch(type) {
- case NODE_GEOM_P: data = sd->P + sd->dP.dy; break;
- case NODE_GEOM_uv: data = make_float3(sd->u + sd->du.dy, sd->v + sd->dv.dy, 0.0f); break;
- default: svm_node_geometry(kg, sd, stack, type, out_offset); return;
- }
-
- stack_store_float3(stack, out_offset, data);
+ float3 data;
+
+ switch (type) {
+ case NODE_GEOM_P:
+ data = sd->P + sd->dP.dy;
+ break;
+ case NODE_GEOM_uv:
+ data = make_float3(sd->u + sd->du.dy, sd->v + sd->dv.dy, 0.0f);
+ break;
+ default:
+ svm_node_geometry(kg, sd, stack, type, out_offset);
+ return;
+ }
+
+ stack_store_float3(stack, out_offset, data);
#else
- svm_node_geometry(kg, sd, stack, type, out_offset);
+ svm_node_geometry(kg, sd, stack, type, out_offset);
#endif
}
/* Object Info */
-ccl_device void svm_node_object_info(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device void svm_node_object_info(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
{
- float data;
-
- switch(type) {
- case NODE_INFO_OB_LOCATION: {
- stack_store_float3(stack, out_offset, object_location(kg, sd));
- return;
- }
- case NODE_INFO_OB_INDEX: data = object_pass_id(kg, sd->object); break;
- case NODE_INFO_MAT_INDEX: data = shader_pass_id(kg, sd); break;
- case NODE_INFO_OB_RANDOM: {
- if(sd->lamp != LAMP_NONE) {
- data = lamp_random_number(kg, sd->lamp);
- }
- else {
- data = object_random_number(kg, sd->object);
- }
- break;
- }
- default: data = 0.0f; break;
- }
-
- stack_store_float(stack, out_offset, data);
+ float data;
+
+ switch (type) {
+ case NODE_INFO_OB_LOCATION: {
+ stack_store_float3(stack, out_offset, object_location(kg, sd));
+ return;
+ }
+ case NODE_INFO_OB_INDEX:
+ data = object_pass_id(kg, sd->object);
+ break;
+ case NODE_INFO_MAT_INDEX:
+ data = shader_pass_id(kg, sd);
+ break;
+ case NODE_INFO_OB_RANDOM: {
+ if (sd->lamp != LAMP_NONE) {
+ data = lamp_random_number(kg, sd->lamp);
+ }
+ else {
+ data = object_random_number(kg, sd->object);
+ }
+ break;
+ }
+ default:
+ data = 0.0f;
+ break;
+ }
+
+ stack_store_float(stack, out_offset, data);
}
/* Particle Info */
-ccl_device void svm_node_particle_info(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint type,
- uint out_offset)
+ccl_device void svm_node_particle_info(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
{
- switch(type) {
- case NODE_INFO_PAR_INDEX: {
- int particle_id = object_particle_id(kg, sd->object);
- stack_store_float(stack, out_offset, particle_index(kg, particle_id));
- break;
- }
- case NODE_INFO_PAR_RANDOM: {
- int particle_id = object_particle_id(kg, sd->object);
- float random = hash_int_01(particle_index(kg, particle_id));
- stack_store_float(stack, out_offset, random);
- break;
- }
- case NODE_INFO_PAR_AGE: {
- int particle_id = object_particle_id(kg, sd->object);
- stack_store_float(stack, out_offset, particle_age(kg, particle_id));
- break;
- }
- case NODE_INFO_PAR_LIFETIME: {
- int particle_id = object_particle_id(kg, sd->object);
- stack_store_float(stack, out_offset, particle_lifetime(kg, particle_id));
- break;
- }
- case NODE_INFO_PAR_LOCATION: {
- int particle_id = object_particle_id(kg, sd->object);
- stack_store_float3(stack, out_offset, particle_location(kg, particle_id));
- break;
- }
-#if 0 /* XXX float4 currently not supported in SVM stack */
- case NODE_INFO_PAR_ROTATION: {
- int particle_id = object_particle_id(kg, sd->object);
- stack_store_float4(stack, out_offset, particle_rotation(kg, particle_id));
- break;
- }
+ switch (type) {
+ case NODE_INFO_PAR_INDEX: {
+ int particle_id = object_particle_id(kg, sd->object);
+ stack_store_float(stack, out_offset, particle_index(kg, particle_id));
+ break;
+ }
+ case NODE_INFO_PAR_RANDOM: {
+ int particle_id = object_particle_id(kg, sd->object);
+ float random = hash_int_01(particle_index(kg, particle_id));
+ stack_store_float(stack, out_offset, random);
+ break;
+ }
+ case NODE_INFO_PAR_AGE: {
+ int particle_id = object_particle_id(kg, sd->object);
+ stack_store_float(stack, out_offset, particle_age(kg, particle_id));
+ break;
+ }
+ case NODE_INFO_PAR_LIFETIME: {
+ int particle_id = object_particle_id(kg, sd->object);
+ stack_store_float(stack, out_offset, particle_lifetime(kg, particle_id));
+ break;
+ }
+ case NODE_INFO_PAR_LOCATION: {
+ int particle_id = object_particle_id(kg, sd->object);
+ stack_store_float3(stack, out_offset, particle_location(kg, particle_id));
+ break;
+ }
+#if 0 /* XXX float4 currently not supported in SVM stack */
+ case NODE_INFO_PAR_ROTATION: {
+ int particle_id = object_particle_id(kg, sd->object);
+ stack_store_float4(stack, out_offset, particle_rotation(kg, particle_id));
+ break;
+ }
#endif
- case NODE_INFO_PAR_SIZE: {
- int particle_id = object_particle_id(kg, sd->object);
- stack_store_float(stack, out_offset, particle_size(kg, particle_id));
- break;
- }
- case NODE_INFO_PAR_VELOCITY: {
- int particle_id = object_particle_id(kg, sd->object);
- stack_store_float3(stack, out_offset, particle_velocity(kg, particle_id));
- break;
- }
- case NODE_INFO_PAR_ANGULAR_VELOCITY: {
- int particle_id = object_particle_id(kg, sd->object);
- stack_store_float3(stack, out_offset, particle_angular_velocity(kg, particle_id));
- break;
- }
- }
+ case NODE_INFO_PAR_SIZE: {
+ int particle_id = object_particle_id(kg, sd->object);
+ stack_store_float(stack, out_offset, particle_size(kg, particle_id));
+ break;
+ }
+ case NODE_INFO_PAR_VELOCITY: {
+ int particle_id = object_particle_id(kg, sd->object);
+ stack_store_float3(stack, out_offset, particle_velocity(kg, particle_id));
+ break;
+ }
+ case NODE_INFO_PAR_ANGULAR_VELOCITY: {
+ int particle_id = object_particle_id(kg, sd->object);
+ stack_store_float3(stack, out_offset, particle_angular_velocity(kg, particle_id));
+ break;
+ }
+ }
}
#ifdef __HAIR__
/* Hair Info */
-ccl_device void svm_node_hair_info(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint type,
- uint out_offset)
+ccl_device void svm_node_hair_info(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
{
- float data;
- float3 data3;
-
- switch(type) {
- case NODE_INFO_CURVE_IS_STRAND: {
- data = (sd->type & PRIMITIVE_ALL_CURVE) != 0;
- stack_store_float(stack, out_offset, data);
- break;
- }
- case NODE_INFO_CURVE_INTERCEPT:
- break; /* handled as attribute */
- case NODE_INFO_CURVE_RANDOM:
- break; /* handled as attribute */
- case NODE_INFO_CURVE_THICKNESS: {
- data = curve_thickness(kg, sd);
- stack_store_float(stack, out_offset, data);
- break;
- }
- /*case NODE_INFO_CURVE_FADE: {
- data = sd->curve_transparency;
- stack_store_float(stack, out_offset, data);
- break;
- }*/
- case NODE_INFO_CURVE_TANGENT_NORMAL: {
- data3 = curve_tangent_normal(kg, sd);
- stack_store_float3(stack, out_offset, data3);
- break;
- }
- }
+ float data;
+ float3 data3;
+
+ switch (type) {
+ case NODE_INFO_CURVE_IS_STRAND: {
+ data = (sd->type & PRIMITIVE_ALL_CURVE) != 0;
+ stack_store_float(stack, out_offset, data);
+ break;
+ }
+ case NODE_INFO_CURVE_INTERCEPT:
+ break; /* handled as attribute */
+ case NODE_INFO_CURVE_RANDOM:
+ break; /* handled as attribute */
+ case NODE_INFO_CURVE_THICKNESS: {
+ data = curve_thickness(kg, sd);
+ stack_store_float(stack, out_offset, data);
+ break;
+ }
+ /*case NODE_INFO_CURVE_FADE: {
+ data = sd->curve_transparency;
+ stack_store_float(stack, out_offset, data);
+ break;
+ }*/
+ case NODE_INFO_CURVE_TANGENT_NORMAL: {
+ data3 = curve_tangent_normal(kg, sd);
+ stack_store_float3(stack, out_offset, data3);
+ break;
+ }
+ }
}
#endif
diff --git a/intern/cycles/kernel/svm/svm_gradient.h b/intern/cycles/kernel/svm/svm_gradient.h
index 177e0506dee..c315564fbc2 100644
--- a/intern/cycles/kernel/svm/svm_gradient.h
+++ b/intern/cycles/kernel/svm/svm_gradient.h
@@ -20,61 +20,61 @@ CCL_NAMESPACE_BEGIN
ccl_device float svm_gradient(float3 p, NodeGradientType type)
{
- float x, y, z;
+ float x, y, z;
- x = p.x;
- y = p.y;
- z = p.z;
+ x = p.x;
+ y = p.y;
+ z = p.z;
- if(type == NODE_BLEND_LINEAR) {
- return x;
- }
- else if(type == NODE_BLEND_QUADRATIC) {
- float r = fmaxf(x, 0.0f);
- return r*r;
- }
- else if(type == NODE_BLEND_EASING) {
- float r = fminf(fmaxf(x, 0.0f), 1.0f);
- float t = r*r;
+ if (type == NODE_BLEND_LINEAR) {
+ return x;
+ }
+ else if (type == NODE_BLEND_QUADRATIC) {
+ float r = fmaxf(x, 0.0f);
+ return r * r;
+ }
+ else if (type == NODE_BLEND_EASING) {
+ float r = fminf(fmaxf(x, 0.0f), 1.0f);
+ float t = r * r;
- return (3.0f*t - 2.0f*t*r);
- }
- else if(type == NODE_BLEND_DIAGONAL) {
- return (x + y) * 0.5f;
- }
- else if(type == NODE_BLEND_RADIAL) {
- return atan2f(y, x) / M_2PI_F + 0.5f;
- }
- else {
- /* Bias a little bit for the case where p is a unit length vector,
- * to get exactly zero instead of a small random value depending
- * on float precision. */
- float r = fmaxf(0.999999f - sqrtf(x*x + y*y + z*z), 0.0f);
+ return (3.0f * t - 2.0f * t * r);
+ }
+ else if (type == NODE_BLEND_DIAGONAL) {
+ return (x + y) * 0.5f;
+ }
+ else if (type == NODE_BLEND_RADIAL) {
+ return atan2f(y, x) / M_2PI_F + 0.5f;
+ }
+ else {
+ /* Bias a little bit for the case where p is a unit length vector,
+ * to get exactly zero instead of a small random value depending
+ * on float precision. */
+ float r = fmaxf(0.999999f - sqrtf(x * x + y * y + z * z), 0.0f);
- if(type == NODE_BLEND_QUADRATIC_SPHERE)
- return r*r;
- else if(type == NODE_BLEND_SPHERICAL)
- return r;
- }
+ if (type == NODE_BLEND_QUADRATIC_SPHERE)
+ return r * r;
+ else if (type == NODE_BLEND_SPHERICAL)
+ return r;
+ }
- return 0.0f;
+ return 0.0f;
}
ccl_device void svm_node_tex_gradient(ShaderData *sd, float *stack, uint4 node)
{
- uint type, co_offset, color_offset, fac_offset;
+ uint type, co_offset, color_offset, fac_offset;
- decode_node_uchar4(node.y, &type, &co_offset, &fac_offset, &color_offset);
+ decode_node_uchar4(node.y, &type, &co_offset, &fac_offset, &color_offset);
- float3 co = stack_load_float3(stack, co_offset);
+ float3 co = stack_load_float3(stack, co_offset);
- float f = svm_gradient(co, (NodeGradientType)type);
- f = saturate(f);
+ float f = svm_gradient(co, (NodeGradientType)type);
+ f = saturate(f);
- if(stack_valid(fac_offset))
- stack_store_float(stack, fac_offset, f);
- if(stack_valid(color_offset))
- stack_store_float3(stack, color_offset, make_float3(f, f, f));
+ if (stack_valid(fac_offset))
+ stack_store_float(stack, fac_offset, f);
+ if (stack_valid(color_offset))
+ stack_store_float3(stack, color_offset, make_float3(f, f, f));
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_hsv.h b/intern/cycles/kernel/svm/svm_hsv.h
index 6f3efa639e2..72379fba870 100644
--- a/intern/cycles/kernel/svm/svm_hsv.h
+++ b/intern/cycles/kernel/svm/svm_hsv.h
@@ -19,43 +19,44 @@
CCL_NAMESPACE_BEGIN
-ccl_device void svm_node_hsv(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_hsv(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
- uint in_color_offset, fac_offset, out_color_offset;
- uint hue_offset, sat_offset, val_offset;
- decode_node_uchar4(node.y, &in_color_offset, &fac_offset, &out_color_offset, NULL);
- decode_node_uchar4(node.z, &hue_offset, &sat_offset, &val_offset, NULL);
+ uint in_color_offset, fac_offset, out_color_offset;
+ uint hue_offset, sat_offset, val_offset;
+ decode_node_uchar4(node.y, &in_color_offset, &fac_offset, &out_color_offset, NULL);
+ decode_node_uchar4(node.z, &hue_offset, &sat_offset, &val_offset, NULL);
- float fac = stack_load_float(stack, fac_offset);
- float3 in_color = stack_load_float3(stack, in_color_offset);
- float3 color = in_color;
+ float fac = stack_load_float(stack, fac_offset);
+ float3 in_color = stack_load_float3(stack, in_color_offset);
+ float3 color = in_color;
- float hue = stack_load_float(stack, hue_offset);
- float sat = stack_load_float(stack, sat_offset);
- float val = stack_load_float(stack, val_offset);
+ float hue = stack_load_float(stack, hue_offset);
+ float sat = stack_load_float(stack, sat_offset);
+ float val = stack_load_float(stack, val_offset);
- color = rgb_to_hsv(color);
+ color = rgb_to_hsv(color);
- /* remember: fmod doesn't work for negative numbers here */
- color.x = fmodf(color.x + hue + 0.5f, 1.0f);
- color.y = saturate(color.y * sat);
- color.z *= val;
+ /* remember: fmod doesn't work for negative numbers here */
+ color.x = fmodf(color.x + hue + 0.5f, 1.0f);
+ color.y = saturate(color.y * sat);
+ color.z *= val;
- color = hsv_to_rgb(color);
+ color = hsv_to_rgb(color);
- color.x = fac*color.x + (1.0f - fac)*in_color.x;
- color.y = fac*color.y + (1.0f - fac)*in_color.y;
- color.z = fac*color.z + (1.0f - fac)*in_color.z;
+ color.x = fac * color.x + (1.0f - fac) * in_color.x;
+ color.y = fac * color.y + (1.0f - fac) * in_color.y;
+ color.z = fac * color.z + (1.0f - fac) * in_color.z;
- /* Clamp color to prevent negative values caused by oversaturation. */
- color.x = max(color.x, 0.0f);
- color.y = max(color.y, 0.0f);
- color.z = max(color.z, 0.0f);
+ /* Clamp color to prevent negative values caused by oversaturation. */
+ color.x = max(color.x, 0.0f);
+ color.y = max(color.y, 0.0f);
+ color.z = max(color.z, 0.0f);
- if(stack_valid(out_color_offset))
- stack_store_float3(stack, out_color_offset, color);
+ if (stack_valid(out_color_offset))
+ stack_store_float3(stack, out_color_offset, color);
}
CCL_NAMESPACE_END
-#endif /* __SVM_HSV_H__ */
+#endif /* __SVM_HSV_H__ */
diff --git a/intern/cycles/kernel/svm/svm_ies.h b/intern/cycles/kernel/svm/svm_ies.h
index 6130c3348b0..9434c0c5505 100644
--- a/intern/cycles/kernel/svm/svm_ies.h
+++ b/intern/cycles/kernel/svm/svm_ies.h
@@ -18,93 +18,102 @@ CCL_NAMESPACE_BEGIN
/* IES Light */
-ccl_device_inline float interpolate_ies_vertical(KernelGlobals *kg, int ofs, int v, int v_num, float v_frac, int h)
+ccl_device_inline float interpolate_ies_vertical(
+ KernelGlobals *kg, int ofs, int v, int v_num, float v_frac, int h)
{
- /* Since lookups are performed in spherical coordinates, clamping the coordinates at the low end of v
- * (corresponding to the north pole) would result in artifacts.
- * The proper way of dealing with this would be to lookup the corresponding value on the other side of the pole,
- * but since the horizontal coordinates might be nonuniform, this would require yet another interpolation.
- * Therefore, the assumtion is made that the light is going to be symmetrical, which means that we can just take
- * the corresponding value at the current horizontal coordinate. */
-
-#define IES_LOOKUP(v) kernel_tex_fetch(__ies, ofs+h*v_num+(v))
- /* If v is zero, assume symmetry and read at v=1 instead of v=-1. */
- float a = IES_LOOKUP((v == 0)? 1 : v-1);
- float b = IES_LOOKUP(v);
- float c = IES_LOOKUP(v+1);
- float d = IES_LOOKUP(min(v+2, v_num-1));
+ /* Since lookups are performed in spherical coordinates, clamping the coordinates at the low end of v
+ * (corresponding to the north pole) would result in artifacts.
+ * The proper way of dealing with this would be to lookup the corresponding value on the other side of the pole,
+ * but since the horizontal coordinates might be nonuniform, this would require yet another interpolation.
+ * Therefore, the assumption is made that the light is going to be symmetrical, which means that we can just take
+ * the corresponding value at the current horizontal coordinate. */
+
+#define IES_LOOKUP(v) kernel_tex_fetch(__ies, ofs + h * v_num + (v))
+ /* If v is zero, assume symmetry and read at v=1 instead of v=-1. */
+ float a = IES_LOOKUP((v == 0) ? 1 : v - 1);
+ float b = IES_LOOKUP(v);
+ float c = IES_LOOKUP(v + 1);
+ float d = IES_LOOKUP(min(v + 2, v_num - 1));
#undef IES_LOOKUP
- return cubic_interp(a, b, c, d, v_frac);
+ return cubic_interp(a, b, c, d, v_frac);
}
-ccl_device_inline float kernel_ies_interp(KernelGlobals *kg, int slot, float h_angle, float v_angle)
+ccl_device_inline float kernel_ies_interp(KernelGlobals *kg,
+ int slot,
+ float h_angle,
+ float v_angle)
{
- /* Find offset of the IES data in the table. */
- int ofs = __float_as_int(kernel_tex_fetch(__ies, slot));
- if(ofs == -1) {
- return 100.0f;
- }
-
- int h_num = __float_as_int(kernel_tex_fetch(__ies, ofs++));
- int v_num = __float_as_int(kernel_tex_fetch(__ies, ofs++));
-
-#define IES_LOOKUP_ANGLE_H(h) kernel_tex_fetch(__ies, ofs+(h))
-#define IES_LOOKUP_ANGLE_V(v) kernel_tex_fetch(__ies, ofs+h_num+(v))
-
- /* Check whether the angle is within the bounds of the IES texture. */
- if(v_angle >= IES_LOOKUP_ANGLE_V(v_num-1)) {
- return 0.0f;
- }
- kernel_assert(v_angle >= IES_LOOKUP_ANGLE_V(0));
- kernel_assert(h_angle >= IES_LOOKUP_ANGLE_H(0));
- kernel_assert(h_angle <= IES_LOOKUP_ANGLE_H(h_num-1));
-
- /* Lookup the angles to find the table position. */
- int h_i, v_i;
- /* TODO(lukas): Consider using bisection. Probably not worth it for the vast majority of IES files. */
- for(h_i = 0; IES_LOOKUP_ANGLE_H(h_i+1) < h_angle; h_i++);
- for(v_i = 0; IES_LOOKUP_ANGLE_V(v_i+1) < v_angle; v_i++);
-
- float h_frac = inverse_lerp(IES_LOOKUP_ANGLE_H(h_i), IES_LOOKUP_ANGLE_H(h_i+1), h_angle);
- float v_frac = inverse_lerp(IES_LOOKUP_ANGLE_V(v_i), IES_LOOKUP_ANGLE_V(v_i+1), v_angle);
+ /* Find offset of the IES data in the table. */
+ int ofs = __float_as_int(kernel_tex_fetch(__ies, slot));
+ if (ofs == -1) {
+ return 100.0f;
+ }
+
+ int h_num = __float_as_int(kernel_tex_fetch(__ies, ofs++));
+ int v_num = __float_as_int(kernel_tex_fetch(__ies, ofs++));
+
+#define IES_LOOKUP_ANGLE_H(h) kernel_tex_fetch(__ies, ofs + (h))
+#define IES_LOOKUP_ANGLE_V(v) kernel_tex_fetch(__ies, ofs + h_num + (v))
+
+ /* Check whether the angle is within the bounds of the IES texture. */
+ if (v_angle >= IES_LOOKUP_ANGLE_V(v_num - 1)) {
+ return 0.0f;
+ }
+ kernel_assert(v_angle >= IES_LOOKUP_ANGLE_V(0));
+ kernel_assert(h_angle >= IES_LOOKUP_ANGLE_H(0));
+ kernel_assert(h_angle <= IES_LOOKUP_ANGLE_H(h_num - 1));
+
+ /* Lookup the angles to find the table position. */
+ int h_i, v_i;
+ /* TODO(lukas): Consider using bisection. Probably not worth it for the vast majority of IES files. */
+ for (h_i = 0; IES_LOOKUP_ANGLE_H(h_i + 1) < h_angle; h_i++)
+ ;
+ for (v_i = 0; IES_LOOKUP_ANGLE_V(v_i + 1) < v_angle; v_i++)
+ ;
+
+ float h_frac = inverse_lerp(IES_LOOKUP_ANGLE_H(h_i), IES_LOOKUP_ANGLE_H(h_i + 1), h_angle);
+ float v_frac = inverse_lerp(IES_LOOKUP_ANGLE_V(v_i), IES_LOOKUP_ANGLE_V(v_i + 1), v_angle);
#undef IES_LOOKUP_ANGLE_H
#undef IES_LOOKUP_ANGLE_V
- /* Skip forward to the actual intensity data. */
- ofs += h_num+v_num;
-
- /* Perform cubic interpolation along the horizontal coordinate to get the intensity value.
- * If h_i is zero, just wrap around since the horizontal angles always go over the full circle.
- * However, the last entry (360°) equals the first one, so we need to wrap around to the one before that. */
- float a = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, (h_i == 0)? h_num-2 : h_i-1);
- float b = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, h_i);
- float c = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, h_i+1);
- /* Same logic here, wrap around to the second element if necessary. */
- float d = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, (h_i+2 == h_num)? 1 : h_i+2);
-
- /* Cubic interpolation can result in negative values, so get rid of them. */
- return max(cubic_interp(a, b, c, d, h_frac), 0.0f);
+ /* Skip forward to the actual intensity data. */
+ ofs += h_num + v_num;
+
+ /* Perform cubic interpolation along the horizontal coordinate to get the intensity value.
+ * If h_i is zero, just wrap around since the horizontal angles always go over the full circle.
+ * However, the last entry (360°) equals the first one, so we need to wrap around to the one before that. */
+ float a = interpolate_ies_vertical(
+ kg, ofs, v_i, v_num, v_frac, (h_i == 0) ? h_num - 2 : h_i - 1);
+ float b = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, h_i);
+ float c = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, h_i + 1);
+ /* Same logic here, wrap around to the second element if necessary. */
+ float d = interpolate_ies_vertical(
+ kg, ofs, v_i, v_num, v_frac, (h_i + 2 == h_num) ? 1 : h_i + 2);
+
+ /* Cubic interpolation can result in negative values, so get rid of them. */
+ return max(cubic_interp(a, b, c, d, h_frac), 0.0f);
}
-ccl_device void svm_node_ies(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_ies(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
- uint vector_offset, strength_offset, fac_offset, dummy, slot = node.z;
- decode_node_uchar4(node.y, &strength_offset, &vector_offset, &fac_offset, &dummy);
+ uint vector_offset, strength_offset, fac_offset, dummy, slot = node.z;
+ decode_node_uchar4(node.y, &strength_offset, &vector_offset, &fac_offset, &dummy);
- float3 vector = stack_load_float3(stack, vector_offset);
- float strength = stack_load_float_default(stack, strength_offset, node.w);
+ float3 vector = stack_load_float3(stack, vector_offset);
+ float strength = stack_load_float_default(stack, strength_offset, node.w);
- vector = normalize(vector);
- float v_angle = safe_acosf(-vector.z);
- float h_angle = atan2f(vector.x, vector.y) + M_PI_F;
+ vector = normalize(vector);
+ float v_angle = safe_acosf(-vector.z);
+ float h_angle = atan2f(vector.x, vector.y) + M_PI_F;
- float fac = strength * kernel_ies_interp(kg, slot, h_angle, v_angle);
+ float fac = strength * kernel_ies_interp(kg, slot, h_angle, v_angle);
- if(stack_valid(fac_offset)) {
- stack_store_float(stack, fac_offset, fac);
- }
+ if (stack_valid(fac_offset)) {
+ stack_store_float(stack, fac_offset, fac);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index 81ee79c984e..ee4b8b6e50c 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -16,190 +16,192 @@
CCL_NAMESPACE_BEGIN
-ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb, uint use_alpha)
+ccl_device float4
+svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb, uint use_alpha)
{
- float4 r = kernel_tex_image_interp(kg, id, x, y);
- const float alpha = r.w;
-
- if(use_alpha && alpha != 1.0f && alpha != 0.0f) {
- r /= alpha;
- const int texture_type = kernel_tex_type(id);
- if(texture_type == IMAGE_DATA_TYPE_BYTE4 ||
- texture_type == IMAGE_DATA_TYPE_BYTE)
- {
- r = min(r, make_float4(1.0f, 1.0f, 1.0f, 1.0f));
- }
- r.w = alpha;
- }
-
- if(srgb) {
- /* TODO(lukas): Implement proper conversion for image textures. */
- r = color_srgb_to_linear_v4(r);
- }
-
- return r;
+ float4 r = kernel_tex_image_interp(kg, id, x, y);
+ const float alpha = r.w;
+
+ if (use_alpha && alpha != 1.0f && alpha != 0.0f) {
+ r /= alpha;
+ const int texture_type = kernel_tex_type(id);
+ if (texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_BYTE) {
+ r = min(r, make_float4(1.0f, 1.0f, 1.0f, 1.0f));
+ }
+ r.w = alpha;
+ }
+
+ if (srgb) {
+ /* TODO(lukas): Implement proper conversion for image textures. */
+ r = color_srgb_to_linear_v4(r);
+ }
+
+ return r;
}
/* Remap coordnate from 0..1 box to -1..-1 */
ccl_device_inline float3 texco_remap_square(float3 co)
{
- return (co - make_float3(0.5f, 0.5f, 0.5f)) * 2.0f;
+ return (co - make_float3(0.5f, 0.5f, 0.5f)) * 2.0f;
}
ccl_device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
- uint id = node.y;
- uint co_offset, out_offset, alpha_offset, srgb;
-
- decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb);
-
- float3 co = stack_load_float3(stack, co_offset);
- float2 tex_co;
- uint use_alpha = stack_valid(alpha_offset);
- if(node.w == NODE_IMAGE_PROJ_SPHERE) {
- co = texco_remap_square(co);
- tex_co = map_to_sphere(co);
- }
- else if(node.w == NODE_IMAGE_PROJ_TUBE) {
- co = texco_remap_square(co);
- tex_co = map_to_tube(co);
- }
- else {
- tex_co = make_float2(co.x, co.y);
- }
- float4 f = svm_image_texture(kg, id, tex_co.x, tex_co.y, srgb, use_alpha);
-
- if(stack_valid(out_offset))
- stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z));
- if(stack_valid(alpha_offset))
- stack_store_float(stack, alpha_offset, f.w);
+ uint id = node.y;
+ uint co_offset, out_offset, alpha_offset, srgb;
+
+ decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb);
+
+ float3 co = stack_load_float3(stack, co_offset);
+ float2 tex_co;
+ uint use_alpha = stack_valid(alpha_offset);
+ if (node.w == NODE_IMAGE_PROJ_SPHERE) {
+ co = texco_remap_square(co);
+ tex_co = map_to_sphere(co);
+ }
+ else if (node.w == NODE_IMAGE_PROJ_TUBE) {
+ co = texco_remap_square(co);
+ tex_co = map_to_tube(co);
+ }
+ else {
+ tex_co = make_float2(co.x, co.y);
+ }
+ float4 f = svm_image_texture(kg, id, tex_co.x, tex_co.y, srgb, use_alpha);
+
+ if (stack_valid(out_offset))
+ stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z));
+ if (stack_valid(alpha_offset))
+ stack_store_float(stack, alpha_offset, f.w);
}
ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
- /* get object space normal */
- float3 N = sd->N;
-
- N = sd->N;
- object_inverse_normal_transform(kg, sd, &N);
-
- /* project from direction vector to barycentric coordinates in triangles */
- float3 signed_N = N;
-
- N.x = fabsf(N.x);
- N.y = fabsf(N.y);
- N.z = fabsf(N.z);
-
- N /= (N.x + N.y + N.z);
-
- /* basic idea is to think of this as a triangle, each corner representing
- * one of the 3 faces of the cube. in the corners we have single textures,
- * in between we blend between two textures, and in the middle we a blend
- * between three textures.
- *
- * the Nxyz values are the barycentric coordinates in an equilateral
- * triangle, which in case of blending, in the middle has a smaller
- * equilateral triangle where 3 textures blend. this divides things into
- * 7 zones, with an if() test for each zone */
-
- float3 weight = make_float3(0.0f, 0.0f, 0.0f);
- float blend = __int_as_float(node.w);
- float limit = 0.5f*(1.0f + blend);
-
- /* first test for corners with single texture */
- if(N.x > limit*(N.x + N.y) && N.x > limit*(N.x + N.z)) {
- weight.x = 1.0f;
- }
- else if(N.y > limit*(N.x + N.y) && N.y > limit*(N.y + N.z)) {
- weight.y = 1.0f;
- }
- else if(N.z > limit*(N.x + N.z) && N.z > limit*(N.y + N.z)) {
- weight.z = 1.0f;
- }
- else if(blend > 0.0f) {
- /* in case of blending, test for mixes between two textures */
- if(N.z < (1.0f - limit)*(N.y + N.x)) {
- weight.x = N.x/(N.x + N.y);
- weight.x = saturate((weight.x - 0.5f*(1.0f - blend))/blend);
- weight.y = 1.0f - weight.x;
- }
- else if(N.x < (1.0f - limit)*(N.y + N.z)) {
- weight.y = N.y/(N.y + N.z);
- weight.y = saturate((weight.y - 0.5f*(1.0f - blend))/blend);
- weight.z = 1.0f - weight.y;
- }
- else if(N.y < (1.0f - limit)*(N.x + N.z)) {
- weight.x = N.x/(N.x + N.z);
- weight.x = saturate((weight.x - 0.5f*(1.0f - blend))/blend);
- weight.z = 1.0f - weight.x;
- }
- else {
- /* last case, we have a mix between three */
- weight.x = ((2.0f - limit)*N.x + (limit - 1.0f))/(2.0f*limit - 1.0f);
- weight.y = ((2.0f - limit)*N.y + (limit - 1.0f))/(2.0f*limit - 1.0f);
- weight.z = ((2.0f - limit)*N.z + (limit - 1.0f))/(2.0f*limit - 1.0f);
- }
- }
- else {
- /* Desperate mode, no valid choice anyway, fallback to one side.*/
- weight.x = 1.0f;
- }
-
- /* now fetch textures */
- uint co_offset, out_offset, alpha_offset, srgb;
- decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb);
-
- float3 co = stack_load_float3(stack, co_offset);
- uint id = node.y;
-
- float4 f = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- uint use_alpha = stack_valid(alpha_offset);
-
- /* Map so that no textures are flipped, rotation is somewhat arbitrary. */
- if(weight.x > 0.0f) {
- float2 uv = make_float2((signed_N.x < 0.0f)? 1.0f - co.y: co.y, co.z);
- f += weight.x*svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha);
- }
- if(weight.y > 0.0f) {
- float2 uv = make_float2((signed_N.y > 0.0f)? 1.0f - co.x: co.x, co.z);
- f += weight.y*svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha);
- }
- if(weight.z > 0.0f) {
- float2 uv = make_float2((signed_N.z > 0.0f)? 1.0f - co.y: co.y, co.x);
- f += weight.z*svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha);
- }
-
- if(stack_valid(out_offset))
- stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z));
- if(stack_valid(alpha_offset))
- stack_store_float(stack, alpha_offset, f.w);
+ /* get object space normal */
+ float3 N = sd->N;
+
+ N = sd->N;
+ object_inverse_normal_transform(kg, sd, &N);
+
+ /* project from direction vector to barycentric coordinates in triangles */
+ float3 signed_N = N;
+
+ N.x = fabsf(N.x);
+ N.y = fabsf(N.y);
+ N.z = fabsf(N.z);
+
+ N /= (N.x + N.y + N.z);
+
+ /* basic idea is to think of this as a triangle, each corner representing
+ * one of the 3 faces of the cube. in the corners we have single textures,
+ * in between we blend between two textures, and in the middle we a blend
+ * between three textures.
+ *
+ * the Nxyz values are the barycentric coordinates in an equilateral
+ * triangle, which in case of blending, in the middle has a smaller
+ * equilateral triangle where 3 textures blend. this divides things into
+ * 7 zones, with an if() test for each zone */
+
+ float3 weight = make_float3(0.0f, 0.0f, 0.0f);
+ float blend = __int_as_float(node.w);
+ float limit = 0.5f * (1.0f + blend);
+
+ /* first test for corners with single texture */
+ if (N.x > limit * (N.x + N.y) && N.x > limit * (N.x + N.z)) {
+ weight.x = 1.0f;
+ }
+ else if (N.y > limit * (N.x + N.y) && N.y > limit * (N.y + N.z)) {
+ weight.y = 1.0f;
+ }
+ else if (N.z > limit * (N.x + N.z) && N.z > limit * (N.y + N.z)) {
+ weight.z = 1.0f;
+ }
+ else if (blend > 0.0f) {
+ /* in case of blending, test for mixes between two textures */
+ if (N.z < (1.0f - limit) * (N.y + N.x)) {
+ weight.x = N.x / (N.x + N.y);
+ weight.x = saturate((weight.x - 0.5f * (1.0f - blend)) / blend);
+ weight.y = 1.0f - weight.x;
+ }
+ else if (N.x < (1.0f - limit) * (N.y + N.z)) {
+ weight.y = N.y / (N.y + N.z);
+ weight.y = saturate((weight.y - 0.5f * (1.0f - blend)) / blend);
+ weight.z = 1.0f - weight.y;
+ }
+ else if (N.y < (1.0f - limit) * (N.x + N.z)) {
+ weight.x = N.x / (N.x + N.z);
+ weight.x = saturate((weight.x - 0.5f * (1.0f - blend)) / blend);
+ weight.z = 1.0f - weight.x;
+ }
+ else {
+ /* last case, we have a mix between three */
+ weight.x = ((2.0f - limit) * N.x + (limit - 1.0f)) / (2.0f * limit - 1.0f);
+ weight.y = ((2.0f - limit) * N.y + (limit - 1.0f)) / (2.0f * limit - 1.0f);
+ weight.z = ((2.0f - limit) * N.z + (limit - 1.0f)) / (2.0f * limit - 1.0f);
+ }
+ }
+ else {
+ /* Desperate mode, no valid choice anyway, fallback to one side.*/
+ weight.x = 1.0f;
+ }
+
+ /* now fetch textures */
+ uint co_offset, out_offset, alpha_offset, srgb;
+ decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb);
+
+ float3 co = stack_load_float3(stack, co_offset);
+ uint id = node.y;
+
+ float4 f = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ uint use_alpha = stack_valid(alpha_offset);
+
+ /* Map so that no textures are flipped, rotation is somewhat arbitrary. */
+ if (weight.x > 0.0f) {
+ float2 uv = make_float2((signed_N.x < 0.0f) ? 1.0f - co.y : co.y, co.z);
+ f += weight.x * svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha);
+ }
+ if (weight.y > 0.0f) {
+ float2 uv = make_float2((signed_N.y > 0.0f) ? 1.0f - co.x : co.x, co.z);
+ f += weight.y * svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha);
+ }
+ if (weight.z > 0.0f) {
+ float2 uv = make_float2((signed_N.z > 0.0f) ? 1.0f - co.y : co.y, co.x);
+ f += weight.z * svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha);
+ }
+
+ if (stack_valid(out_offset))
+ stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z));
+ if (stack_valid(alpha_offset))
+ stack_store_float(stack, alpha_offset, f.w);
}
-ccl_device void svm_node_tex_environment(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+ccl_device void svm_node_tex_environment(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node)
{
- uint id = node.y;
- uint co_offset, out_offset, alpha_offset, srgb;
- uint projection = node.w;
+ uint id = node.y;
+ uint co_offset, out_offset, alpha_offset, srgb;
+ uint projection = node.w;
- decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb);
+ decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb);
- float3 co = stack_load_float3(stack, co_offset);
- float2 uv;
+ float3 co = stack_load_float3(stack, co_offset);
+ float2 uv;
- co = safe_normalize(co);
+ co = safe_normalize(co);
- if(projection == 0)
- uv = direction_to_equirectangular(co);
- else
- uv = direction_to_mirrorball(co);
+ if (projection == 0)
+ uv = direction_to_equirectangular(co);
+ else
+ uv = direction_to_mirrorball(co);
- uint use_alpha = stack_valid(alpha_offset);
- float4 f = svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha);
+ uint use_alpha = stack_valid(alpha_offset);
+ float4 f = svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha);
- if(stack_valid(out_offset))
- stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z));
- if(stack_valid(alpha_offset))
- stack_store_float(stack, alpha_offset, f.w);
+ if (stack_valid(out_offset))
+ stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z));
+ if (stack_valid(alpha_offset))
+ stack_store_float(stack, alpha_offset, f.w);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_invert.h b/intern/cycles/kernel/svm/svm_invert.h
index 57cc4281101..02024742b13 100644
--- a/intern/cycles/kernel/svm/svm_invert.h
+++ b/intern/cycles/kernel/svm/svm_invert.h
@@ -18,20 +18,21 @@ CCL_NAMESPACE_BEGIN
ccl_device float invert(float color, float factor)
{
- return factor*(1.0f - color) + (1.0f - factor) * color;
+ return factor * (1.0f - color) + (1.0f - factor) * color;
}
-ccl_device void svm_node_invert(ShaderData *sd, float *stack, uint in_fac, uint in_color, uint out_color)
+ccl_device void svm_node_invert(
+ ShaderData *sd, float *stack, uint in_fac, uint in_color, uint out_color)
{
- float factor = stack_load_float(stack, in_fac);
- float3 color = stack_load_float3(stack, in_color);
+ float factor = stack_load_float(stack, in_fac);
+ float3 color = stack_load_float3(stack, in_color);
- color.x = invert(color.x, factor);
- color.y = invert(color.y, factor);
- color.z = invert(color.z, factor);
+ color.x = invert(color.x, factor);
+ color.y = invert(color.y, factor);
+ color.z = invert(color.z, factor);
- if(stack_valid(out_color))
- stack_store_float3(stack, out_color, color);
+ if (stack_valid(out_color))
+ stack_store_float3(stack, out_color, color);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h
index dd4390057cf..65a9a284a17 100644
--- a/intern/cycles/kernel/svm/svm_light_path.h
+++ b/intern/cycles/kernel/svm/svm_light_path.h
@@ -18,59 +18,99 @@ CCL_NAMESPACE_BEGIN
/* Light Path Node */
-ccl_device void svm_node_light_path(ShaderData *sd, ccl_addr_space PathState *state, float *stack, uint type, uint out_offset, int path_flag)
+ccl_device void svm_node_light_path(ShaderData *sd,
+ ccl_addr_space PathState *state,
+ float *stack,
+ uint type,
+ uint out_offset,
+ int path_flag)
{
- float info = 0.0f;
+ float info = 0.0f;
- switch(type) {
- case NODE_LP_camera: info = (path_flag & PATH_RAY_CAMERA)? 1.0f: 0.0f; break;
- case NODE_LP_shadow: info = (path_flag & PATH_RAY_SHADOW)? 1.0f: 0.0f; break;
- case NODE_LP_diffuse: info = (path_flag & PATH_RAY_DIFFUSE)? 1.0f: 0.0f; break;
- case NODE_LP_glossy: info = (path_flag & PATH_RAY_GLOSSY)? 1.0f: 0.0f; break;
- case NODE_LP_singular: info = (path_flag & PATH_RAY_SINGULAR)? 1.0f: 0.0f; break;
- case NODE_LP_reflection: info = (path_flag & PATH_RAY_REFLECT)? 1.0f: 0.0f; break;
- case NODE_LP_transmission: info = (path_flag & PATH_RAY_TRANSMIT)? 1.0f: 0.0f; break;
- case NODE_LP_volume_scatter: info = (path_flag & PATH_RAY_VOLUME_SCATTER)? 1.0f: 0.0f; break;
- case NODE_LP_backfacing: info = (sd->flag & SD_BACKFACING)? 1.0f: 0.0f; break;
- case NODE_LP_ray_length: info = sd->ray_length; break;
- case NODE_LP_ray_depth: info = (float)state->bounce; break;
- case NODE_LP_ray_diffuse: info = (float)state->diffuse_bounce; break;
- case NODE_LP_ray_glossy: info = (float)state->glossy_bounce; break;
- case NODE_LP_ray_transparent: info = (float)state->transparent_bounce; break;
- case NODE_LP_ray_transmission: info = (float)state->transmission_bounce; break;
- }
+ switch (type) {
+ case NODE_LP_camera:
+ info = (path_flag & PATH_RAY_CAMERA) ? 1.0f : 0.0f;
+ break;
+ case NODE_LP_shadow:
+ info = (path_flag & PATH_RAY_SHADOW) ? 1.0f : 0.0f;
+ break;
+ case NODE_LP_diffuse:
+ info = (path_flag & PATH_RAY_DIFFUSE) ? 1.0f : 0.0f;
+ break;
+ case NODE_LP_glossy:
+ info = (path_flag & PATH_RAY_GLOSSY) ? 1.0f : 0.0f;
+ break;
+ case NODE_LP_singular:
+ info = (path_flag & PATH_RAY_SINGULAR) ? 1.0f : 0.0f;
+ break;
+ case NODE_LP_reflection:
+ info = (path_flag & PATH_RAY_REFLECT) ? 1.0f : 0.0f;
+ break;
+ case NODE_LP_transmission:
+ info = (path_flag & PATH_RAY_TRANSMIT) ? 1.0f : 0.0f;
+ break;
+ case NODE_LP_volume_scatter:
+ info = (path_flag & PATH_RAY_VOLUME_SCATTER) ? 1.0f : 0.0f;
+ break;
+ case NODE_LP_backfacing:
+ info = (sd->flag & SD_BACKFACING) ? 1.0f : 0.0f;
+ break;
+ case NODE_LP_ray_length:
+ info = sd->ray_length;
+ break;
+ case NODE_LP_ray_depth:
+ info = (float)state->bounce;
+ break;
+ case NODE_LP_ray_diffuse:
+ info = (float)state->diffuse_bounce;
+ break;
+ case NODE_LP_ray_glossy:
+ info = (float)state->glossy_bounce;
+ break;
+ case NODE_LP_ray_transparent:
+ info = (float)state->transparent_bounce;
+ break;
+ case NODE_LP_ray_transmission:
+ info = (float)state->transmission_bounce;
+ break;
+ }
- stack_store_float(stack, out_offset, info);
+ stack_store_float(stack, out_offset, info);
}
/* Light Falloff Node */
ccl_device void svm_node_light_falloff(ShaderData *sd, float *stack, uint4 node)
{
- uint strength_offset, out_offset, smooth_offset;
+ uint strength_offset, out_offset, smooth_offset;
- decode_node_uchar4(node.z, &strength_offset, &smooth_offset, &out_offset, NULL);
+ decode_node_uchar4(node.z, &strength_offset, &smooth_offset, &out_offset, NULL);
- float strength = stack_load_float(stack, strength_offset);
- uint type = node.y;
+ float strength = stack_load_float(stack, strength_offset);
+ uint type = node.y;
- switch(type) {
- case NODE_LIGHT_FALLOFF_QUADRATIC: break;
- case NODE_LIGHT_FALLOFF_LINEAR: strength *= sd->ray_length; break;
- case NODE_LIGHT_FALLOFF_CONSTANT: strength *= sd->ray_length*sd->ray_length; break;
- }
+ switch (type) {
+ case NODE_LIGHT_FALLOFF_QUADRATIC:
+ break;
+ case NODE_LIGHT_FALLOFF_LINEAR:
+ strength *= sd->ray_length;
+ break;
+ case NODE_LIGHT_FALLOFF_CONSTANT:
+ strength *= sd->ray_length * sd->ray_length;
+ break;
+ }
- float smooth = stack_load_float(stack, smooth_offset);
+ float smooth = stack_load_float(stack, smooth_offset);
- if(smooth > 0.0f) {
- float squared = sd->ray_length*sd->ray_length;
- /* Distant lamps set the ray length to FLT_MAX, which causes squared to overflow. */
- if(isfinite(squared)) {
- strength *= squared/(smooth + squared);
- }
- }
+ if (smooth > 0.0f) {
+ float squared = sd->ray_length * sd->ray_length;
+ /* Distant lamps set the ray length to FLT_MAX, which causes squared to overflow. */
+ if (isfinite(squared)) {
+ strength *= squared / (smooth + squared);
+ }
+ }
- stack_store_float(stack, out_offset, strength);
+ stack_store_float(stack, out_offset, strength);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_magic.h b/intern/cycles/kernel/svm/svm_magic.h
index 6afaff37acd..115d2e2fe4b 100644
--- a/intern/cycles/kernel/svm/svm_magic.h
+++ b/intern/cycles/kernel/svm/svm_magic.h
@@ -20,92 +20,93 @@ CCL_NAMESPACE_BEGIN
ccl_device_noinline float3 svm_magic(float3 p, int n, float distortion)
{
- float x = sinf((p.x + p.y + p.z)*5.0f);
- float y = cosf((-p.x + p.y - p.z)*5.0f);
- float z = -cosf((-p.x - p.y + p.z)*5.0f);
-
- if(n > 0) {
- x *= distortion;
- y *= distortion;
- z *= distortion;
- y = -cosf(x-y+z);
- y *= distortion;
-
- if(n > 1) {
- x = cosf(x-y-z);
- x *= distortion;
-
- if(n > 2) {
- z = sinf(-x-y-z);
- z *= distortion;
-
- if(n > 3) {
- x = -cosf(-x+y-z);
- x *= distortion;
-
- if(n > 4) {
- y = -sinf(-x+y+z);
- y *= distortion;
-
- if(n > 5) {
- y = -cosf(-x+y+z);
- y *= distortion;
-
- if(n > 6) {
- x = cosf(x+y+z);
- x *= distortion;
-
- if(n > 7) {
- z = sinf(x+y-z);
- z *= distortion;
-
- if(n > 8) {
- x = -cosf(-x-y+z);
- x *= distortion;
-
- if(n > 9) {
- y = -sinf(x-y+z);
- y *= distortion;
- }
- }
- }
- }
- }
- }
- }
- }
- }
- }
-
- if(distortion != 0.0f) {
- distortion *= 2.0f;
- x /= distortion;
- y /= distortion;
- z /= distortion;
- }
-
- return make_float3(0.5f - x, 0.5f - y, 0.5f - z);
+ float x = sinf((p.x + p.y + p.z) * 5.0f);
+ float y = cosf((-p.x + p.y - p.z) * 5.0f);
+ float z = -cosf((-p.x - p.y + p.z) * 5.0f);
+
+ if (n > 0) {
+ x *= distortion;
+ y *= distortion;
+ z *= distortion;
+ y = -cosf(x - y + z);
+ y *= distortion;
+
+ if (n > 1) {
+ x = cosf(x - y - z);
+ x *= distortion;
+
+ if (n > 2) {
+ z = sinf(-x - y - z);
+ z *= distortion;
+
+ if (n > 3) {
+ x = -cosf(-x + y - z);
+ x *= distortion;
+
+ if (n > 4) {
+ y = -sinf(-x + y + z);
+ y *= distortion;
+
+ if (n > 5) {
+ y = -cosf(-x + y + z);
+ y *= distortion;
+
+ if (n > 6) {
+ x = cosf(x + y + z);
+ x *= distortion;
+
+ if (n > 7) {
+ z = sinf(x + y - z);
+ z *= distortion;
+
+ if (n > 8) {
+ x = -cosf(-x - y + z);
+ x *= distortion;
+
+ if (n > 9) {
+ y = -sinf(x - y + z);
+ y *= distortion;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (distortion != 0.0f) {
+ distortion *= 2.0f;
+ x /= distortion;
+ y /= distortion;
+ z /= distortion;
+ }
+
+ return make_float3(0.5f - x, 0.5f - y, 0.5f - z);
}
-ccl_device void svm_node_tex_magic(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_tex_magic(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
- uint depth;
- uint scale_offset, distortion_offset, co_offset, fac_offset, color_offset;
+ uint depth;
+ uint scale_offset, distortion_offset, co_offset, fac_offset, color_offset;
- decode_node_uchar4(node.y, &depth, &color_offset, &fac_offset, NULL);
- decode_node_uchar4(node.z, &co_offset, &scale_offset, &distortion_offset, NULL);
+ decode_node_uchar4(node.y, &depth, &color_offset, &fac_offset, NULL);
+ decode_node_uchar4(node.z, &co_offset, &scale_offset, &distortion_offset, NULL);
- uint4 node2 = read_node(kg, offset);
- float3 co = stack_load_float3(stack, co_offset);
- float scale = stack_load_float_default(stack, scale_offset, node2.x);
- float distortion = stack_load_float_default(stack, distortion_offset, node2.y);
+ uint4 node2 = read_node(kg, offset);
+ float3 co = stack_load_float3(stack, co_offset);
+ float scale = stack_load_float_default(stack, scale_offset, node2.x);
+ float distortion = stack_load_float_default(stack, distortion_offset, node2.y);
- float3 color = svm_magic(co*scale, depth, distortion);
+ float3 color = svm_magic(co * scale, depth, distortion);
- if(stack_valid(fac_offset))
- stack_store_float(stack, fac_offset, average(color));
- if(stack_valid(color_offset))
- stack_store_float3(stack, color_offset, color);
+ if (stack_valid(fac_offset))
+ stack_store_float(stack, fac_offset, average(color));
+ if (stack_valid(color_offset))
+ stack_store_float3(stack, color_offset, color);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_mapping.h b/intern/cycles/kernel/svm/svm_mapping.h
index 86181283821..998a29912d4 100644
--- a/intern/cycles/kernel/svm/svm_mapping.h
+++ b/intern/cycles/kernel/svm/svm_mapping.h
@@ -18,28 +18,30 @@ CCL_NAMESPACE_BEGIN
/* Mapping Node */
-ccl_device void svm_node_mapping(KernelGlobals *kg, ShaderData *sd, float *stack, uint vec_offset, uint out_offset, int *offset)
+ccl_device void svm_node_mapping(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint vec_offset, uint out_offset, int *offset)
{
- float3 v = stack_load_float3(stack, vec_offset);
+ float3 v = stack_load_float3(stack, vec_offset);
- Transform tfm;
- tfm.x = read_node_float(kg, offset);
- tfm.y = read_node_float(kg, offset);
- tfm.z = read_node_float(kg, offset);
+ Transform tfm;
+ tfm.x = read_node_float(kg, offset);
+ tfm.y = read_node_float(kg, offset);
+ tfm.z = read_node_float(kg, offset);
- float3 r = transform_point(&tfm, v);
- stack_store_float3(stack, out_offset, r);
+ float3 r = transform_point(&tfm, v);
+ stack_store_float3(stack, out_offset, r);
}
-ccl_device void svm_node_min_max(KernelGlobals *kg, ShaderData *sd, float *stack, uint vec_offset, uint out_offset, int *offset)
+ccl_device void svm_node_min_max(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint vec_offset, uint out_offset, int *offset)
{
- float3 v = stack_load_float3(stack, vec_offset);
+ float3 v = stack_load_float3(stack, vec_offset);
- float3 mn = float4_to_float3(read_node_float(kg, offset));
- float3 mx = float4_to_float3(read_node_float(kg, offset));
+ float3 mn = float4_to_float3(read_node_float(kg, offset));
+ float3 mx = float4_to_float3(read_node_float(kg, offset));
- float3 r = min(max(mn, v), mx);
- stack_store_float3(stack, out_offset, r);
+ float3 r = min(max(mn, v), mx);
+ stack_store_float3(stack, out_offset, r);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_math.h b/intern/cycles/kernel/svm/svm_math.h
index c9a838361cd..5920913825b 100644
--- a/intern/cycles/kernel/svm/svm_math.h
+++ b/intern/cycles/kernel/svm/svm_math.h
@@ -18,32 +18,46 @@ CCL_NAMESPACE_BEGIN
/* Nodes */
-ccl_device void svm_node_math(KernelGlobals *kg, ShaderData *sd, float *stack, uint itype, uint f1_offset, uint f2_offset, int *offset)
+ccl_device void svm_node_math(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint itype,
+ uint f1_offset,
+ uint f2_offset,
+ int *offset)
{
- NodeMath type = (NodeMath)itype;
- float f1 = stack_load_float(stack, f1_offset);
- float f2 = stack_load_float(stack, f2_offset);
- float f = svm_math(type, f1, f2);
+ NodeMath type = (NodeMath)itype;
+ float f1 = stack_load_float(stack, f1_offset);
+ float f2 = stack_load_float(stack, f2_offset);
+ float f = svm_math(type, f1, f2);
- uint4 node1 = read_node(kg, offset);
+ uint4 node1 = read_node(kg, offset);
- stack_store_float(stack, node1.y, f);
+ stack_store_float(stack, node1.y, f);
}
-ccl_device void svm_node_vector_math(KernelGlobals *kg, ShaderData *sd, float *stack, uint itype, uint v1_offset, uint v2_offset, int *offset)
+ccl_device void svm_node_vector_math(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint itype,
+ uint v1_offset,
+ uint v2_offset,
+ int *offset)
{
- NodeVectorMath type = (NodeVectorMath)itype;
- float3 v1 = stack_load_float3(stack, v1_offset);
- float3 v2 = stack_load_float3(stack, v2_offset);
- float f;
- float3 v;
+ NodeVectorMath type = (NodeVectorMath)itype;
+ float3 v1 = stack_load_float3(stack, v1_offset);
+ float3 v2 = stack_load_float3(stack, v2_offset);
+ float f;
+ float3 v;
- svm_vector_math(&f, &v, type, v1, v2);
+ svm_vector_math(&f, &v, type, v1, v2);
- uint4 node1 = read_node(kg, offset);
+ uint4 node1 = read_node(kg, offset);
- if(stack_valid(node1.y)) stack_store_float(stack, node1.y, f);
- if(stack_valid(node1.z)) stack_store_float3(stack, node1.z, v);
+ if (stack_valid(node1.y))
+ stack_store_float(stack, node1.y, f);
+ if (stack_valid(node1.z))
+ stack_store_float3(stack, node1.z, v);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h
index 669b174e4a3..e3544515f1b 100644
--- a/intern/cycles/kernel/svm/svm_math_util.h
+++ b/intern/cycles/kernel/svm/svm_math_util.h
@@ -18,96 +18,97 @@ CCL_NAMESPACE_BEGIN
ccl_device float average_fac(float3 v)
{
- return (fabsf(v.x) + fabsf(v.y) + fabsf(v.z))/3.0f;
+ return (fabsf(v.x) + fabsf(v.y) + fabsf(v.z)) / 3.0f;
}
-ccl_device void svm_vector_math(float *Fac, float3 *Vector, NodeVectorMath type, float3 Vector1, float3 Vector2)
+ccl_device void svm_vector_math(
+ float *Fac, float3 *Vector, NodeVectorMath type, float3 Vector1, float3 Vector2)
{
- if(type == NODE_VECTOR_MATH_ADD) {
- *Vector = Vector1 + Vector2;
- *Fac = average_fac(*Vector);
- }
- else if(type == NODE_VECTOR_MATH_SUBTRACT) {
- *Vector = Vector1 - Vector2;
- *Fac = average_fac(*Vector);
- }
- else if(type == NODE_VECTOR_MATH_AVERAGE) {
- *Vector = safe_normalize_len(Vector1 + Vector2, Fac);
- }
- else if(type == NODE_VECTOR_MATH_DOT_PRODUCT) {
- *Fac = dot(Vector1, Vector2);
- *Vector = make_float3(0.0f, 0.0f, 0.0f);
- }
- else if(type == NODE_VECTOR_MATH_CROSS_PRODUCT) {
- *Vector = safe_normalize_len(cross(Vector1, Vector2), Fac);
- }
- else if(type == NODE_VECTOR_MATH_NORMALIZE) {
- *Vector = safe_normalize_len(Vector1, Fac);
- }
- else {
- *Fac = 0.0f;
- *Vector = make_float3(0.0f, 0.0f, 0.0f);
- }
+ if (type == NODE_VECTOR_MATH_ADD) {
+ *Vector = Vector1 + Vector2;
+ *Fac = average_fac(*Vector);
+ }
+ else if (type == NODE_VECTOR_MATH_SUBTRACT) {
+ *Vector = Vector1 - Vector2;
+ *Fac = average_fac(*Vector);
+ }
+ else if (type == NODE_VECTOR_MATH_AVERAGE) {
+ *Vector = safe_normalize_len(Vector1 + Vector2, Fac);
+ }
+ else if (type == NODE_VECTOR_MATH_DOT_PRODUCT) {
+ *Fac = dot(Vector1, Vector2);
+ *Vector = make_float3(0.0f, 0.0f, 0.0f);
+ }
+ else if (type == NODE_VECTOR_MATH_CROSS_PRODUCT) {
+ *Vector = safe_normalize_len(cross(Vector1, Vector2), Fac);
+ }
+ else if (type == NODE_VECTOR_MATH_NORMALIZE) {
+ *Vector = safe_normalize_len(Vector1, Fac);
+ }
+ else {
+ *Fac = 0.0f;
+ *Vector = make_float3(0.0f, 0.0f, 0.0f);
+ }
}
ccl_device float svm_math(NodeMath type, float Fac1, float Fac2)
{
- float Fac;
-
- if(type == NODE_MATH_ADD)
- Fac = Fac1 + Fac2;
- else if(type == NODE_MATH_SUBTRACT)
- Fac = Fac1 - Fac2;
- else if(type == NODE_MATH_MULTIPLY)
- Fac = Fac1*Fac2;
- else if(type == NODE_MATH_DIVIDE)
- Fac = safe_divide(Fac1, Fac2);
- else if(type == NODE_MATH_SINE)
- Fac = sinf(Fac1);
- else if(type == NODE_MATH_COSINE)
- Fac = cosf(Fac1);
- else if(type == NODE_MATH_TANGENT)
- Fac = tanf(Fac1);
- else if(type == NODE_MATH_ARCSINE)
- Fac = safe_asinf(Fac1);
- else if(type == NODE_MATH_ARCCOSINE)
- Fac = safe_acosf(Fac1);
- else if(type == NODE_MATH_ARCTANGENT)
- Fac = atanf(Fac1);
- else if(type == NODE_MATH_POWER)
- Fac = safe_powf(Fac1, Fac2);
- else if(type == NODE_MATH_LOGARITHM)
- Fac = safe_logf(Fac1, Fac2);
- else if(type == NODE_MATH_MINIMUM)
- Fac = fminf(Fac1, Fac2);
- else if(type == NODE_MATH_MAXIMUM)
- Fac = fmaxf(Fac1, Fac2);
- else if(type == NODE_MATH_ROUND)
- Fac = floorf(Fac1 + 0.5f);
- else if(type == NODE_MATH_LESS_THAN)
- Fac = Fac1 < Fac2;
- else if(type == NODE_MATH_GREATER_THAN)
- Fac = Fac1 > Fac2;
- else if(type == NODE_MATH_MODULO)
- Fac = safe_modulo(Fac1, Fac2);
- else if(type == NODE_MATH_ABSOLUTE)
- Fac = fabsf(Fac1);
- else if(type == NODE_MATH_ARCTAN2)
- Fac = atan2f(Fac1, Fac2);
- else if(type == NODE_MATH_FLOOR)
- Fac = floorf(Fac1);
- else if(type == NODE_MATH_CEIL)
- Fac = ceilf(Fac1);
- else if(type == NODE_MATH_FRACT)
- Fac = Fac1 - floorf(Fac1);
- else if(type == NODE_MATH_SQRT)
- Fac = safe_sqrtf(Fac1);
- else if(type == NODE_MATH_CLAMP)
- Fac = saturate(Fac1);
- else
- Fac = 0.0f;
-
- return Fac;
+ float Fac;
+
+ if (type == NODE_MATH_ADD)
+ Fac = Fac1 + Fac2;
+ else if (type == NODE_MATH_SUBTRACT)
+ Fac = Fac1 - Fac2;
+ else if (type == NODE_MATH_MULTIPLY)
+ Fac = Fac1 * Fac2;
+ else if (type == NODE_MATH_DIVIDE)
+ Fac = safe_divide(Fac1, Fac2);
+ else if (type == NODE_MATH_SINE)
+ Fac = sinf(Fac1);
+ else if (type == NODE_MATH_COSINE)
+ Fac = cosf(Fac1);
+ else if (type == NODE_MATH_TANGENT)
+ Fac = tanf(Fac1);
+ else if (type == NODE_MATH_ARCSINE)
+ Fac = safe_asinf(Fac1);
+ else if (type == NODE_MATH_ARCCOSINE)
+ Fac = safe_acosf(Fac1);
+ else if (type == NODE_MATH_ARCTANGENT)
+ Fac = atanf(Fac1);
+ else if (type == NODE_MATH_POWER)
+ Fac = safe_powf(Fac1, Fac2);
+ else if (type == NODE_MATH_LOGARITHM)
+ Fac = safe_logf(Fac1, Fac2);
+ else if (type == NODE_MATH_MINIMUM)
+ Fac = fminf(Fac1, Fac2);
+ else if (type == NODE_MATH_MAXIMUM)
+ Fac = fmaxf(Fac1, Fac2);
+ else if (type == NODE_MATH_ROUND)
+ Fac = floorf(Fac1 + 0.5f);
+ else if (type == NODE_MATH_LESS_THAN)
+ Fac = Fac1 < Fac2;
+ else if (type == NODE_MATH_GREATER_THAN)
+ Fac = Fac1 > Fac2;
+ else if (type == NODE_MATH_MODULO)
+ Fac = safe_modulo(Fac1, Fac2);
+ else if (type == NODE_MATH_ABSOLUTE)
+ Fac = fabsf(Fac1);
+ else if (type == NODE_MATH_ARCTAN2)
+ Fac = atan2f(Fac1, Fac2);
+ else if (type == NODE_MATH_FLOOR)
+ Fac = floorf(Fac1);
+ else if (type == NODE_MATH_CEIL)
+ Fac = ceilf(Fac1);
+ else if (type == NODE_MATH_FRACT)
+ Fac = Fac1 - floorf(Fac1);
+ else if (type == NODE_MATH_SQRT)
+ Fac = safe_sqrtf(Fac1);
+ else if (type == NODE_MATH_CLAMP)
+ Fac = saturate(Fac1);
+ else
+ Fac = 0.0f;
+
+ return Fac;
}
/* Calculate color in range 800..12000 using an approximation
@@ -117,74 +118,72 @@ ccl_device float svm_math(NodeMath type, float Fac1, float Fac2)
*/
ccl_static_constant float blackbody_table_r[6][3] = {
- { 2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f },
- { 3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f },
- { 4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f },
- { 4.66849800e+03f, 2.85655028e-05f, 1.29075375e-01f },
- { 4.60124770e+03f, 2.89727618e-05f, 1.48001316e-01f },
- { 3.78765709e+03f, 9.36026367e-06f, 3.98995841e-01f },
+ {2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f},
+ {3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f},
+ {4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f},
+ {4.66849800e+03f, 2.85655028e-05f, 1.29075375e-01f},
+ {4.60124770e+03f, 2.89727618e-05f, 1.48001316e-01f},
+ {3.78765709e+03f, 9.36026367e-06f, 3.98995841e-01f},
};
ccl_static_constant float blackbody_table_g[6][3] = {
- { -7.50343014e+02f, 3.15679613e-04f, 4.73464526e-01f },
- { -1.00402363e+03f, 1.29189794e-04f, 9.08181524e-01f },
- { -1.22075471e+03f, 2.56245413e-05f, 1.20753416e+00f },
- { -1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f },
- { -1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f },
- { -5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f },
+ {-7.50343014e+02f, 3.15679613e-04f, 4.73464526e-01f},
+ {-1.00402363e+03f, 1.29189794e-04f, 9.08181524e-01f},
+ {-1.22075471e+03f, 2.56245413e-05f, 1.20753416e+00f},
+ {-1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f},
+ {-1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f},
+ {-5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f},
};
ccl_static_constant float blackbody_table_b[6][4] = {
- { 0.0f, 0.0f, 0.0f, 0.0f }, /* zeros should be optimized by compiler */
- { 0.0f, 0.0f, 0.0f, 0.0f },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- { -2.02524603e-11f, 1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f },
- { -2.22463426e-13f, -1.55078698e-08f, 3.81675160e-04f, -7.30646033e-01f },
- { 6.72595954e-13f, -2.73059993e-08f, 4.24068546e-04f, -7.52204323e-01f },
+ {0.0f, 0.0f, 0.0f, 0.0f}, /* zeros should be optimized by compiler */
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ {-2.02524603e-11f, 1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f},
+ {-2.22463426e-13f, -1.55078698e-08f, 3.81675160e-04f, -7.30646033e-01f},
+ {6.72595954e-13f, -2.73059993e-08f, 4.24068546e-04f, -7.52204323e-01f},
};
-
ccl_device float3 svm_math_blackbody_color(float t)
{
- /* TODO(lukas): Reimplement in XYZ. */
-
- if(t >= 12000.0f) {
- return make_float3(0.826270103f, 0.994478524f, 1.56626022f);
- }
- else if(t < 965.0f) {
- /* For 800 <= t < 965 color does not change in OSL implementation, so keep color the same */
- return make_float3(4.70366907f, 0.0f, 0.0f);
- }
-
- int i = (t >= 6365.0f)? 5:
- (t >= 3315.0f)? 4:
- (t >= 1902.0f)? 3:
- (t >= 1449.0f)? 2:
- (t >= 1167.0f)? 1: 0;
-
- ccl_constant float *r = blackbody_table_r[i];
- ccl_constant float *g = blackbody_table_g[i];
- ccl_constant float *b = blackbody_table_b[i];
-
- const float t_inv = 1.0f / t;
- return make_float3(r[0] * t_inv + r[1] * t + r[2],
- g[0] * t_inv + g[1] * t + g[2],
- ((b[0] * t + b[1]) * t + b[2]) * t + b[3]);
+ /* TODO(lukas): Reimplement in XYZ. */
+
+ if (t >= 12000.0f) {
+ return make_float3(0.826270103f, 0.994478524f, 1.56626022f);
+ }
+ else if (t < 965.0f) {
+ /* For 800 <= t < 965 color does not change in OSL implementation, so keep color the same */
+ return make_float3(4.70366907f, 0.0f, 0.0f);
+ }
+
+ int i = (t >= 6365.0f) ?
+ 5 :
+ (t >= 3315.0f) ? 4 :
+ (t >= 1902.0f) ? 3 : (t >= 1449.0f) ? 2 : (t >= 1167.0f) ? 1 : 0;
+
+ ccl_constant float *r = blackbody_table_r[i];
+ ccl_constant float *g = blackbody_table_g[i];
+ ccl_constant float *b = blackbody_table_b[i];
+
+ const float t_inv = 1.0f / t;
+ return make_float3(r[0] * t_inv + r[1] * t + r[2],
+ g[0] * t_inv + g[1] * t + g[2],
+ ((b[0] * t + b[1]) * t + b[2]) * t + b[3]);
}
ccl_device_inline float3 svm_math_gamma_color(float3 color, float gamma)
{
- if(gamma == 0.0f)
- return make_float3(1.0f, 1.0f, 1.0f);
+ if (gamma == 0.0f)
+ return make_float3(1.0f, 1.0f, 1.0f);
- if(color.x > 0.0f)
- color.x = powf(color.x, gamma);
- if(color.y > 0.0f)
- color.y = powf(color.y, gamma);
- if(color.z > 0.0f)
- color.z = powf(color.z, gamma);
+ if (color.x > 0.0f)
+ color.x = powf(color.x, gamma);
+ if (color.y > 0.0f)
+ color.y = powf(color.y, gamma);
+ if (color.z > 0.0f)
+ color.z = powf(color.z, gamma);
- return color;
+ return color;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_mix.h b/intern/cycles/kernel/svm/svm_mix.h
index 903a4dacebf..15114bfd5e4 100644
--- a/intern/cycles/kernel/svm/svm_mix.h
+++ b/intern/cycles/kernel/svm/svm_mix.h
@@ -18,17 +18,23 @@ CCL_NAMESPACE_BEGIN
/* Node */
-ccl_device void svm_node_mix(KernelGlobals *kg, ShaderData *sd, float *stack, uint fac_offset, uint c1_offset, uint c2_offset, int *offset)
+ccl_device void svm_node_mix(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint fac_offset,
+ uint c1_offset,
+ uint c2_offset,
+ int *offset)
{
- /* read extra data */
- uint4 node1 = read_node(kg, offset);
+ /* read extra data */
+ uint4 node1 = read_node(kg, offset);
- float fac = stack_load_float(stack, fac_offset);
- float3 c1 = stack_load_float3(stack, c1_offset);
- float3 c2 = stack_load_float3(stack, c2_offset);
- float3 result = svm_mix((NodeMix)node1.y, fac, c1, c2);
+ float fac = stack_load_float(stack, fac_offset);
+ float3 c1 = stack_load_float3(stack, c1_offset);
+ float3 c2 = stack_load_float3(stack, c2_offset);
+ float3 result = svm_mix((NodeMix)node1.y, fac, c1, c2);
- stack_store_float3(stack, node1.z, result);
+ stack_store_float3(stack, node1.z, result);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_musgrave.h b/intern/cycles/kernel/svm/svm_musgrave.h
index 5d9e12628ca..67fb5ca6241 100644
--- a/intern/cycles/kernel/svm/svm_musgrave.h
+++ b/intern/cycles/kernel/svm/svm_musgrave.h
@@ -27,23 +27,23 @@ CCL_NAMESPACE_BEGIN
ccl_device_noinline float noise_musgrave_fBm(float3 p, float H, float lacunarity, float octaves)
{
- float rmd;
- float value = 0.0f;
- float pwr = 1.0f;
- float pwHL = powf(lacunarity, -H);
- int i;
-
- for(i = 0; i < float_to_int(octaves); i++) {
- value += snoise(p) * pwr;
- pwr *= pwHL;
- p *= lacunarity;
- }
-
- rmd = octaves - floorf(octaves);
- if(rmd != 0.0f)
- value += rmd * snoise(p) * pwr;
-
- return value;
+ float rmd;
+ float value = 0.0f;
+ float pwr = 1.0f;
+ float pwHL = powf(lacunarity, -H);
+ int i;
+
+ for (i = 0; i < float_to_int(octaves); i++) {
+ value += snoise(p) * pwr;
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f)
+ value += rmd * snoise(p) * pwr;
+
+ return value;
}
/* Musgrave Multifractal
@@ -53,25 +53,28 @@ ccl_device_noinline float noise_musgrave_fBm(float3 p, float H, float lacunarity
* octaves: number of frequencies in the fBm
*/
-ccl_device_noinline float noise_musgrave_multi_fractal(float3 p, float H, float lacunarity, float octaves)
+ccl_device_noinline float noise_musgrave_multi_fractal(float3 p,
+ float H,
+ float lacunarity,
+ float octaves)
{
- float rmd;
- float value = 1.0f;
- float pwr = 1.0f;
- float pwHL = powf(lacunarity, -H);
- int i;
-
- for(i = 0; i < float_to_int(octaves); i++) {
- value *= (pwr * snoise(p) + 1.0f);
- pwr *= pwHL;
- p *= lacunarity;
- }
-
- rmd = octaves - floorf(octaves);
- if(rmd != 0.0f)
- value *= (rmd * pwr * snoise(p) + 1.0f); /* correct? */
-
- return value;
+ float rmd;
+ float value = 1.0f;
+ float pwr = 1.0f;
+ float pwHL = powf(lacunarity, -H);
+ int i;
+
+ for (i = 0; i < float_to_int(octaves); i++) {
+ value *= (pwr * snoise(p) + 1.0f);
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f)
+ value *= (rmd * pwr * snoise(p) + 1.0f); /* correct? */
+
+ return value;
}
/* Musgrave Heterogeneous Terrain
@@ -82,31 +85,32 @@ ccl_device_noinline float noise_musgrave_multi_fractal(float3 p, float H, float
* offset: raises the terrain from `sea level'
*/
-ccl_device_noinline float noise_musgrave_hetero_terrain(float3 p, float H, float lacunarity, float octaves, float offset)
+ccl_device_noinline float noise_musgrave_hetero_terrain(
+ float3 p, float H, float lacunarity, float octaves, float offset)
{
- float value, increment, rmd;
- float pwHL = powf(lacunarity, -H);
- float pwr = pwHL;
- int i;
-
- /* first unscaled octave of function; later octaves are scaled */
- value = offset + snoise(p);
- p *= lacunarity;
-
- for(i = 1; i < float_to_int(octaves); i++) {
- increment = (snoise(p) + offset) * pwr * value;
- value += increment;
- pwr *= pwHL;
- p *= lacunarity;
- }
-
- rmd = octaves - floorf(octaves);
- if(rmd != 0.0f) {
- increment = (snoise(p) + offset) * pwr * value;
- value += rmd * increment;
- }
-
- return value;
+ float value, increment, rmd;
+ float pwHL = powf(lacunarity, -H);
+ float pwr = pwHL;
+ int i;
+
+ /* first unscaled octave of function; later octaves are scaled */
+ value = offset + snoise(p);
+ p *= lacunarity;
+
+ for (i = 1; i < float_to_int(octaves); i++) {
+ increment = (snoise(p) + offset) * pwr * value;
+ value += increment;
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f) {
+ increment = (snoise(p) + offset) * pwr * value;
+ value += rmd * increment;
+ }
+
+ return value;
}
/* Hybrid Additive/Multiplicative Multifractal Terrain
@@ -117,33 +121,34 @@ ccl_device_noinline float noise_musgrave_hetero_terrain(float3 p, float H, float
* offset: raises the terrain from `sea level'
*/
-ccl_device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, float H, float lacunarity, float octaves, float offset, float gain)
+ccl_device_noinline float noise_musgrave_hybrid_multi_fractal(
+ float3 p, float H, float lacunarity, float octaves, float offset, float gain)
{
- float result, signal, weight, rmd;
- float pwHL = powf(lacunarity, -H);
- float pwr = pwHL;
- int i;
-
- result = snoise(p) + offset;
- weight = gain * result;
- p *= lacunarity;
-
- for(i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) {
- if(weight > 1.0f)
- weight = 1.0f;
-
- signal = (snoise(p) + offset) * pwr;
- pwr *= pwHL;
- result += weight * signal;
- weight *= gain * signal;
- p *= lacunarity;
- }
-
- rmd = octaves - floorf(octaves);
- if(rmd != 0.0f)
- result += rmd * ((snoise(p) + offset) * pwr);
-
- return result;
+ float result, signal, weight, rmd;
+ float pwHL = powf(lacunarity, -H);
+ float pwr = pwHL;
+ int i;
+
+ result = snoise(p) + offset;
+ weight = gain * result;
+ p *= lacunarity;
+
+ for (i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) {
+ if (weight > 1.0f)
+ weight = 1.0f;
+
+ signal = (snoise(p) + offset) * pwr;
+ pwr *= pwHL;
+ result += weight * signal;
+ weight *= gain * signal;
+ p *= lacunarity;
+ }
+
+ rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f)
+ result += rmd * ((snoise(p) + offset) * pwr);
+
+ return result;
}
/* Ridged Multifractal Terrain
@@ -154,81 +159,93 @@ ccl_device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, float H,
* offset: raises the terrain from `sea level'
*/
-ccl_device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, float H, float lacunarity, float octaves, float offset, float gain)
+ccl_device_noinline float noise_musgrave_ridged_multi_fractal(
+ float3 p, float H, float lacunarity, float octaves, float offset, float gain)
{
- float result, signal, weight;
- float pwHL = powf(lacunarity, -H);
- float pwr = pwHL;
- int i;
-
- signal = offset - fabsf(snoise(p));
- signal *= signal;
- result = signal;
- weight = 1.0f;
-
- for(i = 1; i < float_to_int(octaves); i++) {
- p *= lacunarity;
- weight = saturate(signal * gain);
- signal = offset - fabsf(snoise(p));
- signal *= signal;
- signal *= weight;
- result += signal * pwr;
- pwr *= pwHL;
- }
-
- return result;
+ float result, signal, weight;
+ float pwHL = powf(lacunarity, -H);
+ float pwr = pwHL;
+ int i;
+
+ signal = offset - fabsf(snoise(p));
+ signal *= signal;
+ result = signal;
+ weight = 1.0f;
+
+ for (i = 1; i < float_to_int(octaves); i++) {
+ p *= lacunarity;
+ weight = saturate(signal * gain);
+ signal = offset - fabsf(snoise(p));
+ signal *= signal;
+ signal *= weight;
+ result += signal * pwr;
+ pwr *= pwHL;
+ }
+
+ return result;
}
/* Shader */
-ccl_device float svm_musgrave(NodeMusgraveType type, float dimension, float lacunarity, float octaves, float offset, float intensity, float gain, float3 p)
+ccl_device float svm_musgrave(NodeMusgraveType type,
+ float dimension,
+ float lacunarity,
+ float octaves,
+ float offset,
+ float intensity,
+ float gain,
+ float3 p)
{
- if(type == NODE_MUSGRAVE_MULTIFRACTAL)
- return intensity*noise_musgrave_multi_fractal(p, dimension, lacunarity, octaves);
- else if(type == NODE_MUSGRAVE_FBM)
- return intensity*noise_musgrave_fBm(p, dimension, lacunarity, octaves);
- else if(type == NODE_MUSGRAVE_HYBRID_MULTIFRACTAL)
- return intensity*noise_musgrave_hybrid_multi_fractal(p, dimension, lacunarity, octaves, offset, gain);
- else if(type == NODE_MUSGRAVE_RIDGED_MULTIFRACTAL)
- return intensity*noise_musgrave_ridged_multi_fractal(p, dimension, lacunarity, octaves, offset, gain);
- else if(type == NODE_MUSGRAVE_HETERO_TERRAIN)
- return intensity*noise_musgrave_hetero_terrain(p, dimension, lacunarity, octaves, offset);
-
- return 0.0f;
+ if (type == NODE_MUSGRAVE_MULTIFRACTAL)
+ return intensity * noise_musgrave_multi_fractal(p, dimension, lacunarity, octaves);
+ else if (type == NODE_MUSGRAVE_FBM)
+ return intensity * noise_musgrave_fBm(p, dimension, lacunarity, octaves);
+ else if (type == NODE_MUSGRAVE_HYBRID_MULTIFRACTAL)
+ return intensity *
+ noise_musgrave_hybrid_multi_fractal(p, dimension, lacunarity, octaves, offset, gain);
+ else if (type == NODE_MUSGRAVE_RIDGED_MULTIFRACTAL)
+ return intensity *
+ noise_musgrave_ridged_multi_fractal(p, dimension, lacunarity, octaves, offset, gain);
+ else if (type == NODE_MUSGRAVE_HETERO_TERRAIN)
+ return intensity * noise_musgrave_hetero_terrain(p, dimension, lacunarity, octaves, offset);
+
+ return 0.0f;
}
-ccl_device void svm_node_tex_musgrave(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_tex_musgrave(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
- uint4 node2 = read_node(kg, offset);
- uint4 node3 = read_node(kg, offset);
-
- uint type, co_offset, color_offset, fac_offset;
- uint dimension_offset, lacunarity_offset, detail_offset, offset_offset;
- uint gain_offset, scale_offset;
-
- decode_node_uchar4(node.y, &type, &co_offset, &color_offset, &fac_offset);
- decode_node_uchar4(node.z, &dimension_offset, &lacunarity_offset, &detail_offset, &offset_offset);
- decode_node_uchar4(node.w, &gain_offset, &scale_offset, NULL, NULL);
-
- float3 co = stack_load_float3(stack, co_offset);
- float dimension = stack_load_float_default(stack, dimension_offset, node2.x);
- float lacunarity = stack_load_float_default(stack, lacunarity_offset, node2.y);
- float detail = stack_load_float_default(stack, detail_offset, node2.z);
- float foffset = stack_load_float_default(stack, offset_offset, node2.w);
- float gain = stack_load_float_default(stack, gain_offset, node3.x);
- float scale = stack_load_float_default(stack, scale_offset, node3.y);
-
- dimension = fmaxf(dimension, 1e-5f);
- detail = clamp(detail, 0.0f, 16.0f);
- lacunarity = fmaxf(lacunarity, 1e-5f);
-
- float f = svm_musgrave((NodeMusgraveType)type,
- dimension, lacunarity, detail, foffset, 1.0f, gain, co*scale);
-
- if(stack_valid(fac_offset))
- stack_store_float(stack, fac_offset, f);
- if(stack_valid(color_offset))
- stack_store_float3(stack, color_offset, make_float3(f, f, f));
+ uint4 node2 = read_node(kg, offset);
+ uint4 node3 = read_node(kg, offset);
+
+ uint type, co_offset, color_offset, fac_offset;
+ uint dimension_offset, lacunarity_offset, detail_offset, offset_offset;
+ uint gain_offset, scale_offset;
+
+ decode_node_uchar4(node.y, &type, &co_offset, &color_offset, &fac_offset);
+ decode_node_uchar4(
+ node.z, &dimension_offset, &lacunarity_offset, &detail_offset, &offset_offset);
+ decode_node_uchar4(node.w, &gain_offset, &scale_offset, NULL, NULL);
+
+ float3 co = stack_load_float3(stack, co_offset);
+ float dimension = stack_load_float_default(stack, dimension_offset, node2.x);
+ float lacunarity = stack_load_float_default(stack, lacunarity_offset, node2.y);
+ float detail = stack_load_float_default(stack, detail_offset, node2.z);
+ float foffset = stack_load_float_default(stack, offset_offset, node2.w);
+ float gain = stack_load_float_default(stack, gain_offset, node3.x);
+ float scale = stack_load_float_default(stack, scale_offset, node3.y);
+
+ dimension = fmaxf(dimension, 1e-5f);
+ detail = clamp(detail, 0.0f, 16.0f);
+ lacunarity = fmaxf(lacunarity, 1e-5f);
+
+ float f = svm_musgrave(
+ (NodeMusgraveType)type, dimension, lacunarity, detail, foffset, 1.0f, gain, co * scale);
+
+ if (stack_valid(fac_offset))
+ stack_store_float(stack, fac_offset, f);
+ if (stack_valid(color_offset))
+ stack_store_float3(stack, color_offset, make_float3(f, f, f));
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h
index 8c425ecf326..322579ccfe3 100644
--- a/intern/cycles/kernel/svm/svm_noise.h
+++ b/intern/cycles/kernel/svm/svm_noise.h
@@ -33,280 +33,302 @@
CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_SSE2__
-ccl_device_inline ssei quick_floor_sse(const ssef& x)
+ccl_device_inline ssei quick_floor_sse(const ssef &x)
{
- ssei b = truncatei(x);
- ssei isneg = cast((x < ssef(0.0f)).m128);
- return b + isneg; // unsaturated add 0xffffffff is the same as subtract -1
+ ssei b = truncatei(x);
+ ssei isneg = cast((x < ssef(0.0f)).m128);
+ return b + isneg; // unsaturated add 0xffffffff is the same as subtract -1
}
#endif
ccl_device uint hash(uint kx, uint ky, uint kz)
{
- // define some handy macros
-#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
-#define final(a,b,c) \
-{ \
- c ^= b; c -= rot(b,14); \
- a ^= c; a -= rot(c,11); \
- b ^= a; b -= rot(a,25); \
- c ^= b; c -= rot(b,16); \
- a ^= c; a -= rot(c,4); \
- b ^= a; b -= rot(a,14); \
- c ^= b; c -= rot(b,24); \
-}
- // now hash the data!
- uint a, b, c, len = 3;
- a = b = c = 0xdeadbeef + (len << 2) + 13;
-
- c += kz;
- b += ky;
- a += kx;
- final(a, b, c);
-
- return c;
- // macros not needed anymore
+ // define some handy macros
+#define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
+#define final(a, b, c) \
+ { \
+ c ^= b; \
+ c -= rot(b, 14); \
+ a ^= c; \
+ a -= rot(c, 11); \
+ b ^= a; \
+ b -= rot(a, 25); \
+ c ^= b; \
+ c -= rot(b, 16); \
+ a ^= c; \
+ a -= rot(c, 4); \
+ b ^= a; \
+ b -= rot(a, 14); \
+ c ^= b; \
+ c -= rot(b, 24); \
+ }
+ // now hash the data!
+ uint a, b, c, len = 3;
+ a = b = c = 0xdeadbeef + (len << 2) + 13;
+
+ c += kz;
+ b += ky;
+ a += kx;
+ final(a, b, c);
+
+ return c;
+ // macros not needed anymore
#undef rot
#undef final
}
#ifdef __KERNEL_SSE2__
-ccl_device_inline ssei hash_sse(const ssei& kx, const ssei& ky, const ssei& kz)
+ccl_device_inline ssei hash_sse(const ssei &kx, const ssei &ky, const ssei &kz)
{
-# define rot(x,k) (((x)<<(k)) | (srl(x, 32-(k))))
-# define xor_rot(a, b, c) do {a = a^b; a = a - rot(b, c);} while(0)
-
- uint len = 3;
- ssei magic = ssei(0xdeadbeef + (len << 2) + 13);
- ssei a = magic + kx;
- ssei b = magic + ky;
- ssei c = magic + kz;
-
- xor_rot(c, b, 14);
- xor_rot(a, c, 11);
- xor_rot(b, a, 25);
- xor_rot(c, b, 16);
- xor_rot(a, c, 4);
- xor_rot(b, a, 14);
- xor_rot(c, b, 24);
-
- return c;
+# define rot(x, k) (((x) << (k)) | (srl(x, 32 - (k))))
+# define xor_rot(a, b, c) \
+ do { \
+ a = a ^ b; \
+ a = a - rot(b, c); \
+ } while (0)
+
+ uint len = 3;
+ ssei magic = ssei(0xdeadbeef + (len << 2) + 13);
+ ssei a = magic + kx;
+ ssei b = magic + ky;
+ ssei c = magic + kz;
+
+ xor_rot(c, b, 14);
+ xor_rot(a, c, 11);
+ xor_rot(b, a, 25);
+ xor_rot(c, b, 16);
+ xor_rot(a, c, 4);
+ xor_rot(b, a, 14);
+ xor_rot(c, b, 24);
+
+ return c;
# undef rot
# undef xor_rot
}
#endif
-#if 0 // unused
+#if 0 // unused
ccl_device int imod(int a, int b)
{
- a %= b;
- return a < 0 ? a + b : a;
+ a %= b;
+ return a < 0 ? a + b : a;
}
ccl_device uint phash(int kx, int ky, int kz, int3 p)
{
- return hash(imod(kx, p.x), imod(ky, p.y), imod(kz, p.z));
+ return hash(imod(kx, p.x), imod(ky, p.y), imod(kz, p.z));
}
#endif
#ifndef __KERNEL_SSE2__
-ccl_device float floorfrac(float x, int* i)
+ccl_device float floorfrac(float x, int *i)
{
- *i = quick_floor_to_int(x);
- return x - *i;
+ *i = quick_floor_to_int(x);
+ return x - *i;
}
#else
-ccl_device_inline ssef floorfrac_sse(const ssef& x, ssei *i)
+ccl_device_inline ssef floorfrac_sse(const ssef &x, ssei *i)
{
- *i = quick_floor_sse(x);
- return x - ssef(*i);
+ *i = quick_floor_sse(x);
+ return x - ssef(*i);
}
#endif
#ifndef __KERNEL_SSE2__
ccl_device float fade(float t)
{
- return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f);
+ return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f);
}
#else
ccl_device_inline ssef fade_sse(const ssef *t)
{
- ssef a = madd(*t, ssef(6.0f), ssef(-15.0f));
- ssef b = madd(*t, a, ssef(10.0f));
- return ((*t) * (*t)) * ((*t) * b);
+ ssef a = madd(*t, ssef(6.0f), ssef(-15.0f));
+ ssef b = madd(*t, a, ssef(10.0f));
+ return ((*t) * (*t)) * ((*t) * b);
}
#endif
#ifndef __KERNEL_SSE2__
ccl_device float nerp(float t, float a, float b)
{
- return (1.0f - t) * a + t * b;
+ return (1.0f - t) * a + t * b;
}
#else
-ccl_device_inline ssef nerp_sse(const ssef& t, const ssef& a, const ssef& b)
+ccl_device_inline ssef nerp_sse(const ssef &t, const ssef &a, const ssef &b)
{
- ssef x1 = (ssef(1.0f) - t) * a;
- return madd(t, b, x1);
+ ssef x1 = (ssef(1.0f) - t) * a;
+ return madd(t, b, x1);
}
#endif
#ifndef __KERNEL_SSE2__
ccl_device float grad(int hash, float x, float y, float z)
{
- // use vectors pointing to the edges of the cube
- int h = hash & 15;
- float u = h<8 ? x : y;
- float vt = ((h == 12) | (h == 14)) ? x : z;
- float v = h < 4 ? y : vt;
- return ((h&1) ? -u : u) + ((h&2) ? -v : v);
+ // use vectors pointing to the edges of the cube
+ int h = hash & 15;
+ float u = h < 8 ? x : y;
+ float vt = ((h == 12) | (h == 14)) ? x : z;
+ float v = h < 4 ? y : vt;
+ return ((h & 1) ? -u : u) + ((h & 2) ? -v : v);
}
#else
-ccl_device_inline ssef grad_sse(const ssei& hash, const ssef& x, const ssef& y, const ssef& z)
+ccl_device_inline ssef grad_sse(const ssei &hash, const ssef &x, const ssef &y, const ssef &z)
{
- ssei c1 = ssei(1);
- ssei c2 = ssei(2);
+ ssei c1 = ssei(1);
+ ssei c2 = ssei(2);
- ssei h = hash & ssei(15); // h = hash & 15
+ ssei h = hash & ssei(15); // h = hash & 15
- sseb case_ux = h < ssei(8); // 0xffffffff if h < 8 else 0
+ sseb case_ux = h < ssei(8); // 0xffffffff if h < 8 else 0
- ssef u = select(case_ux, x, y); // u = h<8 ? x : y
+ ssef u = select(case_ux, x, y); // u = h<8 ? x : y
- sseb case_vy = h < ssei(4); // 0xffffffff if h < 4 else 0
+ sseb case_vy = h < ssei(4); // 0xffffffff if h < 4 else 0
- sseb case_h12 = h == ssei(12); // 0xffffffff if h == 12 else 0
- sseb case_h14 = h == ssei(14); // 0xffffffff if h == 14 else 0
+ sseb case_h12 = h == ssei(12); // 0xffffffff if h == 12 else 0
+ sseb case_h14 = h == ssei(14); // 0xffffffff if h == 14 else 0
- sseb case_vx = case_h12 | case_h14; // 0xffffffff if h == 12 or h == 14 else 0
+ sseb case_vx = case_h12 | case_h14; // 0xffffffff if h == 12 or h == 14 else 0
- ssef v = select(case_vy, y, select(case_vx, x, z)); // v = h<4 ? y : h == 12 || h == 14 ? x : z
+ ssef v = select(case_vy, y, select(case_vx, x, z)); // v = h<4 ? y : h == 12 || h == 14 ? x : z
- ssei case_uneg = (h & c1) << 31; // 1<<31 if h&1 else 0
- ssef case_uneg_mask = cast(case_uneg); // -0.0 if h&1 else +0.0
- ssef ru = u ^ case_uneg_mask; // -u if h&1 else u (copy float sign)
+ ssei case_uneg = (h & c1) << 31; // 1<<31 if h&1 else 0
+ ssef case_uneg_mask = cast(case_uneg); // -0.0 if h&1 else +0.0
+ ssef ru = u ^ case_uneg_mask; // -u if h&1 else u (copy float sign)
- ssei case_vneg = (h & c2) << 30; // 2<<30 if h&2 else 0
- ssef case_vneg_mask = cast(case_vneg); // -0.0 if h&2 else +0.0
- ssef rv = v ^ case_vneg_mask; // -v if h&2 else v (copy float sign)
+ ssei case_vneg = (h & c2) << 30; // 2<<30 if h&2 else 0
+ ssef case_vneg_mask = cast(case_vneg); // -0.0 if h&2 else +0.0
+ ssef rv = v ^ case_vneg_mask; // -v if h&2 else v (copy float sign)
- ssef r = ru + rv; // ((h&1) ? -u : u) + ((h&2) ? -v : v)
- return r;
+ ssef r = ru + rv; // ((h&1) ? -u : u) + ((h&2) ? -v : v)
+ return r;
}
#endif
#ifndef __KERNEL_SSE2__
ccl_device float scale3(float result)
{
- return 0.9820f * result;
+ return 0.9820f * result;
}
#else
-ccl_device_inline ssef scale3_sse(const ssef& result)
+ccl_device_inline ssef scale3_sse(const ssef &result)
{
- return ssef(0.9820f) * result;
+ return ssef(0.9820f) * result;
}
#endif
#ifndef __KERNEL_SSE2__
ccl_device_noinline float perlin(float x, float y, float z)
{
- int X; float fx = floorfrac(x, &X);
- int Y; float fy = floorfrac(y, &Y);
- int Z; float fz = floorfrac(z, &Z);
-
- float u = fade(fx);
- float v = fade(fy);
- float w = fade(fz);
-
- float result;
-
- result = nerp (w, nerp (v, nerp (u, grad (hash (X , Y , Z ), fx , fy , fz ),
- grad (hash (X+1, Y , Z ), fx-1.0f, fy , fz )),
- nerp (u, grad (hash (X , Y+1, Z ), fx , fy-1.0f, fz ),
- grad (hash (X+1, Y+1, Z ), fx-1.0f, fy-1.0f, fz ))),
- nerp (v, nerp (u, grad (hash (X , Y , Z+1), fx , fy , fz-1.0f ),
- grad (hash (X+1, Y , Z+1), fx-1.0f, fy , fz-1.0f )),
- nerp (u, grad (hash (X , Y+1, Z+1), fx , fy-1.0f, fz-1.0f ),
- grad (hash (X+1, Y+1, Z+1), fx-1.0f, fy-1.0f, fz-1.0f ))));
- float r = scale3(result);
-
- /* can happen for big coordinates, things even out to 0.0 then anyway */
- return (isfinite(r))? r: 0.0f;
+ int X;
+ float fx = floorfrac(x, &X);
+ int Y;
+ float fy = floorfrac(y, &Y);
+ int Z;
+ float fz = floorfrac(z, &Z);
+
+ float u = fade(fx);
+ float v = fade(fy);
+ float w = fade(fz);
+
+ float result;
+
+ result = nerp(
+ w,
+ nerp(v,
+ nerp(u, grad(hash(X, Y, Z), fx, fy, fz), grad(hash(X + 1, Y, Z), fx - 1.0f, fy, fz)),
+ nerp(u,
+ grad(hash(X, Y + 1, Z), fx, fy - 1.0f, fz),
+ grad(hash(X + 1, Y + 1, Z), fx - 1.0f, fy - 1.0f, fz))),
+ nerp(v,
+ nerp(u,
+ grad(hash(X, Y, Z + 1), fx, fy, fz - 1.0f),
+ grad(hash(X + 1, Y, Z + 1), fx - 1.0f, fy, fz - 1.0f)),
+ nerp(u,
+ grad(hash(X, Y + 1, Z + 1), fx, fy - 1.0f, fz - 1.0f),
+ grad(hash(X + 1, Y + 1, Z + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f))));
+ float r = scale3(result);
+
+ /* can happen for big coordinates, things even out to 0.0 then anyway */
+ return (isfinite(r)) ? r : 0.0f;
}
#else
ccl_device_noinline float perlin(float x, float y, float z)
{
- ssef xyz = ssef(x, y, z, 0.0f);
- ssei XYZ;
+ ssef xyz = ssef(x, y, z, 0.0f);
+ ssei XYZ;
- ssef fxyz = floorfrac_sse(xyz, &XYZ);
+ ssef fxyz = floorfrac_sse(xyz, &XYZ);
- ssef uvw = fade_sse(&fxyz);
- ssef u = shuffle<0>(uvw), v = shuffle<1>(uvw), w = shuffle<2>(uvw);
+ ssef uvw = fade_sse(&fxyz);
+ ssef u = shuffle<0>(uvw), v = shuffle<1>(uvw), w = shuffle<2>(uvw);
- ssei XYZ_ofc = XYZ + ssei(1);
- ssei vdy = shuffle<1, 1, 1, 1>(XYZ, XYZ_ofc); // +0, +0, +1, +1
- ssei vdz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ_ofc)); // +0, +1, +0, +1
+ ssei XYZ_ofc = XYZ + ssei(1);
+ ssei vdy = shuffle<1, 1, 1, 1>(XYZ, XYZ_ofc); // +0, +0, +1, +1
+ ssei vdz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ_ofc)); // +0, +1, +0, +1
- ssei h1 = hash_sse(shuffle<0>(XYZ), vdy, vdz); // hash directions 000, 001, 010, 011
- ssei h2 = hash_sse(shuffle<0>(XYZ_ofc), vdy, vdz); // hash directions 100, 101, 110, 111
+ ssei h1 = hash_sse(shuffle<0>(XYZ), vdy, vdz); // hash directions 000, 001, 010, 011
+ ssei h2 = hash_sse(shuffle<0>(XYZ_ofc), vdy, vdz); // hash directions 100, 101, 110, 111
- ssef fxyz_ofc = fxyz - ssef(1.0f);
- ssef vfy = shuffle<1, 1, 1, 1>(fxyz, fxyz_ofc);
- ssef vfz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz_ofc));
+ ssef fxyz_ofc = fxyz - ssef(1.0f);
+ ssef vfy = shuffle<1, 1, 1, 1>(fxyz, fxyz_ofc);
+ ssef vfz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz_ofc));
- ssef g1 = grad_sse(h1, shuffle<0>(fxyz), vfy, vfz);
- ssef g2 = grad_sse(h2, shuffle<0>(fxyz_ofc), vfy, vfz);
- ssef n1 = nerp_sse(u, g1, g2);
+ ssef g1 = grad_sse(h1, shuffle<0>(fxyz), vfy, vfz);
+ ssef g2 = grad_sse(h2, shuffle<0>(fxyz_ofc), vfy, vfz);
+ ssef n1 = nerp_sse(u, g1, g2);
- ssef n1_half = shuffle<2, 3, 2, 3>(n1); // extract 2 floats to a separate vector
- ssef n2 = nerp_sse(v, n1, n1_half); // process nerp([a, b, _, _], [c, d, _, _]) -> [a', b', _, _]
+ ssef n1_half = shuffle<2, 3, 2, 3>(n1); // extract 2 floats to a separate vector
+ ssef n2 = nerp_sse(
+ v, n1, n1_half); // process nerp([a, b, _, _], [c, d, _, _]) -> [a', b', _, _]
- ssef n2_second = shuffle<1>(n2); // extract b to a separate vector
- ssef result = nerp_sse(w, n2, n2_second); // process nerp([a', _, _, _], [b', _, _, _]) -> [a'', _, _, _]
+ ssef n2_second = shuffle<1>(n2); // extract b to a separate vector
+ ssef result = nerp_sse(
+ w, n2, n2_second); // process nerp([a', _, _, _], [b', _, _, _]) -> [a'', _, _, _]
- ssef r = scale3_sse(result);
+ ssef r = scale3_sse(result);
- ssef infmask = cast(ssei(0x7f800000));
- ssef rinfmask = ((r & infmask) == infmask).m128; // 0xffffffff if r is inf/-inf/nan else 0
- ssef rfinite = andnot(rinfmask, r); // 0 if r is inf/-inf/nan else r
- return extract<0>(rfinite);
+ ssef infmask = cast(ssei(0x7f800000));
+ ssef rinfmask = ((r & infmask) == infmask).m128; // 0xffffffff if r is inf/-inf/nan else 0
+ ssef rfinite = andnot(rinfmask, r); // 0 if r is inf/-inf/nan else r
+ return extract<0>(rfinite);
}
#endif
/* perlin noise in range 0..1 */
ccl_device float noise(float3 p)
{
- float r = perlin(p.x, p.y, p.z);
- return 0.5f*r + 0.5f;
+ float r = perlin(p.x, p.y, p.z);
+ return 0.5f * r + 0.5f;
}
/* perlin noise in range -1..1 */
ccl_device float snoise(float3 p)
{
- return perlin(p.x, p.y, p.z);
+ return perlin(p.x, p.y, p.z);
}
/* cell noise */
ccl_device float cellnoise(float3 p)
{
- int3 ip = quick_floor_to_int3(p);
- return bits_to_01(hash(ip.x, ip.y, ip.z));
+ int3 ip = quick_floor_to_int3(p);
+ return bits_to_01(hash(ip.x, ip.y, ip.z));
}
ccl_device float3 cellnoise3(float3 p)
{
- int3 ip = quick_floor_to_int3(p);
+ int3 ip = quick_floor_to_int3(p);
#ifndef __KERNEL_SSE__
- float r = bits_to_01(hash(ip.x, ip.y, ip.z));
- float g = bits_to_01(hash(ip.y, ip.x, ip.z));
- float b = bits_to_01(hash(ip.y, ip.z, ip.x));
- return make_float3(r, g, b);
+ float r = bits_to_01(hash(ip.x, ip.y, ip.z));
+ float g = bits_to_01(hash(ip.y, ip.x, ip.z));
+ float b = bits_to_01(hash(ip.y, ip.z, ip.x));
+ return make_float3(r, g, b);
#else
- ssei ip_yxz = shuffle<1, 0, 2, 3>(ssei(ip.m128));
- ssei ip_xyy = shuffle<0, 1, 1, 3>(ssei(ip.m128));
- ssei ip_zzx = shuffle<2, 2, 0, 3>(ssei(ip.m128));
- ssei bits = hash_sse(ip_xyy, ip_yxz, ip_zzx);
- return float3(uint32_to_float(bits) * ssef(1.0f/(float)0xFFFFFFFF));
+ ssei ip_yxz = shuffle<1, 0, 2, 3>(ssei(ip.m128));
+ ssei ip_xyy = shuffle<0, 1, 1, 3>(ssei(ip.m128));
+ ssei ip_zzx = shuffle<2, 2, 0, 3>(ssei(ip.m128));
+ ssei bits = hash_sse(ip_xyy, ip_yxz, ip_zzx);
+ return float3(uint32_to_float(bits) * ssef(1.0f / (float)0xFFFFFFFF));
#endif
}
diff --git a/intern/cycles/kernel/svm/svm_noisetex.h b/intern/cycles/kernel/svm/svm_noisetex.h
index c02940f96d6..3324e86fcd8 100644
--- a/intern/cycles/kernel/svm/svm_noisetex.h
+++ b/intern/cycles/kernel/svm/svm_noisetex.h
@@ -18,42 +18,43 @@ CCL_NAMESPACE_BEGIN
/* Noise */
-ccl_device void svm_node_tex_noise(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_tex_noise(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
- uint co_offset, scale_offset, detail_offset, distortion_offset, fac_offset, color_offset;
+ uint co_offset, scale_offset, detail_offset, distortion_offset, fac_offset, color_offset;
- decode_node_uchar4(node.y, &co_offset, &scale_offset, &detail_offset, &distortion_offset);
- decode_node_uchar4(node.z, &color_offset, &fac_offset, NULL, NULL);
+ decode_node_uchar4(node.y, &co_offset, &scale_offset, &detail_offset, &distortion_offset);
+ decode_node_uchar4(node.z, &color_offset, &fac_offset, NULL, NULL);
- uint4 node2 = read_node(kg, offset);
+ uint4 node2 = read_node(kg, offset);
- float scale = stack_load_float_default(stack, scale_offset, node2.x);
- float detail = stack_load_float_default(stack, detail_offset, node2.y);
- float distortion = stack_load_float_default(stack, distortion_offset, node2.z);
- float3 p = stack_load_float3(stack, co_offset) * scale;
- int hard = 0;
+ float scale = stack_load_float_default(stack, scale_offset, node2.x);
+ float detail = stack_load_float_default(stack, detail_offset, node2.y);
+ float distortion = stack_load_float_default(stack, distortion_offset, node2.z);
+ float3 p = stack_load_float3(stack, co_offset) * scale;
+ int hard = 0;
- if(distortion != 0.0f) {
- float3 r, offset = make_float3(13.5f, 13.5f, 13.5f);
+ if (distortion != 0.0f) {
+ float3 r, offset = make_float3(13.5f, 13.5f, 13.5f);
- r.x = noise(p + offset) * distortion;
- r.y = noise(p) * distortion;
- r.z = noise(p - offset) * distortion;
+ r.x = noise(p + offset) * distortion;
+ r.y = noise(p) * distortion;
+ r.z = noise(p - offset) * distortion;
- p += r;
- }
+ p += r;
+ }
- float f = noise_turbulence(p, detail, hard);
+ float f = noise_turbulence(p, detail, hard);
- if(stack_valid(fac_offset)) {
- stack_store_float(stack, fac_offset, f);
- }
- if(stack_valid(color_offset)) {
- float3 color = make_float3(f,
- noise_turbulence(make_float3(p.y, p.x, p.z), detail, hard),
- noise_turbulence(make_float3(p.y, p.z, p.x), detail, hard));
- stack_store_float3(stack, color_offset, color);
- }
+ if (stack_valid(fac_offset)) {
+ stack_store_float(stack, fac_offset, f);
+ }
+ if (stack_valid(color_offset)) {
+ float3 color = make_float3(f,
+ noise_turbulence(make_float3(p.y, p.x, p.z), detail, hard),
+ noise_turbulence(make_float3(p.y, p.z, p.x), detail, hard));
+ stack_store_float3(stack, color_offset, color);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_normal.h b/intern/cycles/kernel/svm/svm_normal.h
index fe46d79fe15..4cd3eab0ed2 100644
--- a/intern/cycles/kernel/svm/svm_normal.h
+++ b/intern/cycles/kernel/svm/svm_normal.h
@@ -16,23 +16,29 @@
CCL_NAMESPACE_BEGIN
-ccl_device void svm_node_normal(KernelGlobals *kg, ShaderData *sd, float *stack, uint in_normal_offset, uint out_normal_offset, uint out_dot_offset, int *offset)
+ccl_device void svm_node_normal(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint in_normal_offset,
+ uint out_normal_offset,
+ uint out_dot_offset,
+ int *offset)
{
- /* read extra data */
- uint4 node1 = read_node(kg, offset);
- float3 normal = stack_load_float3(stack, in_normal_offset);
+ /* read extra data */
+ uint4 node1 = read_node(kg, offset);
+ float3 normal = stack_load_float3(stack, in_normal_offset);
- float3 direction;
- direction.x = __int_as_float(node1.x);
- direction.y = __int_as_float(node1.y);
- direction.z = __int_as_float(node1.z);
- direction = normalize(direction);
+ float3 direction;
+ direction.x = __int_as_float(node1.x);
+ direction.y = __int_as_float(node1.y);
+ direction.z = __int_as_float(node1.z);
+ direction = normalize(direction);
- if(stack_valid(out_normal_offset))
- stack_store_float3(stack, out_normal_offset, direction);
+ if (stack_valid(out_normal_offset))
+ stack_store_float3(stack, out_normal_offset, direction);
- if(stack_valid(out_dot_offset))
- stack_store_float(stack, out_dot_offset, dot(direction, normalize(normal)));
+ if (stack_valid(out_dot_offset))
+ stack_store_float(stack, out_dot_offset, dot(direction, normalize(normal)));
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h
index 6f39391057e..6084ee35a1f 100644
--- a/intern/cycles/kernel/svm/svm_ramp.h
+++ b/intern/cycles/kernel/svm/svm_ramp.h
@@ -21,91 +21,84 @@ CCL_NAMESPACE_BEGIN
/* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */
-ccl_device_inline float4 rgb_ramp_lookup(KernelGlobals *kg,
- int offset,
- float f,
- bool interpolate,
- bool extrapolate,
- int table_size)
+ccl_device_inline float4 rgb_ramp_lookup(
+ KernelGlobals *kg, int offset, float f, bool interpolate, bool extrapolate, int table_size)
{
- if((f < 0.0f || f > 1.0f) && extrapolate) {
- float4 t0, dy;
- if(f < 0.0f) {
- t0 = fetch_node_float(kg, offset);
- dy = t0 - fetch_node_float(kg, offset + 1);
- f = -f;
- }
- else {
- t0 = fetch_node_float(kg, offset + table_size - 1);
- dy = t0 - fetch_node_float(kg, offset + table_size - 2);
- f = f - 1.0f;
- }
- return t0 + dy * f * (table_size-1);
- }
-
- f = saturate(f)*(table_size-1);
-
- /* clamp int as well in case of NaN */
- int i = clamp(float_to_int(f), 0, table_size-1);
- float t = f - (float)i;
-
- float4 a = fetch_node_float(kg, offset+i);
-
- if(interpolate && t > 0.0f)
- a = (1.0f - t)*a + t*fetch_node_float(kg, offset+i+1);
-
- return a;
+ if ((f < 0.0f || f > 1.0f) && extrapolate) {
+ float4 t0, dy;
+ if (f < 0.0f) {
+ t0 = fetch_node_float(kg, offset);
+ dy = t0 - fetch_node_float(kg, offset + 1);
+ f = -f;
+ }
+ else {
+ t0 = fetch_node_float(kg, offset + table_size - 1);
+ dy = t0 - fetch_node_float(kg, offset + table_size - 2);
+ f = f - 1.0f;
+ }
+ return t0 + dy * f * (table_size - 1);
+ }
+
+ f = saturate(f) * (table_size - 1);
+
+ /* clamp int as well in case of NaN */
+ int i = clamp(float_to_int(f), 0, table_size - 1);
+ float t = f - (float)i;
+
+ float4 a = fetch_node_float(kg, offset + i);
+
+ if (interpolate && t > 0.0f)
+ a = (1.0f - t) * a + t * fetch_node_float(kg, offset + i + 1);
+
+ return a;
}
-ccl_device void svm_node_rgb_ramp(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_rgb_ramp(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
- uint fac_offset, color_offset, alpha_offset;
- uint interpolate = node.z;
+ uint fac_offset, color_offset, alpha_offset;
+ uint interpolate = node.z;
- decode_node_uchar4(node.y, &fac_offset, &color_offset, &alpha_offset, NULL);
+ decode_node_uchar4(node.y, &fac_offset, &color_offset, &alpha_offset, NULL);
- uint table_size = read_node(kg, offset).x;
+ uint table_size = read_node(kg, offset).x;
- float fac = stack_load_float(stack, fac_offset);
- float4 color = rgb_ramp_lookup(kg, *offset, fac, interpolate, false, table_size);
+ float fac = stack_load_float(stack, fac_offset);
+ float4 color = rgb_ramp_lookup(kg, *offset, fac, interpolate, false, table_size);
- if(stack_valid(color_offset))
- stack_store_float3(stack, color_offset, float4_to_float3(color));
- if(stack_valid(alpha_offset))
- stack_store_float(stack, alpha_offset, color.w);
+ if (stack_valid(color_offset))
+ stack_store_float3(stack, color_offset, float4_to_float3(color));
+ if (stack_valid(alpha_offset))
+ stack_store_float(stack, alpha_offset, color.w);
- *offset += table_size;
+ *offset += table_size;
}
-ccl_device void svm_node_curves(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_curves(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
- uint fac_offset, color_offset, out_offset;
- decode_node_uchar4(node.y,
- &fac_offset,
- &color_offset,
- &out_offset,
- NULL);
+ uint fac_offset, color_offset, out_offset;
+ decode_node_uchar4(node.y, &fac_offset, &color_offset, &out_offset, NULL);
- uint table_size = read_node(kg, offset).x;
+ uint table_size = read_node(kg, offset).x;
- float fac = stack_load_float(stack, fac_offset);
- float3 color = stack_load_float3(stack, color_offset);
+ float fac = stack_load_float(stack, fac_offset);
+ float3 color = stack_load_float3(stack, color_offset);
- const float min_x = __int_as_float(node.z),
- max_x = __int_as_float(node.w);
- const float range_x = max_x - min_x;
- const float3 relpos = (color - make_float3(min_x, min_x, min_x)) / range_x;
+ const float min_x = __int_as_float(node.z), max_x = __int_as_float(node.w);
+ const float range_x = max_x - min_x;
+ const float3 relpos = (color - make_float3(min_x, min_x, min_x)) / range_x;
- float r = rgb_ramp_lookup(kg, *offset, relpos.x, true, true, table_size).x;
- float g = rgb_ramp_lookup(kg, *offset, relpos.y, true, true, table_size).y;
- float b = rgb_ramp_lookup(kg, *offset, relpos.z, true, true, table_size).z;
+ float r = rgb_ramp_lookup(kg, *offset, relpos.x, true, true, table_size).x;
+ float g = rgb_ramp_lookup(kg, *offset, relpos.y, true, true, table_size).y;
+ float b = rgb_ramp_lookup(kg, *offset, relpos.z, true, true, table_size).z;
- color = (1.0f - fac)*color + fac*make_float3(r, g, b);
- stack_store_float3(stack, out_offset, color);
+ color = (1.0f - fac) * color + fac * make_float3(r, g, b);
+ stack_store_float3(stack, out_offset, color);
- *offset += table_size;
+ *offset += table_size;
}
CCL_NAMESPACE_END
-#endif /* __SVM_RAMP_H__ */
+#endif /* __SVM_RAMP_H__ */
diff --git a/intern/cycles/kernel/svm/svm_ramp_util.h b/intern/cycles/kernel/svm/svm_ramp_util.h
index 847108ff1c2..202596c1fe3 100644
--- a/intern/cycles/kernel/svm/svm_ramp_util.h
+++ b/intern/cycles/kernel/svm/svm_ramp_util.h
@@ -21,78 +21,70 @@ CCL_NAMESPACE_BEGIN
/* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */
-ccl_device_inline float3 rgb_ramp_lookup(const float3 *ramp,
- float f,
- bool interpolate,
- bool extrapolate,
- int table_size)
+ccl_device_inline float3
+rgb_ramp_lookup(const float3 *ramp, float f, bool interpolate, bool extrapolate, int table_size)
{
- if((f < 0.0f || f > 1.0f) && extrapolate) {
- float3 t0, dy;
- if(f < 0.0f) {
- t0 = ramp[0];
- dy = t0 - ramp[1],
- f = -f;
- }
- else {
- t0 = ramp[table_size - 1];
- dy = t0 - ramp[table_size - 2];
- f = f - 1.0f;
- }
- return t0 + dy * f * (table_size - 1);
- }
-
- f = clamp(f, 0.0f, 1.0f) * (table_size - 1);
-
- /* clamp int as well in case of NaN */
- int i = clamp(float_to_int(f), 0, table_size-1);
- float t = f - (float)i;
-
- float3 result = ramp[i];
-
- if(interpolate && t > 0.0f) {
- result = (1.0f - t) * result + t * ramp[i + 1];
- }
-
- return result;
+ if ((f < 0.0f || f > 1.0f) && extrapolate) {
+ float3 t0, dy;
+ if (f < 0.0f) {
+ t0 = ramp[0];
+ dy = t0 - ramp[1], f = -f;
+ }
+ else {
+ t0 = ramp[table_size - 1];
+ dy = t0 - ramp[table_size - 2];
+ f = f - 1.0f;
+ }
+ return t0 + dy * f * (table_size - 1);
+ }
+
+ f = clamp(f, 0.0f, 1.0f) * (table_size - 1);
+
+ /* clamp int as well in case of NaN */
+ int i = clamp(float_to_int(f), 0, table_size - 1);
+ float t = f - (float)i;
+
+ float3 result = ramp[i];
+
+ if (interpolate && t > 0.0f) {
+ result = (1.0f - t) * result + t * ramp[i + 1];
+ }
+
+ return result;
}
-ccl_device float float_ramp_lookup(const float *ramp,
- float f,
- bool interpolate,
- bool extrapolate,
- int table_size)
+ccl_device float float_ramp_lookup(
+ const float *ramp, float f, bool interpolate, bool extrapolate, int table_size)
{
- if((f < 0.0f || f > 1.0f) && extrapolate) {
- float t0, dy;
- if(f < 0.0f) {
- t0 = ramp[0];
- dy = t0 - ramp[1],
- f = -f;
- }
- else {
- t0 = ramp[table_size - 1];
- dy = t0 - ramp[table_size - 2];
- f = f - 1.0f;
- }
- return t0 + dy * f * (table_size - 1);
- }
-
- f = clamp(f, 0.0f, 1.0f) * (table_size - 1);
-
- /* clamp int as well in case of NaN */
- int i = clamp(float_to_int(f), 0, table_size-1);
- float t = f - (float)i;
-
- float result = ramp[i];
-
- if(interpolate && t > 0.0f) {
- result = (1.0f - t) * result + t * ramp[i + 1];
- }
-
- return result;
+ if ((f < 0.0f || f > 1.0f) && extrapolate) {
+ float t0, dy;
+ if (f < 0.0f) {
+ t0 = ramp[0];
+ dy = t0 - ramp[1], f = -f;
+ }
+ else {
+ t0 = ramp[table_size - 1];
+ dy = t0 - ramp[table_size - 2];
+ f = f - 1.0f;
+ }
+ return t0 + dy * f * (table_size - 1);
+ }
+
+ f = clamp(f, 0.0f, 1.0f) * (table_size - 1);
+
+ /* clamp int as well in case of NaN */
+ int i = clamp(float_to_int(f), 0, table_size - 1);
+ float t = f - (float)i;
+
+ float result = ramp[i];
+
+ if (interpolate && t > 0.0f) {
+ result = (1.0f - t) * result + t * ramp[i + 1];
+ }
+
+ return result;
}
CCL_NAMESPACE_END
-#endif /* __SVM_RAMP_UTIL_H__ */
+#endif /* __SVM_RAMP_UTIL_H__ */
diff --git a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
index 1096aed2d97..f501252062e 100644
--- a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
+++ b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
@@ -16,38 +16,50 @@
CCL_NAMESPACE_BEGIN
-ccl_device void svm_node_combine_hsv(KernelGlobals *kg, ShaderData *sd, float *stack, uint hue_in, uint saturation_in, uint value_in, int *offset)
+ccl_device void svm_node_combine_hsv(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint hue_in,
+ uint saturation_in,
+ uint value_in,
+ int *offset)
{
- uint4 node1 = read_node(kg, offset);
- uint color_out = node1.y;
+ uint4 node1 = read_node(kg, offset);
+ uint color_out = node1.y;
- float hue = stack_load_float(stack, hue_in);
- float saturation = stack_load_float(stack, saturation_in);
- float value = stack_load_float(stack, value_in);
+ float hue = stack_load_float(stack, hue_in);
+ float saturation = stack_load_float(stack, saturation_in);
+ float value = stack_load_float(stack, value_in);
- /* Combine, and convert back to RGB */
- float3 color = hsv_to_rgb(make_float3(hue, saturation, value));
+ /* Combine, and convert back to RGB */
+ float3 color = hsv_to_rgb(make_float3(hue, saturation, value));
- if(stack_valid(color_out))
- stack_store_float3(stack, color_out, color);
+ if (stack_valid(color_out))
+ stack_store_float3(stack, color_out, color);
}
-ccl_device void svm_node_separate_hsv(KernelGlobals *kg, ShaderData *sd, float *stack, uint color_in, uint hue_out, uint saturation_out, int *offset)
+ccl_device void svm_node_separate_hsv(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint color_in,
+ uint hue_out,
+ uint saturation_out,
+ int *offset)
{
- uint4 node1 = read_node(kg, offset);
- uint value_out = node1.y;
+ uint4 node1 = read_node(kg, offset);
+ uint value_out = node1.y;
- float3 color = stack_load_float3(stack, color_in);
+ float3 color = stack_load_float3(stack, color_in);
- /* Convert to HSV */
- color = rgb_to_hsv(color);
+ /* Convert to HSV */
+ color = rgb_to_hsv(color);
- if(stack_valid(hue_out))
- stack_store_float(stack, hue_out, color.x);
- if(stack_valid(saturation_out))
- stack_store_float(stack, saturation_out, color.y);
- if(stack_valid(value_out))
- stack_store_float(stack, value_out, color.z);
+ if (stack_valid(hue_out))
+ stack_store_float(stack, hue_out, color.x);
+ if (stack_valid(saturation_out))
+ stack_store_float(stack, saturation_out, color.y);
+ if (stack_valid(value_out))
+ stack_store_float(stack, value_out, color.z);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_sepcomb_vector.h b/intern/cycles/kernel/svm/svm_sepcomb_vector.h
index 0d85c0d6f1d..cbf77f1e640 100644
--- a/intern/cycles/kernel/svm/svm_sepcomb_vector.h
+++ b/intern/cycles/kernel/svm/svm_sepcomb_vector.h
@@ -18,26 +18,28 @@ CCL_NAMESPACE_BEGIN
/* Vector combine / separate, used for the RGB and XYZ nodes */
-ccl_device void svm_node_combine_vector(ShaderData *sd, float *stack, uint in_offset, uint vector_index, uint out_offset)
+ccl_device void svm_node_combine_vector(
+ ShaderData *sd, float *stack, uint in_offset, uint vector_index, uint out_offset)
{
- float vector = stack_load_float(stack, in_offset);
+ float vector = stack_load_float(stack, in_offset);
- if(stack_valid(out_offset))
- stack_store_float(stack, out_offset+vector_index, vector);
+ if (stack_valid(out_offset))
+ stack_store_float(stack, out_offset + vector_index, vector);
}
-ccl_device void svm_node_separate_vector(ShaderData *sd, float *stack, uint ivector_offset, uint vector_index, uint out_offset)
+ccl_device void svm_node_separate_vector(
+ ShaderData *sd, float *stack, uint ivector_offset, uint vector_index, uint out_offset)
{
- float3 vector = stack_load_float3(stack, ivector_offset);
+ float3 vector = stack_load_float3(stack, ivector_offset);
- if(stack_valid(out_offset)) {
- if(vector_index == 0)
- stack_store_float(stack, out_offset, vector.x);
- else if(vector_index == 1)
- stack_store_float(stack, out_offset, vector.y);
- else
- stack_store_float(stack, out_offset, vector.z);
- }
+ if (stack_valid(out_offset)) {
+ if (vector_index == 0)
+ stack_store_float(stack, out_offset, vector.x);
+ else if (vector_index == 1)
+ stack_store_float(stack, out_offset, vector.y);
+ else
+ stack_store_float(stack, out_offset, vector.z);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_sky.h b/intern/cycles/kernel/svm/svm_sky.h
index 092f6e045d6..50fe0c8232f 100644
--- a/intern/cycles/kernel/svm/svm_sky.h
+++ b/intern/cycles/kernel/svm/svm_sky.h
@@ -20,8 +20,8 @@ CCL_NAMESPACE_BEGIN
ccl_device float sky_angle_between(float thetav, float phiv, float theta, float phi)
{
- float cospsi = sinf(thetav)*sinf(theta)*cosf(phi - phiv) + cosf(thetav)*cosf(theta);
- return safe_acosf(cospsi);
+ float cospsi = sinf(thetav) * sinf(theta) * cosf(phi - phiv) + cosf(thetav) * cosf(theta);
+ return safe_acosf(cospsi);
}
/*
@@ -30,36 +30,43 @@ ccl_device float sky_angle_between(float thetav, float phiv, float theta, float
*/
ccl_device float sky_perez_function(float *lam, float theta, float gamma)
{
- float ctheta = cosf(theta);
- float cgamma = cosf(gamma);
+ float ctheta = cosf(theta);
+ float cgamma = cosf(gamma);
- return (1.0f + lam[0]*expf(lam[1]/ctheta)) * (1.0f + lam[2]*expf(lam[3]*gamma) + lam[4]*cgamma*cgamma);
+ return (1.0f + lam[0] * expf(lam[1] / ctheta)) *
+ (1.0f + lam[2] * expf(lam[3] * gamma) + lam[4] * cgamma * cgamma);
}
-ccl_device float3 sky_radiance_old(KernelGlobals *kg, float3 dir,
- float sunphi, float suntheta,
- float radiance_x, float radiance_y, float radiance_z,
- float *config_x, float *config_y, float *config_z)
+ccl_device float3 sky_radiance_old(KernelGlobals *kg,
+ float3 dir,
+ float sunphi,
+ float suntheta,
+ float radiance_x,
+ float radiance_y,
+ float radiance_z,
+ float *config_x,
+ float *config_y,
+ float *config_z)
{
- /* convert vector to spherical coordinates */
- float2 spherical = direction_to_spherical(dir);
- float theta = spherical.x;
- float phi = spherical.y;
+ /* convert vector to spherical coordinates */
+ float2 spherical = direction_to_spherical(dir);
+ float theta = spherical.x;
+ float phi = spherical.y;
- /* angle between sun direction and dir */
- float gamma = sky_angle_between(theta, phi, suntheta, sunphi);
+ /* angle between sun direction and dir */
+ float gamma = sky_angle_between(theta, phi, suntheta, sunphi);
- /* clamp theta to horizon */
- theta = min(theta, M_PI_2_F - 0.001f);
+ /* clamp theta to horizon */
+ theta = min(theta, M_PI_2_F - 0.001f);
- /* compute xyY color space values */
- float x = radiance_y * sky_perez_function(config_y, theta, gamma);
- float y = radiance_z * sky_perez_function(config_z, theta, gamma);
- float Y = radiance_x * sky_perez_function(config_x, theta, gamma);
+ /* compute xyY color space values */
+ float x = radiance_y * sky_perez_function(config_y, theta, gamma);
+ float y = radiance_z * sky_perez_function(config_z, theta, gamma);
+ float Y = radiance_x * sky_perez_function(config_x, theta, gamma);
- /* convert to RGB */
- float3 xyz = xyY_to_xyz(x, y, Y);
- return xyz_to_rgb(kg, xyz);
+ /* convert to RGB */
+ float3 xyz = xyY_to_xyz(x, y, Y);
+ return xyz_to_rgb(kg, xyz);
}
/*
@@ -68,118 +75,142 @@ ccl_device float3 sky_radiance_old(KernelGlobals *kg, float3 dir,
*/
ccl_device float sky_radiance_internal(float *configuration, float theta, float gamma)
{
- float ctheta = cosf(theta);
- float cgamma = cosf(gamma);
-
- float expM = expf(configuration[4] * gamma);
- float rayM = cgamma * cgamma;
- float mieM = (1.0f + rayM) / powf((1.0f + configuration[8]*configuration[8] - 2.0f*configuration[8]*cgamma), 1.5f);
- float zenith = sqrtf(ctheta);
-
- return (1.0f + configuration[0] * expf(configuration[1] / (ctheta + 0.01f))) *
- (configuration[2] + configuration[3] * expM + configuration[5] * rayM + configuration[6] * mieM + configuration[7] * zenith);
+ float ctheta = cosf(theta);
+ float cgamma = cosf(gamma);
+
+ float expM = expf(configuration[4] * gamma);
+ float rayM = cgamma * cgamma;
+ float mieM = (1.0f + rayM) / powf((1.0f + configuration[8] * configuration[8] -
+ 2.0f * configuration[8] * cgamma),
+ 1.5f);
+ float zenith = sqrtf(ctheta);
+
+ return (1.0f + configuration[0] * expf(configuration[1] / (ctheta + 0.01f))) *
+ (configuration[2] + configuration[3] * expM + configuration[5] * rayM +
+ configuration[6] * mieM + configuration[7] * zenith);
}
-ccl_device float3 sky_radiance_new(KernelGlobals *kg, float3 dir,
- float sunphi, float suntheta,
- float radiance_x, float radiance_y, float radiance_z,
- float *config_x, float *config_y, float *config_z)
+ccl_device float3 sky_radiance_new(KernelGlobals *kg,
+ float3 dir,
+ float sunphi,
+ float suntheta,
+ float radiance_x,
+ float radiance_y,
+ float radiance_z,
+ float *config_x,
+ float *config_y,
+ float *config_z)
{
- /* convert vector to spherical coordinates */
- float2 spherical = direction_to_spherical(dir);
- float theta = spherical.x;
- float phi = spherical.y;
+ /* convert vector to spherical coordinates */
+ float2 spherical = direction_to_spherical(dir);
+ float theta = spherical.x;
+ float phi = spherical.y;
- /* angle between sun direction and dir */
- float gamma = sky_angle_between(theta, phi, suntheta, sunphi);
+ /* angle between sun direction and dir */
+ float gamma = sky_angle_between(theta, phi, suntheta, sunphi);
- /* clamp theta to horizon */
- theta = min(theta, M_PI_2_F - 0.001f);
+ /* clamp theta to horizon */
+ theta = min(theta, M_PI_2_F - 0.001f);
- /* compute xyz color space values */
- float x = sky_radiance_internal(config_x, theta, gamma) * radiance_x;
- float y = sky_radiance_internal(config_y, theta, gamma) * radiance_y;
- float z = sky_radiance_internal(config_z, theta, gamma) * radiance_z;
+ /* compute xyz color space values */
+ float x = sky_radiance_internal(config_x, theta, gamma) * radiance_x;
+ float y = sky_radiance_internal(config_y, theta, gamma) * radiance_y;
+ float z = sky_radiance_internal(config_z, theta, gamma) * radiance_z;
- /* convert to RGB and adjust strength */
- return xyz_to_rgb(kg, make_float3(x, y, z)) * (M_2PI_F/683);
+ /* convert to RGB and adjust strength */
+ return xyz_to_rgb(kg, make_float3(x, y, z)) * (M_2PI_F / 683);
}
-ccl_device void svm_node_tex_sky(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_tex_sky(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
- /* Define variables */
- float sunphi, suntheta, radiance_x, radiance_y, radiance_z;
- float config_x[9], config_y[9], config_z[9];
-
- /* Load data */
- uint dir_offset = node.y;
- uint out_offset = node.z;
- int sky_model = node.w;
-
- float4 data = read_node_float(kg, offset);
- sunphi = data.x;
- suntheta = data.y;
- radiance_x = data.z;
- radiance_y = data.w;
-
- data = read_node_float(kg, offset);
- radiance_z = data.x;
- config_x[0] = data.y;
- config_x[1] = data.z;
- config_x[2] = data.w;
-
- data = read_node_float(kg, offset);
- config_x[3] = data.x;
- config_x[4] = data.y;
- config_x[5] = data.z;
- config_x[6] = data.w;
-
- data = read_node_float(kg, offset);
- config_x[7] = data.x;
- config_x[8] = data.y;
- config_y[0] = data.z;
- config_y[1] = data.w;
-
- data = read_node_float(kg, offset);
- config_y[2] = data.x;
- config_y[3] = data.y;
- config_y[4] = data.z;
- config_y[5] = data.w;
-
- data = read_node_float(kg, offset);
- config_y[6] = data.x;
- config_y[7] = data.y;
- config_y[8] = data.z;
- config_z[0] = data.w;
-
- data = read_node_float(kg, offset);
- config_z[1] = data.x;
- config_z[2] = data.y;
- config_z[3] = data.z;
- config_z[4] = data.w;
-
- data = read_node_float(kg, offset);
- config_z[5] = data.x;
- config_z[6] = data.y;
- config_z[7] = data.z;
- config_z[8] = data.w;
-
- float3 dir = stack_load_float3(stack, dir_offset);
- float3 f;
-
- /* Compute Sky */
- if(sky_model == 0) {
- f = sky_radiance_old(kg, dir, sunphi, suntheta,
- radiance_x, radiance_y, radiance_z,
- config_x, config_y, config_z);
- }
- else {
- f = sky_radiance_new(kg, dir, sunphi, suntheta,
- radiance_x, radiance_y, radiance_z,
- config_x, config_y, config_z);
- }
-
- stack_store_float3(stack, out_offset, f);
+ /* Define variables */
+ float sunphi, suntheta, radiance_x, radiance_y, radiance_z;
+ float config_x[9], config_y[9], config_z[9];
+
+ /* Load data */
+ uint dir_offset = node.y;
+ uint out_offset = node.z;
+ int sky_model = node.w;
+
+ float4 data = read_node_float(kg, offset);
+ sunphi = data.x;
+ suntheta = data.y;
+ radiance_x = data.z;
+ radiance_y = data.w;
+
+ data = read_node_float(kg, offset);
+ radiance_z = data.x;
+ config_x[0] = data.y;
+ config_x[1] = data.z;
+ config_x[2] = data.w;
+
+ data = read_node_float(kg, offset);
+ config_x[3] = data.x;
+ config_x[4] = data.y;
+ config_x[5] = data.z;
+ config_x[6] = data.w;
+
+ data = read_node_float(kg, offset);
+ config_x[7] = data.x;
+ config_x[8] = data.y;
+ config_y[0] = data.z;
+ config_y[1] = data.w;
+
+ data = read_node_float(kg, offset);
+ config_y[2] = data.x;
+ config_y[3] = data.y;
+ config_y[4] = data.z;
+ config_y[5] = data.w;
+
+ data = read_node_float(kg, offset);
+ config_y[6] = data.x;
+ config_y[7] = data.y;
+ config_y[8] = data.z;
+ config_z[0] = data.w;
+
+ data = read_node_float(kg, offset);
+ config_z[1] = data.x;
+ config_z[2] = data.y;
+ config_z[3] = data.z;
+ config_z[4] = data.w;
+
+ data = read_node_float(kg, offset);
+ config_z[5] = data.x;
+ config_z[6] = data.y;
+ config_z[7] = data.z;
+ config_z[8] = data.w;
+
+ float3 dir = stack_load_float3(stack, dir_offset);
+ float3 f;
+
+ /* Compute Sky */
+ if (sky_model == 0) {
+ f = sky_radiance_old(kg,
+ dir,
+ sunphi,
+ suntheta,
+ radiance_x,
+ radiance_y,
+ radiance_z,
+ config_x,
+ config_y,
+ config_z);
+ }
+ else {
+ f = sky_radiance_new(kg,
+ dir,
+ sunphi,
+ suntheta,
+ radiance_x,
+ radiance_y,
+ radiance_z,
+ config_x,
+ config_y,
+ config_z);
+ }
+
+ stack_store_float3(stack, out_offset, f);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h
index fe61292d0b0..1fb3e20f9e0 100644
--- a/intern/cycles/kernel/svm/svm_tex_coord.h
+++ b/intern/cycles/kernel/svm/svm_tex_coord.h
@@ -18,390 +18,381 @@ CCL_NAMESPACE_BEGIN
/* Texture Coordinate Node */
-ccl_device void svm_node_tex_coord(KernelGlobals *kg,
- ShaderData *sd,
- int path_flag,
- float *stack,
- uint4 node,
- int *offset)
+ccl_device void svm_node_tex_coord(
+ KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int *offset)
{
- float3 data;
- uint type = node.y;
- uint out_offset = node.z;
-
- switch(type) {
- case NODE_TEXCO_OBJECT: {
- data = sd->P;
- if(node.w == 0) {
- if(sd->object != OBJECT_NONE) {
- object_inverse_position_transform(kg, sd, &data);
- }
- }
- else {
- Transform tfm;
- tfm.x = read_node_float(kg, offset);
- tfm.y = read_node_float(kg, offset);
- tfm.z = read_node_float(kg, offset);
- data = transform_point(&tfm, data);
- }
- break;
- }
- case NODE_TEXCO_NORMAL: {
- data = sd->N;
- object_inverse_normal_transform(kg, sd, &data);
- break;
- }
- case NODE_TEXCO_CAMERA: {
- Transform tfm = kernel_data.cam.worldtocamera;
-
- if(sd->object != OBJECT_NONE)
- data = transform_point(&tfm, sd->P);
- else
- data = transform_point(&tfm, sd->P + camera_position(kg));
- break;
- }
- case NODE_TEXCO_WINDOW: {
- if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
- data = camera_world_to_ndc(kg, sd, sd->ray_P);
- else
- data = camera_world_to_ndc(kg, sd, sd->P);
- data.z = 0.0f;
- break;
- }
- case NODE_TEXCO_REFLECTION: {
- if(sd->object != OBJECT_NONE)
- data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I;
- else
- data = sd->I;
- break;
- }
- case NODE_TEXCO_DUPLI_GENERATED: {
- data = object_dupli_generated(kg, sd->object);
- break;
- }
- case NODE_TEXCO_DUPLI_UV: {
- data = object_dupli_uv(kg, sd->object);
- break;
- }
- case NODE_TEXCO_VOLUME_GENERATED: {
- data = sd->P;
+ float3 data;
+ uint type = node.y;
+ uint out_offset = node.z;
+
+ switch (type) {
+ case NODE_TEXCO_OBJECT: {
+ data = sd->P;
+ if (node.w == 0) {
+ if (sd->object != OBJECT_NONE) {
+ object_inverse_position_transform(kg, sd, &data);
+ }
+ }
+ else {
+ Transform tfm;
+ tfm.x = read_node_float(kg, offset);
+ tfm.y = read_node_float(kg, offset);
+ tfm.z = read_node_float(kg, offset);
+ data = transform_point(&tfm, data);
+ }
+ break;
+ }
+ case NODE_TEXCO_NORMAL: {
+ data = sd->N;
+ object_inverse_normal_transform(kg, sd, &data);
+ break;
+ }
+ case NODE_TEXCO_CAMERA: {
+ Transform tfm = kernel_data.cam.worldtocamera;
+
+ if (sd->object != OBJECT_NONE)
+ data = transform_point(&tfm, sd->P);
+ else
+ data = transform_point(&tfm, sd->P + camera_position(kg));
+ break;
+ }
+ case NODE_TEXCO_WINDOW: {
+ if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE &&
+ kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
+ data = camera_world_to_ndc(kg, sd, sd->ray_P);
+ else
+ data = camera_world_to_ndc(kg, sd, sd->P);
+ data.z = 0.0f;
+ break;
+ }
+ case NODE_TEXCO_REFLECTION: {
+ if (sd->object != OBJECT_NONE)
+ data = 2.0f * dot(sd->N, sd->I) * sd->N - sd->I;
+ else
+ data = sd->I;
+ break;
+ }
+ case NODE_TEXCO_DUPLI_GENERATED: {
+ data = object_dupli_generated(kg, sd->object);
+ break;
+ }
+ case NODE_TEXCO_DUPLI_UV: {
+ data = object_dupli_uv(kg, sd->object);
+ break;
+ }
+ case NODE_TEXCO_VOLUME_GENERATED: {
+ data = sd->P;
#ifdef __VOLUME__
- if(sd->object != OBJECT_NONE)
- data = volume_normalized_position(kg, sd, data);
+ if (sd->object != OBJECT_NONE)
+ data = volume_normalized_position(kg, sd, data);
#endif
- break;
- }
- }
+ break;
+ }
+ }
- stack_store_float3(stack, out_offset, data);
+ stack_store_float3(stack, out_offset, data);
}
-ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
- ShaderData *sd,
- int path_flag,
- float *stack,
- uint4 node,
- int *offset)
+ccl_device void svm_node_tex_coord_bump_dx(
+ KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int *offset)
{
#ifdef __RAY_DIFFERENTIALS__
- float3 data;
- uint type = node.y;
- uint out_offset = node.z;
-
- switch(type) {
- case NODE_TEXCO_OBJECT: {
- data = sd->P + sd->dP.dx;
- if(node.w == 0) {
- if(sd->object != OBJECT_NONE) {
- object_inverse_position_transform(kg, sd, &data);
- }
- }
- else {
- Transform tfm;
- tfm.x = read_node_float(kg, offset);
- tfm.y = read_node_float(kg, offset);
- tfm.z = read_node_float(kg, offset);
- data = transform_point(&tfm, data);
- }
- break;
- }
- case NODE_TEXCO_NORMAL: {
- data = sd->N;
- object_inverse_normal_transform(kg, sd, &data);
- break;
- }
- case NODE_TEXCO_CAMERA: {
- Transform tfm = kernel_data.cam.worldtocamera;
-
- if(sd->object != OBJECT_NONE)
- data = transform_point(&tfm, sd->P + sd->dP.dx);
- else
- data = transform_point(&tfm, sd->P + sd->dP.dx + camera_position(kg));
- break;
- }
- case NODE_TEXCO_WINDOW: {
- if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
- data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dx);
- else
- data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx);
- data.z = 0.0f;
- break;
- }
- case NODE_TEXCO_REFLECTION: {
- if(sd->object != OBJECT_NONE)
- data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I;
- else
- data = sd->I;
- break;
- }
- case NODE_TEXCO_DUPLI_GENERATED: {
- data = object_dupli_generated(kg, sd->object);
- break;
- }
- case NODE_TEXCO_DUPLI_UV: {
- data = object_dupli_uv(kg, sd->object);
- break;
- }
- case NODE_TEXCO_VOLUME_GENERATED: {
- data = sd->P + sd->dP.dx;
-
-#ifdef __VOLUME__
- if(sd->object != OBJECT_NONE)
- data = volume_normalized_position(kg, sd, data);
-#endif
- break;
- }
- }
-
- stack_store_float3(stack, out_offset, data);
+ float3 data;
+ uint type = node.y;
+ uint out_offset = node.z;
+
+ switch (type) {
+ case NODE_TEXCO_OBJECT: {
+ data = sd->P + sd->dP.dx;
+ if (node.w == 0) {
+ if (sd->object != OBJECT_NONE) {
+ object_inverse_position_transform(kg, sd, &data);
+ }
+ }
+ else {
+ Transform tfm;
+ tfm.x = read_node_float(kg, offset);
+ tfm.y = read_node_float(kg, offset);
+ tfm.z = read_node_float(kg, offset);
+ data = transform_point(&tfm, data);
+ }
+ break;
+ }
+ case NODE_TEXCO_NORMAL: {
+ data = sd->N;
+ object_inverse_normal_transform(kg, sd, &data);
+ break;
+ }
+ case NODE_TEXCO_CAMERA: {
+ Transform tfm = kernel_data.cam.worldtocamera;
+
+ if (sd->object != OBJECT_NONE)
+ data = transform_point(&tfm, sd->P + sd->dP.dx);
+ else
+ data = transform_point(&tfm, sd->P + sd->dP.dx + camera_position(kg));
+ break;
+ }
+ case NODE_TEXCO_WINDOW: {
+ if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE &&
+ kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
+ data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dx);
+ else
+ data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx);
+ data.z = 0.0f;
+ break;
+ }
+ case NODE_TEXCO_REFLECTION: {
+ if (sd->object != OBJECT_NONE)
+ data = 2.0f * dot(sd->N, sd->I) * sd->N - sd->I;
+ else
+ data = sd->I;
+ break;
+ }
+ case NODE_TEXCO_DUPLI_GENERATED: {
+ data = object_dupli_generated(kg, sd->object);
+ break;
+ }
+ case NODE_TEXCO_DUPLI_UV: {
+ data = object_dupli_uv(kg, sd->object);
+ break;
+ }
+ case NODE_TEXCO_VOLUME_GENERATED: {
+ data = sd->P + sd->dP.dx;
+
+# ifdef __VOLUME__
+ if (sd->object != OBJECT_NONE)
+ data = volume_normalized_position(kg, sd, data);
+# endif
+ break;
+ }
+ }
+
+ stack_store_float3(stack, out_offset, data);
#else
- svm_node_tex_coord(kg, sd, path_flag, stack, node, offset);
+ svm_node_tex_coord(kg, sd, path_flag, stack, node, offset);
#endif
}
-ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg,
- ShaderData *sd,
- int path_flag,
- float *stack,
- uint4 node,
- int *offset)
+ccl_device void svm_node_tex_coord_bump_dy(
+ KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int *offset)
{
#ifdef __RAY_DIFFERENTIALS__
- float3 data;
- uint type = node.y;
- uint out_offset = node.z;
-
- switch(type) {
- case NODE_TEXCO_OBJECT: {
- data = sd->P + sd->dP.dy;
- if(node.w == 0) {
- if(sd->object != OBJECT_NONE) {
- object_inverse_position_transform(kg, sd, &data);
- }
- }
- else {
- Transform tfm;
- tfm.x = read_node_float(kg, offset);
- tfm.y = read_node_float(kg, offset);
- tfm.z = read_node_float(kg, offset);
- data = transform_point(&tfm, data);
- }
- break;
- }
- case NODE_TEXCO_NORMAL: {
- data = sd->N;
- object_inverse_normal_transform(kg, sd, &data);
- break;
- }
- case NODE_TEXCO_CAMERA: {
- Transform tfm = kernel_data.cam.worldtocamera;
-
- if(sd->object != OBJECT_NONE)
- data = transform_point(&tfm, sd->P + sd->dP.dy);
- else
- data = transform_point(&tfm, sd->P + sd->dP.dy + camera_position(kg));
- break;
- }
- case NODE_TEXCO_WINDOW: {
- if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
- data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dy);
- else
- data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy);
- data.z = 0.0f;
- break;
- }
- case NODE_TEXCO_REFLECTION: {
- if(sd->object != OBJECT_NONE)
- data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I;
- else
- data = sd->I;
- break;
- }
- case NODE_TEXCO_DUPLI_GENERATED: {
- data = object_dupli_generated(kg, sd->object);
- break;
- }
- case NODE_TEXCO_DUPLI_UV: {
- data = object_dupli_uv(kg, sd->object);
- break;
- }
- case NODE_TEXCO_VOLUME_GENERATED: {
- data = sd->P + sd->dP.dy;
-
-#ifdef __VOLUME__
- if(sd->object != OBJECT_NONE)
- data = volume_normalized_position(kg, sd, data);
-#endif
- break;
- }
- }
-
- stack_store_float3(stack, out_offset, data);
+ float3 data;
+ uint type = node.y;
+ uint out_offset = node.z;
+
+ switch (type) {
+ case NODE_TEXCO_OBJECT: {
+ data = sd->P + sd->dP.dy;
+ if (node.w == 0) {
+ if (sd->object != OBJECT_NONE) {
+ object_inverse_position_transform(kg, sd, &data);
+ }
+ }
+ else {
+ Transform tfm;
+ tfm.x = read_node_float(kg, offset);
+ tfm.y = read_node_float(kg, offset);
+ tfm.z = read_node_float(kg, offset);
+ data = transform_point(&tfm, data);
+ }
+ break;
+ }
+ case NODE_TEXCO_NORMAL: {
+ data = sd->N;
+ object_inverse_normal_transform(kg, sd, &data);
+ break;
+ }
+ case NODE_TEXCO_CAMERA: {
+ Transform tfm = kernel_data.cam.worldtocamera;
+
+ if (sd->object != OBJECT_NONE)
+ data = transform_point(&tfm, sd->P + sd->dP.dy);
+ else
+ data = transform_point(&tfm, sd->P + sd->dP.dy + camera_position(kg));
+ break;
+ }
+ case NODE_TEXCO_WINDOW: {
+ if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE &&
+ kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
+ data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dy);
+ else
+ data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy);
+ data.z = 0.0f;
+ break;
+ }
+ case NODE_TEXCO_REFLECTION: {
+ if (sd->object != OBJECT_NONE)
+ data = 2.0f * dot(sd->N, sd->I) * sd->N - sd->I;
+ else
+ data = sd->I;
+ break;
+ }
+ case NODE_TEXCO_DUPLI_GENERATED: {
+ data = object_dupli_generated(kg, sd->object);
+ break;
+ }
+ case NODE_TEXCO_DUPLI_UV: {
+ data = object_dupli_uv(kg, sd->object);
+ break;
+ }
+ case NODE_TEXCO_VOLUME_GENERATED: {
+ data = sd->P + sd->dP.dy;
+
+# ifdef __VOLUME__
+ if (sd->object != OBJECT_NONE)
+ data = volume_normalized_position(kg, sd, data);
+# endif
+ break;
+ }
+ }
+
+ stack_store_float3(stack, out_offset, data);
#else
- svm_node_tex_coord(kg, sd, path_flag, stack, node, offset);
+ svm_node_tex_coord(kg, sd, path_flag, stack, node, offset);
#endif
}
ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
- uint color_offset, strength_offset, normal_offset, space;
- decode_node_uchar4(node.y, &color_offset, &strength_offset, &normal_offset, &space);
-
- float3 color = stack_load_float3(stack, color_offset);
- color = 2.0f*make_float3(color.x - 0.5f, color.y - 0.5f, color.z - 0.5f);
-
- bool is_backfacing = (sd->flag & SD_BACKFACING) != 0;
- float3 N;
-
- if(space == NODE_NORMAL_MAP_TANGENT) {
- /* tangent space */
- if(sd->object == OBJECT_NONE) {
- stack_store_float3(stack, normal_offset, make_float3(0.0f, 0.0f, 0.0f));
- return;
- }
-
- /* first try to get tangent attribute */
- const AttributeDescriptor attr = find_attribute(kg, sd, node.z);
- const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w);
- const AttributeDescriptor attr_normal = find_attribute(kg, sd, ATTR_STD_VERTEX_NORMAL);
-
- if(attr.offset == ATTR_STD_NOT_FOUND || attr_sign.offset == ATTR_STD_NOT_FOUND || attr_normal.offset == ATTR_STD_NOT_FOUND) {
- stack_store_float3(stack, normal_offset, make_float3(0.0f, 0.0f, 0.0f));
- return;
- }
-
- /* get _unnormalized_ interpolated normal and tangent */
- float3 tangent = primitive_surface_attribute_float3(kg, sd, attr, NULL, NULL);
- float sign = primitive_surface_attribute_float(kg, sd, attr_sign, NULL, NULL);
- float3 normal;
-
- if(sd->shader & SHADER_SMOOTH_NORMAL) {
- normal = primitive_surface_attribute_float3(kg, sd, attr_normal, NULL, NULL);
- }
- else {
- normal = sd->Ng;
-
- /* the normal is already inverted, which is too soon for the math here */
- if(is_backfacing) {
- normal = -normal;
- }
-
- object_inverse_normal_transform(kg, sd, &normal);
- }
-
- /* apply normal map */
- float3 B = sign * cross(normal, tangent);
- N = safe_normalize(color.x * tangent + color.y * B + color.z * normal);
-
- /* transform to world space */
- object_normal_transform(kg, sd, &N);
- }
- else {
- /* strange blender convention */
- if(space == NODE_NORMAL_MAP_BLENDER_OBJECT || space == NODE_NORMAL_MAP_BLENDER_WORLD) {
- color.y = -color.y;
- color.z = -color.z;
- }
-
- /* object, world space */
- N = color;
-
- if(space == NODE_NORMAL_MAP_OBJECT || space == NODE_NORMAL_MAP_BLENDER_OBJECT)
- object_normal_transform(kg, sd, &N);
- else
- N = safe_normalize(N);
- }
-
- /* invert normal for backfacing polygons */
- if(is_backfacing) {
- N = -N;
- }
-
- float strength = stack_load_float(stack, strength_offset);
-
- if(strength != 1.0f) {
- strength = max(strength, 0.0f);
- N = safe_normalize(sd->N + (N - sd->N)*strength);
- }
-
- N = ensure_valid_reflection(sd->Ng, sd->I, N);
-
- if(is_zero(N)) {
- N = sd->N;
- }
-
- stack_store_float3(stack, normal_offset, N);
+ uint color_offset, strength_offset, normal_offset, space;
+ decode_node_uchar4(node.y, &color_offset, &strength_offset, &normal_offset, &space);
+
+ float3 color = stack_load_float3(stack, color_offset);
+ color = 2.0f * make_float3(color.x - 0.5f, color.y - 0.5f, color.z - 0.5f);
+
+ bool is_backfacing = (sd->flag & SD_BACKFACING) != 0;
+ float3 N;
+
+ if (space == NODE_NORMAL_MAP_TANGENT) {
+ /* tangent space */
+ if (sd->object == OBJECT_NONE) {
+ stack_store_float3(stack, normal_offset, make_float3(0.0f, 0.0f, 0.0f));
+ return;
+ }
+
+ /* first try to get tangent attribute */
+ const AttributeDescriptor attr = find_attribute(kg, sd, node.z);
+ const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w);
+ const AttributeDescriptor attr_normal = find_attribute(kg, sd, ATTR_STD_VERTEX_NORMAL);
+
+ if (attr.offset == ATTR_STD_NOT_FOUND || attr_sign.offset == ATTR_STD_NOT_FOUND ||
+ attr_normal.offset == ATTR_STD_NOT_FOUND) {
+ stack_store_float3(stack, normal_offset, make_float3(0.0f, 0.0f, 0.0f));
+ return;
+ }
+
+ /* get _unnormalized_ interpolated normal and tangent */
+ float3 tangent = primitive_surface_attribute_float3(kg, sd, attr, NULL, NULL);
+ float sign = primitive_surface_attribute_float(kg, sd, attr_sign, NULL, NULL);
+ float3 normal;
+
+ if (sd->shader & SHADER_SMOOTH_NORMAL) {
+ normal = primitive_surface_attribute_float3(kg, sd, attr_normal, NULL, NULL);
+ }
+ else {
+ normal = sd->Ng;
+
+ /* the normal is already inverted, which is too soon for the math here */
+ if (is_backfacing) {
+ normal = -normal;
+ }
+
+ object_inverse_normal_transform(kg, sd, &normal);
+ }
+
+ /* apply normal map */
+ float3 B = sign * cross(normal, tangent);
+ N = safe_normalize(color.x * tangent + color.y * B + color.z * normal);
+
+ /* transform to world space */
+ object_normal_transform(kg, sd, &N);
+ }
+ else {
+ /* strange blender convention */
+ if (space == NODE_NORMAL_MAP_BLENDER_OBJECT || space == NODE_NORMAL_MAP_BLENDER_WORLD) {
+ color.y = -color.y;
+ color.z = -color.z;
+ }
+
+ /* object, world space */
+ N = color;
+
+ if (space == NODE_NORMAL_MAP_OBJECT || space == NODE_NORMAL_MAP_BLENDER_OBJECT)
+ object_normal_transform(kg, sd, &N);
+ else
+ N = safe_normalize(N);
+ }
+
+ /* invert normal for backfacing polygons */
+ if (is_backfacing) {
+ N = -N;
+ }
+
+ float strength = stack_load_float(stack, strength_offset);
+
+ if (strength != 1.0f) {
+ strength = max(strength, 0.0f);
+ N = safe_normalize(sd->N + (N - sd->N) * strength);
+ }
+
+ N = ensure_valid_reflection(sd->Ng, sd->I, N);
+
+ if (is_zero(N)) {
+ N = sd->N;
+ }
+
+ stack_store_float3(stack, normal_offset, N);
}
ccl_device void svm_node_tangent(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
- uint tangent_offset, direction_type, axis;
- decode_node_uchar4(node.y, &tangent_offset, &direction_type, &axis, NULL);
-
- float3 tangent;
- float3 attribute_value;
- const AttributeDescriptor desc = find_attribute(kg, sd, node.z);
- if (desc.offset != ATTR_STD_NOT_FOUND) {
- if(desc.type == NODE_ATTR_FLOAT2) {
- float2 value = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL);
- attribute_value.x = value.x;
- attribute_value.y = value.y;
- attribute_value.z = 0.0f;
- }
- else {
- attribute_value = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
- }
- }
-
-
- if(direction_type == NODE_TANGENT_UVMAP) {
- /* UV map */
- if(desc.offset == ATTR_STD_NOT_FOUND)
- tangent = make_float3(0.0f, 0.0f, 0.0f);
- else
- tangent = attribute_value;
- }
- else {
- /* radial */
- float3 generated;
-
- if(desc.offset == ATTR_STD_NOT_FOUND)
- generated = sd->P;
- else
- generated = attribute_value;
-
- if(axis == NODE_TANGENT_AXIS_X)
- tangent = make_float3(0.0f, -(generated.z - 0.5f), (generated.y - 0.5f));
- else if(axis == NODE_TANGENT_AXIS_Y)
- tangent = make_float3(-(generated.z - 0.5f), 0.0f, (generated.x - 0.5f));
- else
- tangent = make_float3(-(generated.y - 0.5f), (generated.x - 0.5f), 0.0f);
- }
-
- object_normal_transform(kg, sd, &tangent);
- tangent = cross(sd->N, normalize(cross(tangent, sd->N)));
- stack_store_float3(stack, tangent_offset, tangent);
+ uint tangent_offset, direction_type, axis;
+ decode_node_uchar4(node.y, &tangent_offset, &direction_type, &axis, NULL);
+
+ float3 tangent;
+ float3 attribute_value;
+ const AttributeDescriptor desc = find_attribute(kg, sd, node.z);
+ if (desc.offset != ATTR_STD_NOT_FOUND) {
+ if (desc.type == NODE_ATTR_FLOAT2) {
+ float2 value = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL);
+ attribute_value.x = value.x;
+ attribute_value.y = value.y;
+ attribute_value.z = 0.0f;
+ }
+ else {
+ attribute_value = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
+ }
+ }
+
+ if (direction_type == NODE_TANGENT_UVMAP) {
+ /* UV map */
+ if (desc.offset == ATTR_STD_NOT_FOUND)
+ tangent = make_float3(0.0f, 0.0f, 0.0f);
+ else
+ tangent = attribute_value;
+ }
+ else {
+ /* radial */
+ float3 generated;
+
+ if (desc.offset == ATTR_STD_NOT_FOUND)
+ generated = sd->P;
+ else
+ generated = attribute_value;
+
+ if (axis == NODE_TANGENT_AXIS_X)
+ tangent = make_float3(0.0f, -(generated.z - 0.5f), (generated.y - 0.5f));
+ else if (axis == NODE_TANGENT_AXIS_Y)
+ tangent = make_float3(-(generated.z - 0.5f), 0.0f, (generated.x - 0.5f));
+ else
+ tangent = make_float3(-(generated.y - 0.5f), (generated.x - 0.5f), 0.0f);
+ }
+
+ object_normal_transform(kg, sd, &tangent);
+ tangent = cross(sd->N, normalize(cross(tangent, sd->N)));
+ stack_store_float3(stack, tangent_offset, tangent);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_texture.h b/intern/cycles/kernel/svm/svm_texture.h
index 57729817bdc..290aa85c831 100644
--- a/intern/cycles/kernel/svm/svm_texture.h
+++ b/intern/cycles/kernel/svm/svm_texture.h
@@ -20,44 +20,44 @@ CCL_NAMESPACE_BEGIN
ccl_device_noinline float noise_turbulence(float3 p, float octaves, int hard)
{
- float fscale = 1.0f;
- float amp = 1.0f;
- float sum = 0.0f;
- int i, n;
+ float fscale = 1.0f;
+ float amp = 1.0f;
+ float sum = 0.0f;
+ int i, n;
- octaves = clamp(octaves, 0.0f, 16.0f);
- n = float_to_int(octaves);
+ octaves = clamp(octaves, 0.0f, 16.0f);
+ n = float_to_int(octaves);
- for(i = 0; i <= n; i++) {
- float t = noise(fscale*p);
+ for (i = 0; i <= n; i++) {
+ float t = noise(fscale * p);
- if(hard)
- t = fabsf(2.0f*t - 1.0f);
+ if (hard)
+ t = fabsf(2.0f * t - 1.0f);
- sum += t*amp;
- amp *= 0.5f;
- fscale *= 2.0f;
- }
+ sum += t * amp;
+ amp *= 0.5f;
+ fscale *= 2.0f;
+ }
- float rmd = octaves - floorf(octaves);
+ float rmd = octaves - floorf(octaves);
- if(rmd != 0.0f) {
- float t = noise(fscale*p);
+ if (rmd != 0.0f) {
+ float t = noise(fscale * p);
- if(hard)
- t = fabsf(2.0f*t - 1.0f);
+ if (hard)
+ t = fabsf(2.0f * t - 1.0f);
- float sum2 = sum + t*amp;
+ float sum2 = sum + t * amp;
- sum *= ((float)(1 << n)/(float)((1 << (n+1)) - 1));
- sum2 *= ((float)(1 << (n+1))/(float)((1 << (n+2)) - 1));
+ sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
+ sum2 *= ((float)(1 << (n + 1)) / (float)((1 << (n + 2)) - 1));
- return (1.0f - rmd)*sum + rmd*sum2;
- }
- else {
- sum *= ((float)(1 << n)/(float)((1 << (n+1)) - 1));
- return sum;
- }
+ return (1.0f - rmd) * sum + rmd * sum2;
+ }
+ else {
+ sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
+ return sum;
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h
index 8b15d7bf9f4..d31e4f93696 100644
--- a/intern/cycles/kernel/svm/svm_types.h
+++ b/intern/cycles/kernel/svm/svm_types.h
@@ -38,498 +38,505 @@ CCL_NAMESPACE_BEGIN
*
* Lower the number of group more often the node is used.
*/
-#define NODE_GROUP_LEVEL_0 0
-#define NODE_GROUP_LEVEL_1 1
-#define NODE_GROUP_LEVEL_2 2
-#define NODE_GROUP_LEVEL_3 3
-#define NODE_GROUP_LEVEL_MAX NODE_GROUP_LEVEL_3
-
-#define NODE_FEATURE_VOLUME (1 << 0)
-#define NODE_FEATURE_HAIR (1 << 1)
-#define NODE_FEATURE_BUMP (1 << 2)
+#define NODE_GROUP_LEVEL_0 0
+#define NODE_GROUP_LEVEL_1 1
+#define NODE_GROUP_LEVEL_2 2
+#define NODE_GROUP_LEVEL_3 3
+#define NODE_GROUP_LEVEL_MAX NODE_GROUP_LEVEL_3
+
+#define NODE_FEATURE_VOLUME (1 << 0)
+#define NODE_FEATURE_HAIR (1 << 1)
+#define NODE_FEATURE_BUMP (1 << 2)
#define NODE_FEATURE_BUMP_STATE (1 << 3)
/* TODO(sergey): Consider using something like ((uint)(-1)).
* Need to check carefully operand types around usage of this
* define first.
*/
-#define NODE_FEATURE_ALL (NODE_FEATURE_VOLUME|NODE_FEATURE_HAIR|NODE_FEATURE_BUMP|NODE_FEATURE_BUMP_STATE)
+#define NODE_FEATURE_ALL \
+ (NODE_FEATURE_VOLUME | NODE_FEATURE_HAIR | NODE_FEATURE_BUMP | NODE_FEATURE_BUMP_STATE)
typedef enum ShaderNodeType {
- NODE_END = 0,
- NODE_CLOSURE_BSDF,
- NODE_CLOSURE_EMISSION,
- NODE_CLOSURE_BACKGROUND,
- NODE_CLOSURE_SET_WEIGHT,
- NODE_CLOSURE_WEIGHT,
- NODE_MIX_CLOSURE,
- NODE_JUMP_IF_ZERO,
- NODE_JUMP_IF_ONE,
- NODE_TEX_IMAGE,
- NODE_TEX_IMAGE_BOX,
- NODE_TEX_SKY,
- NODE_GEOMETRY,
- NODE_GEOMETRY_DUPLI,
- NODE_LIGHT_PATH,
- NODE_VALUE_F,
- NODE_VALUE_V,
- NODE_MIX,
- NODE_ATTR,
- NODE_CONVERT,
- NODE_FRESNEL,
- NODE_WIREFRAME,
- NODE_WAVELENGTH,
- NODE_BLACKBODY,
- NODE_EMISSION_WEIGHT,
- NODE_TEX_GRADIENT,
- NODE_TEX_VORONOI,
- NODE_TEX_MUSGRAVE,
- NODE_TEX_WAVE,
- NODE_TEX_MAGIC,
- NODE_TEX_NOISE,
- NODE_SHADER_JUMP,
- NODE_SET_DISPLACEMENT,
- NODE_GEOMETRY_BUMP_DX,
- NODE_GEOMETRY_BUMP_DY,
- NODE_SET_BUMP,
- NODE_MATH,
- NODE_VECTOR_MATH,
- NODE_VECTOR_TRANSFORM,
- NODE_MAPPING,
- NODE_TEX_COORD,
- NODE_TEX_COORD_BUMP_DX,
- NODE_TEX_COORD_BUMP_DY,
- NODE_ATTR_BUMP_DX,
- NODE_ATTR_BUMP_DY,
- NODE_TEX_ENVIRONMENT,
- NODE_CLOSURE_HOLDOUT,
- NODE_LAYER_WEIGHT,
- NODE_CLOSURE_VOLUME,
- NODE_SEPARATE_VECTOR,
- NODE_COMBINE_VECTOR,
- NODE_SEPARATE_HSV,
- NODE_COMBINE_HSV,
- NODE_HSV,
- NODE_CAMERA,
- NODE_INVERT,
- NODE_NORMAL,
- NODE_GAMMA,
- NODE_TEX_CHECKER,
- NODE_BRIGHTCONTRAST,
- NODE_RGB_RAMP,
- NODE_RGB_CURVES,
- NODE_VECTOR_CURVES,
- NODE_MIN_MAX,
- NODE_LIGHT_FALLOFF,
- NODE_OBJECT_INFO,
- NODE_PARTICLE_INFO,
- NODE_TEX_BRICK,
- NODE_CLOSURE_SET_NORMAL,
- NODE_AMBIENT_OCCLUSION,
- NODE_TANGENT,
- NODE_NORMAL_MAP,
- NODE_HAIR_INFO,
- NODE_UVMAP,
- NODE_TEX_VOXEL,
- NODE_ENTER_BUMP_EVAL,
- NODE_LEAVE_BUMP_EVAL,
- NODE_BEVEL,
- NODE_DISPLACEMENT,
- NODE_VECTOR_DISPLACEMENT,
- NODE_PRINCIPLED_VOLUME,
- NODE_IES,
+ NODE_END = 0,
+ NODE_CLOSURE_BSDF,
+ NODE_CLOSURE_EMISSION,
+ NODE_CLOSURE_BACKGROUND,
+ NODE_CLOSURE_SET_WEIGHT,
+ NODE_CLOSURE_WEIGHT,
+ NODE_MIX_CLOSURE,
+ NODE_JUMP_IF_ZERO,
+ NODE_JUMP_IF_ONE,
+ NODE_TEX_IMAGE,
+ NODE_TEX_IMAGE_BOX,
+ NODE_TEX_SKY,
+ NODE_GEOMETRY,
+ NODE_GEOMETRY_DUPLI,
+ NODE_LIGHT_PATH,
+ NODE_VALUE_F,
+ NODE_VALUE_V,
+ NODE_MIX,
+ NODE_ATTR,
+ NODE_CONVERT,
+ NODE_FRESNEL,
+ NODE_WIREFRAME,
+ NODE_WAVELENGTH,
+ NODE_BLACKBODY,
+ NODE_EMISSION_WEIGHT,
+ NODE_TEX_GRADIENT,
+ NODE_TEX_VORONOI,
+ NODE_TEX_MUSGRAVE,
+ NODE_TEX_WAVE,
+ NODE_TEX_MAGIC,
+ NODE_TEX_NOISE,
+ NODE_SHADER_JUMP,
+ NODE_SET_DISPLACEMENT,
+ NODE_GEOMETRY_BUMP_DX,
+ NODE_GEOMETRY_BUMP_DY,
+ NODE_SET_BUMP,
+ NODE_MATH,
+ NODE_VECTOR_MATH,
+ NODE_VECTOR_TRANSFORM,
+ NODE_MAPPING,
+ NODE_TEX_COORD,
+ NODE_TEX_COORD_BUMP_DX,
+ NODE_TEX_COORD_BUMP_DY,
+ NODE_ATTR_BUMP_DX,
+ NODE_ATTR_BUMP_DY,
+ NODE_TEX_ENVIRONMENT,
+ NODE_CLOSURE_HOLDOUT,
+ NODE_LAYER_WEIGHT,
+ NODE_CLOSURE_VOLUME,
+ NODE_SEPARATE_VECTOR,
+ NODE_COMBINE_VECTOR,
+ NODE_SEPARATE_HSV,
+ NODE_COMBINE_HSV,
+ NODE_HSV,
+ NODE_CAMERA,
+ NODE_INVERT,
+ NODE_NORMAL,
+ NODE_GAMMA,
+ NODE_TEX_CHECKER,
+ NODE_BRIGHTCONTRAST,
+ NODE_RGB_RAMP,
+ NODE_RGB_CURVES,
+ NODE_VECTOR_CURVES,
+ NODE_MIN_MAX,
+ NODE_LIGHT_FALLOFF,
+ NODE_OBJECT_INFO,
+ NODE_PARTICLE_INFO,
+ NODE_TEX_BRICK,
+ NODE_CLOSURE_SET_NORMAL,
+ NODE_AMBIENT_OCCLUSION,
+ NODE_TANGENT,
+ NODE_NORMAL_MAP,
+ NODE_HAIR_INFO,
+ NODE_UVMAP,
+ NODE_TEX_VOXEL,
+ NODE_ENTER_BUMP_EVAL,
+ NODE_LEAVE_BUMP_EVAL,
+ NODE_BEVEL,
+ NODE_DISPLACEMENT,
+ NODE_VECTOR_DISPLACEMENT,
+ NODE_PRINCIPLED_VOLUME,
+ NODE_IES,
} ShaderNodeType;
typedef enum NodeAttributeType {
- NODE_ATTR_FLOAT = 0,
- NODE_ATTR_FLOAT2,
- NODE_ATTR_FLOAT3,
- NODE_ATTR_MATRIX
+ NODE_ATTR_FLOAT = 0,
+ NODE_ATTR_FLOAT2,
+ NODE_ATTR_FLOAT3,
+ NODE_ATTR_MATRIX
} NodeAttributeType;
typedef enum NodeGeometry {
- NODE_GEOM_P = 0,
- NODE_GEOM_N,
- NODE_GEOM_T,
- NODE_GEOM_I,
- NODE_GEOM_Ng,
- NODE_GEOM_uv
+ NODE_GEOM_P = 0,
+ NODE_GEOM_N,
+ NODE_GEOM_T,
+ NODE_GEOM_I,
+ NODE_GEOM_Ng,
+ NODE_GEOM_uv
} NodeGeometry;
typedef enum NodeObjectInfo {
- NODE_INFO_OB_LOCATION,
- NODE_INFO_OB_INDEX,
- NODE_INFO_MAT_INDEX,
- NODE_INFO_OB_RANDOM
+ NODE_INFO_OB_LOCATION,
+ NODE_INFO_OB_INDEX,
+ NODE_INFO_MAT_INDEX,
+ NODE_INFO_OB_RANDOM
} NodeObjectInfo;
typedef enum NodeParticleInfo {
- NODE_INFO_PAR_INDEX,
- NODE_INFO_PAR_RANDOM,
- NODE_INFO_PAR_AGE,
- NODE_INFO_PAR_LIFETIME,
- NODE_INFO_PAR_LOCATION,
- NODE_INFO_PAR_ROTATION,
- NODE_INFO_PAR_SIZE,
- NODE_INFO_PAR_VELOCITY,
- NODE_INFO_PAR_ANGULAR_VELOCITY
+ NODE_INFO_PAR_INDEX,
+ NODE_INFO_PAR_RANDOM,
+ NODE_INFO_PAR_AGE,
+ NODE_INFO_PAR_LIFETIME,
+ NODE_INFO_PAR_LOCATION,
+ NODE_INFO_PAR_ROTATION,
+ NODE_INFO_PAR_SIZE,
+ NODE_INFO_PAR_VELOCITY,
+ NODE_INFO_PAR_ANGULAR_VELOCITY
} NodeParticleInfo;
typedef enum NodeHairInfo {
- NODE_INFO_CURVE_IS_STRAND,
- NODE_INFO_CURVE_INTERCEPT,
- NODE_INFO_CURVE_THICKNESS,
- /*fade for minimum hair width transpency*/
- /*NODE_INFO_CURVE_FADE,*/
- NODE_INFO_CURVE_TANGENT_NORMAL,
- NODE_INFO_CURVE_RANDOM,
+ NODE_INFO_CURVE_IS_STRAND,
+ NODE_INFO_CURVE_INTERCEPT,
+ NODE_INFO_CURVE_THICKNESS,
+ /*fade for minimum hair width transpency*/
+ /*NODE_INFO_CURVE_FADE,*/
+ NODE_INFO_CURVE_TANGENT_NORMAL,
+ NODE_INFO_CURVE_RANDOM,
} NodeHairInfo;
typedef enum NodeLightPath {
- NODE_LP_camera = 0,
- NODE_LP_shadow,
- NODE_LP_diffuse,
- NODE_LP_glossy,
- NODE_LP_singular,
- NODE_LP_reflection,
- NODE_LP_transmission,
- NODE_LP_volume_scatter,
- NODE_LP_backfacing,
- NODE_LP_ray_length,
- NODE_LP_ray_depth,
- NODE_LP_ray_diffuse,
- NODE_LP_ray_glossy,
- NODE_LP_ray_transparent,
- NODE_LP_ray_transmission,
+ NODE_LP_camera = 0,
+ NODE_LP_shadow,
+ NODE_LP_diffuse,
+ NODE_LP_glossy,
+ NODE_LP_singular,
+ NODE_LP_reflection,
+ NODE_LP_transmission,
+ NODE_LP_volume_scatter,
+ NODE_LP_backfacing,
+ NODE_LP_ray_length,
+ NODE_LP_ray_depth,
+ NODE_LP_ray_diffuse,
+ NODE_LP_ray_glossy,
+ NODE_LP_ray_transparent,
+ NODE_LP_ray_transmission,
} NodeLightPath;
typedef enum NodeLightFalloff {
- NODE_LIGHT_FALLOFF_QUADRATIC,
- NODE_LIGHT_FALLOFF_LINEAR,
- NODE_LIGHT_FALLOFF_CONSTANT
+ NODE_LIGHT_FALLOFF_QUADRATIC,
+ NODE_LIGHT_FALLOFF_LINEAR,
+ NODE_LIGHT_FALLOFF_CONSTANT
} NodeLightFalloff;
typedef enum NodeTexCoord {
- NODE_TEXCO_NORMAL,
- NODE_TEXCO_OBJECT,
- NODE_TEXCO_CAMERA,
- NODE_TEXCO_WINDOW,
- NODE_TEXCO_REFLECTION,
- NODE_TEXCO_DUPLI_GENERATED,
- NODE_TEXCO_DUPLI_UV,
- NODE_TEXCO_VOLUME_GENERATED
+ NODE_TEXCO_NORMAL,
+ NODE_TEXCO_OBJECT,
+ NODE_TEXCO_CAMERA,
+ NODE_TEXCO_WINDOW,
+ NODE_TEXCO_REFLECTION,
+ NODE_TEXCO_DUPLI_GENERATED,
+ NODE_TEXCO_DUPLI_UV,
+ NODE_TEXCO_VOLUME_GENERATED
} NodeTexCoord;
typedef enum NodeMix {
- NODE_MIX_BLEND = 0,
- NODE_MIX_ADD,
- NODE_MIX_MUL,
- NODE_MIX_SUB,
- NODE_MIX_SCREEN,
- NODE_MIX_DIV,
- NODE_MIX_DIFF,
- NODE_MIX_DARK,
- NODE_MIX_LIGHT,
- NODE_MIX_OVERLAY,
- NODE_MIX_DODGE,
- NODE_MIX_BURN,
- NODE_MIX_HUE,
- NODE_MIX_SAT,
- NODE_MIX_VAL,
- NODE_MIX_COLOR,
- NODE_MIX_SOFT,
- NODE_MIX_LINEAR,
- NODE_MIX_CLAMP /* used for the clamp UI option */
+ NODE_MIX_BLEND = 0,
+ NODE_MIX_ADD,
+ NODE_MIX_MUL,
+ NODE_MIX_SUB,
+ NODE_MIX_SCREEN,
+ NODE_MIX_DIV,
+ NODE_MIX_DIFF,
+ NODE_MIX_DARK,
+ NODE_MIX_LIGHT,
+ NODE_MIX_OVERLAY,
+ NODE_MIX_DODGE,
+ NODE_MIX_BURN,
+ NODE_MIX_HUE,
+ NODE_MIX_SAT,
+ NODE_MIX_VAL,
+ NODE_MIX_COLOR,
+ NODE_MIX_SOFT,
+ NODE_MIX_LINEAR,
+ NODE_MIX_CLAMP /* used for the clamp UI option */
} NodeMix;
typedef enum NodeMath {
- NODE_MATH_ADD,
- NODE_MATH_SUBTRACT,
- NODE_MATH_MULTIPLY,
- NODE_MATH_DIVIDE,
- NODE_MATH_SINE,
- NODE_MATH_COSINE,
- NODE_MATH_TANGENT,
- NODE_MATH_ARCSINE,
- NODE_MATH_ARCCOSINE,
- NODE_MATH_ARCTANGENT,
- NODE_MATH_POWER,
- NODE_MATH_LOGARITHM,
- NODE_MATH_MINIMUM,
- NODE_MATH_MAXIMUM,
- NODE_MATH_ROUND,
- NODE_MATH_LESS_THAN,
- NODE_MATH_GREATER_THAN,
- NODE_MATH_MODULO,
- NODE_MATH_ABSOLUTE,
- NODE_MATH_ARCTAN2,
- NODE_MATH_FLOOR,
- NODE_MATH_CEIL,
- NODE_MATH_FRACT,
- NODE_MATH_SQRT,
- NODE_MATH_CLAMP /* used for the clamp UI option */
+ NODE_MATH_ADD,
+ NODE_MATH_SUBTRACT,
+ NODE_MATH_MULTIPLY,
+ NODE_MATH_DIVIDE,
+ NODE_MATH_SINE,
+ NODE_MATH_COSINE,
+ NODE_MATH_TANGENT,
+ NODE_MATH_ARCSINE,
+ NODE_MATH_ARCCOSINE,
+ NODE_MATH_ARCTANGENT,
+ NODE_MATH_POWER,
+ NODE_MATH_LOGARITHM,
+ NODE_MATH_MINIMUM,
+ NODE_MATH_MAXIMUM,
+ NODE_MATH_ROUND,
+ NODE_MATH_LESS_THAN,
+ NODE_MATH_GREATER_THAN,
+ NODE_MATH_MODULO,
+ NODE_MATH_ABSOLUTE,
+ NODE_MATH_ARCTAN2,
+ NODE_MATH_FLOOR,
+ NODE_MATH_CEIL,
+ NODE_MATH_FRACT,
+ NODE_MATH_SQRT,
+ NODE_MATH_CLAMP /* used for the clamp UI option */
} NodeMath;
typedef enum NodeVectorMath {
- NODE_VECTOR_MATH_ADD,
- NODE_VECTOR_MATH_SUBTRACT,
- NODE_VECTOR_MATH_AVERAGE,
- NODE_VECTOR_MATH_DOT_PRODUCT,
- NODE_VECTOR_MATH_CROSS_PRODUCT,
- NODE_VECTOR_MATH_NORMALIZE
+ NODE_VECTOR_MATH_ADD,
+ NODE_VECTOR_MATH_SUBTRACT,
+ NODE_VECTOR_MATH_AVERAGE,
+ NODE_VECTOR_MATH_DOT_PRODUCT,
+ NODE_VECTOR_MATH_CROSS_PRODUCT,
+ NODE_VECTOR_MATH_NORMALIZE
} NodeVectorMath;
typedef enum NodeVectorTransformType {
- NODE_VECTOR_TRANSFORM_TYPE_VECTOR,
- NODE_VECTOR_TRANSFORM_TYPE_POINT,
- NODE_VECTOR_TRANSFORM_TYPE_NORMAL
+ NODE_VECTOR_TRANSFORM_TYPE_VECTOR,
+ NODE_VECTOR_TRANSFORM_TYPE_POINT,
+ NODE_VECTOR_TRANSFORM_TYPE_NORMAL
} NodeVectorTransformType;
typedef enum NodeVectorTransformConvertSpace {
- NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD,
- NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT,
- NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA
+ NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD,
+ NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT,
+ NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA
} NodeVectorTransformConvertSpace;
typedef enum NodeConvert {
- NODE_CONVERT_FV,
- NODE_CONVERT_FI,
- NODE_CONVERT_CF,
- NODE_CONVERT_CI,
- NODE_CONVERT_VF,
- NODE_CONVERT_VI,
- NODE_CONVERT_IF,
- NODE_CONVERT_IV
+ NODE_CONVERT_FV,
+ NODE_CONVERT_FI,
+ NODE_CONVERT_CF,
+ NODE_CONVERT_CI,
+ NODE_CONVERT_VF,
+ NODE_CONVERT_VI,
+ NODE_CONVERT_IF,
+ NODE_CONVERT_IV
} NodeConvert;
typedef enum NodeMusgraveType {
- NODE_MUSGRAVE_MULTIFRACTAL,
- NODE_MUSGRAVE_FBM,
- NODE_MUSGRAVE_HYBRID_MULTIFRACTAL,
- NODE_MUSGRAVE_RIDGED_MULTIFRACTAL,
- NODE_MUSGRAVE_HETERO_TERRAIN
+ NODE_MUSGRAVE_MULTIFRACTAL,
+ NODE_MUSGRAVE_FBM,
+ NODE_MUSGRAVE_HYBRID_MULTIFRACTAL,
+ NODE_MUSGRAVE_RIDGED_MULTIFRACTAL,
+ NODE_MUSGRAVE_HETERO_TERRAIN
} NodeMusgraveType;
-typedef enum NodeWaveType {
- NODE_WAVE_BANDS,
- NODE_WAVE_RINGS
-} NodeWaveType;
+typedef enum NodeWaveType { NODE_WAVE_BANDS, NODE_WAVE_RINGS } NodeWaveType;
typedef enum NodeWaveProfiles {
- NODE_WAVE_PROFILE_SIN,
- NODE_WAVE_PROFILE_SAW,
+ NODE_WAVE_PROFILE_SIN,
+ NODE_WAVE_PROFILE_SAW,
} NodeWaveProfile;
-typedef enum NodeSkyType {
- NODE_SKY_OLD,
- NODE_SKY_NEW
-} NodeSkyType;
+typedef enum NodeSkyType { NODE_SKY_OLD, NODE_SKY_NEW } NodeSkyType;
typedef enum NodeGradientType {
- NODE_BLEND_LINEAR,
- NODE_BLEND_QUADRATIC,
- NODE_BLEND_EASING,
- NODE_BLEND_DIAGONAL,
- NODE_BLEND_RADIAL,
- NODE_BLEND_QUADRATIC_SPHERE,
- NODE_BLEND_SPHERICAL
+ NODE_BLEND_LINEAR,
+ NODE_BLEND_QUADRATIC,
+ NODE_BLEND_EASING,
+ NODE_BLEND_DIAGONAL,
+ NODE_BLEND_RADIAL,
+ NODE_BLEND_QUADRATIC_SPHERE,
+ NODE_BLEND_SPHERICAL
} NodeGradientType;
typedef enum NodeVoronoiColoring {
- NODE_VORONOI_INTENSITY,
- NODE_VORONOI_CELLS
+ NODE_VORONOI_INTENSITY,
+ NODE_VORONOI_CELLS
} NodeVoronoiColoring;
typedef enum NodeVoronoiDistanceMetric {
- NODE_VORONOI_DISTANCE,
- NODE_VORONOI_MANHATTAN,
- NODE_VORONOI_CHEBYCHEV,
- NODE_VORONOI_MINKOWSKI
+ NODE_VORONOI_DISTANCE,
+ NODE_VORONOI_MANHATTAN,
+ NODE_VORONOI_CHEBYCHEV,
+ NODE_VORONOI_MINKOWSKI
} NodeVoronoiDistanceMetric;
typedef enum NodeVoronoiFeature {
- NODE_VORONOI_F1,
- NODE_VORONOI_F2,
- NODE_VORONOI_F3,
- NODE_VORONOI_F4,
- NODE_VORONOI_F2F1
+ NODE_VORONOI_F1,
+ NODE_VORONOI_F2,
+ NODE_VORONOI_F3,
+ NODE_VORONOI_F4,
+ NODE_VORONOI_F2F1
} NodeVoronoiFeature;
typedef enum NodeBlendWeightType {
- NODE_LAYER_WEIGHT_FRESNEL,
- NODE_LAYER_WEIGHT_FACING
+ NODE_LAYER_WEIGHT_FRESNEL,
+ NODE_LAYER_WEIGHT_FACING
} NodeBlendWeightType;
typedef enum NodeTangentDirectionType {
- NODE_TANGENT_RADIAL,
- NODE_TANGENT_UVMAP
+ NODE_TANGENT_RADIAL,
+ NODE_TANGENT_UVMAP
} NodeTangentDirectionType;
typedef enum NodeTangentAxis {
- NODE_TANGENT_AXIS_X,
- NODE_TANGENT_AXIS_Y,
- NODE_TANGENT_AXIS_Z
+ NODE_TANGENT_AXIS_X,
+ NODE_TANGENT_AXIS_Y,
+ NODE_TANGENT_AXIS_Z
} NodeTangentAxis;
typedef enum NodeNormalMapSpace {
- NODE_NORMAL_MAP_TANGENT,
- NODE_NORMAL_MAP_OBJECT,
- NODE_NORMAL_MAP_WORLD,
- NODE_NORMAL_MAP_BLENDER_OBJECT,
- NODE_NORMAL_MAP_BLENDER_WORLD,
+ NODE_NORMAL_MAP_TANGENT,
+ NODE_NORMAL_MAP_OBJECT,
+ NODE_NORMAL_MAP_WORLD,
+ NODE_NORMAL_MAP_BLENDER_OBJECT,
+ NODE_NORMAL_MAP_BLENDER_WORLD,
} NodeNormalMapSpace;
typedef enum NodeImageColorSpace {
- NODE_COLOR_SPACE_NONE = 0,
- NODE_COLOR_SPACE_COLOR = 1,
+ NODE_COLOR_SPACE_NONE = 0,
+ NODE_COLOR_SPACE_COLOR = 1,
} NodeImageColorSpace;
typedef enum NodeImageProjection {
- NODE_IMAGE_PROJ_FLAT = 0,
- NODE_IMAGE_PROJ_BOX = 1,
- NODE_IMAGE_PROJ_SPHERE = 2,
- NODE_IMAGE_PROJ_TUBE = 3,
+ NODE_IMAGE_PROJ_FLAT = 0,
+ NODE_IMAGE_PROJ_BOX = 1,
+ NODE_IMAGE_PROJ_SPHERE = 2,
+ NODE_IMAGE_PROJ_TUBE = 3,
} NodeImageProjection;
typedef enum NodeEnvironmentProjection {
- NODE_ENVIRONMENT_EQUIRECTANGULAR = 0,
- NODE_ENVIRONMENT_MIRROR_BALL = 1,
+ NODE_ENVIRONMENT_EQUIRECTANGULAR = 0,
+ NODE_ENVIRONMENT_MIRROR_BALL = 1,
} NodeEnvironmentProjection;
typedef enum NodeBumpOffset {
- NODE_BUMP_OFFSET_CENTER,
- NODE_BUMP_OFFSET_DX,
- NODE_BUMP_OFFSET_DY,
+ NODE_BUMP_OFFSET_CENTER,
+ NODE_BUMP_OFFSET_DX,
+ NODE_BUMP_OFFSET_DY,
} NodeBumpOffset;
typedef enum NodeTexVoxelSpace {
- NODE_TEX_VOXEL_SPACE_OBJECT = 0,
- NODE_TEX_VOXEL_SPACE_WORLD = 1,
+ NODE_TEX_VOXEL_SPACE_OBJECT = 0,
+ NODE_TEX_VOXEL_SPACE_WORLD = 1,
} NodeTexVoxelSpace;
typedef enum NodeAO {
- NODE_AO_ONLY_LOCAL = (1 << 0),
- NODE_AO_INSIDE = (1 << 1),
- NODE_AO_GLOBAL_RADIUS = (1 << 2),
+ NODE_AO_ONLY_LOCAL = (1 << 0),
+ NODE_AO_INSIDE = (1 << 1),
+ NODE_AO_GLOBAL_RADIUS = (1 << 2),
} NodeAO;
typedef enum ShaderType {
- SHADER_TYPE_SURFACE,
- SHADER_TYPE_VOLUME,
- SHADER_TYPE_DISPLACEMENT,
- SHADER_TYPE_BUMP,
+ SHADER_TYPE_SURFACE,
+ SHADER_TYPE_VOLUME,
+ SHADER_TYPE_DISPLACEMENT,
+ SHADER_TYPE_BUMP,
} ShaderType;
typedef enum NodePrincipledHairParametrization {
- NODE_PRINCIPLED_HAIR_REFLECTANCE = 0,
- NODE_PRINCIPLED_HAIR_PIGMENT_CONCENTRATION = 1,
- NODE_PRINCIPLED_HAIR_DIRECT_ABSORPTION = 2,
- NODE_PRINCIPLED_HAIR_NUM,
+ NODE_PRINCIPLED_HAIR_REFLECTANCE = 0,
+ NODE_PRINCIPLED_HAIR_PIGMENT_CONCENTRATION = 1,
+ NODE_PRINCIPLED_HAIR_DIRECT_ABSORPTION = 2,
+ NODE_PRINCIPLED_HAIR_NUM,
} NodePrincipledHairParametrization;
/* Closure */
typedef enum ClosureType {
- /* Special type, flags generic node as a non-BSDF. */
- CLOSURE_NONE_ID,
-
- CLOSURE_BSDF_ID,
-
- /* Diffuse */
- CLOSURE_BSDF_DIFFUSE_ID,
- CLOSURE_BSDF_OREN_NAYAR_ID,
- CLOSURE_BSDF_DIFFUSE_RAMP_ID,
- CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID,
- CLOSURE_BSDF_PRINCIPLED_SHEEN_ID,
- CLOSURE_BSDF_DIFFUSE_TOON_ID,
-
- /* Glossy */
- CLOSURE_BSDF_REFLECTION_ID,
- CLOSURE_BSDF_MICROFACET_GGX_ID,
- CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID,
- CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID,
- CLOSURE_BSDF_MICROFACET_BECKMANN_ID,
- CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID,
- CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID,
- CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID,
- CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID,
- CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID,
- CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID,
- CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_FRESNEL_ID,
- CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID,
- CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID,
- CLOSURE_BSDF_ASHIKHMIN_VELVET_ID,
- CLOSURE_BSDF_PHONG_RAMP_ID,
- CLOSURE_BSDF_GLOSSY_TOON_ID,
- CLOSURE_BSDF_HAIR_REFLECTION_ID,
-
- /* Transmission */
- CLOSURE_BSDF_TRANSLUCENT_ID,
- CLOSURE_BSDF_REFRACTION_ID,
- CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID,
- CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID,
- CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID,
- CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID,
- CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID,
- CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID,
- CLOSURE_BSDF_SHARP_GLASS_ID,
- CLOSURE_BSDF_HAIR_PRINCIPLED_ID,
- CLOSURE_BSDF_HAIR_TRANSMISSION_ID,
-
- /* Special cases */
- CLOSURE_BSDF_BSSRDF_ID,
- CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID,
- CLOSURE_BSDF_TRANSPARENT_ID,
-
- /* BSSRDF */
- CLOSURE_BSSRDF_CUBIC_ID,
- CLOSURE_BSSRDF_GAUSSIAN_ID,
- CLOSURE_BSSRDF_PRINCIPLED_ID,
- CLOSURE_BSSRDF_BURLEY_ID,
- CLOSURE_BSSRDF_RANDOM_WALK_ID,
- CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID,
-
- /* Other */
- CLOSURE_HOLDOUT_ID,
-
- /* Volume */
- CLOSURE_VOLUME_ID,
- CLOSURE_VOLUME_ABSORPTION_ID,
- CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID,
-
- CLOSURE_BSDF_PRINCIPLED_ID,
-
- NBUILTIN_CLOSURES
+ /* Special type, flags generic node as a non-BSDF. */
+ CLOSURE_NONE_ID,
+
+ CLOSURE_BSDF_ID,
+
+ /* Diffuse */
+ CLOSURE_BSDF_DIFFUSE_ID,
+ CLOSURE_BSDF_OREN_NAYAR_ID,
+ CLOSURE_BSDF_DIFFUSE_RAMP_ID,
+ CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID,
+ CLOSURE_BSDF_PRINCIPLED_SHEEN_ID,
+ CLOSURE_BSDF_DIFFUSE_TOON_ID,
+
+ /* Glossy */
+ CLOSURE_BSDF_REFLECTION_ID,
+ CLOSURE_BSDF_MICROFACET_GGX_ID,
+ CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID,
+ CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID,
+ CLOSURE_BSDF_MICROFACET_BECKMANN_ID,
+ CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID,
+ CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID,
+ CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID,
+ CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID,
+ CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID,
+ CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID,
+ CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_FRESNEL_ID,
+ CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID,
+ CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID,
+ CLOSURE_BSDF_ASHIKHMIN_VELVET_ID,
+ CLOSURE_BSDF_PHONG_RAMP_ID,
+ CLOSURE_BSDF_GLOSSY_TOON_ID,
+ CLOSURE_BSDF_HAIR_REFLECTION_ID,
+
+ /* Transmission */
+ CLOSURE_BSDF_TRANSLUCENT_ID,
+ CLOSURE_BSDF_REFRACTION_ID,
+ CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID,
+ CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID,
+ CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID,
+ CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID,
+ CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID,
+ CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID,
+ CLOSURE_BSDF_SHARP_GLASS_ID,
+ CLOSURE_BSDF_HAIR_PRINCIPLED_ID,
+ CLOSURE_BSDF_HAIR_TRANSMISSION_ID,
+
+ /* Special cases */
+ CLOSURE_BSDF_BSSRDF_ID,
+ CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID,
+ CLOSURE_BSDF_TRANSPARENT_ID,
+
+ /* BSSRDF */
+ CLOSURE_BSSRDF_CUBIC_ID,
+ CLOSURE_BSSRDF_GAUSSIAN_ID,
+ CLOSURE_BSSRDF_PRINCIPLED_ID,
+ CLOSURE_BSSRDF_BURLEY_ID,
+ CLOSURE_BSSRDF_RANDOM_WALK_ID,
+ CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID,
+
+ /* Other */
+ CLOSURE_HOLDOUT_ID,
+
+ /* Volume */
+ CLOSURE_VOLUME_ID,
+ CLOSURE_VOLUME_ABSORPTION_ID,
+ CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID,
+
+ CLOSURE_BSDF_PRINCIPLED_ID,
+
+ NBUILTIN_CLOSURES
} ClosureType;
/* watch this, being lazy with memory usage */
#define CLOSURE_IS_BSDF(type) (type <= CLOSURE_BSDF_TRANSPARENT_ID)
-#define CLOSURE_IS_BSDF_DIFFUSE(type) (type >= CLOSURE_BSDF_DIFFUSE_ID && type <= CLOSURE_BSDF_DIFFUSE_TOON_ID)
-#define CLOSURE_IS_BSDF_GLOSSY(type) ((type >= CLOSURE_BSDF_REFLECTION_ID && type <= CLOSURE_BSDF_HAIR_REFLECTION_ID )|| (type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID))
-#define CLOSURE_IS_BSDF_TRANSMISSION(type) (type >= CLOSURE_BSDF_TRANSLUCENT_ID && type <= CLOSURE_BSDF_HAIR_TRANSMISSION_ID)
-#define CLOSURE_IS_BSDF_BSSRDF(type) (type == CLOSURE_BSDF_BSSRDF_ID || type == CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID)
-#define CLOSURE_IS_BSDF_SINGULAR(type) (type == CLOSURE_BSDF_REFLECTION_ID || \
- type == CLOSURE_BSDF_REFRACTION_ID || \
- type == CLOSURE_BSDF_TRANSPARENT_ID)
+#define CLOSURE_IS_BSDF_DIFFUSE(type) \
+ (type >= CLOSURE_BSDF_DIFFUSE_ID && type <= CLOSURE_BSDF_DIFFUSE_TOON_ID)
+#define CLOSURE_IS_BSDF_GLOSSY(type) \
+ ((type >= CLOSURE_BSDF_REFLECTION_ID && type <= CLOSURE_BSDF_HAIR_REFLECTION_ID) || \
+ (type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID))
+#define CLOSURE_IS_BSDF_TRANSMISSION(type) \
+ (type >= CLOSURE_BSDF_TRANSLUCENT_ID && type <= CLOSURE_BSDF_HAIR_TRANSMISSION_ID)
+#define CLOSURE_IS_BSDF_BSSRDF(type) \
+ (type == CLOSURE_BSDF_BSSRDF_ID || type == CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID)
+#define CLOSURE_IS_BSDF_SINGULAR(type) \
+ (type == CLOSURE_BSDF_REFLECTION_ID || type == CLOSURE_BSDF_REFRACTION_ID || \
+ type == CLOSURE_BSDF_TRANSPARENT_ID)
#define CLOSURE_IS_BSDF_TRANSPARENT(type) (type == CLOSURE_BSDF_TRANSPARENT_ID)
-#define CLOSURE_IS_BSDF_MULTISCATTER(type) (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID ||\
- type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID || \
- type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID)
-#define CLOSURE_IS_BSDF_MICROFACET(type) ((type >= CLOSURE_BSDF_MICROFACET_GGX_ID && type <= CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID) ||\
- (type >= CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID && type <= CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID) ||\
- (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID))
+#define CLOSURE_IS_BSDF_MULTISCATTER(type) \
+ (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID || \
+ type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID || \
+ type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID)
+#define CLOSURE_IS_BSDF_MICROFACET(type) \
+ ((type >= CLOSURE_BSDF_MICROFACET_GGX_ID && type <= CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID) || \
+ (type >= CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID && \
+ type <= CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID) || \
+ (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID))
#define CLOSURE_IS_BSDF_OR_BSSRDF(type) (type <= CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
-#define CLOSURE_IS_BSSRDF(type) (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
-#define CLOSURE_IS_DISK_BSSRDF(type) (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_BURLEY_ID)
-#define CLOSURE_IS_VOLUME(type) (type >= CLOSURE_VOLUME_ID && type <= CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID)
+#define CLOSURE_IS_BSSRDF(type) \
+ (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
+#define CLOSURE_IS_DISK_BSSRDF(type) \
+ (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_BURLEY_ID)
+#define CLOSURE_IS_VOLUME(type) \
+ (type >= CLOSURE_VOLUME_ID && type <= CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID)
#define CLOSURE_IS_VOLUME_SCATTER(type) (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID)
#define CLOSURE_IS_VOLUME_ABSORPTION(type) (type == CLOSURE_VOLUME_ABSORPTION_ID)
#define CLOSURE_IS_HOLDOUT(type) (type == CLOSURE_HOLDOUT_ID)
#define CLOSURE_IS_PHASE(type) (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID)
-#define CLOSURE_IS_GLASS(type) (type >= CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID && type <= CLOSURE_BSDF_SHARP_GLASS_ID)
+#define CLOSURE_IS_GLASS(type) \
+ (type >= CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID && type <= CLOSURE_BSDF_SHARP_GLASS_ID)
#define CLOSURE_IS_PRINCIPLED(type) (type == CLOSURE_BSDF_PRINCIPLED_ID)
#define CLOSURE_WEIGHT_CUTOFF 1e-5f
CCL_NAMESPACE_END
-#endif /* __SVM_TYPES_H__ */
+#endif /* __SVM_TYPES_H__ */
diff --git a/intern/cycles/kernel/svm/svm_value.h b/intern/cycles/kernel/svm/svm_value.h
index 062aee2956e..5b76f2c8832 100644
--- a/intern/cycles/kernel/svm/svm_value.h
+++ b/intern/cycles/kernel/svm/svm_value.h
@@ -18,18 +18,21 @@ CCL_NAMESPACE_BEGIN
/* Value Nodes */
-ccl_device void svm_node_value_f(KernelGlobals *kg, ShaderData *sd, float *stack, uint ivalue, uint out_offset)
+ccl_device void svm_node_value_f(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint ivalue, uint out_offset)
{
- stack_store_float(stack, out_offset, __uint_as_float(ivalue));
+ stack_store_float(stack, out_offset, __uint_as_float(ivalue));
}
-ccl_device void svm_node_value_v(KernelGlobals *kg, ShaderData *sd, float *stack, uint out_offset, int *offset)
+ccl_device void svm_node_value_v(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint out_offset, int *offset)
{
- /* read extra data */
- uint4 node1 = read_node(kg, offset);
- float3 p = make_float3(__uint_as_float(node1.y), __uint_as_float(node1.z), __uint_as_float(node1.w));
+ /* read extra data */
+ uint4 node1 = read_node(kg, offset);
+ float3 p = make_float3(
+ __uint_as_float(node1.y), __uint_as_float(node1.z), __uint_as_float(node1.w));
- stack_store_float3(stack, out_offset, p);
+ stack_store_float3(stack, out_offset, p);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_vector_transform.h b/intern/cycles/kernel/svm/svm_vector_transform.h
index f6ec36ba41f..7ec0f07f2e4 100644
--- a/intern/cycles/kernel/svm/svm_vector_transform.h
+++ b/intern/cycles/kernel/svm/svm_vector_transform.h
@@ -18,83 +18,90 @@ CCL_NAMESPACE_BEGIN
/* Vector Transform */
-ccl_device void svm_node_vector_transform(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+ccl_device void svm_node_vector_transform(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node)
{
- uint itype, ifrom, ito;
- uint vector_in, vector_out;
+ uint itype, ifrom, ito;
+ uint vector_in, vector_out;
- decode_node_uchar4(node.y, &itype, &ifrom, &ito, NULL);
- decode_node_uchar4(node.z, &vector_in, &vector_out, NULL, NULL);
+ decode_node_uchar4(node.y, &itype, &ifrom, &ito, NULL);
+ decode_node_uchar4(node.z, &vector_in, &vector_out, NULL, NULL);
- float3 in = stack_load_float3(stack, vector_in);
+ float3 in = stack_load_float3(stack, vector_in);
- NodeVectorTransformType type = (NodeVectorTransformType)itype;
- NodeVectorTransformConvertSpace from = (NodeVectorTransformConvertSpace)ifrom;
- NodeVectorTransformConvertSpace to = (NodeVectorTransformConvertSpace)ito;
+ NodeVectorTransformType type = (NodeVectorTransformType)itype;
+ NodeVectorTransformConvertSpace from = (NodeVectorTransformConvertSpace)ifrom;
+ NodeVectorTransformConvertSpace to = (NodeVectorTransformConvertSpace)ito;
- Transform tfm;
- bool is_object = (sd->object != OBJECT_NONE);
- bool is_direction = (type == NODE_VECTOR_TRANSFORM_TYPE_VECTOR || type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL);
+ Transform tfm;
+ bool is_object = (sd->object != OBJECT_NONE);
+ bool is_direction = (type == NODE_VECTOR_TRANSFORM_TYPE_VECTOR ||
+ type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL);
- /* From world */
- if(from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD) {
- if(to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) {
- tfm = kernel_data.cam.worldtocamera;
- if(is_direction)
- in = transform_direction(&tfm, in);
- else
- in = transform_point(&tfm, in);
- }
- else if(to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT && is_object) {
- if(is_direction)
- object_inverse_dir_transform(kg, sd, &in);
- else
- object_inverse_position_transform(kg, sd, &in);
- }
- }
+ /* From world */
+ if (from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD) {
+ if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) {
+ tfm = kernel_data.cam.worldtocamera;
+ if (is_direction)
+ in = transform_direction(&tfm, in);
+ else
+ in = transform_point(&tfm, in);
+ }
+ else if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT && is_object) {
+ if (is_direction)
+ object_inverse_dir_transform(kg, sd, &in);
+ else
+ object_inverse_position_transform(kg, sd, &in);
+ }
+ }
- /* From camera */
- else if(from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) {
- if(to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD || to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT) {
- tfm = kernel_data.cam.cameratoworld;
- if(is_direction)
- in = transform_direction(&tfm, in);
- else
- in = transform_point(&tfm, in);
- }
- if(to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT && is_object) {
- if(is_direction)
- object_inverse_dir_transform(kg, sd, &in);
- else
- object_inverse_position_transform(kg, sd, &in);
- }
- }
+ /* From camera */
+ else if (from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) {
+ if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD ||
+ to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT) {
+ tfm = kernel_data.cam.cameratoworld;
+ if (is_direction)
+ in = transform_direction(&tfm, in);
+ else
+ in = transform_point(&tfm, in);
+ }
+ if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT && is_object) {
+ if (is_direction)
+ object_inverse_dir_transform(kg, sd, &in);
+ else
+ object_inverse_position_transform(kg, sd, &in);
+ }
+ }
- /* From object */
- else if(from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT) {
- if((to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD || to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) && is_object) {
- if(is_direction)
- object_dir_transform(kg, sd, &in);
- else
- object_position_transform(kg, sd, &in);
- }
- if(to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) {
- tfm = kernel_data.cam.worldtocamera;
- if(is_direction)
- in = transform_direction(&tfm, in);
- else
- in = transform_point(&tfm, in);
- }
- }
+ /* From object */
+ else if (from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT) {
+ if ((to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD ||
+ to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) &&
+ is_object) {
+ if (is_direction)
+ object_dir_transform(kg, sd, &in);
+ else
+ object_position_transform(kg, sd, &in);
+ }
+ if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) {
+ tfm = kernel_data.cam.worldtocamera;
+ if (is_direction)
+ in = transform_direction(&tfm, in);
+ else
+ in = transform_point(&tfm, in);
+ }
+ }
- /* Normalize Normal */
- if(type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL)
- in = normalize(in);
+ /* Normalize Normal */
+ if (type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL)
+ in = normalize(in);
- /* Output */
- if(stack_valid(vector_out)) {
- stack_store_float3(stack, vector_out, in);
- }
+ /* Output */
+ if (stack_valid(vector_out)) {
+ stack_store_float3(stack, vector_out, in);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h
index d661df54ead..c311aefaf38 100644
--- a/intern/cycles/kernel/svm/svm_voronoi.h
+++ b/intern/cycles/kernel/svm/svm_voronoi.h
@@ -18,143 +18,167 @@ CCL_NAMESPACE_BEGIN
/* Voronoi */
-ccl_device void voronoi_neighbors(float3 p, NodeVoronoiDistanceMetric distance, float e, float da[4], float3 pa[4])
+ccl_device void voronoi_neighbors(
+ float3 p, NodeVoronoiDistanceMetric distance, float e, float da[4], float3 pa[4])
{
- /* Compute the distance to and the position of the closest neighbors to p.
- *
- * The neighbors are randomly placed, 1 each in a 3x3x3 grid (Worley pattern).
- * The distances and points are returned in ascending order, i.e. da[0] and pa[0] will
- * contain the distance to the closest point and its coordinates respectively.
- */
-
- da[0] = 1e10f;
- da[1] = 1e10f;
- da[2] = 1e10f;
- da[3] = 1e10f;
-
- pa[0] = make_float3(0.0f, 0.0f, 0.0f);
- pa[1] = make_float3(0.0f, 0.0f, 0.0f);
- pa[2] = make_float3(0.0f, 0.0f, 0.0f);
- pa[3] = make_float3(0.0f, 0.0f, 0.0f);
-
- int3 xyzi = quick_floor_to_int3(p);
-
- for(int xx = -1; xx <= 1; xx++) {
- for(int yy = -1; yy <= 1; yy++) {
- for(int zz = -1; zz <= 1; zz++) {
- int3 ip = xyzi + make_int3(xx, yy, zz);
- float3 fp = make_float3(ip.x, ip.y, ip.z);
- float3 vp = fp + cellnoise3(fp);
-
- float d;
- switch(distance) {
- case NODE_VORONOI_DISTANCE:
- d = len_squared(p - vp);
- break;
- case NODE_VORONOI_MANHATTAN:
- d = reduce_add(fabs(vp - p));
- break;
- case NODE_VORONOI_CHEBYCHEV:
- d = max3(fabs(vp - p));
- break;
- case NODE_VORONOI_MINKOWSKI: {
- float3 n = fabs(vp - p);
- if(e == 0.5f) {
- d = sqr(reduce_add(sqrt(n)));
- }
- else {
- d = powf(reduce_add(pow3(n, e)), 1.0f/e);
- }
- break;
- }
- }
-
- /* To keep the shortest four distances and associated points we have to keep them in sorted order. */
- if(d < da[0]) {
- da[3] = da[2];
- da[2] = da[1];
- da[1] = da[0];
- da[0] = d;
-
- pa[3] = pa[2];
- pa[2] = pa[1];
- pa[1] = pa[0];
- pa[0] = vp;
- }
- else if(d < da[1]) {
- da[3] = da[2];
- da[2] = da[1];
- da[1] = d;
-
- pa[3] = pa[2];
- pa[2] = pa[1];
- pa[1] = vp;
- }
- else if(d < da[2]) {
- da[3] = da[2];
- da[2] = d;
-
- pa[3] = pa[2];
- pa[2] = vp;
- }
- else if(d < da[3]) {
- da[3] = d;
- pa[3] = vp;
- }
- }
- }
- }
+ /* Compute the distance to and the position of the closest neighbors to p.
+ *
+ * The neighbors are randomly placed, 1 each in a 3x3x3 grid (Worley pattern).
+ * The distances and points are returned in ascending order, i.e. da[0] and pa[0] will
+ * contain the distance to the closest point and its coordinates respectively.
+ */
+
+ da[0] = 1e10f;
+ da[1] = 1e10f;
+ da[2] = 1e10f;
+ da[3] = 1e10f;
+
+ pa[0] = make_float3(0.0f, 0.0f, 0.0f);
+ pa[1] = make_float3(0.0f, 0.0f, 0.0f);
+ pa[2] = make_float3(0.0f, 0.0f, 0.0f);
+ pa[3] = make_float3(0.0f, 0.0f, 0.0f);
+
+ int3 xyzi = quick_floor_to_int3(p);
+
+ for (int xx = -1; xx <= 1; xx++) {
+ for (int yy = -1; yy <= 1; yy++) {
+ for (int zz = -1; zz <= 1; zz++) {
+ int3 ip = xyzi + make_int3(xx, yy, zz);
+ float3 fp = make_float3(ip.x, ip.y, ip.z);
+ float3 vp = fp + cellnoise3(fp);
+
+ float d;
+ switch (distance) {
+ case NODE_VORONOI_DISTANCE:
+ d = len_squared(p - vp);
+ break;
+ case NODE_VORONOI_MANHATTAN:
+ d = reduce_add(fabs(vp - p));
+ break;
+ case NODE_VORONOI_CHEBYCHEV:
+ d = max3(fabs(vp - p));
+ break;
+ case NODE_VORONOI_MINKOWSKI: {
+ float3 n = fabs(vp - p);
+ if (e == 0.5f) {
+ d = sqr(reduce_add(sqrt(n)));
+ }
+ else {
+ d = powf(reduce_add(pow3(n, e)), 1.0f / e);
+ }
+ break;
+ }
+ }
+
+ /* To keep the shortest four distances and associated points we have to keep them in sorted order. */
+ if (d < da[0]) {
+ da[3] = da[2];
+ da[2] = da[1];
+ da[1] = da[0];
+ da[0] = d;
+
+ pa[3] = pa[2];
+ pa[2] = pa[1];
+ pa[1] = pa[0];
+ pa[0] = vp;
+ }
+ else if (d < da[1]) {
+ da[3] = da[2];
+ da[2] = da[1];
+ da[1] = d;
+
+ pa[3] = pa[2];
+ pa[2] = pa[1];
+ pa[1] = vp;
+ }
+ else if (d < da[2]) {
+ da[3] = da[2];
+ da[2] = d;
+
+ pa[3] = pa[2];
+ pa[2] = vp;
+ }
+ else if (d < da[3]) {
+ da[3] = d;
+ pa[3] = vp;
+ }
+ }
+ }
+ }
}
-ccl_device void svm_node_tex_voronoi(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_tex_voronoi(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
- uint4 node2 = read_node(kg, offset);
-
- uint co_offset, coloring, distance, feature;
- uint scale_offset, e_offset, fac_offset, color_offset;
-
- decode_node_uchar4(node.y, &co_offset, &coloring, &distance, &feature);
- decode_node_uchar4(node.z, &scale_offset, &e_offset, &fac_offset, &color_offset);
-
- float3 co = stack_load_float3(stack, co_offset);
- float scale = stack_load_float_default(stack, scale_offset, node2.x);
- float exponent = stack_load_float_default(stack, e_offset, node2.y);
-
- float dist[4];
- float3 neighbor[4];
- voronoi_neighbors(co*scale, (NodeVoronoiDistanceMetric)distance, exponent, dist, neighbor);
-
- float3 color;
- float fac;
- if(coloring == NODE_VORONOI_INTENSITY) {
- switch(feature) {
- case NODE_VORONOI_F1: fac = dist[0]; break;
- case NODE_VORONOI_F2: fac = dist[1]; break;
- case NODE_VORONOI_F3: fac = dist[2]; break;
- case NODE_VORONOI_F4: fac = dist[3]; break;
- case NODE_VORONOI_F2F1: fac = dist[1] - dist[0]; break;
- }
-
- color = make_float3(fac, fac, fac);
- }
- else {
- /* NODE_VORONOI_CELLS */
- switch(feature) {
- case NODE_VORONOI_F1: color = neighbor[0]; break;
- case NODE_VORONOI_F2: color = neighbor[1]; break;
- case NODE_VORONOI_F3: color = neighbor[2]; break;
- case NODE_VORONOI_F4: color = neighbor[3]; break;
- /* Usefulness of this vector is questionable. Note F2 >= F1 but the
- * individual vector components might not be. */
- case NODE_VORONOI_F2F1: color = fabs(neighbor[1] - neighbor[0]); break;
- }
-
- color = cellnoise3(color);
- fac = average(color);
- }
-
- if(stack_valid(fac_offset)) stack_store_float(stack, fac_offset, fac);
- if(stack_valid(color_offset)) stack_store_float3(stack, color_offset, color);
+ uint4 node2 = read_node(kg, offset);
+
+ uint co_offset, coloring, distance, feature;
+ uint scale_offset, e_offset, fac_offset, color_offset;
+
+ decode_node_uchar4(node.y, &co_offset, &coloring, &distance, &feature);
+ decode_node_uchar4(node.z, &scale_offset, &e_offset, &fac_offset, &color_offset);
+
+ float3 co = stack_load_float3(stack, co_offset);
+ float scale = stack_load_float_default(stack, scale_offset, node2.x);
+ float exponent = stack_load_float_default(stack, e_offset, node2.y);
+
+ float dist[4];
+ float3 neighbor[4];
+ voronoi_neighbors(co * scale, (NodeVoronoiDistanceMetric)distance, exponent, dist, neighbor);
+
+ float3 color;
+ float fac;
+ if (coloring == NODE_VORONOI_INTENSITY) {
+ switch (feature) {
+ case NODE_VORONOI_F1:
+ fac = dist[0];
+ break;
+ case NODE_VORONOI_F2:
+ fac = dist[1];
+ break;
+ case NODE_VORONOI_F3:
+ fac = dist[2];
+ break;
+ case NODE_VORONOI_F4:
+ fac = dist[3];
+ break;
+ case NODE_VORONOI_F2F1:
+ fac = dist[1] - dist[0];
+ break;
+ }
+
+ color = make_float3(fac, fac, fac);
+ }
+ else {
+ /* NODE_VORONOI_CELLS */
+ switch (feature) {
+ case NODE_VORONOI_F1:
+ color = neighbor[0];
+ break;
+ case NODE_VORONOI_F2:
+ color = neighbor[1];
+ break;
+ case NODE_VORONOI_F3:
+ color = neighbor[2];
+ break;
+ case NODE_VORONOI_F4:
+ color = neighbor[3];
+ break;
+ /* Usefulness of this vector is questionable. Note F2 >= F1 but the
+ * individual vector components might not be. */
+ case NODE_VORONOI_F2F1:
+ color = fabs(neighbor[1] - neighbor[0]);
+ break;
+ }
+
+ color = cellnoise3(color);
+ fac = average(color);
+ }
+
+ if (stack_valid(fac_offset))
+ stack_store_float(stack, fac_offset, fac);
+ if (stack_valid(color_offset))
+ stack_store_float3(stack, color_offset, color);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h
index 43b433683e0..26d8cc71d3b 100644
--- a/intern/cycles/kernel/svm/svm_voxel.h
+++ b/intern/cycles/kernel/svm/svm_voxel.h
@@ -19,37 +19,34 @@ CCL_NAMESPACE_BEGIN
/* TODO(sergey): Think of making it more generic volume-type attribute
* sampler.
*/
-ccl_device void svm_node_tex_voxel(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint4 node,
- int *offset)
+ccl_device void svm_node_tex_voxel(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
- uint co_offset, density_out_offset, color_out_offset, space;
- decode_node_uchar4(node.z, &co_offset, &density_out_offset, &color_out_offset, &space);
+ uint co_offset, density_out_offset, color_out_offset, space;
+ decode_node_uchar4(node.z, &co_offset, &density_out_offset, &color_out_offset, &space);
#ifdef __VOLUME__
- int id = node.y;
- float3 co = stack_load_float3(stack, co_offset);
- if(space == NODE_TEX_VOXEL_SPACE_OBJECT) {
- co = volume_normalized_position(kg, sd, co);
- }
- else {
- kernel_assert(space == NODE_TEX_VOXEL_SPACE_WORLD);
- Transform tfm;
- tfm.x = read_node_float(kg, offset);
- tfm.y = read_node_float(kg, offset);
- tfm.z = read_node_float(kg, offset);
- co = transform_point(&tfm, co);
- }
+ int id = node.y;
+ float3 co = stack_load_float3(stack, co_offset);
+ if (space == NODE_TEX_VOXEL_SPACE_OBJECT) {
+ co = volume_normalized_position(kg, sd, co);
+ }
+ else {
+ kernel_assert(space == NODE_TEX_VOXEL_SPACE_WORLD);
+ Transform tfm;
+ tfm.x = read_node_float(kg, offset);
+ tfm.y = read_node_float(kg, offset);
+ tfm.z = read_node_float(kg, offset);
+ co = transform_point(&tfm, co);
+ }
- float4 r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z, INTERPOLATION_NONE);
+ float4 r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z, INTERPOLATION_NONE);
#else
- float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
#endif
- if(stack_valid(density_out_offset))
- stack_store_float(stack, density_out_offset, r.w);
- if(stack_valid(color_out_offset))
- stack_store_float3(stack, color_out_offset, make_float3(r.x, r.y, r.z));
+ if (stack_valid(density_out_offset))
+ stack_store_float(stack, density_out_offset, r.w);
+ if (stack_valid(color_out_offset))
+ stack_store_float3(stack, color_out_offset, make_float3(r.x, r.y, r.z));
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_wave.h b/intern/cycles/kernel/svm/svm_wave.h
index 80b63dc80cd..003ad7dc63a 100644
--- a/intern/cycles/kernel/svm/svm_wave.h
+++ b/intern/cycles/kernel/svm/svm_wave.h
@@ -18,48 +18,58 @@ CCL_NAMESPACE_BEGIN
/* Wave */
-ccl_device_noinline float svm_wave(NodeWaveType type, NodeWaveProfile profile, float3 p, float detail, float distortion, float dscale)
+ccl_device_noinline float svm_wave(NodeWaveType type,
+ NodeWaveProfile profile,
+ float3 p,
+ float detail,
+ float distortion,
+ float dscale)
{
- float n;
+ float n;
- if(type == NODE_WAVE_BANDS)
- n = (p.x + p.y + p.z) * 10.0f;
- else /* NODE_WAVE_RINGS */
- n = len(p) * 20.0f;
+ if (type == NODE_WAVE_BANDS)
+ n = (p.x + p.y + p.z) * 10.0f;
+ else /* NODE_WAVE_RINGS */
+ n = len(p) * 20.0f;
- if(distortion != 0.0f)
- n += distortion * noise_turbulence(p*dscale, detail, 0);
+ if (distortion != 0.0f)
+ n += distortion * noise_turbulence(p * dscale, detail, 0);
- if(profile == NODE_WAVE_PROFILE_SIN) {
- return 0.5f + 0.5f * sinf(n);
- }
- else { /* NODE_WAVE_PROFILE_SAW */
- n /= M_2PI_F;
- n -= (int) n;
- return (n < 0.0f)? n + 1.0f: n;
- }
+ if (profile == NODE_WAVE_PROFILE_SIN) {
+ return 0.5f + 0.5f * sinf(n);
+ }
+ else { /* NODE_WAVE_PROFILE_SAW */
+ n /= M_2PI_F;
+ n -= (int)n;
+ return (n < 0.0f) ? n + 1.0f : n;
+ }
}
-ccl_device void svm_node_tex_wave(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_tex_wave(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
- uint4 node2 = read_node(kg, offset);
+ uint4 node2 = read_node(kg, offset);
- uint type;
- uint co_offset, scale_offset, detail_offset, dscale_offset, distortion_offset, color_offset, fac_offset;
+ uint type;
+ uint co_offset, scale_offset, detail_offset, dscale_offset, distortion_offset, color_offset,
+ fac_offset;
- decode_node_uchar4(node.y, &type, &color_offset, &fac_offset, &dscale_offset);
- decode_node_uchar4(node.z, &co_offset, &scale_offset, &detail_offset, &distortion_offset);
+ decode_node_uchar4(node.y, &type, &color_offset, &fac_offset, &dscale_offset);
+ decode_node_uchar4(node.z, &co_offset, &scale_offset, &detail_offset, &distortion_offset);
- float3 co = stack_load_float3(stack, co_offset);
- float scale = stack_load_float_default(stack, scale_offset, node2.x);
- float detail = stack_load_float_default(stack, detail_offset, node2.y);
- float distortion = stack_load_float_default(stack, distortion_offset, node2.z);
- float dscale = stack_load_float_default(stack, dscale_offset, node2.w);
+ float3 co = stack_load_float3(stack, co_offset);
+ float scale = stack_load_float_default(stack, scale_offset, node2.x);
+ float detail = stack_load_float_default(stack, detail_offset, node2.y);
+ float distortion = stack_load_float_default(stack, distortion_offset, node2.z);
+ float dscale = stack_load_float_default(stack, dscale_offset, node2.w);
- float f = svm_wave((NodeWaveType)type, (NodeWaveProfile)node.w, co*scale, detail, distortion, dscale);
+ float f = svm_wave(
+ (NodeWaveType)type, (NodeWaveProfile)node.w, co * scale, detail, distortion, dscale);
- if(stack_valid(fac_offset)) stack_store_float(stack, fac_offset, f);
- if(stack_valid(color_offset)) stack_store_float3(stack, color_offset, make_float3(f, f, f));
+ if (stack_valid(fac_offset))
+ stack_store_float(stack, fac_offset, f);
+ if (stack_valid(color_offset))
+ stack_store_float3(stack, color_offset, make_float3(f, f, f));
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_wavelength.h b/intern/cycles/kernel/svm/svm_wavelength.h
index e935fd20690..d6144802559 100644
--- a/intern/cycles/kernel/svm/svm_wavelength.h
+++ b/intern/cycles/kernel/svm/svm_wavelength.h
@@ -10,13 +10,13 @@
* modification, are permitted provided that the following conditions are
* met:
* * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
+ * notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
* * Neither the name of Sony Pictures Imageworks nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,64 +35,64 @@ CCL_NAMESPACE_BEGIN
/* Wavelength to RGB */
// CIE colour matching functions xBar, yBar, and zBar for
-// wavelengths from 380 through 780 nanometers, every 5
-// nanometers. For a wavelength lambda in this range:
-// cie_colour_match[(lambda - 380) / 5][0] = xBar
-// cie_colour_match[(lambda - 380) / 5][1] = yBar
-// cie_colour_match[(lambda - 380) / 5][2] = zBar
+// wavelengths from 380 through 780 nanometers, every 5
+// nanometers. For a wavelength lambda in this range:
+// cie_colour_match[(lambda - 380) / 5][0] = xBar
+// cie_colour_match[(lambda - 380) / 5][1] = yBar
+// cie_colour_match[(lambda - 380) / 5][2] = zBar
ccl_static_constant float cie_colour_match[81][3] = {
- {0.0014f,0.0000f,0.0065f}, {0.0022f,0.0001f,0.0105f}, {0.0042f,0.0001f,0.0201f},
- {0.0076f,0.0002f,0.0362f}, {0.0143f,0.0004f,0.0679f}, {0.0232f,0.0006f,0.1102f},
- {0.0435f,0.0012f,0.2074f}, {0.0776f,0.0022f,0.3713f}, {0.1344f,0.0040f,0.6456f},
- {0.2148f,0.0073f,1.0391f}, {0.2839f,0.0116f,1.3856f}, {0.3285f,0.0168f,1.6230f},
- {0.3483f,0.0230f,1.7471f}, {0.3481f,0.0298f,1.7826f}, {0.3362f,0.0380f,1.7721f},
- {0.3187f,0.0480f,1.7441f}, {0.2908f,0.0600f,1.6692f}, {0.2511f,0.0739f,1.5281f},
- {0.1954f,0.0910f,1.2876f}, {0.1421f,0.1126f,1.0419f}, {0.0956f,0.1390f,0.8130f},
- {0.0580f,0.1693f,0.6162f}, {0.0320f,0.2080f,0.4652f}, {0.0147f,0.2586f,0.3533f},
- {0.0049f,0.3230f,0.2720f}, {0.0024f,0.4073f,0.2123f}, {0.0093f,0.5030f,0.1582f},
- {0.0291f,0.6082f,0.1117f}, {0.0633f,0.7100f,0.0782f}, {0.1096f,0.7932f,0.0573f},
- {0.1655f,0.8620f,0.0422f}, {0.2257f,0.9149f,0.0298f}, {0.2904f,0.9540f,0.0203f},
- {0.3597f,0.9803f,0.0134f}, {0.4334f,0.9950f,0.0087f}, {0.5121f,1.0000f,0.0057f},
- {0.5945f,0.9950f,0.0039f}, {0.6784f,0.9786f,0.0027f}, {0.7621f,0.9520f,0.0021f},
- {0.8425f,0.9154f,0.0018f}, {0.9163f,0.8700f,0.0017f}, {0.9786f,0.8163f,0.0014f},
- {1.0263f,0.7570f,0.0011f}, {1.0567f,0.6949f,0.0010f}, {1.0622f,0.6310f,0.0008f},
- {1.0456f,0.5668f,0.0006f}, {1.0026f,0.5030f,0.0003f}, {0.9384f,0.4412f,0.0002f},
- {0.8544f,0.3810f,0.0002f}, {0.7514f,0.3210f,0.0001f}, {0.6424f,0.2650f,0.0000f},
- {0.5419f,0.2170f,0.0000f}, {0.4479f,0.1750f,0.0000f}, {0.3608f,0.1382f,0.0000f},
- {0.2835f,0.1070f,0.0000f}, {0.2187f,0.0816f,0.0000f}, {0.1649f,0.0610f,0.0000f},
- {0.1212f,0.0446f,0.0000f}, {0.0874f,0.0320f,0.0000f}, {0.0636f,0.0232f,0.0000f},
- {0.0468f,0.0170f,0.0000f}, {0.0329f,0.0119f,0.0000f}, {0.0227f,0.0082f,0.0000f},
- {0.0158f,0.0057f,0.0000f}, {0.0114f,0.0041f,0.0000f}, {0.0081f,0.0029f,0.0000f},
- {0.0058f,0.0021f,0.0000f}, {0.0041f,0.0015f,0.0000f}, {0.0029f,0.0010f,0.0000f},
- {0.0020f,0.0007f,0.0000f}, {0.0014f,0.0005f,0.0000f}, {0.0010f,0.0004f,0.0000f},
- {0.0007f,0.0002f,0.0000f}, {0.0005f,0.0002f,0.0000f}, {0.0003f,0.0001f,0.0000f},
- {0.0002f,0.0001f,0.0000f}, {0.0002f,0.0001f,0.0000f}, {0.0001f,0.0000f,0.0000f},
- {0.0001f,0.0000f,0.0000f}, {0.0001f,0.0000f,0.0000f}, {0.0000f,0.0000f,0.0000f}
-};
+ {0.0014f, 0.0000f, 0.0065f}, {0.0022f, 0.0001f, 0.0105f}, {0.0042f, 0.0001f, 0.0201f},
+ {0.0076f, 0.0002f, 0.0362f}, {0.0143f, 0.0004f, 0.0679f}, {0.0232f, 0.0006f, 0.1102f},
+ {0.0435f, 0.0012f, 0.2074f}, {0.0776f, 0.0022f, 0.3713f}, {0.1344f, 0.0040f, 0.6456f},
+ {0.2148f, 0.0073f, 1.0391f}, {0.2839f, 0.0116f, 1.3856f}, {0.3285f, 0.0168f, 1.6230f},
+ {0.3483f, 0.0230f, 1.7471f}, {0.3481f, 0.0298f, 1.7826f}, {0.3362f, 0.0380f, 1.7721f},
+ {0.3187f, 0.0480f, 1.7441f}, {0.2908f, 0.0600f, 1.6692f}, {0.2511f, 0.0739f, 1.5281f},
+ {0.1954f, 0.0910f, 1.2876f}, {0.1421f, 0.1126f, 1.0419f}, {0.0956f, 0.1390f, 0.8130f},
+ {0.0580f, 0.1693f, 0.6162f}, {0.0320f, 0.2080f, 0.4652f}, {0.0147f, 0.2586f, 0.3533f},
+ {0.0049f, 0.3230f, 0.2720f}, {0.0024f, 0.4073f, 0.2123f}, {0.0093f, 0.5030f, 0.1582f},
+ {0.0291f, 0.6082f, 0.1117f}, {0.0633f, 0.7100f, 0.0782f}, {0.1096f, 0.7932f, 0.0573f},
+ {0.1655f, 0.8620f, 0.0422f}, {0.2257f, 0.9149f, 0.0298f}, {0.2904f, 0.9540f, 0.0203f},
+ {0.3597f, 0.9803f, 0.0134f}, {0.4334f, 0.9950f, 0.0087f}, {0.5121f, 1.0000f, 0.0057f},
+ {0.5945f, 0.9950f, 0.0039f}, {0.6784f, 0.9786f, 0.0027f}, {0.7621f, 0.9520f, 0.0021f},
+ {0.8425f, 0.9154f, 0.0018f}, {0.9163f, 0.8700f, 0.0017f}, {0.9786f, 0.8163f, 0.0014f},
+ {1.0263f, 0.7570f, 0.0011f}, {1.0567f, 0.6949f, 0.0010f}, {1.0622f, 0.6310f, 0.0008f},
+ {1.0456f, 0.5668f, 0.0006f}, {1.0026f, 0.5030f, 0.0003f}, {0.9384f, 0.4412f, 0.0002f},
+ {0.8544f, 0.3810f, 0.0002f}, {0.7514f, 0.3210f, 0.0001f}, {0.6424f, 0.2650f, 0.0000f},
+ {0.5419f, 0.2170f, 0.0000f}, {0.4479f, 0.1750f, 0.0000f}, {0.3608f, 0.1382f, 0.0000f},
+ {0.2835f, 0.1070f, 0.0000f}, {0.2187f, 0.0816f, 0.0000f}, {0.1649f, 0.0610f, 0.0000f},
+ {0.1212f, 0.0446f, 0.0000f}, {0.0874f, 0.0320f, 0.0000f}, {0.0636f, 0.0232f, 0.0000f},
+ {0.0468f, 0.0170f, 0.0000f}, {0.0329f, 0.0119f, 0.0000f}, {0.0227f, 0.0082f, 0.0000f},
+ {0.0158f, 0.0057f, 0.0000f}, {0.0114f, 0.0041f, 0.0000f}, {0.0081f, 0.0029f, 0.0000f},
+ {0.0058f, 0.0021f, 0.0000f}, {0.0041f, 0.0015f, 0.0000f}, {0.0029f, 0.0010f, 0.0000f},
+ {0.0020f, 0.0007f, 0.0000f}, {0.0014f, 0.0005f, 0.0000f}, {0.0010f, 0.0004f, 0.0000f},
+ {0.0007f, 0.0002f, 0.0000f}, {0.0005f, 0.0002f, 0.0000f}, {0.0003f, 0.0001f, 0.0000f},
+ {0.0002f, 0.0001f, 0.0000f}, {0.0002f, 0.0001f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f},
+ {0.0001f, 0.0000f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, {0.0000f, 0.0000f, 0.0000f}};
-ccl_device void svm_node_wavelength(KernelGlobals *kg, ShaderData *sd, float *stack, uint wavelength, uint color_out)
+ccl_device void svm_node_wavelength(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint wavelength, uint color_out)
{
- float lambda_nm = stack_load_float(stack, wavelength);
- float ii = (lambda_nm-380.0f) * (1.0f/5.0f); // scaled 0..80
- int i = float_to_int(ii);
- float3 color;
+ float lambda_nm = stack_load_float(stack, wavelength);
+ float ii = (lambda_nm - 380.0f) * (1.0f / 5.0f); // scaled 0..80
+ int i = float_to_int(ii);
+ float3 color;
- if(i < 0 || i >= 80) {
- color = make_float3(0.0f, 0.0f, 0.0f);
- }
- else {
- ii -= i;
- ccl_constant float *c = cie_colour_match[i];
- color = interp(make_float3(c[0], c[1], c[2]), make_float3(c[3], c[4], c[5]), ii);
- }
+ if (i < 0 || i >= 80) {
+ color = make_float3(0.0f, 0.0f, 0.0f);
+ }
+ else {
+ ii -= i;
+ ccl_constant float *c = cie_colour_match[i];
+ color = interp(make_float3(c[0], c[1], c[2]), make_float3(c[3], c[4], c[5]), ii);
+ }
- color = xyz_to_rgb(kg, color);
- color *= 1.0f/2.52f; // Empirical scale from lg to make all comps <= 1
+ color = xyz_to_rgb(kg, color);
+ color *= 1.0f / 2.52f; // Empirical scale from lg to make all comps <= 1
- /* Clamp to zero if values are smaller */
- color = max(color, make_float3(0.0f, 0.0f, 0.0f));
+ /* Clamp to zero if values are smaller */
+ color = max(color, make_float3(0.0f, 0.0f, 0.0f));
- stack_store_float3(stack, color_out, color);
+ stack_store_float3(stack, color_out, color);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h
index 35df9e8a0e7..55e61d0e8c7 100644
--- a/intern/cycles/kernel/svm/svm_wireframe.h
+++ b/intern/cycles/kernel/svm/svm_wireframe.h
@@ -34,103 +34,97 @@ CCL_NAMESPACE_BEGIN
/* Wireframe Node */
-ccl_device_inline float wireframe(KernelGlobals *kg,
- ShaderData *sd,
- float size,
- int pixel_size,
- float3 *P)
+ccl_device_inline float wireframe(
+ KernelGlobals *kg, ShaderData *sd, float size, int pixel_size, float3 *P)
{
#ifdef __HAIR__
- if(sd->prim != PRIM_NONE && sd->type & PRIMITIVE_ALL_TRIANGLE)
+ if (sd->prim != PRIM_NONE && sd->type & PRIMITIVE_ALL_TRIANGLE)
#else
- if(sd->prim != PRIM_NONE)
+ if (sd->prim != PRIM_NONE)
#endif
- {
- float3 Co[3];
- float pixelwidth = 1.0f;
+ {
+ float3 Co[3];
+ float pixelwidth = 1.0f;
- /* Triangles */
- int np = 3;
+ /* Triangles */
+ int np = 3;
- if(sd->type & PRIMITIVE_TRIANGLE)
- triangle_vertices(kg, sd->prim, Co);
- else
- motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, Co);
+ if (sd->type & PRIMITIVE_TRIANGLE)
+ triangle_vertices(kg, sd->prim, Co);
+ else
+ motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, Co);
- if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
- object_position_transform(kg, sd, &Co[0]);
- object_position_transform(kg, sd, &Co[1]);
- object_position_transform(kg, sd, &Co[2]);
- }
+ if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+ object_position_transform(kg, sd, &Co[0]);
+ object_position_transform(kg, sd, &Co[1]);
+ object_position_transform(kg, sd, &Co[2]);
+ }
- if(pixel_size) {
- // Project the derivatives of P to the viewing plane defined
- // by I so we have a measure of how big is a pixel at this point
- float pixelwidth_x = len(sd->dP.dx - dot(sd->dP.dx, sd->I) * sd->I);
- float pixelwidth_y = len(sd->dP.dy - dot(sd->dP.dy, sd->I) * sd->I);
- // Take the average of both axis' length
- pixelwidth = (pixelwidth_x + pixelwidth_y) * 0.5f;
- }
+ if (pixel_size) {
+ // Project the derivatives of P to the viewing plane defined
+ // by I so we have a measure of how big is a pixel at this point
+ float pixelwidth_x = len(sd->dP.dx - dot(sd->dP.dx, sd->I) * sd->I);
+ float pixelwidth_y = len(sd->dP.dy - dot(sd->dP.dy, sd->I) * sd->I);
+ // Take the average of both axis' length
+ pixelwidth = (pixelwidth_x + pixelwidth_y) * 0.5f;
+ }
- // Use half the width as the neighbor face will render the
- // other half. And take the square for fast comparison
- pixelwidth *= 0.5f * size;
- pixelwidth *= pixelwidth;
- for(int i = 0; i < np; i++) {
- int i2 = i ? i - 1 : np - 1;
- float3 dir = *P - Co[i];
- float3 edge = Co[i] - Co[i2];
- float3 crs = cross(edge, dir);
- // At this point dot(crs, crs) / dot(edge, edge) is
- // the square of area / length(edge) == square of the
- // distance to the edge.
- if(dot(crs, crs) < (dot(edge, edge) * pixelwidth))
- return 1.0f;
- }
- }
- return 0.0f;
+ // Use half the width as the neighbor face will render the
+ // other half. And take the square for fast comparison
+ pixelwidth *= 0.5f * size;
+ pixelwidth *= pixelwidth;
+ for (int i = 0; i < np; i++) {
+ int i2 = i ? i - 1 : np - 1;
+ float3 dir = *P - Co[i];
+ float3 edge = Co[i] - Co[i2];
+ float3 crs = cross(edge, dir);
+ // At this point dot(crs, crs) / dot(edge, edge) is
+ // the square of area / length(edge) == square of the
+ // distance to the edge.
+ if (dot(crs, crs) < (dot(edge, edge) * pixelwidth))
+ return 1.0f;
+ }
+ }
+ return 0.0f;
}
-ccl_device void svm_node_wireframe(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint4 node)
+ccl_device void svm_node_wireframe(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
- uint in_size = node.y;
- uint out_fac = node.z;
- uint use_pixel_size, bump_offset;
- decode_node_uchar4(node.w, &use_pixel_size, &bump_offset, NULL, NULL);
+ uint in_size = node.y;
+ uint out_fac = node.z;
+ uint use_pixel_size, bump_offset;
+ decode_node_uchar4(node.w, &use_pixel_size, &bump_offset, NULL, NULL);
- /* Input Data */
- float size = stack_load_float(stack, in_size);
- int pixel_size = (int)use_pixel_size;
+ /* Input Data */
+ float size = stack_load_float(stack, in_size);
+ int pixel_size = (int)use_pixel_size;
- /* Calculate wireframe */
+ /* Calculate wireframe */
#ifdef __SPLIT_KERNEL__
- /* TODO(sergey): This is because sd is actually a global space,
- * which makes it difficult to re-use same wireframe() function.
- *
- * With OpenCL 2.0 it's possible to avoid this change, but for until
- * then we'll be living with such an exception.
- */
- float3 P = sd->P;
- float f = wireframe(kg, sd, size, pixel_size, &P);
+ /* TODO(sergey): This is because sd is actually a global space,
+ * which makes it difficult to re-use same wireframe() function.
+ *
+ * With OpenCL 2.0 it's possible to avoid this change, but for until
+ * then we'll be living with such an exception.
+ */
+ float3 P = sd->P;
+ float f = wireframe(kg, sd, size, pixel_size, &P);
#else
- float f = wireframe(kg, sd, size, pixel_size, &sd->P);
+ float f = wireframe(kg, sd, size, pixel_size, &sd->P);
#endif
- /* TODO(sergey): Think of faster way to calculate derivatives. */
- if(bump_offset == NODE_BUMP_OFFSET_DX) {
- float3 Px = sd->P - sd->dP.dx;
- f += (f - wireframe(kg, sd, size, pixel_size, &Px)) / len(sd->dP.dx);
- }
- else if(bump_offset == NODE_BUMP_OFFSET_DY) {
- float3 Py = sd->P - sd->dP.dy;
- f += (f - wireframe(kg, sd, size, pixel_size, &Py)) / len(sd->dP.dy);
- }
+ /* TODO(sergey): Think of faster way to calculate derivatives. */
+ if (bump_offset == NODE_BUMP_OFFSET_DX) {
+ float3 Px = sd->P - sd->dP.dx;
+ f += (f - wireframe(kg, sd, size, pixel_size, &Px)) / len(sd->dP.dx);
+ }
+ else if (bump_offset == NODE_BUMP_OFFSET_DY) {
+ float3 Py = sd->P - sd->dP.dy;
+ f += (f - wireframe(kg, sd, size, pixel_size, &Py)) / len(sd->dP.dy);
+ }
- if(stack_valid(out_fac))
- stack_store_float(stack, out_fac, f);
+ if (stack_valid(out_fac))
+ stack_store_float(stack, out_fac, f);
}
CCL_NAMESPACE_END