git.blender.org/blender.git
Diffstat (limited to 'intern/cycles')
-rw-r--r-- intern/cycles/CMakeLists.txt | 45
-rw-r--r-- intern/cycles/app/CMakeLists.txt | 22
-rw-r--r-- intern/cycles/app/cycles_cubin_cc.cpp | 33
-rw-r--r-- intern/cycles/app/cycles_server.cpp | 2
-rw-r--r-- intern/cycles/app/cycles_standalone.cpp | 5
-rw-r--r-- intern/cycles/app/cycles_xml.cpp | 16
-rw-r--r-- intern/cycles/app/io_export_cycles_xml.py | 2
-rw-r--r-- intern/cycles/blender/CMakeLists.txt | 38
-rw-r--r-- intern/cycles/blender/addon/__init__.py | 21
-rw-r--r-- intern/cycles/blender/addon/engine.py | 156
-rw-r--r-- intern/cycles/blender/addon/operators.py | 41
-rw-r--r-- intern/cycles/blender/addon/osl.py | 1
-rw-r--r-- intern/cycles/blender/addon/properties.py | 496
-rw-r--r-- intern/cycles/blender/addon/ui.py | 778
-rw-r--r-- intern/cycles/blender/addon/version_update.py | 206
-rw-r--r-- intern/cycles/blender/blender_camera.cpp | 78
-rw-r--r-- intern/cycles/blender/blender_curves.cpp | 1018
-rw-r--r-- intern/cycles/blender/blender_device.cpp | 28
-rw-r--r-- intern/cycles/blender/blender_device.h | 2
-rw-r--r-- intern/cycles/blender/blender_geometry.cpp | 178
-rw-r--r-- intern/cycles/blender/blender_id_map.h | 299
-rw-r--r-- intern/cycles/blender/blender_image.cpp | 220
-rw-r--r-- intern/cycles/blender/blender_image.h | 61
-rw-r--r-- intern/cycles/blender/blender_light.cpp | 212
-rw-r--r-- intern/cycles/blender/blender_mesh.cpp | 425
-rw-r--r-- intern/cycles/blender/blender_object.cpp | 333
-rw-r--r-- intern/cycles/blender/blender_object_cull.cpp | 1
-rw-r--r-- intern/cycles/blender/blender_particles.cpp | 6
-rw-r--r-- intern/cycles/blender/blender_python.cpp | 101
-rw-r--r-- intern/cycles/blender/blender_session.cpp | 676
-rw-r--r-- intern/cycles/blender/blender_session.h | 42
-rw-r--r-- intern/cycles/blender/blender_shader.cpp | 353
-rw-r--r-- intern/cycles/blender/blender_sync.cpp | 405
-rw-r--r-- intern/cycles/blender/blender_sync.h | 118
-rw-r--r-- intern/cycles/blender/blender_texture.h | 2
-rw-r--r-- intern/cycles/blender/blender_util.h | 318
-rw-r--r-- intern/cycles/blender/blender_viewport.cpp | 88
-rw-r--r-- intern/cycles/blender/blender_viewport.h | 56
-rw-r--r-- intern/cycles/blender/blender_volume.cpp | 387
-rw-r--r-- intern/cycles/bvh/CMakeLists.txt | 13
-rw-r--r-- intern/cycles/bvh/bvh.cpp | 183
-rw-r--r-- intern/cycles/bvh/bvh.h | 18
-rw-r--r-- intern/cycles/bvh/bvh2.cpp | 5
-rw-r--r-- intern/cycles/bvh/bvh2.h | 4
-rw-r--r-- intern/cycles/bvh/bvh4.cpp | 445
-rw-r--r-- intern/cycles/bvh/bvh4.h | 86
-rw-r--r-- intern/cycles/bvh/bvh8.cpp | 539
-rw-r--r-- intern/cycles/bvh/bvh8.h | 97
-rw-r--r-- intern/cycles/bvh/bvh_build.cpp | 257
-rw-r--r-- intern/cycles/bvh/bvh_build.h | 21
-rw-r--r-- intern/cycles/bvh/bvh_embree.cpp | 450
-rw-r--r-- intern/cycles/bvh/bvh_embree.h | 15
-rw-r--r-- intern/cycles/bvh/bvh_optix.cpp | 230
-rw-r--r-- intern/cycles/bvh/bvh_optix.h | 58
-rw-r--r-- intern/cycles/bvh/bvh_params.h | 7
-rw-r--r-- intern/cycles/bvh/bvh_sort.cpp | 15
-rw-r--r-- intern/cycles/bvh/bvh_split.cpp | 51
-rw-r--r-- intern/cycles/bvh/bvh_split.h | 16
-rw-r--r-- intern/cycles/bvh/bvh_unaligned.cpp | 20
-rw-r--r-- intern/cycles/cmake/external_libs.cmake | 4
-rw-r--r-- intern/cycles/cmake/macros.cmake | 60
-rw-r--r-- intern/cycles/device/CMakeLists.txt | 37
-rw-r--r-- intern/cycles/device/cuda/device_cuda.h | 269
-rw-r--r-- intern/cycles/device/cuda/device_cuda_impl.cpp | 2683
-rw-r--r-- intern/cycles/device/device.cpp | 129
-rw-r--r-- intern/cycles/device/device.h | 63
-rw-r--r-- intern/cycles/device/device_cpu.cpp | 567
-rw-r--r-- intern/cycles/device/device_cuda.cpp | 2573
-rw-r--r-- intern/cycles/device/device_denoising.cpp | 76
-rw-r--r-- intern/cycles/device/device_denoising.h | 10
-rw-r--r-- intern/cycles/device/device_intern.h | 10
-rw-r--r-- intern/cycles/device/device_memory.cpp | 107
-rw-r--r-- intern/cycles/device/device_memory.h | 55
-rw-r--r-- intern/cycles/device/device_multi.cpp | 473
-rw-r--r-- intern/cycles/device/device_network.cpp | 4
-rw-r--r-- intern/cycles/device/device_network.h | 6
-rw-r--r-- intern/cycles/device/device_opencl.cpp | 10
-rw-r--r-- intern/cycles/device/device_optix.cpp | 1770
-rw-r--r-- intern/cycles/device/device_split_kernel.cpp | 75
-rw-r--r-- intern/cycles/device/device_split_kernel.h | 10
-rw-r--r-- intern/cycles/device/device_task.cpp | 63
-rw-r--r-- intern/cycles/device/device_task.h | 96
-rw-r--r-- intern/cycles/device/opencl/device_opencl.h (renamed from intern/cycles/device/opencl/opencl.h) | 38
-rw-r--r-- intern/cycles/device/opencl/device_opencl_impl.cpp (renamed from intern/cycles/device/opencl/opencl_split.cpp) | 278
-rw-r--r-- intern/cycles/device/opencl/memory_manager.cpp | 7
-rw-r--r-- intern/cycles/device/opencl/memory_manager.h | 2
-rw-r--r-- intern/cycles/device/opencl/opencl_util.cpp | 80
-rw-r--r-- intern/cycles/graph/CMakeLists.txt | 2
-rw-r--r-- intern/cycles/graph/node.cpp | 20
-rw-r--r-- intern/cycles/graph/node.h | 5
-rw-r--r-- intern/cycles/graph/node_type.cpp | 11
-rw-r--r-- intern/cycles/graph/node_type.h | 16
-rw-r--r-- intern/cycles/graph/node_xml.cpp | 4
-rw-r--r-- intern/cycles/kernel/CMakeLists.txt | 201
-rw-r--r-- intern/cycles/kernel/bvh/bvh.h | 508
-rw-r--r-- intern/cycles/kernel/bvh/bvh_embree.h | 27
-rw-r--r-- intern/cycles/kernel/bvh/bvh_local.h | 63
-rw-r--r-- intern/cycles/kernel/bvh/bvh_nodes.h | 451
-rw-r--r-- intern/cycles/kernel/bvh/bvh_shadow_all.h | 152
-rw-r--r-- intern/cycles/kernel/bvh/bvh_traversal.h | 264
-rw-r--r-- intern/cycles/kernel/bvh/bvh_types.h | 8
-rw-r--r-- intern/cycles/kernel/bvh/bvh_volume.h | 108
-rw-r--r-- intern/cycles/kernel/bvh/bvh_volume_all.h | 136
-rw-r--r-- intern/cycles/kernel/bvh/obvh_local.h | 398
-rw-r--r-- intern/cycles/kernel/bvh/obvh_nodes.h | 591
-rw-r--r-- intern/cycles/kernel/bvh/obvh_shadow_all.h | 670
-rw-r--r-- intern/cycles/kernel/bvh/obvh_traversal.h | 620
-rw-r--r-- intern/cycles/kernel/bvh/obvh_volume.h | 480
-rw-r--r-- intern/cycles/kernel/bvh/obvh_volume_all.h | 551
-rw-r--r-- intern/cycles/kernel/bvh/qbvh_local.h | 291
-rw-r--r-- intern/cycles/kernel/bvh/qbvh_nodes.h | 516
-rw-r--r-- intern/cycles/kernel/bvh/qbvh_shadow_all.h | 459
-rw-r--r-- intern/cycles/kernel/bvh/qbvh_traversal.h | 483
-rw-r--r-- intern/cycles/kernel/bvh/qbvh_volume.h | 367
-rw-r--r-- intern/cycles/kernel/bvh/qbvh_volume_all.h | 444
-rw-r--r-- intern/cycles/kernel/closure/bsdf.h | 133
-rw-r--r-- intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h | 30
-rw-r--r-- intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h | 2
-rw-r--r-- intern/cycles/kernel/closure/bsdf_diffuse.h | 2
-rw-r--r-- intern/cycles/kernel/closure/bsdf_diffuse_ramp.h | 2
-rw-r--r-- intern/cycles/kernel/closure/bsdf_hair.h | 6
-rw-r--r-- intern/cycles/kernel/closure/bsdf_hair_principled.h | 40
-rw-r--r-- intern/cycles/kernel/closure/bsdf_microfacet.h | 99
-rw-r--r-- intern/cycles/kernel/closure/bsdf_microfacet_multi.h | 73
-rw-r--r-- intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h | 23
-rw-r--r-- intern/cycles/kernel/closure/bsdf_oren_nayar.h | 2
-rw-r--r-- intern/cycles/kernel/closure/bsdf_phong_ramp.h | 2
-rw-r--r-- intern/cycles/kernel/closure/bsdf_principled_diffuse.h | 3
-rw-r--r-- intern/cycles/kernel/closure/bsdf_principled_sheen.h | 28
-rw-r--r-- intern/cycles/kernel/closure/bsdf_toon.h | 2
-rw-r--r-- intern/cycles/kernel/closure/bsdf_util.h | 16
-rw-r--r-- intern/cycles/kernel/closure/bssrdf.h | 12
-rw-r--r-- intern/cycles/kernel/closure/volume.h | 3
-rw-r--r-- intern/cycles/kernel/filter/filter_features.h | 5
-rw-r--r-- intern/cycles/kernel/filter/filter_features_sse.h | 5
-rw-r--r-- intern/cycles/kernel/filter/filter_nlm_cpu.h | 3
-rw-r--r-- intern/cycles/kernel/filter/filter_prefilter.h | 74
-rw-r--r-- intern/cycles/kernel/filter/filter_reconstruction.h | 6
-rw-r--r-- intern/cycles/kernel/filter/filter_transform.h | 10
-rw-r--r-- intern/cycles/kernel/filter/filter_transform_gpu.h | 8
-rw-r--r-- intern/cycles/kernel/filter/filter_transform_sse.h | 8
-rw-r--r-- intern/cycles/kernel/geom/geom.h | 2
-rw-r--r-- intern/cycles/kernel/geom/geom_attribute.h | 10
-rw-r--r-- intern/cycles/kernel/geom/geom_curve.h | 121
-rw-r--r-- intern/cycles/kernel/geom/geom_curve_intersect.h | 1442
-rw-r--r-- intern/cycles/kernel/geom/geom_motion_curve.h | 114
-rw-r--r-- intern/cycles/kernel/geom/geom_motion_triangle_intersect.h | 23
-rw-r--r-- intern/cycles/kernel/geom/geom_object.h | 118
-rw-r--r-- intern/cycles/kernel/geom/geom_patch.h | 14
-rw-r--r-- intern/cycles/kernel/geom/geom_primitive.h | 26
-rw-r--r-- intern/cycles/kernel/geom/geom_subd_triangle.h | 156
-rw-r--r-- intern/cycles/kernel/geom/geom_triangle.h | 88
-rw-r--r-- intern/cycles/kernel/geom/geom_triangle_intersect.h | 439
-rw-r--r-- intern/cycles/kernel/geom/geom_volume.h | 13
-rw-r--r-- intern/cycles/kernel/kernel.h | 4
-rw-r--r-- intern/cycles/kernel/kernel_accumulate.h | 222
-rw-r--r-- intern/cycles/kernel/kernel_adaptive_sampling.h | 239
-rw-r--r-- intern/cycles/kernel/kernel_bake.h | 188
-rw-r--r-- intern/cycles/kernel/kernel_camera.h | 30
-rw-r--r-- intern/cycles/kernel/kernel_compat_cpu.h | 6
-rw-r--r-- intern/cycles/kernel/kernel_compat_cuda.h | 10
-rw-r--r-- intern/cycles/kernel/kernel_compat_opencl.h | 10
-rw-r--r-- intern/cycles/kernel/kernel_compat_optix.h | 92
-rw-r--r-- intern/cycles/kernel/kernel_emission.h | 93
-rw-r--r-- intern/cycles/kernel/kernel_film.h | 85
-rw-r--r-- intern/cycles/kernel/kernel_globals.h | 43
-rw-r--r-- intern/cycles/kernel/kernel_id_passes.h | 32
-rw-r--r-- intern/cycles/kernel/kernel_jitter.h | 67
-rw-r--r-- intern/cycles/kernel/kernel_light.h | 547
-rw-r--r-- intern/cycles/kernel/kernel_light_background.h | 448
-rw-r--r-- intern/cycles/kernel/kernel_light_common.h | 159
-rw-r--r-- intern/cycles/kernel/kernel_montecarlo.h | 48
-rw-r--r-- intern/cycles/kernel/kernel_passes.h | 182
-rw-r--r-- intern/cycles/kernel/kernel_path.h | 134
-rw-r--r-- intern/cycles/kernel/kernel_path_branched.h | 46
-rw-r--r-- intern/cycles/kernel/kernel_path_state.h | 10
-rw-r--r-- intern/cycles/kernel/kernel_path_surface.h | 17
-rw-r--r-- intern/cycles/kernel/kernel_path_volume.h | 267
-rw-r--r-- intern/cycles/kernel/kernel_queues.h | 2
-rw-r--r-- intern/cycles/kernel/kernel_random.h | 49
-rw-r--r-- intern/cycles/kernel/kernel_shader.h | 130
-rw-r--r-- intern/cycles/kernel/kernel_shadow.h | 77
-rw-r--r-- intern/cycles/kernel/kernel_subsurface.h | 31
-rw-r--r-- intern/cycles/kernel/kernel_textures.h | 3
-rw-r--r-- intern/cycles/kernel/kernel_types.h | 283
-rw-r--r-- intern/cycles/kernel/kernel_volume.h | 86
-rw-r--r-- intern/cycles/kernel/kernel_work_stealing.h | 84
-rw-r--r-- intern/cycles/kernel/kernel_write_passes.h | 95
-rw-r--r-- intern/cycles/kernel/kernels/cpu/kernel.cpp | 12
-rw-r--r-- intern/cycles/kernel/kernels/cpu/kernel_cpu.h | 7
-rw-r--r-- intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h | 36
-rw-r--r-- intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h | 29
-rw-r--r-- intern/cycles/kernel/kernels/cuda/filter.cu | 84
-rw-r--r-- intern/cycles/kernel/kernels/cuda/kernel.cu | 81
-rw-r--r-- intern/cycles/kernel/kernels/cuda/kernel_config.h | 3
-rw-r--r-- intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h | 19
-rw-r--r-- intern/cycles/kernel/kernels/cuda/kernel_split.cu | 8
-rw-r--r-- intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl | 23
-rw-r--r-- intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl | 23
-rw-r--r-- intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl | 23
-rw-r--r-- intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl | 23
-rw-r--r-- intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h | 16
-rw-r--r-- intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl | 4
-rw-r--r-- intern/cycles/kernel/kernels/optix/kernel_optix.cu | 329
-rw-r--r-- intern/cycles/kernel/osl/CMakeLists.txt | 14
-rw-r--r-- intern/cycles/kernel/osl/background.cpp | 2
-rw-r--r-- intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp | 2
-rw-r--r-- intern/cycles/kernel/osl/bsdf_phong_ramp.cpp | 2
-rw-r--r-- intern/cycles/kernel/osl/emissive.cpp | 2
-rw-r--r-- intern/cycles/kernel/osl/osl_bssrdf.cpp | 2
-rw-r--r-- intern/cycles/kernel/osl/osl_closures.cpp | 193
-rw-r--r-- intern/cycles/kernel/osl/osl_closures.h | 6
-rw-r--r-- intern/cycles/kernel/osl/osl_globals.h | 11
-rw-r--r-- intern/cycles/kernel/osl/osl_services.cpp | 525
-rw-r--r-- intern/cycles/kernel/osl/osl_services.h | 71
-rw-r--r-- intern/cycles/kernel/osl/osl_shader.cpp | 7
-rw-r--r-- intern/cycles/kernel/shaders/CMakeLists.txt | 19
-rw-r--r-- intern/cycles/kernel/shaders/node_absorption_volume.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_add_closure.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_ambient_occlusion.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl | 3
-rw-r--r-- intern/cycles/kernel/shaders/node_attribute.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_background.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_bevel.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_blackbody.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_brick_texture.osl | 3
-rw-r--r-- intern/cycles/kernel/shaders/node_brightness.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_bump.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_camera.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_checker_texture.osl | 3
-rw-r--r-- intern/cycles/kernel/shaders/node_clamp.osl | 26
-rw-r--r-- intern/cycles/kernel/shaders/node_combine_hsv.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_combine_rgb.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_combine_xyz.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_convert_from_color.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_convert_from_float.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_convert_from_int.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_convert_from_normal.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_convert_from_point.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_convert_from_string.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_convert_from_vector.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_diffuse_bsdf.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_displacement.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_emission.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_environment_texture.osl | 14
-rw-r--r-- intern/cycles/kernel/shaders/node_fresnel.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_gamma.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_geometry.osl | 7
-rw-r--r-- intern/cycles/kernel/shaders/node_glass_bsdf.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_glossy_bsdf.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_gradient_texture.osl | 5
-rw-r--r-- intern/cycles/kernel/shaders/node_hair_bsdf.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_hair_info.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_hash.h | 81
-rw-r--r-- intern/cycles/kernel/shaders/node_holdout.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_hsv.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_ies_light.osl | 9
-rw-r--r-- intern/cycles/kernel/shaders/node_image_texture.osl | 75
-rw-r--r-- intern/cycles/kernel/shaders/node_invert.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_layer_weight.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_light_falloff.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_light_path.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_magic_texture.osl | 3
-rw-r--r-- intern/cycles/kernel/shaders/node_map_range.osl | 58
-rw-r--r-- intern/cycles/kernel/shaders/node_mapping.osl | 63
-rw-r--r-- intern/cycles/kernel/shaders/node_math.h | 110
-rw-r--r-- intern/cycles/kernel/shaders/node_math.osl | 139
-rw-r--r-- intern/cycles/kernel/shaders/node_mix.osl | 6
-rw-r--r-- intern/cycles/kernel/shaders/node_mix_closure.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_musgrave_texture.osl | 746
-rw-r--r-- intern/cycles/kernel/shaders/node_noise.h | 202
-rw-r--r-- intern/cycles/kernel/shaders/node_noise_texture.osl | 134
-rw-r--r-- intern/cycles/kernel/shaders/node_normal.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_normal_map.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_object_info.osl | 4
-rw-r--r-- intern/cycles/kernel/shaders/node_output_displacement.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_output_surface.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_output_volume.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_particle_info.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_principled_bsdf.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_principled_volume.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_refraction_bsdf.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_rgb_curves.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_rgb_ramp.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_rgb_to_bw.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_scatter_volume.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_separate_hsv.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_separate_rgb.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_separate_xyz.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_set_normal.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_sky_texture.osl | 155
-rw-r--r-- intern/cycles/kernel/shaders/node_subsurface_scattering.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_tangent.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_texture.h | 165
-rw-r--r-- intern/cycles/kernel/shaders/node_texture_coordinate.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_toon_bsdf.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_translucent_bsdf.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_transparent_bsdf.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_uv_map.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_value.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_vector_curves.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_vector_displacement.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_vector_math.osl | 76
-rw-r--r-- intern/cycles/kernel/shaders/node_vector_rotate.osl | 49
-rw-r--r-- intern/cycles/kernel/shaders/node_vector_transform.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_velvet_bsdf.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_vertex_color.osl | 50
-rw-r--r-- intern/cycles/kernel/shaders/node_voronoi_texture.osl | 1079
-rw-r--r-- intern/cycles/kernel/shaders/node_voxel_texture.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_wave_texture.osl | 76
-rw-r--r-- intern/cycles/kernel/shaders/node_wavelength.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/node_white_noise_texture.osl | 49
-rw-r--r-- intern/cycles/kernel/shaders/node_wireframe.osl | 2
-rw-r--r-- intern/cycles/kernel/shaders/oslutil.h | 101
-rw-r--r-- intern/cycles/kernel/shaders/stdcycles.h | 150
-rw-r--r-- intern/cycles/kernel/shaders/stdosl.h | 853
-rw-r--r-- intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h | 44
-rw-r--r-- intern/cycles/kernel/split/kernel_adaptive_filter_x.h | 30
-rw-r--r-- intern/cycles/kernel/split/kernel_adaptive_filter_y.h | 29
-rw-r--r-- intern/cycles/kernel/split/kernel_adaptive_stopping.h | 37
-rw-r--r-- intern/cycles/kernel/split/kernel_branched.h | 2
-rw-r--r-- intern/cycles/kernel/split/kernel_buffer_update.h | 6
-rw-r--r-- intern/cycles/kernel/split/kernel_data_init.h | 8
-rw-r--r-- intern/cycles/kernel/split/kernel_direct_lighting.h | 1
-rw-r--r-- intern/cycles/kernel/split/kernel_do_volume.h | 8
-rw-r--r-- intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h | 8
-rw-r--r-- intern/cycles/kernel/split/kernel_indirect_background.h | 4
-rw-r--r-- intern/cycles/kernel/split/kernel_next_iteration_setup.h | 6
-rw-r--r-- intern/cycles/kernel/split/kernel_path_init.h | 3
-rw-r--r-- intern/cycles/kernel/split/kernel_shader_eval.h | 4
-rw-r--r-- intern/cycles/kernel/split/kernel_shadow_blocked_dl.h | 2
-rw-r--r-- intern/cycles/kernel/split/kernel_split_common.h | 2
-rw-r--r-- intern/cycles/kernel/split/kernel_split_data.h | 1
-rw-r--r-- intern/cycles/kernel/split/kernel_split_data_types.h | 11
-rw-r--r-- intern/cycles/kernel/svm/svm.h | 163
-rw-r--r-- intern/cycles/kernel/svm/svm_ao.h | 40
-rw-r--r-- intern/cycles/kernel/svm/svm_aov.h | 49
-rw-r--r-- intern/cycles/kernel/svm/svm_attribute.h | 57
-rw-r--r-- intern/cycles/kernel/svm/svm_bevel.h | 16
-rw-r--r-- intern/cycles/kernel/svm/svm_brick.h | 30
-rw-r--r-- intern/cycles/kernel/svm/svm_brightness.h | 2
-rw-r--r-- intern/cycles/kernel/svm/svm_checker.h | 6
-rw-r--r-- intern/cycles/kernel/svm/svm_clamp.h | 46
-rw-r--r-- intern/cycles/kernel/svm/svm_closure.h | 228
-rw-r--r-- intern/cycles/kernel/svm/svm_color_util.h | 14
-rw-r--r-- intern/cycles/kernel/svm/svm_displace.h | 8
-rw-r--r-- intern/cycles/kernel/svm/svm_fractal_noise.h | 135
-rw-r--r-- intern/cycles/kernel/svm/svm_fresnel.h | 4
-rw-r--r-- intern/cycles/kernel/svm/svm_geometry.h | 8
-rw-r--r-- intern/cycles/kernel/svm/svm_gradient.h | 2
-rw-r--r-- intern/cycles/kernel/svm/svm_hsv.h | 4
-rw-r--r-- intern/cycles/kernel/svm/svm_ies.h | 22
-rw-r--r-- intern/cycles/kernel/svm/svm_image.h | 90
-rw-r--r-- intern/cycles/kernel/svm/svm_light_path.h | 2
-rw-r--r-- intern/cycles/kernel/svm/svm_magic.h | 6
-rw-r--r-- intern/cycles/kernel/svm/svm_map_range.h | 88
-rw-r--r-- intern/cycles/kernel/svm/svm_mapping.h | 28
-rw-r--r-- intern/cycles/kernel/svm/svm_mapping_util.h | 39
-rw-r--r-- intern/cycles/kernel/svm/svm_math.h | 61
-rw-r--r-- intern/cycles/kernel/svm/svm_math_util.h | 256
-rw-r--r-- intern/cycles/kernel/svm/svm_musgrave.h | 806
-rw-r--r-- intern/cycles/kernel/svm/svm_noise.h | 857
-rw-r--r-- intern/cycles/kernel/svm/svm_noisetex.h | 207
-rw-r--r-- intern/cycles/kernel/svm/svm_ramp.h | 4
-rw-r--r-- intern/cycles/kernel/svm/svm_sky.h | 304
-rw-r--r-- intern/cycles/kernel/svm/svm_tex_coord.h | 4
-rw-r--r-- intern/cycles/kernel/svm/svm_texture.h | 63
-rw-r--r-- intern/cycles/kernel/svm/svm_types.h | 276
-rw-r--r-- intern/cycles/kernel/svm/svm_vector_rotate.h | 78
-rw-r--r-- intern/cycles/kernel/svm/svm_vector_transform.h | 4
-rw-r--r-- intern/cycles/kernel/svm/svm_vertex_color.h | 92
-rw-r--r-- intern/cycles/kernel/svm/svm_voronoi.h | 1237
-rw-r--r-- intern/cycles/kernel/svm/svm_voxel.h | 4
-rw-r--r-- intern/cycles/kernel/svm/svm_wave.h | 105
-rw-r--r-- intern/cycles/kernel/svm/svm_white_noise.h | 81
-rw-r--r-- intern/cycles/kernel/svm/svm_wireframe.h | 2
-rw-r--r-- intern/cycles/render/CMakeLists.txt | 42
-rw-r--r-- intern/cycles/render/attribute.cpp | 255
-rw-r--r-- intern/cycles/render/attribute.h | 64
-rw-r--r-- intern/cycles/render/background.cpp | 21
-rw-r--r-- intern/cycles/render/background.h | 4
-rw-r--r-- intern/cycles/render/bake.cpp | 309
-rw-r--r-- intern/cycles/render/bake.h | 52
-rw-r--r-- intern/cycles/render/buffers.cpp | 114
-rw-r--r-- intern/cycles/render/buffers.h | 58
-rw-r--r-- intern/cycles/render/camera.cpp | 66
-rw-r--r-- intern/cycles/render/colorspace.cpp | 395
-rw-r--r-- intern/cycles/render/colorspace.h | 66
-rw-r--r-- intern/cycles/render/constant_fold.cpp | 72
-rw-r--r-- intern/cycles/render/constant_fold.h | 7
-rw-r--r-- intern/cycles/render/coverage.cpp | 7
-rw-r--r-- intern/cycles/render/coverage.h | 15
-rw-r--r-- intern/cycles/render/curves.cpp | 119
-rw-r--r-- intern/cycles/render/curves.h | 65
-rw-r--r-- intern/cycles/render/denoising.cpp | 99
-rw-r--r-- intern/cycles/render/denoising.h | 21
-rw-r--r-- intern/cycles/render/film.cpp | 185
-rw-r--r-- intern/cycles/render/film.h | 8
-rw-r--r-- intern/cycles/render/geometry.cpp | 1473
-rw-r--r-- intern/cycles/render/geometry.h | 205
-rw-r--r-- intern/cycles/render/graph.cpp | 92
-rw-r--r-- intern/cycles/render/graph.h | 20
-rw-r--r-- intern/cycles/render/hair.cpp | 488
-rw-r--r-- intern/cycles/render/hair.h | 152
-rw-r--r-- intern/cycles/render/image.cpp | 961
-rw-r--r-- intern/cycles/render/image.h | 222
-rw-r--r-- intern/cycles/render/image_oiio.cpp | 236
-rw-r--r-- intern/cycles/render/image_oiio.h | 48
-rw-r--r-- intern/cycles/render/image_sky.cpp | 94
-rw-r--r-- intern/cycles/render/image_sky.h | 49
-rw-r--r-- intern/cycles/render/image_vdb.cpp | 188
-rw-r--r-- intern/cycles/render/image_vdb.h | 56
-rw-r--r-- intern/cycles/render/integrator.cpp | 73
-rw-r--r-- intern/cycles/render/integrator.h | 7
-rw-r--r-- intern/cycles/render/jitter.cpp | 287
-rw-r--r-- intern/cycles/render/jitter.h | 29
-rw-r--r-- intern/cycles/render/light.cpp | 259
-rw-r--r-- intern/cycles/render/light.h | 18
-rw-r--r-- intern/cycles/render/merge.cpp | 2
-rw-r--r-- intern/cycles/render/mesh.cpp | 1866
-rw-r--r-- intern/cycles/render/mesh.h | 237
-rw-r--r-- intern/cycles/render/mesh_displace.cpp | 81
-rw-r--r-- intern/cycles/render/mesh_subdivision.cpp | 147
-rw-r--r-- intern/cycles/render/mesh_volume.cpp | 112
-rw-r--r-- intern/cycles/render/nodes.cpp | 1752
-rw-r--r-- intern/cycles/render/nodes.h | 252
-rw-r--r-- intern/cycles/render/object.cpp | 438
-rw-r--r-- intern/cycles/render/object.h | 14
-rw-r--r-- intern/cycles/render/osl.cpp | 145
-rw-r--r-- intern/cycles/render/osl.h | 34
-rw-r--r-- intern/cycles/render/particles.cpp | 2
-rw-r--r-- intern/cycles/render/scene.cpp | 132
-rw-r--r-- intern/cycles/render/scene.h | 27
-rw-r--r-- intern/cycles/render/session.cpp | 442
-rw-r--r-- intern/cycles/render/session.h | 52
-rw-r--r-- intern/cycles/render/shader.cpp | 104
-rw-r--r-- intern/cycles/render/shader.h | 20
-rw-r--r-- intern/cycles/render/sobol.cpp | 58
-rw-r--r-- intern/cycles/render/stats.h | 2
-rw-r--r-- intern/cycles/render/svm.cpp | 211
-rw-r--r-- intern/cycles/render/svm.h | 19
-rw-r--r-- intern/cycles/render/tables.cpp | 2
-rw-r--r-- intern/cycles/render/tables.h | 1
-rw-r--r-- intern/cycles/render/tile.cpp | 171
-rw-r--r-- intern/cycles/render/tile.h | 12
-rw-r--r-- intern/cycles/subd/CMakeLists.txt | 1
-rw-r--r-- intern/cycles/subd/subd_dice.cpp | 263
-rw-r--r-- intern/cycles/subd/subd_dice.h | 75
-rw-r--r-- intern/cycles/subd/subd_patch.h | 11
-rw-r--r-- intern/cycles/subd/subd_split.cpp | 739
-rw-r--r-- intern/cycles/subd/subd_split.h | 38
-rw-r--r-- intern/cycles/subd/subd_subpatch.h | 219
-rw-r--r-- intern/cycles/test/CMakeLists.txt | 32
-rw-r--r-- intern/cycles/test/render_graph_finalize_test.cpp | 62
-rw-r--r-- intern/cycles/test/util_avxf_avx2_test.cpp | 21
-rw-r--r-- intern/cycles/test/util_avxf_avx_test.cpp | 21
-rw-r--r-- intern/cycles/test/util_avxf_test.h | 222
-rw-r--r-- intern/cycles/util/CMakeLists.txt | 25
-rw-r--r-- intern/cycles/util/util_algorithm.h | 1
-rw-r--r-- intern/cycles/util/util_aligned_malloc.cpp | 8
-rw-r--r-- intern/cycles/util/util_aligned_malloc.h | 15
-rw-r--r-- intern/cycles/util/util_array.h | 13
-rw-r--r-- intern/cycles/util/util_atomic.h | 2
-rw-r--r-- intern/cycles/util/util_avxb.h | 28
-rw-r--r-- intern/cycles/util/util_avxf.h | 68
-rw-r--r-- intern/cycles/util/util_avxi.h | 745
-rw-r--r-- intern/cycles/util/util_boundbox.h | 2
-rw-r--r-- intern/cycles/util/util_color.h | 41
-rw-r--r-- intern/cycles/util/util_debug.cpp | 34
-rw-r--r-- intern/cycles/util/util_debug.h | 28
-rw-r--r-- intern/cycles/util/util_defines.h | 8
-rw-r--r-- intern/cycles/util/util_deque.h | 28
-rw-r--r-- intern/cycles/util/util_disjoint_set.h | 75
-rw-r--r-- intern/cycles/util/util_guarded_allocator.h | 1
-rw-r--r-- intern/cycles/util/util_half.h | 5
-rw-r--r-- intern/cycles/util/util_hash.h | 357
-rw-r--r-- intern/cycles/util/util_ies.cpp | 36
-rw-r--r-- intern/cycles/util/util_ies.h | 6
-rw-r--r-- intern/cycles/util/util_image.h | 1
-rw-r--r-- intern/cycles/util/util_logging.cpp | 24
-rw-r--r-- intern/cycles/util/util_logging.h | 1
-rw-r--r-- intern/cycles/util/util_map.h | 8
-rw-r--r-- intern/cycles/util/util_math.h | 150
-rw-r--r-- intern/cycles/util/util_math_fast.h | 20
-rw-r--r-- intern/cycles/util/util_math_float2.h | 31
-rw-r--r-- intern/cycles/util/util_math_float3.h | 74
-rw-r--r-- intern/cycles/util/util_math_float4.h | 47
-rw-r--r-- intern/cycles/util/util_math_intersect.h | 2
-rw-r--r-- intern/cycles/util/util_math_matrix.h | 40
-rw-r--r-- intern/cycles/util/util_md5.cpp | 2
-rw-r--r-- intern/cycles/util/util_openimagedenoise.h | 39
-rw-r--r-- intern/cycles/util/util_param.h | 5
-rw-r--r-- intern/cycles/util/util_path.cpp | 4
-rw-r--r-- intern/cycles/util/util_profiling.cpp | 6
-rw-r--r-- intern/cycles/util/util_profiling.h | 1
-rw-r--r-- intern/cycles/util/util_progress.h | 7
-rw-r--r-- intern/cycles/util/util_semaphore.h | 61
-rw-r--r-- intern/cycles/util/util_simd.h | 34
-rw-r--r-- intern/cycles/util/util_sky_model.cpp | 349
-rw-r--r-- intern/cycles/util/util_sky_model.h | 429
-rw-r--r-- intern/cycles/util/util_sky_model_data.h | 3847
-rw-r--r-- intern/cycles/util/util_ssef.h | 28
-rw-r--r-- intern/cycles/util/util_ssei.h | 9
-rw-r--r-- intern/cycles/util/util_static_assert.h | 32
-rw-r--r-- intern/cycles/util/util_string.h | 2
-rw-r--r-- intern/cycles/util/util_system.cpp | 6
-rw-r--r-- intern/cycles/util/util_task.cpp | 432
-rw-r--r-- intern/cycles/util/util_task.h | 102
-rw-r--r-- intern/cycles/util/util_tbb.h | 44
-rw-r--r-- intern/cycles/util/util_texture.h | 19
-rw-r--r-- intern/cycles/util/util_thread.h | 81
-rw-r--r-- intern/cycles/util/util_transform.cpp | 40
-rw-r--r-- intern/cycles/util/util_transform.h | 63
-rw-r--r-- intern/cycles/util/util_types.h | 8
-rw-r--r-- intern/cycles/util/util_types_float8.h | 52
-rw-r--r-- intern/cycles/util/util_types_float8_impl.h | 52
-rw-r--r-- intern/cycles/util/util_vector.h | 2
-rw-r--r-- intern/cycles/util/util_version.h | 2
-rw-r--r-- intern/cycles/util/util_view.cpp | 2
-rw-r--r-- intern/cycles/util/util_windows.cpp | 54
-rw-r--r-- intern/cycles/util/util_windows.h | 6
522 files changed, 36750 insertions, 32158 deletions
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
index 87f88f7ed34..e5a5e9773d3 100644
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -177,12 +177,11 @@ if(CXX_HAS_AVX2)
add_definitions(-DWITH_KERNEL_AVX2)
endif()
-if(WITH_CYCLES_OSL)
- if(WIN32 AND MSVC)
- set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
- elseif(CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang"))
- set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
- endif()
+# LLVM and OSL need to build without RTTI
+if(WIN32 AND MSVC)
+ set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
+elseif(CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang"))
+ set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
endif()
# Definitions and Includes
@@ -207,9 +206,9 @@ endif()
if(WITH_CYCLES_OSL)
add_definitions(-DWITH_OSL)
- #osl 1.9.x
+ # osl 1.9.x
add_definitions(-DOSL_STATIC_BUILD)
- #pre 1.9
+ # pre 1.9
add_definitions(-DOSL_STATIC_LIBRARY)
include_directories(
SYSTEM
@@ -217,6 +216,21 @@ if(WITH_CYCLES_OSL)
)
endif()
+if(WITH_CYCLES_DEVICE_OPTIX)
+ find_package(OptiX)
+
+ if(OPTIX_FOUND)
+ add_definitions(-DWITH_OPTIX)
+ include_directories(
+ SYSTEM
+ ${OPTIX_INCLUDE_DIR}
+ )
+ else()
+ message(STATUS "OptiX not found, disabling it from Cycles")
+ set(WITH_CYCLES_DEVICE_OPTIX OFF)
+ endif()
+endif()
+
if(WITH_CYCLES_EMBREE)
add_definitions(-DWITH_EMBREE)
add_definitions(-DEMBREE_STATIC_LIB)
@@ -272,6 +286,7 @@ include_directories(
${OPENEXR_INCLUDE_DIR}
${OPENEXR_INCLUDE_DIRS}
${PUGIXML_INCLUDE_DIR}
+ ${TBB_INCLUDE_DIRS}
)
if(CYCLES_STANDALONE_REPOSITORY)
@@ -299,9 +314,7 @@ if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER))
set(MAX_MSVC 1910)
elseif(${CUDA_VERSION} EQUAL "9.1")
set(MAX_MSVC 1911)
- elseif(${CUDA_VERSION} EQUAL "10.0")
- set(MAX_MSVC 1999)
- elseif(${CUDA_VERSION} EQUAL "10.1")
+ elseif(${CUDA_VERSION} LESS "11.0")
set(MAX_MSVC 1999)
endif()
if(NOT MSVC_VERSION LESS ${MAX_MSVC} OR CMAKE_C_COMPILER_ID MATCHES "Clang")
@@ -318,7 +331,7 @@ if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER))
endif()
# NVRTC gives wrong rendering result in CUDA 10.0, so we must use NVCC.
-if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_CUBIN_COMPILER)
+if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_CUBIN_COMPILER AND NOT WITH_CYCLES_CUBIN_COMPILER_OVERRRIDE)
if(NOT (${CUDA_VERSION} VERSION_LESS 10.0))
message(STATUS "cycles_cubin_cc not supported for CUDA 10.0+, using nvcc instead.")
set(WITH_CYCLES_CUBIN_COMPILER OFF)
@@ -336,14 +349,6 @@ if(WITH_CYCLES_NETWORK)
add_definitions(-DWITH_NETWORK)
endif()
-if(WITH_OPENCOLORIO)
- add_definitions(-DWITH_OCIO)
- include_directories(
- SYSTEM
- ${OPENCOLORIO_INCLUDE_DIRS}
- )
-endif()
-
if(WITH_CYCLES_STANDALONE OR WITH_CYCLES_NETWORK OR WITH_CYCLES_CUBIN_COMPILER)
add_subdirectory(app)
endif()
diff --git a/intern/cycles/app/CMakeLists.txt b/intern/cycles/app/CMakeLists.txt
index 36e3e179be5..a2b0ed03925 100644
--- a/intern/cycles/app/CMakeLists.txt
+++ b/intern/cycles/app/CMakeLists.txt
@@ -22,7 +22,6 @@ set(LIBRARIES
${ZLIB_LIBRARIES}
${TIFF_LIBRARY}
${PTHREADS_LIBRARIES}
- extern_clew
)
if(WITH_CUDA_DYNLOAD)
@@ -36,7 +35,7 @@ if(WITH_CYCLES_OSL)
endif()
if(NOT CYCLES_STANDALONE_REPOSITORY)
- list(APPEND LIBRARIES bf_intern_glew_mx bf_intern_guardedalloc bf_intern_numaapi)
+ list(APPEND LIBRARIES bf_intern_glew_mx bf_intern_guardedalloc bf_intern_numaapi bf_intern_sky)
endif()
if(WITH_CYCLES_LOGGING)
@@ -52,14 +51,17 @@ endif()
# Common configuration.
-link_directories(${OPENIMAGEIO_LIBPATH}
- ${BOOST_LIBPATH}
- ${PNG_LIBPATH}
- ${JPEG_LIBPATH}
- ${ZLIB_LIBPATH}
- ${TIFF_LIBPATH}
- ${OPENEXR_LIBPATH}
- ${OPENJPEG_LIBPATH})
+link_directories(
+ ${OPENIMAGEIO_LIBPATH}
+ ${BOOST_LIBPATH}
+ ${PNG_LIBPATH}
+ ${JPEG_LIBPATH}
+ ${ZLIB_LIBPATH}
+ ${TIFF_LIBPATH}
+ ${OPENEXR_LIBPATH}
+ ${OPENJPEG_LIBPATH}
+ ${OPENVDB_LIBPATH}
+)
if(WITH_OPENCOLORIO)
link_directories(${OPENCOLORIO_LIBPATH})
diff --git a/intern/cycles/app/cycles_cubin_cc.cpp b/intern/cycles/app/cycles_cubin_cc.cpp
index 774c18f4219..7631cb9bed5 100644
--- a/intern/cycles/app/cycles_cubin_cc.cpp
+++ b/intern/cycles/app/cycles_cubin_cc.cpp
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include <stdio.h>
#include <stdint.h>
+#include <stdio.h>
#include <string>
#include <vector>
@@ -43,7 +43,8 @@ template<typename T> std::string to_string(const T &n)
class CompilationSettings {
public:
- CompilationSettings() : target_arch(0), bits(64), verbose(false), fast_math(false)
+ CompilationSettings()
+ : target_arch(0), bits(64), verbose(false), fast_math(false), ptx_only(false)
{
}
@@ -57,12 +58,13 @@ class CompilationSettings {
int bits;
bool verbose;
bool fast_math;
+ bool ptx_only;
};
static bool compile_cuda(CompilationSettings &settings)
{
- const char *headers[] = {"stdlib.h", "float.h", "math.h", "stdio.h"};
- const char *header_content[] = {"\n", "\n", "\n", "\n"};
+ const char *headers[] = {"stdlib.h", "float.h", "math.h", "stdio.h", "stddef.h"};
+ const char *header_content[] = {"\n", "\n", "\n", "\n", "\n"};
printf("Building %s\n", settings.input_file.c_str());
@@ -83,6 +85,8 @@ static bool compile_cuda(CompilationSettings &settings)
options.push_back("-D__KERNEL_CUDA_VERSION__=" + std::to_string(cuewNvrtcVersion()));
options.push_back("-arch=compute_" + std::to_string(settings.target_arch));
options.push_back("--device-as-default-execution-space");
+ options.push_back("-DCYCLES_CUBIN_CC");
+ options.push_back("--std=c++11");
if (settings.fast_math)
options.push_back("--use_fast_math");
@@ -134,10 +138,14 @@ static bool compile_cuda(CompilationSettings &settings)
fprintf(stderr, "Error: nvrtcGetPTX failed (%d)\n\n", (int)result);
return false;
}
-
- /* Write a file in the temp folder with the ptx code. */
- settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" +
- OIIO::Filesystem::unique_path();
+ if (settings.ptx_only) {
+ settings.ptx_file = settings.output_file;
+ }
+ else {
+ /* Write a file in the temp folder with the ptx code. */
+ settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" +
+ OIIO::Filesystem::unique_path();
+ }
FILE *f = fopen(settings.ptx_file.c_str(), "wb");
fwrite(&ptx_code[0], 1, ptx_size, f);
fclose(f);
@@ -249,6 +257,9 @@ static bool parse_parameters(int argc, const char **argv, CompilationSettings &s
"-D %L",
&settings.defines,
"Add additional defines",
+ "-ptx",
+ &settings.ptx_only,
+ "emit PTX code",
"-v",
&settings.verbose,
"Use verbose logging",
@@ -303,8 +314,10 @@ int main(int argc, const char **argv)
exit(EXIT_FAILURE);
}
- if (!link_ptxas(settings)) {
- exit(EXIT_FAILURE);
+ if (!settings.ptx_only) {
+ if (!link_ptxas(settings)) {
+ exit(EXIT_FAILURE);
+ }
}
return 0;
diff --git a/intern/cycles/app/cycles_server.cpp b/intern/cycles/app/cycles_server.cpp
index c5a4c9b375b..1ad70a376ed 100644
--- a/intern/cycles/app/cycles_server.cpp
+++ b/intern/cycles/app/cycles_server.cpp
@@ -20,11 +20,11 @@
#include "util/util_args.h"
#include "util/util_foreach.h"
+#include "util/util_logging.h"
#include "util/util_path.h"
#include "util/util_stats.h"
#include "util/util_string.h"
#include "util/util_task.h"
-#include "util/util_logging.h"
using namespace ccl;
diff --git a/intern/cycles/app/cycles_standalone.cpp b/intern/cycles/app/cycles_standalone.cpp
index d2d112e8d7e..f057ce7a2f0 100644
--- a/intern/cycles/app/cycles_standalone.cpp
+++ b/intern/cycles/app/cycles_standalone.cpp
@@ -16,16 +16,17 @@
#include <stdio.h>
+#include "device/device.h"
#include "render/buffers.h"
#include "render/camera.h"
-#include "device/device.h"
+#include "render/integrator.h"
#include "render/scene.h"
#include "render/session.h"
-#include "render/integrator.h"
#include "util/util_args.h"
#include "util/util_foreach.h"
#include "util/util_function.h"
+#include "util/util_image.h"
#include "util/util_logging.h"
#include "util/util_path.h"
#include "util/util_progress.h"
diff --git a/intern/cycles/app/cycles_xml.cpp b/intern/cycles/app/cycles_xml.cpp
index 9caf7068d3d..aec00f845f3 100644
--- a/intern/cycles/app/cycles_xml.cpp
+++ b/intern/cycles/app/cycles_xml.cpp
@@ -16,9 +16,9 @@
#include <stdio.h>
-#include <sstream>
#include <algorithm>
#include <iterator>
+#include <sstream>
#include "graph/node_xml.h"
@@ -32,8 +32,8 @@
#include "render/nodes.h"
#include "render/object.h"
#include "render/osl.h"
-#include "render/shader.h"
#include "render/scene.h"
+#include "render/shader.h"
#include "subd/subd_patch.h"
#include "subd/subd_split.h"
@@ -292,7 +292,7 @@ static void xml_read_shader_graph(XMLReadState &state, Shader *shader, xml_node
filepath = path_join(state.base, filepath);
}
- snode = ((OSLShaderManager *)manager)->osl_node(filepath);
+ snode = OSLShaderManager::osl_node(manager, filepath);
if (!snode) {
fprintf(stderr, "Failed to create OSL node from \"%s\".\n", filepath.c_str());
@@ -326,6 +326,10 @@ static void xml_read_shader_graph(XMLReadState &state, Shader *shader, xml_node
fprintf(stderr, "Node type \"%s\" is not a shader node.\n", node_type->name.c_str());
continue;
}
+ else if (node_type->create == NULL) {
+ fprintf(stderr, "Can't create abstract node type \"%s\".\n", node_type->name.c_str());
+ continue;
+ }
snode = (ShaderNode *)node_type->create(node_type);
}
@@ -376,11 +380,11 @@ static Mesh *xml_add_mesh(Scene *scene, const Transform &tfm)
{
/* create mesh */
Mesh *mesh = new Mesh();
- scene->meshes.push_back(mesh);
+ scene->geometry.push_back(mesh);
/* create object*/
Object *object = new Object();
- object->mesh = mesh;
+ object->geometry = mesh;
object->tfm = tfm;
scene->objects.push_back(object);
@@ -495,7 +499,7 @@ static void xml_read_mesh(const XMLReadState &state, xml_node node)
float3 *fdata = attr->data_float3();
#if 0
- if(subdivide_uvs) {
+ if (subdivide_uvs) {
attr->flags |= ATTR_SUBDIVIDED;
}
#endif
diff --git a/intern/cycles/app/io_export_cycles_xml.py b/intern/cycles/app/io_export_cycles_xml.py
index a1b42f72f7c..d2c6dc493e8 100644
--- a/intern/cycles/app/io_export_cycles_xml.py
+++ b/intern/cycles/app/io_export_cycles_xml.py
@@ -64,7 +64,7 @@ class RenderButtonsPanel():
bl_context = "render"
@classmethod
- def poll(self, context):
+ def poll(cls, context):
return context.engine == 'CYCLES'
diff --git a/intern/cycles/blender/CMakeLists.txt b/intern/cycles/blender/CMakeLists.txt
index 7354b1e615e..2316800e21e 100644
--- a/intern/cycles/blender/CMakeLists.txt
+++ b/intern/cycles/blender/CMakeLists.txt
@@ -18,6 +18,9 @@ set(INC_SYS
set(SRC
blender_camera.cpp
blender_device.cpp
+ blender_image.cpp
+ blender_geometry.cpp
+ blender_light.cpp
blender_mesh.cpp
blender_object.cpp
blender_object_cull.cpp
@@ -29,13 +32,19 @@ set(SRC
blender_shader.cpp
blender_sync.cpp
blender_texture.cpp
+ blender_viewport.cpp
+ blender_volume.cpp
CCL_api.h
+ blender_device.h
+ blender_id_map.h
+ blender_image.h
blender_object_cull.h
blender_sync.h
blender_session.h
blender_texture.h
blender_util.h
+ blender_viewport.h
)
set(LIB
@@ -46,11 +55,15 @@ set(LIB
cycles_render
cycles_subd
cycles_util
+
+ ${PYTHON_LINKFLAGS}
+ ${PYTHON_LIBRARIES}
)
if(WITH_CYCLES_LOGGING)
list(APPEND LIB
- extern_glog
+ ${GLOG_LIBRARIES}
+ ${GFLAGS_LIBRARIES}
)
endif()
@@ -68,13 +81,34 @@ set(ADDON_FILES
add_definitions(${GL_DEFINITIONS})
if(WITH_CYCLES_DEVICE_OPENCL)
- add_definitions(-DWITH_OPENCL)
+ add_definitions(-DWITH_OPENCL)
endif()
if(WITH_CYCLES_NETWORK)
add_definitions(-DWITH_NETWORK)
endif()
+if(WITH_MOD_FLUID)
+ add_definitions(-DWITH_FLUID)
+endif()
+
+if(WITH_OPENVDB)
+ add_definitions(-DWITH_OPENVDB ${OPENVDB_DEFINITIONS})
+ list(APPEND INC_SYS
+ ${OPENVDB_INCLUDE_DIRS}
+ )
+ list(APPEND LIB
+ ${OPENVDB_LIBRARIES}
+ )
+endif()
+
+if(WITH_OPENIMAGEDENOISE)
+ add_definitions(-DWITH_OPENIMAGEDENOISE)
+ list(APPEND INC_SYS
+ ${OPENIMAGEDENOISE_INCLUDE_DIRS}
+ )
+endif()
+
blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
# avoid link failure with clang 3.4 debug
diff --git a/intern/cycles/blender/addon/__init__.py b/intern/cycles/blender/addon/__init__.py
index 93a1271b4b4..3ab352e52a2 100644
--- a/intern/cycles/blender/addon/__init__.py
+++ b/intern/cycles/blender/addon/__init__.py
@@ -20,10 +20,9 @@ bl_info = {
"name": "Cycles Render Engine",
"author": "",
"blender": (2, 80, 0),
- "location": "Info header, render engine menu",
- "description": "Cycles Render Engine integration",
+ "description": "Cycles renderer integration",
"warning": "",
- "wiki_url": "https://docs.blender.org/manual/en/dev/render/cycles/",
+ "doc_url": "https://docs.blender.org/manual/en/latest/render/cycles/",
"tracker_url": "",
"support": 'OFFICIAL',
"category": "Render"}
@@ -55,7 +54,7 @@ from . import (
class CyclesRender(bpy.types.RenderEngine):
bl_idname = 'CYCLES'
bl_label = "Cycles"
- bl_use_shading_nodes = True
+ bl_use_eevee_viewport = True
bl_use_preview = True
bl_use_exclude_layers = True
bl_use_save_buffers = True
@@ -83,20 +82,20 @@ class CyclesRender(bpy.types.RenderEngine):
def render(self, depsgraph):
engine.render(self, depsgraph)
- def bake(self, depsgraph, obj, pass_type, pass_filter, object_id, pixel_array, num_pixels, depth, result):
- engine.bake(self, depsgraph, obj, pass_type, pass_filter, object_id, pixel_array, num_pixels, depth, result)
+ def bake(self, depsgraph, obj, pass_type, pass_filter, width, height):
+ engine.bake(self, depsgraph, obj, pass_type, pass_filter, width, height)
# viewport render
- def view_update(self, context):
+ def view_update(self, context, depsgraph):
if not self.session:
engine.create(self, context.blend_data,
context.region, context.space_data, context.region_data)
- engine.reset(self, context.blend_data, context.depsgraph)
- engine.sync(self, context.depsgraph, context.blend_data)
+ engine.reset(self, context.blend_data, depsgraph)
+ engine.sync(self, depsgraph, context.blend_data)
- def view_draw(self, context):
- engine.draw(self, context.depsgraph, context.region, context.space_data, context.region_data)
+ def view_draw(self, context, depsgraph):
+ engine.draw(self, depsgraph, context.region, context.space_data, context.region_data)
def update_script_node(self, node):
if engine.with_osl():
diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py
index b8bc74f9e35..67e448db859 100644
--- a/intern/cycles/blender/addon/engine.py
+++ b/intern/cycles/blender/addon/engine.py
@@ -33,7 +33,7 @@ def _is_using_buggy_driver():
# in the version string, but those cards do not quite work and
# causing crashes.
return True
- regex = re.compile(".*Compatibility Profile Context ([0-9]+(\.[0-9]+)+)$")
+ regex = re.compile(".*Compatibility Profile Context ([0-9]+(\\.[0-9]+)+)$")
if not regex.match(version):
# Skip cards like FireGL
return False
@@ -139,15 +139,19 @@ def create(engine, data, region=None, v3d=None, rv3d=None, preview_osl=False):
data = data.as_pointer()
prefs = bpy.context.preferences.as_pointer()
+ screen = 0
if region:
+ screen = region.id_data.as_pointer()
region = region.as_pointer()
if v3d:
+ screen = screen or v3d.id_data.as_pointer()
v3d = v3d.as_pointer()
if rv3d:
+ screen = screen or rv3d.id_data.as_pointer()
rv3d = rv3d.as_pointer()
engine.session = _cycles.create(
- engine.as_pointer(), prefs, data, region, v3d, rv3d, preview_osl)
+ engine.as_pointer(), prefs, data, screen, region, v3d, rv3d, preview_osl)
def free(engine):
@@ -164,18 +168,19 @@ def render(engine, depsgraph):
_cycles.render(engine.session, depsgraph.as_pointer())
-def bake(engine, depsgraph, obj, pass_type, pass_filter, object_id, pixel_array, num_pixels, depth, result):
+def bake(engine, depsgraph, obj, pass_type, pass_filter, width, height):
import _cycles
session = getattr(engine, "session", None)
if session is not None:
- _cycles.bake(engine.session, depsgraph.as_pointer(), obj.as_pointer(), pass_type, pass_filter, object_id, pixel_array.as_pointer(), num_pixels, depth, result.as_pointer())
+ _cycles.bake(engine.session, depsgraph.as_pointer(), obj.as_pointer(), pass_type, pass_filter, width, height)
def reset(engine, data, depsgraph):
import _cycles
import bpy
- if bpy.app.debug_value == 256:
+ prefs = bpy.context.preferences
+ if prefs.experimental.use_cycles_debug and prefs.view.show_developer_ui:
_cycles.debug_flags_update(depsgraph.scene.as_pointer())
else:
_cycles.debug_flags_reset()
@@ -219,65 +224,96 @@ def system_info():
import _cycles
return _cycles.system_info()
-
-def register_passes(engine, scene, srl):
- engine.register_pass(scene, srl, "Combined", 4, "RGBA", 'COLOR')
-
- if srl.use_pass_z: engine.register_pass(scene, srl, "Depth", 1, "Z", 'VALUE')
- if srl.use_pass_mist: engine.register_pass(scene, srl, "Mist", 1, "Z", 'VALUE')
- if srl.use_pass_normal: engine.register_pass(scene, srl, "Normal", 3, "XYZ", 'VECTOR')
- if srl.use_pass_vector: engine.register_pass(scene, srl, "Vector", 4, "XYZW", 'VECTOR')
- if srl.use_pass_uv: engine.register_pass(scene, srl, "UV", 3, "UVA", 'VECTOR')
- if srl.use_pass_object_index: engine.register_pass(scene, srl, "IndexOB", 1, "X", 'VALUE')
- if srl.use_pass_material_index: engine.register_pass(scene, srl, "IndexMA", 1, "X", 'VALUE')
- if srl.use_pass_shadow: engine.register_pass(scene, srl, "Shadow", 3, "RGB", 'COLOR')
- if srl.use_pass_ambient_occlusion: engine.register_pass(scene, srl, "AO", 3, "RGB", 'COLOR')
- if srl.use_pass_diffuse_direct: engine.register_pass(scene, srl, "DiffDir", 3, "RGB", 'COLOR')
- if srl.use_pass_diffuse_indirect: engine.register_pass(scene, srl, "DiffInd", 3, "RGB", 'COLOR')
- if srl.use_pass_diffuse_color: engine.register_pass(scene, srl, "DiffCol", 3, "RGB", 'COLOR')
- if srl.use_pass_glossy_direct: engine.register_pass(scene, srl, "GlossDir", 3, "RGB", 'COLOR')
- if srl.use_pass_glossy_indirect: engine.register_pass(scene, srl, "GlossInd", 3, "RGB", 'COLOR')
- if srl.use_pass_glossy_color: engine.register_pass(scene, srl, "GlossCol", 3, "RGB", 'COLOR')
- if srl.use_pass_transmission_direct: engine.register_pass(scene, srl, "TransDir", 3, "RGB", 'COLOR')
- if srl.use_pass_transmission_indirect: engine.register_pass(scene, srl, "TransInd", 3, "RGB", 'COLOR')
- if srl.use_pass_transmission_color: engine.register_pass(scene, srl, "TransCol", 3, "RGB", 'COLOR')
- if srl.use_pass_subsurface_direct: engine.register_pass(scene, srl, "SubsurfaceDir", 3, "RGB", 'COLOR')
- if srl.use_pass_subsurface_indirect: engine.register_pass(scene, srl, "SubsurfaceInd", 3, "RGB", 'COLOR')
- if srl.use_pass_subsurface_color: engine.register_pass(scene, srl, "SubsurfaceCol", 3, "RGB", 'COLOR')
- if srl.use_pass_emit: engine.register_pass(scene, srl, "Emit", 3, "RGB", 'COLOR')
- if srl.use_pass_environment: engine.register_pass(scene, srl, "Env", 3, "RGB", 'COLOR')
-
+def list_render_passes(scene, srl):
+ # Builtin Blender passes.
+ yield ("Combined", "RGBA", 'COLOR')
+
+ if srl.use_pass_z: yield ("Depth", "Z", 'VALUE')
+ if srl.use_pass_mist: yield ("Mist", "Z", 'VALUE')
+ if srl.use_pass_normal: yield ("Normal", "XYZ", 'VECTOR')
+ if srl.use_pass_vector: yield ("Vector", "XYZW", 'VECTOR')
+ if srl.use_pass_uv: yield ("UV", "UVA", 'VECTOR')
+ if srl.use_pass_object_index: yield ("IndexOB", "X", 'VALUE')
+ if srl.use_pass_material_index: yield ("IndexMA", "X", 'VALUE')
+ if srl.use_pass_shadow: yield ("Shadow", "RGB", 'COLOR')
+ if srl.use_pass_ambient_occlusion: yield ("AO", "RGB", 'COLOR')
+ if srl.use_pass_diffuse_direct: yield ("DiffDir", "RGB", 'COLOR')
+ if srl.use_pass_diffuse_indirect: yield ("DiffInd", "RGB", 'COLOR')
+ if srl.use_pass_diffuse_color: yield ("DiffCol", "RGB", 'COLOR')
+ if srl.use_pass_glossy_direct: yield ("GlossDir", "RGB", 'COLOR')
+ if srl.use_pass_glossy_indirect: yield ("GlossInd", "RGB", 'COLOR')
+ if srl.use_pass_glossy_color: yield ("GlossCol", "RGB", 'COLOR')
+ if srl.use_pass_transmission_direct: yield ("TransDir", "RGB", 'COLOR')
+ if srl.use_pass_transmission_indirect: yield ("TransInd", "RGB", 'COLOR')
+ if srl.use_pass_transmission_color: yield ("TransCol", "RGB", 'COLOR')
+ if srl.use_pass_emit: yield ("Emit", "RGB", 'COLOR')
+ if srl.use_pass_environment: yield ("Env", "RGB", 'COLOR')
+
+ # Cycles specific passes.
crl = srl.cycles
- if crl.pass_debug_render_time: engine.register_pass(scene, srl, "Debug Render Time", 1, "X", 'VALUE')
- if crl.pass_debug_bvh_traversed_nodes: engine.register_pass(scene, srl, "Debug BVH Traversed Nodes", 1, "X", 'VALUE')
- if crl.pass_debug_bvh_traversed_instances: engine.register_pass(scene, srl, "Debug BVH Traversed Instances", 1, "X", 'VALUE')
- if crl.pass_debug_bvh_intersections: engine.register_pass(scene, srl, "Debug BVH Intersections", 1, "X", 'VALUE')
- if crl.pass_debug_ray_bounces: engine.register_pass(scene, srl, "Debug Ray Bounces", 1, "X", 'VALUE')
- if crl.use_pass_volume_direct: engine.register_pass(scene, srl, "VolumeDir", 3, "RGB", 'COLOR')
- if crl.use_pass_volume_indirect: engine.register_pass(scene, srl, "VolumeInd", 3, "RGB", 'COLOR')
-
+ if crl.pass_debug_render_time: yield ("Debug Render Time", "X", 'VALUE')
+ if crl.pass_debug_bvh_traversed_nodes: yield ("Debug BVH Traversed Nodes", "X", 'VALUE')
+ if crl.pass_debug_bvh_traversed_instances: yield ("Debug BVH Traversed Instances", "X", 'VALUE')
+ if crl.pass_debug_bvh_intersections: yield ("Debug BVH Intersections", "X", 'VALUE')
+ if crl.pass_debug_ray_bounces: yield ("Debug Ray Bounces", "X", 'VALUE')
+ if crl.pass_debug_sample_count: yield ("Debug Sample Count", "X", 'VALUE')
+ if crl.use_pass_volume_direct: yield ("VolumeDir", "RGB", 'COLOR')
+ if crl.use_pass_volume_indirect: yield ("VolumeInd", "RGB", 'COLOR')
+
+ # Cryptomatte passes.
+ crypto_depth = (crl.pass_crypto_depth + 1) // 2
if crl.use_pass_crypto_object:
- for i in range(0, crl.pass_crypto_depth, 2):
- engine.register_pass(scene, srl, "CryptoObject" + '{:02d}'.format(i), 4, "RGBA", 'COLOR')
+ for i in range(0, crypto_depth):
+ yield ("CryptoObject" + '{:02d}'.format(i), "RGBA", 'COLOR')
if crl.use_pass_crypto_material:
- for i in range(0, crl.pass_crypto_depth, 2):
- engine.register_pass(scene, srl, "CryptoMaterial" + '{:02d}'.format(i), 4, "RGBA", 'COLOR')
+ for i in range(0, crypto_depth):
+ yield ("CryptoMaterial" + '{:02d}'.format(i), "RGBA", 'COLOR')
if srl.cycles.use_pass_crypto_asset:
- for i in range(0, srl.cycles.pass_crypto_depth, 2):
- engine.register_pass(scene, srl, "CryptoAsset" + '{:02d}'.format(i), 4, "RGBA", 'COLOR')
+ for i in range(0, crypto_depth):
+ yield ("CryptoAsset" + '{:02d}'.format(i), "RGBA", 'COLOR')
+ # Denoising passes.
if crl.use_denoising or crl.denoising_store_passes:
- engine.register_pass(scene, srl, "Noisy Image", 4, "RGBA", 'COLOR')
+ yield ("Noisy Image", "RGBA", 'COLOR')
if crl.denoising_store_passes:
- engine.register_pass(scene, srl, "Denoising Normal", 3, "XYZ", 'VECTOR')
- engine.register_pass(scene, srl, "Denoising Albedo", 3, "RGB", 'COLOR')
- engine.register_pass(scene, srl, "Denoising Depth", 1, "Z", 'VALUE')
- engine.register_pass(scene, srl, "Denoising Shadowing", 1, "X", 'VALUE')
- engine.register_pass(scene, srl, "Denoising Variance", 3, "RGB", 'COLOR')
- engine.register_pass(scene, srl, "Denoising Intensity", 1, "X", 'VALUE')
- clean_options = ("denoising_diffuse_direct", "denoising_diffuse_indirect",
- "denoising_glossy_direct", "denoising_glossy_indirect",
- "denoising_transmission_direct", "denoising_transmission_indirect",
- "denoising_subsurface_direct", "denoising_subsurface_indirect")
- if any(getattr(crl, option) for option in clean_options):
- engine.register_pass(scene, srl, "Denoising Clean", 3, "RGB", 'COLOR')
+ yield ("Denoising Normal", "XYZ", 'VECTOR')
+ yield ("Denoising Albedo", "RGB", 'COLOR')
+ yield ("Denoising Depth", "Z", 'VALUE')
+
+ if scene.cycles.denoiser == 'NLM':
+ yield ("Denoising Shadowing", "X", 'VALUE')
+ yield ("Denoising Variance", "RGB", 'COLOR')
+ yield ("Denoising Intensity", "X", 'VALUE')
+
+ clean_options = ("denoising_diffuse_direct", "denoising_diffuse_indirect",
+ "denoising_glossy_direct", "denoising_glossy_indirect",
+ "denoising_transmission_direct", "denoising_transmission_indirect")
+ if any(getattr(crl, option) for option in clean_options):
+ yield ("Denoising Clean", "RGB", 'COLOR')
+
+ # Custom AOV passes.
+ for aov in crl.aovs:
+ if aov.type == 'VALUE':
+ yield (aov.name, "X", 'VALUE')
+ else:
+ yield (aov.name, "RGBA", 'COLOR')
+
+def register_passes(engine, scene, view_layer):
+ # Detect duplicate render pass names, first one wins.
+ listed = set()
+ for name, channelids, channeltype in list_render_passes(scene, view_layer):
+ if name not in listed:
+ engine.register_pass(scene, view_layer, name, len(channelids), channelids, channeltype)
+ listed.add(name)
+
+def detect_conflicting_passes(scene, view_layer):
+ # Detect conflicting render pass names for UI.
+ counter = {}
+ for name, _, _ in list_render_passes(scene, view_layer):
+ counter[name] = counter.get(name, 0) + 1
+
+ for aov in view_layer.cycles.aovs:
+ if counter[aov.name] > 1:
+ aov.conflict = "Conflicts with another render pass with the same name"
+ else:
+ aov.conflict = ""
diff --git a/intern/cycles/blender/addon/operators.py b/intern/cycles/blender/addon/operators.py
index 63c61c4799e..3c8e79eaba5 100644
--- a/intern/cycles/blender/addon/operators.py
+++ b/intern/cycles/blender/addon/operators.py
@@ -20,6 +20,8 @@ import bpy
from bpy.types import Operator
from bpy.props import StringProperty
+from bpy.app.translations import pgettext_tip as tip_
+
class CYCLES_OT_use_shading_nodes(Operator):
"""Enable nodes on a material, world or light"""
@@ -42,6 +44,36 @@ class CYCLES_OT_use_shading_nodes(Operator):
return {'FINISHED'}
+class CYCLES_OT_add_aov(bpy.types.Operator):
+ """Add an AOV pass"""
+ bl_idname="cycles.add_aov"
+ bl_label="Add AOV"
+
+ def execute(self, context):
+ view_layer = context.view_layer
+ cycles_view_layer = view_layer.cycles
+
+ cycles_view_layer.aovs.add()
+
+ view_layer.update_render_passes()
+ return {'FINISHED'}
+
+
+class CYCLES_OT_remove_aov(bpy.types.Operator):
+ """Remove an AOV pass"""
+ bl_idname="cycles.remove_aov"
+ bl_label="Remove AOV"
+
+ def execute(self, context):
+ view_layer = context.view_layer
+ cycles_view_layer = view_layer.cycles
+
+ cycles_view_layer.aovs.remove(cycles_view_layer.active_aov)
+
+ view_layer.update_render_passes()
+ return {'FINISHED'}
+
+
class CYCLES_OT_denoise_animation(Operator):
"Denoise rendered animation sequence using current scene and view " \
"layer settings. Requires denoising data passes and output to " \
@@ -98,7 +130,8 @@ class CYCLES_OT_denoise_animation(Operator):
if not os.path.isfile(filepath):
scene.render.filepath = original_filepath
- self.report({'ERROR'}, f"Frame '{filepath}' not found, animation must be complete.")
+ err_msg = tip_("Frame '%s' not found, animation must be complete") % filepath
+ self.report({'ERROR'}, err_msg)
return {'CANCELLED'}
scene.render.filepath = out_filepath
@@ -120,12 +153,12 @@ class CYCLES_OT_denoise_animation(Operator):
self.report({'ERROR'}, str(e))
return {'FINISHED'}
- self.report({'INFO'}, "Denoising completed.")
+ self.report({'INFO'}, "Denoising completed")
return {'FINISHED'}
class CYCLES_OT_merge_images(Operator):
- "Combine OpenEXR multilayer images rendered with different sample" \
+ "Combine OpenEXR multilayer images rendered with different sample " \
"ranges into one image with reduced noise"
bl_idname = "cycles.merge_images"
bl_label = "Merge Images"
@@ -164,6 +197,8 @@ class CYCLES_OT_merge_images(Operator):
classes = (
CYCLES_OT_use_shading_nodes,
+ CYCLES_OT_add_aov,
+ CYCLES_OT_remove_aov,
CYCLES_OT_denoise_animation,
CYCLES_OT_merge_images
)
diff --git a/intern/cycles/blender/addon/osl.py b/intern/cycles/blender/addon/osl.py
index dd92ce642d4..4c6e7952491 100644
--- a/intern/cycles/blender/addon/osl.py
+++ b/intern/cycles/blender/addon/osl.py
@@ -85,6 +85,7 @@ def update_script_node(node, report):
# write text datablock contents to temporary file
osl_file = tempfile.NamedTemporaryFile(mode='w', suffix=".osl", delete=False)
osl_file.write(script.as_string())
+ osl_file.write("\n")
osl_file.close()
ok, oso_path = osl_compile(osl_file.name, report)
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index db9e8bb46b3..d764f469eb7 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -19,6 +19,7 @@
import bpy
from bpy.props import (
BoolProperty,
+ CollectionProperty,
EnumProperty,
FloatProperty,
IntProperty,
@@ -31,6 +32,7 @@ from math import pi
# enums
import _cycles
+from . import engine
enum_devices = (
('CPU', "CPU", "Use CPU for rendering"),
@@ -53,8 +55,7 @@ enum_displacement_methods = (
enum_bvh_layouts = (
('BVH2', "BVH2", "", 1),
- ('BVH4', "BVH4", "", 2),
- ('BVH8', "BVH8", "", 4),
+ ('EMBREE', "Embree", "", 4),
)
enum_bvh_types = (
@@ -68,11 +69,6 @@ enum_filter_types = (
('BLACKMAN_HARRIS', "Blackman-Harris", "Blackman-Harris filter"),
)
-enum_aperture_types = (
- ('RADIUS', "Radius", "Directly change the size of the aperture"),
- ('FSTOP', "F-stop", "Change the size of the aperture by f-stop"),
-)
-
enum_panorama_types = (
('EQUIRECTANGULAR', "Equirectangular", "Render the scene with a spherical camera, also known as Lat Long panorama"),
('FISHEYE_EQUIDISTANT', "Fisheye Equidistant", "Ideal for fulldomes, ignore the sensor dimensions"),
@@ -81,20 +77,9 @@ enum_panorama_types = (
('MIRRORBALL', "Mirror Ball", "Uses the mirror ball mapping"),
)
-enum_curve_primitives = (
- ('TRIANGLES', "Triangles", "Create triangle geometry around strands"),
- ('LINE_SEGMENTS', "Line Segments", "Use line segment primitives"),
- ('CURVE_SEGMENTS', "Curve Segments", "Use segmented cardinal curve primitives"),
-)
-
-enum_triangle_curves = (
- ('CAMERA_TRIANGLES', "Planes", "Create individual triangles forming planes that face camera"),
- ('TESSELLATED_TRIANGLES', "Tessellated", "Create mesh surrounding each strand"),
-)
-
enum_curve_shape = (
- ('RIBBONS', "Ribbons", "Ignore thickness of each strand"),
- ('THICK', "Thick", "Use thickness of strand when rendering"),
+ ('RIBBONS', "Rounded Ribbons", "Render hair as flat ribbon with rounded normals, for fast rendering"),
+ ('THICK', "3D Curves", "Render hair as 3D curve, for accurate results when viewing hair close up"),
)
enum_tile_order = (
@@ -115,6 +100,7 @@ enum_use_layer_samples = (
enum_sampling_pattern = (
('SOBOL', "Sobol", "Use Sobol random sampling pattern"),
('CORRELATED_MUTI_JITTER', "Correlated Multi-Jitter", "Use Correlated Multi-Jitter random sampling pattern"),
+ ('PROGRESSIVE_MUTI_JITTER', "Progressive Multi-Jitter", "Use Progressive Multi-Jitter random sampling pattern"),
)
enum_integrator = (
@@ -142,6 +128,7 @@ enum_world_mis = (
enum_device_type = (
('CPU', "CPU", "CPU", 0),
('CUDA', "CUDA", "CUDA", 1),
+ ('OPTIX', "OptiX", "OptiX", 3),
('OPENCL', "OpenCL", "OpenCL", 2)
)
@@ -156,6 +143,88 @@ enum_texture_limit = (
('8192', "8192", "Limit texture size to 8192 pixels", 7),
)
+enum_view3d_shading_render_pass = (
+ ('', "General", ""),
+
+ ('COMBINED', "Combined", "Show the Combined Render pass", 1),
+ ('EMISSION', "Emission", "Show the Emission render pass", 33),
+ ('BACKGROUND', "Background", "Show the Background render pass", 34),
+ ('AO', "Ambient Occlusion", "Show the Ambient Occlusion render pass", 35),
+
+ ('', "Light", ""),
+
+ ('DIFFUSE_DIRECT', "Diffuse Direct", "Show the Diffuse Direct render pass", 38),
+ ('DIFFUSE_INDIRECT', "Diffuse Indirect", "Show the Diffuse Indirect render pass", 39),
+ ('DIFFUSE_COLOR', "Diffuse Color", "Show the Diffuse Color render pass", 40),
+
+ ('GLOSSY_DIRECT', "Glossy Direct", "Show the Glossy Direct render pass", 41),
+ ('GLOSSY_INDIRECT', "Glossy Indirect", "Show the Glossy Indirect render pass", 42),
+ ('GLOSSY_COLOR', "Glossy Color", "Show the Glossy Color render pass", 43),
+
+ ('', "", ""),
+
+ ('TRANSMISSION_DIRECT', "Transmission Direct", "Show the Transmission Direct render pass", 44),
+ ('TRANSMISSION_INDIRECT', "Transmission Indirect", "Show the Transmission Indirect render pass", 45),
+ ('TRANSMISSION_COLOR', "Transmission Color", "Show the Transmission Color render pass", 46),
+
+ ('VOLUME_DIRECT', "Volume Direct", "Show the Volume Direct render pass", 50),
+ ('VOLUME_INDIRECT', "Volume Indirect", "Show the Volume Indirect render pass", 51),
+
+ ('', "Data", ""),
+
+ ('NORMAL', "Normal", "Show the Normal render pass", 3),
+ ('UV', "UV", "Show the UV render pass", 4),
+ ('MIST', "Mist", "Show the Mist render pass", 32),
+)
+
+enum_aov_types = (
+ ('VALUE', "Value", "Write a Value pass", 0),
+ ('COLOR', "Color", "Write a Color pass", 1),
+)
+
+
+def enum_openimagedenoise_denoiser(self, context):
+ if _cycles.with_openimagedenoise:
+ return [('OPENIMAGEDENOISE', "OpenImageDenoise", "Use Intel OpenImageDenoise AI denoiser running on the CPU", 4)]
+ return []
+
+def enum_optix_denoiser(self, context):
+ if not context or bool(context.preferences.addons[__package__].preferences.get_devices_for_type('OPTIX')):
+ return [('OPTIX', "OptiX", "Use the OptiX AI denoiser with GPU acceleration, only available on NVIDIA GPUs", 2)]
+ return []
+
+def enum_preview_denoiser(self, context):
+ optix_items = enum_optix_denoiser(self, context)
+ oidn_items = enum_openimagedenoise_denoiser(self, context)
+
+ if len(optix_items) or len(oidn_items):
+ items = [('AUTO', "Automatic", "Use the fastest available denoiser for viewport rendering (OptiX if available, OpenImageDenoise otherwise)", 0)]
+ else:
+ items = [('AUTO', "None", "Blender was compiled without a viewport denoiser", 0)]
+
+ items += optix_items
+ items += oidn_items
+ return items
+
+def enum_denoiser(self, context):
+ items = [('NLM', "NLM", "Cycles native non-local means denoiser, running on any compute device", 1)]
+ items += enum_optix_denoiser(self, context)
+ items += enum_openimagedenoise_denoiser(self, context)
+ return items
+
+enum_denoising_input_passes = (
+ ('RGB', "Color", "Use only color as input", 1),
+ ('RGB_ALBEDO', "Color + Albedo", "Use color and albedo data as input", 2),
+ ('RGB_ALBEDO_NORMAL', "Color + Albedo + Normal", "Use color, albedo and normal data as input", 3),
+)
+
+
+def update_render_passes(self, context):
+ scene = context.scene
+ view_layer = context.view_layer
+ view_layer.update_render_passes()
+ engine.detect_conflicting_passes(scene, view_layer)
+
class CyclesRenderSettings(bpy.types.PropertyGroup):
@@ -183,6 +252,39 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default='PATH',
)
+ preview_pause: BoolProperty(
+ name="Pause Preview",
+ description="Pause all viewport preview renders",
+ default=False,
+ )
+
+ use_denoising: BoolProperty(
+ name="Use Denoising",
+ description="Denoise the rendered image",
+ default=False,
+ )
+ use_preview_denoising: BoolProperty(
+ name="Use Viewport Denoising",
+ description="Denoise the image in the 3D viewport",
+ default=False,
+ )
+
+ denoiser: EnumProperty(
+ name="Denoiser",
+ description="Denoise the image with the selected denoiser. "
+ "For denoising the image after rendering, denoising data render passes "
+ "also adapt to the selected denoiser",
+ items=enum_denoiser,
+ default=1,
+ update=update_render_passes,
+ )
+ preview_denoiser: EnumProperty(
+ name="Viewport Denoiser",
+ description="Denoise the image after each preview update with the selected denoiser",
+ items=enum_preview_denoiser,
+ default=0,
+ )
+
use_square_samples: BoolProperty(
name="Square Samples",
description="Square sampling values for easier artist control",
@@ -209,16 +311,11 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default=128,
)
preview_samples: IntProperty(
- name="Preview Samples",
+ name="Viewport Samples",
description="Number of samples to render in the viewport, unlimited if 0",
min=0, max=(1 << 24),
default=32,
)
- preview_pause: BoolProperty(
- name="Pause Preview",
- description="Pause all viewport preview renders",
- default=False,
- )
aa_samples: IntProperty(
name="AA Samples",
description="Number of antialiasing samples to render for each pixel",
@@ -231,6 +328,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
min=0, max=2097151,
default=32,
)
+
diffuse_samples: IntProperty(
name="Diffuse Samples",
description="Number of diffuse bounce samples to render for each AA sample",
@@ -261,14 +359,12 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
min=1, max=1024,
default=1,
)
-
subsurface_samples: IntProperty(
name="Subsurface Samples",
description="Number of subsurface scattering samples to render for each AA sample",
min=1, max=1024,
default=1,
)
-
volume_samples: IntProperty(
name="Volume Samples",
description="Number of volume scattering samples to render for each AA sample",
@@ -309,6 +405,41 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default=0.01,
)
+ use_adaptive_sampling: BoolProperty(
+ name="Use Adaptive Sampling",
+ description="Automatically reduce the number of samples per pixel based on estimated noise level",
+ default=False,
+ )
+
+ adaptive_threshold: FloatProperty(
+ name="Adaptive Sampling Threshold",
+ description="Noise level step to stop sampling at, lower values reduce noise the cost of render time. Zero for automatic setting based on number of AA samples",
+ min=0.0, max=1.0,
+ default=0.0,
+ precision=4,
+ )
+ adaptive_min_samples: IntProperty(
+ name="Adaptive Min Samples",
+ description="Minimum AA samples for adaptive sampling, to discover noisy features before stopping sampling. Zero for automatic setting based on number of AA samples",
+ min=0, max=4096,
+ default=0,
+ )
+
+ min_light_bounces: IntProperty(
+ name="Min Light Bounces",
+ description="Minimum number of light bounces. Setting this higher reduces noise in the first bounces, "
+ "but can also be less efficient for more complex geometry like hair and volumes",
+ min=0, max=1024,
+ default=0,
+ )
+ min_transparent_bounces: IntProperty(
+ name="Min Transparent Bounces",
+ description="Minimum number of transparent bounces. Setting this higher reduces noise in the first bounces, "
+ "but can also be less efficient for more complex geometry like hair and volumes",
+ min=0, max=1024,
+ default=0,
+ )
+
caustics_reflective: BoolProperty(
name="Reflective Caustics",
description="Use reflective caustics, resulting in a brighter image (more noise but added realism)",
@@ -368,13 +499,20 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default=8,
)
- volume_step_size: FloatProperty(
- name="Step Size",
- description="Distance between volume shader samples when rendering the volume "
- "(lower values give more accurate and detailed results, but also increased render time)",
- default=0.1,
- min=0.0000001, max=100000.0, soft_min=0.01, soft_max=1.0, precision=4,
- unit='LENGTH'
+ volume_step_rate: FloatProperty(
+ name="Step Rate",
+ description="Globally adjust detail for volume rendering, on top of automatically estimated step size. "
+ "Higher values reduce render time, lower values render with more detail",
+ default=1.0,
+ min=0.01, max=100.0, soft_min=0.1, soft_max=10.0, precision=2
+ )
+
+ volume_preview_step_rate: FloatProperty(
+ name="Step Rate",
+ description="Globally adjust detail for volume rendering, on top of automatically estimated step size. "
+ "Higher values reduce render time, lower values render with more detail",
+ default=1.0,
+ min=0.01, max=100.0, soft_min=0.1, soft_max=10.0, precision=2
)
volume_max_steps: IntProperty(
@@ -393,7 +531,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
subtype='PIXEL'
)
preview_dicing_rate: FloatProperty(
- name="Preview Dicing Rate",
+ name="Viewport Dicing Rate",
description="Size of a micropolygon in pixels during preview render",
min=0.1, max=1000.0, soft_min=0.5,
default=8.0,
@@ -430,11 +568,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
min=0.0, max=10.0,
default=1.0,
)
- film_transparent: BoolProperty(
- name="Transparent",
- description="World background is transparent, for compositing the render over another background",
- default=False,
- )
film_transparent_glass: BoolProperty(
name="Transparent Glass",
description="Render transmissive surfaces as transparent, for compositing glass over another background",
@@ -519,6 +652,12 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default=64,
subtype='PIXEL'
)
+ preview_denoising_start_sample: IntProperty(
+ name="Start Denoising",
+ description="Sample to start denoising the preview at",
+ min=0, max=(1 << 24),
+ default=1,
+ )
debug_reset_timeout: FloatProperty(
name="Reset timeout",
@@ -545,11 +684,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
items=enum_bvh_types,
default='DYNAMIC_BVH',
)
- use_bvh_embree: BoolProperty(
- name="Use Embree",
- description="Use Embree as ray accelerator",
- default=False,
- )
debug_use_spatial_splits: BoolProperty(
name="Use Spatial Splits",
description="Use BVH spatial splits: longer builder time, faster render",
@@ -598,7 +732,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
('DIFFUSE', "Diffuse", ""),
('GLOSSY', "Glossy", ""),
('TRANSMISSION', "Transmission", ""),
- ('SUBSURFACE', "Subsurface", ""),
),
)
@@ -703,13 +836,16 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
debug_bvh_layout: EnumProperty(
name="BVH Layout",
items=enum_bvh_layouts,
- default='BVH8',
+ default='EMBREE',
)
debug_use_cpu_split_kernel: BoolProperty(name="Split Kernel", default=False)
debug_use_cuda_adaptive_compile: BoolProperty(name="Adaptive Compile", default=False)
debug_use_cuda_split_kernel: BoolProperty(name="Split Kernel", default=False)
+ debug_optix_cuda_streams: IntProperty(name="CUDA Streams", default=1, min=1)
+ debug_optix_curves_api: BoolProperty(name="Native OptiX Curve Primitive", default=False)
+
debug_opencl_kernel_type: EnumProperty(
name="OpenCL Kernel Type",
default='DEFAULT',
@@ -760,49 +896,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
class CyclesCameraSettings(bpy.types.PropertyGroup):
- aperture_type: EnumProperty(
- name="Aperture Type",
- description="Use f-stop number or aperture radius",
- items=enum_aperture_types,
- default='RADIUS',
- )
- aperture_fstop: FloatProperty(
- name="Aperture f-stop",
- description="F-stop ratio (lower numbers give more defocus, higher numbers give a sharper image)",
- min=0.0, soft_min=0.1, soft_max=64.0,
- default=5.6,
- step=10,
- precision=1,
- )
- aperture_size: FloatProperty(
- name="Aperture Size",
- description="Radius of the aperture for depth of field (higher values give more defocus)",
- min=0.0, soft_max=10.0,
- default=0.0,
- step=1,
- precision=4,
- subtype='DISTANCE',
- )
- aperture_blades: IntProperty(
- name="Aperture Blades",
- description="Number of blades in aperture for polygonal bokeh (at least 3)",
- min=0, max=100,
- default=0,
- )
- aperture_rotation: FloatProperty(
- name="Aperture Rotation",
- description="Rotation of blades in aperture",
- soft_min=-pi, soft_max=pi,
- subtype='ANGLE',
- default=0,
- )
- aperture_ratio: FloatProperty(
- name="Aperture Ratio",
- description="Distortion to simulate anamorphic lens bokeh",
- min=0.01, soft_min=1.0, soft_max=2.0,
- default=1.0,
- precision=4,
- )
panorama_type: EnumProperty(
name="Panorama Type",
description="Distortion to use for the calculation",
@@ -899,6 +992,14 @@ class CyclesMaterialSettings(bpy.types.PropertyGroup):
default='LINEAR',
)
+ volume_step_rate: FloatProperty(
+ name="Step Rate",
+ description="Scale the distance between volume shader samples when rendering the volume "
+ "(lower values give more accurate and detailed results, but also increased render time)",
+ default=1.0,
+ min=0.001, max=1000.0, soft_min=0.1, soft_max=10.0, precision=4
+ )
+
displacement_method: EnumProperty(
name="Displacement Method",
description="Method to use for the displacement",
@@ -967,7 +1068,7 @@ class CyclesLightSettings(bpy.types.PropertyGroup):
class CyclesWorldSettings(bpy.types.PropertyGroup):
sampling_method: EnumProperty(
- name="Sampling method",
+ name="Sampling Method",
description="How to sample the background light",
items=enum_world_mis,
default='AUTOMATIC',
@@ -1009,6 +1110,13 @@ class CyclesWorldSettings(bpy.types.PropertyGroup):
items=enum_volume_interpolation,
default='LINEAR',
)
+ volume_step_size: FloatProperty(
+ name="Step Size",
+ description="Distance between volume shader samples when rendering the volume "
+ "(lower values give more accurate and detailed results, but also increased render time)",
+ default=1.0,
+ min=0.0000001, max=100000.0, soft_min=0.1, soft_max=100.0, precision=4
+ )
@classmethod
def register(cls):
@@ -1119,7 +1227,7 @@ class CyclesObjectSettings(bpy.types.PropertyGroup):
motion_steps: IntProperty(
name="Motion Steps",
description="Control accuracy of motion blur, more steps gives more memory usage (actual number of steps is 2^(steps - 1))",
- min=1, soft_max=8,
+ min=1, max=7,
default=1,
)
@@ -1148,6 +1256,13 @@ class CyclesObjectSettings(bpy.types.PropertyGroup):
default=1.0,
)
+ shadow_terminator_offset: FloatProperty(
+ name="Shadow Terminator Offset",
+ description="Push the shadow terminator towards the light to hide artifacts on low poly geometry",
+ min=0.0, max=1.0,
+ default=0.0,
+ )
+
is_shadow_catcher: BoolProperty(
name="Shadow Catcher",
description="Only render shadows on this object, for compositing renders into real footage",
@@ -1177,53 +1292,17 @@ class CyclesObjectSettings(bpy.types.PropertyGroup):
class CyclesCurveRenderSettings(bpy.types.PropertyGroup):
- primitive: EnumProperty(
- name="Primitive",
- description="Type of primitive used for hair rendering",
- items=enum_curve_primitives,
- default='LINE_SEGMENTS',
- )
shape: EnumProperty(
name="Shape",
description="Form of hair",
items=enum_curve_shape,
- default='THICK',
- )
- cull_backfacing: BoolProperty(
- name="Cull Back-faces",
- description="Do not test the back-face of each strand",
- default=True,
- )
- use_curves: BoolProperty(
- name="Use Cycles Hair Rendering",
- description="Activate Cycles hair rendering for particle system",
- default=True,
- )
- resolution: IntProperty(
- name="Resolution",
- description="Resolution of generated mesh",
- min=3, max=64,
- default=3,
- )
- minimum_width: FloatProperty(
- name="Minimal width",
- description="Minimal pixel width for strands (0 - deactivated)",
- min=0.0, max=100.0,
- default=0.0,
- subtype='PIXEL'
- )
- maximum_width: FloatProperty(
- name="Maximal width",
- description="Maximum extension that strand radius can be increased by",
- min=0.0, max=100.0,
- default=0.1,
- subtype='PIXEL'
+ default='RIBBONS',
)
subdivisions: IntProperty(
name="Subdivisions",
description="Number of subdivisions used in Cardinal curve intersection (power of 2)",
min=0, max=24,
- default=4,
+ default=2,
)
@classmethod
@@ -1239,10 +1318,25 @@ class CyclesCurveRenderSettings(bpy.types.PropertyGroup):
del bpy.types.Scene.cycles_curves
-def update_render_passes(self, context):
- view_layer = context.view_layer
- view_layer.update_render_passes()
-
+class CyclesAOVPass(bpy.types.PropertyGroup):
+ name: StringProperty(
+ name="Name",
+ description="Name of the pass, to use in the AOV Output shader node",
+ update=update_render_passes,
+ default="AOV"
+ )
+ type: EnumProperty(
+ name="Type",
+ description="Pass data type",
+ update=update_render_passes,
+ items=enum_aov_types,
+ default='COLOR'
+ )
+ conflict: StringProperty(
+ name="Conflict",
+ description="If there is a conflict with another render passes, message explaining why",
+ default=""
+ )
class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
@@ -1276,6 +1370,12 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
default=False,
update=update_render_passes,
)
+ pass_debug_sample_count: BoolProperty(
+ name="Debug Sample Count",
+ description="Number of samples/camera rays per pixel",
+ default=False,
+ update=update_render_passes,
+ )
use_pass_volume_direct: BoolProperty(
name="Volume Direct",
description="Deliver direct volumetric scattering pass",
@@ -1292,7 +1392,7 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
use_denoising: BoolProperty(
name="Use Denoising",
description="Denoise the rendered image",
- default=False,
+ default=True,
update=update_render_passes,
)
denoising_diffuse_direct: BoolProperty(
@@ -1325,16 +1425,6 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
description="Denoise the indirect transmission lighting",
default=True,
)
- denoising_subsurface_direct: BoolProperty(
- name="Subsurface Direct",
- description="Denoise the direct subsurface lighting",
- default=True,
- )
- denoising_subsurface_indirect: BoolProperty(
- name="Subsurface Indirect",
- description="Denoise the indirect subsurface lighting",
- default=True,
- )
denoising_strength: FloatProperty(
name="Denoising Strength",
description="Controls neighbor pixel weighting for the denoising filter (lower values preserve more detail, but aren't as smooth)",
@@ -1355,13 +1445,13 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
subtype="PIXEL",
)
denoising_relative_pca: BoolProperty(
- name="Relative filter",
+ name="Relative Filter",
description="When removing pixels that don't carry information, use a relative threshold instead of an absolute one (can help to reduce artifacts, but might cause detail loss around edges)",
default=False,
)
denoising_store_passes: BoolProperty(
- name="Store denoising passes",
- description="Store the denoising feature passes and the noisy image",
+ name="Store Denoising Passes",
+ description="Store the denoising feature passes and the noisy image. The passes adapt to the denoiser selected for rendering",
default=False,
update=update_render_passes,
)
@@ -1371,6 +1461,21 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
min=0, max=7,
default=0,
)
+
+ denoising_optix_input_passes: EnumProperty(
+ name="Input Passes",
+ description="Passes used by the denoiser to distinguish noise from shader and geometry detail",
+ items=enum_denoising_input_passes,
+ default='RGB_ALBEDO',
+ )
+
+ denoising_openimagedenoise_input_passes: EnumProperty(
+ name="Input Passes",
+ description="Passes used by the denoiser to distinguish noise from shader and geometry detail",
+ items=enum_denoising_input_passes,
+ default='RGB_ALBEDO_NORMAL',
+ )
+
use_pass_crypto_object: BoolProperty(
name="Cryptomatte Object",
description="Render cryptomatte object pass, for isolating objects in compositing",
@@ -1402,6 +1507,15 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
update=update_render_passes,
)
+ aovs: CollectionProperty(
+ type=CyclesAOVPass,
+ description="Custom render passes that can be output by shader nodes",
+ )
+ active_aov: IntProperty(
+ default=0,
+ min=0
+ )
+
@classmethod
def register(cls):
bpy.types.ViewLayer.cycles = PointerProperty(
@@ -1427,10 +1541,12 @@ class CyclesPreferences(bpy.types.AddonPreferences):
def get_device_types(self, context):
import _cycles
- has_cuda, has_opencl = _cycles.get_device_types()
+ has_cuda, has_optix, has_opencl = _cycles.get_device_types()
list = [('NONE', "None", "Don't use compute device", 0)]
if has_cuda:
list.append(('CUDA', "CUDA", "Use CUDA for GPU acceleration", 1))
+ if has_optix:
+ list.append(('OPTIX', "OptiX", "Use OptiX for GPU acceleration", 3))
if has_opencl:
list.append(('OPENCL', "OpenCL", "Use OpenCL for GPU acceleration", 2))
return list
@@ -1443,6 +1559,12 @@ class CyclesPreferences(bpy.types.AddonPreferences):
devices: bpy.props.CollectionProperty(type=CyclesDeviceSettings)
+ peer_memory: BoolProperty(
+ name="Distribute memory across devices",
+ description="Make more room for large scenes to fit by distributing memory across interconnected devices (e.g. via NVLink) rather than duplicating it",
+ default=False,
+ )
+
def find_existing_device_entry(self, device):
for device_entry in self.devices:
if device_entry.id == device[2] and device_entry.type == device[1]:
@@ -1451,7 +1573,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
def update_device_entries(self, device_list):
for device in device_list:
- if not device[1] in {'CUDA', 'OPENCL', 'CPU'}:
+ if not device[1] in {'CUDA', 'OPTIX', 'OPENCL', 'CPU'}:
continue
# Try to find existing Device entry
entry = self.find_existing_device_entry(device)
@@ -1466,8 +1588,8 @@ class CyclesPreferences(bpy.types.AddonPreferences):
# Update name in case it changed
entry.name = device[0]
- # Gets all devices types by default.
- def get_devices(self, compute_device_type=''):
+ # Gets all devices for a given compute device type.
+ def get_devices_for_type(self, compute_device_type):
import _cycles
# Layout of the device tuples: (Name, Type, Persistent ID)
device_list = _cycles.available_devices(compute_device_type)
@@ -1476,20 +1598,25 @@ class CyclesPreferences(bpy.types.AddonPreferences):
# hold pointers to a resized array.
self.update_device_entries(device_list)
# Sort entries into lists
- cuda_devices = []
- opencl_devices = []
+ devices = []
cpu_devices = []
for device in device_list:
entry = self.find_existing_device_entry(device)
- if entry.type == 'CUDA':
- cuda_devices.append(entry)
- elif entry.type == 'OPENCL':
- opencl_devices.append(entry)
+ if entry.type == compute_device_type:
+ devices.append(entry)
elif entry.type == 'CPU':
cpu_devices.append(entry)
# Extend all GPU devices with CPU.
- cuda_devices.extend(cpu_devices)
- opencl_devices.extend(cpu_devices)
+ if compute_device_type in ('CUDA', 'OPENCL'):
+ devices.extend(cpu_devices)
+ return devices
+
+ # For backwards compatibility, only returns CUDA and OpenCL but still
+ # refreshes all devices.
+ def get_devices(self, compute_device_type=''):
+ cuda_devices = self.get_devices_for_type('CUDA')
+ self.get_devices_for_type('OPTIX')
+ opencl_devices = self.get_devices_for_type('OPENCL')
return cuda_devices, opencl_devices
def get_num_gpu_devices(self):
@@ -1517,27 +1644,53 @@ class CyclesPreferences(bpy.types.AddonPreferences):
break
if not found_device:
- box.label(text="No compatible GPUs found", icon='INFO')
+ col = box.column(align=True)
+ col.label(text="No compatible GPUs found for path tracing", icon='INFO')
+ col.label(text="Cycles will render on the CPU", icon='BLANK1')
return
for device in devices:
box.prop(device, "use", text=device.name)
+ if device_type == 'OPTIX':
+ col = box.column(align=True)
+ col.label(text="OptiX support is experimental", icon='INFO')
+ col.label(text="Not all Cycles features are supported yet", icon='BLANK1')
+
+
def draw_impl(self, layout, context):
row = layout.row()
row.prop(self, "compute_device_type", expand=True)
- cuda_devices, opencl_devices = self.get_devices(self.compute_device_type)
+ if self.compute_device_type == 'NONE':
+ return
row = layout.row()
- if self.compute_device_type == 'CUDA':
- self._draw_devices(row, 'CUDA', cuda_devices)
- elif self.compute_device_type == 'OPENCL':
- self._draw_devices(row, 'OPENCL', opencl_devices)
+ devices = self.get_devices_for_type(self.compute_device_type)
+ self._draw_devices(row, self.compute_device_type, devices)
+
+ import _cycles
+ has_peer_memory = 0
+ for device in _cycles.available_devices(self.compute_device_type):
+ if device[3] and self.find_existing_device_entry(device).use:
+ has_peer_memory += 1
+ if has_peer_memory > 1:
+ row = layout.row()
+ row.use_property_split = True
+ row.prop(self, "peer_memory")
def draw(self, context):
self.draw_impl(self.layout, context)
+class CyclesView3DShadingSettings(bpy.types.PropertyGroup):
+ render_pass: EnumProperty(
+ name="Render Pass",
+ description="Render pass to show in the 3D Viewport",
+ items=enum_view3d_shading_render_pass,
+ default='COMBINED',
+ )
+
+
def register():
bpy.utils.register_class(CyclesRenderSettings)
bpy.utils.register_class(CyclesCameraSettings)
@@ -1550,7 +1703,14 @@ def register():
bpy.utils.register_class(CyclesCurveRenderSettings)
bpy.utils.register_class(CyclesDeviceSettings)
bpy.utils.register_class(CyclesPreferences)
+ bpy.utils.register_class(CyclesAOVPass)
bpy.utils.register_class(CyclesRenderLayerSettings)
+ bpy.utils.register_class(CyclesView3DShadingSettings)
+
+ bpy.types.View3DShading.cycles = bpy.props.PointerProperty(
+ name="Cycles Settings",
+ type=CyclesView3DShadingSettings,
+ )
def unregister():
@@ -1565,4 +1725,6 @@ def unregister():
bpy.utils.unregister_class(CyclesCurveRenderSettings)
bpy.utils.unregister_class(CyclesDeviceSettings)
bpy.utils.unregister_class(CyclesPreferences)
+ bpy.utils.unregister_class(CyclesAOVPass)
bpy.utils.unregister_class(CyclesRenderLayerSettings)
+ bpy.utils.unregister_class(CyclesView3DShadingSettings)
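
A brief sketch of the reworked device query API defined above, assuming the Cycles add-on is registered under the "cycles" key in the user preferences; get_devices_for_type() is the new per-type entry point and get_devices() is the backwards-compatible wrapper from this patch:

    import bpy

    prefs = bpy.context.preferences.addons["cycles"].preferences  # assumes the add-on key is "cycles"
    optix_devices = prefs.get_devices_for_type('OPTIX')           # per-type query added in this patch
    cuda_devices, opencl_devices = prefs.get_devices()            # legacy signature; still refreshes OptiX entries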
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 303f1ddf1b2..39cb1477c33 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -22,6 +22,8 @@ from bl_ui.utils import PresetPanel
from bpy.types import Panel
+from bl_ui.properties_grease_pencil_common import GreasePencilSimplifyPanel
+
class CYCLES_PT_sampling_presets(PresetPanel, Panel):
bl_label = "Sampling Presets"
@@ -57,7 +59,7 @@ def node_panel(cls):
node_cls.bl_space_type = 'NODE_EDITOR'
node_cls.bl_region_type = 'UI'
- node_cls.bl_category = "Node"
+ node_cls.bl_category = "Options"
if hasattr(node_cls, 'bl_parent_id'):
node_cls.bl_parent_id = 'NODE_' + node_cls.bl_parent_id
@@ -86,10 +88,16 @@ def use_cuda(context):
return (get_device_type(context) == 'CUDA' and cscene.device == 'GPU')
+def use_optix(context):
+ cscene = context.scene.cycles
+
+ return (get_device_type(context) == 'OPTIX' and cscene.device == 'GPU')
+
+
def use_branched_path(context):
cscene = context.scene.cycles
- return (cscene.progressive == 'BRANCHED_PATH')
+ return (cscene.progressive == 'BRANCHED_PATH' and not use_optix(context))
def use_sample_all_lights(context):
@@ -166,29 +174,29 @@ class CYCLES_RENDER_PT_sampling(CyclesButtonsPanel, Panel):
layout.use_property_split = True
layout.use_property_decorate = False
- layout.prop(cscene, "progressive")
+ if not use_optix(context):
+ layout.prop(cscene, "progressive")
- if cscene.progressive == 'PATH' or use_branched_path(context) is False:
+ if not use_branched_path(context):
col = layout.column(align=True)
col.prop(cscene, "samples", text="Render")
col.prop(cscene, "preview_samples", text="Viewport")
-
- draw_samples_info(layout, context)
else:
col = layout.column(align=True)
col.prop(cscene, "aa_samples", text="Render")
col.prop(cscene, "preview_aa_samples", text="Viewport")
+ if not use_branched_path(context):
+ draw_samples_info(layout, context)
+
class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel):
bl_label = "Sub Samples"
bl_parent_id = "CYCLES_RENDER_PT_sampling"
@classmethod
- def poll(self, context):
- scene = context.scene
- cscene = scene.cycles
- return cscene.progressive != 'PATH' and use_branched_path(context)
+ def poll(cls, context):
+ return use_branched_path(context)
def draw(self, context):
layout = self.layout
@@ -213,6 +221,70 @@ class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel):
draw_samples_info(layout, context)
+class CYCLES_RENDER_PT_sampling_adaptive(CyclesButtonsPanel, Panel):
+ bl_label = "Adaptive Sampling"
+ bl_parent_id = "CYCLES_RENDER_PT_sampling"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw_header(self, context):
+ layout = self.layout
+ scene = context.scene
+ cscene = scene.cycles
+
+ layout.prop(cscene, "use_adaptive_sampling", text="")
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ layout.active = cscene.use_adaptive_sampling
+
+ col = layout.column(align=True)
+ col.prop(cscene, "adaptive_threshold", text="Noise Threshold")
+ col.prop(cscene, "adaptive_min_samples", text="Min Samples")
+
+
+class CYCLES_RENDER_PT_sampling_denoising(CyclesButtonsPanel, Panel):
+ bl_label = "Denoising"
+ bl_parent_id = "CYCLES_RENDER_PT_sampling"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ heading = layout.column(align=True, heading="Render")
+ row = heading.row(align=True)
+ row.prop(cscene, "use_denoising", text="")
+ sub = row.row()
+
+ sub.active = cscene.use_denoising
+ for view_layer in scene.view_layers:
+ if view_layer.cycles.denoising_store_passes:
+ sub.active = True
+
+ sub.prop(cscene, "denoiser", text="")
+
+ heading = layout.column(align=False, heading="Viewport")
+ row = heading.row(align=True)
+ row.prop(cscene, "use_preview_denoising", text="")
+ sub = row.row()
+ sub.active = cscene.use_preview_denoising
+ sub.prop(cscene, "preview_denoiser", text="")
+
+ sub = heading.row(align=True)
+ sub.active = cscene.use_preview_denoising
+ sub.prop(cscene, "preview_denoising_start_sample", text="Start Sample")
+
+
class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
bl_label = "Advanced"
bl_parent_id = "CYCLES_RENDER_PT_sampling"
@@ -230,13 +302,17 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
row.prop(cscene, "seed")
row.prop(cscene, "use_animated_seed", text="", icon='TIME')
- layout.prop(cscene, "sampling_pattern", text="Pattern")
+ col = layout.column(align=True)
+ col.active = not cscene.use_adaptive_sampling
+ col.prop(cscene, "sampling_pattern", text="Pattern")
layout.prop(cscene, "use_square_samples")
layout.separator()
col = layout.column(align=True)
+ col.prop(cscene, "min_light_bounces")
+ col.prop(cscene, "min_transparent_bounces")
col.prop(cscene, "light_sampling_threshold", text="Light Threshold")
if cscene.progressive != 'PATH' and use_branched_path(context):
@@ -264,7 +340,7 @@ class CYCLES_RENDER_PT_sampling_total(CyclesButtonsPanel, Panel):
bl_parent_id = "CYCLES_RENDER_PT_sampling"
@classmethod
- def poll(self, context):
+ def poll(cls, context):
scene = context.scene
cscene = scene.cycles
@@ -320,7 +396,7 @@ class CYCLES_RENDER_PT_subdivision(CyclesButtonsPanel, Panel):
bl_options = {'DEFAULT_CLOSED'}
@classmethod
- def poll(self, context):
+ def poll(cls, context):
return (context.scene.render.engine == 'CYCLES') and (context.scene.cycles.feature_set == 'EXPERIMENTAL')
def draw(self, context):
@@ -334,7 +410,7 @@ class CYCLES_RENDER_PT_subdivision(CyclesButtonsPanel, Panel):
col = layout.column()
sub = col.column(align=True)
sub.prop(cscene, "dicing_rate", text="Dicing Rate Render")
- sub.prop(cscene, "preview_dicing_rate", text="Preview")
+ sub.prop(cscene, "preview_dicing_rate", text="Viewport")
col.separator()
@@ -348,13 +424,6 @@ class CYCLES_RENDER_PT_hair(CyclesButtonsPanel, Panel):
bl_label = "Hair"
bl_options = {'DEFAULT_CLOSED'}
- def draw_header(self, context):
- layout = self.layout
- scene = context.scene
- ccscene = scene.cycles_curves
-
- layout.prop(ccscene, "use_curves", text="")
-
def draw(self, context):
layout = self.layout
layout.use_property_split = True
@@ -363,20 +432,10 @@ class CYCLES_RENDER_PT_hair(CyclesButtonsPanel, Panel):
scene = context.scene
ccscene = scene.cycles_curves
- layout.active = ccscene.use_curves
-
col = layout.column()
- col.prop(ccscene, "minimum_width", text="Min Pixels")
- col.prop(ccscene, "maximum_width", text="Max Extension")
col.prop(ccscene, "shape", text="Shape")
- if not (ccscene.primitive in {'CURVE_SEGMENTS', 'LINE_SEGMENTS'} and ccscene.shape == 'RIBBONS'):
- col.prop(ccscene, "cull_backfacing", text="Cull back-faces")
- col.prop(ccscene, "primitive", text="Primitive")
-
- if ccscene.primitive == 'TRIANGLES' and ccscene.shape == 'THICK':
- col.prop(ccscene, "resolution", text="Resolution")
- elif ccscene.primitive == 'CURVE_SEGMENTS':
- col.prop(ccscene, "subdivisions", text="Curve subdivisions")
+ if ccscene.shape == 'RIBBONS':
+ col.prop(ccscene, "subdivisions", text="Curve Subdivisions")
class CYCLES_RENDER_PT_volumes(CyclesButtonsPanel, Panel):
@@ -391,9 +450,11 @@ class CYCLES_RENDER_PT_volumes(CyclesButtonsPanel, Panel):
scene = context.scene
cscene = scene.cycles
- col = layout.column()
- col.prop(cscene, "volume_step_size", text="Step Size")
- col.prop(cscene, "volume_max_steps", text="Max Steps")
+ col = layout.column(align=True)
+ col.prop(cscene, "volume_step_rate", text="Step Rate Render")
+ col.prop(cscene, "volume_preview_step_rate", text="Viewport")
+
+ layout.prop(cscene, "volume_max_steps", text="Max Steps")
class CYCLES_RENDER_PT_light_paths(CyclesButtonsPanel, Panel):
@@ -461,8 +522,9 @@ class CYCLES_RENDER_PT_light_paths_caustics(CyclesButtonsPanel, Panel):
col = layout.column()
col.prop(cscene, "blur_glossy")
- col.prop(cscene, "caustics_reflective")
- col.prop(cscene, "caustics_refractive")
+ col = layout.column(heading="Caustics", align=True)
+ col.prop(cscene, "caustics_reflective", text="Reflective")
+ col.prop(cscene, "caustics_refractive", text="Refractive")
class CYCLES_RENDER_PT_motion_blur(CyclesButtonsPanel, Panel):
@@ -538,31 +600,32 @@ class CYCLES_RENDER_PT_film(CyclesButtonsPanel, Panel):
class CYCLES_RENDER_PT_film_transparency(CyclesButtonsPanel, Panel):
- bl_label = "Transparency"
+ bl_label = "Transparent"
bl_parent_id = "CYCLES_RENDER_PT_film"
def draw_header(self, context):
layout = self.layout
scene = context.scene
- cscene = scene.cycles
+ rd = scene.render
- layout.prop(cscene, "film_transparent", text="")
+ layout.prop(rd, "film_transparent", text="")
def draw(self, context):
layout = self.layout
layout.use_property_split = True
layout.use_property_decorate = False
scene = context.scene
+ rd = scene.render
cscene = scene.cycles
- layout.active = cscene.film_transparent
+ layout.active = rd.film_transparent
col = layout.column()
col.prop(cscene, "film_transparent_glass", text="Transparent Glass")
sub = col.column()
- sub.active = cscene.film_transparent and cscene.film_transparent_glass
+ sub.active = rd.film_transparent and cscene.film_transparent_glass
sub.prop(cscene, "film_transparent_roughness", text="Roughness Threshold")
@@ -633,9 +696,6 @@ class CYCLES_RENDER_PT_performance_tiles(CyclesButtonsPanel, Panel):
sub = col.column()
sub.active = not rd.use_save_buffers
- for view_layer in scene.view_layers:
- if view_layer.cycles.use_denoising:
- sub.active = False
sub.prop(cscene, "use_progressive_refine")
@@ -655,16 +715,20 @@ class CYCLES_RENDER_PT_performance_acceleration_structure(CyclesButtonsPanel, Pa
col = layout.column()
- if _cycles.with_embree:
- row = col.row()
- row.active = use_cpu(context)
- row.prop(cscene, "use_bvh_embree")
+ use_embree = False
+ if use_cpu(context):
+ use_embree = _cycles.with_embree
+ if not use_embree:
+ sub = col.column(align=True)
+ sub.label(text="Cycles built without Embree support")
+ sub.label(text="CPU raytracing performance will be poor")
+
col.prop(cscene, "debug_use_spatial_splits")
sub = col.column()
- sub.active = not cscene.use_bvh_embree or not _cycles.with_embree
+ sub.active = not use_embree
sub.prop(cscene, "debug_use_hair_bvh")
sub = col.column()
- sub.active = not cscene.debug_use_spatial_splits and not cscene.use_bvh_embree
+ sub.active = not cscene.debug_use_spatial_splits and not use_embree
sub.prop(cscene, "debug_bvh_time_steps")
@@ -720,20 +784,12 @@ class CYCLES_RENDER_PT_filter(CyclesButtonsPanel, Panel):
rd = scene.render
view_layer = context.view_layer
- flow = layout.grid_flow(row_major=True, columns=0, even_columns=True, even_rows=False, align=False)
-
- col = flow.column()
+ col = layout.column(heading="Include")
col.prop(view_layer, "use_sky", text="Environment")
- col = flow.column()
col.prop(view_layer, "use_ao", text="Ambient Occlusion")
- col = flow.column()
col.prop(view_layer, "use_solid", text="Surfaces")
- col = flow.column()
col.prop(view_layer, "use_strand", text="Hair")
- if with_freestyle:
- col = flow.column()
- col.prop(view_layer, "use_freestyle", text="Freestyle")
- col.active = rd.use_freestyle
+ col.prop(view_layer, "use_volumes", text="Volumes")
class CYCLES_RENDER_PT_override(CyclesButtonsPanel, Panel):
@@ -755,7 +811,6 @@ class CYCLES_RENDER_PT_override(CyclesButtonsPanel, Panel):
class CYCLES_RENDER_PT_passes(CyclesButtonsPanel, Panel):
bl_label = "Passes"
bl_context = "view_layer"
- bl_options = {'DEFAULT_CLOSED'}
def draw(self, context):
pass
@@ -776,34 +831,27 @@ class CYCLES_RENDER_PT_passes_data(CyclesButtonsPanel, Panel):
view_layer = context.view_layer
cycles_view_layer = view_layer.cycles
- flow = layout.grid_flow(row_major=True, columns=0, even_columns=True, even_rows=False, align=False)
- col = flow.column()
+ col = layout.column(heading="Include", align=True)
col.prop(view_layer, "use_pass_combined")
- col = flow.column()
col.prop(view_layer, "use_pass_z")
- col = flow.column()
col.prop(view_layer, "use_pass_mist")
- col = flow.column()
col.prop(view_layer, "use_pass_normal")
- col = flow.column()
- col.prop(view_layer, "use_pass_vector")
- col.active = not rd.use_motion_blur
- col = flow.column()
+ sub = col.column()
+ sub.active = not rd.use_motion_blur
+ sub.prop(view_layer, "use_pass_vector")
col.prop(view_layer, "use_pass_uv")
- col = flow.column()
+
+ col.prop(cycles_view_layer, "denoising_store_passes", text="Denoising Data")
+
+ col = layout.column(heading="Indexes", align=True)
col.prop(view_layer, "use_pass_object_index")
- col = flow.column()
col.prop(view_layer, "use_pass_material_index")
- layout.separator()
-
- flow = layout.grid_flow(row_major=True, columns=0, even_columns=True, even_rows=False, align=False)
- col = flow.column()
- col.prop(cycles_view_layer, "denoising_store_passes", text="Denoising Data")
- col = flow.column()
+ col = layout.column(heading="Debug", align=True)
col.prop(cycles_view_layer, "pass_debug_render_time", text="Render Time")
+ col.prop(cycles_view_layer, "pass_debug_sample_count", text="Sample Count")
+
- layout.separator()
layout.prop(view_layer, "pass_alpha_threshold")
@@ -821,46 +869,26 @@ class CYCLES_RENDER_PT_passes_light(CyclesButtonsPanel, Panel):
view_layer = context.view_layer
cycles_view_layer = view_layer.cycles
- split = layout.split(factor=0.35)
- split.use_property_split = False
- split.label(text="Diffuse")
- row = split.row(align=True)
- row.prop(view_layer, "use_pass_diffuse_direct", text="Direct", toggle=True)
- row.prop(view_layer, "use_pass_diffuse_indirect", text="Indirect", toggle=True)
- row.prop(view_layer, "use_pass_diffuse_color", text="Color", toggle=True)
-
- split = layout.split(factor=0.35)
- split.use_property_split = False
- split.label(text="Glossy")
- row = split.row(align=True)
- row.prop(view_layer, "use_pass_glossy_direct", text="Direct", toggle=True)
- row.prop(view_layer, "use_pass_glossy_indirect", text="Indirect", toggle=True)
- row.prop(view_layer, "use_pass_glossy_color", text="Color", toggle=True)
-
- split = layout.split(factor=0.35)
- split.use_property_split = False
- split.label(text="Transmission")
- row = split.row(align=True)
- row.prop(view_layer, "use_pass_transmission_direct", text="Direct", toggle=True)
- row.prop(view_layer, "use_pass_transmission_indirect", text="Indirect", toggle=True)
- row.prop(view_layer, "use_pass_transmission_color", text="Color", toggle=True)
-
- split = layout.split(factor=0.35)
- split.use_property_split = False
- split.label(text="Subsurface")
- row = split.row(align=True)
- row.prop(view_layer, "use_pass_subsurface_direct", text="Direct", toggle=True)
- row.prop(view_layer, "use_pass_subsurface_indirect", text="Indirect", toggle=True)
- row.prop(view_layer, "use_pass_subsurface_color", text="Color", toggle=True)
-
- split = layout.split(factor=0.35)
- split.use_property_split = False
- split.label(text="Volume")
- row = split.row(align=True)
- row.prop(cycles_view_layer, "use_pass_volume_direct", text="Direct", toggle=True)
- row.prop(cycles_view_layer, "use_pass_volume_indirect", text="Indirect", toggle=True)
+ col = layout.column(heading="Diffuse", align=True)
+ col.prop(view_layer, "use_pass_diffuse_direct", text="Direct")
+ col.prop(view_layer, "use_pass_diffuse_indirect", text="Indirect")
+ col.prop(view_layer, "use_pass_diffuse_color", text="Color")
- col = layout.column(align=True)
+ col = layout.column(heading="Glossy", align=True)
+ col.prop(view_layer, "use_pass_glossy_direct", text="Direct")
+ col.prop(view_layer, "use_pass_glossy_indirect", text="Indirect")
+ col.prop(view_layer, "use_pass_glossy_color", text="Color")
+
+ col = layout.column(heading="Transmission", align=True)
+ col.prop(view_layer, "use_pass_transmission_direct", text="Direct")
+ col.prop(view_layer, "use_pass_transmission_indirect", text="Indirect")
+ col.prop(view_layer, "use_pass_transmission_color", text="Color")
+
+ col = layout.column(heading="Volume", align=True)
+ col.prop(cycles_view_layer, "use_pass_volume_direct", text="Direct")
+ col.prop(cycles_view_layer, "use_pass_volume_indirect", text="Indirect")
+
+ col = layout.column(heading="Other", align=True)
col.prop(view_layer, "use_pass_emit", text="Emission")
col.prop(view_layer, "use_pass_environment")
col.prop(view_layer, "use_pass_shadow")
@@ -881,11 +909,10 @@ class CYCLES_RENDER_PT_passes_crypto(CyclesButtonsPanel, Panel):
cycles_view_layer = context.view_layer.cycles
- row = layout.row(align=True)
- row.use_property_split = False
- row.prop(cycles_view_layer, "use_pass_crypto_object", text="Object", toggle=True)
- row.prop(cycles_view_layer, "use_pass_crypto_material", text="Material", toggle=True)
- row.prop(cycles_view_layer, "use_pass_crypto_asset", text="Asset", toggle=True)
+ col = layout.column(heading="Include", align=True)
+ col.prop(cycles_view_layer, "use_pass_crypto_object", text="Object")
+ col.prop(cycles_view_layer, "use_pass_crypto_material", text="Material")
+ col.prop(cycles_view_layer, "use_pass_crypto_asset", text="Asset")
layout.prop(cycles_view_layer, "pass_crypto_depth", text="Levels")
@@ -917,17 +944,58 @@ class CYCLES_RENDER_PT_passes_debug(CyclesButtonsPanel, Panel):
layout.prop(cycles_view_layer, "pass_debug_ray_bounces")
+class CYCLES_RENDER_UL_aov(bpy.types.UIList):
+ def draw_item(self, context, layout, data, item, icon, active_data, active_propname):
+ row = layout.row()
+ split = row.split(factor=0.65)
+ icon = 'ERROR' if item.conflict else 'NONE'
+ split.row().prop(item, "name", text="", icon=icon, emboss=False)
+ split.row().prop(item, "type", text="", emboss=False)
+
+
+class CYCLES_RENDER_PT_passes_aov(CyclesButtonsPanel, Panel):
+ bl_label = "Shader AOV"
+ bl_context = "view_layer"
+ bl_parent_id = "CYCLES_RENDER_PT_passes"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ cycles_view_layer = context.view_layer.cycles
+
+ row = layout.row()
+ col = row.column()
+ col.template_list("CYCLES_RENDER_UL_aov", "aovs", cycles_view_layer, "aovs", cycles_view_layer, "active_aov", rows=2)
+
+ col = row.column()
+ sub = col.column(align=True)
+ sub.operator("cycles.add_aov", icon='ADD', text="")
+ sub.operator("cycles.remove_aov", icon='REMOVE', text="")
+
+ if cycles_view_layer.active_aov < len(cycles_view_layer.aovs):
+ active_aov = cycles_view_layer.aovs[cycles_view_layer.active_aov]
+ if active_aov.conflict:
+ layout.label(text=active_aov.conflict, icon='ERROR')
+
+
class CYCLES_RENDER_PT_denoising(CyclesButtonsPanel, Panel):
bl_label = "Denoising"
bl_context = "view_layer"
bl_options = {'DEFAULT_CLOSED'}
+ @classmethod
+ def poll(cls, context):
+ cscene = context.scene.cycles
+ return CyclesButtonsPanel.poll(context) and cscene.use_denoising
+
def draw_header(self, context):
scene = context.scene
view_layer = context.view_layer
cycles_view_layer = view_layer.cycles
- layout = self.layout
+ layout = self.layout
layout.prop(cycles_view_layer, "use_denoising", text="")
def draw(self, context):
@@ -938,66 +1006,43 @@ class CYCLES_RENDER_PT_denoising(CyclesButtonsPanel, Panel):
scene = context.scene
view_layer = context.view_layer
cycles_view_layer = view_layer.cycles
+ denoiser = scene.cycles.denoiser
- split = layout.split()
- split.active = cycles_view_layer.use_denoising
+ layout.active = denoiser != 'NONE' and cycles_view_layer.use_denoising
- layout = layout.column(align=True)
- layout.prop(cycles_view_layer, "denoising_radius", text="Radius")
- layout.prop(cycles_view_layer, "denoising_strength", slider=True, text="Strength")
- layout.prop(cycles_view_layer, "denoising_feature_strength", slider=True, text="Feature Strength")
- layout.prop(cycles_view_layer, "denoising_relative_pca")
+ col = layout.column()
- layout.separator()
+ if denoiser == 'OPTIX':
+ col.prop(cycles_view_layer, "denoising_optix_input_passes")
+ return
+ elif denoiser == 'OPENIMAGEDENOISE':
+ col.prop(cycles_view_layer, "denoising_openimagedenoise_input_passes")
+ return
- split = layout.split(factor=0.5)
- split.active = cycles_view_layer.use_denoising or cycles_view_layer.denoising_store_passes
+ col.prop(cycles_view_layer, "denoising_radius", text="Radius")
- col = split.column()
- col.alignment = 'RIGHT'
- col.label(text="Diffuse")
+ col = layout.column()
+ col.prop(cycles_view_layer, "denoising_strength", slider=True, text="Strength")
+ col.prop(cycles_view_layer, "denoising_feature_strength", slider=True, text="Feature Strength")
+ col.prop(cycles_view_layer, "denoising_relative_pca")
- row = split.row(align=True)
- row.use_property_split = False
- row.prop(cycles_view_layer, "denoising_diffuse_direct", text="Direct", toggle=True)
- row.prop(cycles_view_layer, "denoising_diffuse_indirect", text="Indirect", toggle=True)
+ layout.separator()
- split = layout.split(factor=0.5)
- split.active = cycles_view_layer.use_denoising or cycles_view_layer.denoising_store_passes
+ col = layout.column()
+ col.active = cycles_view_layer.use_denoising or cycles_view_layer.denoising_store_passes
- col = split.column()
- col.alignment = 'RIGHT'
- col.label(text="Glossy")
+ row = col.row(heading="Diffuse", align=True)
+ row.prop(cycles_view_layer, "denoising_diffuse_direct", text="Direct", toggle=True)
+ row.prop(cycles_view_layer, "denoising_diffuse_indirect", text="Indirect", toggle=True)
- row = split.row(align=True)
- row.use_property_split = False
+ row = col.row(heading="Glossy", align=True)
row.prop(cycles_view_layer, "denoising_glossy_direct", text="Direct", toggle=True)
row.prop(cycles_view_layer, "denoising_glossy_indirect", text="Indirect", toggle=True)
- split = layout.split(factor=0.5)
- split.active = cycles_view_layer.use_denoising or cycles_view_layer.denoising_store_passes
-
- col = split.column()
- col.alignment = 'RIGHT'
- col.label(text="Transmission")
-
- row = split.row(align=True)
- row.use_property_split = False
+ row = col.row(heading="Transmission", align=True)
row.prop(cycles_view_layer, "denoising_transmission_direct", text="Direct", toggle=True)
row.prop(cycles_view_layer, "denoising_transmission_indirect", text="Indirect", toggle=True)
- split = layout.split(factor=0.5)
- split.active = cycles_view_layer.use_denoising or cycles_view_layer.denoising_store_passes
-
- col = split.column()
- col.alignment = 'RIGHT'
- col.label(text="Subsurface")
-
- row = split.row(align=True)
- row.use_property_split = False
- row.prop(cycles_view_layer, "denoising_subsurface_direct", text="Direct", toggle=True)
- row.prop(cycles_view_layer, "denoising_subsurface_indirect", text="Indirect", toggle=True)
-
class CYCLES_PT_post_processing(CyclesButtonsPanel, Panel):
bl_label = "Post Processing"
@@ -1011,7 +1056,7 @@ class CYCLES_PT_post_processing(CyclesButtonsPanel, Panel):
rd = context.scene.render
- col = layout.column(align=True)
+ col = layout.column(align=True, heading="Pipeline")
col.prop(rd, "use_compositing")
col.prop(rd, "use_sequencer")
@@ -1026,20 +1071,27 @@ class CYCLES_CAMERA_PT_dof(CyclesButtonsPanel, Panel):
def poll(cls, context):
return context.camera and CyclesButtonsPanel.poll(context)
+ def draw_header(self, context):
+ cam = context.camera
+ dof = cam.dof
+ self.layout.prop(dof, "use_dof", text="")
+
def draw(self, context):
layout = self.layout
layout.use_property_split = True
cam = context.camera
+ dof = cam.dof
+ layout.active = dof.use_dof
split = layout.split()
col = split.column()
- col.prop(cam, "dof_object", text="Focus Object")
+ col.prop(dof, "focus_object", text="Focus Object")
sub = col.row()
- sub.active = cam.dof_object is None
- sub.prop(cam, "dof_distance", text="Distance")
+ sub.active = dof.focus_object is None
+ sub.prop(dof, "focus_distance", text="Distance")
class CYCLES_CAMERA_PT_dof_aperture(CyclesButtonsPanel, Panel):
@@ -1053,44 +1105,17 @@ class CYCLES_CAMERA_PT_dof_aperture(CyclesButtonsPanel, Panel):
def draw(self, context):
layout = self.layout
layout.use_property_split = True
- flow = layout.grid_flow(row_major=True, columns=0, even_columns=True, even_rows=False, align=False)
cam = context.camera
- ccam = cam.cycles
-
- col = flow.column()
- col.prop(ccam, "aperture_type")
- if ccam.aperture_type == 'RADIUS':
- col.prop(ccam, "aperture_size", text="Size")
- elif ccam.aperture_type == 'FSTOP':
- col.prop(ccam, "aperture_fstop", text="Number")
- col.separator()
-
- col = flow.column()
- col.prop(ccam, "aperture_blades", text="Blades")
- col.prop(ccam, "aperture_rotation", text="Rotation")
- col.prop(ccam, "aperture_ratio", text="Ratio")
-
-
-class CYCLES_CAMERA_PT_dof_viewport(CyclesButtonsPanel, Panel):
- bl_label = "Viewport"
- bl_parent_id = "CYCLES_CAMERA_PT_dof"
-
- @classmethod
- def poll(cls, context):
- return context.camera and CyclesButtonsPanel.poll(context)
-
- def draw(self, context):
- layout = self.layout
- layout.use_property_split = True
+ dof = cam.dof
+ layout.active = dof.use_dof
flow = layout.grid_flow(row_major=True, columns=0, even_columns=True, even_rows=False, align=False)
- cam = context.camera
- dof_options = cam.gpu_dof
-
- sub = flow.column(align=True)
- sub.prop(dof_options, "fstop")
- sub.prop(dof_options, "blades")
+ col = flow.column()
+ col.prop(dof, "aperture_fstop")
+ col.prop(dof, "aperture_blades")
+ col.prop(dof, "aperture_rotation")
+ col.prop(dof, "aperture_ratio")
class CYCLES_PT_context_material(CyclesButtonsPanel, Panel):
@@ -1187,6 +1212,7 @@ class CYCLES_OBJECT_PT_motion_blur(CyclesButtonsPanel, Panel):
def draw(self, context):
layout = self.layout
+ layout.use_property_split = True
rd = context.scene.render
# scene = context.scene
@@ -1196,33 +1222,78 @@ class CYCLES_OBJECT_PT_motion_blur(CyclesButtonsPanel, Panel):
layout.active = (rd.use_motion_blur and cob.use_motion_blur)
- row = layout.row()
+ col = layout.column()
+ col.prop(cob, "motion_steps", text="Steps")
if ob.type != 'CAMERA':
- row.prop(cob, "use_deform_motion", text="Deformation")
- row.prop(cob, "motion_steps", text="Steps")
+ col.prop(cob, "use_deform_motion", text="Deformation")
+
+
+def has_geometry_visibility(ob):
+ return ob and ((ob.type in {'MESH', 'CURVE', 'SURFACE', 'FONT', 'META', 'LIGHT'}) or
+ (ob.instance_type == 'COLLECTION' and ob.instance_collection))
-class CYCLES_OBJECT_PT_cycles_settings(CyclesButtonsPanel, Panel):
- bl_label = "Cycles Settings"
+class CYCLES_OBJECT_PT_shading(CyclesButtonsPanel, Panel):
+ bl_label = "Shading"
bl_context = "object"
bl_options = {'DEFAULT_CLOSED'}
@classmethod
def poll(cls, context):
+ return CyclesButtonsPanel.poll(context) and (context.object)
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+
+ flow = layout.grid_flow(row_major=False, columns=0, even_columns=True, even_rows=False, align=False)
ob = context.object
- return (CyclesButtonsPanel.poll(context) and
- ob and ((ob.type in {'MESH', 'CURVE', 'SURFACE', 'FONT', 'META', 'LIGHT'}) or
- (ob.instance_type == 'COLLECTION' and ob.instance_collection)))
+ cob = ob.cycles
+
+ if has_geometry_visibility(ob):
+ col = flow.column()
+ col.prop(cob, "shadow_terminator_offset")
+
+
+class CYCLES_OBJECT_PT_visibility(CyclesButtonsPanel, Panel):
+ bl_label = "Visibility"
+ bl_context = "object"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ return CyclesButtonsPanel.poll(context) and (context.object)
def draw(self, context):
- pass
+ layout = self.layout
+ layout.use_property_split = True
+
+ ob = context.object
+ layout.prop(ob, "hide_select", text="Selectable", invert_checkbox=True, toggle=False)
-class CYCLES_OBJECT_PT_cycles_settings_ray_visibility(CyclesButtonsPanel, Panel):
+ col = layout.column(heading="Show in")
+ col.prop(ob, "hide_viewport", text="Viewports", invert_checkbox=True, toggle=False)
+ col.prop(ob, "hide_render", text="Renders", invert_checkbox=True, toggle=False)
+
+ if has_geometry_visibility(ob):
+ cob = ob.cycles
+ col = layout.column(heading="Mask")
+ col.prop(cob, "is_shadow_catcher")
+ col.prop(cob, "is_holdout")
+
+
+class CYCLES_OBJECT_PT_visibility_ray_visibility(CyclesButtonsPanel, Panel):
bl_label = "Ray Visibility"
- bl_parent_id = "CYCLES_OBJECT_PT_cycles_settings"
+ bl_parent_id = "CYCLES_OBJECT_PT_visibility"
bl_context = "object"
+ @classmethod
+ def poll(cls, context):
+ ob = context.object
+ return CyclesButtonsPanel.poll(context) and has_geometry_visibility(ob)
+
def draw(self, context):
layout = self.layout
layout.use_property_split = True
@@ -1233,38 +1304,27 @@ class CYCLES_OBJECT_PT_cycles_settings_ray_visibility(CyclesButtonsPanel, Panel)
cob = ob.cycles
visibility = ob.cycles_visibility
- flow = layout.grid_flow(row_major=True, columns=0, even_columns=True, even_rows=False, align=False)
-
- col = flow.column()
+ col = layout.column()
col.prop(visibility, "camera")
- col = flow.column()
col.prop(visibility, "diffuse")
- col = flow.column()
col.prop(visibility, "glossy")
- col = flow.column()
col.prop(visibility, "transmission")
- col = flow.column()
col.prop(visibility, "scatter")
if ob.type != 'LIGHT':
- col = flow.column()
- col.prop(visibility, "shadow")
-
- layout.separator()
-
- flow = layout.grid_flow(row_major=True, columns=0, even_columns=True, even_rows=False, align=False)
-
- col = flow.column()
- col.prop(cob, "is_shadow_catcher")
- col = flow.column()
- col.prop(cob, "is_holdout")
+ sub = col.column()
+ sub.prop(visibility, "shadow")
-class CYCLES_OBJECT_PT_cycles_settings_performance(CyclesButtonsPanel, Panel):
- bl_label = "Performance"
- bl_parent_id = "CYCLES_OBJECT_PT_cycles_settings"
+class CYCLES_OBJECT_PT_visibility_culling(CyclesButtonsPanel, Panel):
+ bl_label = "Culling"
+ bl_parent_id = "CYCLES_OBJECT_PT_visibility"
bl_context = "object"
+ @classmethod
+ def poll(cls, context):
+ ob = context.object
+ return CyclesButtonsPanel.poll(context) and has_geometry_visibility(ob)
def draw(self, context):
layout = self.layout
@@ -1276,15 +1336,13 @@ class CYCLES_OBJECT_PT_cycles_settings_performance(CyclesButtonsPanel, Panel):
ob = context.object
cob = ob.cycles
- flow = layout.grid_flow(row_major=True, columns=0, even_columns=True, even_rows=False, align=False)
-
- col = flow.column()
- col.active = scene.render.use_simplify and cscene.use_camera_cull
- col.prop(cob, "use_camera_cull")
+ row = layout.row()
+ row.active = scene.render.use_simplify and cscene.use_camera_cull
+ row.prop(cob, "use_camera_cull")
- col = flow.column()
- col.active = scene.render.use_simplify and cscene.use_distance_cull
- col.prop(cob, "use_distance_cull")
+ row = layout.row()
+ row.active = scene.render.use_simplify and cscene.use_distance_cull
+ row.prop(cob, "use_distance_cull")
def panel_node_draw(layout, id_data, output_type, input_name):
@@ -1341,8 +1399,6 @@ class CYCLES_LIGHT_PT_light(CyclesButtonsPanel, Panel):
light = context.light
clamp = light.cycles
- layout.use_property_decorate = False
-
if self.bl_space_type == 'PROPERTIES':
layout.row().prop(light, "type", expand=True)
layout.use_property_split = True
@@ -1352,8 +1408,14 @@ class CYCLES_LIGHT_PT_light(CyclesButtonsPanel, Panel):
col = layout.column()
- if light.type in {'POINT', 'SUN', 'SPOT'}:
- col.prop(light, "shadow_soft_size", text="Size")
+ col.prop(light, "color")
+ col.prop(light, "energy")
+ col.separator()
+
+ if light.type in {'POINT', 'SPOT'}:
+ col.prop(light, "shadow_soft_size", text="Radius")
+ elif light.type == 'SUN':
+ col.prop(light, "angle")
elif light.type == 'AREA':
col.prop(light, "shape", text="Shape")
sub = col.column(align=True)
@@ -1394,9 +1456,10 @@ class CYCLES_LIGHT_PT_nodes(CyclesButtonsPanel, Panel):
def draw(self, context):
layout = self.layout
+ layout.use_property_split = True
+
light = context.light
- if not panel_node_draw(layout, light, 'OUTPUT_LIGHT', 'Surface'):
- layout.prop(light, "color")
+ panel_node_draw(layout, light, 'OUTPUT_LIGHT', 'Surface')
class CYCLES_LIGHT_PT_spot(CyclesButtonsPanel, Panel):
@@ -1444,6 +1507,8 @@ class CYCLES_WORLD_PT_surface(CyclesButtonsPanel, Panel):
def draw(self, context):
layout = self.layout
+ layout.use_property_split = True
+
world = context.world
if not panel_node_draw(layout, world, 'OUTPUT_WORLD', 'Surface'):
@@ -1463,6 +1528,8 @@ class CYCLES_WORLD_PT_volume(CyclesButtonsPanel, Panel):
def draw(self, context):
layout = self.layout
+ layout.use_property_split = True
+
world = context.world
panel_node_draw(layout, world, 'OUTPUT_WORLD', 'Volume')
@@ -1533,17 +1600,18 @@ class CYCLES_WORLD_PT_ray_visibility(CyclesButtonsPanel, Panel):
def draw(self, context):
layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
world = context.world
visibility = world.cycles_visibility
- flow = layout.column_flow()
-
- flow.prop(visibility, "camera")
- flow.prop(visibility, "diffuse")
- flow.prop(visibility, "glossy")
- flow.prop(visibility, "transmission")
- flow.prop(visibility, "scatter")
+ col = layout.column()
+ col.prop(visibility, "camera")
+ col.prop(visibility, "diffuse")
+ col.prop(visibility, "glossy")
+ col.prop(visibility, "transmission")
+ col.prop(visibility, "scatter")
class CYCLES_WORLD_PT_settings(CyclesButtonsPanel, Panel):
@@ -1619,6 +1687,9 @@ class CYCLES_WORLD_PT_settings_volume(CyclesButtonsPanel, Panel):
sub.prop(cworld, "volume_sampling", text="Sampling")
col.prop(cworld, "volume_interpolation", text="Interpolation")
col.prop(cworld, "homogeneous_volume", text="Homogeneous")
+ sub = col.column()
+ sub.active = not cworld.homogeneous_volume
+ sub.prop(cworld, "volume_step_size")
class CYCLES_MATERIAL_PT_preview(CyclesButtonsPanel, Panel):
@@ -1647,6 +1718,8 @@ class CYCLES_MATERIAL_PT_surface(CyclesButtonsPanel, Panel):
def draw(self, context):
layout = self.layout
+ layout.use_property_split = True
+
mat = context.material
if not panel_node_draw(layout, mat, 'OUTPUT_MATERIAL', 'Surface'):
layout.prop(mat, "diffuse_color")
@@ -1665,6 +1738,8 @@ class CYCLES_MATERIAL_PT_volume(CyclesButtonsPanel, Panel):
def draw(self, context):
layout = self.layout
+ layout.use_property_split = True
+
mat = context.material
# cmat = mat.cycles
@@ -1683,6 +1758,8 @@ class CYCLES_MATERIAL_PT_displacement(CyclesButtonsPanel, Panel):
def draw(self, context):
layout = self.layout
+ layout.use_property_split = True
+
mat = context.material
panel_node_draw(layout, mat, 'OUTPUT_MATERIAL', 'Displacement')
@@ -1725,7 +1802,7 @@ class CYCLES_MATERIAL_PT_settings_surface(CyclesButtonsPanel, Panel):
col = layout.column()
col.prop(cmat, "sample_as_light", text="Multiple Importance")
col.prop(cmat, "use_transparent_shadow")
- col.prop(cmat, "displacement_method", text="Displacement Method")
+ col.prop(cmat, "displacement_method", text="Displacement")
def draw(self, context):
self.draw_shared(self, context.material)
@@ -1750,6 +1827,9 @@ class CYCLES_MATERIAL_PT_settings_volume(CyclesButtonsPanel, Panel):
sub.prop(cmat, "volume_sampling", text="Sampling")
col.prop(cmat, "volume_interpolation", text="Interpolation")
col.prop(cmat, "homogeneous_volume", text="Homogeneous")
+ sub = col.column()
+ sub.active = not cmat.homogeneous_volume
+ sub.prop(cmat, "volume_step_rate")
def draw(self, context):
self.draw_shared(self, context, context.material)
@@ -1761,6 +1841,10 @@ class CYCLES_RENDER_PT_bake(CyclesButtonsPanel, Panel):
bl_options = {'DEFAULT_CLOSED'}
COMPAT_ENGINES = {'CYCLES'}
+ @classmethod
+ def poll(cls, context):
+ return CyclesButtonsPanel.poll(context) and not use_optix(context)
+
def draw(self, context):
layout = self.layout
layout.use_property_split = True
@@ -1793,7 +1877,7 @@ class CYCLES_RENDER_PT_bake_influence(CyclesButtonsPanel, Panel):
cscene = scene.cycles
rd = scene.render
if rd.use_bake_multires == False and cscene.bake_type in {
- 'NORMAL', 'COMBINED', 'DIFFUSE', 'GLOSSY', 'TRANSMISSION', 'SUBSURFACE'}:
+ 'NORMAL', 'COMBINED', 'DIFFUSE', 'GLOSSY', 'TRANSMISSION'}:
return True
def draw(self, context):
@@ -1817,27 +1901,24 @@ class CYCLES_RENDER_PT_bake_influence(CyclesButtonsPanel, Panel):
sub.prop(cbk, "normal_b", text="B")
elif cscene.bake_type == 'COMBINED':
- row = col.row(align=True)
- row.use_property_split = False
- row.prop(cbk, "use_pass_direct", toggle=True)
- row.prop(cbk, "use_pass_indirect", toggle=True)
- flow = col.grid_flow(row_major=False, columns=0, even_columns=False, even_rows=False, align=True)
+ col = layout.column(heading="Lighting", align=True)
+ col.prop(cbk, "use_pass_direct")
+ col.prop(cbk, "use_pass_indirect")
- flow.active = cbk.use_pass_direct or cbk.use_pass_indirect
- flow.prop(cbk, "use_pass_diffuse")
- flow.prop(cbk, "use_pass_glossy")
- flow.prop(cbk, "use_pass_transmission")
- flow.prop(cbk, "use_pass_subsurface")
- flow.prop(cbk, "use_pass_ambient_occlusion")
- flow.prop(cbk, "use_pass_emit")
+ col = layout.column(heading="Contributions", align=True)
+ col.active = cbk.use_pass_direct or cbk.use_pass_indirect
+ col.prop(cbk, "use_pass_diffuse")
+ col.prop(cbk, "use_pass_glossy")
+ col.prop(cbk, "use_pass_transmission")
+ col.prop(cbk, "use_pass_ambient_occlusion")
+ col.prop(cbk, "use_pass_emit")
- elif cscene.bake_type in {'DIFFUSE', 'GLOSSY', 'TRANSMISSION', 'SUBSURFACE'}:
- row = col.row(align=True)
- row.use_property_split = False
- row.prop(cbk, "use_pass_direct", toggle=True)
- row.prop(cbk, "use_pass_indirect", toggle=True)
- row.prop(cbk, "use_pass_color", toggle=True)
+ elif cscene.bake_type in {'DIFFUSE', 'GLOSSY', 'TRANSMISSION'}:
+ col = layout.column(heading="Contributions", align=True)
+ col.prop(cbk, "use_pass_direct")
+ col.prop(cbk, "use_pass_indirect")
+ col.prop(cbk, "use_pass_color")
class CYCLES_RENDER_PT_bake_selected_to_active(CyclesButtonsPanel, Panel):
@@ -1873,10 +1954,15 @@ class CYCLES_RENDER_PT_bake_selected_to_active(CyclesButtonsPanel, Panel):
col.prop(cbk, "use_cage", text="Cage")
if cbk.use_cage:
- col.prop(cbk, "cage_extrusion", text="Extrusion")
- col.prop(cbk, "cage_object", text="Cage Object")
+ col.prop(cbk, "cage_object")
+ col = layout.column()
+ col.prop(cbk, "cage_extrusion")
+ col.active = cbk.cage_object is None
else:
- col.prop(cbk, "cage_extrusion", text="Ray Distance")
+ col.prop(cbk, "cage_extrusion", text="Extrusion")
+
+ col = layout.column()
+ col.prop(cbk, "max_ray_distance")
class CYCLES_RENDER_PT_bake_output(CyclesButtonsPanel, Panel):
@@ -1900,7 +1986,7 @@ class CYCLES_RENDER_PT_bake_output(CyclesButtonsPanel, Panel):
layout.prop(rd, "use_bake_clear", text="Clear Image")
if rd.bake_type == 'DISPLACEMENT':
- col.prop(rd, "use_bake_lores_mesh")
+ layout.prop(rd, "use_bake_lores_mesh")
else:
layout.prop(cbk, "margin")
@@ -1915,7 +2001,10 @@ class CYCLES_RENDER_PT_debug(CyclesButtonsPanel, Panel):
@classmethod
def poll(cls, context):
- return CyclesButtonsPanel.poll(context) and bpy.app.debug_value == 256
+ prefs = bpy.context.preferences
+ return (CyclesButtonsPanel.poll(context)
+ and prefs.experimental.use_cycles_debug
+ and prefs.view.show_developer_ui)
def draw(self, context):
layout = self.layout
@@ -1945,7 +2034,14 @@ class CYCLES_RENDER_PT_debug(CyclesButtonsPanel, Panel):
col.separator()
col = layout.column()
- col.label(text='OpenCL Flags:')
+ col.label(text="OptiX Flags:")
+ col.prop(cscene, "debug_optix_cuda_streams")
+ col.prop(cscene, "debug_optix_curves_api")
+
+ col.separator()
+
+ col = layout.column()
+ col.label(text="OpenCL Flags:")
col.prop(cscene, "debug_opencl_device_type", text="Device")
col.prop(cscene, "debug_use_opencl_debug", text="Debug")
col.prop(cscene, "debug_opencl_mem_limit")
@@ -1992,7 +2088,7 @@ class CYCLES_RENDER_PT_simplify_viewport(CyclesButtonsPanel, Panel):
col.prop(rd, "simplify_child_particles", text="Child Particles")
col.prop(cscene, "texture_limit", text="Texture Limit")
col.prop(cscene, "ao_bounces", text="AO Bounces")
- col.prop(rd, "use_simplify_smoke_highres")
+
class CYCLES_RENDER_PT_simplify_render(CyclesButtonsPanel, Panel):
bl_label = "Render"
@@ -2037,19 +2133,83 @@ class CYCLES_RENDER_PT_simplify_culling(CyclesButtonsPanel, Panel):
layout.active = rd.use_simplify
- col = layout.column()
- col.prop(cscene, "use_camera_cull")
- sub = col.column()
+ row = layout.row(heading="Camera Culling")
+ row.prop(cscene, "use_camera_cull", text="")
+ sub = row.column()
sub.active = cscene.use_camera_cull
- sub.prop(cscene, "camera_cull_margin")
+ sub.prop(cscene, "camera_cull_margin", text="")
- col = layout.column()
- col.prop(cscene, "use_distance_cull")
- sub = col.column()
+ row = layout.row(heading="Distance Culling")
+ row.prop(cscene, "use_distance_cull", text="")
+ sub = row.column()
sub.active = cscene.use_distance_cull
- sub.prop(cscene, "distance_cull_margin", text="Distance")
+ sub.prop(cscene, "distance_cull_margin", text="")
+
+
+class CYCLES_VIEW3D_PT_shading_render_pass(Panel):
+ bl_space_type = 'VIEW_3D'
+ bl_region_type = 'HEADER'
+ bl_label = "Render Pass"
+ bl_parent_id = 'VIEW3D_PT_shading'
+ COMPAT_ENGINES = {'CYCLES'}
+
+ @classmethod
+ def poll(cls, context):
+ return (context.engine in cls.COMPAT_ENGINES
+ and context.space_data.shading.type == 'RENDERED')
+
+ def draw(self, context):
+ shading = context.space_data.shading
+
+ layout = self.layout
+ layout.prop(shading.cycles, "render_pass", text="")
+class CYCLES_VIEW3D_PT_shading_lighting(Panel):
+ bl_space_type = 'VIEW_3D'
+ bl_region_type = 'HEADER'
+ bl_label = "Lighting"
+ bl_parent_id = 'VIEW3D_PT_shading'
+ COMPAT_ENGINES = {'CYCLES'}
+
+ @classmethod
+ def poll(cls, context):
+ return (context.engine in cls.COMPAT_ENGINES
+ and context.space_data.shading.type == 'RENDERED')
+
+ def draw(self, context):
+ layout = self.layout
+ col = layout.column()
+ split = col.split(factor=0.9)
+
+ shading = context.space_data.shading
+ col.prop(shading, "use_scene_lights_render")
+ col.prop(shading, "use_scene_world_render")
+
+ if not shading.use_scene_world_render:
+ col = layout.column()
+ split = col.split(factor=0.9)
+
+ col = split.column()
+ sub = col.row()
+ sub.scale_y = 0.6
+ sub.template_icon_view(shading, "studio_light", scale_popup=3)
+
+ col = split.column()
+ col.operator("preferences.studiolight_show", emboss=False, text="", icon='PREFERENCES')
+
+ split = layout.split(factor=0.9)
+ col = split.column()
+ col.prop(shading, "studiolight_rotate_z", text="Rotation")
+ col.prop(shading, "studiolight_intensity")
+ col.prop(shading, "studiolight_background_alpha")
+
+class CYCLES_VIEW3D_PT_simplify_greasepencil(CyclesButtonsPanel, Panel, GreasePencilSimplifyPanel):
+ bl_label = "Grease Pencil"
+ bl_parent_id = "CYCLES_RENDER_PT_simplify"
+ COMPAT_ENGINES = {'CYCLES'}
+ bl_options = {'DEFAULT_CLOSED'}
+
def draw_device(self, context):
scene = context.scene
layout = self.layout
@@ -2063,8 +2223,6 @@ def draw_device(self, context):
col = layout.column()
col.prop(cscene, "feature_set")
- scene = context.scene
-
col = layout.column()
col.active = show_device_active(context)
col.prop(cscene, "device")
@@ -2083,7 +2241,7 @@ def draw_pause(self, context):
if view.shading.type == 'RENDERED':
cscene = scene.cycles
- layout.prop(cscene, "preview_pause", icon='PAUSE', text="")
+ layout.prop(cscene, "preview_pause", icon='PLAY' if cscene.preview_pause else 'PAUSE', text="")
def get_panels():
@@ -2098,6 +2256,7 @@ def get_panels():
'MATERIAL_PT_preview',
'NODE_DATA_PT_light',
'NODE_DATA_PT_spot',
+ 'OBJECT_PT_visibility',
'VIEWLAYER_PT_filter',
'VIEWLAYER_PT_layer_passes',
'RENDER_PT_post_processing',
@@ -2118,6 +2277,8 @@ classes = (
CYCLES_PT_integrator_presets,
CYCLES_RENDER_PT_sampling,
CYCLES_RENDER_PT_sampling_sub_samples,
+ CYCLES_RENDER_PT_sampling_adaptive,
+ CYCLES_RENDER_PT_sampling_denoising,
CYCLES_RENDER_PT_sampling_advanced,
CYCLES_RENDER_PT_light_paths,
CYCLES_RENDER_PT_light_paths_max_bounces,
@@ -2130,6 +2291,9 @@ classes = (
CYCLES_RENDER_PT_simplify_viewport,
CYCLES_RENDER_PT_simplify_render,
CYCLES_RENDER_PT_simplify_culling,
+ CYCLES_VIEW3D_PT_simplify_greasepencil,
+ CYCLES_VIEW3D_PT_shading_lighting,
+ CYCLES_VIEW3D_PT_shading_render_pass,
CYCLES_RENDER_PT_motion_blur,
CYCLES_RENDER_PT_motion_blur_curve,
CYCLES_RENDER_PT_film,
@@ -2141,23 +2305,25 @@ classes = (
CYCLES_RENDER_PT_performance_acceleration_structure,
CYCLES_RENDER_PT_performance_final_render,
CYCLES_RENDER_PT_performance_viewport,
- CYCLES_RENDER_PT_filter,
- CYCLES_RENDER_PT_override,
CYCLES_RENDER_PT_passes,
CYCLES_RENDER_PT_passes_data,
CYCLES_RENDER_PT_passes_light,
CYCLES_RENDER_PT_passes_crypto,
CYCLES_RENDER_PT_passes_debug,
+ CYCLES_RENDER_UL_aov,
+ CYCLES_RENDER_PT_passes_aov,
+ CYCLES_RENDER_PT_filter,
+ CYCLES_RENDER_PT_override,
CYCLES_RENDER_PT_denoising,
CYCLES_PT_post_processing,
CYCLES_CAMERA_PT_dof,
CYCLES_CAMERA_PT_dof_aperture,
- CYCLES_CAMERA_PT_dof_viewport,
CYCLES_PT_context_material,
CYCLES_OBJECT_PT_motion_blur,
- CYCLES_OBJECT_PT_cycles_settings,
- CYCLES_OBJECT_PT_cycles_settings_ray_visibility,
- CYCLES_OBJECT_PT_cycles_settings_performance,
+ CYCLES_OBJECT_PT_shading,
+ CYCLES_OBJECT_PT_visibility,
+ CYCLES_OBJECT_PT_visibility_ray_visibility,
+ CYCLES_OBJECT_PT_visibility_culling,
CYCLES_LIGHT_PT_preview,
CYCLES_LIGHT_PT_light,
CYCLES_LIGHT_PT_nodes,
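For orientation, the classes tuple being reordered and extended here (it continues past the end of this hunk) is what the add-on's register()/unregister() functions iterate over; those functions sit outside the hunks shown, so the following is only a sketch of the usual Blender pattern they follow, assuming the tuple name used above:

def register():
    from bpy.utils import register_class
    # Register every UI class declared in the tuple, in order.
    for cls in classes:
        register_class(cls)


def unregister():
    from bpy.utils import unregister_class
    # Remove them again, in reverse order.
    for cls in reversed(classes):
        unregister_class(cls)
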
diff --git a/intern/cycles/blender/addon/version_update.py b/intern/cycles/blender/addon/version_update.py
index 6f005727b95..49f23f4ba30 100644
--- a/intern/cycles/blender/addon/version_update.py
+++ b/intern/cycles/blender/addon/version_update.py
@@ -22,140 +22,6 @@ import math
from bpy.app.handlers import persistent
-def foreach_cycles_nodetree_group(nodetree, traversed):
- for node in nodetree.nodes:
- if node.bl_idname == 'ShaderNodeGroup':
- group = node.node_tree
- if group and group not in traversed:
- traversed.add(group)
- yield group, group.library
- yield from foreach_cycles_nodetree_group(group, traversed)
-
-
-def foreach_cycles_nodetree():
- traversed = set()
-
- for material in bpy.data.materials:
- nodetree = material.node_tree
- if nodetree:
- yield nodetree, material.library
- yield from foreach_cycles_nodetree_group(nodetree, traversed)
-
- for world in bpy.data.worlds:
- nodetree = world.node_tree
- if nodetree:
- yield nodetree, world.library
- foreach_cycles_nodetree_group(nodetree, traversed)
-
- for light in bpy.data.lights:
- nodetree = light.node_tree
- if nodetree:
- yield nodetree, light.library
- foreach_cycles_nodetree_group(nodetree, traversed)
-
-
-def displacement_node_insert(nodetree):
- # Gather links to replace
- displacement_links = []
- for link in nodetree.links:
- if (
- link.to_node.bl_idname == 'ShaderNodeOutputMaterial' and
- link.from_node.bl_idname != 'ShaderNodeDisplacement' and
- link.to_socket.identifier == 'Displacement'
- ):
- displacement_links.append(link)
-
- # Replace links with displacement node
- for link in displacement_links:
- from_node = link.from_node
- from_socket = link.from_socket
- to_node = link.to_node
- to_socket = link.to_socket
-
- nodetree.links.remove(link)
-
- node = nodetree.nodes.new(type='ShaderNodeDisplacement')
- node.location[0] = 0.5 * (from_node.location[0] + to_node.location[0])
- node.location[1] = 0.5 * (from_node.location[1] + to_node.location[1])
- node.inputs['Scale'].default_value = 0.1
- node.inputs['Midlevel'].default_value = 0.0
-
- nodetree.links.new(from_socket, node.inputs['Height'])
- nodetree.links.new(node.outputs['Displacement'], to_socket)
-
-
-def displacement_principled_nodes(node):
- if node.bl_idname == 'ShaderNodeDisplacement':
- if node.space != 'WORLD':
- node.space = 'OBJECT'
- if node.bl_idname == 'ShaderNodeBsdfPrincipled':
- if node.subsurface_method != 'RANDOM_WALK':
- node.subsurface_method = 'BURLEY'
-
-
-def square_roughness_node_insert(nodetree):
- roughness_node_types = {
- 'ShaderNodeBsdfAnisotropic',
- 'ShaderNodeBsdfGlass',
- 'ShaderNodeBsdfGlossy',
- 'ShaderNodeBsdfRefraction'}
-
- # Update default values
- for node in nodetree.nodes:
- if node.bl_idname in roughness_node_types:
- roughness_input = node.inputs['Roughness']
- roughness_input.default_value = math.sqrt(max(roughness_input.default_value, 0.0))
-
- # Gather roughness links to replace
- roughness_links = []
- for link in nodetree.links:
- if link.to_node.bl_idname in roughness_node_types and \
- link.to_socket.identifier == 'Roughness':
- roughness_links.append(link)
-
- # Replace links with sqrt node
- for link in roughness_links:
- from_node = link.from_node
- from_socket = link.from_socket
- to_node = link.to_node
- to_socket = link.to_socket
-
- nodetree.links.remove(link)
-
- node = nodetree.nodes.new(type='ShaderNodeMath')
- node.operation = 'POWER'
- node.location[0] = 0.5 * (from_node.location[0] + to_node.location[0])
- node.location[1] = 0.5 * (from_node.location[1] + to_node.location[1])
-
- nodetree.links.new(from_socket, node.inputs[0])
- node.inputs[1].default_value = 0.5
- nodetree.links.new(node.outputs['Value'], to_socket)
-
-
-def mapping_node_order_flip(node):
- """
- Flip euler order of mapping shader node
- """
- if node.bl_idname == 'ShaderNodeMapping':
- rot = node.rotation.copy()
- rot.order = 'ZYX'
- quat = rot.to_quaternion()
- node.rotation = quat.to_euler('XYZ')
-
-
-def vector_curve_node_remap(node):
- """
- Remap values of vector curve node from normalized to absolute values
- """
- if node.bl_idname == 'ShaderNodeVectorCurve':
- node.mapping.use_clip = False
- for curve in node.mapping.curves:
- for point in curve.points:
- point.location.x = (point.location.x * 2.0) - 1.0
- point.location.y = (point.location.y - 0.5) * 2.0
- node.mapping.update()
-
-
def custom_bake_remap(scene):
"""
Remap bake types into the new types and set the flags accordingly
@@ -176,10 +42,7 @@ def custom_bake_remap(scene):
'GLOSSY_COLOR',
'TRANSMISSION_DIRECT',
'TRANSMISSION_INDIRECT',
- 'TRANSMISSION_COLOR',
- 'SUBSURFACE_DIRECT',
- 'SUBSURFACE_INDIRECT',
- 'SUBSURFACE_COLOR')
+ 'TRANSMISSION_COLOR')
diffuse_direct_idx = bake_lookup.index('DIFFUSE_DIRECT')
@@ -213,28 +76,6 @@ def custom_bake_remap(scene):
scene.render.bake.use_pass_indirect = False
-def ambient_occlusion_node_relink(nodetree):
- for node in nodetree.nodes:
- if node.bl_idname == 'ShaderNodeAmbientOcclusion':
- node.samples = 1
- node.only_local = False
- node.inputs['Distance'].default_value = 0.0
-
- # Gather links to replace
- ao_links = []
- for link in nodetree.links:
- if link.from_node.bl_idname == 'ShaderNodeAmbientOcclusion':
- ao_links.append(link)
-
- # Replace links
- for link in ao_links:
- from_node = link.from_node
- to_socket = link.to_socket
-
- nodetree.links.remove(link)
- nodetree.links.new(from_node.outputs['Color'], to_socket)
-
-
@persistent
def do_versions(self):
if bpy.context.preferences.version <= (2, 78, 1):
@@ -411,48 +252,3 @@ def do_versions(self):
cmat = mat.cycles
if not cmat.is_property_set("displacement_method"):
cmat.displacement_method = 'DISPLACEMENT'
-
- # Nodes
- for nodetree, library in foreach_cycles_nodetree():
- if library not in libraries:
- continue
-
- # Euler order was ZYX in previous versions.
- if version <= (2, 73, 4):
- for node in nodetree.nodes:
- mapping_node_order_flip(node)
-
- if version <= (2, 76, 5):
- for node in nodetree.nodes:
- vector_curve_node_remap(node)
-
- if version <= (2, 79, 1) or \
- (version >= (2, 80, 0) and version <= (2, 80, 3)):
- displacement_node_insert(nodetree)
-
- if version <= (2, 79, 2):
- for node in nodetree.nodes:
- displacement_principled_nodes(node)
-
- if version <= (2, 79, 3) or \
- (version >= (2, 80, 0) and version <= (2, 80, 4)):
- # Switch to squared roughness convention
- square_roughness_node_insert(nodetree)
-
- if version <= (2, 79, 4):
- ambient_occlusion_node_relink(nodetree)
-
- # Particles
- for part in bpy.data.particles:
- if part.library not in libraries:
- continue
-
- # Copy cycles hair settings to internal settings
- if version <= (2, 80, 15):
- cpart = part.get("cycles", None)
- if cpart:
- part.shape = cpart.get("shape", 0.0)
- part.root_radius = cpart.get("root_width", 1.0)
- part.tip_radius = cpart.get("tip_width", 0.0)
- part.radius_scale = cpart.get("radius_scale", 0.01)
- part.use_close_tip = cpart.get("use_closetip", True)
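The do_versions() handler trimmed in this file keys each fix-up on the version the .blend file (or linked library) was saved with, compared as a plain tuple; a minimal sketch of that gating pattern, with a hypothetical fix-up, would be:

import bpy
from bpy.app.handlers import persistent


@persistent
def do_versions_sketch(_arg):
    # bpy.data.version is the (major, minor, patch) Blender version the
    # current file was saved with, so tuple comparison reads as "older than".
    version = bpy.data.version
    if version <= (2, 79, 4):
        # Hypothetical fix-up that only applies to files saved with
        # Blender 2.79.4 or earlier.
        pass

# The real handler is hooked up elsewhere in the add-on, along the lines of:
# bpy.app.handlers.version_update.append(do_versions_sketch)
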
diff --git a/intern/cycles/blender/blender_camera.cpp b/intern/cycles/blender/blender_camera.cpp
index b3bfaa992a9..592a69585de 100644
--- a/intern/cycles/blender/blender_camera.cpp
+++ b/intern/cycles/blender/blender_camera.cpp
@@ -91,16 +91,31 @@ static void blender_camera_init(BlenderCamera *bcam, BL::RenderSettings &b_rende
{
memset((void *)bcam, 0, sizeof(BlenderCamera));
+ bcam->nearclip = 1e-5f;
+ bcam->farclip = 1e5f;
+
bcam->type = CAMERA_PERSPECTIVE;
+ bcam->ortho_scale = 1.0f;
+
+ bcam->lens = 50.0f;
+ bcam->shuttertime = 1.0f;
+
+ bcam->rolling_shutter_type = Camera::ROLLING_SHUTTER_NONE;
+ bcam->rolling_shutter_duration = 0.1f;
+
+ bcam->aperturesize = 0.0f;
+ bcam->apertureblades = 0;
+ bcam->aperturerotation = 0.0f;
+ bcam->focaldistance = 10.0f;
+
bcam->zoom = 1.0f;
bcam->pixelaspect = make_float2(1.0f, 1.0f);
+ bcam->aperture_ratio = 1.0f;
+
bcam->sensor_width = 36.0f;
bcam->sensor_height = 24.0f;
bcam->sensor_fit = BlenderCamera::AUTO;
- bcam->shuttertime = 1.0f;
bcam->motion_position = Camera::MOTION_POSITION_CENTER;
- bcam->rolling_shutter_type = Camera::ROLLING_SHUTTER_NONE;
- bcam->rolling_shutter_duration = 0.1f;
bcam->border.right = 1.0f;
bcam->border.top = 1.0f;
bcam->pano_viewplane.right = 1.0f;
@@ -108,6 +123,7 @@ static void blender_camera_init(BlenderCamera *bcam, BL::RenderSettings &b_rende
bcam->viewport_camera_border.right = 1.0f;
bcam->viewport_camera_border.top = 1.0f;
bcam->offscreen_dicing_scale = 1.0f;
+ bcam->matrix = transform_identity();
/* render resolution */
bcam->full_width = render_resolution_x(b_render);
@@ -119,10 +135,10 @@ static float blender_camera_focal_distance(BL::RenderEngine &b_engine,
BL::Camera &b_camera,
BlenderCamera *bcam)
{
- BL::Object b_dof_object = b_camera.dof_object();
+ BL::Object b_dof_object = b_camera.dof().focus_object();
if (!b_dof_object)
- return b_camera.dof_distance();
+ return b_camera.dof().focus_distance();
/* for dof object, return distance along camera Z direction */
BL::Array<float, 16> b_ob_matrix;
@@ -191,26 +207,30 @@ static void blender_camera_from_object(BlenderCamera *bcam,
bcam->lens = b_camera.lens();
- /* allow f/stop number to change aperture_size but still
- * give manual control over aperture radius */
- int aperture_type = get_enum(ccamera, "aperture_type");
-
- if (aperture_type == 1) {
- float fstop = RNA_float_get(&ccamera, "aperture_fstop");
+ if (b_camera.dof().use_dof()) {
+ /* allow f/stop number to change aperture_size but still
+ * give manual control over aperture radius */
+ float fstop = b_camera.dof().aperture_fstop();
fstop = max(fstop, 1e-5f);
if (bcam->type == CAMERA_ORTHOGRAPHIC)
bcam->aperturesize = 1.0f / (2.0f * fstop);
else
bcam->aperturesize = (bcam->lens * 1e-3f) / (2.0f * fstop);
- }
- else
- bcam->aperturesize = RNA_float_get(&ccamera, "aperture_size");
- bcam->apertureblades = RNA_int_get(&ccamera, "aperture_blades");
- bcam->aperturerotation = RNA_float_get(&ccamera, "aperture_rotation");
- bcam->focaldistance = blender_camera_focal_distance(b_engine, b_ob, b_camera, bcam);
- bcam->aperture_ratio = RNA_float_get(&ccamera, "aperture_ratio");
+ bcam->apertureblades = b_camera.dof().aperture_blades();
+ bcam->aperturerotation = b_camera.dof().aperture_rotation();
+ bcam->focaldistance = blender_camera_focal_distance(b_engine, b_ob, b_camera, bcam);
+ bcam->aperture_ratio = b_camera.dof().aperture_ratio();
+ }
+ else {
+ /* DOF is turned off for the camera. */
+ bcam->aperturesize = 0.0f;
+ bcam->apertureblades = 0;
+ bcam->aperturerotation = 0.0f;
+ bcam->focaldistance = 0.0f;
+ bcam->aperture_ratio = 1.0f;
+ }
bcam->shift.x = b_engine.camera_shift_x(b_ob, bcam->use_spherical_stereo);
bcam->shift.y = b_camera.shift_y();
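As a quick sanity check of the f-stop branch added in the hunk above (numbers are illustrative only), the aperture radius works out as follows:

# Worked example of the aperture-size math in blender_camera_from_object().
lens_mm = 50.0            # b_camera.lens()
fstop = max(2.8, 1e-5)    # b_camera.dof().aperture_fstop(), clamped

# Perspective camera: radius = focal length (in meters) / (2 * f-number).
aperture_persp = (lens_mm * 1e-3) / (2.0 * fstop)   # ~0.0089
# Orthographic camera: the focal length drops out.
aperture_ortho = 1.0 / (2.0 * fstop)                # ~0.1786

print(aperture_persp, aperture_ortho)
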
@@ -689,6 +709,10 @@ static void blender_camera_from_view(BlenderCamera *bcam,
/* 3d view transform */
bcam->matrix = transform_inverse(get_transform(b_rv3d.view_matrix()));
+
+ /* dimensions */
+ bcam->full_width = width;
+ bcam->full_height = height;
}
static void blender_camera_view_subset(BL::RenderEngine &b_engine,
@@ -705,22 +729,26 @@ static void blender_camera_view_subset(BL::RenderEngine &b_engine,
BoundBox2D cam, view;
float view_aspect, cam_aspect, sensor_size;
- /* get viewport viewplane */
+ /* Get viewport viewplane. */
BlenderCamera view_bcam;
blender_camera_init(&view_bcam, b_render);
blender_camera_from_view(&view_bcam, b_engine, b_scene, b_v3d, b_rv3d, width, height, true);
blender_camera_viewplane(&view_bcam, width, height, &view, &view_aspect, &sensor_size);
- /* get camera viewplane */
+ /* Get camera viewplane. */
BlenderCamera cam_bcam;
blender_camera_init(&cam_bcam, b_render);
blender_camera_from_object(&cam_bcam, b_engine, b_ob, true);
+ /* Camera border is affected by aspect, viewport is not. */
+ cam_bcam.pixelaspect.x = b_render.pixel_aspect_x();
+ cam_bcam.pixelaspect.y = b_render.pixel_aspect_y();
+
blender_camera_viewplane(
&cam_bcam, cam_bcam.full_width, cam_bcam.full_height, &cam, &cam_aspect, &sensor_size);
- /* return */
+ /* Return */
*view_box = view * (1.0f / view_aspect);
*cam_box = cam * (1.0f / cam_aspect);
}
@@ -848,7 +876,8 @@ BufferParams BlenderSync::get_buffer_params(BL::RenderSettings &b_render,
BL::RegionView3D &b_rv3d,
Camera *cam,
int width,
- int height)
+ int height,
+ const bool use_denoiser)
{
BufferParams params;
bool use_border = false;
@@ -879,6 +908,11 @@ BufferParams BlenderSync::get_buffer_params(BL::RenderSettings &b_render,
params.height = height;
}
+ PassType display_pass = update_viewport_display_passes(b_v3d, params.passes);
+
+ /* Can only denoise the combined image pass */
+ params.denoising_data_pass = display_pass == PASS_COMBINED && use_denoiser;
+
return params;
}
diff --git a/intern/cycles/blender/blender_curves.cpp b/intern/cycles/blender/blender_curves.cpp
index d0375ceb79c..82c99631a89 100644
--- a/intern/cycles/blender/blender_curves.cpp
+++ b/intern/cycles/blender/blender_curves.cpp
@@ -17,7 +17,7 @@
#include "render/attribute.h"
#include "render/camera.h"
#include "render/curves.h"
-#include "render/mesh.h"
+#include "render/hair.h"
#include "render/object.h"
#include "render/scene.h"
@@ -38,27 +38,6 @@ ParticleCurveData::~ParticleCurveData()
{
}
-static void interp_weights(float t, float data[4])
-{
- /* Cardinal curve interpolation */
- float t2 = t * t;
- float t3 = t2 * t;
- float fc = 0.71f;
-
- data[0] = -fc * t3 + 2.0f * fc * t2 - fc * t;
- data[1] = (2.0f - fc) * t3 + (fc - 3.0f) * t2 + 1.0f;
- data[2] = (fc - 2.0f) * t3 + (3.0f - 2.0f * fc) * t2 + fc * t;
- data[3] = fc * t3 - fc * t2;
-}
-
-static void curveinterp_v3_v3v3v3v3(
- float3 *p, float3 *v1, float3 *v2, float3 *v3, float3 *v4, const float w[4])
-{
- p->x = v1->x * w[0] + v2->x * w[1] + v3->x * w[2] + v4->x * w[3];
- p->y = v1->y * w[0] + v2->y * w[1] + v3->y * w[2] + v4->y * w[3];
- p->z = v1->z * w[0] + v2->z * w[1] + v3->z * w[2] + v4->z * w[3];
-}
-
static float shaperadius(float shape, float root, float tip, float time)
{
assert(time >= 0.0f);
@@ -76,43 +55,13 @@ static float shaperadius(float shape, float root, float tip, float time)
/* curve functions */
-static void InterpolateKeySegments(
- int seg, int segno, int key, int curve, float3 *keyloc, float *time, ParticleCurveData *CData)
-{
- float3 ckey_loc1 = CData->curvekey_co[key];
- float3 ckey_loc2 = ckey_loc1;
- float3 ckey_loc3 = CData->curvekey_co[key + 1];
- float3 ckey_loc4 = ckey_loc3;
-
- if (key > CData->curve_firstkey[curve])
- ckey_loc1 = CData->curvekey_co[key - 1];
-
- if (key < CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 2)
- ckey_loc4 = CData->curvekey_co[key + 2];
-
- float time1 = CData->curvekey_time[key] / CData->curve_length[curve];
- float time2 = CData->curvekey_time[key + 1] / CData->curve_length[curve];
-
- float dfra = (time2 - time1) / (float)segno;
-
- if (time)
- *time = (dfra * seg) + time1;
-
- float t[4];
-
- interp_weights((float)seg / (float)segno, t);
-
- if (keyloc)
- curveinterp_v3_v3v3v3v3(keyloc, &ckey_loc1, &ckey_loc2, &ckey_loc3, &ckey_loc4, t);
-}
-
static bool ObtainCacheParticleData(
- Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background)
+ Hair *hair, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background)
{
int curvenum = 0;
int keyno = 0;
- if (!(mesh && b_mesh && b_ob && CData))
+ if (!(hair && b_mesh && b_ob && CData))
return false;
Transform tfm = get_transform(b_ob->matrix_world());
@@ -128,7 +77,7 @@ static bool ObtainCacheParticleData(
if ((b_part.render_type() == BL::ParticleSettings::render_type_PATH) &&
(b_part.type() == BL::ParticleSettings::type_HAIR)) {
- int shader = clamp(b_part.material() - 1, 0, mesh->used_shaders.size() - 1);
+ int shader = clamp(b_part.material() - 1, 0, hair->used_shaders.size() - 1);
int display_step = background ? b_part.render_step() : b_part.display_step();
int totparts = b_psys.particles.length();
int totchild = background ? b_psys.child_particles.length() :
@@ -173,19 +122,20 @@ static bool ObtainCacheParticleData(
CData->curve_firstkey.push_back_slow(keyno);
float curve_length = 0.0f;
- float3 pcKey;
+ float3 prev_co_world = make_float3(0.0f, 0.0f, 0.0f);
+ float3 prev_co_object = make_float3(0.0f, 0.0f, 0.0f);
for (int step_no = 0; step_no < ren_step; step_no++) {
- float nco[3];
- b_psys.co_hair(*b_ob, pa_no, step_no, nco);
- float3 cKey = make_float3(nco[0], nco[1], nco[2]);
- cKey = transform_point(&itfm, cKey);
+ float3 co_world = prev_co_world;
+ b_psys.co_hair(*b_ob, pa_no, step_no, &co_world.x);
+ float3 co_object = transform_point(&itfm, co_world);
if (step_no > 0) {
- const float step_length = len(cKey - pcKey);
+ const float step_length = len(co_object - prev_co_object);
curve_length += step_length;
}
- CData->curvekey_co.push_back_slow(cKey);
+ CData->curvekey_co.push_back_slow(co_object);
CData->curvekey_time.push_back_slow(curve_length);
- pcKey = cKey;
+ prev_co_object = co_object;
+ prev_co_world = co_world;
keynum++;
}
keyno += keynum;
@@ -201,14 +151,14 @@ static bool ObtainCacheParticleData(
return true;
}
-static bool ObtainCacheParticleUV(Mesh *mesh,
+static bool ObtainCacheParticleUV(Hair *hair,
BL::Mesh *b_mesh,
BL::Object *b_ob,
ParticleCurveData *CData,
bool background,
int uv_num)
{
- if (!(mesh && b_mesh && b_ob && CData))
+ if (!(hair && b_mesh && b_ob && CData))
return false;
CData->curve_uv.clear();
@@ -264,14 +214,14 @@ static bool ObtainCacheParticleUV(Mesh *mesh,
return true;
}
-static bool ObtainCacheParticleVcol(Mesh *mesh,
+static bool ObtainCacheParticleVcol(Hair *hair,
BL::Mesh *b_mesh,
BL::Object *b_ob,
ParticleCurveData *CData,
bool background,
int vcol_num)
{
- if (!(mesh && b_mesh && b_ob && CData))
+ if (!(hair && b_mesh && b_ob && CData))
return false;
CData->curve_vcol.clear();
@@ -312,7 +262,7 @@ static bool ObtainCacheParticleVcol(Mesh *mesh,
BL::Mesh::vertex_colors_iterator l;
b_mesh->vertex_colors.begin(l);
- float3 vcol = make_float3(0.0f, 0.0f, 0.0f);
+ float4 vcol = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
if (b_mesh->vertex_colors.length())
b_psys.mcol_on_emitter(psmd, *b_pa, pa_no, vcol_num, &vcol.x);
CData->curve_vcol.push_back_slow(vcol);
@@ -327,287 +277,21 @@ static bool ObtainCacheParticleVcol(Mesh *mesh,
return true;
}
-static void ExportCurveTrianglePlanes(Mesh *mesh,
- ParticleCurveData *CData,
- float3 RotCam,
- bool is_ortho)
-{
- int vertexno = mesh->verts.size();
- int vertexindex = vertexno;
- int numverts = 0, numtris = 0;
-
- /* compute and reserve size of arrays */
- for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
- for (int curve = CData->psys_firstcurve[sys];
- curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
- curve++) {
- numverts += 2 + (CData->curve_keynum[curve] - 1) * 2;
- numtris += (CData->curve_keynum[curve] - 1) * 2;
- }
- }
-
- mesh->reserve_mesh(mesh->verts.size() + numverts, mesh->num_triangles() + numtris);
-
- /* actually export */
- for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
- for (int curve = CData->psys_firstcurve[sys];
- curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
- curve++) {
- float3 xbasis;
- float3 v1;
- float time = 0.0f;
- float3 ickey_loc = CData->curvekey_co[CData->curve_firstkey[curve]];
- float radius = shaperadius(
- CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], 0.0f);
- v1 = CData->curvekey_co[CData->curve_firstkey[curve] + 1] -
- CData->curvekey_co[CData->curve_firstkey[curve]];
- if (is_ortho)
- xbasis = normalize(cross(RotCam, v1));
- else
- xbasis = normalize(cross(RotCam - ickey_loc, v1));
- float3 ickey_loc_shfl = ickey_loc - radius * xbasis;
- float3 ickey_loc_shfr = ickey_loc + radius * xbasis;
- mesh->add_vertex(ickey_loc_shfl);
- mesh->add_vertex(ickey_loc_shfr);
- vertexindex += 2;
-
- for (int curvekey = CData->curve_firstkey[curve] + 1;
- curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve];
- curvekey++) {
- ickey_loc = CData->curvekey_co[curvekey];
-
- if (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)
- v1 = CData->curvekey_co[curvekey] -
- CData->curvekey_co[max(curvekey - 1, CData->curve_firstkey[curve])];
- else
- v1 = CData->curvekey_co[curvekey + 1] - CData->curvekey_co[curvekey - 1];
-
- time = CData->curvekey_time[curvekey] / CData->curve_length[curve];
- radius = shaperadius(
- CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], time);
-
- if (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)
- radius = shaperadius(CData->psys_shape[sys],
- CData->psys_rootradius[sys],
- CData->psys_tipradius[sys],
- 0.95f);
-
- if (CData->psys_closetip[sys] &&
- (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1))
- radius = shaperadius(CData->psys_shape[sys], CData->psys_rootradius[sys], 0.0f, 0.95f);
-
- if (is_ortho)
- xbasis = normalize(cross(RotCam, v1));
- else
- xbasis = normalize(cross(RotCam - ickey_loc, v1));
- float3 ickey_loc_shfl = ickey_loc - radius * xbasis;
- float3 ickey_loc_shfr = ickey_loc + radius * xbasis;
- mesh->add_vertex(ickey_loc_shfl);
- mesh->add_vertex(ickey_loc_shfr);
- mesh->add_triangle(
- vertexindex - 2, vertexindex, vertexindex - 1, CData->psys_shader[sys], true);
- mesh->add_triangle(
- vertexindex + 1, vertexindex - 1, vertexindex, CData->psys_shader[sys], true);
- vertexindex += 2;
- }
- }
- }
-
- mesh->resize_mesh(mesh->verts.size(), mesh->num_triangles());
- mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL);
- mesh->attributes.remove(ATTR_STD_FACE_NORMAL);
- mesh->add_face_normals();
- mesh->add_vertex_normals();
- mesh->attributes.remove(ATTR_STD_FACE_NORMAL);
-
- /* texture coords still needed */
-}
-
-static void ExportCurveTriangleGeometry(Mesh *mesh, ParticleCurveData *CData, int resolution)
-{
- int vertexno = mesh->verts.size();
- int vertexindex = vertexno;
- int numverts = 0, numtris = 0;
-
- /* compute and reserve size of arrays */
- for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
- for (int curve = CData->psys_firstcurve[sys];
- curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
- curve++) {
- numverts += (CData->curve_keynum[curve] - 1) * resolution + resolution;
- numtris += (CData->curve_keynum[curve] - 1) * 2 * resolution;
- }
- }
-
- mesh->reserve_mesh(mesh->verts.size() + numverts, mesh->num_triangles() + numtris);
-
- /* actually export */
- for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
- for (int curve = CData->psys_firstcurve[sys];
- curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
- curve++) {
- float3 firstxbasis = cross(make_float3(1.0f, 0.0f, 0.0f),
- CData->curvekey_co[CData->curve_firstkey[curve] + 1] -
- CData->curvekey_co[CData->curve_firstkey[curve]]);
- if (!is_zero(firstxbasis))
- firstxbasis = normalize(firstxbasis);
- else
- firstxbasis = normalize(cross(make_float3(0.0f, 1.0f, 0.0f),
- CData->curvekey_co[CData->curve_firstkey[curve] + 1] -
- CData->curvekey_co[CData->curve_firstkey[curve]]));
-
- for (int curvekey = CData->curve_firstkey[curve];
- curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1;
- curvekey++) {
- float3 xbasis = firstxbasis;
- float3 v1;
- float3 v2;
-
- if (curvekey == CData->curve_firstkey[curve]) {
- v1 = CData->curvekey_co[min(
- curvekey + 2, CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)] -
- CData->curvekey_co[curvekey + 1];
- v2 = CData->curvekey_co[curvekey + 1] - CData->curvekey_co[curvekey];
- }
- else if (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1) {
- v1 = CData->curvekey_co[curvekey] - CData->curvekey_co[curvekey - 1];
- v2 = CData->curvekey_co[curvekey - 1] -
- CData->curvekey_co[max(curvekey - 2, CData->curve_firstkey[curve])];
- }
- else {
- v1 = CData->curvekey_co[curvekey + 1] - CData->curvekey_co[curvekey];
- v2 = CData->curvekey_co[curvekey] - CData->curvekey_co[curvekey - 1];
- }
-
- xbasis = cross(v1, v2);
-
- if (len_squared(xbasis) >= 0.05f * len_squared(v1) * len_squared(v2)) {
- firstxbasis = normalize(xbasis);
- break;
- }
- }
-
- for (int curvekey = CData->curve_firstkey[curve];
- curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1;
- curvekey++) {
- int subv = 1;
- float3 xbasis;
- float3 ybasis;
- float3 v1;
- float3 v2;
-
- if (curvekey == CData->curve_firstkey[curve]) {
- subv = 0;
- v1 = CData->curvekey_co[min(
- curvekey + 2, CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)] -
- CData->curvekey_co[curvekey + 1];
- v2 = CData->curvekey_co[curvekey + 1] - CData->curvekey_co[curvekey];
- }
- else if (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1) {
- v1 = CData->curvekey_co[curvekey] - CData->curvekey_co[curvekey - 1];
- v2 = CData->curvekey_co[curvekey - 1] -
- CData->curvekey_co[max(curvekey - 2, CData->curve_firstkey[curve])];
- }
- else {
- v1 = CData->curvekey_co[curvekey + 1] - CData->curvekey_co[curvekey];
- v2 = CData->curvekey_co[curvekey] - CData->curvekey_co[curvekey - 1];
- }
-
- xbasis = cross(v1, v2);
-
- if (len_squared(xbasis) >= 0.05f * len_squared(v1) * len_squared(v2)) {
- xbasis = normalize(xbasis);
- firstxbasis = xbasis;
- }
- else
- xbasis = firstxbasis;
-
- ybasis = normalize(cross(xbasis, v2));
-
- for (; subv <= 1; subv++) {
- float3 ickey_loc = make_float3(0.0f, 0.0f, 0.0f);
- float time = 0.0f;
-
- InterpolateKeySegments(subv, 1, curvekey, curve, &ickey_loc, &time, CData);
-
- float radius = shaperadius(CData->psys_shape[sys],
- CData->psys_rootradius[sys],
- CData->psys_tipradius[sys],
- time);
-
- if ((curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 2) &&
- (subv == 1))
- radius = shaperadius(CData->psys_shape[sys],
- CData->psys_rootradius[sys],
- CData->psys_tipradius[sys],
- 0.95f);
-
- if (CData->psys_closetip[sys] && (subv == 1) &&
- (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 2))
- radius = shaperadius(CData->psys_shape[sys], CData->psys_rootradius[sys], 0.0f, 0.95f);
-
- float angle = M_2PI_F / (float)resolution;
- for (int section = 0; section < resolution; section++) {
- float3 ickey_loc_shf = ickey_loc + radius * (cosf(angle * section) * xbasis +
- sinf(angle * section) * ybasis);
- mesh->add_vertex(ickey_loc_shf);
- }
-
- if (subv != 0) {
- for (int section = 0; section < resolution - 1; section++) {
- mesh->add_triangle(vertexindex - resolution + section,
- vertexindex + section,
- vertexindex - resolution + section + 1,
- CData->psys_shader[sys],
- true);
- mesh->add_triangle(vertexindex + section + 1,
- vertexindex - resolution + section + 1,
- vertexindex + section,
- CData->psys_shader[sys],
- true);
- }
- mesh->add_triangle(vertexindex - 1,
- vertexindex + resolution - 1,
- vertexindex - resolution,
- CData->psys_shader[sys],
- true);
- mesh->add_triangle(vertexindex,
- vertexindex - resolution,
- vertexindex + resolution - 1,
- CData->psys_shader[sys],
- true);
- }
- vertexindex += resolution;
- }
- }
- }
- }
-
- mesh->resize_mesh(mesh->verts.size(), mesh->num_triangles());
- mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL);
- mesh->attributes.remove(ATTR_STD_FACE_NORMAL);
- mesh->add_face_normals();
- mesh->add_vertex_normals();
- mesh->attributes.remove(ATTR_STD_FACE_NORMAL);
-
- /* texture coords still needed */
-}
-
-static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData)
+static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CData)
{
int num_keys = 0;
int num_curves = 0;
- if (mesh->num_curves())
+ if (hair->num_curves())
return;
Attribute *attr_intercept = NULL;
Attribute *attr_random = NULL;
- if (mesh->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT))
- attr_intercept = mesh->curve_attributes.add(ATTR_STD_CURVE_INTERCEPT);
- if (mesh->need_attribute(scene, ATTR_STD_CURVE_RANDOM))
- attr_random = mesh->curve_attributes.add(ATTR_STD_CURVE_RANDOM);
+ if (hair->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT))
+ attr_intercept = hair->attributes.add(ATTR_STD_CURVE_INTERCEPT);
+ if (hair->need_attribute(scene, ATTR_STD_CURVE_RANDOM))
+ attr_random = hair->attributes.add(ATTR_STD_CURVE_RANDOM);
/* compute and reserve size of arrays */
for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
@@ -620,10 +304,10 @@ static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CDa
}
if (num_curves > 0) {
- VLOG(1) << "Exporting curve segments for mesh " << mesh->name;
+ VLOG(1) << "Exporting curve segments for mesh " << hair->name;
}
- mesh->reserve_curves(mesh->num_curves() + num_curves, mesh->curve_keys.size() + num_keys);
+ hair->reserve_curves(hair->num_curves() + num_curves, hair->curve_keys.size() + num_keys);
num_keys = 0;
num_curves = 0;
@@ -648,7 +332,7 @@ static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CDa
(curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)) {
radius = 0.0f;
}
- mesh->add_curve_key(ickey_loc, radius);
+ hair->add_curve_key(ickey_loc, radius);
if (attr_intercept)
attr_intercept->add(time);
@@ -656,19 +340,19 @@ static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CDa
}
if (attr_random != NULL) {
- attr_random->add(hash_int_01(num_curves));
+ attr_random->add(hash_uint2_to_float(num_curves, 0));
}
- mesh->add_curve(num_keys, CData->psys_shader[sys]);
+ hair->add_curve(num_keys, CData->psys_shader[sys]);
num_keys += num_curve_keys;
num_curves++;
}
}
/* check allocation */
- if ((mesh->curve_keys.size() != num_keys) || (mesh->num_curves() != num_curves)) {
+ if ((hair->curve_keys.size() != num_keys) || (hair->num_curves() != num_curves)) {
VLOG(1) << "Allocation failed, clearing data";
- mesh->clear();
+ hair->clear();
}
}
@@ -712,24 +396,58 @@ static float4 LerpCurveSegmentMotionCV(ParticleCurveData *CData, int sys, int cu
return lerp(mP, mP2, remainder);
}
-static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int motion_step)
+static void export_hair_motion_validate_attribute(Hair *hair,
+ int motion_step,
+ int num_motion_keys,
+ bool have_motion)
+{
+ Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ const int num_keys = hair->curve_keys.size();
+
+ if (num_motion_keys != num_keys || !have_motion) {
+ /* No motion or hair "topology" changed, remove attributes again. */
+ if (num_motion_keys != num_keys) {
+ VLOG(1) << "Hair topology changed, removing attribute.";
+ }
+ else {
+ VLOG(1) << "No motion, removing attribute.";
+ }
+ hair->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
+ }
+ else if (motion_step > 0) {
+ VLOG(1) << "Filling in new motion vertex position for motion_step " << motion_step;
+
+ /* Motion, fill up previous steps that we might have skipped because
+ * they had no motion, but we need them anyway now. */
+ for (int step = 0; step < motion_step; step++) {
+ float4 *mP = attr_mP->data_float4() + step * num_keys;
+
+ for (int key = 0; key < num_keys; key++) {
+ mP[key] = float3_to_float4(hair->curve_keys[key]);
+ mP[key].w = hair->curve_radius[key];
+ }
+ }
+ }
+}
+
+static void ExportCurveSegmentsMotion(Hair *hair, ParticleCurveData *CData, int motion_step)
{
- VLOG(1) << "Exporting curve motion segments for mesh " << mesh->name << ", motion step "
+ VLOG(1) << "Exporting curve motion segments for hair " << hair->name << ", motion step "
<< motion_step;
/* find attribute */
- Attribute *attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
bool new_attribute = false;
/* add new attribute if it doesn't exist already */
if (!attr_mP) {
VLOG(1) << "Creating new motion vertex position attribute";
- attr_mP = mesh->curve_attributes.add(ATTR_STD_MOTION_VERTEX_POSITION);
+ attr_mP = hair->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION);
new_attribute = true;
}
/* export motion vectors for curve keys */
- size_t numkeys = mesh->curve_keys.size();
+ size_t numkeys = hair->curve_keys.size();
float4 *mP = attr_mP->data_float4() + motion_step * numkeys;
bool have_motion = false;
int i = 0;
@@ -740,24 +458,24 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
curve++) {
/* Curve lengths may not match! Curves can be clipped. */
- int curve_key_end = (num_curves + 1 < (int)mesh->curve_first_key.size() ?
- mesh->curve_first_key[num_curves + 1] :
- (int)mesh->curve_keys.size());
- const int num_center_curve_keys = curve_key_end - mesh->curve_first_key[num_curves];
+ int curve_key_end = (num_curves + 1 < (int)hair->curve_first_key.size() ?
+ hair->curve_first_key[num_curves + 1] :
+ (int)hair->curve_keys.size());
+ const int num_center_curve_keys = curve_key_end - hair->curve_first_key[num_curves];
const int is_num_keys_different = CData->curve_keynum[curve] - num_center_curve_keys;
if (!is_num_keys_different) {
for (int curvekey = CData->curve_firstkey[curve];
curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve];
curvekey++) {
- if (i < mesh->curve_keys.size()) {
+ if (i < hair->curve_keys.size()) {
mP[i] = CurveSegmentMotionCV(CData, sys, curve, curvekey);
if (!have_motion) {
/* unlike mesh coordinates, these tend to be slightly different
* between frames due to particle transforms into/out of object
* space, so we use an epsilon to detect actual changes */
- float4 curve_key = float3_to_float4(mesh->curve_keys[i]);
- curve_key.w = mesh->curve_radius[i];
+ float4 curve_key = float3_to_float4(hair->curve_keys[i]);
+ curve_key.w = hair->curve_radius[i];
if (len_squared(mP[i] - curve_key) > 1e-5f * 1e-5f)
have_motion = true;
}
@@ -766,7 +484,7 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
}
}
else {
- /* Number of keys has changed. Genereate an interpolated version
+ /* Number of keys has changed. Generate an interpolated version
* to preserve motion blur. */
const float step_size = num_center_curve_keys > 1 ? 1.0f / (num_center_curve_keys - 1) :
0.0f;
@@ -781,276 +499,69 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
}
}
- /* in case of new attribute, we verify if there really was any motion */
+ /* In case of new attribute, we verify if there really was any motion. */
if (new_attribute) {
- if (i != numkeys || !have_motion) {
- /* No motion or hair "topology" changed, remove attributes again. */
- if (i != numkeys) {
- VLOG(1) << "Hair topology changed, removing attribute.";
- }
- else {
- VLOG(1) << "No motion, removing attribute.";
- }
- mesh->curve_attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
- }
- else if (motion_step > 0) {
- VLOG(1) << "Filling in new motion vertex position for motion_step " << motion_step;
- /* motion, fill up previous steps that we might have skipped because
- * they had no motion, but we need them anyway now */
- for (int step = 0; step < motion_step; step++) {
- float4 *mP = attr_mP->data_float4() + step * numkeys;
-
- for (int key = 0; key < numkeys; key++) {
- mP[key] = float3_to_float4(mesh->curve_keys[key]);
- mP[key].w = mesh->curve_radius[key];
- }
- }
- }
- }
-}
-
-static void ExportCurveTriangleUV(ParticleCurveData *CData,
- int vert_offset,
- int resol,
- float2 *uvdata)
-{
- if (uvdata == NULL)
- return;
- int vertexindex = vert_offset;
-
- for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
- for (int curve = CData->psys_firstcurve[sys];
- curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
- curve++) {
- for (int curvekey = CData->curve_firstkey[curve];
- curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1;
- curvekey++) {
- for (int section = 0; section < resol; section++) {
- uvdata[vertexindex] = CData->curve_uv[curve];
- vertexindex++;
- uvdata[vertexindex] = CData->curve_uv[curve];
- vertexindex++;
- uvdata[vertexindex] = CData->curve_uv[curve];
- vertexindex++;
- uvdata[vertexindex] = CData->curve_uv[curve];
- vertexindex++;
- uvdata[vertexindex] = CData->curve_uv[curve];
- vertexindex++;
- uvdata[vertexindex] = CData->curve_uv[curve];
- vertexindex++;
- }
- }
- }
- }
-}
-
-static void ExportCurveTriangleVcol(ParticleCurveData *CData,
- int vert_offset,
- int resol,
- uchar4 *cdata)
-{
- if (cdata == NULL)
- return;
-
- int vertexindex = vert_offset;
-
- for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
- for (int curve = CData->psys_firstcurve[sys];
- curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
- curve++) {
- for (int curvekey = CData->curve_firstkey[curve];
- curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1;
- curvekey++) {
- for (int section = 0; section < resol; section++) {
- /* Encode vertex color using the sRGB curve. */
- cdata[vertexindex] = color_float_to_byte(
- color_srgb_to_linear_v3(CData->curve_vcol[curve]));
- vertexindex++;
- cdata[vertexindex] = color_float_to_byte(
- color_srgb_to_linear_v3(CData->curve_vcol[curve]));
- vertexindex++;
- cdata[vertexindex] = color_float_to_byte(
- color_srgb_to_linear_v3(CData->curve_vcol[curve]));
- vertexindex++;
- cdata[vertexindex] = color_float_to_byte(
- color_srgb_to_linear_v3(CData->curve_vcol[curve]));
- vertexindex++;
- cdata[vertexindex] = color_float_to_byte(
- color_srgb_to_linear_v3(CData->curve_vcol[curve]));
- vertexindex++;
- cdata[vertexindex] = color_float_to_byte(
- color_srgb_to_linear_v3(CData->curve_vcol[curve]));
- vertexindex++;
- }
- }
- }
+ export_hair_motion_validate_attribute(hair, motion_step, i, have_motion);
}
}
/* Hair Curve Sync */
-void BlenderSync::sync_curve_settings()
+bool BlenderSync::object_has_particle_hair(BL::Object b_ob)
{
- PointerRNA csscene = RNA_pointer_get(&b_scene.ptr, "cycles_curves");
-
- CurveSystemManager *curve_system_manager = scene->curve_system_manager;
- CurveSystemManager prev_curve_system_manager = *curve_system_manager;
-
- curve_system_manager->use_curves = get_boolean(csscene, "use_curves");
- curve_system_manager->minimum_width = get_float(csscene, "minimum_width");
- curve_system_manager->maximum_width = get_float(csscene, "maximum_width");
-
- curve_system_manager->primitive = (CurvePrimitiveType)get_enum(
- csscene, "primitive", CURVE_NUM_PRIMITIVE_TYPES, CURVE_LINE_SEGMENTS);
- curve_system_manager->curve_shape = (CurveShapeType)get_enum(
- csscene, "shape", CURVE_NUM_SHAPE_TYPES, CURVE_THICK);
- curve_system_manager->resolution = get_int(csscene, "resolution");
- curve_system_manager->subdivisions = get_int(csscene, "subdivisions");
- curve_system_manager->use_backfacing = !get_boolean(csscene, "cull_backfacing");
-
- /* Triangles */
- if (curve_system_manager->primitive == CURVE_TRIANGLES) {
- /* camera facing planes */
- if (curve_system_manager->curve_shape == CURVE_RIBBON) {
- curve_system_manager->triangle_method = CURVE_CAMERA_TRIANGLES;
- curve_system_manager->resolution = 1;
- }
- else if (curve_system_manager->curve_shape == CURVE_THICK) {
- curve_system_manager->triangle_method = CURVE_TESSELATED_TRIANGLES;
- }
- }
- /* Line Segments */
- else if (curve_system_manager->primitive == CURVE_LINE_SEGMENTS) {
- if (curve_system_manager->curve_shape == CURVE_RIBBON) {
- /* tangent shading */
- curve_system_manager->line_method = CURVE_UNCORRECTED;
- curve_system_manager->use_encasing = true;
- curve_system_manager->use_backfacing = false;
- curve_system_manager->use_tangent_normal_geometry = true;
- }
- else if (curve_system_manager->curve_shape == CURVE_THICK) {
- curve_system_manager->line_method = CURVE_ACCURATE;
- curve_system_manager->use_encasing = false;
- curve_system_manager->use_tangent_normal_geometry = false;
- }
- }
- /* Curve Segments */
- else if (curve_system_manager->primitive == CURVE_SEGMENTS) {
- if (curve_system_manager->curve_shape == CURVE_RIBBON) {
- curve_system_manager->primitive = CURVE_RIBBONS;
- curve_system_manager->use_backfacing = false;
- }
- }
+ /* Test if the object has a particle modifier with hair. */
+ BL::Object::modifiers_iterator b_mod;
+ for (b_ob.modifiers.begin(b_mod); b_mod != b_ob.modifiers.end(); ++b_mod) {
+ if ((b_mod->type() == b_mod->type_PARTICLE_SYSTEM) &&
+ (preview ? b_mod->show_viewport() : b_mod->show_render())) {
+ BL::ParticleSystemModifier psmd((const PointerRNA)b_mod->ptr);
+ BL::ParticleSystem b_psys((const PointerRNA)psmd.particle_system().ptr);
+ BL::ParticleSettings b_part((const PointerRNA)b_psys.settings().ptr);
- if (curve_system_manager->modified_mesh(prev_curve_system_manager)) {
- BL::BlendData::objects_iterator b_ob;
-
- for (b_data.objects.begin(b_ob); b_ob != b_data.objects.end(); ++b_ob) {
- if (object_is_mesh(*b_ob)) {
- BL::Object::particle_systems_iterator b_psys;
- for (b_ob->particle_systems.begin(b_psys); b_psys != b_ob->particle_systems.end();
- ++b_psys) {
- if ((b_psys->settings().render_type() == BL::ParticleSettings::render_type_PATH) &&
- (b_psys->settings().type() == BL::ParticleSettings::type_HAIR)) {
- BL::ID key = BKE_object_is_modified(*b_ob) ? *b_ob : b_ob->data();
- mesh_map.set_recalc(key);
- object_map.set_recalc(*b_ob);
- }
- }
+ if ((b_part.render_type() == BL::ParticleSettings::render_type_PATH) &&
+ (b_part.type() == BL::ParticleSettings::type_HAIR)) {
+ return true;
}
}
}
- if (curve_system_manager->modified(prev_curve_system_manager))
- curve_system_manager->tag_update(scene);
+ return false;
}
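The object_has_particle_hair() helper introduced above only walks the modifier stack; a rough Python-side equivalent of the same test (a sketch for illustration, not code from this patch) would be:

def object_has_particle_hair(ob, preview):
    # True when the object carries an enabled Particle System modifier whose
    # settings describe hair rendered as paths, mirroring the C++ check.
    for mod in ob.modifiers:
        if mod.type != 'PARTICLE_SYSTEM':
            continue
        if not (mod.show_viewport if preview else mod.show_render):
            continue
        settings = mod.particle_system.settings
        if settings.type == 'HAIR' and settings.render_type == 'PATH':
            return True
    return False
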
-void BlenderSync::sync_curves(
- Mesh *mesh, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step)
+/* Old particle hair. */
+void BlenderSync::sync_particle_hair(
+ Hair *hair, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step)
{
- if (!motion) {
- /* Clear stored curve data */
- mesh->curve_keys.clear();
- mesh->curve_radius.clear();
- mesh->curve_first_key.clear();
- mesh->curve_shader.clear();
- mesh->curve_attributes.clear();
- }
-
/* obtain general settings */
- const bool use_curves = scene->curve_system_manager->use_curves;
-
- if (!(use_curves && b_ob.mode() != b_ob.mode_PARTICLE_EDIT)) {
- if (!motion)
- mesh->compute_bounds();
+ if (b_ob.mode() == b_ob.mode_PARTICLE_EDIT || b_ob.mode() == b_ob.mode_EDIT) {
return;
}
- const int primitive = scene->curve_system_manager->primitive;
- const int triangle_method = scene->curve_system_manager->triangle_method;
- const int resolution = scene->curve_system_manager->resolution;
- const size_t vert_num = mesh->verts.size();
- const size_t tri_num = mesh->num_triangles();
- int used_res = 1;
-
 /* extract particle hair data - should be combined with connecting to mesh later. */
ParticleCurveData CData;
- ObtainCacheParticleData(mesh, &b_mesh, &b_ob, &CData, !preview);
-
- /* add hair geometry to mesh */
- if (primitive == CURVE_TRIANGLES) {
- if (triangle_method == CURVE_CAMERA_TRIANGLES) {
- /* obtain camera parameters */
- float3 RotCam;
- Camera *camera = scene->camera;
- Transform &ctfm = camera->matrix;
- if (camera->type == CAMERA_ORTHOGRAPHIC) {
- RotCam = -make_float3(ctfm.x.z, ctfm.y.z, ctfm.z.z);
- }
- else {
- Transform tfm = get_transform(b_ob.matrix_world());
- Transform itfm = transform_quick_inverse(tfm);
- RotCam = transform_point(&itfm, make_float3(ctfm.x.w, ctfm.y.w, ctfm.z.w));
- }
- bool is_ortho = camera->type == CAMERA_ORTHOGRAPHIC;
- ExportCurveTrianglePlanes(mesh, &CData, RotCam, is_ortho);
- }
- else {
- ExportCurveTriangleGeometry(mesh, &CData, resolution);
- used_res = resolution;
- }
- }
- else {
- if (motion)
- ExportCurveSegmentsMotion(mesh, &CData, motion_step);
- else
- ExportCurveSegments(scene, mesh, &CData);
- }
+ ObtainCacheParticleData(hair, &b_mesh, &b_ob, &CData, !preview);
+
+ /* add hair geometry */
+ if (motion)
+ ExportCurveSegmentsMotion(hair, &CData, motion_step);
+ else
+ ExportCurveSegments(scene, hair, &CData);
/* generated coordinates from first key. we should ideally get this from
* blender to handle deforming objects */
if (!motion) {
- if (mesh->need_attribute(scene, ATTR_STD_GENERATED)) {
+ if (hair->need_attribute(scene, ATTR_STD_GENERATED)) {
float3 loc, size;
mesh_texture_space(b_mesh, loc, size);
- if (primitive == CURVE_TRIANGLES) {
- Attribute *attr_generated = mesh->attributes.add(ATTR_STD_GENERATED);
- float3 *generated = attr_generated->data_float3();
+ Attribute *attr_generated = hair->attributes.add(ATTR_STD_GENERATED);
+ float3 *generated = attr_generated->data_float3();
- for (size_t i = vert_num; i < mesh->verts.size(); i++)
- generated[i] = mesh->verts[i] * size - loc;
- }
- else {
- Attribute *attr_generated = mesh->curve_attributes.add(ATTR_STD_GENERATED);
- float3 *generated = attr_generated->data_float3();
-
- for (size_t i = 0; i < mesh->num_curves(); i++) {
- float3 co = mesh->curve_keys[mesh->get_curve(i).first_key];
- generated[i] = co * size - loc;
- }
+ for (size_t i = 0; i < hair->num_curves(); i++) {
+ float3 co = hair->curve_keys[hair->get_curve(i).first_key];
+ generated[i] = co * size - loc;
}
}
}
@@ -1061,32 +572,22 @@ void BlenderSync::sync_curves(
int vcol_num = 0;
for (b_mesh.vertex_colors.begin(l); l != b_mesh.vertex_colors.end(); ++l, vcol_num++) {
- if (!mesh->need_attribute(scene, ustring(l->name().c_str())))
+ if (!hair->need_attribute(scene, ustring(l->name().c_str())))
continue;
- ObtainCacheParticleVcol(mesh, &b_mesh, &b_ob, &CData, !preview, vcol_num);
+ ObtainCacheParticleVcol(hair, &b_mesh, &b_ob, &CData, !preview, vcol_num);
- if (primitive == CURVE_TRIANGLES) {
- Attribute *attr_vcol = mesh->attributes.add(
- ustring(l->name().c_str()), TypeDesc::TypeColor, ATTR_ELEMENT_CORNER_BYTE);
+ Attribute *attr_vcol = hair->attributes.add(
+ ustring(l->name().c_str()), TypeRGBA, ATTR_ELEMENT_CURVE);
- uchar4 *cdata = attr_vcol->data_uchar4();
+ float4 *fdata = attr_vcol->data_float4();
- ExportCurveTriangleVcol(&CData, tri_num * 3, used_res, cdata);
- }
- else {
- Attribute *attr_vcol = mesh->curve_attributes.add(
- ustring(l->name().c_str()), TypeDesc::TypeColor, ATTR_ELEMENT_CURVE);
-
- float3 *fdata = attr_vcol->data_float3();
-
- if (fdata) {
- size_t i = 0;
+ if (fdata) {
+ size_t i = 0;
- /* Encode vertex color using the sRGB curve. */
- for (size_t curve = 0; curve < CData.curve_vcol.size(); curve++) {
- fdata[i++] = color_srgb_to_linear_v3(CData.curve_vcol[curve]);
- }
+ /* Encode vertex color using the sRGB curve. */
+ for (size_t curve = 0; curve < CData.curve_vcol.size(); curve++) {
+ fdata[i++] = color_srgb_to_linear_v4(CData.curve_vcol[curve]);
}
}
}
@@ -1103,42 +604,279 @@ void BlenderSync::sync_curves(
ustring name = ustring(l->name().c_str());
/* UV map */
- if (mesh->need_attribute(scene, name) || mesh->need_attribute(scene, std)) {
+ if (hair->need_attribute(scene, name) || hair->need_attribute(scene, std)) {
Attribute *attr_uv;
- ObtainCacheParticleUV(mesh, &b_mesh, &b_ob, &CData, !preview, uv_num);
+ ObtainCacheParticleUV(hair, &b_mesh, &b_ob, &CData, !preview, uv_num);
- if (primitive == CURVE_TRIANGLES) {
- if (active_render)
- attr_uv = mesh->attributes.add(std, name);
- else
- attr_uv = mesh->attributes.add(name, TypeFloat2, ATTR_ELEMENT_CORNER);
+ if (active_render)
+ attr_uv = hair->attributes.add(std, name);
+ else
+ attr_uv = hair->attributes.add(name, TypeFloat2, ATTR_ELEMENT_CURVE);
- float2 *uv = attr_uv->data_float2();
+ float2 *uv = attr_uv->data_float2();
- ExportCurveTriangleUV(&CData, tri_num * 3, used_res, uv);
+ if (uv) {
+ size_t i = 0;
+
+ for (size_t curve = 0; curve < CData.curve_uv.size(); curve++) {
+ uv[i++] = CData.curve_uv[curve];
+ }
}
- else {
- if (active_render)
- attr_uv = mesh->curve_attributes.add(std, name);
- else
- attr_uv = mesh->curve_attributes.add(name, TypeFloat2, ATTR_ELEMENT_CURVE);
+ }
+ }
+ }
+}
- float2 *uv = attr_uv->data_float2();
+static float4 hair_point_as_float4(BL::HairPoint b_point)
+{
+ float4 mP = float3_to_float4(get_float3(b_point.co()));
+ mP.w = b_point.radius();
+ return mP;
+}
- if (uv) {
- size_t i = 0;
+static float4 interpolate_hair_points(BL::Hair b_hair,
+ const int first_point_index,
+ const int num_points,
+ const float step)
+{
+ const float curve_t = step * (num_points - 1);
+ const int point_a = clamp((int)curve_t, 0, num_points - 1);
+ const int point_b = min(point_a + 1, num_points - 1);
+ const float t = curve_t - (float)point_a;
+ return lerp(hair_point_as_float4(b_hair.points[first_point_index + point_a]),
+ hair_point_as_float4(b_hair.points[first_point_index + point_b]),
+ t);
+}
- for (size_t curve = 0; curve < CData.curve_uv.size(); curve++) {
- uv[i++] = CData.curve_uv[curve];
- }
+static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair)
+{
+ /* TODO: optimize so we can straight memcpy arrays from Blender? */
+
+ /* Add requested attributes. */
+ Attribute *attr_intercept = NULL;
+ Attribute *attr_random = NULL;
+
+ if (hair->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT)) {
+ attr_intercept = hair->attributes.add(ATTR_STD_CURVE_INTERCEPT);
+ }
+ if (hair->need_attribute(scene, ATTR_STD_CURVE_RANDOM)) {
+ attr_random = hair->attributes.add(ATTR_STD_CURVE_RANDOM);
+ }
+
+ /* Reserve memory. */
+ const int num_keys = b_hair.points.length();
+ const int num_curves = b_hair.curves.length();
+
+ if (num_curves > 0) {
+ VLOG(1) << "Exporting curve segments for hair " << hair->name;
+ }
+
+ hair->reserve_curves(num_curves, num_keys);
+
+ /* Export curves and points. */
+ vector<float> points_length;
+
+ BL::Hair::curves_iterator b_curve_iter;
+ for (b_hair.curves.begin(b_curve_iter); b_curve_iter != b_hair.curves.end(); ++b_curve_iter) {
+ BL::HairCurve b_curve = *b_curve_iter;
+ const int first_point_index = b_curve.first_point_index();
+ const int num_points = b_curve.num_points();
+
+ float3 prev_co = make_float3(0.0f, 0.0f, 0.0f);
+ float length = 0.0f;
+ if (attr_intercept) {
+ points_length.clear();
+ points_length.reserve(num_points);
+ }
+
+ /* Position and radius. */
+ for (int i = 0; i < num_points; i++) {
+ BL::HairPoint b_point = b_hair.points[first_point_index + i];
+
+ const float3 co = get_float3(b_point.co());
+ const float radius = b_point.radius();
+ hair->add_curve_key(co, radius);
+
+ if (attr_intercept) {
+ if (i > 0) {
+ length += len(co - prev_co);
+ points_length.push_back(length);
+ }
+ prev_co = co;
+ }
+ }
+
+ /* Normalized 0..1 attribute along curve. */
+ if (attr_intercept) {
+ for (int i = 0; i < num_points; i++) {
+ attr_intercept->add((length == 0.0f) ? 0.0f : points_length[i] / length);
+ }
+ }
+
+ /* Random number per curve. */
+ if (attr_random != NULL) {
+ attr_random->add(hash_uint2_to_float(b_curve.index(), 0));
+ }
+
+ /* Curve. */
+ const int shader_index = 0;
+ hair->add_curve(first_point_index, shader_index);
+ }
+}
+
+static void export_hair_curves_motion(Hair *hair, BL::Hair b_hair, int motion_step)
+{
+ VLOG(1) << "Exporting curve motion segments for hair " << hair->name << ", motion step "
+ << motion_step;
+
+ /* Find or add attribute. */
+ Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ bool new_attribute = false;
+
+ if (!attr_mP) {
+ VLOG(1) << "Creating new motion vertex position attribute";
+ attr_mP = hair->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION);
+ new_attribute = true;
+ }
+
+ /* Export motion keys. */
+ const int num_keys = hair->curve_keys.size();
+ float4 *mP = attr_mP->data_float4() + motion_step * num_keys;
+ bool have_motion = false;
+ int num_motion_keys = 0;
+ int curve_index = 0;
+
+ BL::Hair::curves_iterator b_curve_iter;
+ for (b_hair.curves.begin(b_curve_iter); b_curve_iter != b_hair.curves.end(); ++b_curve_iter) {
+ BL::HairCurve b_curve = *b_curve_iter;
+ const int first_point_index = b_curve.first_point_index();
+ const int num_points = b_curve.num_points();
+
+ Hair::Curve curve = hair->get_curve(curve_index);
+ curve_index++;
+
+ if (num_points == curve.num_keys) {
+ /* Number of keys matches. */
+ for (int i = 0; i < num_points; i++) {
+ int point_index = first_point_index + i;
+
+ if (point_index < num_keys) {
+ mP[num_motion_keys] = hair_point_as_float4(b_hair.points[point_index]);
+ num_motion_keys++;
+
+ if (!have_motion) {
+ /* TODO: use epsilon for comparison? Was needed for particles due to
+ * transform, but ideally should not happen anymore. */
+ float4 curve_key = float3_to_float4(hair->curve_keys[i]);
+ curve_key.w = hair->curve_radius[i];
+ have_motion = !(mP[i] == curve_key);
}
}
}
}
+ else {
+ /* Number of keys has changed. Generate an interpolated version
+ * to preserve motion blur. */
+ const float step_size = curve.num_keys > 1 ? 1.0f / (curve.num_keys - 1) : 0.0f;
+ for (int i = 0; i < curve.num_keys; i++) {
+ const float step = i * step_size;
+ mP[num_motion_keys] = interpolate_hair_points(b_hair, first_point_index, num_points, step);
+ num_motion_keys++;
+ }
+ have_motion = true;
+ }
+ }
+
+ /* In case of new attribute, we verify if there really was any motion. */
+ if (new_attribute) {
+ export_hair_motion_validate_attribute(hair, motion_step, num_motion_keys, have_motion);
+ }
+}
+
+/* Hair object. */
+void BlenderSync::sync_hair(Hair *hair, BL::Object &b_ob, bool motion, int motion_step)
+{
+ /* Convert Blender hair to Cycles curves. */
+ BL::Hair b_hair(b_ob.data());
+ if (motion) {
+ export_hair_curves_motion(hair, b_hair, motion_step);
+ }
+ else {
+ export_hair_curves(scene, hair, b_hair);
+ }
+}
+
+void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph,
+ BL::Object b_ob,
+ Hair *hair,
+ const vector<Shader *> &used_shaders)
+{
+ /* Compares curve_keys rather than strands in order to handle quick hair
+ * adjustments in dynamic BVH - other methods could probably do this better. */
+ array<float3> oldcurve_keys;
+ array<float> oldcurve_radius;
+ oldcurve_keys.steal_data(hair->curve_keys);
+ oldcurve_radius.steal_data(hair->curve_radius);
+
+ hair->clear();
+ hair->used_shaders = used_shaders;
+
+ if (view_layer.use_hair) {
+ if (b_ob.type() == BL::Object::type_HAIR) {
+ /* Hair object. */
+ sync_hair(hair, b_ob, false);
+ }
+ else {
+ /* Particle hair. */
+ bool need_undeformed = hair->need_attribute(scene, ATTR_STD_GENERATED);
+ BL::Mesh b_mesh = object_to_mesh(
+ b_data, b_ob, b_depsgraph, need_undeformed, Mesh::SUBDIVISION_NONE);
+
+ if (b_mesh) {
+ sync_particle_hair(hair, b_mesh, b_ob, false);
+ free_object_to_mesh(b_data, b_ob, b_mesh);
+ }
+ }
+ }
+
+ /* tag update */
+ const bool rebuild = ((oldcurve_keys != hair->curve_keys) ||
+ (oldcurve_radius != hair->curve_radius));
+
+ hair->tag_update(scene, rebuild);
+}
+
+void BlenderSync::sync_hair_motion(BL::Depsgraph b_depsgraph,
+ BL::Object b_ob,
+ Hair *hair,
+ int motion_step)
+{
+ /* Skip if nothing exported. */
+ if (hair->num_keys() == 0) {
+ return;
+ }
+
+ /* Export deformed coordinates. */
+ if (ccl::BKE_object_is_deform_modified(b_ob, b_scene, preview)) {
+ if (b_ob.type() == BL::Object::type_HAIR) {
+ /* Hair object. */
+ sync_hair(hair, b_ob, true, motion_step);
+ return;
+ }
+ else {
+ /* Particle hair. */
+ BL::Mesh b_mesh = object_to_mesh(b_data, b_ob, b_depsgraph, false, Mesh::SUBDIVISION_NONE);
+ if (b_mesh) {
+ sync_particle_hair(hair, b_mesh, b_ob, true, motion_step);
+ free_object_to_mesh(b_data, b_ob, b_mesh);
+ return;
+ }
+ }
}
- mesh->compute_bounds();
+ /* No deformation on this frame, copy coordinates if other frames did have it. */
+ hair->copy_center_to_motion_step(motion_step);
}
CCL_NAMESPACE_END
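
As an illustration (not part of the patch), the sketch below shows the resampling that export_hair_curves_motion() falls back to when a curve's point count changed between frames: sample the new points at evenly spaced parameters and linearly interpolate between neighbours, as in interpolate_hair_points(). The Point struct and the sample_curve()/lerp_point() helpers are simplified stand-ins invented for the example; only the clamp/lerp arithmetic mirrors the patch.

#include <algorithm>
#include <cstdio>
#include <vector>

struct Point {
  float x, y, z, radius;
};

static Point lerp_point(const Point &a, const Point &b, float t)
{
  return {a.x + (b.x - a.x) * t,
          a.y + (b.y - a.y) * t,
          a.z + (b.z - a.z) * t,
          a.radius + (b.radius - a.radius) * t};
}

/* Same mapping as interpolate_hair_points(): step in [0,1] onto the point range. */
static Point sample_curve(const std::vector<Point> &points, float step)
{
  const int num_points = (int)points.size();
  const float curve_t = step * (num_points - 1);
  const int point_a = std::clamp((int)curve_t, 0, num_points - 1);
  const int point_b = std::min(point_a + 1, num_points - 1);
  const float t = curve_t - (float)point_a;
  return lerp_point(points[point_a], points[point_b], t);
}

int main()
{
  /* A curve that now has 5 points, resampled to the 3 motion keys stored for
   * the previous frame (step_size = 1 / (num_keys - 1)). */
  const std::vector<Point> points = {
      {0, 0, 0, 1}, {1, 0, 0, 1}, {2, 0, 0, 1}, {3, 0, 0, 1}, {4, 0, 0, 1}};
  const int num_keys = 3;
  const float step_size = 1.0f / (num_keys - 1);

  for (int i = 0; i < num_keys; i++) {
    const Point p = sample_curve(points, i * step_size);
    std::printf("motion key %d -> (%.1f, %.1f, %.1f) r=%.1f\n", i, p.x, p.y, p.z, p.radius);
  }
  return 0;
}
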
diff --git a/intern/cycles/blender/blender_device.cpp b/intern/cycles/blender/blender_device.cpp
index 98fc0c6dec4..fb9ab9e8c97 100644
--- a/intern/cycles/blender/blender_device.cpp
+++ b/intern/cycles/blender/blender_device.cpp
@@ -17,8 +17,19 @@
#include "blender/blender_device.h"
#include "blender/blender_util.h"
+#include "util/util_foreach.h"
+
CCL_NAMESPACE_BEGIN
+enum ComputeDevice {
+ COMPUTE_DEVICE_CPU = 0,
+ COMPUTE_DEVICE_CUDA = 1,
+ COMPUTE_DEVICE_OPENCL = 2,
+ COMPUTE_DEVICE_OPTIX = 3,
+
+ COMPUTE_DEVICE_NUM
+};
+
int blender_device_threads(BL::Scene &b_scene)
{
BL::RenderSettings b_r = b_scene.render();
@@ -40,7 +51,7 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
/* Find network device. */
vector<DeviceInfo> devices = Device::available_devices(DEVICE_MASK_NETWORK);
if (!devices.empty()) {
- device = devices.front();
+ return devices.front();
}
}
else if (get_enum(cscene, "device") == 1) {
@@ -57,13 +68,6 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
}
/* Test if we are using GPU devices. */
- enum ComputeDevice {
- COMPUTE_DEVICE_CPU = 0,
- COMPUTE_DEVICE_CUDA = 1,
- COMPUTE_DEVICE_OPENCL = 2,
- COMPUTE_DEVICE_NUM = 3,
- };
-
ComputeDevice compute_device = (ComputeDevice)get_enum(
cpreferences, "compute_device_type", COMPUTE_DEVICE_NUM, COMPUTE_DEVICE_CPU);
@@ -73,6 +77,10 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
if (compute_device == COMPUTE_DEVICE_CUDA) {
mask |= DEVICE_MASK_CUDA;
}
+ else if (compute_device == COMPUTE_DEVICE_OPTIX) {
+ /* Cannot use CPU and OptiX devices at the same time right now, so replace the mask. */
+ mask = DEVICE_MASK_OPTIX;
+ }
else if (compute_device == COMPUTE_DEVICE_OPENCL) {
mask |= DEVICE_MASK_OPENCL;
}
@@ -98,6 +106,10 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
device = Device::get_multi_device(used_devices, threads, background);
}
/* Else keep using the CPU device that was set before. */
+
+ if (!get_boolean(cpreferences, "peer_memory")) {
+ device.has_peer_memory = false;
+ }
}
}
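
For reference, a compilable sketch of the mask selection introduced above: CUDA and OpenCL are OR-ed into the CPU mask, while OptiX replaces it because CPU and OptiX rendering cannot be combined yet. The DeviceTypeMask bit values below are illustrative stand-ins, not the real Cycles constants.

#include <cstdint>
#include <cstdio>

enum ComputeDevice {
  COMPUTE_DEVICE_CPU = 0,
  COMPUTE_DEVICE_CUDA = 1,
  COMPUTE_DEVICE_OPENCL = 2,
  COMPUTE_DEVICE_OPTIX = 3,

  COMPUTE_DEVICE_NUM
};

/* Illustrative bit values only; the real DEVICE_MASK_* constants differ. */
enum DeviceTypeMask : uint32_t {
  DEVICE_MASK_CPU = 1 << 0,
  DEVICE_MASK_CUDA = 1 << 1,
  DEVICE_MASK_OPENCL = 1 << 2,
  DEVICE_MASK_OPTIX = 1 << 3,
};

static uint32_t device_mask_for(ComputeDevice compute_device)
{
  uint32_t mask = DEVICE_MASK_CPU;
  if (compute_device == COMPUTE_DEVICE_CUDA) {
    mask |= DEVICE_MASK_CUDA;
  }
  else if (compute_device == COMPUTE_DEVICE_OPTIX) {
    /* CPU and OptiX cannot be combined yet, so the CPU bit is replaced. */
    mask = DEVICE_MASK_OPTIX;
  }
  else if (compute_device == COMPUTE_DEVICE_OPENCL) {
    mask |= DEVICE_MASK_OPENCL;
  }
  return mask;
}

int main()
{
  std::printf("CUDA mask:  0x%x\n", (unsigned)device_mask_for(COMPUTE_DEVICE_CUDA));
  std::printf("OptiX mask: 0x%x\n", (unsigned)device_mask_for(COMPUTE_DEVICE_OPTIX));
  return 0;
}
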
diff --git a/intern/cycles/blender/blender_device.h b/intern/cycles/blender/blender_device.h
index fd6c045c966..8d2ecac7483 100644
--- a/intern/cycles/blender/blender_device.h
+++ b/intern/cycles/blender/blender_device.h
@@ -18,9 +18,9 @@
#define __BLENDER_DEVICE_H__
#include "MEM_guardedalloc.h"
-#include "RNA_types.h"
#include "RNA_access.h"
#include "RNA_blender_cpp.h"
+#include "RNA_types.h"
#include "device/device.h"
diff --git a/intern/cycles/blender/blender_geometry.cpp b/intern/cycles/blender/blender_geometry.cpp
new file mode 100644
index 00000000000..f7e4623024d
--- /dev/null
+++ b/intern/cycles/blender/blender_geometry.cpp
@@ -0,0 +1,178 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "render/curves.h"
+#include "render/hair.h"
+#include "render/mesh.h"
+#include "render/object.h"
+
+#include "blender/blender_sync.h"
+#include "blender/blender_util.h"
+
+#include "util/util_foreach.h"
+
+CCL_NAMESPACE_BEGIN
+
+Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph,
+ BL::Object &b_ob,
+ BL::Object &b_ob_instance,
+ bool object_updated,
+ bool use_particle_hair)
+{
+ /* Test if we can instance or if the object is modified. */
+ BL::ID b_ob_data = b_ob.data();
+ BL::ID b_key_id = (BKE_object_is_modified(b_ob)) ? b_ob_instance : b_ob_data;
+ GeometryKey key(b_key_id.ptr.data, use_particle_hair);
+ BL::Material material_override = view_layer.material_override;
+ Shader *default_shader = (b_ob.type() == BL::Object::type_VOLUME) ? scene->default_volume :
+ scene->default_surface;
+ Geometry::Type geom_type = (b_ob.type() == BL::Object::type_HAIR || use_particle_hair) ?
+ Geometry::HAIR :
+ Geometry::MESH;
+
+ /* Find shader indices. */
+ vector<Shader *> used_shaders;
+
+ BL::Object::material_slots_iterator slot;
+ for (b_ob.material_slots.begin(slot); slot != b_ob.material_slots.end(); ++slot) {
+ if (material_override) {
+ find_shader(material_override, used_shaders, default_shader);
+ }
+ else {
+ BL::ID b_material(slot->material());
+ find_shader(b_material, used_shaders, default_shader);
+ }
+ }
+
+ if (used_shaders.size() == 0) {
+ if (material_override)
+ find_shader(material_override, used_shaders, default_shader);
+ else
+ used_shaders.push_back(default_shader);
+ }
+
+ /* Test if we need to sync. */
+ Geometry *geom = geometry_map.find(key);
+ bool sync = true;
+ if (geom == NULL) {
+ /* Add new geometry if it did not exist yet. */
+ if (geom_type == Geometry::HAIR) {
+ geom = new Hair();
+ }
+ else {
+ geom = new Mesh();
+ }
+ geometry_map.add(key, geom);
+ }
+ else {
+ /* Test if we need to update existing geometry. */
+ sync = geometry_map.update(geom, b_key_id);
+ }
+
+ if (!sync) {
+ /* If transform was applied to geometry, need full update. */
+ if (object_updated && geom->transform_applied) {
+ ;
+ }
+ /* Test if shaders changed, these can be object level so geometry
+ * does not get tagged for recalc. */
+ else if (geom->used_shaders != used_shaders) {
+ ;
+ }
+ else {
+ /* Even if not tagged for recalc, we may need to sync anyway
+ * because the shader needs different geometry attributes. */
+ bool attribute_recalc = false;
+
+ foreach (Shader *shader, geom->used_shaders) {
+ if (shader->need_update_geometry) {
+ attribute_recalc = true;
+ }
+ }
+
+ if (!attribute_recalc) {
+ return geom;
+ }
+ }
+ }
+
+ /* Ensure we only sync instanced geometry once. */
+ if (geometry_synced.find(geom) != geometry_synced.end()) {
+ return geom;
+ }
+
+ progress.set_sync_status("Synchronizing object", b_ob.name());
+
+ geometry_synced.insert(geom);
+
+ geom->name = ustring(b_ob_data.name().c_str());
+
+ if (b_ob.type() == BL::Object::type_HAIR || use_particle_hair) {
+ Hair *hair = static_cast<Hair *>(geom);
+ sync_hair(b_depsgraph, b_ob, hair, used_shaders);
+ }
+ else if (b_ob.type() == BL::Object::type_VOLUME || object_fluid_gas_domain_find(b_ob)) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ sync_volume(b_ob, mesh, used_shaders);
+ }
+ else {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ sync_mesh(b_depsgraph, b_ob, mesh, used_shaders);
+ }
+
+ return geom;
+}
+
+void BlenderSync::sync_geometry_motion(BL::Depsgraph &b_depsgraph,
+ BL::Object &b_ob,
+ Object *object,
+ float motion_time,
+ bool use_particle_hair)
+{
+ /* Ensure we only sync instanced geometry once. */
+ Geometry *geom = object->geometry;
+
+ if (geometry_motion_synced.find(geom) != geometry_motion_synced.end())
+ return;
+
+ geometry_motion_synced.insert(geom);
+
+ /* Ensure we only motion sync geometry that also had geometry synced, to avoid
+ * unnecessary work and to ensure that its attributes were clear. */
+ if (geometry_synced.find(geom) == geometry_synced.end())
+ return;
+
+ /* Find time matching motion step required by geometry. */
+ int motion_step = geom->motion_step(motion_time);
+ if (motion_step < 0) {
+ return;
+ }
+
+ if (b_ob.type() == BL::Object::type_HAIR || use_particle_hair) {
+ Hair *hair = static_cast<Hair *>(geom);
+ sync_hair_motion(b_depsgraph, b_ob, hair, motion_step);
+ }
+ else if (b_ob.type() == BL::Object::type_VOLUME || object_fluid_gas_domain_find(b_ob)) {
+ /* No volume motion blur support yet. */
+ }
+ else {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ sync_mesh_motion(b_depsgraph, b_ob, mesh, motion_step);
+ }
+}
+
+CCL_NAMESPACE_END
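
To illustrate why instanced objects end up sharing one Cycles Geometry, here is a small standalone sketch built around the GeometryKey comparison defined in blender_id_map.h below: two instances of the same unmodified object data produce equal keys and therefore a single map entry, while the particle-hair flag keeps hair geometry separate. The std::map and the string values are stand-ins for geometry_map and the actual Hair/Mesh objects.

#include <cstdio>
#include <map>
#include <string>

struct GeometryKey {
  void *id;
  bool use_particle_hair;

  bool operator<(const GeometryKey &k) const
  {
    if (id < k.id)
      return true;
    if (id == k.id)
      return use_particle_hair < k.use_particle_hair;
    return false;
  }
};

int main()
{
  int shared_mesh_data = 0; /* stands in for the Blender object-data pointer */
  std::map<GeometryKey, std::string> geometry_map;

  const GeometryKey instance_a{&shared_mesh_data, false};
  const GeometryKey instance_b{&shared_mesh_data, false};
  const GeometryKey hair_of_a{&shared_mesh_data, true};

  geometry_map.emplace(instance_a, "Mesh");
  geometry_map.emplace(instance_b, "Mesh (duplicate key, ignored)");
  geometry_map.emplace(hair_of_a, "Hair");

  /* Prints 2: both mesh instances share one geometry, the hair is separate. */
  std::printf("geometries: %zu\n", geometry_map.size());
  return 0;
}
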
diff --git a/intern/cycles/blender/blender_id_map.h b/intern/cycles/blender/blender_id_map.h
new file mode 100644
index 00000000000..b5f6aaa67a8
--- /dev/null
+++ b/intern/cycles/blender/blender_id_map.h
@@ -0,0 +1,299 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BLENDER_ID_MAP_H__
+#define __BLENDER_ID_MAP_H__
+
+#include <string.h>
+
+#include "util/util_map.h"
+#include "util/util_set.h"
+#include "util/util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* ID Map
+ *
+ * Utility class to map between Blender datablocks and Cycles data structures,
+ * and keep track of recalc tags from the dependency graph. */
+
+template<typename K, typename T> class id_map {
+ public:
+ id_map(vector<T *> *scene_data_)
+ {
+ scene_data = scene_data_;
+ }
+
+ T *find(const BL::ID &id)
+ {
+ return find(id.ptr.owner_id);
+ }
+
+ T *find(const K &key)
+ {
+ if (b_map.find(key) != b_map.end()) {
+ T *data = b_map[key];
+ return data;
+ }
+
+ return NULL;
+ }
+
+ void set_recalc(const BL::ID &id)
+ {
+ b_recalc.insert(id.ptr.data);
+ }
+
+ void set_recalc(void *id_ptr)
+ {
+ b_recalc.insert(id_ptr);
+ }
+
+ bool has_recalc()
+ {
+ return !(b_recalc.empty());
+ }
+
+ void pre_sync()
+ {
+ used_set.clear();
+ }
+
+ /* Add new data. */
+ void add(const K &key, T *data)
+ {
+ assert(find(key) == NULL);
+ scene_data->push_back(data);
+ b_map[key] = data;
+ used(data);
+ }
+
+ /* Update existing data. */
+ bool update(T *data, const BL::ID &id)
+ {
+ return update(data, id, id);
+ }
+ bool update(T *data, const BL::ID &id, const BL::ID &parent)
+ {
+ bool recalc = (b_recalc.find(id.ptr.data) != b_recalc.end());
+ if (parent.ptr.data && parent.ptr.data != id.ptr.data) {
+ recalc = recalc || (b_recalc.find(parent.ptr.data) != b_recalc.end());
+ }
+ used(data);
+ return recalc;
+ }
+
+ /* Combined add and update as needed. */
+ bool add_or_update(T **r_data, const BL::ID &id)
+ {
+ return add_or_update(r_data, id, id, id.ptr.owner_id);
+ }
+ bool add_or_update(T **r_data, const BL::ID &id, const K &key)
+ {
+ return add_or_update(r_data, id, id, key);
+ }
+ bool add_or_update(T **r_data, const BL::ID &id, const BL::ID &parent, const K &key)
+ {
+ T *data = find(key);
+ bool recalc;
+
+ if (!data) {
+ /* Add data if it didn't exist yet. */
+ data = new T();
+ add(key, data);
+ recalc = true;
+ }
+ else {
+ /* Check if an update is needed. */
+ recalc = update(data, id, parent);
+ }
+
+ *r_data = data;
+ return recalc;
+ }
+
+ /* Check if data with the given key is still in use. */
+
+ bool is_used(const K &key)
+ {
+ T *data = find(key);
+ return (data) ? used_set.find(data) != used_set.end() : false;
+ }
+
+ void used(T *data)
+ {
+ /* tag data as still in use */
+ used_set.insert(data);
+ }
+
+ void set_default(T *data)
+ {
+ b_map[NULL] = data;
+ }
+
+ bool post_sync(bool do_delete = true)
+ {
+ /* remove unused data */
+ vector<T *> new_scene_data;
+ typename vector<T *>::iterator it;
+ bool deleted = false;
+
+ for (it = scene_data->begin(); it != scene_data->end(); it++) {
+ T *data = *it;
+
+ if (do_delete && used_set.find(data) == used_set.end()) {
+ delete data;
+ deleted = true;
+ }
+ else
+ new_scene_data.push_back(data);
+ }
+
+ *scene_data = new_scene_data;
+
+ /* update mapping */
+ map<K, T *> new_map;
+ typedef pair<const K, T *> TMapPair;
+ typename map<K, T *>::iterator jt;
+
+ for (jt = b_map.begin(); jt != b_map.end(); jt++) {
+ TMapPair &pair = *jt;
+
+ if (used_set.find(pair.second) != used_set.end())
+ new_map[pair.first] = pair.second;
+ }
+
+ used_set.clear();
+ b_recalc.clear();
+ b_map = new_map;
+
+ return deleted;
+ }
+
+ const map<K, T *> &key_to_scene_data()
+ {
+ return b_map;
+ }
+
+ protected:
+ vector<T *> *scene_data;
+ map<K, T *> b_map;
+ set<T *> used_set;
+ set<void *> b_recalc;
+};
+
+/* Object Key
+ *
+ * To uniquely identify instances, we use the parent, object and persistent instance ID.
+ * We also export a separate object for a mesh and its particle hair. */
+
+enum { OBJECT_PERSISTENT_ID_SIZE = 8 /* MAX_DUPLI_RECUR in Blender. */ };
+
+struct ObjectKey {
+ void *parent;
+ int id[OBJECT_PERSISTENT_ID_SIZE];
+ void *ob;
+ bool use_particle_hair;
+
+ ObjectKey(void *parent_, int id_[OBJECT_PERSISTENT_ID_SIZE], void *ob_, bool use_particle_hair_)
+ : parent(parent_), ob(ob_), use_particle_hair(use_particle_hair_)
+ {
+ if (id_)
+ memcpy(id, id_, sizeof(id));
+ else
+ memset(id, 0, sizeof(id));
+ }
+
+ bool operator<(const ObjectKey &k) const
+ {
+ if (ob < k.ob) {
+ return true;
+ }
+ else if (ob == k.ob) {
+ if (parent < k.parent) {
+ return true;
+ }
+ else if (parent == k.parent) {
+ if (use_particle_hair < k.use_particle_hair) {
+ return true;
+ }
+ else if (use_particle_hair == k.use_particle_hair) {
+ return memcmp(id, k.id, sizeof(id)) < 0;
+ }
+ }
+ }
+
+ return false;
+ }
+};
+
+/* Geometry Key
+ *
+ * We export separate geometry for a mesh and its particle hair, so the key needs to
+ * distinguish between them. */
+
+struct GeometryKey {
+ void *id;
+ bool use_particle_hair;
+
+ GeometryKey(void *id, bool use_particle_hair) : id(id), use_particle_hair(use_particle_hair)
+ {
+ }
+
+ bool operator<(const GeometryKey &k) const
+ {
+ if (id < k.id) {
+ return true;
+ }
+ else if (id == k.id) {
+ if (use_particle_hair < k.use_particle_hair) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+};
+
+/* Particle System Key */
+
+struct ParticleSystemKey {
+ void *ob;
+ int id[OBJECT_PERSISTENT_ID_SIZE];
+
+ ParticleSystemKey(void *ob_, int id_[OBJECT_PERSISTENT_ID_SIZE]) : ob(ob_)
+ {
+ if (id_)
+ memcpy(id, id_, sizeof(id));
+ else
+ memset(id, 0, sizeof(id));
+ }
+
+ bool operator<(const ParticleSystemKey &k) const
+ {
+ /* first id is particle index, we don't compare that */
+ if (ob < k.ob)
+ return true;
+ else if (ob == k.ob)
+ return memcmp(id + 1, k.id + 1, sizeof(int) * (OBJECT_PERSISTENT_ID_SIZE - 1)) < 0;
+
+ return false;
+ }
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __BLENDER_ID_MAP_H__ */
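
A minimal sketch of the id_map lifecycle described above (find or add by key, tag data as used, then drop everything untouched after sync). The Thing struct, the find_or_add lambda and the plain std::map/std::set are simplified stand-ins for the templated class; the real post_sync() additionally rebuilds the key map and clears the recalc set.

#include <cstdio>
#include <map>
#include <set>
#include <vector>

struct Thing {
  int value = 0;
};

int main()
{
  std::vector<Thing *> scene_data;
  std::map<void *, Thing *> b_map;
  std::set<Thing *> used_set;

  int key_a = 0, key_b = 0; /* stand-ins for Blender ID pointers */

  auto find_or_add = [&](void *key) -> Thing * {
    auto it = b_map.find(key);
    if (it != b_map.end()) {
      used_set.insert(it->second); /* update(): tag existing data as still used */
      return it->second;
    }
    Thing *data = new Thing(); /* add(): new data enters the scene list and the map */
    scene_data.push_back(data);
    b_map[key] = data;
    used_set.insert(data);
    return data;
  };

  find_or_add(&key_a);
  find_or_add(&key_b);

  /* The next sync only touches key_a, so the cleanup pass below frees the rest. */
  used_set.clear();
  find_or_add(&key_a);

  std::vector<Thing *> kept;
  for (Thing *data : scene_data) {
    if (used_set.count(data))
      kept.push_back(data);
    else
      delete data;
  }
  scene_data.swap(kept);

  std::printf("remaining after post_sync: %zu\n", scene_data.size()); /* prints 1 */
  return 0;
}
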
diff --git a/intern/cycles/blender/blender_image.cpp b/intern/cycles/blender/blender_image.cpp
new file mode 100644
index 00000000000..459dc1779fb
--- /dev/null
+++ b/intern/cycles/blender/blender_image.cpp
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MEM_guardedalloc.h"
+
+#include "blender/blender_image.h"
+#include "blender/blender_session.h"
+#include "blender/blender_util.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Packed Images */
+
+BlenderImageLoader::BlenderImageLoader(BL::Image b_image, int frame)
+ : b_image(b_image), frame(frame), free_cache(!b_image.has_data())
+{
+}
+
+bool BlenderImageLoader::load_metadata(ImageMetaData &metadata)
+{
+ metadata.width = b_image.size()[0];
+ metadata.height = b_image.size()[1];
+ metadata.depth = 1;
+ metadata.channels = b_image.channels();
+
+ if (b_image.is_float()) {
+ if (metadata.channels == 1) {
+ metadata.type = IMAGE_DATA_TYPE_FLOAT;
+ }
+ else if (metadata.channels == 4) {
+ metadata.type = IMAGE_DATA_TYPE_FLOAT4;
+ }
+ else {
+ return false;
+ }
+
+ /* Float images are already converted on the Blender side,
+ * no need to do anything in Cycles. */
+ metadata.colorspace = u_colorspace_raw;
+ }
+ else {
+ if (metadata.channels == 1) {
+ metadata.type = IMAGE_DATA_TYPE_BYTE;
+ }
+ else if (metadata.channels == 4) {
+ metadata.type = IMAGE_DATA_TYPE_BYTE4;
+ }
+ else {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool BlenderImageLoader::load_pixels(const ImageMetaData &metadata,
+ void *pixels,
+ const size_t pixels_size,
+ const bool associate_alpha)
+{
+ const size_t num_pixels = ((size_t)metadata.width) * metadata.height;
+ const int channels = metadata.channels;
+ const int tile = 0; /* TODO(lukas): Support tiles here? */
+
+ if (b_image.is_float()) {
+ /* image data */
+ float *image_pixels;
+ image_pixels = image_get_float_pixels_for_frame(b_image, frame, tile);
+
+ if (image_pixels && num_pixels * channels == pixels_size) {
+ memcpy(pixels, image_pixels, pixels_size * sizeof(float));
+ }
+ else {
+ if (channels == 1) {
+ memset(pixels, 0, num_pixels * sizeof(float));
+ }
+ else {
+ const size_t num_pixels_safe = pixels_size / channels;
+ float *fp = (float *)pixels;
+ for (int i = 0; i < num_pixels_safe; i++, fp += channels) {
+ fp[0] = 1.0f;
+ fp[1] = 0.0f;
+ fp[2] = 1.0f;
+ if (channels == 4) {
+ fp[3] = 1.0f;
+ }
+ }
+ }
+ }
+
+ if (image_pixels) {
+ MEM_freeN(image_pixels);
+ }
+ }
+ else {
+ unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame, tile);
+
+ if (image_pixels && num_pixels * channels == pixels_size) {
+ memcpy(pixels, image_pixels, pixels_size * sizeof(unsigned char));
+ }
+ else {
+ if (channels == 1) {
+ memset(pixels, 0, pixels_size * sizeof(unsigned char));
+ }
+ else {
+ const size_t num_pixels_safe = pixels_size / channels;
+ unsigned char *cp = (unsigned char *)pixels;
+ for (size_t i = 0; i < num_pixels_safe; i++, cp += channels) {
+ cp[0] = 255;
+ cp[1] = 0;
+ cp[2] = 255;
+ if (channels == 4) {
+ cp[3] = 255;
+ }
+ }
+ }
+ }
+
+ if (image_pixels) {
+ MEM_freeN(image_pixels);
+ }
+
+ if (associate_alpha) {
+ /* Premultiply, byte images are always straight for Blender. */
+ unsigned char *cp = (unsigned char *)pixels;
+ for (size_t i = 0; i < num_pixels; i++, cp += channels) {
+ cp[0] = (cp[0] * cp[3]) >> 8;
+ cp[1] = (cp[1] * cp[3]) >> 8;
+ cp[2] = (cp[2] * cp[3]) >> 8;
+ }
+ }
+ }
+
+ /* Free image buffers to save memory during render. */
+ if (free_cache) {
+ b_image.buffers_free();
+ }
+
+ return true;
+}
+
+string BlenderImageLoader::name() const
+{
+ return BL::Image(b_image).name();
+}
+
+bool BlenderImageLoader::equals(const ImageLoader &other) const
+{
+ const BlenderImageLoader &other_loader = (const BlenderImageLoader &)other;
+ return b_image == other_loader.b_image && frame == other_loader.frame;
+}
+
+/* Point Density */
+
+BlenderPointDensityLoader::BlenderPointDensityLoader(BL::Depsgraph b_depsgraph,
+ BL::ShaderNodeTexPointDensity b_node)
+ : b_depsgraph(b_depsgraph), b_node(b_node)
+{
+}
+
+bool BlenderPointDensityLoader::load_metadata(ImageMetaData &metadata)
+{
+ metadata.channels = 4;
+ metadata.width = b_node.resolution();
+ metadata.height = metadata.width;
+ metadata.depth = metadata.width;
+ metadata.type = IMAGE_DATA_TYPE_FLOAT4;
+ return true;
+}
+
+bool BlenderPointDensityLoader::load_pixels(const ImageMetaData &,
+ void *pixels,
+ const size_t,
+ const bool)
+{
+ int length;
+ b_node.calc_point_density(b_depsgraph, &length, (float **)&pixels);
+ return true;
+}
+
+void BlenderSession::builtin_images_load()
+{
+ /* Force builtin images to be loaded along with Blender data sync. This
+ * is needed because we may be reading from depsgraph evaluated data which
+ * can be freed by Blender before Cycles reads it.
+ *
+ * TODO: the assumption that no further access to builtin image data will
+ * happen is really weak, and likely to break in the future. We should find
+ * a better solution to hand over the data directly to the image manager
+ * instead of through callbacks whose timing is difficult to control. */
+ ImageManager *manager = session->scene->image_manager;
+ Device *device = session->device;
+ manager->device_load_builtin(device, session->scene, session->progress);
+}
+
+string BlenderPointDensityLoader::name() const
+{
+ return BL::ShaderNodeTexPointDensity(b_node).name();
+}
+
+bool BlenderPointDensityLoader::equals(const ImageLoader &other) const
+{
+ const BlenderPointDensityLoader &other_loader = (const BlenderPointDensityLoader &)other;
+ return b_node == other_loader.b_node && b_depsgraph == other_loader.b_depsgraph;
+}
+
+CCL_NAMESPACE_END
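
A tiny worked example of the associate_alpha step in BlenderImageLoader::load_pixels() above: byte images arrive with straight alpha, so each colour channel is multiplied by alpha using an 8-bit shift rather than a float multiply. The pixel values are arbitrary.

#include <cstdio>

int main()
{
  unsigned char rgba[4] = {200, 100, 50, 128}; /* straight (unassociated) alpha */

  /* Same operation as the associate_alpha loop: channel * alpha, scaled back to bytes. */
  for (int c = 0; c < 3; c++) {
    rgba[c] = (unsigned char)((rgba[c] * rgba[3]) >> 8);
  }

  /* 200 -> 100, 100 -> 50, 50 -> 25 with alpha 128 (roughly 0.5). */
  std::printf("premultiplied: %d %d %d %d\n", rgba[0], rgba[1], rgba[2], rgba[3]);
  return 0;
}
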
diff --git a/intern/cycles/blender/blender_image.h b/intern/cycles/blender/blender_image.h
new file mode 100644
index 00000000000..b58a159a6ba
--- /dev/null
+++ b/intern/cycles/blender/blender_image.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BLENDER_IMAGE_H__
+#define __BLENDER_IMAGE_H__
+
+#include "RNA_blender_cpp.h"
+
+#include "render/image.h"
+
+CCL_NAMESPACE_BEGIN
+
+class BlenderImageLoader : public ImageLoader {
+ public:
+ BlenderImageLoader(BL::Image b_image, int frame);
+
+ bool load_metadata(ImageMetaData &metadata) override;
+ bool load_pixels(const ImageMetaData &metadata,
+ void *pixels,
+ const size_t pixels_size,
+ const bool associate_alpha) override;
+ string name() const override;
+ bool equals(const ImageLoader &other) const override;
+
+ BL::Image b_image;
+ int frame;
+ bool free_cache;
+};
+
+class BlenderPointDensityLoader : public ImageLoader {
+ public:
+ BlenderPointDensityLoader(BL::Depsgraph depsgraph, BL::ShaderNodeTexPointDensity b_node);
+
+ bool load_metadata(ImageMetaData &metadata) override;
+ bool load_pixels(const ImageMetaData &metadata,
+ void *pixels,
+ const size_t pixels_size,
+ const bool associate_alpha) override;
+ string name() const override;
+ bool equals(const ImageLoader &other) const override;
+
+ BL::Depsgraph b_depsgraph;
+ BL::ShaderNodeTexPointDensity b_node;
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __BLENDER_IMAGE_H__ */
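
As a sketch of the two-step loader protocol declared above (load_metadata() first, then load_pixels() into a caller-provided buffer), here is a standalone toy loader that fills a constant-colour float image. The Loader/MetaData types are simplified stand-ins for ImageLoader/ImageMetaData, and ConstantColorLoader is invented for the example.

#include <cstdio>
#include <string>
#include <vector>

struct MetaData {
  int width = 0, height = 0, channels = 0;
};

class Loader {
 public:
  virtual ~Loader() = default;
  virtual bool load_metadata(MetaData &metadata) = 0;
  virtual bool load_pixels(const MetaData &metadata, float *pixels, size_t pixels_size) = 0;
  virtual std::string name() const = 0;
};

class ConstantColorLoader : public Loader {
 public:
  ConstantColorLoader(float r, float g, float b) : r_(r), g_(g), b_(b) {}

  bool load_metadata(MetaData &metadata) override
  {
    metadata.width = 2;
    metadata.height = 2;
    metadata.channels = 4;
    return true;
  }

  bool load_pixels(const MetaData &metadata, float *pixels, size_t pixels_size) override
  {
    const size_t num_pixels = (size_t)metadata.width * metadata.height;
    if (pixels_size < num_pixels * metadata.channels) {
      return false;
    }
    for (size_t i = 0; i < num_pixels; i++, pixels += metadata.channels) {
      pixels[0] = r_;
      pixels[1] = g_;
      pixels[2] = b_;
      pixels[3] = 1.0f;
    }
    return true;
  }

  std::string name() const override
  {
    return "constant_color";
  }

 private:
  float r_, g_, b_;
};

int main()
{
  ConstantColorLoader loader(1.0f, 0.5f, 0.25f);
  MetaData metadata;
  loader.load_metadata(metadata);

  std::vector<float> pixels((size_t)metadata.width * metadata.height * metadata.channels);
  loader.load_pixels(metadata, pixels.data(), pixels.size());
  std::printf("%s: first pixel %.2f %.2f %.2f %.2f\n",
              loader.name().c_str(), pixels[0], pixels[1], pixels[2], pixels[3]);
  return 0;
}
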
diff --git a/intern/cycles/blender/blender_light.cpp b/intern/cycles/blender/blender_light.cpp
new file mode 100644
index 00000000000..6f95821e31e
--- /dev/null
+++ b/intern/cycles/blender/blender_light.cpp
@@ -0,0 +1,212 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "render/light.h"
+
+#include "blender/blender_sync.h"
+#include "blender/blender_util.h"
+
+#include "util/util_hash.h"
+
+CCL_NAMESPACE_BEGIN
+
+void BlenderSync::sync_light(BL::Object &b_parent,
+ int persistent_id[OBJECT_PERSISTENT_ID_SIZE],
+ BL::Object &b_ob,
+ BL::Object &b_ob_instance,
+ int random_id,
+ Transform &tfm,
+ bool *use_portal)
+{
+ /* test if we need to sync */
+ Light *light;
+ ObjectKey key(b_parent, persistent_id, b_ob_instance, false);
+ BL::Light b_light(b_ob.data());
+
+ /* Update if either object or light data changed. */
+ if (!light_map.add_or_update(&light, b_ob, b_parent, key)) {
+ Shader *shader;
+ if (!shader_map.add_or_update(&shader, b_light)) {
+ if (light->is_portal)
+ *use_portal = true;
+ return;
+ }
+ }
+
+ /* type */
+ switch (b_light.type()) {
+ case BL::Light::type_POINT: {
+ BL::PointLight b_point_light(b_light);
+ light->size = b_point_light.shadow_soft_size();
+ light->type = LIGHT_POINT;
+ break;
+ }
+ case BL::Light::type_SPOT: {
+ BL::SpotLight b_spot_light(b_light);
+ light->size = b_spot_light.shadow_soft_size();
+ light->type = LIGHT_SPOT;
+ light->spot_angle = b_spot_light.spot_size();
+ light->spot_smooth = b_spot_light.spot_blend();
+ break;
+ }
+ /* Hemi lights were removed in 2.8. */
+ // case BL::Light::type_HEMI: {
+ // light->type = LIGHT_DISTANT;
+ // light->size = 0.0f;
+ // break;
+ // }
+ case BL::Light::type_SUN: {
+ BL::SunLight b_sun_light(b_light);
+ light->angle = b_sun_light.angle();
+ light->type = LIGHT_DISTANT;
+ break;
+ }
+ case BL::Light::type_AREA: {
+ BL::AreaLight b_area_light(b_light);
+ light->size = 1.0f;
+ light->axisu = transform_get_column(&tfm, 0);
+ light->axisv = transform_get_column(&tfm, 1);
+ light->sizeu = b_area_light.size();
+ switch (b_area_light.shape()) {
+ case BL::AreaLight::shape_SQUARE:
+ light->sizev = light->sizeu;
+ light->round = false;
+ break;
+ case BL::AreaLight::shape_RECTANGLE:
+ light->sizev = b_area_light.size_y();
+ light->round = false;
+ break;
+ case BL::AreaLight::shape_DISK:
+ light->sizev = light->sizeu;
+ light->round = true;
+ break;
+ case BL::AreaLight::shape_ELLIPSE:
+ light->sizev = b_area_light.size_y();
+ light->round = true;
+ break;
+ }
+ light->type = LIGHT_AREA;
+ break;
+ }
+ }
+
+ /* strength */
+ light->strength = get_float3(b_light.color());
+ light->strength *= BL::PointLight(b_light).energy();
+
+ /* location and (inverted!) direction */
+ light->co = transform_get_column(&tfm, 3);
+ light->dir = -transform_get_column(&tfm, 2);
+ light->tfm = tfm;
+
+ /* shader */
+ vector<Shader *> used_shaders;
+ find_shader(b_light, used_shaders, scene->default_light);
+ light->shader = used_shaders[0];
+
+ /* shadow */
+ PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
+ PointerRNA clight = RNA_pointer_get(&b_light.ptr, "cycles");
+ light->cast_shadow = get_boolean(clight, "cast_shadow");
+ light->use_mis = get_boolean(clight, "use_multiple_importance_sampling");
+
+ int samples = get_int(clight, "samples");
+ if (get_boolean(cscene, "use_square_samples"))
+ light->samples = samples * samples;
+ else
+ light->samples = samples;
+
+ light->max_bounces = get_int(clight, "max_bounces");
+
+ if (b_ob != b_ob_instance) {
+ light->random_id = random_id;
+ }
+ else {
+ light->random_id = hash_uint2(hash_string(b_ob.name().c_str()), 0);
+ }
+
+ if (light->type == LIGHT_AREA)
+ light->is_portal = get_boolean(clight, "is_portal");
+ else
+ light->is_portal = false;
+
+ if (light->is_portal)
+ *use_portal = true;
+
+ /* visibility */
+ uint visibility = object_ray_visibility(b_ob);
+ light->use_diffuse = (visibility & PATH_RAY_DIFFUSE) != 0;
+ light->use_glossy = (visibility & PATH_RAY_GLOSSY) != 0;
+ light->use_transmission = (visibility & PATH_RAY_TRANSMIT) != 0;
+ light->use_scatter = (visibility & PATH_RAY_VOLUME_SCATTER) != 0;
+
+ /* tag */
+ light->tag_update(scene);
+}
+
+void BlenderSync::sync_background_light(BL::SpaceView3D &b_v3d, bool use_portal)
+{
+ BL::World b_world = b_scene.world();
+
+ if (b_world) {
+ PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
+ PointerRNA cworld = RNA_pointer_get(&b_world.ptr, "cycles");
+
+ enum SamplingMethod { SAMPLING_NONE = 0, SAMPLING_AUTOMATIC, SAMPLING_MANUAL, SAMPLING_NUM };
+ int sampling_method = get_enum(cworld, "sampling_method", SAMPLING_NUM, SAMPLING_AUTOMATIC);
+ bool sample_as_light = (sampling_method != SAMPLING_NONE);
+
+ if (sample_as_light || use_portal) {
+ /* test if we need to sync */
+ Light *light;
+ ObjectKey key(b_world, 0, b_world, false);
+
+ if (light_map.add_or_update(&light, b_world, b_world, key) || world_recalc ||
+ b_world.ptr.data != world_map) {
+ light->type = LIGHT_BACKGROUND;
+ if (sampling_method == SAMPLING_MANUAL) {
+ light->map_resolution = get_int(cworld, "sample_map_resolution");
+ }
+ else {
+ light->map_resolution = 0;
+ }
+ light->shader = scene->default_background;
+ light->use_mis = sample_as_light;
+ light->max_bounces = get_int(cworld, "max_bounces");
+
+ /* force enable light again when world is resynced */
+ light->is_enabled = true;
+
+ int samples = get_int(cworld, "samples");
+ if (get_boolean(cscene, "use_square_samples"))
+ light->samples = samples * samples;
+ else
+ light->samples = samples;
+
+ light->tag_update(scene);
+ light_map.set_recalc(b_world);
+ }
+ }
+ }
+
+ world_map = b_world.ptr.data;
+ world_recalc = false;
+ viewport_parameters = BlenderViewportParameters(b_v3d);
+}
+
+CCL_NAMESPACE_END
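
A small sketch of the use_square_samples handling that appears in both sync_light() and sync_background_light() above: the per-light sample count is squared when the scene option is enabled, otherwise used as-is.

#include <cstdio>

static int light_samples(int samples, bool use_square_samples)
{
  return use_square_samples ? samples * samples : samples;
}

int main()
{
  std::printf("samples=4, squared:  %d\n", light_samples(4, true));  /* 16 */
  std::printf("samples=4, straight: %d\n", light_samples(4, false)); /* 4 */
  return 0;
}
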
diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp
index de594f4fb6c..49407799fcd 100644
--- a/intern/cycles/blender/blender_mesh.cpp
+++ b/intern/cycles/blender/blender_mesh.cpp
@@ -14,20 +14,23 @@
* limitations under the License.
*/
+#include "render/camera.h"
+#include "render/colorspace.h"
#include "render/mesh.h"
#include "render/object.h"
#include "render/scene.h"
-#include "render/camera.h"
-#include "blender/blender_sync.h"
#include "blender/blender_session.h"
+#include "blender/blender_sync.h"
#include "blender/blender_util.h"
#include "subd/subd_patch.h"
#include "subd/subd_split.h"
#include "util/util_algorithm.h"
+#include "util/util_disjoint_set.h"
#include "util/util_foreach.h"
+#include "util/util_hash.h"
#include "util/util_logging.h"
#include "util/util_math.h"
@@ -275,102 +278,99 @@ static void mikk_compute_tangents(
genTangSpaceDefault(&context);
}
-/* Create Volume Attribute */
-
-static void create_mesh_volume_attribute(
- BL::Object &b_ob, Mesh *mesh, ImageManager *image_manager, AttributeStandard std, float frame)
+/* Create sculpt vertex color attributes. */
+static void attr_create_sculpt_vertex_color(Scene *scene,
+ Mesh *mesh,
+ BL::Mesh &b_mesh,
+ bool subdivision)
{
- BL::SmokeDomainSettings b_domain = object_smoke_domain_find(b_ob);
+ BL::Mesh::sculpt_vertex_colors_iterator l;
- if (!b_domain)
- return;
+ for (b_mesh.sculpt_vertex_colors.begin(l); l != b_mesh.sculpt_vertex_colors.end(); ++l) {
+ const bool active_render = l->active_render();
+ AttributeStandard vcol_std = (active_render) ? ATTR_STD_VERTEX_COLOR : ATTR_STD_NONE;
+ ustring vcol_name = ustring(l->name().c_str());
- mesh->volume_isovalue = b_domain.clipping();
-
- Attribute *attr = mesh->attributes.add(std);
- VoxelAttribute *volume_data = attr->data_voxel();
- ImageMetaData metadata;
- bool animated = false;
- bool use_alpha = true;
-
- volume_data->manager = image_manager;
- volume_data->slot = image_manager->add_image(Attribute::standard_name(std),
- b_ob.ptr.data,
- animated,
- frame,
- INTERPOLATION_LINEAR,
- EXTENSION_CLIP,
- use_alpha,
- metadata);
-}
+ const bool need_vcol = mesh->need_attribute(scene, vcol_name) ||
+ mesh->need_attribute(scene, vcol_std);
-static void create_mesh_volume_attributes(Scene *scene, BL::Object &b_ob, Mesh *mesh, float frame)
-{
- /* for smoke volume rendering */
- if (mesh->need_attribute(scene, ATTR_STD_VOLUME_DENSITY))
- create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_DENSITY, frame);
- if (mesh->need_attribute(scene, ATTR_STD_VOLUME_COLOR))
- create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_COLOR, frame);
- if (mesh->need_attribute(scene, ATTR_STD_VOLUME_FLAME))
- create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_FLAME, frame);
- if (mesh->need_attribute(scene, ATTR_STD_VOLUME_HEAT))
- create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_HEAT, frame);
- if (mesh->need_attribute(scene, ATTR_STD_VOLUME_TEMPERATURE))
- create_mesh_volume_attribute(
- b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_TEMPERATURE, frame);
- if (mesh->need_attribute(scene, ATTR_STD_VOLUME_VELOCITY))
- create_mesh_volume_attribute(
- b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_VELOCITY, frame);
+ if (!need_vcol) {
+ continue;
+ }
+
+ AttributeSet &attributes = (subdivision) ? mesh->subd_attributes : mesh->attributes;
+ Attribute *vcol_attr = attributes.add(vcol_name, TypeRGBA, ATTR_ELEMENT_VERTEX);
+ vcol_attr->std = vcol_std;
+
+ float4 *cdata = vcol_attr->data_float4();
+ int numverts = b_mesh.vertices.length();
+
+ for (int i = 0; i < numverts; i++) {
+ *(cdata++) = get_float4(l->data[i].color());
+ }
+ }
}
/* Create vertex color attributes. */
static void attr_create_vertex_color(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, bool subdivision)
{
- if (subdivision) {
- BL::Mesh::vertex_colors_iterator l;
+ BL::Mesh::vertex_colors_iterator l;
+
+ for (b_mesh.vertex_colors.begin(l); l != b_mesh.vertex_colors.end(); ++l) {
+ const bool active_render = l->active_render();
+ AttributeStandard vcol_std = (active_render) ? ATTR_STD_VERTEX_COLOR : ATTR_STD_NONE;
+ ustring vcol_name = ustring(l->name().c_str());
- for (b_mesh.vertex_colors.begin(l); l != b_mesh.vertex_colors.end(); ++l) {
- if (!mesh->need_attribute(scene, ustring(l->name().c_str())))
- continue;
+ const bool need_vcol = mesh->need_attribute(scene, vcol_name) ||
+ mesh->need_attribute(scene, vcol_std);
- Attribute *attr = mesh->subd_attributes.add(
- ustring(l->name().c_str()), TypeDesc::TypeColor, ATTR_ELEMENT_CORNER_BYTE);
+ if (!need_vcol) {
+ continue;
+ }
+
+ Attribute *vcol_attr = NULL;
+
+ if (subdivision) {
+ if (active_render) {
+ vcol_attr = mesh->subd_attributes.add(vcol_std, vcol_name);
+ }
+ else {
+ vcol_attr = mesh->subd_attributes.add(vcol_name, TypeRGBA, ATTR_ELEMENT_CORNER_BYTE);
+ }
BL::Mesh::polygons_iterator p;
- uchar4 *cdata = attr->data_uchar4();
+ uchar4 *cdata = vcol_attr->data_uchar4();
for (b_mesh.polygons.begin(p); p != b_mesh.polygons.end(); ++p) {
int n = p->loop_total();
for (int i = 0; i < n; i++) {
- float3 color = get_float3(l->data[p->loop_start() + i].color());
+ float4 color = get_float4(l->data[p->loop_start() + i].color());
/* Compress/encode vertex color using the sRGB curve. */
- *(cdata++) = color_float_to_byte(color_srgb_to_linear_v3(color));
+ *(cdata++) = color_float4_to_uchar4(color_srgb_to_linear_v4(color));
}
}
}
- }
- else {
- BL::Mesh::vertex_colors_iterator l;
- for (b_mesh.vertex_colors.begin(l); l != b_mesh.vertex_colors.end(); ++l) {
- if (!mesh->need_attribute(scene, ustring(l->name().c_str())))
- continue;
-
- Attribute *attr = mesh->attributes.add(
- ustring(l->name().c_str()), TypeDesc::TypeColor, ATTR_ELEMENT_CORNER_BYTE);
+ else {
+ if (active_render) {
+ vcol_attr = mesh->attributes.add(vcol_std, vcol_name);
+ }
+ else {
+ vcol_attr = mesh->attributes.add(vcol_name, TypeRGBA, ATTR_ELEMENT_CORNER_BYTE);
+ }
BL::Mesh::loop_triangles_iterator t;
- uchar4 *cdata = attr->data_uchar4();
+ uchar4 *cdata = vcol_attr->data_uchar4();
for (b_mesh.loop_triangles.begin(t); t != b_mesh.loop_triangles.end(); ++t) {
int3 li = get_int3(t->loops());
- float3 c1 = get_float3(l->data[li[0]].color());
- float3 c2 = get_float3(l->data[li[1]].color());
- float3 c3 = get_float3(l->data[li[2]].color());
+ float4 c1 = get_float4(l->data[li[0]].color());
+ float4 c2 = get_float4(l->data[li[1]].color());
+ float4 c3 = get_float4(l->data[li[2]].color());
/* Compress/encode vertex color using the sRGB curve. */
- cdata[0] = color_float_to_byte(color_srgb_to_linear_v3(c1));
- cdata[1] = color_float_to_byte(color_srgb_to_linear_v3(c2));
- cdata[2] = color_float_to_byte(color_srgb_to_linear_v3(c3));
+ cdata[0] = color_float4_to_uchar4(color_srgb_to_linear_v4(c1));
+ cdata[1] = color_float4_to_uchar4(color_srgb_to_linear_v4(c2));
+ cdata[2] = color_float4_to_uchar4(color_srgb_to_linear_v4(c3));
cdata += 3;
}
}
@@ -678,6 +678,55 @@ static void attr_create_pointiness(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, b
}
}
+/* The Random Per Island attribute is a random float associated with each
+ * connected component (island) of the mesh. The attribute is computed by
+ * first classifying the vertices into different sets using a Disjoint Set
+ * data structure. Then the index of the root of each vertex (which is the
+ * representative of the set the vertex belongs to) is hashed and stored.
+ *
+ * We are using a face attribute to avoid interpolation during rendering,
+ * allowing the user to safely hash the output further. Had we used a vertex
+ * attribute, the interpolation would introduce very slight variations,
+ * making the output unsafe to hash. */
+static void attr_create_random_per_island(Scene *scene,
+ Mesh *mesh,
+ BL::Mesh &b_mesh,
+ bool subdivision)
+{
+ if (!mesh->need_attribute(scene, ATTR_STD_RANDOM_PER_ISLAND)) {
+ return;
+ }
+
+ int number_of_vertices = b_mesh.vertices.length();
+ if (number_of_vertices == 0) {
+ return;
+ }
+
+ DisjointSet vertices_sets(number_of_vertices);
+
+ BL::Mesh::edges_iterator e;
+ for (b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e) {
+ vertices_sets.join(e->vertices()[0], e->vertices()[1]);
+ }
+
+ AttributeSet &attributes = (subdivision) ? mesh->subd_attributes : mesh->attributes;
+ Attribute *attribute = attributes.add(ATTR_STD_RANDOM_PER_ISLAND);
+ float *data = attribute->data_float();
+
+ if (!subdivision) {
+ BL::Mesh::loop_triangles_iterator t;
+ for (b_mesh.loop_triangles.begin(t); t != b_mesh.loop_triangles.end(); ++t) {
+ data[t->index()] = hash_uint_to_float(vertices_sets.find(t->vertices()[0]));
+ }
+ }
+ else {
+ BL::Mesh::polygons_iterator p;
+ for (b_mesh.polygons.begin(p); p != b_mesh.polygons.end(); ++p) {
+ data[p->index()] = hash_uint_to_float(vertices_sets.find(p->vertices()[0]));
+ }
+ }
+}
+
/* Create Mesh */
static void create_mesh(Scene *scene,
@@ -798,6 +847,8 @@ static void create_mesh(Scene *scene,
*/
attr_create_pointiness(scene, mesh, b_mesh, subdivision);
attr_create_vertex_color(scene, mesh, b_mesh, subdivision);
+ attr_create_sculpt_vertex_color(scene, mesh, b_mesh, subdivision);
+ attr_create_random_per_island(scene, mesh, b_mesh, subdivision);
if (subdivision) {
attr_create_subd_uv_map(scene, mesh, b_mesh, subdivide_uvs);
@@ -806,9 +857,9 @@ static void create_mesh(Scene *scene,
attr_create_uv_map(scene, mesh, b_mesh);
}
- /* for volume objects, create a matrix to transform from object space to
+ /* For volume objects, create a matrix to transform from object space to
* mesh texture space. this does not work with deformations but that can
- * probably only be done well with a volume grid mapping of coordinates */
+ * probably only be done well with a volume grid mapping of coordinates. */
if (mesh->need_attribute(scene, ATTR_STD_GENERATED_TRANSFORM)) {
Attribute *attr = mesh->attributes.add(ATTR_STD_GENERATED_TRANSFORM);
Transform *tfm = attr->data_transform();
@@ -877,13 +928,13 @@ static void sync_mesh_fluid_motion(BL::Object &b_ob, Scene *scene, Mesh *mesh)
if (scene->need_motion() == Scene::MOTION_NONE)
return;
- BL::DomainFluidSettings b_fluid_domain = object_fluid_domain_find(b_ob);
+ BL::FluidDomainSettings b_fluid_domain = object_fluid_liquid_domain_find(b_ob);
if (!b_fluid_domain)
return;
/* If the mesh has modifiers following the fluid domain we can't export motion. */
- if (b_fluid_domain.fluid_mesh_vertices.length() != mesh->verts.size())
+ if (b_fluid_domain.mesh_vertices.length() != mesh->verts.size())
return;
/* Find or add attribute */
@@ -900,93 +951,21 @@ static void sync_mesh_fluid_motion(BL::Object &b_ob, Scene *scene, Mesh *mesh)
float relative_time = motion_times[step] * scene->motion_shutter_time() * 0.5f;
float3 *mP = attr_mP->data_float3() + step * mesh->verts.size();
- BL::DomainFluidSettings::fluid_mesh_vertices_iterator fvi;
+ BL::FluidDomainSettings::mesh_vertices_iterator svi;
int i = 0;
- for (b_fluid_domain.fluid_mesh_vertices.begin(fvi);
- fvi != b_fluid_domain.fluid_mesh_vertices.end();
- ++fvi, ++i) {
- mP[i] = P[i] + get_float3(fvi->velocity()) * relative_time;
+ for (b_fluid_domain.mesh_vertices.begin(svi); svi != b_fluid_domain.mesh_vertices.end();
+ ++svi, ++i) {
+ mP[i] = P[i] + get_float3(svi->velocity()) * relative_time;
}
}
}
-Mesh *BlenderSync::sync_mesh(BL::Depsgraph &b_depsgraph,
- BL::Object &b_ob,
- BL::Object &b_ob_instance,
- bool object_updated,
- bool show_self,
- bool show_particles)
+void BlenderSync::sync_mesh(BL::Depsgraph b_depsgraph,
+ BL::Object b_ob,
+ Mesh *mesh,
+ const vector<Shader *> &used_shaders)
{
- /* test if we can instance or if the object is modified */
- BL::ID b_ob_data = b_ob.data();
- BL::ID key = (BKE_object_is_modified(b_ob)) ? b_ob_instance : b_ob_data;
- BL::Material material_override = view_layer.material_override;
-
- /* find shader indices */
- vector<Shader *> used_shaders;
-
- BL::Object::material_slots_iterator slot;
- for (b_ob.material_slots.begin(slot); slot != b_ob.material_slots.end(); ++slot) {
- if (material_override) {
- find_shader(material_override, used_shaders, scene->default_surface);
- }
- else {
- BL::ID b_material(slot->material());
- find_shader(b_material, used_shaders, scene->default_surface);
- }
- }
-
- if (used_shaders.size() == 0) {
- if (material_override)
- find_shader(material_override, used_shaders, scene->default_surface);
- else
- used_shaders.push_back(scene->default_surface);
- }
-
- /* test if we need to sync */
- int requested_geometry_flags = Mesh::GEOMETRY_NONE;
- if (view_layer.use_surfaces) {
- requested_geometry_flags |= Mesh::GEOMETRY_TRIANGLES;
- }
- if (view_layer.use_hair) {
- requested_geometry_flags |= Mesh::GEOMETRY_CURVES;
- }
- Mesh *mesh;
-
- if (!mesh_map.sync(&mesh, key)) {
- /* if transform was applied to mesh, need full update */
- if (object_updated && mesh->transform_applied)
- ;
- /* test if shaders changed, these can be object level so mesh
- * does not get tagged for recalc */
- else if (mesh->used_shaders != used_shaders)
- ;
- else if (requested_geometry_flags != mesh->geometry_flags)
- ;
- else {
- /* even if not tagged for recalc, we may need to sync anyway
- * because the shader needs different mesh attributes */
- bool attribute_recalc = false;
-
- foreach (Shader *shader, mesh->used_shaders)
- if (shader->need_update_mesh)
- attribute_recalc = true;
-
- if (!attribute_recalc)
- return mesh;
- }
- }
-
- /* ensure we only sync instanced meshes once */
- if (mesh_synced.find(mesh) != mesh_synced.end())
- return mesh;
-
- progress.set_sync_status("Synchronizing object", b_ob.name());
-
- mesh_synced.insert(mesh);
-
- /* create derived mesh */
array<int> oldtriangles;
array<Mesh::SubdFace> oldsubd_faces;
array<int> oldsubd_face_corners;
@@ -994,146 +973,73 @@ Mesh *BlenderSync::sync_mesh(BL::Depsgraph &b_depsgraph,
oldsubd_faces.steal_data(mesh->subd_faces);
oldsubd_face_corners.steal_data(mesh->subd_face_corners);
- /* compares curve_keys rather than strands in order to handle quick hair
- * adjustments in dynamic BVH - other methods could probably do this better*/
- array<float3> oldcurve_keys;
- array<float> oldcurve_radius;
- oldcurve_keys.steal_data(mesh->curve_keys);
- oldcurve_radius.steal_data(mesh->curve_radius);
-
mesh->clear();
mesh->used_shaders = used_shaders;
- mesh->name = ustring(b_ob_data.name().c_str());
- if (requested_geometry_flags != Mesh::GEOMETRY_NONE) {
+ mesh->subdivision_type = Mesh::SUBDIVISION_NONE;
+
+ if (view_layer.use_surfaces) {
/* Adaptive subdivision setup. Not for baking since that requires
* exact mapping to the Blender mesh. */
- if (scene->bake_manager->get_baking()) {
- mesh->subdivision_type = Mesh::SUBDIVISION_NONE;
- }
- else {
+ if (!scene->bake_manager->get_baking()) {
mesh->subdivision_type = object_subdivision_type(b_ob, preview, experimental);
}
/* For some reason, meshes do not need this... */
bool need_undeformed = mesh->need_attribute(scene, ATTR_STD_GENERATED);
-
BL::Mesh b_mesh = object_to_mesh(
b_data, b_ob, b_depsgraph, need_undeformed, mesh->subdivision_type);
if (b_mesh) {
/* Sync mesh itself. */
- if (view_layer.use_surfaces && show_self) {
- if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE)
- create_subd_mesh(scene, mesh, b_ob, b_mesh, used_shaders, dicing_rate, max_subdivisions);
- else
- create_mesh(scene, mesh, b_mesh, used_shaders, false);
-
- create_mesh_volume_attributes(scene, b_ob, mesh, b_scene.frame_current());
- }
-
- /* Sync hair curves. */
- if (view_layer.use_hair && show_particles &&
- mesh->subdivision_type == Mesh::SUBDIVISION_NONE) {
- sync_curves(mesh, b_mesh, b_ob, false);
- }
+ if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE)
+ create_subd_mesh(
+ scene, mesh, b_ob, b_mesh, mesh->used_shaders, dicing_rate, max_subdivisions);
+ else
+ create_mesh(scene, mesh, b_mesh, mesh->used_shaders, false);
free_object_to_mesh(b_data, b_ob, b_mesh);
}
}
- mesh->geometry_flags = requested_geometry_flags;
- /* fluid motion */
+ /* Mesh fluid motion (Mantaflow). */
sync_mesh_fluid_motion(b_ob, scene, mesh);
/* tag update */
bool rebuild = (oldtriangles != mesh->triangles) || (oldsubd_faces != mesh->subd_faces) ||
- (oldsubd_face_corners != mesh->subd_face_corners) ||
- (oldcurve_keys != mesh->curve_keys) || (oldcurve_radius != mesh->curve_radius);
+ (oldsubd_face_corners != mesh->subd_face_corners);
mesh->tag_update(scene, rebuild);
-
- return mesh;
}
-void BlenderSync::sync_mesh_motion(BL::Depsgraph &b_depsgraph,
- BL::Object &b_ob,
- Object *object,
- float motion_time)
+void BlenderSync::sync_mesh_motion(BL::Depsgraph b_depsgraph,
+ BL::Object b_ob,
+ Mesh *mesh,
+ int motion_step)
{
- /* ensure we only sync instanced meshes once */
- Mesh *mesh = object->mesh;
-
- if (mesh_motion_synced.find(mesh) != mesh_motion_synced.end())
- return;
-
- mesh_motion_synced.insert(mesh);
-
- /* ensure we only motion sync meshes that also had mesh synced, to avoid
- * unnecessary work and to ensure that its attributes were clear */
- if (mesh_synced.find(mesh) == mesh_synced.end())
- return;
-
- /* Find time matching motion step required by mesh. */
- int motion_step = mesh->motion_step(motion_time);
- if (motion_step < 0) {
+ /* Fluid motion blur already exported. */
+ BL::FluidDomainSettings b_fluid_domain = object_fluid_liquid_domain_find(b_ob);
+ if (b_fluid_domain) {
return;
}
- /* skip empty meshes */
- const size_t numverts = mesh->verts.size();
- const size_t numkeys = mesh->curve_keys.size();
-
- if (!numverts && !numkeys)
+ /* Skip if no vertices were exported. */
+ size_t numverts = mesh->verts.size();
+ if (numverts == 0) {
return;
+ }
- /* skip objects without deforming modifiers. this is not totally reliable,
- * would need a more extensive check to see which objects are animated */
+ /* Skip objects without deforming modifiers. This is not totally reliable; it
+ * would need a more extensive check to see which objects are animated. */
BL::Mesh b_mesh(PointerRNA_NULL);
-
- /* fluid motion is exported immediate with mesh, skip here */
- BL::DomainFluidSettings b_fluid_domain = object_fluid_domain_find(b_ob);
- if (b_fluid_domain)
- return;
-
if (ccl::BKE_object_is_deform_modified(b_ob, b_scene, preview)) {
/* get derived mesh */
b_mesh = object_to_mesh(b_data, b_ob, b_depsgraph, false, Mesh::SUBDIVISION_NONE);
}
- if (!b_mesh) {
- /* if we have no motion blur on this frame, but on other frames, copy */
- if (numverts) {
- /* triangles */
- Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-
- if (attr_mP) {
- Attribute *attr_mN = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL);
- Attribute *attr_N = mesh->attributes.find(ATTR_STD_VERTEX_NORMAL);
- float3 *P = &mesh->verts[0];
- float3 *N = (attr_N) ? attr_N->data_float3() : NULL;
-
- memcpy(attr_mP->data_float3() + motion_step * numverts, P, sizeof(float3) * numverts);
- if (attr_mN)
- memcpy(attr_mN->data_float3() + motion_step * numverts, N, sizeof(float3) * numverts);
- }
- }
-
- if (numkeys) {
- /* curves */
- Attribute *attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-
- if (attr_mP) {
- float3 *keys = &mesh->curve_keys[0];
- memcpy(attr_mP->data_float3() + motion_step * numkeys, keys, sizeof(float3) * numkeys);
- }
- }
-
- return;
- }
-
- /* TODO(sergey): Perform preliminary check for number of verticies. */
- if (numverts) {
+ /* TODO(sergey): Perform preliminary check for number of vertices. */
+ if (b_mesh) {
+ /* Export deformed coordinates. */
/* Find attributes. */
Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
Attribute *attr_mN = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL);
@@ -1198,14 +1104,13 @@ void BlenderSync::sync_mesh_motion(BL::Depsgraph &b_depsgraph,
}
}
}
- }
- /* hair motion */
- if (numkeys)
- sync_curves(mesh, b_mesh, b_ob, true, motion_step);
+ free_object_to_mesh(b_data, b_ob, b_mesh);
+ return;
+ }
- /* free derived mesh */
- free_object_to_mesh(b_data, b_ob, b_mesh);
+ /* No deformation on this frame; copy the coordinates in case other frames do have it. */
+ mesh->copy_center_to_motion_step(motion_step);
}
CCL_NAMESPACE_END
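
A minimal sketch of what a copy_center_to_motion_step()-style helper does, reconstructed from the fallback code removed above (copying rest-pose positions, and normals when present, into the motion attribute slot when a frame has no deformation). The standalone function name below is illustrative only and not part of the patch; it assumes the Cycles attribute API already used in this file.

/* Hedged sketch: copy the center (rest) vertex positions, and normals when stored,
 * into the given motion step of the mesh motion attributes. */
static void copy_center_to_motion_step_sketch(Mesh *mesh, int motion_step)
{
  const size_t numverts = mesh->verts.size();
  Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
  if (numverts == 0 || attr_mP == NULL) {
    return;
  }

  /* Vertex positions for this motion step. */
  memcpy(attr_mP->data_float3() + motion_step * numverts,
         &mesh->verts[0],
         sizeof(float3) * numverts);

  /* Vertex normals, if motion normals are stored. */
  Attribute *attr_mN = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL);
  Attribute *attr_N = mesh->attributes.find(ATTR_STD_VERTEX_NORMAL);
  if (attr_mN && attr_N) {
    memcpy(attr_mN->data_float3() + motion_step * numverts,
           attr_N->data_float3(),
           sizeof(float3) * numverts);
  }
}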
diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp
index 095ecd59985..3ea6892a349 100644
--- a/intern/cycles/blender/blender_object.cpp
+++ b/intern/cycles/blender/blender_object.cpp
@@ -15,14 +15,14 @@
*/
#include "render/camera.h"
-#include "render/integrator.h"
#include "render/graph.h"
+#include "render/integrator.h"
#include "render/light.h"
#include "render/mesh.h"
-#include "render/object.h"
-#include "render/scene.h"
#include "render/nodes.h"
+#include "render/object.h"
#include "render/particles.h"
+#include "render/scene.h"
#include "render/shader.h"
#include "blender/blender_object_cull.h"
@@ -59,7 +59,7 @@ bool BlenderSync::BKE_object_is_modified(BL::Object &b_ob)
return false;
}
-bool BlenderSync::object_is_mesh(BL::Object &b_ob)
+bool BlenderSync::object_is_geometry(BL::Object &b_ob)
{
BL::ID b_ob_data = b_ob.data();
@@ -67,10 +67,16 @@ bool BlenderSync::object_is_mesh(BL::Object &b_ob)
return false;
}
- if (b_ob.type() == BL::Object::type_CURVE) {
+ BL::Object::type_enum type = b_ob.type();
+
+ if (type == BL::Object::type_VOLUME || type == BL::Object::type_HAIR) {
+ /* Will be exported attached to mesh. */
+ return true;
+ }
+ else if (type == BL::Object::type_CURVE) {
/* Skip exporting curves without faces, overhead can be
* significant if there are many for path animation. */
- BL::Curve b_curve(b_ob.data());
+ BL::Curve b_curve(b_ob_data);
return (b_curve.bevel_object() || b_curve.extrude() != 0.0f || b_curve.bevel_depth() != 0.0f ||
b_curve.dimensions() == BL::Curve::dimensions_2D || b_ob.modifiers.length());
@@ -88,204 +94,14 @@ bool BlenderSync::object_is_light(BL::Object &b_ob)
return (b_ob_data && b_ob_data.is_a(&RNA_Light));
}
-static uint object_ray_visibility(BL::Object &b_ob)
-{
- PointerRNA cvisibility = RNA_pointer_get(&b_ob.ptr, "cycles_visibility");
- uint flag = 0;
-
- flag |= get_boolean(cvisibility, "camera") ? PATH_RAY_CAMERA : 0;
- flag |= get_boolean(cvisibility, "diffuse") ? PATH_RAY_DIFFUSE : 0;
- flag |= get_boolean(cvisibility, "glossy") ? PATH_RAY_GLOSSY : 0;
- flag |= get_boolean(cvisibility, "transmission") ? PATH_RAY_TRANSMIT : 0;
- flag |= get_boolean(cvisibility, "shadow") ? PATH_RAY_SHADOW : 0;
- flag |= get_boolean(cvisibility, "scatter") ? PATH_RAY_VOLUME_SCATTER : 0;
-
- return flag;
-}
-
-/* Light */
-
-void BlenderSync::sync_light(BL::Object &b_parent,
- int persistent_id[OBJECT_PERSISTENT_ID_SIZE],
- BL::Object &b_ob,
- BL::Object &b_ob_instance,
- int random_id,
- Transform &tfm,
- bool *use_portal)
-{
- /* test if we need to sync */
- Light *light;
- ObjectKey key(b_parent, persistent_id, b_ob_instance);
-
- if (!light_map.sync(&light, b_ob, b_parent, key)) {
- if (light->is_portal)
- *use_portal = true;
- return;
- }
-
- BL::Light b_light(b_ob.data());
-
- /* type */
- switch (b_light.type()) {
- case BL::Light::type_POINT: {
- BL::PointLight b_point_light(b_light);
- light->size = b_point_light.shadow_soft_size();
- light->type = LIGHT_POINT;
- break;
- }
- case BL::Light::type_SPOT: {
- BL::SpotLight b_spot_light(b_light);
- light->size = b_spot_light.shadow_soft_size();
- light->type = LIGHT_SPOT;
- light->spot_angle = b_spot_light.spot_size();
- light->spot_smooth = b_spot_light.spot_blend();
- break;
- }
- /* Hemi were removed from 2.8 */
- // case BL::Light::type_HEMI: {
- // light->type = LIGHT_DISTANT;
- // light->size = 0.0f;
- // break;
- // }
- case BL::Light::type_SUN: {
- BL::SunLight b_sun_light(b_light);
- light->size = b_sun_light.shadow_soft_size();
- light->type = LIGHT_DISTANT;
- break;
- }
- case BL::Light::type_AREA: {
- BL::AreaLight b_area_light(b_light);
- light->size = 1.0f;
- light->axisu = transform_get_column(&tfm, 0);
- light->axisv = transform_get_column(&tfm, 1);
- light->sizeu = b_area_light.size();
- switch (b_area_light.shape()) {
- case BL::AreaLight::shape_SQUARE:
- light->sizev = light->sizeu;
- light->round = false;
- break;
- case BL::AreaLight::shape_RECTANGLE:
- light->sizev = b_area_light.size_y();
- light->round = false;
- break;
- case BL::AreaLight::shape_DISK:
- light->sizev = light->sizeu;
- light->round = true;
- break;
- case BL::AreaLight::shape_ELLIPSE:
- light->sizev = b_area_light.size_y();
- light->round = true;
- break;
- }
- light->type = LIGHT_AREA;
- break;
- }
- }
-
- /* location and (inverted!) direction */
- light->co = transform_get_column(&tfm, 3);
- light->dir = -transform_get_column(&tfm, 2);
- light->tfm = tfm;
-
- /* shader */
- vector<Shader *> used_shaders;
- find_shader(b_light, used_shaders, scene->default_light);
- light->shader = used_shaders[0];
-
- /* shadow */
- PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
- PointerRNA clight = RNA_pointer_get(&b_light.ptr, "cycles");
- light->cast_shadow = get_boolean(clight, "cast_shadow");
- light->use_mis = get_boolean(clight, "use_multiple_importance_sampling");
-
- int samples = get_int(clight, "samples");
- if (get_boolean(cscene, "use_square_samples"))
- light->samples = samples * samples;
- else
- light->samples = samples;
-
- light->max_bounces = get_int(clight, "max_bounces");
-
- if (b_ob != b_ob_instance) {
- light->random_id = random_id;
- }
- else {
- light->random_id = hash_int_2d(hash_string(b_ob.name().c_str()), 0);
- }
-
- if (light->type == LIGHT_AREA)
- light->is_portal = get_boolean(clight, "is_portal");
- else
- light->is_portal = false;
-
- if (light->is_portal)
- *use_portal = true;
-
- /* visibility */
- uint visibility = object_ray_visibility(b_ob);
- light->use_diffuse = (visibility & PATH_RAY_DIFFUSE) != 0;
- light->use_glossy = (visibility & PATH_RAY_GLOSSY) != 0;
- light->use_transmission = (visibility & PATH_RAY_TRANSMIT) != 0;
- light->use_scatter = (visibility & PATH_RAY_VOLUME_SCATTER) != 0;
-
- /* tag */
- light->tag_update(scene);
-}
-
-void BlenderSync::sync_background_light(bool use_portal)
-{
- BL::World b_world = b_scene.world();
-
- if (b_world) {
- PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
- PointerRNA cworld = RNA_pointer_get(&b_world.ptr, "cycles");
-
- enum SamplingMethod { SAMPLING_NONE = 0, SAMPLING_AUTOMATIC, SAMPLING_MANUAL, SAMPLING_NUM };
- int sampling_method = get_enum(cworld, "sampling_method", SAMPLING_NUM, SAMPLING_AUTOMATIC);
- bool sample_as_light = (sampling_method != SAMPLING_NONE);
-
- if (sample_as_light || use_portal) {
- /* test if we need to sync */
- Light *light;
- ObjectKey key(b_world, 0, b_world);
-
- if (light_map.sync(&light, b_world, b_world, key) || world_recalc ||
- b_world.ptr.data != world_map) {
- light->type = LIGHT_BACKGROUND;
- if (sampling_method == SAMPLING_MANUAL) {
- light->map_resolution = get_int(cworld, "sample_map_resolution");
- }
- else {
- light->map_resolution = 0;
- }
- light->shader = scene->default_background;
- light->use_mis = sample_as_light;
- light->max_bounces = get_int(cworld, "max_bounces");
-
- int samples = get_int(cworld, "samples");
- if (get_boolean(cscene, "use_square_samples"))
- light->samples = samples * samples;
- else
- light->samples = samples;
-
- light->tag_update(scene);
- light_map.set_recalc(b_world);
- }
- }
- }
-
- world_map = b_world.ptr.data;
- world_recalc = false;
-}
-
/* Object */
Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
BL::ViewLayer &b_view_layer,
BL::DepsgraphObjectInstance &b_instance,
float motion_time,
- bool show_self,
- bool show_particles,
+ bool use_particle_hair,
+ bool show_lights,
BlenderObjectCulling &culling,
bool *use_portal)
{
@@ -304,11 +120,14 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
/* light is handled separately */
if (!motion && object_is_light(b_ob)) {
+ if (!show_lights) {
+ return NULL;
+ }
+
/* TODO: don't use lights for excluded layers used as mask layer,
* when dynamic overrides are back. */
#if 0
- if(!((layer_flag & view_layer.holdout_layer) &&
- (layer_flag & view_layer.exclude_layer)))
+ if (!((layer_flag & view_layer.holdout_layer) && (layer_flag & view_layer.exclude_layer)))
#endif
{
sync_light(b_parent,
@@ -324,7 +143,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
}
/* only interested in object that we can create meshes from */
- if (!object_is_mesh(b_ob)) {
+ if (!object_is_geometry(b_ob)) {
return NULL;
}
@@ -345,13 +164,14 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
/* TODO: make holdout objects on excluded layer invisible for non-camera rays. */
#if 0
- if(use_holdout && (layer_flag & view_layer.exclude_layer)) {
+ if (use_holdout && (layer_flag & view_layer.exclude_layer)) {
visibility &= ~(PATH_RAY_ALL_VISIBILITY - PATH_RAY_CAMERA);
}
#endif
/* Clear camera visibility for indirect only objects. */
- bool use_indirect_only = b_parent.indirect_only_get(PointerRNA_NULL, b_view_layer);
+ bool use_indirect_only = !use_holdout &&
+ b_parent.indirect_only_get(PointerRNA_NULL, b_view_layer);
if (use_indirect_only) {
visibility &= ~PATH_RAY_CAMERA;
}
@@ -362,7 +182,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
}
/* key to lookup object */
- ObjectKey key(b_parent, persistent_id, b_ob_instance);
+ ObjectKey key(b_parent, persistent_id, b_ob_instance, use_particle_hair);
Object *object;
/* motion vector case */
@@ -377,8 +197,8 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
}
/* mesh deformation */
- if (object->mesh)
- sync_mesh_motion(b_depsgraph, b_ob, object, motion_time);
+ if (object->geometry)
+ sync_geometry_motion(b_depsgraph, b_ob, object, motion_time, use_particle_hair);
}
return object;
@@ -387,12 +207,12 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
/* test if we need to sync */
bool object_updated = false;
- if (object_map.sync(&object, b_ob, b_parent, key))
+ if (object_map.add_or_update(&object, b_ob, b_parent, key))
object_updated = true;
/* mesh sync */
- object->mesh = sync_mesh(
- b_depsgraph, b_ob, b_ob_instance, object_updated, show_self, show_particles);
+ object->geometry = sync_geometry(
+ b_depsgraph, b_ob, b_ob_instance, object_updated, use_particle_hair);
/* special case not tracked by object update flags */
@@ -414,6 +234,12 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
object_updated = true;
}
+ float shadow_terminator_offset = get_float(cobject, "shadow_terminator_offset");
+ if (shadow_terminator_offset != object->shadow_terminator_offset) {
+ object->shadow_terminator_offset = shadow_terminator_offset;
+ object_updated = true;
+ }
+
/* sync the asset name for Cryptomatte */
BL::Object parent = b_ob.parent();
ustring parent_name;
@@ -434,31 +260,33 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
/* object sync
* transform comparison should not be needed, but duplis don't work perfect
* in the depsgraph and may not signal changes, so this is a workaround */
- if (object_updated || (object->mesh && object->mesh->need_update) || tfm != object->tfm) {
+ if (object_updated || (object->geometry && object->geometry->need_update) ||
+ tfm != object->tfm) {
object->name = b_ob.name().c_str();
object->pass_id = b_ob.pass_index();
+ object->color = get_float3(b_ob.color());
object->tfm = tfm;
object->motion.clear();
/* motion blur */
Scene::MotionType need_motion = scene->need_motion();
- if (need_motion != Scene::MOTION_NONE && object->mesh) {
- Mesh *mesh = object->mesh;
- mesh->use_motion_blur = false;
- mesh->motion_steps = 0;
+ if (need_motion != Scene::MOTION_NONE && object->geometry) {
+ Geometry *geom = object->geometry;
+ geom->use_motion_blur = false;
+ geom->motion_steps = 0;
uint motion_steps;
if (need_motion == Scene::MOTION_BLUR) {
- motion_steps = object_motion_steps(b_parent, b_ob);
- mesh->motion_steps = motion_steps;
+ motion_steps = object_motion_steps(b_parent, b_ob, Object::MAX_MOTION_STEPS);
+ geom->motion_steps = motion_steps;
if (motion_steps && object_use_deform_motion(b_parent, b_ob)) {
- mesh->use_motion_blur = true;
+ geom->use_motion_blur = true;
}
}
else {
motion_steps = 3;
- mesh->motion_steps = motion_steps;
+ geom->motion_steps = motion_steps;
}
object->motion.clear();
@@ -483,7 +311,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
else {
object->dupli_generated = make_float3(0.0f, 0.0f, 0.0f);
object->dupli_uv = make_float2(0.0f, 0.0f);
- object->random_id = hash_int_2d(hash_string(object->name.c_str()), 0);
+ object->random_id = hash_uint2(hash_string(object->name.c_str()), 0);
}
object->tag_update(scene);
@@ -499,7 +327,9 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
/* Object Loop */
-void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph, float motion_time)
+void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph,
+ BL::SpaceView3D &b_v3d,
+ float motion_time)
{
/* layer data */
bool motion = motion_time != 0.0f;
@@ -507,13 +337,13 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph, float motion_time)
if (!motion) {
/* prepare for sync */
light_map.pre_sync();
- mesh_map.pre_sync();
+ geometry_map.pre_sync();
object_map.pre_sync();
particle_system_map.pre_sync();
motion_times.clear();
}
else {
- mesh_motion_synced.clear();
+ geometry_motion_synced.clear();
}
/* initialize culling */
@@ -522,6 +352,7 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph, float motion_time)
/* object loop */
bool cancel = false;
bool use_portal = false;
+ const bool show_lights = BlenderViewportParameters(b_v3d).use_scene_lights;
BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval();
@@ -532,21 +363,35 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph, float motion_time)
BL::DepsgraphObjectInstance b_instance = *b_instance_iter;
BL::Object b_ob = b_instance.object();
- /* load per-object culling data */
+ /* Viewport visibility. */
+ const bool show_in_viewport = !b_v3d || b_ob.visible_in_viewport_get(b_v3d);
+ if (show_in_viewport == false) {
+ continue;
+ }
+
+ /* Load per-object culling data. */
culling.init_object(scene, b_ob);
- /* test if object needs to be hidden */
- const bool show_self = b_instance.show_self();
- const bool show_particles = b_instance.show_particles();
+ /* Object itself. */
+ if (b_instance.show_self()) {
+ sync_object(b_depsgraph,
+ b_view_layer,
+ b_instance,
+ motion_time,
+ false,
+ show_lights,
+ culling,
+ &use_portal);
+ }
- if (show_self || show_particles) {
- /* object itself */
+ /* Particle hair as separate object. */
+ if (b_instance.show_particles() && object_has_particle_hair(b_ob)) {
sync_object(b_depsgraph,
b_view_layer,
b_instance,
motion_time,
- show_self,
- show_particles,
+ true,
+ show_lights,
culling,
&use_portal);
}
@@ -557,13 +402,13 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph, float motion_time)
progress.set_sync_status("");
if (!cancel && !motion) {
- sync_background_light(use_portal);
+ sync_background_light(b_v3d, use_portal);
/* handle removed data and modified pointers */
if (light_map.post_sync())
scene->light_manager->tag_update(scene);
- if (mesh_map.post_sync())
- scene->mesh_manager->tag_update(scene);
+ if (geometry_map.post_sync())
+ scene->geometry_manager->tag_update(scene);
if (object_map.post_sync())
scene->object_manager->tag_update(scene);
if (particle_system_map.post_sync())
@@ -571,11 +416,12 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph, float motion_time)
}
if (motion)
- mesh_motion_synced.clear();
+ geometry_motion_synced.clear();
}
void BlenderSync::sync_motion(BL::RenderSettings &b_render,
BL::Depsgraph &b_depsgraph,
+ BL::SpaceView3D &b_v3d,
BL::Object &b_override,
int width,
int height,
@@ -613,12 +459,15 @@ void BlenderSync::sync_motion(BL::RenderSettings &b_render,
b_engine.frame_set(frame, subframe);
python_thread_state_save(python_thread_state);
sync_camera_motion(b_render, b_cam, width, height, 0.0f);
- sync_objects(b_depsgraph, 0.0f);
+ sync_objects(b_depsgraph, b_v3d, 0.0f);
}
- /* always sample these times for camera motion */
- motion_times.insert(-1.0f);
- motion_times.insert(1.0f);
+ /* Insert motion times from camera. Motion times from other objects
+ * have already been added in a sync_objects call. */
+ uint camera_motion_steps = object_motion_steps(b_cam, b_cam);
+ for (size_t step = 0; step < camera_motion_steps; step++) {
+ motion_times.insert(scene->camera->motion_time(step));
+ }
/* note iteration over motion_times set happens in sorted order */
foreach (float relative_time, motion_times) {
@@ -643,13 +492,11 @@ void BlenderSync::sync_motion(BL::RenderSettings &b_render,
b_engine.frame_set(frame, subframe);
python_thread_state_save(python_thread_state);
- /* sync camera, only supports two times at the moment */
- if (relative_time == -1.0f || relative_time == 1.0f) {
- sync_camera_motion(b_render, b_cam, width, height, relative_time);
- }
+ /* Syncs camera motion if relative_time is one of the camera's motion times. */
+ sync_camera_motion(b_render, b_cam, width, height, relative_time);
/* sync object */
- sync_objects(b_depsgraph, relative_time);
+ sync_objects(b_depsgraph, b_v3d, relative_time);
}
/* we need to set the python thread state again because this
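
The motion-time loop above replaces the hard-coded -1.0/+1.0 shutter times with one entry per camera motion step, taken from scene->camera->motion_time(step). A small sketch of the step-to-relative-time mapping this relies on, assuming uniform spacing over the shutter; the formula is a reconstruction that reproduces the old -1, 0, +1 times for the default three steps, not code quoted from the patch.

/* Assumed mapping from motion step index to relative shutter time in [-1, 1]. */
static float motion_time_sketch(uint step, uint motion_steps)
{
  if (motion_steps <= 1) {
    return 0.0f; /* Only the center frame. */
  }
  return 2.0f * (float)step / (float)(motion_steps - 1) - 1.0f;
}

/* With motion_steps == 3 this yields -1.0, 0.0 and +1.0, i.e. the two times the
 * removed code inserted explicitly plus the center frame. */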
diff --git a/intern/cycles/blender/blender_object_cull.cpp b/intern/cycles/blender/blender_object_cull.cpp
index 74f8fb1dc53..bebecb364eb 100644
--- a/intern/cycles/blender/blender_object_cull.cpp
+++ b/intern/cycles/blender/blender_object_cull.cpp
@@ -19,6 +19,7 @@
#include "render/camera.h"
#include "blender/blender_object_cull.h"
+#include "blender/blender_util.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/blender/blender_particles.cpp b/intern/cycles/blender/blender_particles.cpp
index d74f132ed60..e5eab1ae62b 100644
--- a/intern/cycles/blender/blender_particles.cpp
+++ b/intern/cycles/blender/blender_particles.cpp
@@ -39,7 +39,7 @@ bool BlenderSync::sync_dupli_particle(BL::Object &b_ob,
object->hide_on_missing_motion = true;
/* test if we need particle data */
- if (!object->mesh->need_attribute(scene, ATTR_STD_PARTICLE))
+ if (!object->geometry->need_attribute(scene, ATTR_STD_PARTICLE))
return false;
/* don't handle child particles yet */
@@ -53,10 +53,10 @@ bool BlenderSync::sync_dupli_particle(BL::Object &b_ob,
ParticleSystem *psys;
bool first_use = !particle_system_map.is_used(key);
- bool need_update = particle_system_map.sync(&psys, b_ob, b_instance.object(), key);
+ bool need_update = particle_system_map.add_or_update(&psys, b_ob, b_instance.object(), key);
/* no update needed? */
- if (!need_update && !object->mesh->need_update && !scene->object_manager->need_update)
+ if (!need_update && !object->geometry->need_update && !scene->object_manager->need_update)
return true;
/* first time used in this sync loop? clear and tag update */
diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp
index ffd1c70a4e4..25c77b74ce3 100644
--- a/intern/cycles/blender/blender_python.cpp
+++ b/intern/cycles/blender/blender_python.cpp
@@ -19,8 +19,9 @@
#include "blender/CCL_api.h"
#include "blender/blender_device.h"
-#include "blender/blender_sync.h"
#include "blender/blender_session.h"
+#include "blender/blender_sync.h"
+#include "blender/blender_util.h"
#include "render/denoising.h"
#include "render/merge.h"
@@ -30,15 +31,17 @@
#include "util/util_logging.h"
#include "util/util_md5.h"
#include "util/util_opengl.h"
+#include "util/util_openimagedenoise.h"
#include "util/util_path.h"
#include "util/util_string.h"
+#include "util/util_task.h"
#include "util/util_types.h"
#ifdef WITH_OSL
# include "render/osl.h"
-# include <OSL/oslquery.h>
# include <OSL/oslconfig.h>
+# include <OSL/oslquery.h>
#endif
#ifdef WITH_OPENCL
@@ -59,6 +62,12 @@ void *pylong_as_voidptr_typesafe(PyObject *object)
return PyLong_AsVoidPtr(object);
}
+PyObject *pyunicode_from_string(const char *str)
+{
+ /* Ignore errors if device API returns invalid UTF-8 strings. */
+ return PyUnicode_DecodeUTF8(str, strlen(str), "ignore");
+}
+
/* Synchronize debug flags from a given Blender scene.
* Return truth when device list needs invalidation.
*/
@@ -81,6 +90,9 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene)
/* Synchronize CUDA flags. */
flags.cuda.adaptive_compile = get_boolean(cscene, "debug_use_cuda_adaptive_compile");
flags.cuda.split_kernel = get_boolean(cscene, "debug_use_cuda_split_kernel");
+ /* Synchronize OptiX flags. */
+ flags.optix.cuda_streams = get_int(cscene, "debug_optix_cuda_streams");
+ flags.optix.curves_api = get_boolean(cscene, "debug_optix_curves_api");
/* Synchronize OpenCL device type. */
switch (get_enum(cscene, "debug_opencl_device_type")) {
case 0:
@@ -138,7 +150,7 @@ static const char *PyC_UnicodeAsByte(PyObject *py_str, PyObject **coerce)
const char *result = _PyUnicode_AsString(py_str);
if (result) {
/* 99% of the time this is enough but we better support non unicode
- * chars since blender doesnt limit this.
+ * chars since blender doesn't limit this.
*/
return result;
}
@@ -151,7 +163,7 @@ static const char *PyC_UnicodeAsByte(PyObject *py_str, PyObject **coerce)
return PyBytes_AS_STRING(*coerce);
}
else {
- /* Clear the error, so Cycles can be at leadt used without
+ /* Clear the error, so Cycles can be at least used without
* GPU and OSL support,
*/
PyErr_Clear();
@@ -177,6 +189,8 @@ static PyObject *init_func(PyObject * /*self*/, PyObject *args)
BlenderSession::headless = headless;
+ DebugFlags().running_inside_blender = true;
+
VLOG(2) << "Debug flags initialized to:\n" << DebugFlags();
Py_RETURN_NONE;
@@ -192,14 +206,15 @@ static PyObject *exit_func(PyObject * /*self*/, PyObject * /*args*/)
static PyObject *create_func(PyObject * /*self*/, PyObject *args)
{
- PyObject *pyengine, *pypreferences, *pydata, *pyregion, *pyv3d, *pyrv3d;
+ PyObject *pyengine, *pypreferences, *pydata, *pyscreen, *pyregion, *pyv3d, *pyrv3d;
int preview_osl;
if (!PyArg_ParseTuple(args,
- "OOOOOOi",
+ "OOOOOOOi",
&pyengine,
&pypreferences,
&pydata,
+ &pyscreen,
&pyregion,
&pyv3d,
&pyrv3d,
@@ -208,6 +223,8 @@ static PyObject *create_func(PyObject * /*self*/, PyObject *args)
}
/* RNA */
+ ID *bScreen = (ID *)PyLong_AsVoidPtr(pyscreen);
+
PointerRNA engineptr;
RNA_pointer_create(NULL, &RNA_RenderEngine, (void *)PyLong_AsVoidPtr(pyengine), &engineptr);
BL::RenderEngine engine(engineptr);
@@ -222,15 +239,15 @@ static PyObject *create_func(PyObject * /*self*/, PyObject *args)
BL::BlendData data(dataptr);
PointerRNA regionptr;
- RNA_pointer_create(NULL, &RNA_Region, pylong_as_voidptr_typesafe(pyregion), &regionptr);
+ RNA_pointer_create(bScreen, &RNA_Region, pylong_as_voidptr_typesafe(pyregion), &regionptr);
BL::Region region(regionptr);
PointerRNA v3dptr;
- RNA_pointer_create(NULL, &RNA_SpaceView3D, pylong_as_voidptr_typesafe(pyv3d), &v3dptr);
+ RNA_pointer_create(bScreen, &RNA_SpaceView3D, pylong_as_voidptr_typesafe(pyv3d), &v3dptr);
BL::SpaceView3D v3d(v3dptr);
PointerRNA rv3dptr;
- RNA_pointer_create(NULL, &RNA_RegionView3D, pylong_as_voidptr_typesafe(pyrv3d), &rv3dptr);
+ RNA_pointer_create(bScreen, &RNA_RegionView3D, pylong_as_voidptr_typesafe(pyrv3d), &rv3dptr);
BL::RegionView3D rv3d(rv3dptr);
/* create session */
@@ -284,22 +301,18 @@ static PyObject *render_func(PyObject * /*self*/, PyObject *args)
static PyObject *bake_func(PyObject * /*self*/, PyObject *args)
{
PyObject *pysession, *pydepsgraph, *pyobject;
- PyObject *pypixel_array, *pyresult;
const char *pass_type;
- int num_pixels, depth, object_id, pass_filter;
+ int pass_filter, width, height;
if (!PyArg_ParseTuple(args,
- "OOOsiiOiiO",
+ "OOOsiii",
&pysession,
&pydepsgraph,
&pyobject,
&pass_type,
&pass_filter,
- &object_id,
- &pypixel_array,
- &num_pixels,
- &depth,
- &pyresult))
+ &width,
+ &height))
return NULL;
BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession);
@@ -312,23 +325,9 @@ static PyObject *bake_func(PyObject * /*self*/, PyObject *args)
RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyobject), &objectptr);
BL::Object b_object(objectptr);
- void *b_result = PyLong_AsVoidPtr(pyresult);
-
- PointerRNA bakepixelptr;
- RNA_pointer_create(NULL, &RNA_BakePixel, PyLong_AsVoidPtr(pypixel_array), &bakepixelptr);
- BL::BakePixel b_bake_pixel(bakepixelptr);
-
python_thread_state_save(&session->python_thread_state);
- session->bake(b_depsgraph,
- b_object,
- pass_type,
- pass_filter,
- object_id,
- b_bake_pixel,
- (size_t)num_pixels,
- depth,
- (float *)b_result);
+ session->bake(b_depsgraph, b_object, pass_type, pass_filter, width, height);
python_thread_state_restore(&session->python_thread_state);
@@ -420,10 +419,11 @@ static PyObject *available_devices_func(PyObject * /*self*/, PyObject *args)
for (size_t i = 0; i < devices.size(); i++) {
DeviceInfo &device = devices[i];
string type_name = Device::string_from_type(device.type);
- PyObject *device_tuple = PyTuple_New(3);
- PyTuple_SET_ITEM(device_tuple, 0, PyUnicode_FromString(device.description.c_str()));
- PyTuple_SET_ITEM(device_tuple, 1, PyUnicode_FromString(type_name.c_str()));
- PyTuple_SET_ITEM(device_tuple, 2, PyUnicode_FromString(device.id.c_str()));
+ PyObject *device_tuple = PyTuple_New(4);
+ PyTuple_SET_ITEM(device_tuple, 0, pyunicode_from_string(device.description.c_str()));
+ PyTuple_SET_ITEM(device_tuple, 1, pyunicode_from_string(type_name.c_str()));
+ PyTuple_SET_ITEM(device_tuple, 2, pyunicode_from_string(device.id.c_str()));
+ PyTuple_SET_ITEM(device_tuple, 3, PyBool_FromLong(device.has_peer_memory));
PyTuple_SET_ITEM(ret, i, device_tuple);
}
@@ -634,7 +634,7 @@ static PyObject *osl_compile_func(PyObject * /*self*/, PyObject *args)
static PyObject *system_info_func(PyObject * /*self*/, PyObject * /*value*/)
{
string system_info = Device::device_capabilities();
- return PyUnicode_FromString(system_info.c_str());
+ return pyunicode_from_string(system_info.c_str());
}
#ifdef WITH_OPENCL
@@ -937,6 +937,15 @@ static PyObject *set_resumable_chunk_range_func(PyObject * /*self*/, PyObject *a
Py_RETURN_NONE;
}
+static PyObject *clear_resumable_chunk_func(PyObject * /*self*/, PyObject * /*value*/)
+{
+ VLOG(1) << "Clear resumable render";
+ BlenderSession::num_resumable_chunks = 0;
+ BlenderSession::current_resumable_chunk = 0;
+
+ Py_RETURN_NONE;
+}
+
static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args*/)
{
BlenderSession::print_render_stats = true;
@@ -946,14 +955,16 @@ static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args*
static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
{
vector<DeviceType> device_types = Device::available_types();
- bool has_cuda = false, has_opencl = false;
+ bool has_cuda = false, has_optix = false, has_opencl = false;
foreach (DeviceType device_type, device_types) {
has_cuda |= (device_type == DEVICE_CUDA);
+ has_optix |= (device_type == DEVICE_OPTIX);
has_opencl |= (device_type == DEVICE_OPENCL);
}
- PyObject *list = PyTuple_New(2);
+ PyObject *list = PyTuple_New(3);
PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda));
- PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_opencl));
+ PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_optix));
+ PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_opencl));
return list;
}
@@ -992,6 +1003,7 @@ static PyMethodDef methods[] = {
/* Resumable render */
{"set_resumable_chunk", set_resumable_chunk_func, METH_VARARGS, ""},
{"set_resumable_chunk_range", set_resumable_chunk_range_func, METH_VARARGS, ""},
+ {"clear_resumable_chunk", clear_resumable_chunk_func, METH_NOARGS, ""},
/* Compute Device selection */
{"get_device_types", get_device_types_func, METH_VARARGS, ""},
@@ -1066,5 +1078,14 @@ void *CCL_python_module_init()
Py_INCREF(Py_False);
#endif /* WITH_EMBREE */
+ if (ccl::openimagedenoise_supported()) {
+ PyModule_AddObject(mod, "with_openimagedenoise", Py_True);
+ Py_INCREF(Py_True);
+ }
+ else {
+ PyModule_AddObject(mod, "with_openimagedenoise", Py_False);
+ Py_INCREF(Py_False);
+ }
+
return (void *)mod;
}
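
The new pyunicode_from_string() helper above decodes with the "ignore" error handler so device strings that are not valid UTF-8 do not raise. A short illustration of the difference against the strict PyUnicode_FromString() calls it replaces; the sample byte string is hypothetical.

#include <Python.h>
#include <cstring>

static void utf8_decode_sketch()
{
  /* Hypothetical device description containing an invalid UTF-8 byte. */
  const char *description = "GeForce \xFF RTX";

  /* Strict decoding: returns NULL and sets a UnicodeDecodeError. */
  PyObject *strict = PyUnicode_FromString(description);
  if (strict == NULL) {
    PyErr_Clear();
  }

  /* Lenient decoding, as pyunicode_from_string() does: the bad byte is dropped. */
  PyObject *lenient = PyUnicode_DecodeUTF8(description, strlen(description), "ignore");

  Py_XDECREF(lenient);
  Py_XDECREF(strict);
}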
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index 29a97bf6546..391a1b8f473 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -16,12 +16,13 @@
#include <stdlib.h>
+#include "device/device.h"
#include "render/background.h"
#include "render/buffers.h"
#include "render/camera.h"
-#include "device/device.h"
-#include "render/integrator.h"
+#include "render/colorspace.h"
#include "render/film.h"
+#include "render/integrator.h"
#include "render/light.h"
#include "render/mesh.h"
#include "render/object.h"
@@ -40,8 +41,8 @@
#include "util/util_progress.h"
#include "util/util_time.h"
-#include "blender/blender_sync.h"
#include "blender/blender_session.h"
+#include "blender/blender_sync.h"
#include "blender/blender_util.h"
CCL_NAMESPACE_BEGIN
@@ -113,11 +114,6 @@ BlenderSession::~BlenderSession()
free_session();
}
-void BlenderSession::create()
-{
- create_session();
-}
-
void BlenderSession::create_session()
{
SessionParams session_params = BlenderSync::get_session_params(
@@ -142,21 +138,13 @@ void BlenderSession::create_session()
scene = new Scene(scene_params, session->device);
scene->name = b_scene.name();
- /* setup callbacks for builtin image support */
- scene->image_manager->builtin_image_info_cb = function_bind(
- &BlenderSession::builtin_image_info, this, _1, _2, _3);
- scene->image_manager->builtin_image_pixels_cb = function_bind(
- &BlenderSession::builtin_image_pixels, this, _1, _2, _3, _4, _5);
- scene->image_manager->builtin_image_float_pixels_cb = function_bind(
- &BlenderSession::builtin_image_float_pixels, this, _1, _2, _3, _4, _5);
-
session->scene = scene;
/* There is no single depsgraph to use for the entire render.
* So we need to handle this differently.
*
- * We could loop over the final render result render layers in pipeline and keep Cycles unaware of multiple layers,
- * or perhaps move syncing further down in the pipeline.
+ * We could loop over the final render result render layers in pipeline and keep Cycles unaware
+ * of multiple layers, or perhaps move syncing further down in the pipeline.
*/
/* create sync */
sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress);
@@ -170,7 +158,7 @@ void BlenderSession::create_session()
/* set buffer parameters */
BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ b_render, b_v3d, b_rv3d, scene->camera, width, height, session_params.denoising.use);
session->reset(buffer_params, session_params.samples);
b_engine.use_highlight_tiles(session_params.progressive_refine == false);
@@ -180,9 +168,13 @@ void BlenderSession::create_session()
void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsgraph)
{
+ /* Update data, scene and depsgraph pointers. These can change after undo. */
this->b_data = b_data;
this->b_depsgraph = b_depsgraph;
this->b_scene = b_depsgraph.scene_eval();
+ if (sync) {
+ sync->reset(this->b_data, this->b_scene);
+ }
if (preview_osl) {
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
@@ -198,8 +190,12 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg
height = render_resolution_y(b_render);
}
- if (session == NULL) {
- create();
+ bool is_new_session = (session == NULL);
+ if (is_new_session) {
+ /* Initialize session and remember it was just created, so as not to
+ * re-create it below.
+ */
+ create_session();
}
if (b_v3d) {
@@ -218,8 +214,10 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg
/* if scene or session parameters changed, it's easier to simply re-create
* them rather than trying to distinguish which settings need to be updated
*/
- free_session();
- create_session();
+ if (!is_new_session) {
+ free_session();
+ create_session();
+ }
return;
}
@@ -241,8 +239,13 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg
BL::SpaceView3D b_null_space_view3d(PointerRNA_NULL);
BL::RegionView3D b_null_region_view3d(PointerRNA_NULL);
- BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_render, b_null_space_view3d, b_null_region_view3d, scene->camera, width, height);
+ BufferParams buffer_params = BlenderSync::get_buffer_params(b_render,
+ b_null_space_view3d,
+ b_null_region_view3d,
+ scene->camera,
+ width,
+ height,
+ session_params.denoising.use);
session->reset(buffer_params, session_params.samples);
b_engine.use_highlight_tiles(session_params.progressive_refine == false);
@@ -253,9 +256,7 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg
void BlenderSession::free_session()
{
- if (sync)
- delete sync;
-
+ delete sync;
delete session;
}
@@ -276,8 +277,6 @@ static ShaderEvalType get_shader_type(const string &pass_type)
return SHADER_EVAL_GLOSSY_COLOR;
else if (strcmp(shader_type, "TRANSMISSION_COLOR") == 0)
return SHADER_EVAL_TRANSMISSION_COLOR;
- else if (strcmp(shader_type, "SUBSURFACE_COLOR") == 0)
- return SHADER_EVAL_SUBSURFACE_COLOR;
else if (strcmp(shader_type, "EMIT") == 0)
return SHADER_EVAL_EMISSION;
@@ -294,8 +293,6 @@ static ShaderEvalType get_shader_type(const string &pass_type)
return SHADER_EVAL_GLOSSY;
else if (strcmp(shader_type, "TRANSMISSION") == 0)
return SHADER_EVAL_TRANSMISSION;
- else if (strcmp(shader_type, "SUBSURFACE") == 0)
- return SHADER_EVAL_SUBSURFACE;
/* extra */
else if (strcmp(shader_type, "ENVIRONMENT") == 0)
@@ -327,6 +324,7 @@ static void end_render_result(BL::RenderEngine &b_engine,
void BlenderSession::do_write_update_render_tile(RenderTile &rtile,
bool do_update_only,
+ bool do_read_only,
bool highlight)
{
int x = rtile.x - session->tile_manager.params.full_x;
@@ -352,7 +350,23 @@ void BlenderSession::do_write_update_render_tile(RenderTile &rtile,
BL::RenderLayer b_rlay = *b_single_rlay;
- if (do_update_only) {
+ if (do_read_only) {
+ /* copy each pass */
+ BL::RenderLayer::passes_iterator b_iter;
+
+ for (b_rlay.passes.begin(b_iter); b_iter != b_rlay.passes.end(); ++b_iter) {
+ BL::RenderPass b_pass(*b_iter);
+
+ /* find matching pass type */
+ PassType pass_type = BlenderSync::get_pass_type(b_pass);
+ int components = b_pass.channels();
+
+ rtile.buffers->set_pass_rect(pass_type, components, (float *)b_pass.rect());
+ }
+
+ end_render_result(b_engine, b_rr, false, false, false);
+ }
+ else if (do_update_only) {
/* Sample would be zero at initial tile update, which is only needed
* to tag tile form blender side as IN PROGRESS for proper highlight
* no buffers should be sent to blender yet. For denoise we also
@@ -360,21 +374,26 @@ void BlenderSession::do_write_update_render_tile(RenderTile &rtile,
bool merge = (rtile.sample != 0) && (rtile.task != RenderTile::DENOISE);
if (merge) {
- update_render_result(b_rr, b_rlay, rtile);
+ update_render_result(b_rlay, rtile);
}
end_render_result(b_engine, b_rr, true, highlight, merge);
}
else {
/* Write final render result. */
- write_render_result(b_rr, b_rlay, rtile);
+ write_render_result(b_rlay, rtile);
end_render_result(b_engine, b_rr, false, false, true);
}
}
+void BlenderSession::read_render_tile(RenderTile &rtile)
+{
+ do_write_update_render_tile(rtile, false, true, false);
+}
+
void BlenderSession::write_render_tile(RenderTile &rtile)
{
- do_write_update_render_tile(rtile, false, false);
+ do_write_update_render_tile(rtile, false, false, false);
}
void BlenderSession::update_render_tile(RenderTile &rtile, bool highlight)
@@ -384,9 +403,9 @@ void BlenderSession::update_render_tile(RenderTile &rtile, bool highlight)
* would need to be investigated a bit further, but for now shall be fine
*/
if (!b_engine.is_preview())
- do_write_update_render_tile(rtile, true, highlight);
+ do_write_update_render_tile(rtile, true, false, highlight);
else
- do_write_update_render_tile(rtile, false, false);
+ do_write_update_render_tile(rtile, false, false, false);
}
static void add_cryptomatte_layer(BL::RenderResult &b_rr, string name, string manifest)
@@ -454,14 +473,13 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
session->update_render_tile_cb = function_bind(
&BlenderSession::update_render_tile, this, _1, _2);
+ BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval();
+
/* get buffer parameters */
SessionParams session_params = BlenderSync::get_session_params(
- b_engine, b_userpref, b_scene, background);
+ b_engine, b_userpref, b_scene, background, b_view_layer);
BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_render, b_v3d, b_rv3d, scene->camera, width, height);
-
- /* render each layer */
- BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval();
+ b_render, b_v3d, b_rv3d, scene->camera, width, height, session_params.denoising.use);
/* temporary render result to find needed passes and views */
BL::RenderResult b_rr = begin_render_result(
@@ -471,33 +489,26 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
BL::RenderLayer b_rlay = *b_single_rlay;
b_rlay_name = b_view_layer.name();
- /* add passes */
- vector<Pass> passes = sync->sync_render_passes(b_rlay, b_view_layer);
- buffer_params.passes = passes;
-
- PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles");
- bool full_denoising = get_boolean(crl, "use_denoising");
- bool write_denoising_passes = get_boolean(crl, "denoising_store_passes");
+ /* Update denoising parameters. */
+ session->set_denoising(session_params.denoising);
- bool run_denoising = full_denoising || write_denoising_passes;
+ bool use_denoising = session_params.denoising.use;
+ bool store_denoising_passes = session_params.denoising.store_passes;
- session->tile_manager.schedule_denoising = run_denoising;
- buffer_params.denoising_data_pass = run_denoising;
+ buffer_params.denoising_data_pass = use_denoising || store_denoising_passes;
buffer_params.denoising_clean_pass = (scene->film->denoising_flags & DENOISING_CLEAN_ALL_PASSES);
- buffer_params.denoising_prefiltered_pass = write_denoising_passes;
-
- session->params.run_denoising = run_denoising;
- session->params.full_denoising = full_denoising;
- session->params.write_denoising_passes = write_denoising_passes;
- session->params.denoising.radius = get_int(crl, "denoising_radius");
- session->params.denoising.strength = get_float(crl, "denoising_strength");
- session->params.denoising.feature_strength = get_float(crl, "denoising_feature_strength");
- session->params.denoising.relative_pca = get_boolean(crl, "denoising_relative_pca");
+ buffer_params.denoising_prefiltered_pass = store_denoising_passes &&
+ session_params.denoising.type == DENOISER_NLM;
scene->film->denoising_data_pass = buffer_params.denoising_data_pass;
scene->film->denoising_clean_pass = buffer_params.denoising_clean_pass;
scene->film->denoising_prefiltered_pass = buffer_params.denoising_prefiltered_pass;
+ /* Add passes */
+ vector<Pass> passes = sync->sync_render_passes(
+ b_rlay, b_view_layer, session_params.adaptive_sampling, session_params.denoising);
+ buffer_params.passes = passes;
+
scene->film->pass_alpha_threshold = b_view_layer.pass_alpha_threshold();
scene->film->tag_passes_update(scene, passes);
scene->film->tag_update(scene);
@@ -526,19 +537,20 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
builtin_images_load();
/* Attempt to free all data which is held by Blender side, since at this
- * point we knwo that we've got everything to render current view layer.
+ * point we know that we've got everything to render current view layer.
*/
- /* At the moment we only free if we are not doing multi-view (or if we are rendering the last view).
- * See T58142/D4239 for discussion.
+ /* At the moment we only free if we are not doing multi-view
+ * (or if we are rendering the last view). See T58142/D4239 for discussion.
*/
if (view_index == num_views - 1) {
free_blender_memory_if_possible();
}
- /* Make sure all views have different noise patterns. - hardcoded value just to make it random */
+ /* Make sure all views have different noise patterns; the hardcoded value just serves to
+ * randomize the seed per view. */
if (view_index != 0) {
- scene->integrator->seed += hash_int_2d(scene->integrator->seed,
- hash_int(view_index * 0xdeadbeef));
+ scene->integrator->seed += hash_uint2(scene->integrator->seed,
+ hash_uint2(view_index * 0xdeadbeef, 0));
scene->integrator->tag_update(scene);
}
@@ -600,25 +612,6 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
#endif
}
-static void populate_bake_data(BakeData *data,
- const int object_id,
- BL::BakePixel &pixel_array,
- const int num_pixels)
-{
- BL::BakePixel bp = pixel_array;
-
- int i;
- for (i = 0; i < num_pixels; i++) {
- if (bp.object_id() == object_id) {
- data->set(i, bp.primitive_id(), bp.uv(), bp.du_dx(), bp.du_dy(), bp.dv_dx(), bp.dv_dy());
- }
- else {
- data->set_null(i);
- }
- bp = bp.next();
- }
-}
-
static int bake_pass_filter_get(const int pass_filter)
{
int flag = BAKE_FILTER_NONE;
@@ -636,8 +629,6 @@ static int bake_pass_filter_get(const int pass_filter)
flag |= BAKE_FILTER_GLOSSY;
if ((pass_filter & BL::BakeSettings::pass_filter_TRANSMISSION) != 0)
flag |= BAKE_FILTER_TRANSMISSION;
- if ((pass_filter & BL::BakeSettings::pass_filter_SUBSURFACE) != 0)
- flag |= BAKE_FILTER_SUBSURFACE;
if ((pass_filter & BL::BakeSettings::pass_filter_EMIT) != 0)
flag |= BAKE_FILTER_EMISSION;
@@ -651,43 +642,26 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
BL::Object &b_object,
const string &pass_type,
const int pass_filter,
- const int object_id,
- BL::BakePixel &pixel_array,
- const size_t num_pixels,
- const int /*depth*/,
- float result[])
+ const int bake_width,
+ const int bake_height)
{
b_depsgraph = b_depsgraph_;
ShaderEvalType shader_type = get_shader_type(pass_type);
-
- /* Set baking flag in advance, so kernel loading can check if we need
- * any baking capabilities.
- */
- scene->bake_manager->set_baking(true);
-
- /* ensure kernels are loaded before we do any scene updates */
- session->load_kernels();
-
- if (shader_type == SHADER_EVAL_UV) {
- /* force UV to be available */
- Pass::add(PASS_UV, scene->film->passes);
- }
-
int bake_pass_filter = bake_pass_filter_get(pass_filter);
- bake_pass_filter = BakeManager::shader_type_to_pass_filter(shader_type, bake_pass_filter);
- /* force use_light_pass to be true if we bake more than just colors */
- if (bake_pass_filter & ~BAKE_FILTER_COLOR) {
- Pass::add(PASS_LIGHT, scene->film->passes);
- }
+ /* Initialize the bake manager before we load the baking kernels. */
+ scene->bake_manager->set(scene, b_object.name(), shader_type, bake_pass_filter);
- /* create device and update scene */
- scene->film->tag_update(scene);
- scene->integrator->tag_update(scene);
+ /* Passes are identified by name, so in order to return the combined pass we need to set the
+ * name. */
+ Pass::add(PASS_COMBINED, scene->film->passes, "Combined");
+
+ session->read_bake_tile_cb = function_bind(&BlenderSession::read_render_tile, this, _1);
+ session->write_render_tile_cb = function_bind(&BlenderSession::write_render_tile, this, _1);
if (!session->progress.get_cancel()) {
- /* update scene */
+ /* Sync scene. */
BL::Object b_camera_override(b_engine.camera_override());
sync->sync_camera(b_render, b_camera_override, width, height, "");
sync->sync_data(
@@ -695,76 +669,46 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
builtin_images_load();
}
- BakeData *bake_data = NULL;
+ /* Object might have been disabled for rendering or excluded in some
+ * other way; in that case Blender will report a warning afterwards. */
+ bool object_found = false;
+ foreach (Object *ob, scene->objects) {
+ if (ob->name == b_object.name()) {
+ object_found = true;
+ break;
+ }
+ }
- if (!session->progress.get_cancel()) {
- /* get buffer parameters */
+ if (object_found && !session->progress.get_cancel()) {
+ /* Get session and buffer parameters. */
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
- BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_render, b_v3d, b_rv3d, scene->camera, width, height);
-
- scene->bake_manager->set_shader_limit((size_t)b_engine.tile_x(), (size_t)b_engine.tile_y());
-
- /* set number of samples */
- session->tile_manager.set_samples(session_params.samples);
- session->reset(buffer_params, session_params.samples);
- session->update_scene();
-
- /* find object index. todo: is arbitrary - copied from mesh_displace.cpp */
- size_t object_index = OBJECT_NONE;
- int tri_offset = 0;
-
- for (size_t i = 0; i < scene->objects.size(); i++) {
- if (strcmp(scene->objects[i]->name.c_str(), b_object.name().c_str()) == 0) {
- object_index = i;
- tri_offset = scene->objects[i]->mesh->tri_offset;
- break;
- }
- }
-
- /* Object might have been disabled for rendering or excluded in some
- * other way, in that case Blender will report a warning afterwards. */
- if (object_index != OBJECT_NONE) {
- int object = object_index;
+ session_params.progressive_refine = false;
- bake_data = scene->bake_manager->init(object, tri_offset, num_pixels);
- populate_bake_data(bake_data, object_id, pixel_array, num_pixels);
- }
+ BufferParams buffer_params;
+ buffer_params.width = bake_width;
+ buffer_params.height = bake_height;
+ buffer_params.passes = scene->film->passes;
- /* set number of samples */
+ /* Update session. */
session->tile_manager.set_samples(session_params.samples);
session->reset(buffer_params, session_params.samples);
- session->update_scene();
session->progress.set_update_callback(
function_bind(&BlenderSession::update_bake_progress, this));
}
/* Perform bake. Check cancel to avoid crash with incomplete scene data. */
- if (!session->progress.get_cancel() && bake_data) {
- scene->bake_manager->bake(scene->device,
- &scene->dscene,
- scene,
- session->progress,
- shader_type,
- bake_pass_filter,
- bake_data,
- result);
+ if (object_found && !session->progress.get_cancel()) {
+ session->start();
+ session->wait();
}
- /* free all memory used (host and device), so we wouldn't leave render
- * engine with extra memory allocated
- */
-
- session->device_free();
-
- delete sync;
- sync = NULL;
+ session->read_bake_tile_cb = function_null;
+ session->write_render_tile_cb = function_null;
}
-void BlenderSession::do_write_update_render_result(BL::RenderResult &b_rr,
- BL::RenderLayer &b_rlay,
+void BlenderSession::do_write_update_render_result(BL::RenderLayer &b_rlay,
RenderTile &rtile,
bool do_update_only)
{
@@ -791,18 +735,13 @@ void BlenderSession::do_write_update_render_result(BL::RenderResult &b_rr,
for (b_rlay.passes.begin(b_iter); b_iter != b_rlay.passes.end(); ++b_iter) {
BL::RenderPass b_pass(*b_iter);
-
- /* find matching pass type */
- PassType pass_type = BlenderSync::get_pass_type(b_pass);
int components = b_pass.channels();
- bool read = false;
- if (pass_type != PASS_NONE) {
- /* copy pixels */
- read = buffers->get_pass_rect(
- pass_type, exposure, sample, components, &pixels[0], b_pass.name());
- }
- else {
+ /* Copy pixels from regular render passes. */
+ bool read = buffers->get_pass_rect(b_pass.name(), exposure, sample, components, &pixels[0]);
+
+ /* If not a regular pass, try to read it as a denoising pass. */
+ if (!read) {
int denoising_offset = BlenderSync::get_denoising_pass(b_pass);
if (denoising_offset >= 0) {
read = buffers->get_denoising_pass_rect(
@@ -820,26 +759,19 @@ void BlenderSession::do_write_update_render_result(BL::RenderResult &b_rr,
else {
/* copy combined pass */
BL::RenderPass b_combined_pass(b_rlay.passes.find_by_name("Combined", b_rview_name.c_str()));
- if (buffers->get_pass_rect(PASS_COMBINED, exposure, sample, 4, &pixels[0], "Combined"))
+ if (buffers->get_pass_rect("Combined", exposure, sample, 4, &pixels[0]))
b_combined_pass.rect(&pixels[0]);
}
-
- /* tag result as updated */
- b_engine.update_result(b_rr);
}
-void BlenderSession::write_render_result(BL::RenderResult &b_rr,
- BL::RenderLayer &b_rlay,
- RenderTile &rtile)
+void BlenderSession::write_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile)
{
- do_write_update_render_result(b_rr, b_rlay, rtile, false);
+ do_write_update_render_result(b_rlay, rtile, false);
}
-void BlenderSession::update_render_result(BL::RenderResult &b_rr,
- BL::RenderLayer &b_rlay,
- RenderTile &rtile)
+void BlenderSession::update_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile)
{
- do_write_update_render_result(b_rr, b_rlay, rtile, true);
+ do_write_update_render_result(b_rlay, rtile, true);
}
void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
@@ -857,16 +789,16 @@ void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
if (session->params.modified(session_params) || scene->params.modified(scene_params)) {
free_session();
create_session();
- return;
}
/* increase samples, but never decrease */
session->set_samples(session_params.samples);
+ session->set_denoising_start_sample(session_params.denoising.start_sample);
session->set_pause(session_pause);
/* copy recalc flags, outside of mutex so we can decide to do the real
* synchronization at a later time to not block on running updates */
- sync->sync_recalc(b_depsgraph_);
+ sync->sync_recalc(b_depsgraph_, b_v3d);
/* don't do synchronization if on pause */
if (session_pause) {
@@ -892,21 +824,36 @@ void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
else
sync->sync_camera(b_render, b_camera_override, width, height, "");
- builtin_images_load();
+ /* get buffer parameters */
+ BufferParams buffer_params = BlenderSync::get_buffer_params(
+ b_render, b_v3d, b_rv3d, scene->camera, width, height, session_params.denoising.use);
- /* unlock */
- session->scene->mutex.unlock();
+ if (!buffer_params.denoising_data_pass) {
+ session_params.denoising.use = false;
+ }
+
+ session->set_denoising(session_params.denoising);
+
+ /* Update film if denoising data was enabled or disabled. */
+ if (scene->film->denoising_data_pass != buffer_params.denoising_data_pass) {
+ scene->film->denoising_data_pass = buffer_params.denoising_data_pass;
+ scene->film->tag_update(scene);
+ }
/* reset if needed */
if (scene->need_reset()) {
- BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_render, b_v3d, b_rv3d, scene->camera, width, height);
session->reset(buffer_params, session_params.samples);
+ /* After session reset, so device is not accessing image data anymore. */
+ builtin_images_load();
+
/* reset time */
start_resize_time = 0.0;
}
+ /* unlock */
+ session->scene->mutex.unlock();
+
/* Start rendering thread, if it's not running already. Do this
* after all scene data has been synced at least once. */
session->start();
@@ -961,7 +908,7 @@ bool BlenderSession::draw(int w, int h)
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ b_render, b_v3d, b_rv3d, scene->camera, width, height, session_params.denoising.use);
bool session_pause = BlenderSync::get_session_pause(b_scene, background);
if (session_pause == false) {
@@ -979,7 +926,7 @@ bool BlenderSession::draw(int w, int h)
/* draw */
BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ b_render, b_v3d, b_rv3d, scene->camera, width, height, session->params.denoising.use);
DeviceDrawParams draw_params;
if (session->params.display_buffer_linear) {
@@ -1057,8 +1004,9 @@ void BlenderSession::update_status_progress()
}
double current_time = time_dt();
- /* When rendering in a window, redraw the status at least once per second to keep the elapsed and remaining time up-to-date.
- * For headless rendering, only report when something significant changes to keep the console output readable. */
+ /* When rendering in a window, redraw the status at least once per second to keep the elapsed and
+ * remaining time up-to-date. For headless rendering, only report when something significant
+ * changes to keep the console output readable. */
if (status != last_status || (!headless && (current_time - last_status_time) > 1.0)) {
b_engine.update_stats("", (timestatus + scene_status + status).c_str());
b_engine.update_memory_stats(mem_used, mem_peak);
@@ -1119,324 +1067,6 @@ void BlenderSession::test_cancel()
session->progress.set_cancel("Cancelled");
}
-/* A builtin image file name is actually an image datablock name with the
- * absolute sequence frame number concatenated via the '@' character.
- *
- * This function splits the frame from the builtin name.
- */
-int BlenderSession::builtin_image_frame(const string &builtin_name)
-{
- int last = builtin_name.find_last_of('@');
- return atoi(builtin_name.substr(last + 1, builtin_name.size() - last - 1).c_str());
-}
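
For illustration only, a self-contained sketch of the "name@frame" convention described in the comment above. The image name, frame value, and helper name below are hypothetical and not part of this patch; it simply mirrors the find_last_of('@') / atoi split.

#include <cstdlib>
#include <string>

/* Illustrative sketch: split a builtin name such as "Image.001@47" into its frame
 * number, mirroring the '@' convention above. Assumes the '@' separator is present. */
static int example_builtin_image_frame(const std::string &builtin_name)
{
  const size_t at = builtin_name.find_last_of('@');
  /* "Image.001@47" -> atoi("47") -> 47 */
  return std::atoi(builtin_name.substr(at + 1).c_str());
}
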
-
-void BlenderSession::builtin_image_info(const string &builtin_name,
- void *builtin_data,
- ImageMetaData &metadata)
-{
- /* empty image */
- metadata.width = 1;
- metadata.height = 1;
-
- if (!builtin_data)
- return;
-
- /* recover ID pointer */
- PointerRNA ptr;
- RNA_id_pointer_create((ID *)builtin_data, &ptr);
- BL::ID b_id(ptr);
-
- if (b_id.is_a(&RNA_Image)) {
- /* image data */
- BL::Image b_image(b_id);
-
- metadata.builtin_free_cache = !b_image.has_data();
- metadata.is_float = b_image.is_float();
- metadata.width = b_image.size()[0];
- metadata.height = b_image.size()[1];
- metadata.depth = 1;
- metadata.channels = b_image.channels();
- }
- else if (b_id.is_a(&RNA_Object)) {
- /* smoke volume data */
- BL::Object b_ob(b_id);
- BL::SmokeDomainSettings b_domain = object_smoke_domain_find(b_ob);
-
- metadata.is_float = true;
- metadata.depth = 1;
- metadata.channels = 1;
-
- if (!b_domain)
- return;
-
- if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY) ||
- builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_FLAME) ||
- builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT) ||
- builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_TEMPERATURE))
- metadata.channels = 1;
- else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_COLOR))
- metadata.channels = 4;
- else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY))
- metadata.channels = 3;
- else
- return;
-
- int3 resolution = get_int3(b_domain.domain_resolution());
- int amplify = (b_domain.use_high_resolution()) ? b_domain.amplify() + 1 : 1;
-
- /* Velocity and heat data is always low-resolution. */
- if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY) ||
- builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) {
- amplify = 1;
- }
-
- metadata.width = resolution.x * amplify;
- metadata.height = resolution.y * amplify;
- metadata.depth = resolution.z * amplify;
- }
- else {
- /* TODO(sergey): Check we're indeed in shader node tree. */
- PointerRNA ptr;
- RNA_pointer_create(NULL, &RNA_Node, builtin_data, &ptr);
- BL::Node b_node(ptr);
- if (b_node.is_a(&RNA_ShaderNodeTexPointDensity)) {
- BL::ShaderNodeTexPointDensity b_point_density_node(b_node);
- metadata.channels = 4;
- metadata.width = b_point_density_node.resolution();
- metadata.height = metadata.width;
- metadata.depth = metadata.width;
- metadata.is_float = true;
- }
- }
-}
-
-bool BlenderSession::builtin_image_pixels(const string &builtin_name,
- void *builtin_data,
- unsigned char *pixels,
- const size_t pixels_size,
- const bool free_cache)
-{
- if (!builtin_data) {
- return false;
- }
-
- const int frame = builtin_image_frame(builtin_name);
-
- PointerRNA ptr;
- RNA_id_pointer_create((ID *)builtin_data, &ptr);
- BL::Image b_image(ptr);
-
- const int width = b_image.size()[0];
- const int height = b_image.size()[1];
- const int channels = b_image.channels();
-
- unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame);
- const size_t num_pixels = ((size_t)width) * height;
-
- if (image_pixels && num_pixels * channels == pixels_size) {
- memcpy(pixels, image_pixels, pixels_size * sizeof(unsigned char));
- }
- else {
- if (channels == 1) {
- memset(pixels, 0, pixels_size * sizeof(unsigned char));
- }
- else {
- const size_t num_pixels_safe = pixels_size / channels;
- unsigned char *cp = pixels;
- for (size_t i = 0; i < num_pixels_safe; i++, cp += channels) {
- cp[0] = 255;
- cp[1] = 0;
- cp[2] = 255;
- if (channels == 4) {
- cp[3] = 255;
- }
- }
- }
- }
-
- if (image_pixels) {
- MEM_freeN(image_pixels);
- }
-
- /* Free image buffers to save memory during render. */
- if (free_cache) {
- b_image.buffers_free();
- }
-
- /* Premultiply, byte images are always straight for Blender. */
- unsigned char *cp = pixels;
- for (size_t i = 0; i < num_pixels; i++, cp += channels) {
- cp[0] = (cp[0] * cp[3]) >> 8;
- cp[1] = (cp[1] * cp[3]) >> 8;
- cp[2] = (cp[2] * cp[3]) >> 8;
- }
- return true;
-}
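
A quick, hedged arithmetic sketch of the byte premultiply used above: shifting right by 8 divides by 256 rather than 255, a cheap approximation that can lose one step of precision. The values below are made up and the comparison helper is not part of this patch.

#include <cstdio>

int main()
{
  const int c = 255, a = 255;                   /* hypothetical straight-alpha byte values */
  const int premul_shift = (c * a) >> 8;        /* 254: divide by 256, as in the code above */
  const int premul_exact = (c * a + 127) / 255; /* 255: exact rounded divide by 255 */
  printf("shift=%d exact=%d\n", premul_shift, premul_exact);
  return 0;
}
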
-
-bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
- void *builtin_data,
- float *pixels,
- const size_t pixels_size,
- const bool free_cache)
-{
- if (!builtin_data) {
- return false;
- }
-
- PointerRNA ptr;
- RNA_id_pointer_create((ID *)builtin_data, &ptr);
- BL::ID b_id(ptr);
-
- if (b_id.is_a(&RNA_Image)) {
- /* image data */
- BL::Image b_image(b_id);
- int frame = builtin_image_frame(builtin_name);
-
- const int width = b_image.size()[0];
- const int height = b_image.size()[1];
- const int channels = b_image.channels();
-
- float *image_pixels;
- image_pixels = image_get_float_pixels_for_frame(b_image, frame);
- const size_t num_pixels = ((size_t)width) * height;
-
- if (image_pixels && num_pixels * channels == pixels_size) {
- memcpy(pixels, image_pixels, pixels_size * sizeof(float));
- }
- else {
- if (channels == 1) {
- memset(pixels, 0, num_pixels * sizeof(float));
- }
- else {
- const size_t num_pixels_safe = pixels_size / channels;
- float *fp = pixels;
- for (int i = 0; i < num_pixels_safe; i++, fp += channels) {
- fp[0] = 1.0f;
- fp[1] = 0.0f;
- fp[2] = 1.0f;
- if (channels == 4) {
- fp[3] = 1.0f;
- }
- }
- }
- }
-
- if (image_pixels) {
- MEM_freeN(image_pixels);
- }
-
- /* Free image buffers to save memory during render. */
- if (free_cache) {
- b_image.buffers_free();
- }
-
- return true;
- }
- else if (b_id.is_a(&RNA_Object)) {
- /* smoke volume data */
- BL::Object b_ob(b_id);
- BL::SmokeDomainSettings b_domain = object_smoke_domain_find(b_ob);
-
- if (!b_domain) {
- return false;
- }
-
- int3 resolution = get_int3(b_domain.domain_resolution());
- int length, amplify = (b_domain.use_high_resolution()) ? b_domain.amplify() + 1 : 1;
-
- /* Velocity and heat data is always low-resolution. */
- if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY) ||
- builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) {
- amplify = 1;
- }
-
- const int width = resolution.x * amplify;
- const int height = resolution.y * amplify;
- const int depth = resolution.z * amplify;
- const size_t num_pixels = ((size_t)width) * height * depth;
-
- if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY)) {
- SmokeDomainSettings_density_grid_get_length(&b_domain.ptr, &length);
- if (length == num_pixels) {
- SmokeDomainSettings_density_grid_get(&b_domain.ptr, pixels);
- return true;
- }
- }
- else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_FLAME)) {
- /* This is in the range 0..1, and is interpreted by the OpenGL smoke viewer
- * as 1500..3000 K, with the first part faded to zero density. */
- SmokeDomainSettings_flame_grid_get_length(&b_domain.ptr, &length);
- if (length == num_pixels) {
- SmokeDomainSettings_flame_grid_get(&b_domain.ptr, pixels);
- return true;
- }
- }
- else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_COLOR)) {
- /* the RGB is "premultiplied" by density for better interpolation results */
- SmokeDomainSettings_color_grid_get_length(&b_domain.ptr, &length);
- if (length == num_pixels * 4) {
- SmokeDomainSettings_color_grid_get(&b_domain.ptr, pixels);
- return true;
- }
- }
- else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY)) {
- SmokeDomainSettings_velocity_grid_get_length(&b_domain.ptr, &length);
- if (length == num_pixels * 3) {
- SmokeDomainSettings_velocity_grid_get(&b_domain.ptr, pixels);
- return true;
- }
- }
- else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) {
- SmokeDomainSettings_heat_grid_get_length(&b_domain.ptr, &length);
- if (length == num_pixels) {
- SmokeDomainSettings_heat_grid_get(&b_domain.ptr, pixels);
- return true;
- }
- }
- else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_TEMPERATURE)) {
- SmokeDomainSettings_temperature_grid_get_length(&b_domain.ptr, &length);
- if (length == num_pixels) {
- SmokeDomainSettings_temperature_grid_get(&b_domain.ptr, pixels);
- return true;
- }
- }
- else {
- fprintf(
- stderr, "Cycles error: unknown volume attribute %s, skipping\n", builtin_name.c_str());
- pixels[0] = 0.0f;
- return false;
- }
-
- fprintf(stderr, "Cycles error: unexpected smoke volume resolution, skipping\n");
- }
- else {
- /* We originally were passing view_layer here, but in reality we need a
- * depsgraph to pass to the RE_point_density_minmax() function.
- */
- /* TODO(sergey): Check we're indeed in shader node tree. */
- PointerRNA ptr;
- RNA_pointer_create(NULL, &RNA_Node, builtin_data, &ptr);
- BL::Node b_node(ptr);
- if (b_node.is_a(&RNA_ShaderNodeTexPointDensity)) {
- BL::ShaderNodeTexPointDensity b_point_density_node(b_node);
- int length;
- b_point_density_node.calc_point_density(b_depsgraph, &length, &pixels);
- }
- }
-
- return false;
-}
-
-void BlenderSession::builtin_images_load()
-{
- /* Force builtin images to be loaded along with Blender data sync. This
- * is needed because we may be reading from depsgraph evaluated data which
- * can be freed by Blender before Cycles reads it. */
- ImageManager *manager = session->scene->image_manager;
- Device *device = session->device;
- manager->device_load_builtin(device, session->scene, session->progress);
-}
-
void BlenderSession::update_resumable_tile_manager(int num_samples)
{
const int num_resumable_chunks = BlenderSession::num_resumable_chunks,
@@ -1470,8 +1100,8 @@ void BlenderSession::update_resumable_tile_manager(int num_samples)
/* Round after doing the multiplications with num_chunks and num_samples_per_chunk
* to allow for many small chunks. */
- int rounded_range_start_sample = (int)floor(range_start_sample + 0.5f);
- int rounded_range_num_samples = max((int)floor(range_num_samples + 0.5f), 1);
+ int rounded_range_start_sample = (int)floorf(range_start_sample + 0.5f);
+ int rounded_range_num_samples = max((int)floorf(range_num_samples + 0.5f), 1);
/* Make sure we don't overshoot. */
if (rounded_range_start_sample + rounded_range_num_samples > num_samples) {
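
As a small worked example of the floorf(x + 0.5f) rounding above (all numbers hypothetical; the chunk arithmetic itself sits outside this hunk), here is a standalone sketch. The overshoot clamp at the end is illustrative only, since the body of the check above is not shown in this hunk.

#include <algorithm>
#include <cmath>
#include <cstdio>

int main()
{
  /* Hypothetical fractional sample range for one resumable chunk. */
  const float range_start_sample = 93.75f;
  const float range_num_samples = 31.25f;
  const int num_samples = 250;

  int start = (int)floorf(range_start_sample + 0.5f);           /* 94 */
  int num = std::max((int)floorf(range_num_samples + 0.5f), 1); /* 31, never below 1 */

  /* Overshoot guard, roughly mirroring the check above. */
  if (start + num > num_samples) {
    num = num_samples - start;
  }
  printf("start=%d num=%d\n", start, num);
  return 0;
}
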
diff --git a/intern/cycles/blender/blender_session.h b/intern/cycles/blender/blender_session.h
index f0107d4e0b1..34e952e312b 100644
--- a/intern/cycles/blender/blender_session.h
+++ b/intern/cycles/blender/blender_session.h
@@ -17,15 +17,19 @@
#ifndef __BLENDER_SESSION_H__
#define __BLENDER_SESSION_H__
+#include "RNA_blender_cpp.h"
+
#include "device/device.h"
+
+#include "render/bake.h"
#include "render/scene.h"
#include "render/session.h"
-#include "render/bake.h"
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
+class BlenderSync;
class ImageMetaData;
class Scene;
class Session;
@@ -49,8 +53,6 @@ class BlenderSession {
~BlenderSession();
- void create();
-
/* session */
void create_session();
void free_session();
@@ -64,18 +66,16 @@ class BlenderSession {
BL::Object &b_object,
const string &pass_type,
const int custom_flag,
- const int object_id,
- BL::BakePixel &pixel_array,
- const size_t num_pixels,
- const int depth,
- float pixels[]);
+ const int bake_width,
+ const int bake_height);
- void write_render_result(BL::RenderResult &b_rr, BL::RenderLayer &b_rlay, RenderTile &rtile);
+ void write_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile);
void write_render_tile(RenderTile &rtile);
+ void read_render_tile(RenderTile &rtile);
 /* Update functions are used to update the display buffer only after a sample was
 * rendered; they are only needed for better visual feedback. */
- void update_render_result(BL::RenderResult &b_rr, BL::RenderLayer &b_rlay, RenderTile &rtile);
+ void update_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile);
void update_render_tile(RenderTile &rtile, bool highlight);
/* interactive updates */
@@ -150,24 +150,14 @@ class BlenderSession {
protected:
void stamp_view_layer_metadata(Scene *scene, const string &view_layer_name);
- void do_write_update_render_result(BL::RenderResult &b_rr,
- BL::RenderLayer &b_rlay,
+ void do_write_update_render_result(BL::RenderLayer &b_rlay,
RenderTile &rtile,
bool do_update_only);
- void do_write_update_render_tile(RenderTile &rtile, bool do_update_only, bool highlight);
-
- int builtin_image_frame(const string &builtin_name);
- void builtin_image_info(const string &builtin_name, void *builtin_data, ImageMetaData &metadata);
- bool builtin_image_pixels(const string &builtin_name,
- void *builtin_data,
- unsigned char *pixels,
- const size_t pixels_size,
- const bool free_cache);
- bool builtin_image_float_pixels(const string &builtin_name,
- void *builtin_data,
- float *pixels,
- const size_t pixels_size,
- const bool free_cache);
+ void do_write_update_render_tile(RenderTile &rtile,
+ bool do_update_only,
+ bool do_read_only,
+ bool highlight);
+
void builtin_images_load();
/* Update tile manager to reflect resumable render settings. */
diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp
index 169c4d414a6..33e73b5a4b9 100644
--- a/intern/cycles/blender/blender_shader.cpp
+++ b/intern/cycles/blender/blender_shader.cpp
@@ -15,6 +15,7 @@
*/
#include "render/background.h"
+#include "render/colorspace.h"
#include "render/graph.h"
#include "render/light.h"
#include "render/nodes.h"
@@ -22,14 +23,15 @@
#include "render/scene.h"
#include "render/shader.h"
-#include "blender/blender_texture.h"
+#include "blender/blender_image.h"
#include "blender/blender_sync.h"
+#include "blender/blender_texture.h"
#include "blender/blender_util.h"
#include "util/util_debug.h"
#include "util/util_foreach.h"
-#include "util/util_string.h"
#include "util/util_set.h"
+#include "util/util_string.h"
#include "util/util_task.h"
CCL_NAMESPACE_BEGIN
@@ -89,6 +91,12 @@ template<typename NodeType> static ExtensionType get_image_extension(NodeType &b
return (ExtensionType)validate_enum_value(value, EXTENSION_NUM_TYPES, EXTENSION_REPEAT);
}
+static ImageAlphaType get_image_alpha_type(BL::Image &b_image)
+{
+ int value = b_image.alpha_mode();
+ return (ImageAlphaType)validate_enum_value(value, IMAGE_ALPHA_NUM_TYPES, IMAGE_ALPHA_AUTO);
+}
+
/* Graph */
static BL::NodeSocket get_node_output(BL::Node &b_node, const string &name)
@@ -201,24 +209,6 @@ static void get_tex_mapping(TextureMapping *mapping, BL::TexMapping &b_mapping)
mapping->z_mapping = (TextureMapping::Mapping)b_mapping.mapping_z();
}
-static void get_tex_mapping(TextureMapping *mapping, BL::ShaderNodeMapping &b_mapping)
-{
- if (!b_mapping)
- return;
-
- mapping->translation = get_float3(b_mapping.translation());
- mapping->rotation = get_float3(b_mapping.rotation());
- mapping->scale = get_float3(b_mapping.scale());
- mapping->type = (TextureMapping::Type)b_mapping.vector_type();
-
- mapping->use_minmax = b_mapping.use_min() || b_mapping.use_max();
-
- if (b_mapping.use_min())
- mapping->min = get_float3(b_mapping.min());
- if (b_mapping.use_max())
- mapping->max = get_float3(b_mapping.max());
-}
-
static ShaderNode *add_node(Scene *scene,
BL::RenderEngine &b_engine,
BL::BlendData &b_data,
@@ -308,18 +298,38 @@ static ShaderNode *add_node(Scene *scene,
else if (b_node.is_a(&RNA_ShaderNodeRGBToBW)) {
node = new RGBToBWNode();
}
+ else if (b_node.is_a(&RNA_ShaderNodeMapRange)) {
+ BL::ShaderNodeMapRange b_map_range_node(b_node);
+ MapRangeNode *map_range_node = new MapRangeNode();
+ map_range_node->clamp = b_map_range_node.clamp();
+ map_range_node->type = (NodeMapRangeType)b_map_range_node.interpolation_type();
+ node = map_range_node;
+ }
+ else if (b_node.is_a(&RNA_ShaderNodeClamp)) {
+ BL::ShaderNodeClamp b_clamp_node(b_node);
+ ClampNode *clamp_node = new ClampNode();
+ clamp_node->type = (NodeClampType)b_clamp_node.clamp_type();
+ node = clamp_node;
+ }
else if (b_node.is_a(&RNA_ShaderNodeMath)) {
BL::ShaderNodeMath b_math_node(b_node);
- MathNode *math = new MathNode();
- math->type = (NodeMath)b_math_node.operation();
- math->use_clamp = b_math_node.use_clamp();
- node = math;
+ MathNode *math_node = new MathNode();
+ math_node->type = (NodeMathType)b_math_node.operation();
+ math_node->use_clamp = b_math_node.use_clamp();
+ node = math_node;
}
else if (b_node.is_a(&RNA_ShaderNodeVectorMath)) {
BL::ShaderNodeVectorMath b_vector_math_node(b_node);
- VectorMathNode *vmath = new VectorMathNode();
- vmath->type = (NodeVectorMath)b_vector_math_node.operation();
- node = vmath;
+ VectorMathNode *vector_math_node = new VectorMathNode();
+ vector_math_node->type = (NodeVectorMathType)b_vector_math_node.operation();
+ node = vector_math_node;
+ }
+ else if (b_node.is_a(&RNA_ShaderNodeVectorRotate)) {
+ BL::ShaderNodeVectorRotate b_vector_rotate_node(b_node);
+ VectorRotateNode *vector_rotate_node = new VectorRotateNode();
+ vector_rotate_node->type = (NodeVectorRotateType)b_vector_rotate_node.rotation_type();
+ vector_rotate_node->invert = b_vector_rotate_node.invert();
+ node = vector_rotate_node;
}
else if (b_node.is_a(&RNA_ShaderNodeVectorTransform)) {
BL::ShaderNodeVectorTransform b_vector_transform_node(b_node);
@@ -341,9 +351,7 @@ static ShaderNode *add_node(Scene *scene,
else if (b_node.is_a(&RNA_ShaderNodeMapping)) {
BL::ShaderNodeMapping b_mapping_node(b_node);
MappingNode *mapping = new MappingNode();
-
- get_tex_mapping(&mapping->tex_mapping, b_mapping_node);
-
+ mapping->type = (NodeMappingType)b_mapping_node.vector_type();
node = mapping;
}
else if (b_node.is_a(&RNA_ShaderNodeFresnel)) {
@@ -376,16 +384,16 @@ static ShaderNode *add_node(Scene *scene,
switch (b_aniso_node.distribution()) {
case BL::ShaderNodeBsdfAnisotropic::distribution_BECKMANN:
- aniso->distribution = CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID;
+ aniso->distribution = CLOSURE_BSDF_MICROFACET_BECKMANN_ID;
break;
case BL::ShaderNodeBsdfAnisotropic::distribution_GGX:
- aniso->distribution = CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID;
+ aniso->distribution = CLOSURE_BSDF_MICROFACET_GGX_ID;
break;
case BL::ShaderNodeBsdfAnisotropic::distribution_MULTI_GGX:
- aniso->distribution = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID;
+ aniso->distribution = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID;
break;
case BL::ShaderNodeBsdfAnisotropic::distribution_ASHIKHMIN_SHIRLEY:
- aniso->distribution = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID;
+ aniso->distribution = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID;
break;
}
@@ -591,6 +599,15 @@ static ShaderNode *add_node(Scene *scene,
else if (b_node.is_a(&RNA_ShaderNodeHairInfo)) {
node = new HairInfoNode();
}
+ else if (b_node.is_a(&RNA_ShaderNodeVolumeInfo)) {
+ node = new VolumeInfoNode();
+ }
+ else if (b_node.is_a(&RNA_ShaderNodeVertexColor)) {
+ BL::ShaderNodeVertexColor b_vertex_color_node(b_node);
+ VertexColorNode *vertex_color_node = new VertexColorNode();
+ vertex_color_node->layer_name = b_vertex_color_node.layer_name();
+ node = vertex_color_node;
+ }
else if (b_node.is_a(&RNA_ShaderNodeBump)) {
BL::ShaderNodeBump b_bump_node(b_node);
BumpNode *bump = new BumpNode();
@@ -603,16 +620,16 @@ static ShaderNode *add_node(Scene *scene,
/* create script node */
BL::ShaderNodeScript b_script_node(b_node);
- OSLShaderManager *manager = (OSLShaderManager *)scene->shader_manager;
+ ShaderManager *manager = scene->shader_manager;
string bytecode_hash = b_script_node.bytecode_hash();
if (!bytecode_hash.empty()) {
- node = manager->osl_node("", bytecode_hash, b_script_node.bytecode());
+ node = OSLShaderManager::osl_node(manager, "", bytecode_hash, b_script_node.bytecode());
}
else {
string absolute_filepath = blender_absolute_path(
b_data, b_ntree, b_script_node.filepath());
- node = manager->osl_node(absolute_filepath, "");
+ node = OSLShaderManager::osl_node(manager, absolute_filepath, "");
}
}
#else
@@ -625,7 +642,27 @@ static ShaderNode *add_node(Scene *scene,
BL::Image b_image(b_image_node.image());
BL::ImageUser b_image_user(b_image_node.image_user());
ImageTextureNode *image = new ImageTextureNode();
+
+ image->interpolation = get_image_interpolation(b_image_node);
+ image->extension = get_image_extension(b_image_node);
+ image->projection = (NodeImageProjection)b_image_node.projection();
+ image->projection_blend = b_image_node.projection_blend();
+ BL::TexMapping b_texture_mapping(b_image_node.texture_mapping());
+ get_tex_mapping(&image->tex_mapping, b_texture_mapping);
+
if (b_image) {
+ PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr;
+ image->colorspace = get_enum_identifier(colorspace_ptr, "name");
+
+ image->animated = b_image_node.image_user().use_auto_refresh();
+ image->alpha_type = get_image_alpha_type(b_image);
+
+ image->tiles.clear();
+ BL::Image::tiles_iterator b_iter;
+ for (b_image.tiles.begin(b_iter); b_iter != b_image.tiles.end(); ++b_iter) {
+ image->tiles.push_back(b_iter->number());
+ }
+
 /* builtin images will use callback-based reading because
 * they can only be loaded correctly from the Blender side
 */
@@ -642,37 +679,14 @@ static ShaderNode *add_node(Scene *scene,
*/
int scene_frame = b_scene.frame_current();
int image_frame = image_user_frame_number(b_image_user, scene_frame);
- image->filename = b_image.name() + "@" + string_printf("%d", image_frame);
- image->builtin_data = b_image.ptr.data;
+ image->handle = scene->image_manager->add_image(
+ new BlenderImageLoader(b_image, image_frame), image->image_params());
}
else {
- image->filename = image_user_file_path(b_image_user, b_image, b_scene.frame_current());
- image->builtin_data = NULL;
- }
-
- image->animated = b_image_node.image_user().use_auto_refresh();
- image->use_alpha = b_image.use_alpha();
-
- /* TODO: restore */
- /* TODO(sergey): Does not work properly when we change builtin type. */
-#if 0
- if(b_image.is_updated()) {
- scene->image_manager->tag_reload_image(
- image->filename.string(),
- image->builtin_data,
- get_image_interpolation(b_image_node),
- get_image_extension(b_image_node),
- image->use_alpha);
+ image->filename = image_user_file_path(
+ b_image_user, b_image, b_scene.frame_current(), true);
}
-#endif
}
- image->color_space = (NodeImageColorSpace)b_image_node.color_space();
- image->projection = (NodeImageProjection)b_image_node.projection();
- image->interpolation = get_image_interpolation(b_image_node);
- image->extension = get_image_extension(b_image_node);
- image->projection_blend = b_image_node.projection_blend();
- BL::TexMapping b_texture_mapping(b_image_node.texture_mapping());
- get_tex_mapping(&image->tex_mapping, b_texture_mapping);
node = image;
}
else if (b_node.is_a(&RNA_ShaderNodeTexEnvironment)) {
@@ -680,7 +694,19 @@ static ShaderNode *add_node(Scene *scene,
BL::Image b_image(b_env_node.image());
BL::ImageUser b_image_user(b_env_node.image_user());
EnvironmentTextureNode *env = new EnvironmentTextureNode();
+
+ env->interpolation = get_image_interpolation(b_env_node);
+ env->projection = (NodeEnvironmentProjection)b_env_node.projection();
+ BL::TexMapping b_texture_mapping(b_env_node.texture_mapping());
+ get_tex_mapping(&env->tex_mapping, b_texture_mapping);
+
if (b_image) {
+ PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr;
+ env->colorspace = get_enum_identifier(colorspace_ptr, "name");
+
+ env->animated = b_env_node.image_user().use_auto_refresh();
+ env->alpha_type = get_image_alpha_type(b_image);
+
bool is_builtin = b_image.packed_file() || b_image.source() == BL::Image::source_GENERATED ||
b_image.source() == BL::Image::source_MOVIE ||
(b_engine.is_preview() && b_image.source() != BL::Image::source_SEQUENCE);
@@ -688,35 +714,14 @@ static ShaderNode *add_node(Scene *scene,
if (is_builtin) {
int scene_frame = b_scene.frame_current();
int image_frame = image_user_frame_number(b_image_user, scene_frame);
- env->filename = b_image.name() + "@" + string_printf("%d", image_frame);
- env->builtin_data = b_image.ptr.data;
+ env->handle = scene->image_manager->add_image(new BlenderImageLoader(b_image, image_frame),
+ env->image_params());
}
else {
- env->filename = image_user_file_path(b_image_user, b_image, b_scene.frame_current());
- env->builtin_data = NULL;
- }
-
- env->animated = b_env_node.image_user().use_auto_refresh();
- env->use_alpha = b_image.use_alpha();
-
- /* TODO: restore */
- /* TODO(sergey): Does not work properly when we change builtin type. */
-#if 0
- if(b_image.is_updated()) {
- scene->image_manager->tag_reload_image(
- env->filename.string(),
- env->builtin_data,
- get_image_interpolation(b_env_node),
- EXTENSION_REPEAT,
- env->use_alpha);
+ env->filename = image_user_file_path(
+ b_image_user, b_image, b_scene.frame_current(), false);
}
-#endif
}
- env->color_space = (NodeImageColorSpace)b_env_node.color_space();
- env->interpolation = get_image_interpolation(b_env_node);
- env->projection = (NodeEnvironmentProjection)b_env_node.projection();
- BL::TexMapping b_texture_mapping(b_env_node.texture_mapping());
- get_tex_mapping(&env->tex_mapping, b_texture_mapping);
node = env;
}
else if (b_node.is_a(&RNA_ShaderNodeTexGradient)) {
@@ -730,9 +735,9 @@ static ShaderNode *add_node(Scene *scene,
else if (b_node.is_a(&RNA_ShaderNodeTexVoronoi)) {
BL::ShaderNodeTexVoronoi b_voronoi_node(b_node);
VoronoiTextureNode *voronoi = new VoronoiTextureNode();
- voronoi->coloring = (NodeVoronoiColoring)b_voronoi_node.coloring();
- voronoi->metric = (NodeVoronoiDistanceMetric)b_voronoi_node.distance();
+ voronoi->dimensions = b_voronoi_node.voronoi_dimensions();
voronoi->feature = (NodeVoronoiFeature)b_voronoi_node.feature();
+ voronoi->metric = (NodeVoronoiDistanceMetric)b_voronoi_node.distance();
BL::TexMapping b_texture_mapping(b_voronoi_node.texture_mapping());
get_tex_mapping(&voronoi->tex_mapping, b_texture_mapping);
node = voronoi;
@@ -749,6 +754,8 @@ static ShaderNode *add_node(Scene *scene,
BL::ShaderNodeTexWave b_wave_node(b_node);
WaveTextureNode *wave = new WaveTextureNode();
wave->type = (NodeWaveType)b_wave_node.wave_type();
+ wave->bands_direction = (NodeWaveBandsDirection)b_wave_node.bands_direction();
+ wave->rings_direction = (NodeWaveRingsDirection)b_wave_node.rings_direction();
wave->profile = (NodeWaveProfile)b_wave_node.wave_profile();
BL::TexMapping b_texture_mapping(b_wave_node.texture_mapping());
get_tex_mapping(&wave->tex_mapping, b_texture_mapping);
@@ -775,17 +782,19 @@ static ShaderNode *add_node(Scene *scene,
else if (b_node.is_a(&RNA_ShaderNodeTexNoise)) {
BL::ShaderNodeTexNoise b_noise_node(b_node);
NoiseTextureNode *noise = new NoiseTextureNode();
+ noise->dimensions = b_noise_node.noise_dimensions();
BL::TexMapping b_texture_mapping(b_noise_node.texture_mapping());
get_tex_mapping(&noise->tex_mapping, b_texture_mapping);
node = noise;
}
else if (b_node.is_a(&RNA_ShaderNodeTexMusgrave)) {
BL::ShaderNodeTexMusgrave b_musgrave_node(b_node);
- MusgraveTextureNode *musgrave = new MusgraveTextureNode();
- musgrave->type = (NodeMusgraveType)b_musgrave_node.musgrave_type();
+ MusgraveTextureNode *musgrave_node = new MusgraveTextureNode();
+ musgrave_node->type = (NodeMusgraveType)b_musgrave_node.musgrave_type();
+ musgrave_node->dimensions = b_musgrave_node.musgrave_dimensions();
BL::TexMapping b_texture_mapping(b_musgrave_node.texture_mapping());
- get_tex_mapping(&musgrave->tex_mapping, b_texture_mapping);
- node = musgrave;
+ get_tex_mapping(&musgrave_node->tex_mapping, b_texture_mapping);
+ node = musgrave_node;
}
else if (b_node.is_a(&RNA_ShaderNodeTexCoord)) {
BL::ShaderNodeTexCoord b_tex_coord_node(b_node);
@@ -804,6 +813,15 @@ static ShaderNode *add_node(Scene *scene,
sky->sun_direction = normalize(get_float3(b_sky_node.sun_direction()));
sky->turbidity = b_sky_node.turbidity();
sky->ground_albedo = b_sky_node.ground_albedo();
+ sky->sun_disc = b_sky_node.sun_disc();
+ sky->sun_size = b_sky_node.sun_size();
+ sky->sun_intensity = b_sky_node.sun_intensity();
+ sky->sun_elevation = b_sky_node.sun_elevation();
+ sky->sun_rotation = b_sky_node.sun_rotation();
+ sky->altitude = 1000.0f * b_sky_node.altitude();
+ sky->air_density = b_sky_node.air_density();
+ sky->dust_density = b_sky_node.dust_density();
+ sky->ozone_density = b_sky_node.ozone_density();
BL::TexMapping b_texture_mapping(b_sky_node.texture_mapping());
get_tex_mapping(&sky->tex_mapping, b_texture_mapping);
node = sky;
@@ -824,6 +842,12 @@ static ShaderNode *add_node(Scene *scene,
}
node = ies;
}
+ else if (b_node.is_a(&RNA_ShaderNodeTexWhiteNoise)) {
+ BL::ShaderNodeTexWhiteNoise b_tex_white_noise_node(b_node);
+ WhiteNoiseTextureNode *white_noise_node = new WhiteNoiseTextureNode();
+ white_noise_node->dimensions = b_tex_white_noise_node.noise_dimensions();
+ node = white_noise_node;
+ }
else if (b_node.is_a(&RNA_ShaderNodeNormalMap)) {
BL::ShaderNodeNormalMap b_normal_map_node(b_node);
NormalMapNode *nmap = new NormalMapNode();
@@ -849,22 +873,13 @@ static ShaderNode *add_node(Scene *scene,
else if (b_node.is_a(&RNA_ShaderNodeTexPointDensity)) {
BL::ShaderNodeTexPointDensity b_point_density_node(b_node);
PointDensityTextureNode *point_density = new PointDensityTextureNode();
- point_density->filename = b_point_density_node.name();
point_density->space = (NodeTexVoxelSpace)b_point_density_node.space();
point_density->interpolation = get_image_interpolation(b_point_density_node);
- point_density->builtin_data = b_point_density_node.ptr.data;
- point_density->image_manager = scene->image_manager;
-
- /* TODO(sergey): Use more proper update flag. */
- if (true) {
- point_density->add_image();
- b_point_density_node.cache_point_density(b_depsgraph);
- scene->image_manager->tag_reload_image(point_density->filename.string(),
- point_density->builtin_data,
- point_density->interpolation,
- EXTENSION_CLIP,
- true);
- }
+ point_density->handle = scene->image_manager->add_image(
+ new BlenderPointDensityLoader(b_depsgraph, b_point_density_node),
+ point_density->image_params());
+
+ b_point_density_node.cache_point_density(b_depsgraph);
node = point_density;
 /* Transformation from world space to texture space.
@@ -899,6 +914,12 @@ static ShaderNode *add_node(Scene *scene,
disp->attribute = "";
node = disp;
}
+ else if (b_node.is_a(&RNA_ShaderNodeOutputAOV)) {
+ BL::ShaderNodeOutputAOV b_aov_node(b_node);
+ OutputAOVNode *aov = new OutputAOVNode();
+ aov->name = b_aov_node.name();
+ node = aov;
+ }
if (node) {
node->name = b_node.name();
@@ -910,7 +931,7 @@ static ShaderNode *add_node(Scene *scene,
static bool node_use_modified_socket_name(ShaderNode *node)
{
- if (node->special_type == SHADER_SPECIAL_TYPE_SCRIPT)
+ if (node->special_type == SHADER_SPECIAL_TYPE_OSL)
return false;
return true;
@@ -1153,8 +1174,10 @@ static void add_nodes(Scene *scene,
BL::NodeTree::links_iterator b_link;
for (b_ntree.links.begin(b_link); b_link != b_ntree.links.end(); ++b_link) {
- /* Ignore invalid links to avoid unwanted cycles created in graph. */
- if (!b_link->is_valid()) {
+ /* Ignore invalid links to avoid unwanted cycles created in graph.
+ * Also ignore links with unavailable sockets. */
+ if (!(b_link->is_valid() && b_link->from_socket().enabled() &&
+ b_link->to_socket().enabled())) {
continue;
}
/* get blender link data */
@@ -1217,12 +1240,11 @@ void BlenderSync::sync_materials(BL::Depsgraph &b_depsgraph, bool update_all)
Shader *shader;
/* test if we need to sync */
- if (shader_map.sync(&shader, b_mat) || shader->need_sync_object || update_all) {
+ if (shader_map.add_or_update(&shader, b_mat) || update_all) {
ShaderGraph *graph = new ShaderGraph();
shader->name = b_mat.name().c_str();
shader->pass_id = b_mat.pass_index();
- shader->need_sync_object = false;
/* create nodes */
if (b_mat.use_nodes() && b_mat.node_tree()) {
@@ -1246,6 +1268,7 @@ void BlenderSync::sync_materials(BL::Depsgraph &b_depsgraph, bool update_all)
shader->heterogeneous_volume = !get_boolean(cmat, "homogeneous_volume");
shader->volume_sampling_method = get_volume_sampling(cmat);
shader->volume_interpolation_method = get_volume_interpolation(cmat);
+ shader->volume_step_rate = get_float(cmat, "volume_step_rate");
shader->displacement_method = get_displacement_method(cmat);
shader->set_graph(graph);
@@ -1284,19 +1307,23 @@ void BlenderSync::sync_materials(BL::Depsgraph &b_depsgraph, bool update_all)
/* Sync World */
-void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, bool update_all)
+void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, bool update_all)
{
Background *background = scene->background;
Background prevbackground = *background;
BL::World b_world = b_scene.world();
- if (world_recalc || update_all || b_world.ptr.data != world_map) {
+ BlenderViewportParameters new_viewport_parameters(b_v3d);
+
+ if (world_recalc || update_all || b_world.ptr.data != world_map ||
+ viewport_parameters.modified(new_viewport_parameters)) {
Shader *shader = scene->default_background;
ShaderGraph *graph = new ShaderGraph();
/* create nodes */
- if (b_world && b_world.use_nodes() && b_world.node_tree()) {
+ if (new_viewport_parameters.use_scene_world && b_world && b_world.use_nodes() &&
+ b_world.node_tree()) {
BL::ShaderNodeTree b_ntree(b_world.node_tree());
add_nodes(scene, b_engine, b_data, b_depsgraph, b_scene, graph, b_ntree);
@@ -1306,8 +1333,9 @@ void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, bool update_all)
shader->heterogeneous_volume = !get_boolean(cworld, "homogeneous_volume");
shader->volume_sampling_method = get_volume_sampling(cworld);
shader->volume_interpolation_method = get_volume_interpolation(cworld);
+ shader->volume_step_rate = get_float(cworld, "volume_step_size");
}
- else if (b_world) {
+ else if (new_viewport_parameters.use_scene_world && b_world) {
BackgroundNode *background = new BackgroundNode();
background->color = get_float3(b_world.color());
graph->add(background);
@@ -1315,6 +1343,61 @@ void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, bool update_all)
ShaderNode *out = graph->output();
graph->connect(background->output("Background"), out->input("Surface"));
}
+ else if (!new_viewport_parameters.use_scene_world) {
+ float3 world_color;
+ if (b_world) {
+ world_color = get_float3(b_world.color());
+ }
+ else {
+ world_color = make_float3(0.0f, 0.0f, 0.0f);
+ }
+
+ BackgroundNode *background = new BackgroundNode();
+ graph->add(background);
+
+ LightPathNode *light_path = new LightPathNode();
+ graph->add(light_path);
+
+ MixNode *mix_scene_with_background = new MixNode();
+ mix_scene_with_background->color2 = world_color;
+ graph->add(mix_scene_with_background);
+
+ EnvironmentTextureNode *texture_environment = new EnvironmentTextureNode();
+ texture_environment->tex_mapping.type = TextureMapping::VECTOR;
+ texture_environment->tex_mapping.rotation[2] = new_viewport_parameters.studiolight_rotate_z;
+ texture_environment->filename = new_viewport_parameters.studiolight_path;
+ graph->add(texture_environment);
+
+ MixNode *mix_intensity = new MixNode();
+ mix_intensity->type = NODE_MIX_MUL;
+ mix_intensity->fac = 1.0f;
+ mix_intensity->color2 = make_float3(new_viewport_parameters.studiolight_intensity,
+ new_viewport_parameters.studiolight_intensity,
+ new_viewport_parameters.studiolight_intensity);
+ graph->add(mix_intensity);
+
+ TextureCoordinateNode *texture_coordinate = new TextureCoordinateNode();
+ graph->add(texture_coordinate);
+
+ MixNode *mix_background_with_environment = new MixNode();
+ mix_background_with_environment->fac = new_viewport_parameters.studiolight_background_alpha;
+ mix_background_with_environment->color1 = world_color;
+ graph->add(mix_background_with_environment);
+
+ ShaderNode *out = graph->output();
+
+ graph->connect(texture_coordinate->output("Generated"),
+ texture_environment->input("Vector"));
+ graph->connect(texture_environment->output("Color"), mix_intensity->input("Color1"));
+ graph->connect(light_path->output("Is Camera Ray"), mix_scene_with_background->input("Fac"));
+ graph->connect(mix_intensity->output("Color"), mix_scene_with_background->input("Color1"));
+ graph->connect(mix_intensity->output("Color"),
+ mix_background_with_environment->input("Color2"));
+ graph->connect(mix_background_with_environment->output("Color"),
+ mix_scene_with_background->input("Color2"));
+ graph->connect(mix_scene_with_background->output("Color"), background->input("Color"));
+ graph->connect(background->output("Background"), out->input("Surface"));
+ }
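
A hedged sketch of what the node graph built above evaluates to: camera rays see the world color blended toward the studio-light environment by studiolight_background_alpha, while all other rays are lit by the studio-light environment scaled by studiolight_intensity. The Float3/lerp3 helpers below are stand-ins, not Cycles types, and "env" stands for the environment texture already multiplied by the intensity (the mix_intensity node).

struct Float3 {
  float x, y, z;
};

static Float3 lerp3(const Float3 &a, const Float3 &b, float t)
{
  return {a.x + (b.x - a.x) * t, a.y + (b.y - a.y) * t, a.z + (b.z - a.z) * t};
}

/* Illustrative only: approximate result of the viewport world graph above. */
static Float3 viewport_world_color(const Float3 &env,
                                   const Float3 &world_color,
                                   float studiolight_background_alpha,
                                   bool is_camera_ray)
{
  /* mix_background_with_environment: world color vs. environment for camera rays. */
  const Float3 camera_background = lerp3(world_color, env, studiolight_background_alpha);
  /* mix_scene_with_background, driven by the "Is Camera Ray" light path output. */
  return is_camera_ray ? camera_background : env;
}
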
if (b_world) {
/* AO */
@@ -1348,16 +1431,7 @@ void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, bool update_all)
}
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
-
- /* when doing preview render check for BI's transparency settings,
- * this is so because Blender's preview render routines are not able
- * to tweak all cycles's settings depending on different circumstances
- */
- if (b_engine.is_preview() == false)
- background->transparent = get_boolean(cscene, "film_transparent");
- else
- background->transparent = b_scene.render().alpha_mode() ==
- BL::RenderSettings::alpha_mode_TRANSPARENT;
+ background->transparent = b_scene.render().film_transparent();
if (background->transparent) {
background->transparent_glass = get_boolean(cscene, "film_transparent_glass");
@@ -1368,7 +1442,8 @@ void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, bool update_all)
background->transparent_roughness_threshold = 0.0f;
}
- background->use_shader = view_layer.use_background_shader;
+ background->use_shader = view_layer.use_background_shader |
+ viewport_parameters.custom_viewport_parameters();
background->use_ao = background->use_ao && view_layer.use_background_ao;
if (background->modified(prevbackground))
@@ -1391,7 +1466,7 @@ void BlenderSync::sync_lights(BL::Depsgraph &b_depsgraph, bool update_all)
Shader *shader;
/* test if we need to sync */
- if (shader_map.sync(&shader, b_light) || update_all) {
+ if (shader_map.add_or_update(&shader, b_light) || update_all) {
ShaderGraph *graph = new ShaderGraph();
/* create nodes */
@@ -1403,16 +1478,9 @@ void BlenderSync::sync_lights(BL::Depsgraph &b_depsgraph, bool update_all)
add_nodes(scene, b_engine, b_data, b_depsgraph, b_scene, graph, b_ntree);
}
else {
- float strength = 1.0f;
-
- if (b_light.type() == BL::Light::type_POINT || b_light.type() == BL::Light::type_SPOT ||
- b_light.type() == BL::Light::type_AREA) {
- strength = 100.0f;
- }
-
EmissionNode *emission = new EmissionNode();
- emission->color = get_float3(b_light.color());
- emission->strength = strength;
+ emission->color = make_float3(1.0f, 1.0f, 1.0f);
+ emission->strength = 1.0f;
graph->add(emission);
ShaderNode *out = graph->output();
@@ -1425,7 +1493,7 @@ void BlenderSync::sync_lights(BL::Depsgraph &b_depsgraph, bool update_all)
}
}
-void BlenderSync::sync_shaders(BL::Depsgraph &b_depsgraph)
+void BlenderSync::sync_shaders(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d)
{
/* for auto refresh images */
bool auto_refresh_update = false;
@@ -1438,12 +1506,9 @@ void BlenderSync::sync_shaders(BL::Depsgraph &b_depsgraph)
shader_map.pre_sync();
- sync_world(b_depsgraph, auto_refresh_update);
+ sync_world(b_depsgraph, b_v3d, auto_refresh_update);
sync_lights(b_depsgraph, auto_refresh_update);
sync_materials(b_depsgraph, auto_refresh_update);
-
- /* false = don't delete unused shaders, not supported */
- shader_map.post_sync(false);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index 28d0c554f22..ee90b4dfbfe 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -16,6 +16,7 @@
#include "render/background.h"
#include "render/camera.h"
+#include "render/curves.h"
#include "render/film.h"
#include "render/graph.h"
#include "render/integrator.h"
@@ -25,19 +26,19 @@
#include "render/object.h"
#include "render/scene.h"
#include "render/shader.h"
-#include "render/curves.h"
#include "device/device.h"
#include "blender/blender_device.h"
-#include "blender/blender_sync.h"
#include "blender/blender_session.h"
+#include "blender/blender_sync.h"
#include "blender/blender_util.h"
#include "util/util_debug.h"
#include "util/util_foreach.h"
-#include "util/util_opengl.h"
#include "util/util_hash.h"
+#include "util/util_opengl.h"
+#include "util/util_openimagedenoise.h"
CCL_NAMESPACE_BEGIN
@@ -56,7 +57,7 @@ BlenderSync::BlenderSync(BL::RenderEngine &b_engine,
b_scene(b_scene),
shader_map(&scene->shaders),
object_map(&scene->objects),
- mesh_map(&scene->meshes),
+ geometry_map(&scene->geometry),
light_map(&scene->lights),
particle_system_map(&scene->particle_systems),
world_map(NULL),
@@ -78,15 +79,21 @@ BlenderSync::~BlenderSync()
{
}
+void BlenderSync::reset(BL::BlendData &b_data, BL::Scene &b_scene)
+{
+ /* Update data and scene pointers in case they change in session reset,
+ * for example after undo. */
+ this->b_data = b_data;
+ this->b_scene = b_scene;
+}
+
/* Sync */
-void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph)
+void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d)
{
 /* Sync recalc flags from Blender to Cycles. The actual update is done separately,
 * so we can do it later on if doing it immediately is not suitable. */
- bool has_updated_objects = b_depsgraph.id_type_updated(BL::DriverTarget::id_type_OBJECT);
-
if (experimental) {
/* Mark all meshes as needing to be exported again if dicing changed. */
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
@@ -108,10 +115,15 @@ void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph)
}
if (dicing_prop_changed) {
- for (const pair<void *, Mesh *> &iter : mesh_map.key_to_scene_data()) {
- Mesh *mesh = iter.second;
- if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE) {
- mesh_map.set_recalc(iter.first);
+ for (const pair<const GeometryKey, Geometry *> &iter : geometry_map.key_to_scene_data()) {
+ Geometry *geom = iter.second;
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE) {
+ PointerRNA id_ptr;
+ RNA_id_pointer_create((::ID *)iter.first.id, &id_ptr);
+ geometry_map.set_recalc(BL::ID(id_ptr));
+ }
}
}
}
@@ -135,36 +147,49 @@ void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph)
/* Object */
else if (b_id.is_a(&RNA_Object)) {
BL::Object b_ob(b_id);
- const bool updated_geometry = b_update->is_updated_geometry();
-
- if (b_update->is_updated_transform()) {
- object_map.set_recalc(b_ob);
- light_map.set_recalc(b_ob);
- }
-
- if (object_is_mesh(b_ob)) {
- if (updated_geometry ||
- (object_subdivision_type(b_ob, preview, experimental) != Mesh::SUBDIVISION_NONE)) {
- BL::ID key = BKE_object_is_modified(b_ob) ? b_ob : b_ob.data();
- mesh_map.set_recalc(key);
+ const bool is_geometry = object_is_geometry(b_ob);
+ const bool is_light = !is_geometry && object_is_light(b_ob);
+
+ if (is_geometry || is_light) {
+ const bool updated_geometry = b_update->is_updated_geometry();
+
+ /* Geometry (mesh, hair, volume). */
+ if (is_geometry) {
+ if (b_update->is_updated_transform() || b_update->is_updated_shading()) {
+ object_map.set_recalc(b_ob);
+ }
+
+ if (updated_geometry ||
+ (object_subdivision_type(b_ob, preview, experimental) != Mesh::SUBDIVISION_NONE)) {
+ BL::ID key = BKE_object_is_modified(b_ob) ? b_ob : b_ob.data();
+ geometry_map.set_recalc(key);
+ }
+
+ if (updated_geometry) {
+ BL::Object::particle_systems_iterator b_psys;
+ for (b_ob.particle_systems.begin(b_psys); b_psys != b_ob.particle_systems.end();
+ ++b_psys) {
+ particle_system_map.set_recalc(b_ob);
+ }
+ }
}
- }
- else if (object_is_light(b_ob)) {
- if (updated_geometry) {
- light_map.set_recalc(b_ob);
+ /* Light */
+ else if (is_light) {
+ if (b_update->is_updated_transform() || b_update->is_updated_shading()) {
+ object_map.set_recalc(b_ob);
+ light_map.set_recalc(b_ob);
+ }
+
+ if (updated_geometry) {
+ light_map.set_recalc(b_ob);
+ }
}
}
-
- if (updated_geometry) {
- BL::Object::particle_systems_iterator b_psys;
- for (b_ob.particle_systems.begin(b_psys); b_psys != b_ob.particle_systems.end(); ++b_psys)
- particle_system_map.set_recalc(b_ob);
- }
}
/* Mesh */
else if (b_id.is_a(&RNA_Mesh)) {
BL::Mesh b_mesh(b_id);
- mesh_map.set_recalc(b_mesh);
+ geometry_map.set_recalc(b_mesh);
}
/* World */
else if (b_id.is_a(&RNA_World)) {
@@ -173,19 +198,16 @@ void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph)
world_recalc = true;
}
}
- }
-
- /* Updates shader with object dependency if objects changed. */
- if (has_updated_objects) {
- if (scene->default_background->has_object_dependency) {
- world_recalc = true;
+ /* Volume */
+ else if (b_id.is_a(&RNA_Volume)) {
+ BL::Volume b_volume(b_id);
+ geometry_map.set_recalc(b_volume);
}
+ }
- foreach (Shader *shader, scene->shaders) {
- if (shader->has_object_dependency) {
- shader->need_sync_object = true;
- }
- }
+ BlenderViewportParameters new_viewport_parameters(b_v3d);
+ if (viewport_parameters.modified(new_viewport_parameters)) {
+ world_recalc = true;
}
}
@@ -201,20 +223,23 @@ void BlenderSync::sync_data(BL::RenderSettings &b_render,
sync_view_layer(b_v3d, b_view_layer);
sync_integrator();
- sync_film();
- sync_shaders(b_depsgraph);
+ sync_film(b_v3d);
+ sync_shaders(b_depsgraph, b_v3d);
sync_images();
- sync_curve_settings();
- mesh_synced.clear(); /* use for objects and motion sync */
+ geometry_synced.clear(); /* use for objects and motion sync */
if (scene->need_motion() == Scene::MOTION_PASS || scene->need_motion() == Scene::MOTION_NONE ||
scene->camera->motion_position == Camera::MOTION_POSITION_CENTER) {
- sync_objects(b_depsgraph);
+ sync_objects(b_depsgraph, b_v3d);
}
- sync_motion(b_render, b_depsgraph, b_override, width, height, python_thread_state);
+ sync_motion(b_render, b_depsgraph, b_v3d, b_override, width, height, python_thread_state);
- mesh_synced.clear();
+ geometry_synced.clear();
+
+ /* Shader sync done at the end, since object sync uses it.
+ * false = don't delete unused shaders, not supported. */
+ shader_map.post_sync(false);
free_data_after_sync(b_depsgraph);
}
@@ -231,6 +256,7 @@ void BlenderSync::sync_integrator()
Integrator *integrator = scene->integrator;
Integrator previntegrator = *integrator;
+ integrator->min_bounce = get_int(cscene, "min_light_bounces");
integrator->max_bounce = get_int(cscene, "max_bounces");
integrator->max_diffuse_bounce = get_int(cscene, "diffuse_bounces");
@@ -238,10 +264,12 @@ void BlenderSync::sync_integrator()
integrator->max_transmission_bounce = get_int(cscene, "transmission_bounces");
integrator->max_volume_bounce = get_int(cscene, "volume_bounces");
+ integrator->transparent_min_bounce = get_int(cscene, "min_transparent_bounces");
integrator->transparent_max_bounce = get_int(cscene, "transparent_max_bounces");
integrator->volume_max_steps = get_int(cscene, "volume_max_steps");
- integrator->volume_step_size = get_float(cscene, "volume_step_size");
+ integrator->volume_step_rate = (preview) ? get_float(cscene, "volume_preview_step_rate") :
+ get_float(cscene, "volume_step_rate");
integrator->caustics_reflective = get_boolean(cscene, "caustics_reflective");
integrator->caustics_refractive = get_boolean(cscene, "caustics_refractive");
@@ -249,13 +277,13 @@ void BlenderSync::sync_integrator()
integrator->seed = get_int(cscene, "seed");
if (get_boolean(cscene, "use_animated_seed")) {
- integrator->seed = hash_int_2d(b_scene.frame_current(), get_int(cscene, "seed"));
+ integrator->seed = hash_uint2(b_scene.frame_current(), get_int(cscene, "seed"));
if (b_scene.frame_subframe() != 0.0f) {
/* TODO(sergey): Ideally should be some sort of hash_merge,
* but this is good enough for now.
*/
- integrator->seed += hash_int_2d((int)(b_scene.frame_subframe() * (float)INT_MAX),
- get_int(cscene, "seed"));
+ integrator->seed += hash_uint2((int)(b_scene.frame_subframe() * (float)INT_MAX),
+ get_int(cscene, "seed"));
}
}
@@ -287,6 +315,16 @@ void BlenderSync::sync_integrator()
integrator->sample_all_lights_indirect = get_boolean(cscene, "sample_all_lights_indirect");
integrator->light_sampling_threshold = get_float(cscene, "light_sampling_threshold");
+ if (RNA_boolean_get(&cscene, "use_adaptive_sampling")) {
+ integrator->sampling_pattern = SAMPLING_PATTERN_PMJ;
+ integrator->adaptive_min_samples = get_int(cscene, "adaptive_min_samples");
+ integrator->adaptive_threshold = get_float(cscene, "adaptive_threshold");
+ }
+ else {
+ integrator->adaptive_min_samples = INT_MAX;
+ integrator->adaptive_threshold = 0.0f;
+ }
+
int diffuse_samples = get_int(cscene, "diffuse_samples");
int glossy_samples = get_int(cscene, "glossy_samples");
int transmission_samples = get_int(cscene, "transmission_samples");
@@ -303,6 +341,8 @@ void BlenderSync::sync_integrator()
integrator->mesh_light_samples = mesh_light_samples * mesh_light_samples;
integrator->subsurface_samples = subsurface_samples * subsurface_samples;
integrator->volume_samples = volume_samples * volume_samples;
+ integrator->adaptive_min_samples = min(
+ integrator->adaptive_min_samples * integrator->adaptive_min_samples, INT_MAX);
}
else {
integrator->diffuse_samples = diffuse_samples;
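
A small worked example of the sample-count handling above, with hypothetical values: when the "square samples" branch is taken, the per-type counts and the adaptive minimum are squared, so a UI value of 3 diffuse samples becomes 9 and an adaptive minimum of 64 becomes 4096 (clamped against INT_MAX, as in the hunk above). The square_samples flag below is a local stand-in; the real condition is outside this hunk.

#include <algorithm>
#include <climits>
#include <cstdio>

int main()
{
  /* Hypothetical UI values. */
  const int diffuse_samples = 3;
  const int adaptive_min_samples = 64;
  const bool square_samples = true; /* stand-in for the branch condition above */

  const int effective_diffuse = square_samples ? diffuse_samples * diffuse_samples :
                                                 diffuse_samples; /* 9 */
  const int effective_adaptive_min =
      square_samples ? std::min(adaptive_min_samples * adaptive_min_samples, INT_MAX) :
                       adaptive_min_samples; /* 4096 */

  printf("diffuse=%d adaptive_min=%d\n", effective_diffuse, effective_adaptive_min);
  return 0;
}
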
@@ -337,13 +377,17 @@ void BlenderSync::sync_integrator()
/* Film */
-void BlenderSync::sync_film()
+void BlenderSync::sync_film(BL::SpaceView3D &b_v3d)
{
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
Film *film = scene->film;
Film prevfilm = *film;
+ if (b_v3d) {
+ film->display_pass = update_viewport_display_passes(b_v3d, film->passes);
+ }
+
film->exposure = get_float(cscene, "film_exposure");
film->filter_type = (FilterType)get_enum(
cscene, "pixel_filter_type", FILTER_NUM_TYPES, FILTER_BLACKMAN_HARRIS);
@@ -369,8 +413,10 @@ void BlenderSync::sync_film()
}
}
- if (film->modified(prevfilm))
+ if (film->modified(prevfilm)) {
film->tag_update(scene);
+ film->tag_passes_update(scene, prevfilm.passes, false);
+ }
}
/* Render Layer */
@@ -383,6 +429,7 @@ void BlenderSync::sync_view_layer(BL::SpaceView3D & /*b_v3d*/, BL::ViewLayer &b_
view_layer.use_background_ao = b_view_layer.use_ao();
view_layer.use_surfaces = b_view_layer.use_solid();
view_layer.use_hair = b_view_layer.use_strand();
+ view_layer.use_volumes = b_view_layer.use_volumes();
/* Material override. */
view_layer.material_override = b_view_layer.material_override();
@@ -451,25 +498,25 @@ PassType BlenderSync::get_pass_type(BL::RenderPass &b_pass)
MAP_PASS("DiffDir", PASS_DIFFUSE_DIRECT);
MAP_PASS("GlossDir", PASS_GLOSSY_DIRECT);
MAP_PASS("TransDir", PASS_TRANSMISSION_DIRECT);
- MAP_PASS("SubsurfaceDir", PASS_SUBSURFACE_DIRECT);
MAP_PASS("VolumeDir", PASS_VOLUME_DIRECT);
MAP_PASS("DiffInd", PASS_DIFFUSE_INDIRECT);
MAP_PASS("GlossInd", PASS_GLOSSY_INDIRECT);
MAP_PASS("TransInd", PASS_TRANSMISSION_INDIRECT);
- MAP_PASS("SubsurfaceInd", PASS_SUBSURFACE_INDIRECT);
MAP_PASS("VolumeInd", PASS_VOLUME_INDIRECT);
MAP_PASS("DiffCol", PASS_DIFFUSE_COLOR);
MAP_PASS("GlossCol", PASS_GLOSSY_COLOR);
MAP_PASS("TransCol", PASS_TRANSMISSION_COLOR);
- MAP_PASS("SubsurfaceCol", PASS_SUBSURFACE_COLOR);
MAP_PASS("Emit", PASS_EMISSION);
MAP_PASS("Env", PASS_BACKGROUND);
MAP_PASS("AO", PASS_AO);
MAP_PASS("Shadow", PASS_SHADOW);
+ MAP_PASS("BakePrimitive", PASS_BAKE_PRIMITIVE);
+ MAP_PASS("BakeDifferential", PASS_BAKE_DIFFERENTIAL);
+
#ifdef __KERNEL_DEBUG__
MAP_PASS("Debug BVH Traversed Nodes", PASS_BVH_TRAVERSED_NODES);
MAP_PASS("Debug BVH Traversed Instances", PASS_BVH_TRAVERSED_INSTANCES);
@@ -477,6 +524,8 @@ PassType BlenderSync::get_pass_type(BL::RenderPass &b_pass)
MAP_PASS("Debug Ray Bounces", PASS_RAY_BOUNCES);
#endif
MAP_PASS("Debug Render Time", PASS_RENDER_TIME);
+ MAP_PASS("AdaptiveAuxBuffer", PASS_ADAPTIVE_AUX_BUFFER);
+ MAP_PASS("Debug Sample Count", PASS_SAMPLE_COUNT);
if (string_startswith(name, cryptomatte_prefix)) {
return PASS_CRYPTOMATTE;
}
@@ -512,10 +561,12 @@ int BlenderSync::get_denoising_pass(BL::RenderPass &b_pass)
return -1;
}
-vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_view_layer)
+vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay,
+ BL::ViewLayer &b_view_layer,
+ bool adaptive_sampling,
+ const DenoiseParams &denoising)
{
vector<Pass> passes;
- Pass::add(PASS_COMBINED, passes);
/* loop over passes */
BL::RenderLayer::passes_iterator b_pass_iter;
@@ -527,80 +578,85 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLa
if (pass_type == PASS_MOTION && scene->integrator->motion_blur)
continue;
if (pass_type != PASS_NONE)
- Pass::add(pass_type, passes);
+ Pass::add(pass_type, passes, b_pass.name().c_str());
}
- PointerRNA crp = RNA_pointer_get(&b_view_layer.ptr, "cycles");
- bool full_denoising = get_boolean(crp, "use_denoising");
- bool write_denoising_passes = get_boolean(crp, "denoising_store_passes");
+ PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles");
scene->film->denoising_flags = 0;
- if (full_denoising || write_denoising_passes) {
+ if (denoising.use || denoising.store_passes) {
+ if (denoising.type == DENOISER_NLM) {
#define MAP_OPTION(name, flag) \
- if (!get_boolean(crp, name)) \
+ if (!get_boolean(crl, name)) \
scene->film->denoising_flags |= flag;
- MAP_OPTION("denoising_diffuse_direct", DENOISING_CLEAN_DIFFUSE_DIR);
- MAP_OPTION("denoising_diffuse_indirect", DENOISING_CLEAN_DIFFUSE_IND);
- MAP_OPTION("denoising_glossy_direct", DENOISING_CLEAN_GLOSSY_DIR);
- MAP_OPTION("denoising_glossy_indirect", DENOISING_CLEAN_GLOSSY_IND);
- MAP_OPTION("denoising_transmission_direct", DENOISING_CLEAN_TRANSMISSION_DIR);
- MAP_OPTION("denoising_transmission_indirect", DENOISING_CLEAN_TRANSMISSION_IND);
- MAP_OPTION("denoising_subsurface_direct", DENOISING_CLEAN_SUBSURFACE_DIR);
- MAP_OPTION("denoising_subsurface_indirect", DENOISING_CLEAN_SUBSURFACE_IND);
+ MAP_OPTION("denoising_diffuse_direct", DENOISING_CLEAN_DIFFUSE_DIR);
+ MAP_OPTION("denoising_diffuse_indirect", DENOISING_CLEAN_DIFFUSE_IND);
+ MAP_OPTION("denoising_glossy_direct", DENOISING_CLEAN_GLOSSY_DIR);
+ MAP_OPTION("denoising_glossy_indirect", DENOISING_CLEAN_GLOSSY_IND);
+ MAP_OPTION("denoising_transmission_direct", DENOISING_CLEAN_TRANSMISSION_DIR);
+ MAP_OPTION("denoising_transmission_indirect", DENOISING_CLEAN_TRANSMISSION_IND);
#undef MAP_OPTION
+ }
b_engine.add_pass("Noisy Image", 4, "RGBA", b_view_layer.name().c_str());
}
- if (write_denoising_passes) {
+ if (denoising.store_passes) {
b_engine.add_pass("Denoising Normal", 3, "XYZ", b_view_layer.name().c_str());
b_engine.add_pass("Denoising Albedo", 3, "RGB", b_view_layer.name().c_str());
b_engine.add_pass("Denoising Depth", 1, "Z", b_view_layer.name().c_str());
- b_engine.add_pass("Denoising Shadowing", 1, "X", b_view_layer.name().c_str());
- b_engine.add_pass("Denoising Variance", 3, "RGB", b_view_layer.name().c_str());
- b_engine.add_pass("Denoising Intensity", 1, "X", b_view_layer.name().c_str());
+ if (denoising.type == DENOISER_NLM) {
+ b_engine.add_pass("Denoising Shadowing", 1, "X", b_view_layer.name().c_str());
+ b_engine.add_pass("Denoising Variance", 3, "RGB", b_view_layer.name().c_str());
+ b_engine.add_pass("Denoising Intensity", 1, "X", b_view_layer.name().c_str());
+ }
if (scene->film->denoising_flags & DENOISING_CLEAN_ALL_PASSES) {
b_engine.add_pass("Denoising Clean", 3, "RGB", b_view_layer.name().c_str());
}
}
+
#ifdef __KERNEL_DEBUG__
- if (get_boolean(crp, "pass_debug_bvh_traversed_nodes")) {
+ if (get_boolean(crl, "pass_debug_bvh_traversed_nodes")) {
b_engine.add_pass("Debug BVH Traversed Nodes", 1, "X", b_view_layer.name().c_str());
- Pass::add(PASS_BVH_TRAVERSED_NODES, passes);
+ Pass::add(PASS_BVH_TRAVERSED_NODES, passes, "Debug BVH Traversed Nodes");
}
- if (get_boolean(crp, "pass_debug_bvh_traversed_instances")) {
+ if (get_boolean(crl, "pass_debug_bvh_traversed_instances")) {
b_engine.add_pass("Debug BVH Traversed Instances", 1, "X", b_view_layer.name().c_str());
- Pass::add(PASS_BVH_TRAVERSED_INSTANCES, passes);
+ Pass::add(PASS_BVH_TRAVERSED_INSTANCES, passes, "Debug BVH Traversed Instances");
}
- if (get_boolean(crp, "pass_debug_bvh_intersections")) {
+ if (get_boolean(crl, "pass_debug_bvh_intersections")) {
b_engine.add_pass("Debug BVH Intersections", 1, "X", b_view_layer.name().c_str());
- Pass::add(PASS_BVH_INTERSECTIONS, passes);
+ Pass::add(PASS_BVH_INTERSECTIONS, passes, "Debug BVH Intersections");
}
- if (get_boolean(crp, "pass_debug_ray_bounces")) {
+ if (get_boolean(crl, "pass_debug_ray_bounces")) {
b_engine.add_pass("Debug Ray Bounces", 1, "X", b_view_layer.name().c_str());
- Pass::add(PASS_RAY_BOUNCES, passes);
+ Pass::add(PASS_RAY_BOUNCES, passes, "Debug Ray Bounces");
}
#endif
- if (get_boolean(crp, "pass_debug_render_time")) {
+ if (get_boolean(crl, "pass_debug_render_time")) {
b_engine.add_pass("Debug Render Time", 1, "X", b_view_layer.name().c_str());
- Pass::add(PASS_RENDER_TIME, passes);
+ Pass::add(PASS_RENDER_TIME, passes, "Debug Render Time");
+ }
+ if (get_boolean(crl, "pass_debug_sample_count")) {
+ b_engine.add_pass("Debug Sample Count", 1, "X", b_view_layer.name().c_str());
+ Pass::add(PASS_SAMPLE_COUNT, passes, "Debug Sample Count");
}
- if (get_boolean(crp, "use_pass_volume_direct")) {
+ if (get_boolean(crl, "use_pass_volume_direct")) {
b_engine.add_pass("VolumeDir", 3, "RGB", b_view_layer.name().c_str());
- Pass::add(PASS_VOLUME_DIRECT, passes);
+ Pass::add(PASS_VOLUME_DIRECT, passes, "VolumeDir");
}
- if (get_boolean(crp, "use_pass_volume_indirect")) {
+ if (get_boolean(crl, "use_pass_volume_indirect")) {
b_engine.add_pass("VolumeInd", 3, "RGB", b_view_layer.name().c_str());
- Pass::add(PASS_VOLUME_INDIRECT, passes);
+ Pass::add(PASS_VOLUME_INDIRECT, passes, "VolumeInd");
}
/* Cryptomatte stores two ID/weight pairs per RGBA layer.
- * User facing paramter is the number of pairs. */
- int crypto_depth = min(16, get_int(crp, "pass_crypto_depth")) / 2;
+ * User facing parameter is the number of pairs. */
+ int crypto_depth = divide_up(min(16, get_int(crl, "pass_crypto_depth")), 2);
scene->film->cryptomatte_depth = crypto_depth;
scene->film->cryptomatte_passes = CRYPT_NONE;
- if (get_boolean(crp, "use_pass_crypto_object")) {
- for (int i = 0; i < crypto_depth; ++i) {
+ if (get_boolean(crl, "use_pass_crypto_object")) {
+ for (int i = 0; i < crypto_depth; i++) {
string passname = cryptomatte_prefix + string_printf("Object%02d", i);
b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str());
Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str());
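For reference, a standalone sketch (not Cycles code) of the cryptomatte pass-count math used above: the user-facing depth is a number of ID/weight pairs, each RGBA pass holds two pairs, and the count is capped at 16 pairs. The "Crypto" pass-name prefix below is a placeholder for the actual cryptomatte_prefix value.

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

// Round-up integer division, mirroring what divide_up() is used for above.
static int divide_up(int x, int y) { return (x + y - 1) / y; }

int main() {
  const int user_depth = 7;  // user-facing number of ID/weight pairs
  // Two ID/weight pairs fit in one RGBA pass, capped at 16 pairs total.
  const int crypto_depth = divide_up(std::min(16, user_depth), 2);

  std::vector<std::string> passes;
  for (int i = 0; i < crypto_depth; i++) {
    char name[64];
    // "Crypto" is an illustrative prefix only, not the real cryptomatte_prefix.
    std::snprintf(name, sizeof(name), "CryptoObject%02d", i);
    passes.push_back(name);
  }
  for (const std::string &p : passes)
    std::printf("%s\n", p.c_str());  // CryptoObject00 .. CryptoObject03
  return 0;
}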
@@ -608,8 +664,8 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLa
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes |
CRYPT_OBJECT);
}
- if (get_boolean(crp, "use_pass_crypto_material")) {
- for (int i = 0; i < crypto_depth; ++i) {
+ if (get_boolean(crl, "use_pass_crypto_material")) {
+ for (int i = 0; i < crypto_depth; i++) {
string passname = cryptomatte_prefix + string_printf("Material%02d", i);
b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str());
Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str());
@@ -617,8 +673,8 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLa
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes |
CRYPT_MATERIAL);
}
- if (get_boolean(crp, "use_pass_crypto_asset")) {
- for (int i = 0; i < crypto_depth; ++i) {
+ if (get_boolean(crl, "use_pass_crypto_asset")) {
+ for (int i = 0; i < crypto_depth; i++) {
string passname = cryptomatte_prefix + string_printf("Asset%02d", i);
b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str());
Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str());
@@ -626,11 +682,33 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLa
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes |
CRYPT_ASSET);
}
- if (get_boolean(crp, "pass_crypto_accurate") && scene->film->cryptomatte_passes != CRYPT_NONE) {
+ if (get_boolean(crl, "pass_crypto_accurate") && scene->film->cryptomatte_passes != CRYPT_NONE) {
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes |
CRYPT_ACCURATE);
}
+ if (adaptive_sampling) {
+ Pass::add(PASS_ADAPTIVE_AUX_BUFFER, passes);
+ if (!get_boolean(crl, "pass_debug_sample_count")) {
+ Pass::add(PASS_SAMPLE_COUNT, passes);
+ }
+ }
+
+ RNA_BEGIN (&crl, b_aov, "aovs") {
+ bool is_color = (get_enum(b_aov, "type") == 1);
+ string name = get_string(b_aov, "name");
+
+ if (is_color) {
+ b_engine.add_pass(name.c_str(), 4, "RGBA", b_view_layer.name().c_str());
+ Pass::add(PASS_AOV_COLOR, passes, name.c_str());
+ }
+ else {
+ b_engine.add_pass(name.c_str(), 1, "X", b_view_layer.name().c_str());
+ Pass::add(PASS_AOV_VALUE, passes, name.c_str());
+ }
+ }
+ RNA_END;
+
return passes;
}
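A minimal standalone illustration of the AOV branching in the hunk above, using an assumed AOV list in place of the real RNA iteration: color AOVs become 4-channel RGBA passes, value AOVs become single-channel passes.

#include <cstdio>
#include <string>
#include <vector>

struct AOV {
  std::string name;
  bool is_color;  // mirrors get_enum(b_aov, "type") == 1 above
};

int main() {
  // Hypothetical AOV list; in Cycles this comes from the view layer RNA.
  const std::vector<AOV> aovs = {{"WetMask", false}, {"BaseColor", true}};

  for (const AOV &aov : aovs) {
    const int channels = aov.is_color ? 4 : 1;
    const char *chan_id = aov.is_color ? "RGBA" : "X";
    const char *pass_type = aov.is_color ? "PASS_AOV_COLOR" : "PASS_AOV_VALUE";
    std::printf("add_pass(\"%s\", %d, \"%s\") -> %s\n",
                aov.name.c_str(), channels, chan_id, pass_type);
  }
  return 0;
}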
@@ -677,6 +755,11 @@ SceneParams BlenderSync::get_scene_params(BL::Scene &b_scene, bool background)
params.use_bvh_unaligned_nodes = RNA_boolean_get(&cscene, "debug_use_hair_bvh");
params.num_bvh_time_steps = RNA_int_get(&cscene, "debug_bvh_time_steps");
+ PointerRNA csscene = RNA_pointer_get(&b_scene.ptr, "cycles_curves");
+ params.hair_subdivisions = get_int(csscene, "subdivisions");
+ params.hair_shape = (CurveShapeType)get_enum(
+ csscene, "shape", CURVE_NUM_SHAPE_TYPES, CURVE_THICK);
+
if (background && params.shadingsystem != SHADINGSYSTEM_OSL)
params.persistent_data = r.use_persistent_data();
else
@@ -696,20 +779,10 @@ SceneParams BlenderSync::get_scene_params(BL::Scene &b_scene, bool background)
params.texture_limit = 0;
}
- /* TODO(sergey): Once OSL supports per-microarchitecture optimization get
- * rid of this.
- */
- if (params.shadingsystem == SHADINGSYSTEM_OSL) {
- params.bvh_layout = BVH_LAYOUT_BVH4;
- }
- else {
- params.bvh_layout = DebugFlags().cpu.bvh_layout;
- }
+ params.bvh_layout = DebugFlags().cpu.bvh_layout;
+
+ params.background = background;
-#ifdef WITH_EMBREE
- params.bvh_layout = RNA_boolean_get(&cscene, "use_bvh_embree") ? BVH_LAYOUT_EMBREE :
- params.bvh_layout;
-#endif
return params;
}
@@ -724,7 +797,8 @@ bool BlenderSync::get_session_pause(BL::Scene &b_scene, bool background)
SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
BL::Preferences &b_preferences,
BL::Scene &b_scene,
- bool background)
+ bool background,
+ BL::ViewLayer b_view_layer)
{
SessionParams params;
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
@@ -753,7 +827,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
preview_samples = preview_samples * preview_samples;
}
- if (get_enum(cscene, "progressive") == 0) {
+ if (get_enum(cscene, "progressive") == 0 && (params.device.type != DEVICE_OPTIX)) {
if (background) {
params.samples = aa_samples;
}
@@ -802,7 +876,21 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
params.tile_order = TILE_BOTTOM_TO_TOP;
}
- /* other parameters */
+ /* Denoising */
+ params.denoising = get_denoise_params(b_scene, b_view_layer, background);
+
+ if (params.denoising.use) {
+ /* Add additional denoising devices if we are rendering and denoising
+ * with different devices. */
+ params.device.add_denoising_devices(params.denoising.type);
+
+ /* Check if denoiser is supported by device. */
+ if (!(params.device.denoisers & params.denoising.type)) {
+ params.denoising.use = false;
+ }
+ }
+
+ /* Viewport Performance */
params.start_resolution = get_int(cscene, "preview_start_resolution");
params.pixel_size = b_engine.get_preview_pixel_size(b_scene);
@@ -813,20 +901,10 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
/* progressive refine */
BL::RenderSettings b_r = b_scene.render();
- params.progressive_refine = (b_engine.is_preview() ||
- get_boolean(cscene, "use_progressive_refine")) &&
- !b_r.use_save_buffers();
-
- if (params.progressive_refine) {
- BL::Scene::view_layers_iterator b_view_layer;
- for (b_scene.view_layers.begin(b_view_layer); b_view_layer != b_scene.view_layers.end();
- ++b_view_layer) {
- PointerRNA crl = RNA_pointer_get(&b_view_layer->ptr, "cycles");
- if (get_boolean(crl, "use_denoising")) {
- params.progressive_refine = false;
- }
- }
- }
+ params.progressive_refine = b_engine.is_preview() ||
+ get_boolean(cscene, "use_progressive_refine");
+ if (b_r.use_save_buffers())
+ params.progressive_refine = false;
if (background) {
if (params.progressive_refine)
@@ -861,7 +939,66 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
params.use_profiling = params.device.has_profiling && !b_engine.is_preview() && background &&
BlenderSession::print_render_stats;
+ params.adaptive_sampling = RNA_boolean_get(&cscene, "use_adaptive_sampling");
+
return params;
}
+DenoiseParams BlenderSync::get_denoise_params(BL::Scene &b_scene,
+ BL::ViewLayer &b_view_layer,
+ bool background)
+{
+ DenoiseParams denoising;
+ PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
+
+ if (background) {
+ /* Final Render Denoising */
+ denoising.use = get_boolean(cscene, "use_denoising");
+ denoising.type = (DenoiserType)get_enum(cscene, "denoiser", DENOISER_NUM, DENOISER_NONE);
+
+ if (b_view_layer) {
+ PointerRNA clayer = RNA_pointer_get(&b_view_layer.ptr, "cycles");
+ if (!get_boolean(clayer, "use_denoising")) {
+ denoising.use = false;
+ }
+
+ denoising.radius = get_int(clayer, "denoising_radius");
+ denoising.strength = get_float(clayer, "denoising_strength");
+ denoising.feature_strength = get_float(clayer, "denoising_feature_strength");
+ denoising.relative_pca = get_boolean(clayer, "denoising_relative_pca");
+
+ denoising.input_passes = (DenoiserInput)get_enum(
+ clayer,
+ (denoising.type == DENOISER_OPTIX) ? "denoising_optix_input_passes" :
+ "denoising_openimagedenoise_input_passes",
+ DENOISER_INPUT_NUM,
+ DENOISER_INPUT_RGB_ALBEDO_NORMAL);
+
+ denoising.store_passes = get_boolean(clayer, "denoising_store_passes");
+ }
+ }
+ else {
+ /* Viewport Denoising */
+ denoising.use = get_boolean(cscene, "use_preview_denoising");
+ denoising.type = (DenoiserType)get_enum(
+ cscene, "preview_denoiser", DENOISER_NUM, DENOISER_NONE);
+ denoising.start_sample = get_int(cscene, "preview_denoising_start_sample");
+
+ /* Auto select fastest denoiser. */
+ if (denoising.type == DENOISER_NONE) {
+ if (!Device::available_devices(DEVICE_MASK_OPTIX).empty()) {
+ denoising.type = DENOISER_OPTIX;
+ }
+ else if (openimagedenoise_supported()) {
+ denoising.type = DENOISER_OPENIMAGEDENOISE;
+ }
+ else {
+ denoising.use = false;
+ }
+ }
+ }
+
+ return denoising;
+}
+
CCL_NAMESPACE_END
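The viewport branch of get_denoise_params() above falls back from OptiX to OpenImageDenoise and finally disables denoising. A standalone sketch of that selection order, with plain booleans standing in for the device and library queries:

#include <cstdio>

enum DenoiserType { DENOISER_NONE, DENOISER_OPTIX, DENOISER_OPENIMAGEDENOISE };

// Standalone sketch of the viewport fallback above; the two booleans stand in
// for Device::available_devices(DEVICE_MASK_OPTIX) and openimagedenoise_supported().
static DenoiserType auto_select(bool have_optix_device, bool have_oidn, bool *use)
{
  if (have_optix_device)
    return DENOISER_OPTIX;
  if (have_oidn)
    return DENOISER_OPENIMAGEDENOISE;
  *use = false;  // no denoiser available, turn viewport denoising off
  return DENOISER_NONE;
}

int main()
{
  bool use = true;
  DenoiserType type = auto_select(/*have_optix_device=*/false, /*have_oidn=*/true, &use);
  std::printf("type=%d use=%d\n", type, use);  // type=2 use=1
  return 0;
}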
diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h
index 00afceebde3..a551ec31e04 100644
--- a/intern/cycles/blender/blender_sync.h
+++ b/intern/cycles/blender/blender_sync.h
@@ -18,11 +18,12 @@
#define __BLENDER_SYNC_H__
#include "MEM_guardedalloc.h"
-#include "RNA_types.h"
#include "RNA_access.h"
#include "RNA_blender_cpp.h"
+#include "RNA_types.h"
-#include "blender/blender_util.h"
+#include "blender/blender_id_map.h"
+#include "blender/blender_viewport.h"
#include "render/scene.h"
#include "render/session.h"
@@ -36,8 +37,10 @@ CCL_NAMESPACE_BEGIN
class Background;
class BlenderObjectCulling;
+class BlenderViewportParameters;
class Camera;
class Film;
+class Hair;
class Light;
class Mesh;
class Object;
@@ -58,8 +61,10 @@ class BlenderSync {
Progress &progress);
~BlenderSync();
+ void reset(BL::BlendData &b_data, BL::Scene &b_scene);
+
/* sync */
- void sync_recalc(BL::Depsgraph &b_depsgraph);
+ void sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d);
void sync_data(BL::RenderSettings &b_render,
BL::Depsgraph &b_depsgraph,
BL::SpaceView3D &b_v3d,
@@ -68,7 +73,10 @@ class BlenderSync {
int height,
void **python_thread_state);
void sync_view_layer(BL::SpaceView3D &b_v3d, BL::ViewLayer &b_view_layer);
- vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer, BL::ViewLayer &b_view_layer);
+ vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer,
+ BL::ViewLayer &b_view_layer,
+ bool adaptive_sampling,
+ const DenoiseParams &denoising);
void sync_integrator();
void sync_camera(BL::RenderSettings &b_render,
BL::Object &b_override,
@@ -87,55 +95,96 @@ class BlenderSync {
/* get parameters */
static SceneParams get_scene_params(BL::Scene &b_scene, bool background);
- static SessionParams get_session_params(BL::RenderEngine &b_engine,
- BL::Preferences &b_userpref,
- BL::Scene &b_scene,
- bool background);
+ static SessionParams get_session_params(
+ BL::RenderEngine &b_engine,
+ BL::Preferences &b_userpref,
+ BL::Scene &b_scene,
+ bool background,
+ BL::ViewLayer b_view_layer = BL::ViewLayer(PointerRNA_NULL));
static bool get_session_pause(BL::Scene &b_scene, bool background);
static BufferParams get_buffer_params(BL::RenderSettings &b_render,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
Camera *cam,
int width,
- int height);
+ int height,
+ const bool use_denoiser);
static PassType get_pass_type(BL::RenderPass &b_pass);
static int get_denoising_pass(BL::RenderPass &b_pass);
private:
+ static DenoiseParams get_denoise_params(BL::Scene &b_scene,
+ BL::ViewLayer &b_view_layer,
+ bool background);
+
/* sync */
void sync_lights(BL::Depsgraph &b_depsgraph, bool update_all);
void sync_materials(BL::Depsgraph &b_depsgraph, bool update_all);
- void sync_objects(BL::Depsgraph &b_depsgraph, float motion_time = 0.0f);
+ void sync_objects(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, float motion_time = 0.0f);
void sync_motion(BL::RenderSettings &b_render,
BL::Depsgraph &b_depsgraph,
+ BL::SpaceView3D &b_v3d,
BL::Object &b_override,
int width,
int height,
void **python_thread_state);
- void sync_film();
+ void sync_film(BL::SpaceView3D &b_v3d);
void sync_view();
- void sync_world(BL::Depsgraph &b_depsgraph, bool update_all);
- void sync_shaders(BL::Depsgraph &b_depsgraph);
- void sync_curve_settings();
+ /* Shader */
+ void sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, bool update_all);
+ void sync_shaders(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d);
void sync_nodes(Shader *shader, BL::ShaderNodeTree &b_ntree);
- Mesh *sync_mesh(BL::Depsgraph &b_depsgrpah,
- BL::Object &b_ob,
- BL::Object &b_ob_instance,
- bool object_updated,
- bool show_self,
- bool show_particles);
- void sync_curves(
- Mesh *mesh, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step = 0);
+
+ /* Object */
Object *sync_object(BL::Depsgraph &b_depsgraph,
BL::ViewLayer &b_view_layer,
BL::DepsgraphObjectInstance &b_instance,
float motion_time,
- bool show_self,
- bool show_particles,
+ bool use_particle_hair,
+ bool show_lights,
BlenderObjectCulling &culling,
bool *use_portal);
+
+ /* Volume */
+ void sync_volume(BL::Object &b_ob, Mesh *mesh, const vector<Shader *> &used_shaders);
+
+ /* Mesh */
+ void sync_mesh(BL::Depsgraph b_depsgraph,
+ BL::Object b_ob,
+ Mesh *mesh,
+ const vector<Shader *> &used_shaders);
+ void sync_mesh_motion(BL::Depsgraph b_depsgraph, BL::Object b_ob, Mesh *mesh, int motion_step);
+
+ /* Hair */
+ void sync_hair(BL::Depsgraph b_depsgraph,
+ BL::Object b_ob,
+ Hair *hair,
+ const vector<Shader *> &used_shaders);
+ void sync_hair_motion(BL::Depsgraph b_depsgraph, BL::Object b_ob, Hair *hair, int motion_step);
+ void sync_hair(Hair *hair, BL::Object &b_ob, bool motion, int motion_step = 0);
+ void sync_particle_hair(
+ Hair *hair, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step = 0);
+ bool object_has_particle_hair(BL::Object b_ob);
+
+ /* Camera */
+ void sync_camera_motion(
+ BL::RenderSettings &b_render, BL::Object &b_ob, int width, int height, float motion_time);
+
+ /* Geometry */
+ Geometry *sync_geometry(BL::Depsgraph &b_depsgrpah,
+ BL::Object &b_ob,
+ BL::Object &b_ob_instance,
+ bool object_updated,
+ bool use_particle_hair);
+ void sync_geometry_motion(BL::Depsgraph &b_depsgraph,
+ BL::Object &b_ob,
+ Object *object,
+ float motion_time,
+ bool use_particle_hair);
+
+ /* Light */
void sync_light(BL::Object &b_parent,
int persistent_id[OBJECT_PERSISTENT_ID_SIZE],
BL::Object &b_ob,
@@ -143,15 +192,9 @@ class BlenderSync {
int random_id,
Transform &tfm,
bool *use_portal);
- void sync_background_light(bool use_portal);
- void sync_mesh_motion(BL::Depsgraph &b_depsgraph,
- BL::Object &b_ob,
- Object *object,
- float motion_time);
- void sync_camera_motion(
- BL::RenderSettings &b_render, BL::Object &b_ob, int width, int height, float motion_time);
+ void sync_background_light(BL::SpaceView3D &b_v3d, bool use_portal);
- /* particles */
+ /* Particles */
bool sync_dupli_particle(BL::Object &b_ob,
BL::DepsgraphObjectInstance &b_instance,
Object *object);
@@ -165,7 +208,7 @@ class BlenderSync {
/* util */
void find_shader(BL::ID &id, vector<Shader *> &used_shaders, Shader *default_shader);
bool BKE_object_is_modified(BL::Object &b_ob);
- bool object_is_mesh(BL::Object &b_ob);
+ bool object_is_geometry(BL::Object &b_ob);
bool object_is_light(BL::Object &b_ob);
/* variables */
@@ -175,14 +218,15 @@ class BlenderSync {
id_map<void *, Shader> shader_map;
id_map<ObjectKey, Object> object_map;
- id_map<void *, Mesh> mesh_map;
+ id_map<GeometryKey, Geometry> geometry_map;
id_map<ObjectKey, Light> light_map;
id_map<ParticleSystemKey, ParticleSystem> particle_system_map;
- set<Mesh *> mesh_synced;
- set<Mesh *> mesh_motion_synced;
+ set<Geometry *> geometry_synced;
+ set<Geometry *> geometry_motion_synced;
set<float> motion_times;
void *world_map;
bool world_recalc;
+ BlenderViewportParameters viewport_parameters;
Scene *scene;
bool preview;
@@ -198,6 +242,7 @@ class BlenderSync {
use_background_ao(true),
use_surfaces(true),
use_hair(true),
+ use_volumes(true),
samples(0),
bound_samples(false)
{
@@ -209,6 +254,7 @@ class BlenderSync {
bool use_background_ao;
bool use_surfaces;
bool use_hair;
+ bool use_volumes;
int samples;
bool bound_samples;
} view_layer;
diff --git a/intern/cycles/blender/blender_texture.h b/intern/cycles/blender/blender_texture.h
index 896bf62da70..8ab061aaed9 100644
--- a/intern/cycles/blender/blender_texture.h
+++ b/intern/cycles/blender/blender_texture.h
@@ -17,8 +17,8 @@
#ifndef __BLENDER_TEXTURE_H__
#define __BLENDER_TEXTURE_H__
-#include <stdlib.h>
#include "blender/blender_sync.h"
+#include <stdlib.h>
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/blender/blender_util.h b/intern/cycles/blender/blender_util.h
index e68f92474bf..ad90a5f8d52 100644
--- a/intern/cycles/blender/blender_util.h
+++ b/intern/cycles/blender/blender_util.h
@@ -32,10 +32,10 @@
* todo: clean this up ... */
extern "C" {
-void BKE_image_user_frame_calc(void *iuser, int cfra);
+void BKE_image_user_frame_calc(void *ima, void *iuser, int cfra);
void BKE_image_user_file_path(void *iuser, void *ima, char *path);
-unsigned char *BKE_image_get_pixels_for_frame(void *image, int frame);
-float *BKE_image_get_float_pixels_for_frame(void *image, int frame);
+unsigned char *BKE_image_get_pixels_for_frame(void *image, int frame, int tile);
+float *BKE_image_get_float_pixels_for_frame(void *image, int frame, int tile);
}
CCL_NAMESPACE_BEGIN
@@ -43,10 +43,10 @@ CCL_NAMESPACE_BEGIN
void python_thread_state_save(void **python_thread_state);
void python_thread_state_restore(void **python_thread_state);
-static inline BL::Mesh object_to_mesh(BL::BlendData &data,
+static inline BL::Mesh object_to_mesh(BL::BlendData & /*data*/,
BL::Object &object,
- BL::Depsgraph &depsgraph,
- bool calc_undeformed,
+ BL::Depsgraph & /*depsgraph*/,
+ bool /*calc_undeformed*/,
Mesh::SubdivisionType subdivision_type)
{
/* TODO: make this work with copy-on-write, modifiers are already evaluated. */
@@ -54,8 +54,8 @@ static inline BL::Mesh object_to_mesh(BL::BlendData &data,
bool subsurf_mod_show_render = false;
bool subsurf_mod_show_viewport = false;
- if(subdivision_type != Mesh::SUBDIVISION_NONE) {
- BL::Modifier subsurf_mod = object.modifiers[object.modifiers.length()-1];
+ if (subdivision_type != Mesh::SUBDIVISION_NONE) {
+ BL::Modifier subsurf_mod = object.modifiers[object.modifiers.length() - 1];
subsurf_mod_show_render = subsurf_mod.show_render();
subsurf_mod_show_viewport = subsurf_mod.show_viewport();
@@ -75,16 +75,18 @@ static inline BL::Mesh object_to_mesh(BL::BlendData &data,
* UV are not empty. */
if (mesh.is_editmode() ||
(mesh.use_auto_smooth() && subdivision_type == Mesh::SUBDIVISION_NONE)) {
- mesh = data.meshes.new_from_object(depsgraph, object, false, false);
+ BL::Depsgraph depsgraph(PointerRNA_NULL);
+ mesh = object.to_mesh(false, depsgraph);
}
}
else {
- mesh = data.meshes.new_from_object(depsgraph, object, true, calc_undeformed);
+ BL::Depsgraph depsgraph(PointerRNA_NULL);
+ mesh = object.to_mesh(false, depsgraph);
}
#if 0
- if(subdivision_type != Mesh::SUBDIVISION_NONE) {
- BL::Modifier subsurf_mod = object.modifiers[object.modifiers.length()-1];
+ if (subdivision_type != Mesh::SUBDIVISION_NONE) {
+ BL::Modifier subsurf_mod = object.modifiers[object.modifiers.length() - 1];
subsurf_mod.show_render(subsurf_mod_show_render);
subsurf_mod.show_viewport(subsurf_mod_show_viewport);
@@ -102,11 +104,13 @@ static inline BL::Mesh object_to_mesh(BL::BlendData &data,
return mesh;
}
-static inline void free_object_to_mesh(BL::BlendData &data, BL::Object &object, BL::Mesh &mesh)
+static inline void free_object_to_mesh(BL::BlendData & /*data*/,
+ BL::Object &object,
+ BL::Mesh &mesh)
{
/* Free mesh if we didn't just use the existing one. */
if (object.data().ptr.data != mesh.ptr.data) {
- data.meshes.remove(mesh, false, true, false);
+ object.to_mesh_clear();
}
}
@@ -155,7 +159,7 @@ static inline void curvemapping_to_array(BL::CurveMapping &cumap, array<float> &
data.resize(size);
for (int i = 0; i < size; i++) {
float t = (float)i / (float)(size - 1);
- data[i] = curve.evaluate(t);
+ data[i] = cumap.evaluate(curve, t);
}
}
@@ -193,15 +197,16 @@ static inline void curvemapping_color_to_array(BL::CurveMapping &cumap,
BL::CurveMap mapI = cumap.curves[3];
for (int i = 0; i < size; i++) {
const float t = min_x + (float)i / (float)(size - 1) * range_x;
- data[i] = make_float3(mapR.evaluate(mapI.evaluate(t)),
- mapG.evaluate(mapI.evaluate(t)),
- mapB.evaluate(mapI.evaluate(t)));
+ data[i] = make_float3(cumap.evaluate(mapR, cumap.evaluate(mapI, t)),
+ cumap.evaluate(mapG, cumap.evaluate(mapI, t)),
+ cumap.evaluate(mapB, cumap.evaluate(mapI, t)));
}
}
else {
for (int i = 0; i < size; i++) {
float t = min_x + (float)i / (float)(size - 1) * range_x;
- data[i] = make_float3(mapR.evaluate(t), mapG.evaluate(t), mapB.evaluate(t));
+ data[i] = make_float3(
+ cumap.evaluate(mapR, t), cumap.evaluate(mapG, t), cumap.evaluate(mapB, t));
}
}
}
@@ -226,28 +231,37 @@ static inline int render_resolution_y(BL::RenderSettings &b_render)
return b_render.resolution_y() * b_render.resolution_percentage() / 100;
}
-static inline string image_user_file_path(BL::ImageUser &iuser, BL::Image &ima, int cfra)
+static inline string image_user_file_path(BL::ImageUser &iuser,
+ BL::Image &ima,
+ int cfra,
+ bool load_tiled)
{
char filepath[1024];
- BKE_image_user_frame_calc(iuser.ptr.data, cfra);
+ iuser.tile(0);
+ BKE_image_user_frame_calc(NULL, iuser.ptr.data, cfra);
BKE_image_user_file_path(iuser.ptr.data, ima.ptr.data, filepath);
- return string(filepath);
+
+ string filepath_str = string(filepath);
+ if (load_tiled && ima.source() == BL::Image::source_TILED) {
+ string_replace(filepath_str, "1001", "<UDIM>");
+ }
+ return filepath_str;
}
static inline int image_user_frame_number(BL::ImageUser &iuser, int cfra)
{
- BKE_image_user_frame_calc(iuser.ptr.data, cfra);
+ BKE_image_user_frame_calc(NULL, iuser.ptr.data, cfra);
return iuser.frame_current();
}
-static inline unsigned char *image_get_pixels_for_frame(BL::Image &image, int frame)
+static inline unsigned char *image_get_pixels_for_frame(BL::Image &image, int frame, int tile)
{
- return BKE_image_get_pixels_for_frame(image.ptr.data, frame);
+ return BKE_image_get_pixels_for_frame(image.ptr.data, frame, tile);
}
-static inline float *image_get_float_pixels_for_frame(BL::Image &image, int frame)
+static inline float *image_get_float_pixels_for_frame(BL::Image &image, int frame, int tile)
{
- return BKE_image_get_float_pixels_for_frame(image.ptr.data, frame);
+ return BKE_image_get_float_pixels_for_frame(image.ptr.data, frame, tile);
}
static inline void render_add_metadata(BL::RenderResult &b_rr, string name, string value)
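A standalone sketch of the UDIM path handling added above: for tiled images, the resolved path for tile 1001 is rewritten with a <UDIM> token so per-tile numbers can be substituted later. The string_replace() helper below is a simplified stand-in (first occurrence only) for the Cycles utility:

#include <cstdio>
#include <string>

// Simplified replacement helper for illustration; the real util replaces in place too.
static void string_replace(std::string &s, const std::string &from, const std::string &to)
{
  const size_t pos = s.find(from);
  if (pos != std::string::npos)
    s.replace(pos, from.size(), to);
}

int main()
{
  std::string path = "/textures/wood.1001.png";  // hypothetical tiled image path
  string_replace(path, "1001", "<UDIM>");
  std::printf("%s\n", path.c_str());  // /textures/wood.<UDIM>.png
  return 0;
}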
@@ -469,7 +483,9 @@ static inline void mesh_texture_space(BL::Mesh &b_mesh, float3 &loc, float3 &siz
}
/* Object motion steps, returns 0 if no motion blur needed. */
-static inline uint object_motion_steps(BL::Object &b_parent, BL::Object &b_ob)
+static inline uint object_motion_steps(BL::Object &b_parent,
+ BL::Object &b_ob,
+ const int max_steps = INT_MAX)
{
/* Get motion enabled and steps from object itself. */
PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles");
@@ -478,7 +494,7 @@ static inline uint object_motion_steps(BL::Object &b_parent, BL::Object &b_ob)
return 0;
}
- uint steps = max(1, get_int(cobject, "motion_steps"));
+ int steps = max(1, get_int(cobject, "motion_steps"));
/* Also check parent object, so motion blur and steps can be
* controlled by dupligroup duplicator for linked groups. */
@@ -496,7 +512,7 @@ static inline uint object_motion_steps(BL::Object &b_parent, BL::Object &b_ob)
/* Use uneven number of steps so we get one keyframe at the current frame,
* and use 2^(steps - 1) so objects with more/fewer steps still have samples
* at the same times, to avoid sampling at many different times. */
- return (2 << (steps - 1)) + 1;
+ return min((2 << (steps - 1)) + 1, max_steps);
}
/* object uses deformation motion blur */
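A standalone check of the sample-count formula in object_motion_steps() above: the user value is clamped to at least one step, the count is (2 << (steps - 1)) + 1, optionally capped by max_steps, so one sample always lands on the current frame and different step settings share sample times.

#include <algorithm>
#include <climits>
#include <cstdio>

// Standalone sketch of the step-count math above: an odd count keeps one
// sample exactly at the current frame, and power-of-two spacing keeps sample
// times aligned across objects with different step settings.
static int motion_sample_count(int user_steps, int max_steps = INT_MAX)
{
  const int steps = std::max(1, user_steps);
  return std::min((2 << (steps - 1)) + 1, max_steps);
}

int main()
{
  for (int s = 1; s <= 4; s++)
    std::printf("motion_steps=%d -> %d samples\n", s, motion_sample_count(s));
  // 1 -> 3, 2 -> 5, 3 -> 9, 4 -> 17
  return 0;
}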
@@ -517,37 +533,40 @@ static inline bool object_use_deform_motion(BL::Object &b_parent, BL::Object &b_
return use_deform_motion;
}
-static inline BL::SmokeDomainSettings object_smoke_domain_find(BL::Object &b_ob)
+static inline BL::FluidDomainSettings object_fluid_liquid_domain_find(BL::Object &b_ob)
{
BL::Object::modifiers_iterator b_mod;
for (b_ob.modifiers.begin(b_mod); b_mod != b_ob.modifiers.end(); ++b_mod) {
- if (b_mod->is_a(&RNA_SmokeModifier)) {
- BL::SmokeModifier b_smd(*b_mod);
+ if (b_mod->is_a(&RNA_FluidModifier)) {
+ BL::FluidModifier b_mmd(*b_mod);
- if (b_smd.smoke_type() == BL::SmokeModifier::smoke_type_DOMAIN)
- return b_smd.domain_settings();
+ if (b_mmd.fluid_type() == BL::FluidModifier::fluid_type_DOMAIN &&
+ b_mmd.domain_settings().domain_type() == BL::FluidDomainSettings::domain_type_LIQUID) {
+ return b_mmd.domain_settings();
+ }
}
}
- return BL::SmokeDomainSettings(PointerRNA_NULL);
+ return BL::FluidDomainSettings(PointerRNA_NULL);
}
-static inline BL::DomainFluidSettings object_fluid_domain_find(BL::Object b_ob)
+static inline BL::FluidDomainSettings object_fluid_gas_domain_find(BL::Object &b_ob)
{
BL::Object::modifiers_iterator b_mod;
for (b_ob.modifiers.begin(b_mod); b_mod != b_ob.modifiers.end(); ++b_mod) {
- if (b_mod->is_a(&RNA_FluidSimulationModifier)) {
- BL::FluidSimulationModifier b_fmd(*b_mod);
- BL::FluidSettings fss = b_fmd.settings();
+ if (b_mod->is_a(&RNA_FluidModifier)) {
+ BL::FluidModifier b_mmd(*b_mod);
- if (fss.type() == BL::FluidSettings::type_DOMAIN)
- return (BL::DomainFluidSettings)b_fmd.settings();
+ if (b_mmd.fluid_type() == BL::FluidModifier::fluid_type_DOMAIN &&
+ b_mmd.domain_settings().domain_type() == BL::FluidDomainSettings::domain_type_GAS) {
+ return b_mmd.domain_settings();
+ }
}
}
- return BL::DomainFluidSettings(PointerRNA_NULL);
+ return BL::FluidDomainSettings(PointerRNA_NULL);
}
static inline Mesh::SubdivisionType object_subdivision_type(BL::Object &b_ob,
@@ -576,209 +595,20 @@ static inline Mesh::SubdivisionType object_subdivision_type(BL::Object &b_ob,
return Mesh::SUBDIVISION_NONE;
}
-/* ID Map
- *
- * Utility class to keep in sync with blender data.
- * Used for objects, meshes, lights and shaders. */
-
-template<typename K, typename T> class id_map {
- public:
- id_map(vector<T *> *scene_data_)
- {
- scene_data = scene_data_;
- }
-
- T *find(const BL::ID &id)
- {
- return find(id.ptr.id.data);
- }
-
- T *find(const K &key)
- {
- if (b_map.find(key) != b_map.end()) {
- T *data = b_map[key];
- return data;
- }
-
- return NULL;
- }
-
- void set_recalc(const BL::ID &id)
- {
- b_recalc.insert(id.ptr.data);
- }
-
- void set_recalc(void *id_ptr)
- {
- b_recalc.insert(id_ptr);
- }
-
- bool has_recalc()
- {
- return !(b_recalc.empty());
- }
-
- void pre_sync()
- {
- used_set.clear();
- }
-
- bool sync(T **r_data, const BL::ID &id)
- {
- return sync(r_data, id, id, id.ptr.id.data);
- }
-
- bool sync(T **r_data, const BL::ID &id, const BL::ID &parent, const K &key)
- {
- T *data = find(key);
- bool recalc;
-
- if (!data) {
- /* add data if it didn't exist yet */
- data = new T();
- scene_data->push_back(data);
- b_map[key] = data;
- recalc = true;
- }
- else {
- recalc = (b_recalc.find(id.ptr.data) != b_recalc.end());
- if (parent.ptr.data)
- recalc = recalc || (b_recalc.find(parent.ptr.data) != b_recalc.end());
- }
-
- used(data);
-
- *r_data = data;
- return recalc;
- }
-
- bool is_used(const K &key)
- {
- T *data = find(key);
- return (data) ? used_set.find(data) != used_set.end() : false;
- }
-
- void used(T *data)
- {
- /* tag data as still in use */
- used_set.insert(data);
- }
-
- void set_default(T *data)
- {
- b_map[NULL] = data;
- }
-
- bool post_sync(bool do_delete = true)
- {
- /* remove unused data */
- vector<T *> new_scene_data;
- typename vector<T *>::iterator it;
- bool deleted = false;
-
- for (it = scene_data->begin(); it != scene_data->end(); it++) {
- T *data = *it;
-
- if (do_delete && used_set.find(data) == used_set.end()) {
- delete data;
- deleted = true;
- }
- else
- new_scene_data.push_back(data);
- }
-
- *scene_data = new_scene_data;
-
- /* update mapping */
- map<K, T *> new_map;
- typedef pair<const K, T *> TMapPair;
- typename map<K, T *>::iterator jt;
-
- for (jt = b_map.begin(); jt != b_map.end(); jt++) {
- TMapPair &pair = *jt;
-
- if (used_set.find(pair.second) != used_set.end())
- new_map[pair.first] = pair.second;
- }
-
- used_set.clear();
- b_recalc.clear();
- b_map = new_map;
-
- return deleted;
- }
-
- const map<K, T *> &key_to_scene_data()
- {
- return b_map;
- }
-
- protected:
- vector<T *> *scene_data;
- map<K, T *> b_map;
- set<T *> used_set;
- set<void *> b_recalc;
-};
-
-/* Object Key */
-
-enum { OBJECT_PERSISTENT_ID_SIZE = 16 };
-
-struct ObjectKey {
- void *parent;
- int id[OBJECT_PERSISTENT_ID_SIZE];
- void *ob;
-
- ObjectKey(void *parent_, int id_[OBJECT_PERSISTENT_ID_SIZE], void *ob_)
- : parent(parent_), ob(ob_)
- {
- if (id_)
- memcpy(id, id_, sizeof(id));
- else
- memset(id, 0, sizeof(id));
- }
-
- bool operator<(const ObjectKey &k) const
- {
- if (ob < k.ob) {
- return true;
- }
- else if (ob == k.ob) {
- if (parent < k.parent)
- return true;
- else if (parent == k.parent)
- return memcmp(id, k.id, sizeof(id)) < 0;
- }
-
- return false;
- }
-};
-
-/* Particle System Key */
-
-struct ParticleSystemKey {
- void *ob;
- int id[OBJECT_PERSISTENT_ID_SIZE];
-
- ParticleSystemKey(void *ob_, int id_[OBJECT_PERSISTENT_ID_SIZE]) : ob(ob_)
- {
- if (id_)
- memcpy(id, id_, sizeof(id));
- else
- memset(id, 0, sizeof(id));
- }
+static inline uint object_ray_visibility(BL::Object &b_ob)
+{
+ PointerRNA cvisibility = RNA_pointer_get(&b_ob.ptr, "cycles_visibility");
+ uint flag = 0;
- bool operator<(const ParticleSystemKey &k) const
- {
- /* first id is particle index, we don't compare that */
- if (ob < k.ob)
- return true;
- else if (ob == k.ob)
- return memcmp(id + 1, k.id + 1, sizeof(int) * (OBJECT_PERSISTENT_ID_SIZE - 1)) < 0;
+ flag |= get_boolean(cvisibility, "camera") ? PATH_RAY_CAMERA : 0;
+ flag |= get_boolean(cvisibility, "diffuse") ? PATH_RAY_DIFFUSE : 0;
+ flag |= get_boolean(cvisibility, "glossy") ? PATH_RAY_GLOSSY : 0;
+ flag |= get_boolean(cvisibility, "transmission") ? PATH_RAY_TRANSMIT : 0;
+ flag |= get_boolean(cvisibility, "shadow") ? PATH_RAY_SHADOW : 0;
+ flag |= get_boolean(cvisibility, "scatter") ? PATH_RAY_VOLUME_SCATTER : 0;
- return false;
- }
-};
+ return flag;
+}
class EdgeMap {
public:
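object_ray_visibility() above simply ORs one flag per enabled visibility toggle. A standalone sketch with placeholder bit values (the real PATH_RAY_* flags are defined in the Cycles kernel headers and differ from these):

#include <cstdint>
#include <cstdio>

// Placeholder bit values for illustration only.
enum : uint32_t {
  RAY_CAMERA = 1 << 0,
  RAY_DIFFUSE = 1 << 1,
  RAY_GLOSSY = 1 << 2,
  RAY_TRANSMIT = 1 << 3,
  RAY_SHADOW = 1 << 4,
  RAY_VOLUME_SCATTER = 1 << 5,
};

struct Visibility {
  bool camera, diffuse, glossy, transmission, shadow, scatter;
};

static uint32_t ray_visibility(const Visibility &v)
{
  uint32_t flag = 0;
  flag |= v.camera ? RAY_CAMERA : 0;
  flag |= v.diffuse ? RAY_DIFFUSE : 0;
  flag |= v.glossy ? RAY_GLOSSY : 0;
  flag |= v.transmission ? RAY_TRANSMIT : 0;
  flag |= v.shadow ? RAY_SHADOW : 0;
  flag |= v.scatter ? RAY_VOLUME_SCATTER : 0;
  return flag;
}

int main()
{
  const Visibility v = {true, true, false, true, true, false};
  std::printf("visibility mask: 0x%x\n", (unsigned)ray_visibility(v));  // 0x1b
  return 0;
}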
diff --git a/intern/cycles/blender/blender_viewport.cpp b/intern/cycles/blender/blender_viewport.cpp
new file mode 100644
index 00000000000..73ef5f94720
--- /dev/null
+++ b/intern/cycles/blender/blender_viewport.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "blender_viewport.h"
+
+#include "blender_util.h"
+
+CCL_NAMESPACE_BEGIN
+
+BlenderViewportParameters::BlenderViewportParameters()
+ : use_scene_world(true),
+ use_scene_lights(true),
+ studiolight_rotate_z(0.0f),
+ studiolight_intensity(1.0f),
+ studiolight_background_alpha(1.0f),
+ studiolight_path(ustring())
+{
+}
+
+BlenderViewportParameters::BlenderViewportParameters(BL::SpaceView3D &b_v3d)
+ : BlenderViewportParameters()
+{
+ /* We only copy the parameters if we are in look dev mode, otherwise the
+ * defaults are used. These defaults mimic normal render settings. */
+ if (b_v3d && b_v3d.shading().type() == BL::View3DShading::type_RENDERED) {
+ use_scene_world = b_v3d.shading().use_scene_world_render();
+ use_scene_lights = b_v3d.shading().use_scene_lights_render();
+ if (!use_scene_world) {
+ studiolight_rotate_z = b_v3d.shading().studiolight_rotate_z();
+ studiolight_intensity = b_v3d.shading().studiolight_intensity();
+ studiolight_background_alpha = b_v3d.shading().studiolight_background_alpha();
+ studiolight_path = b_v3d.shading().selected_studio_light().path();
+ }
+ }
+}
+
+/* Check if two instances are different. */
+const bool BlenderViewportParameters::modified(const BlenderViewportParameters &other) const
+{
+ return use_scene_world != other.use_scene_world || use_scene_lights != other.use_scene_lights ||
+ studiolight_rotate_z != other.studiolight_rotate_z ||
+ studiolight_intensity != other.studiolight_intensity ||
+ studiolight_background_alpha != other.studiolight_background_alpha ||
+ studiolight_path != other.studiolight_path;
+}
+
+const bool BlenderViewportParameters::custom_viewport_parameters() const
+{
+ return !(use_scene_world && use_scene_lights);
+}
+
+PassType BlenderViewportParameters::get_viewport_display_render_pass(BL::SpaceView3D &b_v3d)
+{
+ PassType display_pass = PASS_NONE;
+ if (b_v3d) {
+ BL::View3DShading b_view3dshading = b_v3d.shading();
+ PointerRNA cshading = RNA_pointer_get(&b_view3dshading.ptr, "cycles");
+ display_pass = (PassType)get_enum(cshading, "render_pass", -1, -1);
+ }
+ return display_pass;
+}
+
+PassType update_viewport_display_passes(BL::SpaceView3D &b_v3d, vector<Pass> &passes)
+{
+ if (b_v3d) {
+ PassType display_pass = BlenderViewportParameters::get_viewport_display_render_pass(b_v3d);
+
+ passes.clear();
+ Pass::add(display_pass, passes);
+
+ return display_pass;
+ }
+ return PASS_NONE;
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/blender/blender_viewport.h b/intern/cycles/blender/blender_viewport.h
new file mode 100644
index 00000000000..7c6c9c4d274
--- /dev/null
+++ b/intern/cycles/blender/blender_viewport.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BLENDER_VIEWPORT_H__
+#define __BLENDER_VIEWPORT_H__
+
+#include "MEM_guardedalloc.h"
+#include "RNA_access.h"
+#include "RNA_blender_cpp.h"
+#include "RNA_types.h"
+
+#include "render/film.h"
+#include "util/util_param.h"
+
+CCL_NAMESPACE_BEGIN
+
+class BlenderViewportParameters {
+ private:
+ bool use_scene_world;
+ bool use_scene_lights;
+ float studiolight_rotate_z;
+ float studiolight_intensity;
+ float studiolight_background_alpha;
+ ustring studiolight_path;
+
+ BlenderViewportParameters();
+ BlenderViewportParameters(BL::SpaceView3D &b_v3d);
+
+ const bool modified(const BlenderViewportParameters &other) const;
+ const bool custom_viewport_parameters() const;
+ friend class BlenderSync;
+
+ public:
+ /* Retrieve the render pass that needs to be displayed on the given `SpaceView3D`.
+ * When the `b_v3d` parameter is not given, `PASS_NONE` will be returned. */
+ static PassType get_viewport_display_render_pass(BL::SpaceView3D &b_v3d);
+};
+
+PassType update_viewport_display_passes(BL::SpaceView3D &b_v3d, vector<Pass> &passes);
+
+CCL_NAMESPACE_END
+
+#endif
diff --git a/intern/cycles/blender/blender_volume.cpp b/intern/cycles/blender/blender_volume.cpp
new file mode 100644
index 00000000000..80591e0eec8
--- /dev/null
+++ b/intern/cycles/blender/blender_volume.cpp
@@ -0,0 +1,387 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "render/colorspace.h"
+#include "render/image.h"
+#include "render/image_vdb.h"
+#include "render/mesh.h"
+#include "render/object.h"
+
+#include "blender/blender_sync.h"
+#include "blender/blender_util.h"
+
+#ifdef WITH_OPENVDB
+# include <openvdb/openvdb.h>
+openvdb::GridBase::ConstPtr BKE_volume_grid_openvdb_for_read(const struct Volume *volume,
+ struct VolumeGrid *grid);
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+/* TODO: verify this is not loading unnecessary attributes. */
+class BlenderSmokeLoader : public ImageLoader {
+ public:
+ BlenderSmokeLoader(BL::Object &b_ob, AttributeStandard attribute)
+ : b_domain(object_fluid_gas_domain_find(b_ob)), attribute(attribute)
+ {
+ BL::Mesh b_mesh(b_ob.data());
+ mesh_texture_space(b_mesh, texspace_loc, texspace_size);
+ }
+
+ bool load_metadata(ImageMetaData &metadata) override
+ {
+ if (!b_domain) {
+ return false;
+ }
+
+ if (attribute == ATTR_STD_VOLUME_DENSITY || attribute == ATTR_STD_VOLUME_FLAME ||
+ attribute == ATTR_STD_VOLUME_HEAT || attribute == ATTR_STD_VOLUME_TEMPERATURE) {
+ metadata.type = IMAGE_DATA_TYPE_FLOAT;
+ metadata.channels = 1;
+ }
+ else if (attribute == ATTR_STD_VOLUME_COLOR) {
+ metadata.type = IMAGE_DATA_TYPE_FLOAT4;
+ metadata.channels = 4;
+ }
+ else if (attribute == ATTR_STD_VOLUME_VELOCITY) {
+ metadata.type = IMAGE_DATA_TYPE_FLOAT4;
+ metadata.channels = 3;
+ }
+ else {
+ return false;
+ }
+
+ int3 resolution = get_int3(b_domain.domain_resolution());
+ int amplify = (b_domain.use_noise()) ? b_domain.noise_scale() : 1;
+
+ /* Velocity and heat data is always low-resolution. */
+ if (attribute == ATTR_STD_VOLUME_VELOCITY || attribute == ATTR_STD_VOLUME_HEAT) {
+ amplify = 1;
+ }
+
+ metadata.width = resolution.x * amplify;
+ metadata.height = resolution.y * amplify;
+ metadata.depth = resolution.z * amplify;
+
+ /* Create a matrix to transform from object space to mesh texture space.
+ * This does not work with deformations but that can probably only be done
+ * well with a volume grid mapping of coordinates. */
+ metadata.transform_3d = transform_translate(-texspace_loc) * transform_scale(texspace_size);
+ metadata.use_transform_3d = true;
+
+ return true;
+ }
+
+ bool load_pixels(const ImageMetaData &, void *pixels, const size_t, const bool) override
+ {
+ if (!b_domain) {
+ return false;
+ }
+#ifdef WITH_FLUID
+ int3 resolution = get_int3(b_domain.domain_resolution());
+ int length, amplify = (b_domain.use_noise()) ? b_domain.noise_scale() : 1;
+
+ /* Velocity and heat data is always low-resolution. */
+ if (attribute == ATTR_STD_VOLUME_VELOCITY || attribute == ATTR_STD_VOLUME_HEAT) {
+ amplify = 1;
+ }
+
+ const int width = resolution.x * amplify;
+ const int height = resolution.y * amplify;
+ const int depth = resolution.z * amplify;
+ const size_t num_pixels = ((size_t)width) * height * depth;
+
+ float *fpixels = (float *)pixels;
+
+ if (attribute == ATTR_STD_VOLUME_DENSITY) {
+ FluidDomainSettings_density_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels) {
+ FluidDomainSettings_density_grid_get(&b_domain.ptr, fpixels);
+ return true;
+ }
+ }
+ else if (attribute == ATTR_STD_VOLUME_FLAME) {
+ /* this is in range 0..1, and interpreted by the OpenGL smoke viewer
+ * as 1500..3000 K with the first part faded to zero density */
+ FluidDomainSettings_flame_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels) {
+ FluidDomainSettings_flame_grid_get(&b_domain.ptr, fpixels);
+ return true;
+ }
+ }
+ else if (attribute == ATTR_STD_VOLUME_COLOR) {
+ /* the RGB is "premultiplied" by density for better interpolation results */
+ FluidDomainSettings_color_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels * 4) {
+ FluidDomainSettings_color_grid_get(&b_domain.ptr, fpixels);
+ return true;
+ }
+ }
+ else if (attribute == ATTR_STD_VOLUME_VELOCITY) {
+ FluidDomainSettings_velocity_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels * 3) {
+ FluidDomainSettings_velocity_grid_get(&b_domain.ptr, fpixels);
+ return true;
+ }
+ }
+ else if (attribute == ATTR_STD_VOLUME_HEAT) {
+ FluidDomainSettings_heat_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels) {
+ FluidDomainSettings_heat_grid_get(&b_domain.ptr, fpixels);
+ return true;
+ }
+ }
+ else if (attribute == ATTR_STD_VOLUME_TEMPERATURE) {
+ FluidDomainSettings_temperature_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels) {
+ FluidDomainSettings_temperature_grid_get(&b_domain.ptr, fpixels);
+ return true;
+ }
+ }
+ else {
+ fprintf(stderr,
+ "Cycles error: unknown volume attribute %s, skipping\n",
+ Attribute::standard_name(attribute));
+ fpixels[0] = 0.0f;
+ return false;
+ }
+#else
+ (void)pixels;
+#endif
+ fprintf(stderr, "Cycles error: unexpected smoke volume resolution, skipping\n");
+ return false;
+ }
+
+ string name() const override
+ {
+ return Attribute::standard_name(attribute);
+ }
+
+ bool equals(const ImageLoader &other) const override
+ {
+ const BlenderSmokeLoader &other_loader = (const BlenderSmokeLoader &)other;
+ return b_domain == other_loader.b_domain && attribute == other_loader.attribute;
+ }
+
+ BL::FluidDomainSettings b_domain;
+ float3 texspace_loc, texspace_size;
+ AttributeStandard attribute;
+};
+
+static void sync_smoke_volume(Scene *scene, BL::Object &b_ob, Mesh *mesh, float frame)
+{
+ BL::FluidDomainSettings b_domain = object_fluid_gas_domain_find(b_ob);
+ if (!b_domain) {
+ return;
+ }
+
+ AttributeStandard attributes[] = {ATTR_STD_VOLUME_DENSITY,
+ ATTR_STD_VOLUME_COLOR,
+ ATTR_STD_VOLUME_FLAME,
+ ATTR_STD_VOLUME_HEAT,
+ ATTR_STD_VOLUME_TEMPERATURE,
+ ATTR_STD_VOLUME_VELOCITY,
+ ATTR_STD_NONE};
+
+ for (int i = 0; attributes[i] != ATTR_STD_NONE; i++) {
+ AttributeStandard std = attributes[i];
+ if (!mesh->need_attribute(scene, std)) {
+ continue;
+ }
+
+ mesh->volume_clipping = b_domain.clipping();
+
+ Attribute *attr = mesh->attributes.add(std);
+
+ ImageLoader *loader = new BlenderSmokeLoader(b_ob, std);
+ ImageParams params;
+ params.frame = frame;
+
+ attr->data_voxel() = scene->image_manager->add_image(loader, params);
+ }
+}
+
+class BlenderVolumeLoader : public VDBImageLoader {
+ public:
+ BlenderVolumeLoader(BL::BlendData &b_data, BL::Volume &b_volume, const string &grid_name)
+ : VDBImageLoader(grid_name), b_data(b_data), b_volume(b_volume), unload(false)
+ {
+ }
+
+ bool load_metadata(ImageMetaData &metadata) override
+ {
+ b_volume.grids.load(b_data.ptr.data);
+ BL::VolumeGrid b_volume_grid = find_grid();
+
+ if (!b_volume_grid) {
+ return false;
+ }
+
+ unload = !b_volume_grid.is_loaded();
+
+#ifdef WITH_OPENVDB
+ Volume *volume = (Volume *)b_volume.ptr.data;
+ VolumeGrid *volume_grid = (VolumeGrid *)b_volume_grid.ptr.data;
+ grid = BKE_volume_grid_openvdb_for_read(volume, volume_grid);
+#endif
+
+ return VDBImageLoader::load_metadata(metadata);
+ }
+
+ bool load_pixels(const ImageMetaData &metadata,
+ void *pixels,
+ const size_t pixel_size,
+ const bool associate_alpha) override
+ {
+ b_volume.grids.load(b_data.ptr.data);
+ BL::VolumeGrid b_volume_grid = find_grid();
+
+ if (!b_volume_grid) {
+ return false;
+ }
+
+ return VDBImageLoader::load_pixels(metadata, pixels, pixel_size, associate_alpha);
+ }
+
+ bool equals(const ImageLoader &other) const override
+ {
+ /* TODO: detect multiple volume datablocks with the same filepath. */
+ const BlenderVolumeLoader &other_loader = (const BlenderVolumeLoader &)other;
+ return b_volume == other_loader.b_volume && grid_name == other_loader.grid_name;
+ }
+
+ void cleanup() override
+ {
+ VDBImageLoader::cleanup();
+
+ BL::VolumeGrid b_volume_grid = find_grid();
+ if (b_volume_grid && unload) {
+ b_volume_grid.unload();
+ }
+ }
+
+ /* Find grid with matching name. The grid pointer is not stored in the class since
+ * grids may be unloaded before we load the pixels, for example for motion
+ * blur where we move between frames. */
+ BL::VolumeGrid find_grid()
+ {
+#ifdef WITH_OPENVDB
+ BL::Volume::grids_iterator b_grid_iter;
+ for (b_volume.grids.begin(b_grid_iter); b_grid_iter != b_volume.grids.end(); ++b_grid_iter) {
+ if (b_grid_iter->name() == grid_name) {
+ return *b_grid_iter;
+ }
+ }
+#endif
+
+ return BL::VolumeGrid(PointerRNA_NULL);
+ }
+
+ BL::BlendData b_data;
+ BL::Volume b_volume;
+ bool unload;
+};
+
+static void sync_volume_object(BL::BlendData &b_data, BL::Object &b_ob, Scene *scene, Mesh *mesh)
+{
+ BL::Volume b_volume(b_ob.data());
+ b_volume.grids.load(b_data.ptr.data);
+
+ BL::VolumeRender b_render(b_volume.render());
+
+ mesh->volume_clipping = b_render.clipping();
+ mesh->volume_step_size = b_render.step_size();
+ mesh->volume_object_space = (b_render.space() == BL::VolumeRender::space_OBJECT);
+
+ /* Find grid with matching name. */
+ BL::Volume::grids_iterator b_grid_iter;
+ for (b_volume.grids.begin(b_grid_iter); b_grid_iter != b_volume.grids.end(); ++b_grid_iter) {
+ BL::VolumeGrid b_grid = *b_grid_iter;
+ ustring name = ustring(b_grid.name());
+ AttributeStandard std = ATTR_STD_NONE;
+
+ if (name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY)) {
+ std = ATTR_STD_VOLUME_DENSITY;
+ }
+ else if (name == Attribute::standard_name(ATTR_STD_VOLUME_COLOR)) {
+ std = ATTR_STD_VOLUME_COLOR;
+ }
+ else if (name == Attribute::standard_name(ATTR_STD_VOLUME_FLAME)) {
+ std = ATTR_STD_VOLUME_FLAME;
+ }
+ else if (name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) {
+ std = ATTR_STD_VOLUME_HEAT;
+ }
+ else if (name == Attribute::standard_name(ATTR_STD_VOLUME_TEMPERATURE)) {
+ std = ATTR_STD_VOLUME_TEMPERATURE;
+ }
+ else if (name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY)) {
+ std = ATTR_STD_VOLUME_VELOCITY;
+ }
+
+ if ((std != ATTR_STD_NONE && mesh->need_attribute(scene, std)) ||
+ mesh->need_attribute(scene, name)) {
+ Attribute *attr = (std != ATTR_STD_NONE) ?
+ mesh->attributes.add(std) :
+ mesh->attributes.add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_VOXEL);
+
+ ImageLoader *loader = new BlenderVolumeLoader(b_data, b_volume, name.string());
+ ImageParams params;
+ params.frame = b_volume.grids.frame();
+
+ attr->data_voxel() = scene->image_manager->add_image(loader, params);
+ }
+ }
+}
+
+/* If the voxel attributes change, we need to rebuild the bounding mesh. */
+static vector<int> get_voxel_image_slots(Mesh *mesh)
+{
+ vector<int> slots;
+ for (const Attribute &attr : mesh->attributes.attributes) {
+ if (attr.element == ATTR_ELEMENT_VOXEL) {
+ slots.push_back(attr.data_voxel().svm_slot());
+ }
+ }
+
+ return slots;
+}
+
+void BlenderSync::sync_volume(BL::Object &b_ob, Mesh *mesh, const vector<Shader *> &used_shaders)
+{
+ vector<int> old_voxel_slots = get_voxel_image_slots(mesh);
+
+ mesh->clear();
+ mesh->used_shaders = used_shaders;
+
+ if (view_layer.use_volumes) {
+ if (b_ob.type() == BL::Object::type_VOLUME) {
+ /* Volume object. Create only attributes, bounding mesh will then
+ * be automatically generated later. */
+ sync_volume_object(b_data, b_ob, scene, mesh);
+ }
+ else {
+ /* Smoke domain. */
+ sync_smoke_volume(scene, b_ob, mesh, b_scene.frame_current());
+ }
+ }
+
+ /* Tag update. */
+ bool rebuild = (old_voxel_slots != get_voxel_image_slots(mesh));
+ mesh->tag_update(scene, rebuild);
+}
+
+CCL_NAMESPACE_END
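sync_volume_object() above maps OpenVDB grid names onto Cycles' standard volume attributes and falls back to a custom float voxel attribute for anything else. A standalone sketch of that mapping; the lowercase names are assumed values of Attribute::standard_name() and may not match exactly:

#include <cstdio>
#include <map>
#include <string>

int main()
{
  // Assumed name -> standard-attribute mapping, mirroring the chain of
  // comparisons in sync_volume_object() above; the enum names are stand-ins.
  const std::map<std::string, std::string> standard = {
      {"density", "ATTR_STD_VOLUME_DENSITY"},
      {"color", "ATTR_STD_VOLUME_COLOR"},
      {"flame", "ATTR_STD_VOLUME_FLAME"},
      {"heat", "ATTR_STD_VOLUME_HEAT"},
      {"temperature", "ATTR_STD_VOLUME_TEMPERATURE"},
      {"velocity", "ATTR_STD_VOLUME_VELOCITY"},
  };

  const std::string grid_names[] = {"density", "velocity", "my_custom_grid"};
  for (const std::string &name : grid_names) {
    auto it = standard.find(name);
    if (it != standard.end())
      std::printf("%s -> %s\n", name.c_str(), it->second.c_str());
    else
      std::printf("%s -> custom float voxel attribute\n", name.c_str());
  }
  return 0;
}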
diff --git a/intern/cycles/bvh/CMakeLists.txt b/intern/cycles/bvh/CMakeLists.txt
index 36bbd937e1a..8b8f3ca7265 100644
--- a/intern/cycles/bvh/CMakeLists.txt
+++ b/intern/cycles/bvh/CMakeLists.txt
@@ -9,12 +9,11 @@ set(INC_SYS
set(SRC
bvh.cpp
bvh2.cpp
- bvh4.cpp
- bvh8.cpp
bvh_binning.cpp
bvh_build.cpp
bvh_embree.cpp
bvh_node.cpp
+ bvh_optix.cpp
bvh_sort.cpp
bvh_split.cpp
bvh_unaligned.cpp
@@ -23,12 +22,11 @@ set(SRC
set(SRC_HEADERS
bvh.h
bvh2.h
- bvh4.h
- bvh8.h
bvh_binning.h
bvh_build.h
bvh_embree.h
bvh_node.h
+ bvh_optix.h
bvh_params.h
bvh_sort.h
bvh_split.h
@@ -37,9 +35,16 @@ set(SRC_HEADERS
set(LIB
cycles_render
+ cycles_util
)
include_directories(${INC})
include_directories(SYSTEM ${INC_SYS})
+if(WITH_CYCLES_EMBREE)
+ list(APPEND LIB
+ ${EMBREE_LIBRARIES}
+ )
+endif()
+
cycles_add_library(cycles_bvh "${LIB}" ${SRC} ${SRC_HEADERS})
diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp
index 53c66777928..e9e67fd1305 100644
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -17,18 +17,15 @@
#include "bvh/bvh.h"
+#include "render/hair.h"
#include "render/mesh.h"
#include "render/object.h"
#include "bvh/bvh2.h"
-#include "bvh/bvh4.h"
-#include "bvh/bvh8.h"
#include "bvh/bvh_build.h"
+#include "bvh/bvh_embree.h"
#include "bvh/bvh_node.h"
-
-#ifdef WITH_EMBREE
-# include "bvh/bvh_embree.h"
-#endif
+#include "bvh/bvh_optix.h"
#include "util/util_foreach.h"
#include "util/util_logging.h"
@@ -43,14 +40,12 @@ const char *bvh_layout_name(BVHLayout layout)
switch (layout) {
case BVH_LAYOUT_BVH2:
return "BVH2";
- case BVH_LAYOUT_BVH4:
- return "BVH4";
- case BVH_LAYOUT_BVH8:
- return "BVH8";
case BVH_LAYOUT_NONE:
return "NONE";
case BVH_LAYOUT_EMBREE:
return "EMBREE";
+ case BVH_LAYOUT_OPTIX:
+ return "OPTIX";
case BVH_LAYOUT_ALL:
return "ALL";
}
@@ -71,7 +66,11 @@ BVHLayout BVHParams::best_bvh_layout(BVHLayout requested_layout, BVHLayoutMask s
/* This is a mask of supported BVH layouts which are narrower than the
* requested one.
*/
- const BVHLayoutMask allowed_layouts_mask = (supported_layouts & (requested_layout_mask - 1));
+ BVHLayoutMask allowed_layouts_mask = (supported_layouts & (requested_layout_mask - 1));
+ /* If the requested layout is not supported, choose from the supported layouts instead. */
+ if (allowed_layouts_mask == 0) {
+ allowed_layouts_mask = supported_layouts;
+ }
/* We get widest from allowed ones and convert mask to actual layout. */
const BVHLayoutMask widest_allowed_layout_mask = __bsr(allowed_layouts_mask);
return (BVHLayout)(1 << widest_allowed_layout_mask);
@@ -90,23 +89,31 @@ int BVHStackEntry::encodeIdx() const
/* BVH */
-BVH::BVH(const BVHParams &params_, const vector<Object *> &objects_)
- : params(params_), objects(objects_)
+BVH::BVH(const BVHParams &params_,
+ const vector<Geometry *> &geometry_,
+ const vector<Object *> &objects_)
+ : params(params_), geometry(geometry_), objects(objects_)
{
}
-BVH *BVH::create(const BVHParams &params, const vector<Object *> &objects)
+BVH *BVH::create(const BVHParams &params,
+ const vector<Geometry *> &geometry,
+ const vector<Object *> &objects)
{
switch (params.bvh_layout) {
case BVH_LAYOUT_BVH2:
- return new BVH2(params, objects);
- case BVH_LAYOUT_BVH4:
- return new BVH4(params, objects);
- case BVH_LAYOUT_BVH8:
- return new BVH8(params, objects);
+ return new BVH2(params, geometry, objects);
case BVH_LAYOUT_EMBREE:
#ifdef WITH_EMBREE
- return new BVHEmbree(params, objects);
+ return new BVHEmbree(params, geometry, objects);
+#else
+ break;
+#endif
+ case BVH_LAYOUT_OPTIX:
+#ifdef WITH_OPTIX
+ return new BVHOptiX(params, geometry, objects);
+#else
+ break;
#endif
case BVH_LAYOUT_NONE:
case BVH_LAYOUT_ALL:
@@ -198,36 +205,34 @@ void BVH::refit_primitives(int start, int end, BoundBox &bbox, uint &visibility)
}
else {
/* Primitives. */
- const Mesh *mesh = ob->mesh;
-
if (pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
/* Curves. */
- int str_offset = (params.top_level) ? mesh->curve_offset : 0;
- Mesh::Curve curve = mesh->get_curve(pidx - str_offset);
+ const Hair *hair = static_cast<const Hair *>(ob->geometry);
+ int prim_offset = (params.top_level) ? hair->prim_offset : 0;
+ Hair::Curve curve = hair->get_curve(pidx - prim_offset);
int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);
- curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox);
-
- visibility |= PATH_RAY_CURVE;
+ curve.bounds_grow(k, &hair->curve_keys[0], &hair->curve_radius[0], bbox);
/* Motion curves. */
- if (mesh->use_motion_blur) {
- Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if (hair->use_motion_blur) {
+ Attribute *attr = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if (attr) {
- size_t mesh_size = mesh->curve_keys.size();
- size_t steps = mesh->motion_steps - 1;
+ size_t hair_size = hair->curve_keys.size();
+ size_t steps = hair->motion_steps - 1;
float3 *key_steps = attr->data_float3();
for (size_t i = 0; i < steps; i++)
- curve.bounds_grow(k, key_steps + i * mesh_size, &mesh->curve_radius[0], bbox);
+ curve.bounds_grow(k, key_steps + i * hair_size, &hair->curve_radius[0], bbox);
}
}
}
else {
/* Triangles. */
- int tri_offset = (params.top_level) ? mesh->tri_offset : 0;
- Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset);
+ const Mesh *mesh = static_cast<const Mesh *>(ob->geometry);
+ int prim_offset = (params.top_level) ? mesh->prim_offset : 0;
+ Mesh::Triangle triangle = mesh->get_triangle(pidx - prim_offset);
const float3 *vpos = &mesh->verts[0];
triangle.bounds_grow(vpos, bbox);
@@ -257,7 +262,7 @@ void BVH::pack_triangle(int idx, float4 tri_verts[3])
{
int tob = pack.prim_object[idx];
assert(tob >= 0 && tob < objects.size());
- const Mesh *mesh = objects[tob]->mesh;
+ const Mesh *mesh = static_cast<const Mesh *>(objects[tob]->geometry);
int tidx = pack.prim_index[idx];
Mesh::Triangle t = mesh->get_triangle(tidx);
@@ -305,9 +310,6 @@ void BVH::pack_primitives()
pack.prim_tri_index[i] = -1;
}
pack.prim_visibility[i] = ob->visibility_for_tracing();
- if (pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
- pack.prim_visibility[i] |= PATH_RAY_CURVE;
- }
}
else {
pack.prim_tri_index[i] = -1;
@@ -320,23 +322,14 @@ void BVH::pack_primitives()
void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
{
- /* The BVH's for instances are built separately, but for traversal all
- * BVH's are stored in global arrays. This function merges them into the
- * top level BVH, adjusting indexes and offsets where appropriate.
- */
- const bool use_qbvh = (params.bvh_layout == BVH_LAYOUT_BVH4);
- const bool use_obvh = (params.bvh_layout == BVH_LAYOUT_BVH8);
-
/* Adjust primitive index to point to the triangle in the global array, for
- * meshes with transform applied and already in the top level BVH.
+ * geometry with transform applied and already in the top level BVH.
*/
- for (size_t i = 0; i < pack.prim_index.size(); i++)
+ for (size_t i = 0; i < pack.prim_index.size(); i++) {
if (pack.prim_index[i] != -1) {
- if (pack.prim_type[i] & PRIMITIVE_ALL_CURVE)
- pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->curve_offset;
- else
- pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->tri_offset;
+ pack.prim_index[i] += objects[pack.prim_object[i]]->geometry->prim_offset;
}
+ }
/* track offsets of instanced BVH data in global array */
size_t prim_offset = pack.prim_index.size();
@@ -356,26 +349,17 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
size_t pack_leaf_nodes_offset = leaf_nodes_size;
size_t object_offset = 0;
- map<Mesh *, int> mesh_map;
+ foreach (Geometry *geom, geometry) {
+ BVH *bvh = geom->bvh;
- foreach (Object *ob, objects) {
- Mesh *mesh = ob->mesh;
- BVH *bvh = mesh->bvh;
-
- if (mesh->need_build_bvh()) {
- if (mesh_map.find(mesh) == mesh_map.end()) {
- prim_index_size += bvh->pack.prim_index.size();
- prim_tri_verts_size += bvh->pack.prim_tri_verts.size();
- nodes_size += bvh->pack.nodes.size();
- leaf_nodes_size += bvh->pack.leaf_nodes.size();
-
- mesh_map[mesh] = 1;
- }
+ if (geom->need_build_bvh(params.bvh_layout)) {
+ prim_index_size += bvh->pack.prim_index.size();
+ prim_tri_verts_size += bvh->pack.prim_tri_verts.size();
+ nodes_size += bvh->pack.nodes.size();
+ leaf_nodes_size += bvh->pack.leaf_nodes.size();
}
}
- mesh_map.clear();
-
pack.prim_index.resize(prim_index_size);
pack.prim_type.resize(prim_index_size);
pack.prim_object.resize(prim_index_size);
@@ -400,34 +384,35 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
int4 *pack_leaf_nodes = (pack.leaf_nodes.size()) ? &pack.leaf_nodes[0] : NULL;
float2 *pack_prim_time = (pack.prim_time.size()) ? &pack.prim_time[0] : NULL;
+ map<Geometry *, int> geometry_map;
+
/* merge */
foreach (Object *ob, objects) {
- Mesh *mesh = ob->mesh;
+ Geometry *geom = ob->geometry;
/* We assume that if mesh doesn't need own BVH it was already included
* into a top-level BVH and no packing here is needed.
*/
- if (!mesh->need_build_bvh()) {
+ if (!geom->need_build_bvh(params.bvh_layout)) {
pack.object_node[object_offset++] = 0;
continue;
}
    /* if the geometry was already added once, don't add it again, but reuse
     * the node offset that was stored for it */
- map<Mesh *, int>::iterator it = mesh_map.find(mesh);
+ map<Geometry *, int>::iterator it = geometry_map.find(geom);
- if (mesh_map.find(mesh) != mesh_map.end()) {
+ if (geometry_map.find(geom) != geometry_map.end()) {
int noffset = it->second;
pack.object_node[object_offset++] = noffset;
continue;
}
- BVH *bvh = mesh->bvh;
+ BVH *bvh = geom->bvh;
int noffset = nodes_offset;
int noffset_leaf = nodes_leaf_offset;
- int mesh_tri_offset = mesh->tri_offset;
- int mesh_curve_offset = mesh->curve_offset;
+ int geom_prim_offset = geom->prim_offset;
/* fill in node indexes for instances */
if (bvh->pack.root_index == -1)
@@ -435,7 +420,7 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
else
pack.object_node[object_offset++] = noffset;
- mesh_map[mesh] = pack.object_node[object_offset - 1];
+ geometry_map[geom] = pack.object_node[object_offset - 1];
/* merge primitive, object and triangle indexes */
if (bvh->pack.prim_index.size()) {
@@ -448,11 +433,11 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
for (size_t i = 0; i < bvh_prim_index_size; i++) {
if (bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
- pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_curve_offset;
+ pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset;
pack_prim_tri_index[pack_prim_index_offset] = -1;
}
else {
- pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_tri_offset;
+ pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset;
pack_prim_tri_index[pack_prim_index_offset] = bvh_prim_tri_index[i] +
pack_prim_tri_verts_offset;
}
@@ -499,53 +484,21 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
for (size_t i = 0, j = 0; i < bvh_nodes_size; j++) {
size_t nsize, nsize_bbox;
if (bvh_nodes[i].x & PATH_RAY_NODE_UNALIGNED) {
- if (use_obvh) {
- nsize = BVH_UNALIGNED_ONODE_SIZE;
- nsize_bbox = BVH_UNALIGNED_ONODE_SIZE - 1;
- }
- else {
- nsize = use_qbvh ? BVH_UNALIGNED_QNODE_SIZE : BVH_UNALIGNED_NODE_SIZE;
- nsize_bbox = (use_qbvh) ? BVH_UNALIGNED_QNODE_SIZE - 1 : 0;
- }
+ nsize = BVH_UNALIGNED_NODE_SIZE;
+ nsize_bbox = 0;
}
else {
- if (use_obvh) {
- nsize = BVH_ONODE_SIZE;
- nsize_bbox = BVH_ONODE_SIZE - 1;
- }
- else {
- nsize = (use_qbvh) ? BVH_QNODE_SIZE : BVH_NODE_SIZE;
- nsize_bbox = (use_qbvh) ? BVH_QNODE_SIZE - 1 : 0;
- }
+ nsize = BVH_NODE_SIZE;
+ nsize_bbox = 0;
}
memcpy(pack_nodes + pack_nodes_offset, bvh_nodes + i, nsize_bbox * sizeof(int4));
/* Modify offsets into arrays */
int4 data = bvh_nodes[i + nsize_bbox];
- int4 data1 = bvh_nodes[i + nsize_bbox - 1];
- if (use_obvh) {
- data.z += (data.z < 0) ? -noffset_leaf : noffset;
- data.w += (data.w < 0) ? -noffset_leaf : noffset;
- data.x += (data.x < 0) ? -noffset_leaf : noffset;
- data.y += (data.y < 0) ? -noffset_leaf : noffset;
- data1.z += (data1.z < 0) ? -noffset_leaf : noffset;
- data1.w += (data1.w < 0) ? -noffset_leaf : noffset;
- data1.x += (data1.x < 0) ? -noffset_leaf : noffset;
- data1.y += (data1.y < 0) ? -noffset_leaf : noffset;
- }
- else {
- data.z += (data.z < 0) ? -noffset_leaf : noffset;
- data.w += (data.w < 0) ? -noffset_leaf : noffset;
- if (use_qbvh) {
- data.x += (data.x < 0) ? -noffset_leaf : noffset;
- data.y += (data.y < 0) ? -noffset_leaf : noffset;
- }
- }
+ data.z += (data.z < 0) ? -noffset_leaf : noffset;
+ data.w += (data.w < 0) ? -noffset_leaf : noffset;
pack_nodes[pack_nodes_offset + nsize_bbox] = data;
- if (use_obvh) {
- pack_nodes[pack_nodes_offset + nsize_bbox - 1] = data1;
- }
/* Usually this copies nothing, but we better
* be prepared for possible node size extension.
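To make the simplified remapping above concrete: with triangles and curves now sharing a single per-geometry prim_offset, the fix-up in BVH::pack_instances() no longer needs to branch on primitive type. A minimal standalone sketch of that loop follows; the *Stub types are placeholders for illustration, not Cycles classes.

#include <cstddef>
#include <vector>

/* Placeholder stand-ins for Geometry and Object. */
struct GeometryStub {
  int prim_offset; /* where this geometry's primitives start in the global arrays */
};
struct ObjectStub {
  const GeometryStub *geometry;
};

/* Shift every real primitive index by the offset of its geometry; entries of -1
 * mark instance leaves and are left untouched, matching the hunk above. */
static void remap_prim_indices(std::vector<int> &prim_index,
                               const std::vector<int> &prim_object,
                               const std::vector<ObjectStub> &objects)
{
  for (size_t i = 0; i < prim_index.size(); i++) {
    if (prim_index[i] != -1) {
      prim_index[i] += objects[prim_object[i]].geometry->prim_offset;
    }
  }
}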
diff --git a/intern/cycles/bvh/bvh.h b/intern/cycles/bvh/bvh.h
index edce3ca6f2a..6639e06b0bc 100644
--- a/intern/cycles/bvh/bvh.h
+++ b/intern/cycles/bvh/bvh.h
@@ -26,11 +26,14 @@
CCL_NAMESPACE_BEGIN
class Stats;
+class Device;
+class DeviceScene;
class BVHNode;
struct BVHStackEntry;
class BVHParams;
class BoundBox;
class LeafNode;
+class Geometry;
class Object;
class Progress;
@@ -73,7 +76,7 @@ struct PackedBVH {
}
};
-enum BVH_TYPE { bvh2, bvh4, bvh8 };
+enum BVH_TYPE { bvh2 };
/* BVH */
@@ -81,18 +84,27 @@ class BVH {
public:
PackedBVH pack;
BVHParams params;
+ vector<Geometry *> geometry;
vector<Object *> objects;
- static BVH *create(const BVHParams &params, const vector<Object *> &objects);
+ static BVH *create(const BVHParams &params,
+ const vector<Geometry *> &geometry,
+ const vector<Object *> &objects);
virtual ~BVH()
{
}
virtual void build(Progress &progress, Stats *stats = NULL);
+ virtual void copy_to_device(Progress & /*progress*/, DeviceScene * /*dscene*/)
+ {
+ }
+
void refit(Progress &progress);
protected:
- BVH(const BVHParams &params, const vector<Object *> &objects);
+ BVH(const BVHParams &params,
+ const vector<Geometry *> &geometry,
+ const vector<Object *> &objects);
/* Refit range of primitives. */
void refit_primitives(int start, int end, BoundBox &bbox, uint &visibility);
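A hedged sketch of how the widened interface above might be called; the Scene, Progress and DeviceScene arguments and the build_top_level_bvh() wrapper are assumed for illustration only and are not part of this diff.

#include "bvh/bvh.h"
#include "bvh/bvh_params.h"
#include "render/scene.h"

/* Assumed call-site: the top-level BVH is now created from the scene's geometry
 * list as well as its object list, then uploaded explicitly. */
static ccl::BVH *build_top_level_bvh(ccl::Scene *scene,
                                     ccl::Progress &progress,
                                     ccl::DeviceScene *dscene)
{
  ccl::BVHParams bparams;
  bparams.top_level = true;
  bparams.bvh_layout = ccl::BVH_LAYOUT_BVH2;

  ccl::BVH *bvh = ccl::BVH::create(bparams, scene->geometry, scene->objects);
  bvh->build(progress);
  bvh->copy_to_device(progress, dscene); /* base-class default is a no-op */
  return bvh;
}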
diff --git a/intern/cycles/bvh/bvh2.cpp b/intern/cycles/bvh/bvh2.cpp
index f419d413ef6..c903070429e 100644
--- a/intern/cycles/bvh/bvh2.cpp
+++ b/intern/cycles/bvh/bvh2.cpp
@@ -25,7 +25,10 @@
CCL_NAMESPACE_BEGIN
-BVH2::BVH2(const BVHParams &params_, const vector<Object *> &objects_) : BVH(params_, objects_)
+BVH2::BVH2(const BVHParams &params_,
+ const vector<Geometry *> &geometry_,
+ const vector<Object *> &objects_)
+ : BVH(params_, geometry_, objects_)
{
}
diff --git a/intern/cycles/bvh/bvh2.h b/intern/cycles/bvh/bvh2.h
index c6a4e6fa73a..fa3e45b72d2 100644
--- a/intern/cycles/bvh/bvh2.h
+++ b/intern/cycles/bvh/bvh2.h
@@ -46,7 +46,9 @@ class BVH2 : public BVH {
protected:
/* constructor */
friend class BVH;
- BVH2(const BVHParams &params, const vector<Object *> &objects);
+ BVH2(const BVHParams &params,
+ const vector<Geometry *> &geometry,
+ const vector<Object *> &objects);
/* Building process. */
virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
diff --git a/intern/cycles/bvh/bvh4.cpp b/intern/cycles/bvh/bvh4.cpp
deleted file mode 100644
index 850bdf5b8b4..00000000000
--- a/intern/cycles/bvh/bvh4.cpp
+++ /dev/null
@@ -1,445 +0,0 @@
-/*
- * Adapted from code copyright 2009-2010 NVIDIA Corporation
- * Modifications Copyright 2011, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "bvh/bvh4.h"
-
-#include "render/mesh.h"
-#include "render/object.h"
-
-#include "bvh/bvh_node.h"
-#include "bvh/bvh_unaligned.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* Can we avoid this somehow or make more generic?
- *
- * Perhaps we can merge nodes in actual tree and make our
- * life easier all over the place.
- */
-
-BVH4::BVH4(const BVHParams &params_, const vector<Object *> &objects_) : BVH(params_, objects_)
-{
- params.bvh_layout = BVH_LAYOUT_BVH4;
-}
-
-namespace {
-
-BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
-{
- if (node->is_leaf()) {
- return new LeafNode(*reinterpret_cast<const LeafNode *>(node));
- }
- /* Collect nodes of one layer deeper, allowing us to have more childrem in
- * an inner layer. */
- assert(node->num_children() <= 2);
- const BVHNode *children[4];
- const BVHNode *child0 = node->get_child(0);
- const BVHNode *child1 = node->get_child(1);
- int num_children = 0;
- if (child0->is_leaf()) {
- children[num_children++] = child0;
- }
- else {
- children[num_children++] = child0->get_child(0);
- children[num_children++] = child0->get_child(1);
- }
- if (child1->is_leaf()) {
- children[num_children++] = child1;
- }
- else {
- children[num_children++] = child1->get_child(0);
- children[num_children++] = child1->get_child(1);
- }
- /* Merge children in subtrees. */
- BVHNode *children4[4];
- for (int i = 0; i < num_children; ++i) {
- children4[i] = bvh_node_merge_children_recursively(children[i]);
- }
- /* Allocate new node. */
- BVHNode *node4 = new InnerNode(node->bounds, children4, num_children);
- /* TODO(sergey): Consider doing this from the InnerNode() constructor.
- * But in order to do this nicely need to think of how to pass all the
- * parameters there. */
- if (node->is_unaligned) {
- node4->is_unaligned = true;
- node4->aligned_space = new Transform();
- *node4->aligned_space = *node->aligned_space;
- }
- return node4;
-}
-
-} // namespace
-
-BVHNode *BVH4::widen_children_nodes(const BVHNode *root)
-{
- if (root == NULL) {
- return NULL;
- }
- if (root->is_leaf()) {
- return const_cast<BVHNode *>(root);
- }
- BVHNode *root4 = bvh_node_merge_children_recursively(root);
- /* TODO(sergey): Pack children nodes to parents which has less that 4
- * children. */
- return root4;
-}
-
-void BVH4::pack_leaf(const BVHStackEntry &e, const LeafNode *leaf)
-{
- float4 data[BVH_QNODE_LEAF_SIZE];
- memset(data, 0, sizeof(data));
- if (leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
- /* object */
- data[0].x = __int_as_float(~(leaf->lo));
- data[0].y = __int_as_float(0);
- }
- else {
- /* triangle */
- data[0].x = __int_as_float(leaf->lo);
- data[0].y = __int_as_float(leaf->hi);
- }
- data[0].z = __uint_as_float(leaf->visibility);
- if (leaf->num_triangles() != 0) {
- data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
- }
-
- memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4) * BVH_QNODE_LEAF_SIZE);
-}
-
-void BVH4::pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
-{
- bool has_unaligned = false;
- /* Check whether we have to create unaligned node or all nodes are aligned
- * and we can cut some corner here.
- */
- if (params.use_unaligned_nodes) {
- for (int i = 0; i < num; i++) {
- if (en[i].node->is_unaligned) {
- has_unaligned = true;
- break;
- }
- }
- }
- if (has_unaligned) {
- /* There's no unaligned children, pack into AABB node. */
- pack_unaligned_inner(e, en, num);
- }
- else {
- /* Create unaligned node with orientation transform for each of the
- * children.
- */
- pack_aligned_inner(e, en, num);
- }
-}
-
-void BVH4::pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
-{
- BoundBox bounds[4];
- int child[4];
- for (int i = 0; i < num; ++i) {
- bounds[i] = en[i].node->bounds;
- child[i] = en[i].encodeIdx();
- }
- pack_aligned_node(
- e.idx, bounds, child, e.node->visibility, e.node->time_from, e.node->time_to, num);
-}
-
-void BVH4::pack_aligned_node(int idx,
- const BoundBox *bounds,
- const int *child,
- const uint visibility,
- const float time_from,
- const float time_to,
- const int num)
-{
- float4 data[BVH_QNODE_SIZE];
- memset(data, 0, sizeof(data));
-
- data[0].x = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
- data[0].y = time_from;
- data[0].z = time_to;
-
- for (int i = 0; i < num; i++) {
- float3 bb_min = bounds[i].min;
- float3 bb_max = bounds[i].max;
-
- data[1][i] = bb_min.x;
- data[2][i] = bb_max.x;
- data[3][i] = bb_min.y;
- data[4][i] = bb_max.y;
- data[5][i] = bb_min.z;
- data[6][i] = bb_max.z;
-
- data[7][i] = __int_as_float(child[i]);
- }
-
- for (int i = num; i < 4; i++) {
- /* We store BB which would never be recorded as intersection
- * so kernel might safely assume there are always 4 child nodes.
- */
- data[1][i] = FLT_MAX;
- data[2][i] = -FLT_MAX;
-
- data[3][i] = FLT_MAX;
- data[4][i] = -FLT_MAX;
-
- data[5][i] = FLT_MAX;
- data[6][i] = -FLT_MAX;
-
- data[7][i] = __int_as_float(0);
- }
-
- memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_QNODE_SIZE);
-}
-
-void BVH4::pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
-{
- Transform aligned_space[4];
- BoundBox bounds[4];
- int child[4];
- for (int i = 0; i < num; ++i) {
- aligned_space[i] = en[i].node->get_aligned_space();
- bounds[i] = en[i].node->bounds;
- child[i] = en[i].encodeIdx();
- }
- pack_unaligned_node(e.idx,
- aligned_space,
- bounds,
- child,
- e.node->visibility,
- e.node->time_from,
- e.node->time_to,
- num);
-}
-
-void BVH4::pack_unaligned_node(int idx,
- const Transform *aligned_space,
- const BoundBox *bounds,
- const int *child,
- const uint visibility,
- const float time_from,
- const float time_to,
- const int num)
-{
- float4 data[BVH_UNALIGNED_QNODE_SIZE];
- memset(data, 0, sizeof(data));
-
- data[0].x = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
- data[0].y = time_from;
- data[0].z = time_to;
-
- for (int i = 0; i < num; i++) {
- Transform space = BVHUnaligned::compute_node_transform(bounds[i], aligned_space[i]);
-
- data[1][i] = space.x.x;
- data[2][i] = space.x.y;
- data[3][i] = space.x.z;
-
- data[4][i] = space.y.x;
- data[5][i] = space.y.y;
- data[6][i] = space.y.z;
-
- data[7][i] = space.z.x;
- data[8][i] = space.z.y;
- data[9][i] = space.z.z;
-
- data[10][i] = space.x.w;
- data[11][i] = space.y.w;
- data[12][i] = space.z.w;
-
- data[13][i] = __int_as_float(child[i]);
- }
-
- for (int i = num; i < 4; i++) {
- /* We store BB which would never be recorded as intersection
- * so kernel might safely assume there are always 4 child nodes.
- */
-
- data[1][i] = NAN;
- data[2][i] = NAN;
- data[3][i] = NAN;
-
- data[4][i] = NAN;
- data[5][i] = NAN;
- data[6][i] = NAN;
-
- data[7][i] = NAN;
- data[8][i] = NAN;
- data[9][i] = NAN;
-
- data[10][i] = NAN;
- data[11][i] = NAN;
- data[12][i] = NAN;
-
- data[13][i] = __int_as_float(0);
- }
-
- memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_UNALIGNED_QNODE_SIZE);
-}
-
-/* Quad SIMD Nodes */
-
-void BVH4::pack_nodes(const BVHNode *root)
-{
- /* Calculate size of the arrays required. */
- const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
- const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
- assert(num_leaf_nodes <= num_nodes);
- const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
- size_t node_size;
- if (params.use_unaligned_nodes) {
- const size_t num_unaligned_nodes = root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
- node_size = (num_unaligned_nodes * BVH_UNALIGNED_QNODE_SIZE) +
- (num_inner_nodes - num_unaligned_nodes) * BVH_QNODE_SIZE;
- }
- else {
- node_size = num_inner_nodes * BVH_QNODE_SIZE;
- }
- /* Resize arrays. */
- pack.nodes.clear();
- pack.leaf_nodes.clear();
- /* For top level BVH, first merge existing BVH's so we know the offsets. */
- if (params.top_level) {
- pack_instances(node_size, num_leaf_nodes * BVH_QNODE_LEAF_SIZE);
- }
- else {
- pack.nodes.resize(node_size);
- pack.leaf_nodes.resize(num_leaf_nodes * BVH_QNODE_LEAF_SIZE);
- }
-
- int nextNodeIdx = 0, nextLeafNodeIdx = 0;
-
- vector<BVHStackEntry> stack;
- stack.reserve(BVHParams::MAX_DEPTH * 2);
- if (root->is_leaf()) {
- stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
- }
- else {
- stack.push_back(BVHStackEntry(root, nextNodeIdx));
- nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_QNODE_SIZE : BVH_QNODE_SIZE;
- }
-
- while (stack.size()) {
- BVHStackEntry e = stack.back();
- stack.pop_back();
-
- if (e.node->is_leaf()) {
- /* leaf node */
- const LeafNode *leaf = reinterpret_cast<const LeafNode *>(e.node);
- pack_leaf(e, leaf);
- }
- else {
- /* Inner node. */
- /* Collect nodes. */
- const BVHNode *children[4];
- const int num_children = e.node->num_children();
- /* Push entries on the stack. */
- for (int i = 0; i < num_children; ++i) {
- int idx;
- children[i] = e.node->get_child(i);
- assert(children[i] != NULL);
- if (children[i]->is_leaf()) {
- idx = nextLeafNodeIdx++;
- }
- else {
- idx = nextNodeIdx;
- nextNodeIdx += children[i]->has_unaligned() ? BVH_UNALIGNED_QNODE_SIZE : BVH_QNODE_SIZE;
- }
- stack.push_back(BVHStackEntry(children[i], idx));
- }
- /* Set node. */
- pack_inner(e, &stack[stack.size() - num_children], num_children);
- }
- }
-
- assert(node_size == nextNodeIdx);
- /* Root index to start traversal at, to handle case of single leaf node. */
- pack.root_index = (root->is_leaf()) ? -1 : 0;
-}
-
-void BVH4::refit_nodes()
-{
- assert(!params.top_level);
-
- BoundBox bbox = BoundBox::empty;
- uint visibility = 0;
- refit_node(0, (pack.root_index == -1) ? true : false, bbox, visibility);
-}
-
-void BVH4::refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility)
-{
- if (leaf) {
- /* Refit leaf node. */
- int4 *data = &pack.leaf_nodes[idx];
- int4 c = data[0];
-
- BVH::refit_primitives(c.x, c.y, bbox, visibility);
-
- /* TODO(sergey): This is actually a copy of pack_leaf(),
- * but this chunk of code only knows actual data and has
- * no idea about BVHNode.
- *
- * Would be nice to de-duplicate code, but trying to make
- * making code more general ends up in much nastier code
- * in my opinion so far.
- *
- * Same applies to the inner nodes case below.
- */
- float4 leaf_data[BVH_QNODE_LEAF_SIZE];
- leaf_data[0].x = __int_as_float(c.x);
- leaf_data[0].y = __int_as_float(c.y);
- leaf_data[0].z = __uint_as_float(visibility);
- leaf_data[0].w = __uint_as_float(c.w);
- memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4) * BVH_QNODE_LEAF_SIZE);
- }
- else {
- int4 *data = &pack.nodes[idx];
- bool is_unaligned = (data[0].x & PATH_RAY_NODE_UNALIGNED) != 0;
- int4 c;
- if (is_unaligned) {
- c = data[13];
- }
- else {
- c = data[7];
- }
- /* Refit inner node, set bbox from children. */
- BoundBox child_bbox[4] = {BoundBox::empty, BoundBox::empty, BoundBox::empty, BoundBox::empty};
- uint child_visibility[4] = {0};
- int num_nodes = 0;
-
- for (int i = 0; i < 4; ++i) {
- if (c[i] != 0) {
- refit_node((c[i] < 0) ? -c[i] - 1 : c[i], (c[i] < 0), child_bbox[i], child_visibility[i]);
- ++num_nodes;
- bbox.grow(child_bbox[i]);
- visibility |= child_visibility[i];
- }
- }
-
- if (is_unaligned) {
- Transform aligned_space[4] = {
- transform_identity(), transform_identity(), transform_identity(), transform_identity()};
- pack_unaligned_node(
- idx, aligned_space, child_bbox, &c[0], visibility, 0.0f, 1.0f, num_nodes);
- }
- else {
- pack_aligned_node(idx, child_bbox, &c[0], visibility, 0.0f, 1.0f, num_nodes);
- }
- }
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/bvh/bvh4.h b/intern/cycles/bvh/bvh4.h
deleted file mode 100644
index 38b0961d3df..00000000000
--- a/intern/cycles/bvh/bvh4.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Adapted from code copyright 2009-2010 NVIDIA Corporation
- * Modifications Copyright 2011, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __BVH4_H__
-#define __BVH4_H__
-
-#include "bvh/bvh.h"
-#include "bvh/bvh_params.h"
-
-#include "util/util_types.h"
-#include "util/util_vector.h"
-
-CCL_NAMESPACE_BEGIN
-
-class BVHNode;
-struct BVHStackEntry;
-class BVHParams;
-class BoundBox;
-class LeafNode;
-class Object;
-class Progress;
-
-#define BVH_QNODE_SIZE 8
-#define BVH_QNODE_LEAF_SIZE 1
-#define BVH_UNALIGNED_QNODE_SIZE 14
-
-/* BVH4
- *
- * Quad BVH, with each node having four children, to use with SIMD instructions.
- */
-class BVH4 : public BVH {
- protected:
- /* constructor */
- friend class BVH;
- BVH4(const BVHParams &params, const vector<Object *> &objects);
-
- /* Building process. */
- virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
-
- /* pack */
- void pack_nodes(const BVHNode *root) override;
-
- void pack_leaf(const BVHStackEntry &e, const LeafNode *leaf);
- void pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
-
- void pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
- void pack_aligned_node(int idx,
- const BoundBox *bounds,
- const int *child,
- const uint visibility,
- const float time_from,
- const float time_to,
- const int num);
-
- void pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
- void pack_unaligned_node(int idx,
- const Transform *aligned_space,
- const BoundBox *bounds,
- const int *child,
- const uint visibility,
- const float time_from,
- const float time_to,
- const int num);
-
- /* refit */
- void refit_nodes() override;
- void refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility);
-};
-
-CCL_NAMESPACE_END
-
-#endif /* __BVH4_H__ */
diff --git a/intern/cycles/bvh/bvh8.cpp b/intern/cycles/bvh/bvh8.cpp
deleted file mode 100644
index e812d806b94..00000000000
--- a/intern/cycles/bvh/bvh8.cpp
+++ /dev/null
@@ -1,539 +0,0 @@
-/*
-* Original code Copyright 2017, Intel Corporation
-* Modifications Copyright 2018, Blender Foundation.
-*
-* Redistribution and use in source and binary forms, with or without
-* modification, are permitted provided that the following conditions are met:
-*
-* * Redistributions of source code must retain the above copyright notice,
-* this list of conditions and the following disclaimer.
-* * Redistributions in binary form must reproduce the above copyright
-* notice, this list of conditions and the following disclaimer in the
-* documentation and/or other materials provided with the distribution.
-* * Neither the name of Intel Corporation nor the names of its contributors
-* may be used to endorse or promote products derived from this software
-* without specific prior written permission.
-*
-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#include "bvh/bvh8.h"
-
-#include "render/mesh.h"
-#include "render/object.h"
-
-#include "bvh/bvh_node.h"
-#include "bvh/bvh_unaligned.h"
-
-CCL_NAMESPACE_BEGIN
-
-BVH8::BVH8(const BVHParams &params_, const vector<Object *> &objects_) : BVH(params_, objects_)
-{
-}
-
-namespace {
-
-BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
-{
- if (node->is_leaf()) {
- return new LeafNode(*reinterpret_cast<const LeafNode *>(node));
- }
- /* Collect nodes of two layer deeper, allowing us to have more childrem in
- * an inner layer. */
- assert(node->num_children() <= 2);
- const BVHNode *children[8];
- const BVHNode *child0 = node->get_child(0);
- const BVHNode *child1 = node->get_child(1);
- int num_children = 0;
- if (child0->is_leaf()) {
- children[num_children++] = child0;
- }
- else {
- const BVHNode *child00 = child0->get_child(0), *child01 = child0->get_child(1);
- if (child00->is_leaf()) {
- children[num_children++] = child00;
- }
- else {
- children[num_children++] = child00->get_child(0);
- children[num_children++] = child00->get_child(1);
- }
- if (child01->is_leaf()) {
- children[num_children++] = child01;
- }
- else {
- children[num_children++] = child01->get_child(0);
- children[num_children++] = child01->get_child(1);
- }
- }
- if (child1->is_leaf()) {
- children[num_children++] = child1;
- }
- else {
- const BVHNode *child10 = child1->get_child(0), *child11 = child1->get_child(1);
- if (child10->is_leaf()) {
- children[num_children++] = child10;
- }
- else {
- children[num_children++] = child10->get_child(0);
- children[num_children++] = child10->get_child(1);
- }
- if (child11->is_leaf()) {
- children[num_children++] = child11;
- }
- else {
- children[num_children++] = child11->get_child(0);
- children[num_children++] = child11->get_child(1);
- }
- }
- /* Merge children in subtrees. */
- BVHNode *children4[8];
- for (int i = 0; i < num_children; ++i) {
- children4[i] = bvh_node_merge_children_recursively(children[i]);
- }
- /* Allocate new node. */
- BVHNode *node8 = new InnerNode(node->bounds, children4, num_children);
- /* TODO(sergey): Consider doing this from the InnerNode() constructor.
- * But in order to do this nicely need to think of how to pass all the
- * parameters there. */
- if (node->is_unaligned) {
- node8->is_unaligned = true;
- node8->aligned_space = new Transform();
- *node8->aligned_space = *node->aligned_space;
- }
- return node8;
-}
-
-} // namespace
-
-BVHNode *BVH8::widen_children_nodes(const BVHNode *root)
-{
- if (root == NULL) {
- return NULL;
- }
- if (root->is_leaf()) {
- return const_cast<BVHNode *>(root);
- }
- BVHNode *root8 = bvh_node_merge_children_recursively(root);
- /* TODO(sergey): Pack children nodes to parents which has less that 4
- * children. */
- return root8;
-}
-
-void BVH8::pack_leaf(const BVHStackEntry &e, const LeafNode *leaf)
-{
- float4 data[BVH_ONODE_LEAF_SIZE];
- memset(data, 0, sizeof(data));
- if (leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
- /* object */
- data[0].x = __int_as_float(~(leaf->lo));
- data[0].y = __int_as_float(0);
- }
- else {
- /* triangle */
- data[0].x = __int_as_float(leaf->lo);
- data[0].y = __int_as_float(leaf->hi);
- }
- data[0].z = __uint_as_float(leaf->visibility);
- if (leaf->num_triangles() != 0) {
- data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
- }
-
- memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4) * BVH_ONODE_LEAF_SIZE);
-}
-
-void BVH8::pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
-{
- bool has_unaligned = false;
- /* Check whether we have to create unaligned node or all nodes are aligned
- * and we can cut some corner here.
- */
- if (params.use_unaligned_nodes) {
- for (int i = 0; i < num; i++) {
- if (en[i].node->is_unaligned) {
- has_unaligned = true;
- break;
- }
- }
- }
- if (has_unaligned) {
- /* There's no unaligned children, pack into AABB node. */
- pack_unaligned_inner(e, en, num);
- }
- else {
- /* Create unaligned node with orientation transform for each of the
- * children.
- */
- pack_aligned_inner(e, en, num);
- }
-}
-
-void BVH8::pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
-{
- BoundBox bounds[8];
- int child[8];
- for (int i = 0; i < num; ++i) {
- bounds[i] = en[i].node->bounds;
- child[i] = en[i].encodeIdx();
- }
- pack_aligned_node(
- e.idx, bounds, child, e.node->visibility, e.node->time_from, e.node->time_to, num);
-}
-
-void BVH8::pack_aligned_node(int idx,
- const BoundBox *bounds,
- const int *child,
- const uint visibility,
- const float time_from,
- const float time_to,
- const int num)
-{
- float8 data[8];
- memset(data, 0, sizeof(data));
-
- data[0].a = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
- data[0].b = time_from;
- data[0].c = time_to;
-
- for (int i = 0; i < num; i++) {
- float3 bb_min = bounds[i].min;
- float3 bb_max = bounds[i].max;
-
- data[1][i] = bb_min.x;
- data[2][i] = bb_max.x;
- data[3][i] = bb_min.y;
- data[4][i] = bb_max.y;
- data[5][i] = bb_min.z;
- data[6][i] = bb_max.z;
-
- data[7][i] = __int_as_float(child[i]);
- }
-
- for (int i = num; i < 8; i++) {
- /* We store BB which would never be recorded as intersection
- * so kernel might safely assume there are always 4 child nodes.
- */
- data[1][i] = FLT_MAX;
- data[2][i] = -FLT_MAX;
-
- data[3][i] = FLT_MAX;
- data[4][i] = -FLT_MAX;
-
- data[5][i] = FLT_MAX;
- data[6][i] = -FLT_MAX;
-
- data[7][i] = __int_as_float(0);
- }
-
- memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_ONODE_SIZE);
-}
-
-void BVH8::pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
-{
- Transform aligned_space[8];
- BoundBox bounds[8];
- int child[8];
- for (int i = 0; i < num; ++i) {
- aligned_space[i] = en[i].node->get_aligned_space();
- bounds[i] = en[i].node->bounds;
- child[i] = en[i].encodeIdx();
- }
- pack_unaligned_node(e.idx,
- aligned_space,
- bounds,
- child,
- e.node->visibility,
- e.node->time_from,
- e.node->time_to,
- num);
-}
-
-void BVH8::pack_unaligned_node(int idx,
- const Transform *aligned_space,
- const BoundBox *bounds,
- const int *child,
- const uint visibility,
- const float time_from,
- const float time_to,
- const int num)
-{
- float8 data[BVH_UNALIGNED_ONODE_SIZE];
- memset(data, 0, sizeof(data));
-
- data[0].a = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
- data[0].b = time_from;
- data[0].c = time_to;
-
- for (int i = 0; i < num; i++) {
- Transform space = BVHUnaligned::compute_node_transform(bounds[i], aligned_space[i]);
-
- data[1][i] = space.x.x;
- data[2][i] = space.x.y;
- data[3][i] = space.x.z;
-
- data[4][i] = space.y.x;
- data[5][i] = space.y.y;
- data[6][i] = space.y.z;
-
- data[7][i] = space.z.x;
- data[8][i] = space.z.y;
- data[9][i] = space.z.z;
-
- data[10][i] = space.x.w;
- data[11][i] = space.y.w;
- data[12][i] = space.z.w;
-
- data[13][i] = __int_as_float(child[i]);
- }
-
- for (int i = num; i < 8; i++) {
- /* We store BB which would never be recorded as intersection
- * so kernel might safely assume there are always 4 child nodes.
- */
-
- data[1][i] = NAN;
- data[2][i] = NAN;
- data[3][i] = NAN;
-
- data[4][i] = NAN;
- data[5][i] = NAN;
- data[6][i] = NAN;
-
- data[7][i] = NAN;
- data[8][i] = NAN;
- data[9][i] = NAN;
-
- data[10][i] = NAN;
- data[11][i] = NAN;
- data[12][i] = NAN;
-
- data[13][i] = __int_as_float(0);
- }
-
- memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_UNALIGNED_ONODE_SIZE);
-}
-
-/* Quad SIMD Nodes */
-
-void BVH8::pack_nodes(const BVHNode *root)
-{
- /* Calculate size of the arrays required. */
- const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
- const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
- assert(num_leaf_nodes <= num_nodes);
- const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
- size_t node_size;
- if (params.use_unaligned_nodes) {
- const size_t num_unaligned_nodes = root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
- node_size = (num_unaligned_nodes * BVH_UNALIGNED_ONODE_SIZE) +
- (num_inner_nodes - num_unaligned_nodes) * BVH_ONODE_SIZE;
- }
- else {
- node_size = num_inner_nodes * BVH_ONODE_SIZE;
- }
- /* Resize arrays. */
- pack.nodes.clear();
- pack.leaf_nodes.clear();
- /* For top level BVH, first merge existing BVH's so we know the offsets. */
- if (params.top_level) {
- pack_instances(node_size, num_leaf_nodes * BVH_ONODE_LEAF_SIZE);
- }
- else {
- pack.nodes.resize(node_size);
- pack.leaf_nodes.resize(num_leaf_nodes * BVH_ONODE_LEAF_SIZE);
- }
-
- int nextNodeIdx = 0, nextLeafNodeIdx = 0;
-
- vector<BVHStackEntry> stack;
- stack.reserve(BVHParams::MAX_DEPTH * 2);
- if (root->is_leaf()) {
- stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
- }
- else {
- stack.push_back(BVHStackEntry(root, nextNodeIdx));
- nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_ONODE_SIZE : BVH_ONODE_SIZE;
- }
-
- while (stack.size()) {
- BVHStackEntry e = stack.back();
- stack.pop_back();
-
- if (e.node->is_leaf()) {
- /* leaf node */
- const LeafNode *leaf = reinterpret_cast<const LeafNode *>(e.node);
- pack_leaf(e, leaf);
- }
- else {
- /* Inner node. */
- /* Collect nodes. */
- const BVHNode *children[8];
- int num_children = e.node->num_children();
- /* Push entries on the stack. */
- for (int i = 0; i < num_children; ++i) {
- int idx;
- children[i] = e.node->get_child(i);
- if (children[i]->is_leaf()) {
- idx = nextLeafNodeIdx++;
- }
- else {
- idx = nextNodeIdx;
- nextNodeIdx += children[i]->has_unaligned() ? BVH_UNALIGNED_ONODE_SIZE : BVH_ONODE_SIZE;
- }
- stack.push_back(BVHStackEntry(children[i], idx));
- }
- /* Set node. */
- pack_inner(e, &stack[stack.size() - num_children], num_children);
- }
- }
-
- assert(node_size == nextNodeIdx);
- /* Root index to start traversal at, to handle case of single leaf node. */
- pack.root_index = (root->is_leaf()) ? -1 : 0;
-}
-
-void BVH8::refit_nodes()
-{
- assert(!params.top_level);
-
- BoundBox bbox = BoundBox::empty;
- uint visibility = 0;
- refit_node(0, (pack.root_index == -1) ? true : false, bbox, visibility);
-}
-
-void BVH8::refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility)
-{
- if (leaf) {
- int4 *data = &pack.leaf_nodes[idx];
- int4 c = data[0];
- /* Refit leaf node. */
- for (int prim = c.x; prim < c.y; prim++) {
- int pidx = pack.prim_index[prim];
- int tob = pack.prim_object[prim];
- Object *ob = objects[tob];
-
- if (pidx == -1) {
- /* Object instance. */
- bbox.grow(ob->bounds);
- }
- else {
- /* Primitives. */
- const Mesh *mesh = ob->mesh;
-
- if (pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
- /* Curves. */
- int str_offset = (params.top_level) ? mesh->curve_offset : 0;
- Mesh::Curve curve = mesh->get_curve(pidx - str_offset);
- int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);
-
- curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox);
-
- visibility |= PATH_RAY_CURVE;
-
- /* Motion curves. */
- if (mesh->use_motion_blur) {
- Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-
- if (attr) {
- size_t mesh_size = mesh->curve_keys.size();
- size_t steps = mesh->motion_steps - 1;
- float3 *key_steps = attr->data_float3();
-
- for (size_t i = 0; i < steps; i++) {
- curve.bounds_grow(k, key_steps + i * mesh_size, &mesh->curve_radius[0], bbox);
- }
- }
- }
- }
- else {
- /* Triangles. */
- int tri_offset = (params.top_level) ? mesh->tri_offset : 0;
- Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset);
- const float3 *vpos = &mesh->verts[0];
-
- triangle.bounds_grow(vpos, bbox);
-
- /* Motion triangles. */
- if (mesh->use_motion_blur) {
- Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-
- if (attr) {
- size_t mesh_size = mesh->verts.size();
- size_t steps = mesh->motion_steps - 1;
- float3 *vert_steps = attr->data_float3();
-
- for (size_t i = 0; i < steps; i++) {
- triangle.bounds_grow(vert_steps + i * mesh_size, bbox);
- }
- }
- }
- }
- }
-
- visibility |= ob->visibility;
- }
-
- float4 leaf_data[BVH_ONODE_LEAF_SIZE];
- leaf_data[0].x = __int_as_float(c.x);
- leaf_data[0].y = __int_as_float(c.y);
- leaf_data[0].z = __uint_as_float(visibility);
- leaf_data[0].w = __uint_as_float(c.w);
- memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4) * BVH_ONODE_LEAF_SIZE);
- }
- else {
- float8 *data = (float8 *)&pack.nodes[idx];
- bool is_unaligned = (__float_as_uint(data[0].a) & PATH_RAY_NODE_UNALIGNED) != 0;
- /* Refit inner node, set bbox from children. */
- BoundBox child_bbox[8] = {BoundBox::empty,
- BoundBox::empty,
- BoundBox::empty,
- BoundBox::empty,
- BoundBox::empty,
- BoundBox::empty,
- BoundBox::empty,
- BoundBox::empty};
- int child[8];
- uint child_visibility[8] = {0};
- int num_nodes = 0;
-
- for (int i = 0; i < 8; ++i) {
- child[i] = __float_as_int(data[(is_unaligned) ? 13 : 7][i]);
-
- if (child[i] != 0) {
- refit_node((child[i] < 0) ? -child[i] - 1 : child[i],
- (child[i] < 0),
- child_bbox[i],
- child_visibility[i]);
- ++num_nodes;
- bbox.grow(child_bbox[i]);
- visibility |= child_visibility[i];
- }
- }
-
- if (is_unaligned) {
- Transform aligned_space[8] = {transform_identity(),
- transform_identity(),
- transform_identity(),
- transform_identity(),
- transform_identity(),
- transform_identity(),
- transform_identity(),
- transform_identity()};
- pack_unaligned_node(
- idx, aligned_space, child_bbox, child, visibility, 0.0f, 1.0f, num_nodes);
- }
- else {
- pack_aligned_node(idx, child_bbox, child, visibility, 0.0f, 1.0f, num_nodes);
- }
- }
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/bvh/bvh8.h b/intern/cycles/bvh/bvh8.h
deleted file mode 100644
index fc07eadcada..00000000000
--- a/intern/cycles/bvh/bvh8.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
-* Original code Copyright 2017, Intel Corporation
-* Modifications Copyright 2018, Blender Foundation.
-*
-* Redistribution and use in source and binary forms, with or without
-* modification, are permitted provided that the following conditions are met:
-*
-* * Redistributions of source code must retain the above copyright notice,
-* this list of conditions and the following disclaimer.
-* * Redistributions in binary form must reproduce the above copyright
-* notice, this list of conditions and the following disclaimer in the
-* documentation and/or other materials provided with the distribution.
-* * Neither the name of Intel Corporation nor the names of its contributors
-* may be used to endorse or promote products derived from this software
-* without specific prior written permission.
-*
-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef __BVH8_H__
-#define __BVH8_H__
-
-#include "bvh/bvh.h"
-#include "bvh/bvh_params.h"
-
-#include "util/util_types.h"
-#include "util/util_vector.h"
-
-CCL_NAMESPACE_BEGIN
-
-class BVHNode;
-struct BVHStackEntry;
-class BVHParams;
-class BoundBox;
-class LeafNode;
-class Object;
-class Progress;
-
-#define BVH_ONODE_SIZE 16
-#define BVH_ONODE_LEAF_SIZE 1
-#define BVH_UNALIGNED_ONODE_SIZE 28
-
-/* BVH8
-*
-* Octo BVH, with each node having eight children, to use with SIMD instructions.
-*/
-class BVH8 : public BVH {
- protected:
- /* constructor */
- friend class BVH;
- BVH8(const BVHParams &params, const vector<Object *> &objects);
-
- /* Building process. */
- virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
-
- /* pack */
- void pack_nodes(const BVHNode *root) override;
-
- void pack_leaf(const BVHStackEntry &e, const LeafNode *leaf);
- void pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
-
- void pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
- void pack_aligned_node(int idx,
- const BoundBox *bounds,
- const int *child,
- const uint visibility,
- const float time_from,
- const float time_to,
- const int num);
-
- void pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
- void pack_unaligned_node(int idx,
- const Transform *aligned_space,
- const BoundBox *bounds,
- const int *child,
- const uint visibility,
- const float time_from,
- const float time_to,
- const int num);
-
- /* refit */
- void refit_nodes() override;
- void refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility);
-};
-
-CCL_NAMESPACE_END
-
-#endif /* __BVH8_H__ */
diff --git a/intern/cycles/bvh/bvh_build.cpp b/intern/cycles/bvh/bvh_build.cpp
index 1d9b006e8cb..86ab7b00815 100644
--- a/intern/cycles/bvh/bvh_build.cpp
+++ b/intern/cycles/bvh/bvh_build.cpp
@@ -22,64 +22,23 @@
#include "bvh/bvh_params.h"
#include "bvh_split.h"
+#include "render/curves.h"
+#include "render/hair.h"
#include "render/mesh.h"
#include "render/object.h"
#include "render/scene.h"
-#include "render/curves.h"
#include "util/util_algorithm.h"
#include "util/util_foreach.h"
#include "util/util_logging.h"
#include "util/util_progress.h"
-#include "util/util_stack_allocator.h"
+#include "util/util_queue.h"
#include "util/util_simd.h"
+#include "util/util_stack_allocator.h"
#include "util/util_time.h"
-#include "util/util_queue.h"
CCL_NAMESPACE_BEGIN
-/* BVH Build Task */
-
-class BVHBuildTask : public Task {
- public:
- BVHBuildTask(
- BVHBuild *build, InnerNode *node, int child, const BVHObjectBinning &range, int level)
- : range_(range)
- {
- run = function_bind(&BVHBuild::thread_build_node, build, node, child, &range_, level);
- }
-
- private:
- BVHObjectBinning range_;
-};
-
-class BVHSpatialSplitBuildTask : public Task {
- public:
- BVHSpatialSplitBuildTask(BVHBuild *build,
- InnerNode *node,
- int child,
- const BVHRange &range,
- const vector<BVHReference> &references,
- int level)
- : range_(range),
- references_(references.begin() + range.start(), references.begin() + range.end())
- {
- range_.set_start(0);
- run = function_bind(&BVHBuild::thread_build_spatial_split_node,
- build,
- node,
- child,
- &range_,
- &references_,
- level,
- _1);
- }
-
- private:
- BVHRange range_;
- vector<BVHReference> references_;
-};
-
/* Constructor / Destructor */
BVHBuild::BVHBuild(const vector<Object *> &objects_,
@@ -194,23 +153,30 @@ void BVHBuild::add_reference_triangles(BoundBox &root, BoundBox &center, Mesh *m
}
}
-void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Mesh *mesh, int i)
+void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Hair *hair, int i)
{
const Attribute *curve_attr_mP = NULL;
- if (mesh->has_motion_blur()) {
- curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if (hair->has_motion_blur()) {
+ curve_attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
}
- const size_t num_curves = mesh->num_curves();
+
+ const PrimitiveType primitive_type =
+ (curve_attr_mP != NULL) ?
+ ((hair->curve_shape == CURVE_RIBBON) ? PRIMITIVE_MOTION_CURVE_RIBBON :
+ PRIMITIVE_MOTION_CURVE_THICK) :
+ ((hair->curve_shape == CURVE_RIBBON) ? PRIMITIVE_CURVE_RIBBON : PRIMITIVE_CURVE_THICK);
+
+ const size_t num_curves = hair->num_curves();
for (uint j = 0; j < num_curves; j++) {
- const Mesh::Curve curve = mesh->get_curve(j);
- const float *curve_radius = &mesh->curve_radius[0];
+ const Hair::Curve curve = hair->get_curve(j);
+ const float *curve_radius = &hair->curve_radius[0];
for (int k = 0; k < curve.num_keys - 1; k++) {
if (curve_attr_mP == NULL) {
/* Really simple logic for static hair. */
BoundBox bounds = BoundBox::empty;
- curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
+ curve.bounds_grow(k, &hair->curve_keys[0], curve_radius, bounds);
if (bounds.valid()) {
- int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_CURVE, k);
+ int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
references.push_back(BVHReference(bounds, j, i, packed_type));
root.grow(bounds);
center.grow(bounds.center2());
@@ -223,15 +189,15 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Mesh *mesh
*/
/* TODO(sergey): Support motion steps for spatially split BVH. */
BoundBox bounds = BoundBox::empty;
- curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
- const size_t num_keys = mesh->curve_keys.size();
- const size_t num_steps = mesh->motion_steps;
+ curve.bounds_grow(k, &hair->curve_keys[0], curve_radius, bounds);
+ const size_t num_keys = hair->curve_keys.size();
+ const size_t num_steps = hair->motion_steps;
const float3 *key_steps = curve_attr_mP->data_float3();
for (size_t step = 0; step < num_steps - 1; step++) {
curve.bounds_grow(k, key_steps + step * num_keys, curve_radius, bounds);
}
if (bounds.valid()) {
- int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
+ int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
references.push_back(BVHReference(bounds, j, i, packed_type));
root.grow(bounds);
center.grow(bounds.center2());
@@ -244,10 +210,10 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Mesh *mesh
*/
const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
- const size_t num_steps = mesh->motion_steps;
- const float3 *curve_keys = &mesh->curve_keys[0];
+ const size_t num_steps = hair->motion_steps;
+ const float3 *curve_keys = &hair->curve_keys[0];
const float3 *key_steps = curve_attr_mP->data_float3();
- const size_t num_keys = mesh->curve_keys.size();
+ const size_t num_keys = hair->curve_keys.size();
/* Calculate bounding box of the previous time step.
* Will be reused later to avoid duplicated work on
* calculating BVH time step boundbox.
@@ -287,7 +253,7 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Mesh *mesh
bounds.grow(curr_bounds);
if (bounds.valid()) {
const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
- int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
+ int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
references.push_back(BVHReference(bounds, j, i, packed_type, prev_time, curr_time));
root.grow(bounds);
center.grow(bounds.center2());
@@ -302,13 +268,15 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Mesh *mesh
}
}
-void BVHBuild::add_reference_mesh(BoundBox &root, BoundBox &center, Mesh *mesh, int i)
+void BVHBuild::add_reference_geometry(BoundBox &root, BoundBox &center, Geometry *geom, int i)
{
- if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
add_reference_triangles(root, center, mesh, i);
}
- if (params.primitive_mask & PRIMITIVE_ALL_CURVE) {
- add_reference_curves(root, center, mesh, i);
+ else if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ add_reference_curves(root, center, hair, i);
}
}
@@ -319,16 +287,30 @@ void BVHBuild::add_reference_object(BoundBox &root, BoundBox &center, Object *ob
center.grow(ob->bounds.center2());
}
-static size_t count_curve_segments(Mesh *mesh)
+static size_t count_curve_segments(Hair *hair)
{
- size_t num = 0, num_curves = mesh->num_curves();
+ size_t num = 0, num_curves = hair->num_curves();
for (size_t i = 0; i < num_curves; i++)
- num += mesh->get_curve(i).num_keys - 1;
+ num += hair->get_curve(i).num_keys - 1;
return num;
}
+static size_t count_primitives(Geometry *geom)
+{
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ return mesh->num_triangles();
+ }
+ else if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ return count_curve_segments(hair);
+ }
+
+ return 0;
+}
+
void BVHBuild::add_references(BVHRange &root)
{
/* reserve space for references */
@@ -339,24 +321,14 @@ void BVHBuild::add_references(BVHRange &root)
if (!ob->is_traceable()) {
continue;
}
- if (!ob->mesh->is_instanced()) {
- if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
- num_alloc_references += ob->mesh->num_triangles();
- }
- if (params.primitive_mask & PRIMITIVE_ALL_CURVE) {
- num_alloc_references += count_curve_segments(ob->mesh);
- }
+ if (!ob->geometry->is_instanced()) {
+ num_alloc_references += count_primitives(ob->geometry);
}
else
num_alloc_references++;
}
else {
- if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
- num_alloc_references += ob->mesh->num_triangles();
- }
- if (params.primitive_mask & PRIMITIVE_ALL_CURVE) {
- num_alloc_references += count_curve_segments(ob->mesh);
- }
+ num_alloc_references += count_primitives(ob->geometry);
}
}
@@ -372,13 +344,13 @@ void BVHBuild::add_references(BVHRange &root)
++i;
continue;
}
- if (!ob->mesh->is_instanced())
- add_reference_mesh(bounds, center, ob->mesh, i);
+ if (!ob->geometry->is_instanced())
+ add_reference_geometry(bounds, center, ob->geometry, i);
else
add_reference_object(bounds, center, ob, i);
}
else
- add_reference_mesh(bounds, center, ob->mesh, i);
+ add_reference_geometry(bounds, center, ob->geometry, i);
i++;
@@ -416,22 +388,6 @@ BVHNode *BVHBuild::run()
}
spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha;
- if (params.use_spatial_split) {
- /* NOTE: The API here tries to be as much ready for multi-threaded build
- * as possible, but at the same time it tries not to introduce any
- * changes in behavior for until all refactoring needed for threading is
- * finished.
- *
- * So we currently allocate single storage for now, which is only used by
- * the only thread working on the spatial BVH build.
- */
- spatial_storage.resize(TaskScheduler::num_threads() + 1);
- size_t num_bins = max(root.size(), (int)BVHParams::NUM_SPATIAL_BINS) - 1;
- foreach (BVHSpatialStorage &storage, spatial_storage) {
- storage.right_bounds.clear();
- }
- spatial_storage[0].right_bounds.resize(num_bins);
- }
spatial_free_index = 0;
need_prim_time = params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0;
@@ -458,7 +414,8 @@ BVHNode *BVHBuild::run()
if (params.use_spatial_split) {
/* Perform multithreaded spatial split build. */
- rootnode = build_node(root, &references, 0, 0);
+ BVHSpatialStorage *local_storage = &spatial_storage.local();
+ rootnode = build_node(root, references, 0, local_storage);
task_pool.wait_work();
}
else {
@@ -468,6 +425,9 @@ BVHNode *BVHBuild::run()
task_pool.wait_work();
}
+  /* Clean up temporary per-thread memory used during the build. */
+ spatial_storage.clear();
+
/* delete if we canceled */
if (rootnode) {
if (progress.get_cancel()) {
@@ -522,41 +482,46 @@ void BVHBuild::progress_update()
progress_start_time = time_dt();
}
-void BVHBuild::thread_build_node(InnerNode *inner, int child, BVHObjectBinning *range, int level)
+void BVHBuild::thread_build_node(InnerNode *inner,
+ int child,
+ const BVHObjectBinning &range,
+ int level)
{
if (progress.get_cancel())
return;
/* build nodes */
- BVHNode *node = build_node(*range, level);
+ BVHNode *node = build_node(range, level);
/* set child in inner node */
inner->children[child] = node;
/* update progress */
- if (range->size() < THREAD_TASK_SIZE) {
+ if (range.size() < THREAD_TASK_SIZE) {
/*rotate(node, INT_MAX, 5);*/
thread_scoped_lock lock(build_mutex);
- progress_count += range->size();
+ progress_count += range.size();
progress_update();
}
}
void BVHBuild::thread_build_spatial_split_node(InnerNode *inner,
int child,
- BVHRange *range,
- vector<BVHReference> *references,
- int level,
- int thread_id)
+ const BVHRange &range,
+ vector<BVHReference> &references,
+ int level)
{
if (progress.get_cancel()) {
return;
}
+ /* Get per-thread memory for spatial split. */
+ BVHSpatialStorage *local_storage = &spatial_storage.local();
+
/* build nodes */
- BVHNode *node = build_node(*range, references, level, thread_id);
+ BVHNode *node = build_node(range, references, level, local_storage);
/* set child in inner node */
inner->children[child] = node;
@@ -579,14 +544,22 @@ bool BVHBuild::range_within_max_leaf_size(const BVHRange &range,
for (int i = 0; i < size; i++) {
const BVHReference &ref = references[range.start() + i];
- if (ref.prim_type() & PRIMITIVE_CURVE)
- num_curves++;
- if (ref.prim_type() & PRIMITIVE_MOTION_CURVE)
- num_motion_curves++;
- else if (ref.prim_type() & PRIMITIVE_TRIANGLE)
- num_triangles++;
- else if (ref.prim_type() & PRIMITIVE_MOTION_TRIANGLE)
- num_motion_triangles++;
+ if (ref.prim_type() & PRIMITIVE_ALL_CURVE) {
+ if (ref.prim_type() & PRIMITIVE_ALL_MOTION) {
+ num_motion_curves++;
+ }
+ else {
+ num_curves++;
+ }
+ }
+ else if (ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) {
+ if (ref.prim_type() & PRIMITIVE_ALL_MOTION) {
+ num_motion_triangles++;
+ }
+ else {
+ num_triangles++;
+ }
+ }
}
return (num_triangles <= params.max_triangle_leaf_size) &&
@@ -668,8 +641,8 @@ BVHNode *BVHBuild::build_node(const BVHObjectBinning &range, int level)
/* Threaded build */
inner = new InnerNode(bounds);
- task_pool.push(new BVHBuildTask(this, inner, 0, left, level + 1), true);
- task_pool.push(new BVHBuildTask(this, inner, 1, right, level + 1), true);
+ task_pool.push([=] { thread_build_node(inner, 0, left, level + 1); });
+ task_pool.push([=] { thread_build_node(inner, 1, right, level + 1); });
}
if (do_unalinged_split) {
@@ -681,9 +654,9 @@ BVHNode *BVHBuild::build_node(const BVHObjectBinning &range, int level)
/* multithreaded spatial split builder */
BVHNode *BVHBuild::build_node(const BVHRange &range,
- vector<BVHReference> *references,
+ vector<BVHReference> &references,
int level,
- int thread_id)
+ BVHSpatialStorage *storage)
{
/* Update progress.
*
@@ -700,18 +673,17 @@ BVHNode *BVHBuild::build_node(const BVHRange &range,
if (!(range.size() > 0 && params.top_level && level == 0)) {
if (params.small_enough_for_leaf(range.size(), level)) {
progress_count += range.size();
- return create_leaf_node(range, *references);
+ return create_leaf_node(range, references);
}
}
/* Perform splitting test. */
- BVHSpatialStorage *storage = &spatial_storage[thread_id];
BVHMixedSplit split(this, storage, range, references, level);
if (!(range.size() > 0 && params.top_level && level == 0)) {
if (split.no_split) {
progress_count += range.size();
- return create_leaf_node(range, *references);
+ return create_leaf_node(range, references);
}
}
float leafSAH = params.sah_primitive_cost * split.leafSAH;
@@ -724,7 +696,7 @@ BVHNode *BVHBuild::build_node(const BVHRange &range,
Transform aligned_space;
bool do_unalinged_split = false;
if (params.use_unaligned_nodes && splitSAH > params.unaligned_split_threshold * leafSAH) {
- aligned_space = unaligned_heuristic.compute_aligned_space(range, &references->at(0));
+ aligned_space = unaligned_heuristic.compute_aligned_space(range, &references.at(0));
unaligned_split = BVHMixedSplit(
this, storage, range, references, level, &unaligned_heuristic, &aligned_space);
/* unalignedLeafSAH = params.sah_primitive_cost * split.leafSAH; */
@@ -750,8 +722,7 @@ BVHNode *BVHBuild::build_node(const BVHRange &range,
BoundBox bounds;
if (do_unalinged_split) {
- bounds = unaligned_heuristic.compute_aligned_boundbox(
- range, &references->at(0), aligned_space);
+ bounds = unaligned_heuristic.compute_aligned_boundbox(range, &references.at(0), aligned_space);
}
else {
bounds = range.bounds();
@@ -763,24 +734,35 @@ BVHNode *BVHBuild::build_node(const BVHRange &range,
/* Local build. */
/* Build left node. */
- vector<BVHReference> copy(references->begin() + right.start(),
- references->begin() + right.end());
+ vector<BVHReference> right_references(references.begin() + right.start(),
+ references.begin() + right.end());
right.set_start(0);
- BVHNode *leftnode = build_node(left, references, level + 1, thread_id);
+ BVHNode *leftnode = build_node(left, references, level + 1, storage);
/* Build right node. */
- BVHNode *rightnode = build_node(right, &copy, level + 1, thread_id);
+ BVHNode *rightnode = build_node(right, right_references, level + 1, storage);
inner = new InnerNode(bounds, leftnode, rightnode);
}
else {
/* Threaded build. */
inner = new InnerNode(bounds);
- task_pool.push(new BVHSpatialSplitBuildTask(this, inner, 0, left, *references, level + 1),
- true);
- task_pool.push(new BVHSpatialSplitBuildTask(this, inner, 1, right, *references, level + 1),
- true);
+
+ vector<BVHReference> left_references(references.begin() + left.start(),
+ references.begin() + left.end());
+ vector<BVHReference> right_references(references.begin() + right.start(),
+ references.begin() + right.end());
+ right.set_start(0);
+
+    /* Create tasks for the left and right children, capturing small arguments
+     * by copy and moving the reference vectors to avoid extra memory copies. */
+ task_pool.push([=, refs = std::move(left_references)]() mutable {
+ thread_build_spatial_split_node(inner, 0, left, refs, level + 1);
+ });
+ task_pool.push([=, refs = std::move(right_references)]() mutable {
+ thread_build_spatial_split_node(inner, 1, right, refs, level + 1);
+ });
}
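Editorial note: the task pushes above rely on C++14 init-capture to move each per-child reference array into its closure, so queuing a task copies the small arguments but not the vectors. A minimal generic illustration of the pattern (the helper names are hypothetical, not Cycles API):

/* Sketch: 'refs' is moved into the closure rather than copied; 'mutable'
 * lets the task body use or modify its own instance. */
vector<BVHReference> refs = gather_references(); /* hypothetical source */
task_pool.push([=, refs = std::move(refs)]() mutable {
  process_references(refs); /* hypothetical consumer */
});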
if (do_unalinged_split) {
@@ -878,9 +860,6 @@ BVHNode *BVHBuild::create_leaf_node(const BVHRange &range, const vector<BVHRefer
bounds[type_index].grow(ref.bounds());
visibility[type_index] |= objects[ref.prim_object()]->visibility_for_tracing();
- if (ref.prim_type() & PRIMITIVE_ALL_CURVE) {
- visibility[type_index] |= PATH_RAY_CURVE;
- }
++num_new_prims;
}
else {
diff --git a/intern/cycles/bvh/bvh_build.h b/intern/cycles/bvh/bvh_build.h
index 9685e26cfac..c35af083fbd 100644
--- a/intern/cycles/bvh/bvh_build.h
+++ b/intern/cycles/bvh/bvh_build.h
@@ -35,6 +35,8 @@ class BVHNode;
class BVHSpatialSplitBuildTask;
class BVHParams;
class InnerNode;
+class Geometry;
+class Hair;
class Mesh;
class Object;
class Progress;
@@ -65,16 +67,16 @@ class BVHBuild {
/* Adding references. */
void add_reference_triangles(BoundBox &root, BoundBox &center, Mesh *mesh, int i);
- void add_reference_curves(BoundBox &root, BoundBox &center, Mesh *mesh, int i);
- void add_reference_mesh(BoundBox &root, BoundBox &center, Mesh *mesh, int i);
+ void add_reference_curves(BoundBox &root, BoundBox &center, Hair *hair, int i);
+ void add_reference_geometry(BoundBox &root, BoundBox &center, Geometry *geom, int i);
void add_reference_object(BoundBox &root, BoundBox &center, Object *ob, int i);
void add_references(BVHRange &root);
/* Building. */
BVHNode *build_node(const BVHRange &range,
- vector<BVHReference> *references,
+ vector<BVHReference> &references,
int level,
- int thread_id);
+ BVHSpatialStorage *storage);
BVHNode *build_node(const BVHObjectBinning &range, int level);
BVHNode *create_leaf_node(const BVHRange &range, const vector<BVHReference> &references);
BVHNode *create_object_leaf_nodes(const BVHReference *ref, int start, int num);
@@ -84,13 +86,12 @@ class BVHBuild {
/* Threads. */
enum { THREAD_TASK_SIZE = 4096 };
- void thread_build_node(InnerNode *node, int child, BVHObjectBinning *range, int level);
+ void thread_build_node(InnerNode *node, int child, const BVHObjectBinning &range, int level);
void thread_build_spatial_split_node(InnerNode *node,
int child,
- BVHRange *range,
- vector<BVHReference> *references,
- int level,
- int thread_id);
+ const BVHRange &range,
+ vector<BVHReference> &references,
+ int level);
thread_mutex build_mutex;
/* Progress. */
@@ -125,7 +126,7 @@ class BVHBuild {
/* Spatial splitting. */
float spatial_min_overlap;
- vector<BVHSpatialStorage> spatial_storage;
+ enumerable_thread_specific<BVHSpatialStorage> spatial_storage;
size_t spatial_free_index;
thread_spin_lock spatial_spin_lock;
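Editorial note: with spatial_storage now an enumerable_thread_specific container, each worker thread obtains its own scratch storage lazily instead of being handed a thread_id. A minimal sketch of the intended access pattern, assuming TBB-style local() semantics (the exact call site is outside the hunks shown here):

/* Sketch: fetch (and lazily create) this thread's spatial storage, then pass
 * it down the recursive build instead of a thread index. */
BVHSpatialStorage *storage = &spatial_storage.local();
BVHNode *node = build_node(range, references, level, storage);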
diff --git a/intern/cycles/bvh/bvh_embree.cpp b/intern/cycles/bvh/bvh_embree.cpp
index 5ef9622aba2..17e1f86a589 100644
--- a/intern/cycles/bvh/bvh_embree.cpp
+++ b/intern/cycles/bvh/bvh_embree.cpp
@@ -14,47 +14,52 @@
* limitations under the License.
*/
-/* This class implemens a ray accelerator for Cycles using Intel's Embree library.
+/* This class implements a ray accelerator for Cycles using Intel's Embree library.
* It supports triangles, curves, object and deformation blur and instancing.
- * Not supported are thick line segments, those have no native equivalent in Embree.
- * They could be implemented using Embree's thick curves, at the expense of wasted memory.
- * User defined intersections for Embree could also be an option, but since Embree only uses aligned BVHs
- * for user geometry, this would come with reduced performance and/or higher memory usage.
*
- * Since Embree allows object to be either curves or triangles but not both, Cycles object IDs are maapped
- * to Embree IDs by multiplying by two and adding one for curves.
+ * Since Embree allows an object to be either curves or triangles but not both, Cycles object IDs are
+ * mapped to Embree IDs by multiplying by two and adding one for curves.
*
- * This implementation shares RTCDevices between Cycles instances. Eventually each instance should get
- * a separate RTCDevice to correctly keep track of memory usage.
+ * This implementation shares RTCDevices between Cycles instances. Eventually each instance should
+ * get a separate RTCDevice to correctly keep track of memory usage.
*
- * Vertex and index buffers are duplicated between Cycles device arrays and Embree. These could be merged,
- * which would requrie changes to intersection refinement, shader setup, mesh light sampling and a few
- * other places in Cycles where direct access to vertex data is required.
+ * Vertex and index buffers are duplicated between Cycles device arrays and Embree. These could be
+ * merged, which would require changes to intersection refinement, shader setup, mesh light
+ * sampling and a few other places in Cycles where direct access to vertex data is required.
*/
#ifdef WITH_EMBREE
+# include <embree3/rtcore_geometry.h>
# include <pmmintrin.h>
# include <xmmintrin.h>
-# include <embree3/rtcore_geometry.h>
# include "bvh/bvh_embree.h"
-/* Kernel includes are necessary so that the filter function for Embree can access the packed BVH. */
+/* Kernel includes are necessary so that the filter function for Embree can access the packed BVH.
+ */
# include "kernel/bvh/bvh_embree.h"
# include "kernel/kernel_compat_cpu.h"
-# include "kernel/split/kernel_split_data_types.h"
# include "kernel/kernel_globals.h"
# include "kernel/kernel_random.h"
+# include "kernel/split/kernel_split_data_types.h"
+# include "render/hair.h"
# include "render/mesh.h"
# include "render/object.h"
+
# include "util/util_foreach.h"
# include "util/util_logging.h"
# include "util/util_progress.h"
+# include "util/util_stats.h"
CCL_NAMESPACE_BEGIN
+static_assert(Object::MAX_MOTION_STEPS <= RTC_MAX_TIME_STEP_COUNT,
+ "Object and Embree max motion steps inconsistent");
+static_assert(Object::MAX_MOTION_STEPS == Geometry::MAX_MOTION_STEPS,
+ "Object and Geometry max motion steps inconsistent");
+
# define IS_HAIR(x) (x & 1)
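For readability, the ID mapping described in the header comment can be summarized by the following hypothetical helpers; the actual code simply inlines the arithmetic at the rtcAttachGeometryByID() call sites:

/* Sketch only: Cycles object index <-> Embree geometry ID. */
static unsigned rtc_id_from_object(int object_id, bool is_hair)
{
  return object_id * 2 + (is_hair ? 1 : 0);
}

static int object_from_rtc_id(unsigned geom_id)
{
  return geom_id / 2; /* IS_HAIR(geom_id) recovers whether it was a curve. */
}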
/* This gets called by Embree at every valid ray/object intersection.
@@ -62,30 +67,9 @@ CCL_NAMESPACE_BEGIN
* as well as filtering for volume objects happen here.
* Cycles' own BVH does that directly inside the traversal calls.
*/
-static void rtc_filter_func(const RTCFilterFunctionNArguments *args)
-{
- /* Current implementation in Cycles assumes only single-ray intersection queries. */
- assert(args->N == 1);
-
- const RTCRay *ray = (RTCRay *)args->ray;
- const RTCHit *hit = (RTCHit *)args->hit;
- CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
- KernelGlobals *kg = ctx->kg;
-
- /* Check if there is backfacing hair to ignore. */
- if (IS_HAIR(hit->geomID) && (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) &&
- !(kernel_data.curve.curveflags & CURVE_KN_BACKFACING) &&
- !(kernel_data.curve.curveflags & CURVE_KN_RIBBONS)) {
- if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
- make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
- *args->valid = 0;
- return;
- }
- }
-}
-
static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
{
+ /* Current implementation in Cycles assumes only single-ray intersection queries. */
assert(args->N == 1);
const RTCRay *ray = (RTCRay *)args->ray;
@@ -93,17 +77,6 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
KernelGlobals *kg = ctx->kg;
- /* For all ray types: Check if there is backfacing hair to ignore */
- if (IS_HAIR(hit->geomID) && (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) &&
- !(kernel_data.curve.curveflags & CURVE_KN_BACKFACING) &&
- !(kernel_data.curve.curveflags & CURVE_KN_RIBBONS)) {
- if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
- make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
- *args->valid = 0;
- return;
- }
- }
-
switch (ctx->type) {
case CCLIntersectContext::RAY_SHADOW_ALL: {
/* Append the intersection to the end of the array. */
@@ -144,52 +117,68 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
}
break;
}
+ case CCLIntersectContext::RAY_LOCAL:
case CCLIntersectContext::RAY_SSS: {
+ /* Check if it's hitting the correct object. */
+ Intersection current_isect;
+ if (ctx->type == CCLIntersectContext::RAY_SSS) {
+ kernel_embree_convert_sss_hit(kg, ray, hit, &current_isect, ctx->local_object_id);
+ }
+ else {
+ kernel_embree_convert_hit(kg, ray, hit, &current_isect);
+ if (ctx->local_object_id != current_isect.object) {
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ }
+ }
+
/* No intersection information requested, just return a hit. */
if (ctx->max_hits == 0) {
break;
}
/* Ignore curves. */
- if (hit->geomID & 1) {
+ if (IS_HAIR(hit->geomID)) {
/* This tells Embree to continue tracing. */
*args->valid = 0;
break;
}
/* See triangle_intersect_subsurface() for the native equivalent. */
- for (int i = min(ctx->max_hits, ctx->ss_isect->num_hits) - 1; i >= 0; --i) {
- if (ctx->ss_isect->hits[i].t == ray->tfar) {
+ for (int i = min(ctx->max_hits, ctx->local_isect->num_hits) - 1; i >= 0; --i) {
+ if (ctx->local_isect->hits[i].t == ray->tfar) {
/* This tells Embree to continue tracing. */
*args->valid = 0;
break;
}
}
- ++ctx->ss_isect->num_hits;
- int hit_idx;
+ int hit_idx = 0;
- if (ctx->ss_isect->num_hits <= ctx->max_hits) {
- hit_idx = ctx->ss_isect->num_hits - 1;
- }
- else {
- /* reservoir sampling: if we are at the maximum number of
- * hits, randomly replace element or skip it */
- hit_idx = lcg_step_uint(ctx->lcg_state) % ctx->ss_isect->num_hits;
+ if (ctx->lcg_state) {
- if (hit_idx >= ctx->max_hits) {
- /* This tells Embree to continue tracing. */
- *args->valid = 0;
- break;
+ ++ctx->local_isect->num_hits;
+ if (ctx->local_isect->num_hits <= ctx->max_hits) {
+ hit_idx = ctx->local_isect->num_hits - 1;
+ }
+ else {
+ /* reservoir sampling: if we are at the maximum number of
+ * hits, randomly replace element or skip it */
+ hit_idx = lcg_step_uint(ctx->lcg_state) % ctx->local_isect->num_hits;
+
+ if (hit_idx >= ctx->max_hits) {
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ break;
+ }
}
}
+ else {
+ ctx->local_isect->num_hits = 1;
+ }
/* record intersection */
- kernel_embree_convert_local_hit(
- kg, ray, hit, &ctx->ss_isect->hits[hit_idx], ctx->sss_object_id);
- ctx->ss_isect->Ng[hit_idx].x = hit->Ng_x;
- ctx->ss_isect->Ng[hit_idx].y = hit->Ng_y;
- ctx->ss_isect->Ng[hit_idx].z = hit->Ng_z;
- ctx->ss_isect->Ng[hit_idx] = normalize(ctx->ss_isect->Ng[hit_idx]);
+ ctx->local_isect->hits[hit_idx] = current_isect;
+ ctx->local_isect->Ng[hit_idx] = normalize(make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z));
/* This tells Embree to continue tracing. */
*args->valid = 0;
break;
@@ -230,6 +219,34 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
}
}
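For reference, the reservoir-sampling rule used in the RAY_LOCAL/RAY_SSS branch above can be condensed into a small hypothetical helper (same lcg_step_uint() generator as the kernel code; a return value of -1 means the hit is skipped):

/* Sketch: choose a slot for the n-th hit so that at most max_hits hits are
 * kept, each with equal probability. */
static int reservoir_hit_slot(uint *lcg_state, int num_hits, int max_hits)
{
  if (num_hits <= max_hits) {
    return num_hits - 1; /* Reservoir not full yet, append. */
  }
  const int slot = lcg_step_uint(lcg_state) % num_hits;
  return (slot < max_hits) ? slot : -1; /* -1: skip this hit. */
}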
+static void rtc_filter_func_thick_curve(const RTCFilterFunctionNArguments *args)
+{
+ const RTCRay *ray = (RTCRay *)args->ray;
+ RTCHit *hit = (RTCHit *)args->hit;
+
+ /* Always ignore backfacing intersections. */
+ if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
+ make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
+ *args->valid = 0;
+ return;
+ }
+}
+
+static void rtc_filter_occluded_func_thick_curve(const RTCFilterFunctionNArguments *args)
+{
+ const RTCRay *ray = (RTCRay *)args->ray;
+ RTCHit *hit = (RTCHit *)args->hit;
+
+ /* Always ignore backfacing intersections. */
+ if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
+ make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
+ *args->valid = 0;
+ return;
+ }
+
+ rtc_filter_occluded_func(args);
+}
+
static size_t unaccounted_mem = 0;
static bool rtc_memory_monitor_func(void *userPtr, const ssize_t bytes, const bool)
@@ -283,16 +300,30 @@ RTCDevice BVHEmbree::rtc_shared_device = NULL;
int BVHEmbree::rtc_shared_users = 0;
thread_mutex BVHEmbree::rtc_shared_mutex;
-BVHEmbree::BVHEmbree(const BVHParams &params_, const vector<Object *> &objects_)
- : BVH(params_, objects_),
+static size_t count_primitives(Geometry *geom)
+{
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ return mesh->num_triangles();
+ }
+ else if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ return hair->num_segments();
+ }
+
+ return 0;
+}
+
+BVHEmbree::BVHEmbree(const BVHParams &params_,
+ const vector<Geometry *> &geometry_,
+ const vector<Object *> &objects_)
+ : BVH(params_, geometry_, objects_),
scene(NULL),
mem_used(0),
top_level(NULL),
stats(NULL),
curve_subdivisions(params.curve_subdivisions),
build_quality(RTC_BUILD_QUALITY_REFIT),
- use_curves(params_.curve_flags & CURVE_KN_INTERPOLATE),
- use_ribbons(params.curve_flags & CURVE_KN_RIBBONS),
dynamic_scene(true)
{
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
@@ -305,7 +336,7 @@ BVHEmbree::BVHEmbree(const BVHParams &params_, const vector<Object *> &objects_)
if (ret != 1) {
assert(0);
VLOG(1) << "Embree is compiled without the RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED flag."
- "Ray visiblity will not work.";
+ "Ray visibility will not work.";
}
ret = rtcGetDeviceProperty(rtc_shared_device, RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED);
if (ret != 1) {
@@ -416,29 +447,15 @@ void BVHEmbree::build(Progress &progress, Stats *stats_)
if (!ob->is_traceable()) {
continue;
}
- if (!ob->mesh->is_instanced()) {
- if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
- prim_count += ob->mesh->num_triangles();
- }
- if (params.primitive_mask & PRIMITIVE_ALL_CURVE) {
- for (size_t j = 0; j < ob->mesh->num_curves(); ++j) {
- prim_count += ob->mesh->get_curve(j).num_segments();
- }
- }
+ if (!ob->geometry->is_instanced()) {
+ prim_count += count_primitives(ob->geometry);
}
else {
++prim_count;
}
}
else {
- if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE && ob->mesh->num_triangles() > 0) {
- prim_count += ob->mesh->num_triangles();
- }
- if (params.primitive_mask & PRIMITIVE_ALL_CURVE) {
- for (size_t j = 0; j < ob->mesh->num_curves(); ++j) {
- prim_count += ob->mesh->get_curve(j).num_segments();
- }
- }
+ prim_count += count_primitives(ob->geometry);
}
}
@@ -457,7 +474,7 @@ void BVHEmbree::build(Progress &progress, Stats *stats_)
++i;
continue;
}
- if (!ob->mesh->is_instanced()) {
+ if (!ob->geometry->is_instanced()) {
add_object(ob, i);
}
else {
@@ -495,6 +512,11 @@ void BVHEmbree::build(Progress &progress, Stats *stats_)
stats = NULL;
}
+void BVHEmbree::copy_to_device(Progress & /*progress*/, DeviceScene *dscene)
+{
+ dscene->data.bvh.scene = scene;
+}
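The new copy_to_device() hook separates host-side BVH construction from the device upload; for Embree only the RTCScene handle has to reach the device scene data. A hypothetical caller sketch (the surrounding scene-update code is outside this patch):

/* Sketch: build on the host, then publish the result to the device scene. */
bvh->build(progress, stats);
if (!progress.get_cancel()) {
  bvh->copy_to_device(progress, dscene);
}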
+
BVHNode *BVHEmbree::widen_children_nodes(const BVHNode * /*root*/)
{
assert(!"Must not be called.");
@@ -503,36 +525,57 @@ BVHNode *BVHEmbree::widen_children_nodes(const BVHNode * /*root*/)
void BVHEmbree::add_object(Object *ob, int i)
{
- Mesh *mesh = ob->mesh;
- if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE && mesh->num_triangles() > 0) {
- add_triangles(ob, i);
+ Geometry *geom = ob->geometry;
+
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (mesh->num_triangles() > 0) {
+ add_triangles(ob, mesh, i);
+ }
}
- if (params.primitive_mask & PRIMITIVE_ALL_CURVE && mesh->num_curves() > 0) {
- add_curves(ob, i);
+ else if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ if (hair->num_curves() > 0) {
+ add_curves(ob, hair, i);
+ }
}
}
void BVHEmbree::add_instance(Object *ob, int i)
{
- if (!ob || !ob->mesh) {
+ if (!ob || !ob->geometry) {
assert(0);
return;
}
- BVHEmbree *instance_bvh = (BVHEmbree *)(ob->mesh->bvh);
+ BVHEmbree *instance_bvh = (BVHEmbree *)(ob->geometry->bvh);
if (instance_bvh->top_level != this) {
instance_bvh->top_level = this;
}
- const size_t num_motion_steps = ob->use_motion() ? ob->motion.size() : 1;
+ const size_t num_object_motion_steps = ob->use_motion() ? ob->motion.size() : 1;
+ const size_t num_motion_steps = min(num_object_motion_steps, RTC_MAX_TIME_STEP_COUNT);
+ assert(num_object_motion_steps <= RTC_MAX_TIME_STEP_COUNT);
+
RTCGeometry geom_id = rtcNewGeometry(rtc_shared_device, RTC_GEOMETRY_TYPE_INSTANCE);
rtcSetGeometryInstancedScene(geom_id, instance_bvh->scene);
rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
if (ob->use_motion()) {
+ array<DecomposedTransform> decomp(ob->motion.size());
+ transform_motion_decompose(decomp.data(), ob->motion.data(), ob->motion.size());
for (size_t step = 0; step < num_motion_steps; ++step) {
- rtcSetGeometryTransform(
- geom_id, step, RTC_FORMAT_FLOAT3X4_ROW_MAJOR, (const float *)&ob->motion[step]);
+ RTCQuaternionDecomposition rtc_decomp;
+ rtcInitQuaternionDecomposition(&rtc_decomp);
+ rtcQuaternionDecompositionSetQuaternion(
+ &rtc_decomp, decomp[step].x.w, decomp[step].x.x, decomp[step].x.y, decomp[step].x.z);
+ rtcQuaternionDecompositionSetScale(
+ &rtc_decomp, decomp[step].y.w, decomp[step].z.w, decomp[step].w.w);
+ rtcQuaternionDecompositionSetTranslation(
+ &rtc_decomp, decomp[step].y.x, decomp[step].y.y, decomp[step].y.z);
+ rtcQuaternionDecompositionSetSkew(
+ &rtc_decomp, decomp[step].z.x, decomp[step].z.y, decomp[step].w.x);
+ rtcSetGeometryTransformQuaternion(geom_id, step, &rtc_decomp);
}
}
else {
@@ -545,30 +588,28 @@ void BVHEmbree::add_instance(Object *ob, int i)
pack.prim_tri_index.push_back_slow(-1);
rtcSetGeometryUserData(geom_id, (void *)instance_bvh->scene);
- rtcSetGeometryMask(geom_id, ob->visibility);
+ rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
rtcCommitGeometry(geom_id);
rtcAttachGeometryByID(scene, geom_id, i * 2);
rtcReleaseGeometry(geom_id);
}
-void BVHEmbree::add_triangles(Object *ob, int i)
+void BVHEmbree::add_triangles(const Object *ob, const Mesh *mesh, int i)
{
size_t prim_offset = pack.prim_index.size();
- Mesh *mesh = ob->mesh;
const Attribute *attr_mP = NULL;
- size_t num_motion_steps = 1;
+ size_t num_geometry_motion_steps = 1;
if (mesh->has_motion_blur()) {
attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if (attr_mP) {
- num_motion_steps = mesh->motion_steps;
- if (num_motion_steps > RTC_MAX_TIME_STEP_COUNT) {
- assert(0);
- num_motion_steps = RTC_MAX_TIME_STEP_COUNT;
- }
+ num_geometry_motion_steps = mesh->motion_steps;
}
}
+ const size_t num_motion_steps = min(num_geometry_motion_steps, RTC_MAX_TIME_STEP_COUNT);
+ assert(num_geometry_motion_steps <= RTC_MAX_TIME_STEP_COUNT);
+
const size_t num_triangles = mesh->num_triangles();
RTCGeometry geom_id = rtcNewGeometry(rtc_shared_device, RTC_GEOMETRY_TYPE_TRIANGLE);
rtcSetGeometryBuildQuality(geom_id, build_quality);
@@ -608,9 +649,8 @@ void BVHEmbree::add_triangles(Object *ob, int i)
}
rtcSetGeometryUserData(geom_id, (void *)prim_offset);
- rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func);
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
- rtcSetGeometryMask(geom_id, ob->visibility);
+ rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
rtcCommitGeometry(geom_id);
rtcAttachGeometryByID(scene, geom_id, i * 2);
@@ -659,31 +699,35 @@ void BVHEmbree::update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh)
}
}
-void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh)
+void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair)
{
const Attribute *attr_mP = NULL;
size_t num_motion_steps = 1;
- if (mesh->has_motion_blur()) {
- attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if (hair->has_motion_blur()) {
+ attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if (attr_mP) {
- num_motion_steps = mesh->motion_steps;
+ num_motion_steps = hair->motion_steps;
}
}
- const size_t num_curves = mesh->num_curves();
+ const size_t num_curves = hair->num_curves();
size_t num_keys = 0;
for (size_t j = 0; j < num_curves; ++j) {
- const Mesh::Curve c = mesh->get_curve(j);
+ const Hair::Curve c = hair->get_curve(j);
num_keys += c.num_keys;
}
+ /* Catmull-Rom splines need extra CVs at the beginning and end of each curve. */
+ size_t num_keys_embree = num_keys;
+ num_keys_embree += num_curves * 2;
+
/* Copy the CV data to Embree */
const int t_mid = (num_motion_steps - 1) / 2;
- const float *curve_radius = &mesh->curve_radius[0];
+ const float *curve_radius = &hair->curve_radius[0];
for (int t = 0; t < num_motion_steps; ++t) {
const float3 *verts;
if (t == t_mid || attr_mP == NULL) {
- verts = &mesh->curve_keys[0];
+ verts = &hair->curve_keys[0];
}
else {
int t_ = (t > t_mid) ? (t - 1) : t;
@@ -691,67 +735,53 @@ void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh
}
float4 *rtc_verts = (float4 *)rtcSetNewGeometryBuffer(
- geom_id, RTC_BUFFER_TYPE_VERTEX, t, RTC_FORMAT_FLOAT4, sizeof(float) * 4, num_keys);
- float4 *rtc_tangents = NULL;
- if (use_curves) {
- rtc_tangents = (float4 *)rtcSetNewGeometryBuffer(
- geom_id, RTC_BUFFER_TYPE_TANGENT, t, RTC_FORMAT_FLOAT4, sizeof(float) * 4, num_keys);
- assert(rtc_tangents);
- }
+ geom_id, RTC_BUFFER_TYPE_VERTEX, t, RTC_FORMAT_FLOAT4, sizeof(float) * 4, num_keys_embree);
+
assert(rtc_verts);
if (rtc_verts) {
- if (use_curves && rtc_tangents) {
- const size_t num_curves = mesh->num_curves();
- for (size_t j = 0; j < num_curves; ++j) {
- Mesh::Curve c = mesh->get_curve(j);
- int fk = c.first_key;
- rtc_verts[0] = float3_to_float4(verts[fk]);
- rtc_verts[0].w = curve_radius[fk];
- rtc_tangents[0] = float3_to_float4(verts[fk + 1] - verts[fk]);
- rtc_tangents[0].w = curve_radius[fk + 1] - curve_radius[fk];
- ++fk;
- int k = 1;
- for (; k < c.num_segments(); ++k, ++fk) {
- rtc_verts[k] = float3_to_float4(verts[fk]);
- rtc_verts[k].w = curve_radius[fk];
- rtc_tangents[k] = float3_to_float4((verts[fk + 1] - verts[fk - 1]) * 0.5f);
- rtc_tangents[k].w = (curve_radius[fk + 1] - curve_radius[fk - 1]) * 0.5f;
- }
+ const size_t num_curves = hair->num_curves();
+ for (size_t j = 0; j < num_curves; ++j) {
+ Hair::Curve c = hair->get_curve(j);
+ int fk = c.first_key;
+ int k = 1;
+ for (; k < c.num_keys + 1; ++k, ++fk) {
rtc_verts[k] = float3_to_float4(verts[fk]);
rtc_verts[k].w = curve_radius[fk];
- rtc_tangents[k] = float3_to_float4(verts[fk] - verts[fk - 1]);
- rtc_tangents[k].w = curve_radius[fk] - curve_radius[fk - 1];
- rtc_verts += c.num_keys;
- rtc_tangents += c.num_keys;
- }
- }
- else {
- for (size_t j = 0; j < num_keys; ++j) {
- rtc_verts[j] = float3_to_float4(verts[j]);
- rtc_verts[j].w = curve_radius[j];
}
+ /* Duplicate Embree's Catmull-Rom spline CVs at the start and end of each curve. */
+ rtc_verts[0] = rtc_verts[1];
+ rtc_verts[k] = rtc_verts[k - 1];
+ rtc_verts += c.num_keys + 2;
}
}
}
}
-void BVHEmbree::add_curves(Object *ob, int i)
+void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i)
{
size_t prim_offset = pack.prim_index.size();
- const Mesh *mesh = ob->mesh;
const Attribute *attr_mP = NULL;
- size_t num_motion_steps = 1;
- if (mesh->has_motion_blur()) {
- attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ size_t num_geometry_motion_steps = 1;
+ if (hair->has_motion_blur()) {
+ attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if (attr_mP) {
- num_motion_steps = mesh->motion_steps;
+ num_geometry_motion_steps = hair->motion_steps;
}
}
- const size_t num_curves = mesh->num_curves();
+ const size_t num_motion_steps = min(num_geometry_motion_steps, RTC_MAX_TIME_STEP_COUNT);
+ const PrimitiveType primitive_type =
+ (num_motion_steps > 1) ?
+ ((hair->curve_shape == CURVE_RIBBON) ? PRIMITIVE_MOTION_CURVE_RIBBON :
+ PRIMITIVE_MOTION_CURVE_THICK) :
+ ((hair->curve_shape == CURVE_RIBBON) ? PRIMITIVE_CURVE_RIBBON : PRIMITIVE_CURVE_THICK);
+
+ assert(num_geometry_motion_steps <= RTC_MAX_TIME_STEP_COUNT);
+
+ const size_t num_curves = hair->num_curves();
size_t num_segments = 0;
for (size_t j = 0; j < num_curves; ++j) {
- Mesh::Curve c = mesh->get_curve(j);
+ Hair::Curve c = hair->get_curve(j);
assert(c.num_segments() > 0);
num_segments += c.num_segments();
}
@@ -766,24 +796,24 @@ void BVHEmbree::add_curves(Object *ob, int i)
size_t prim_tri_index_size = pack.prim_index.size();
pack.prim_tri_index.resize(prim_tri_index_size + num_segments);
- enum RTCGeometryType type = (!use_curves) ?
- RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE :
- (use_ribbons ? RTC_GEOMETRY_TYPE_FLAT_HERMITE_CURVE :
- RTC_GEOMETRY_TYPE_ROUND_HERMITE_CURVE);
+ enum RTCGeometryType type = (hair->curve_shape == CURVE_RIBBON ?
+ RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE :
+ RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE);
RTCGeometry geom_id = rtcNewGeometry(rtc_shared_device, type);
- rtcSetGeometryTessellationRate(geom_id, curve_subdivisions);
+ rtcSetGeometryTessellationRate(geom_id, curve_subdivisions + 1);
unsigned *rtc_indices = (unsigned *)rtcSetNewGeometryBuffer(
geom_id, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT, sizeof(int), num_segments);
size_t rtc_index = 0;
for (size_t j = 0; j < num_curves; ++j) {
- Mesh::Curve c = mesh->get_curve(j);
+ Hair::Curve c = hair->get_curve(j);
for (size_t k = 0; k < c.num_segments(); ++k) {
rtc_indices[rtc_index] = c.first_key + k;
+ /* Offset by the two extra Catmull-Rom CVs added for each preceding curve. */
+ rtc_indices[rtc_index] += j * 2;
/* Cycles specific data. */
pack.prim_object[prim_object_size + rtc_index] = i;
- pack.prim_type[prim_type_size + rtc_index] = (PRIMITIVE_PACK_SEGMENT(
- num_motion_steps > 1 ? PRIMITIVE_MOTION_CURVE : PRIMITIVE_CURVE, k));
+ pack.prim_type[prim_type_size + rtc_index] = (PRIMITIVE_PACK_SEGMENT(primitive_type, k));
pack.prim_index[prim_index_size + rtc_index] = j;
pack.prim_tri_index[prim_tri_index_size + rtc_index] = rtc_index;
@@ -794,12 +824,17 @@ void BVHEmbree::add_curves(Object *ob, int i)
rtcSetGeometryBuildQuality(geom_id, build_quality);
rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
- update_curve_vertex_buffer(geom_id, mesh);
+ update_curve_vertex_buffer(geom_id, hair);
rtcSetGeometryUserData(geom_id, (void *)prim_offset);
- rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func);
- rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
- rtcSetGeometryMask(geom_id, ob->visibility);
+ if (hair->curve_shape == CURVE_RIBBON) {
+ rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
+ }
+ else {
+ rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func_thick_curve);
+ rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func_thick_curve);
+ }
+ rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
rtcCommitGeometry(geom_id);
rtcAttachGeometryByID(scene, geom_id, i * 2 + 1);
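To make the Catmull-Rom bookkeeping above explicit: every curve adds two duplicated CVs to the Embree vertex buffer, and the index buffer entry for a segment points one CV before the segment's first key. A hypothetical helper capturing the offset used above:

/* Sketch only: Embree vertex-buffer index for segment 'k' of curve 'j'.
 * The '+ 2 * j' skips the CVs duplicated for preceding curves, so the
 * result lands on the CV just before the segment's first key. */
static unsigned rtc_catmull_rom_index(unsigned first_key, unsigned j, unsigned k)
{
  return first_key + k + 2 * j;
}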
@@ -815,10 +850,7 @@ void BVHEmbree::pack_nodes(const BVHNode *)
for (size_t i = 0; i < pack.prim_index.size(); ++i) {
if (pack.prim_index[i] != -1) {
- if (pack.prim_type[i] & PRIMITIVE_ALL_CURVE)
- pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->curve_offset;
- else
- pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->tri_offset;
+ pack.prim_index[i] += objects[pack.prim_object[i]]->geometry->prim_offset;
}
}
@@ -832,22 +864,22 @@ void BVHEmbree::pack_nodes(const BVHNode *)
size_t pack_prim_tri_verts_offset = prim_tri_verts_size;
size_t object_offset = 0;
- map<Mesh *, int> mesh_map;
+ map<Geometry *, int> geometry_map;
foreach (Object *ob, objects) {
- Mesh *mesh = ob->mesh;
- BVH *bvh = mesh->bvh;
+ Geometry *geom = ob->geometry;
+ BVH *bvh = geom->bvh;
- if (mesh->need_build_bvh()) {
- if (mesh_map.find(mesh) == mesh_map.end()) {
+ if (geom->need_build_bvh(BVH_LAYOUT_EMBREE)) {
+ if (geometry_map.find(geom) == geometry_map.end()) {
prim_index_size += bvh->pack.prim_index.size();
prim_tri_verts_size += bvh->pack.prim_tri_verts.size();
- mesh_map[mesh] = 1;
+ geometry_map[geom] = 1;
}
}
}
- mesh_map.clear();
+ geometry_map.clear();
pack.prim_index.resize(prim_index_size);
pack.prim_type.resize(prim_index_size);
@@ -865,38 +897,37 @@ void BVHEmbree::pack_nodes(const BVHNode *)
/* merge */
foreach (Object *ob, objects) {
- Mesh *mesh = ob->mesh;
+ Geometry *geom = ob->geometry;
/* We assume that if mesh doesn't need own BVH it was already included
* into a top-level BVH and no packing here is needed.
*/
- if (!mesh->need_build_bvh()) {
+ if (!geom->need_build_bvh(BVH_LAYOUT_EMBREE)) {
pack.object_node[object_offset++] = prim_offset;
continue;
}
- /* if mesh already added once, don't add it again, but used set
+ /* if geom was already added once, don't add it again, but use the already-set
* node offset for this object */
- map<Mesh *, int>::iterator it = mesh_map.find(mesh);
+ map<Geometry *, int>::iterator it = geometry_map.find(geom);
- if (mesh_map.find(mesh) != mesh_map.end()) {
+ if (geometry_map.find(geom) != geometry_map.end()) {
int noffset = it->second;
pack.object_node[object_offset++] = noffset;
continue;
}
- BVHEmbree *bvh = (BVHEmbree *)mesh->bvh;
+ BVHEmbree *bvh = (BVHEmbree *)geom->bvh;
rtc_memory_monitor_func(stats, unaccounted_mem, true);
unaccounted_mem = 0;
- int mesh_tri_offset = mesh->tri_offset;
- int mesh_curve_offset = mesh->curve_offset;
+ int geom_prim_offset = geom->prim_offset;
/* fill in node indexes for instances */
pack.object_node[object_offset++] = prim_offset;
- mesh_map[mesh] = pack.object_node[object_offset - 1];
+ geometry_map[geom] = pack.object_node[object_offset - 1];
/* merge primitive, object and triangle indexes */
if (bvh->pack.prim_index.size()) {
@@ -907,11 +938,11 @@ void BVHEmbree::pack_nodes(const BVHNode *)
for (size_t i = 0; i < bvh_prim_index_size; ++i) {
if (bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
- pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_curve_offset;
+ pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset;
pack_prim_tri_index[pack_prim_index_offset] = -1;
}
else {
- pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_tri_offset;
+ pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset;
pack_prim_tri_index[pack_prim_index_offset] = bvh_prim_tri_index[i] +
pack_prim_tri_verts_offset;
}
@@ -941,15 +972,22 @@ void BVHEmbree::refit_nodes()
/* Update all vertex buffers, then tell Embree to rebuild/-fit the BVHs. */
unsigned geom_id = 0;
foreach (Object *ob, objects) {
- if (!params.top_level || (ob->is_traceable() && !ob->mesh->is_instanced())) {
- if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE && ob->mesh->num_triangles() > 0) {
- update_tri_vertex_buffer(rtcGetGeometry(scene, geom_id), ob->mesh);
- rtcCommitGeometry(rtcGetGeometry(scene, geom_id));
+ if (!params.top_level || (ob->is_traceable() && !ob->geometry->is_instanced())) {
+ Geometry *geom = ob->geometry;
+
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (mesh->num_triangles() > 0) {
+ update_tri_vertex_buffer(rtcGetGeometry(scene, geom_id), mesh);
+ rtcCommitGeometry(rtcGetGeometry(scene, geom_id));
+ }
}
-
- if (params.primitive_mask & PRIMITIVE_ALL_CURVE && ob->mesh->num_curves() > 0) {
- update_curve_vertex_buffer(rtcGetGeometry(scene, geom_id + 1), ob->mesh);
- rtcCommitGeometry(rtcGetGeometry(scene, geom_id + 1));
+ else if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ if (hair->num_curves() > 0) {
+ update_curve_vertex_buffer(rtcGetGeometry(scene, geom_id + 1), hair);
+ rtcCommitGeometry(rtcGetGeometry(scene, geom_id + 1));
+ }
}
}
geom_id += 2;
diff --git a/intern/cycles/bvh/bvh_embree.h b/intern/cycles/bvh/bvh_embree.h
index 60702713583..f60a1ca0102 100644
--- a/intern/cycles/bvh/bvh_embree.h
+++ b/intern/cycles/bvh/bvh_embree.h
@@ -31,11 +31,14 @@
CCL_NAMESPACE_BEGIN
+class Geometry;
+class Hair;
class Mesh;
class BVHEmbree : public BVH {
public:
virtual void build(Progress &progress, Stats *stats) override;
+ virtual void copy_to_device(Progress &progress, DeviceScene *dscene) override;
virtual ~BVHEmbree();
RTCScene scene;
static void destroy(RTCScene);
@@ -45,15 +48,17 @@ class BVHEmbree : public BVH {
protected:
friend class BVH;
- BVHEmbree(const BVHParams &params, const vector<Object *> &objects);
+ BVHEmbree(const BVHParams &params,
+ const vector<Geometry *> &geometry,
+ const vector<Object *> &objects);
virtual void pack_nodes(const BVHNode *) override;
virtual void refit_nodes() override;
void add_object(Object *ob, int i);
void add_instance(Object *ob, int i);
- void add_curves(Object *ob, int i);
- void add_triangles(Object *ob, int i);
+ void add_curves(const Object *ob, const Hair *hair, int i);
+ void add_triangles(const Object *ob, const Mesh *mesh, int i);
ssize_t mem_used;
@@ -66,7 +71,7 @@ class BVHEmbree : public BVH {
private:
void delete_rtcScene();
void update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh);
- void update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh);
+ void update_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair);
static RTCDevice rtc_shared_device;
static int rtc_shared_users;
@@ -76,7 +81,7 @@ class BVHEmbree : public BVH {
vector<RTCScene> delayed_delete_scenes;
int curve_subdivisions;
enum RTCBuildQuality build_quality;
- bool use_curves, use_ribbons, dynamic_scene;
+ bool dynamic_scene;
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/bvh/bvh_optix.cpp b/intern/cycles/bvh/bvh_optix.cpp
new file mode 100644
index 00000000000..ccb7ae08625
--- /dev/null
+++ b/intern/cycles/bvh/bvh_optix.cpp
@@ -0,0 +1,230 @@
+/*
+ * Copyright 2019, NVIDIA Corporation.
+ * Copyright 2019, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef WITH_OPTIX
+
+# include "bvh/bvh_optix.h"
+
+# include "device/device.h"
+
+# include "render/geometry.h"
+# include "render/hair.h"
+# include "render/mesh.h"
+# include "render/object.h"
+
+# include "util/util_foreach.h"
+# include "util/util_logging.h"
+# include "util/util_progress.h"
+
+CCL_NAMESPACE_BEGIN
+
+BVHOptiX::BVHOptiX(const BVHParams &params_,
+ const vector<Geometry *> &geometry_,
+ const vector<Object *> &objects_)
+ : BVH(params_, geometry_, objects_)
+{
+}
+
+BVHOptiX::~BVHOptiX()
+{
+}
+
+void BVHOptiX::build(Progress &, Stats *)
+{
+ if (params.top_level)
+ pack_tlas();
+ else
+ pack_blas();
+}
+
+void BVHOptiX::copy_to_device(Progress &progress, DeviceScene *dscene)
+{
+ progress.set_status("Updating Scene BVH", "Building OptiX acceleration structure");
+
+ Device *const device = dscene->bvh_nodes.device;
+ if (!device->build_optix_bvh(this))
+ progress.set_error("Failed to build OptiX acceleration structure");
+}
+
+void BVHOptiX::pack_blas()
+{
+ // Bottom-level BVH can contain multiple primitive types, so merge them:
+ assert(geometry.size() == 1 && objects.size() == 1); // These are built per-mesh
+ Geometry *const geom = geometry[0];
+
+ if (geom->type == Geometry::HAIR) {
+ Hair *const hair = static_cast<Hair *const>(geom);
+ if (hair->num_curves() > 0) {
+ const size_t num_curves = hair->num_curves();
+ const size_t num_segments = hair->num_segments();
+ pack.prim_type.reserve(pack.prim_type.size() + num_segments);
+ pack.prim_index.reserve(pack.prim_index.size() + num_segments);
+ pack.prim_object.reserve(pack.prim_object.size() + num_segments);
+ // 'pack.prim_time' is only used in geom_curve_intersect.h
+ // It is not needed because of OPTIX_MOTION_FLAG_[START|END]_VANISH
+
+ uint type = (hair->use_motion_blur &&
+ hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) ?
+ ((hair->curve_shape == CURVE_RIBBON) ? PRIMITIVE_MOTION_CURVE_RIBBON :
+ PRIMITIVE_MOTION_CURVE_THICK) :
+ ((hair->curve_shape == CURVE_RIBBON) ? PRIMITIVE_CURVE_RIBBON :
+ PRIMITIVE_CURVE_THICK);
+
+ for (size_t j = 0; j < num_curves; ++j) {
+ const Hair::Curve curve = hair->get_curve(j);
+ for (size_t k = 0; k < curve.num_segments(); ++k) {
+ pack.prim_type.push_back_reserved(PRIMITIVE_PACK_SEGMENT(type, k));
+ // Each curve segment points back to its curve index
+ pack.prim_index.push_back_reserved(j);
+ pack.prim_object.push_back_reserved(0);
+ }
+ }
+ }
+ }
+ else if (geom->type == Geometry::MESH) {
+ Mesh *const mesh = static_cast<Mesh *const>(geom);
+ if (mesh->num_triangles() > 0) {
+ const size_t num_triangles = mesh->num_triangles();
+ pack.prim_type.reserve(pack.prim_type.size() + num_triangles);
+ pack.prim_index.reserve(pack.prim_index.size() + num_triangles);
+ pack.prim_object.reserve(pack.prim_object.size() + num_triangles);
+
+ uint type = PRIMITIVE_TRIANGLE;
+ if (mesh->use_motion_blur && mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION))
+ type = PRIMITIVE_MOTION_TRIANGLE;
+
+ for (size_t k = 0; k < num_triangles; ++k) {
+ pack.prim_type.push_back_reserved(type);
+ pack.prim_index.push_back_reserved(k);
+ pack.prim_object.push_back_reserved(0);
+ }
+ }
+ }
+
+ // Initialize visibility to zero and later update it during top-level build
+ uint prev_visibility = objects[0]->visibility;
+ objects[0]->visibility = 0;
+
+ // Update 'pack.prim_tri_index', 'pack.prim_tri_verts' and 'pack.prim_visibility'
+ pack_primitives();
+
+ // Reset visibility after packing
+ objects[0]->visibility = prev_visibility;
+}
+
+void BVHOptiX::pack_tlas()
+{
+ // Calculate total packed size
+ size_t prim_index_size = 0;
+ size_t prim_tri_verts_size = 0;
+ foreach (Geometry *geom, geometry) {
+ BVH *const bvh = geom->bvh;
+ prim_index_size += bvh->pack.prim_index.size();
+ prim_tri_verts_size += bvh->pack.prim_tri_verts.size();
+ }
+
+ if (prim_index_size == 0)
+ return; // Abort right away if this is an empty BVH
+
+ size_t pack_offset = 0;
+ size_t pack_verts_offset = 0;
+
+ pack.prim_type.resize(prim_index_size);
+ int *pack_prim_type = pack.prim_type.data();
+ pack.prim_index.resize(prim_index_size);
+ int *pack_prim_index = pack.prim_index.data();
+ pack.prim_object.resize(prim_index_size);
+ int *pack_prim_object = pack.prim_object.data();
+ pack.prim_visibility.resize(prim_index_size);
+ uint *pack_prim_visibility = pack.prim_visibility.data();
+ pack.prim_tri_index.resize(prim_index_size);
+ uint *pack_prim_tri_index = pack.prim_tri_index.data();
+ pack.prim_tri_verts.resize(prim_tri_verts_size);
+ float4 *pack_prim_tri_verts = pack.prim_tri_verts.data();
+
+ // Top-level BVH should only contain instances, see 'Geometry::need_build_bvh'
+ // Iterate over scene mesh list instead of objects, since the 'prim_offset' is calculated based
+ // on that list, which may be ordered differently from the object list.
+ foreach (Geometry *geom, geometry) {
+ PackedBVH &bvh_pack = geom->bvh->pack;
+ int geom_prim_offset = geom->prim_offset;
+
+ // Merge visibility flags of all objects and fix object indices for non-instanced geometry
+ int object_index = 0; // Unused for instanced geometry
+ int object_visibility = 0;
+ foreach (Object *ob, objects) {
+ if (ob->geometry == geom) {
+ object_visibility |= ob->visibility_for_tracing();
+ if (!geom->is_instanced()) {
+ object_index = ob->get_device_index();
+ break;
+ }
+ }
+ }
+
+ // Merge primitive, object and triangle indexes
+ if (!bvh_pack.prim_index.empty()) {
+ int *bvh_prim_type = &bvh_pack.prim_type[0];
+ int *bvh_prim_index = &bvh_pack.prim_index[0];
+ uint *bvh_prim_tri_index = &bvh_pack.prim_tri_index[0];
+ uint *bvh_prim_visibility = &bvh_pack.prim_visibility[0];
+
+ for (size_t i = 0; i < bvh_pack.prim_index.size(); i++, pack_offset++) {
+ if (bvh_pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
+ pack_prim_index[pack_offset] = bvh_prim_index[i] + geom_prim_offset;
+ pack_prim_tri_index[pack_offset] = -1;
+ }
+ else {
+ pack_prim_index[pack_offset] = bvh_prim_index[i] + geom_prim_offset;
+ pack_prim_tri_index[pack_offset] = bvh_prim_tri_index[i] + pack_verts_offset;
+ }
+
+ pack_prim_type[pack_offset] = bvh_prim_type[i];
+ pack_prim_object[pack_offset] = object_index;
+ pack_prim_visibility[pack_offset] = bvh_prim_visibility[i] | object_visibility;
+ }
+ }
+
+ // Merge triangle vertex data
+ if (!bvh_pack.prim_tri_verts.empty()) {
+ const size_t prim_tri_size = bvh_pack.prim_tri_verts.size();
+ memcpy(pack_prim_tri_verts + pack_verts_offset,
+ bvh_pack.prim_tri_verts.data(),
+ prim_tri_size * sizeof(float4));
+ pack_verts_offset += prim_tri_size;
+ }
+ }
+}
+
+void BVHOptiX::pack_nodes(const BVHNode *)
+{
+}
+
+void BVHOptiX::refit_nodes()
+{
+ // TODO(pmours): Implement?
+ VLOG(1) << "Refit is not yet implemented for OptiX BVH.";
+}
+
+BVHNode *BVHOptiX::widen_children_nodes(const BVHNode *)
+{
+ return NULL;
+}
+
+CCL_NAMESPACE_END
+
+#endif /* WITH_OPTIX */
diff --git a/intern/cycles/bvh/bvh_optix.h b/intern/cycles/bvh/bvh_optix.h
new file mode 100644
index 00000000000..e4745b093b5
--- /dev/null
+++ b/intern/cycles/bvh/bvh_optix.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2019, NVIDIA Corporation.
+ * Copyright 2019, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BVH_OPTIX_H__
+#define __BVH_OPTIX_H__
+
+#ifdef WITH_OPTIX
+
+# include "bvh/bvh.h"
+# include "bvh/bvh_params.h"
+# include "device/device_memory.h"
+
+CCL_NAMESPACE_BEGIN
+
+class Geometry;
+class Optix;
+
+class BVHOptiX : public BVH {
+ friend class BVH;
+
+ public:
+ BVHOptiX(const BVHParams &params,
+ const vector<Geometry *> &geometry,
+ const vector<Object *> &objects);
+ virtual ~BVHOptiX();
+
+ virtual void build(Progress &progress, Stats *) override;
+ virtual void copy_to_device(Progress &progress, DeviceScene *dscene) override;
+
+ private:
+ void pack_blas();
+ void pack_tlas();
+
+ virtual void pack_nodes(const BVHNode *) override;
+ virtual void refit_nodes() override;
+
+ virtual BVHNode *widen_children_nodes(const BVHNode *) override;
+};
+
+CCL_NAMESPACE_END
+
+#endif /* WITH_OPTIX */
+
+#endif /* __BVH_OPTIX_H__ */
diff --git a/intern/cycles/bvh/bvh_params.h b/intern/cycles/bvh/bvh_params.h
index 2731662a39d..1a50742dc33 100644
--- a/intern/cycles/bvh/bvh_params.h
+++ b/intern/cycles/bvh/bvh_params.h
@@ -69,9 +69,6 @@ class BVHParams {
/* BVH layout to be built. */
BVHLayout bvh_layout;
- /* Mask of primitives to be included into the BVH. */
- int primitive_mask;
-
/* Use unaligned bounding boxes.
* Only used for curves BVH.
*/
@@ -92,7 +89,6 @@ class BVHParams {
int bvh_type;
/* These are needed for Embree. */
- int curve_flags;
int curve_subdivisions;
/* fixed parameters */
@@ -120,14 +116,11 @@ class BVHParams {
bvh_layout = BVH_LAYOUT_BVH2;
use_unaligned_nodes = false;
- primitive_mask = PRIMITIVE_ALL;
-
num_motion_curve_steps = 0;
num_motion_triangle_steps = 0;
bvh_type = 0;
- curve_flags = 0;
curve_subdivisions = 4;
}
diff --git a/intern/cycles/bvh/bvh_sort.cpp b/intern/cycles/bvh/bvh_sort.cpp
index 4498a759c08..b01785b547a 100644
--- a/intern/cycles/bvh/bvh_sort.cpp
+++ b/intern/cycles/bvh/bvh_sort.cpp
@@ -88,18 +88,6 @@ static void bvh_reference_sort_threaded(TaskPool *task_pool,
const int job_end,
const BVHReferenceCompare &compare);
-class BVHSortTask : public Task {
- public:
- BVHSortTask(TaskPool *task_pool,
- BVHReference *data,
- const int job_start,
- const int job_end,
- const BVHReferenceCompare &compare)
- {
- run = function_bind(bvh_reference_sort_threaded, task_pool, data, job_start, job_end, compare);
- }
-};
-
/* Multi-threaded reference sort. */
static void bvh_reference_sort_threaded(TaskPool *task_pool,
BVHReference *data,
@@ -158,7 +146,8 @@ static void bvh_reference_sort_threaded(TaskPool *task_pool,
have_work = false;
if (left < end) {
if (start < right) {
- task_pool->push(new BVHSortTask(task_pool, data, left, end, compare), true);
+ task_pool->push(
+ function_bind(bvh_reference_sort_threaded, task_pool, data, left, end, compare));
}
else {
start = left;
diff --git a/intern/cycles/bvh/bvh_split.cpp b/intern/cycles/bvh/bvh_split.cpp
index bd261c10d55..4b21f852d7a 100644
--- a/intern/cycles/bvh/bvh_split.cpp
+++ b/intern/cycles/bvh/bvh_split.cpp
@@ -20,6 +20,7 @@
#include "bvh/bvh_build.h"
#include "bvh/bvh_sort.h"
+#include "render/hair.h"
#include "render/mesh.h"
#include "render/object.h"
@@ -32,7 +33,7 @@ CCL_NAMESPACE_BEGIN
BVHObjectSplit::BVHObjectSplit(BVHBuild *builder,
BVHSpatialStorage *storage,
const BVHRange &range,
- vector<BVHReference> *references,
+ vector<BVHReference> &references,
float nodeSAH,
const BVHUnaligned *unaligned_heuristic,
const Transform *aligned_space)
@@ -42,7 +43,7 @@ BVHObjectSplit::BVHObjectSplit(BVHBuild *builder,
left_bounds(BoundBox::empty),
right_bounds(BoundBox::empty),
storage_(storage),
- references_(references),
+ references_(&references),
unaligned_heuristic_(unaligned_heuristic),
aligned_space_(aligned_space)
{
@@ -132,7 +133,7 @@ void BVHObjectSplit::split(BVHRange &left, BVHRange &right, const BVHRange &rang
BVHSpatialSplit::BVHSpatialSplit(const BVHBuild &builder,
BVHSpatialStorage *storage,
const BVHRange &range,
- vector<BVHReference> *references,
+ vector<BVHReference> &references,
float nodeSAH,
const BVHUnaligned *unaligned_heuristic,
const Transform *aligned_space)
@@ -140,7 +141,7 @@ BVHSpatialSplit::BVHSpatialSplit(const BVHBuild &builder,
dim(0),
pos(0.0f),
storage_(storage),
- references_(references),
+ references_(&references),
unaligned_heuristic_(unaligned_heuristic),
aligned_space_(aligned_space)
{
@@ -151,7 +152,7 @@ BVHSpatialSplit::BVHSpatialSplit(const BVHBuild &builder,
}
else {
range_bounds = unaligned_heuristic->compute_aligned_boundbox(
- range, &references->at(0), *aligned_space);
+ range, &references_->at(0), *aligned_space);
}
float3 origin = range_bounds.min;
@@ -378,7 +379,7 @@ void BVHSpatialSplit::split_triangle_primitive(const Mesh *mesh,
}
}
-void BVHSpatialSplit::split_curve_primitive(const Mesh *mesh,
+void BVHSpatialSplit::split_curve_primitive(const Hair *hair,
const Transform *tfm,
int prim_index,
int segment_index,
@@ -388,11 +389,11 @@ void BVHSpatialSplit::split_curve_primitive(const Mesh *mesh,
BoundBox &right_bounds)
{
/* curve split: NOTE - Currently ignores curve width and needs to be fixed.*/
- Mesh::Curve curve = mesh->get_curve(prim_index);
+ Hair::Curve curve = hair->get_curve(prim_index);
const int k0 = curve.first_key + segment_index;
const int k1 = k0 + 1;
- float3 v0 = mesh->curve_keys[k0];
- float3 v1 = mesh->curve_keys[k1];
+ float3 v0 = hair->curve_keys[k0];
+ float3 v1 = hair->curve_keys[k1];
if (tfm != NULL) {
v0 = transform_point(tfm, v0);
@@ -436,13 +437,13 @@ void BVHSpatialSplit::split_triangle_reference(const BVHReference &ref,
}
void BVHSpatialSplit::split_curve_reference(const BVHReference &ref,
- const Mesh *mesh,
+ const Hair *hair,
int dim,
float pos,
BoundBox &left_bounds,
BoundBox &right_bounds)
{
- split_curve_primitive(mesh,
+ split_curve_primitive(hair,
NULL,
ref.prim_index(),
PRIMITIVE_UNPACK_SEGMENT(ref.prim_type()),
@@ -455,15 +456,22 @@ void BVHSpatialSplit::split_curve_reference(const BVHReference &ref,
void BVHSpatialSplit::split_object_reference(
const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds)
{
- Mesh *mesh = object->mesh;
- for (int tri_idx = 0; tri_idx < mesh->num_triangles(); ++tri_idx) {
- split_triangle_primitive(mesh, &object->tfm, tri_idx, dim, pos, left_bounds, right_bounds);
+ Geometry *geom = object->geometry;
+
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ for (int tri_idx = 0; tri_idx < mesh->num_triangles(); ++tri_idx) {
+ split_triangle_primitive(mesh, &object->tfm, tri_idx, dim, pos, left_bounds, right_bounds);
+ }
}
- for (int curve_idx = 0; curve_idx < mesh->num_curves(); ++curve_idx) {
- Mesh::Curve curve = mesh->get_curve(curve_idx);
- for (int segment_idx = 0; segment_idx < curve.num_keys - 1; ++segment_idx) {
- split_curve_primitive(
- mesh, &object->tfm, curve_idx, segment_idx, dim, pos, left_bounds, right_bounds);
+ else if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ for (int curve_idx = 0; curve_idx < hair->num_curves(); ++curve_idx) {
+ Hair::Curve curve = hair->get_curve(curve_idx);
+ for (int segment_idx = 0; segment_idx < curve.num_keys - 1; ++segment_idx) {
+ split_curve_primitive(
+ hair, &object->tfm, curve_idx, segment_idx, dim, pos, left_bounds, right_bounds);
+ }
}
}
}
@@ -481,13 +489,14 @@ void BVHSpatialSplit::split_reference(const BVHBuild &builder,
/* loop over vertices/edges. */
const Object *ob = builder.objects[ref.prim_object()];
- const Mesh *mesh = ob->mesh;
if (ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) {
+ Mesh *mesh = static_cast<Mesh *>(ob->geometry);
split_triangle_reference(ref, mesh, dim, pos, left_bounds, right_bounds);
}
else if (ref.prim_type() & PRIMITIVE_ALL_CURVE) {
- split_curve_reference(ref, mesh, dim, pos, left_bounds, right_bounds);
+ Hair *hair = static_cast<Hair *>(ob->geometry);
+ split_curve_reference(ref, hair, dim, pos, left_bounds, right_bounds);
}
else {
split_object_reference(ob, dim, pos, left_bounds, right_bounds);
diff --git a/intern/cycles/bvh/bvh_split.h b/intern/cycles/bvh/bvh_split.h
index eddd1c27f49..28ff0e05fc3 100644
--- a/intern/cycles/bvh/bvh_split.h
+++ b/intern/cycles/bvh/bvh_split.h
@@ -24,6 +24,8 @@
CCL_NAMESPACE_BEGIN
class BVHBuild;
+class Hair;
+class Mesh;
struct Transform;
/* Object Split */
@@ -42,7 +44,7 @@ class BVHObjectSplit {
BVHObjectSplit(BVHBuild *builder,
BVHSpatialStorage *storage,
const BVHRange &range,
- vector<BVHReference> *references,
+ vector<BVHReference> &references,
float nodeSAH,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL);
@@ -80,7 +82,7 @@ class BVHSpatialSplit {
BVHSpatialSplit(const BVHBuild &builder,
BVHSpatialStorage *storage,
const BVHRange &range,
- vector<BVHReference> *references,
+ vector<BVHReference> &references,
float nodeSAH,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL);
@@ -113,7 +115,7 @@ class BVHSpatialSplit {
float pos,
BoundBox &left_bounds,
BoundBox &right_bounds);
- void split_curve_primitive(const Mesh *mesh,
+ void split_curve_primitive(const Hair *hair,
const Transform *tfm,
int prim_index,
int segment_index,
@@ -134,7 +136,7 @@ class BVHSpatialSplit {
BoundBox &left_bounds,
BoundBox &right_bounds);
void split_curve_reference(const BVHReference &ref,
- const Mesh *mesh,
+ const Hair *hair,
int dim,
float pos,
BoundBox &left_bounds,
@@ -185,7 +187,7 @@ class BVHMixedSplit {
__forceinline BVHMixedSplit(BVHBuild *builder,
BVHSpatialStorage *storage,
const BVHRange &range,
- vector<BVHReference> *references,
+ vector<BVHReference> &references,
int level,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL)
@@ -195,7 +197,7 @@ class BVHMixedSplit {
}
else {
bounds = unaligned_heuristic->compute_aligned_boundbox(
- range, &references->at(0), *aligned_space);
+ range, &references.at(0), *aligned_space);
}
/* find split candidates. */
float area = bounds.safe_area();
@@ -218,7 +220,7 @@ class BVHMixedSplit {
/* leaf SAH is the lowest => create leaf. */
minSAH = min(min(leafSAH, object.sah), spatial.sah);
- no_split = (minSAH == leafSAH && builder->range_within_max_leaf_size(range, *references));
+ no_split = (minSAH == leafSAH && builder->range_within_max_leaf_size(range, references));
}
__forceinline void split(BVHBuild *builder,
diff --git a/intern/cycles/bvh/bvh_unaligned.cpp b/intern/cycles/bvh/bvh_unaligned.cpp
index 1843ca403a5..c969b361643 100644
--- a/intern/cycles/bvh/bvh_unaligned.cpp
+++ b/intern/cycles/bvh/bvh_unaligned.cpp
@@ -16,7 +16,7 @@
#include "bvh/bvh_unaligned.h"
-#include "render/mesh.h"
+#include "render/hair.h"
#include "render/object.h"
#include "bvh/bvh_binning.h"
@@ -68,13 +68,14 @@ bool BVHUnaligned::compute_aligned_space(const BVHReference &ref, Transform *ali
const Object *object = objects_[ref.prim_object()];
const int packed_type = ref.prim_type();
const int type = (packed_type & PRIMITIVE_ALL);
- if (type & PRIMITIVE_CURVE) {
+ /* No motion blur curves here, as we can't fit them to aligned boxes well. */
+ if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_CURVE_THICK)) {
const int curve_index = ref.prim_index();
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
- const Mesh *mesh = object->mesh;
- const Mesh::Curve &curve = mesh->get_curve(curve_index);
+ const Hair *hair = static_cast<const Hair *>(object->geometry);
+ const Hair::Curve &curve = hair->get_curve(curve_index);
const int key = curve.first_key + segment;
- const float3 v1 = mesh->curve_keys[key], v2 = mesh->curve_keys[key + 1];
+ const float3 v1 = hair->curve_keys[key], v2 = hair->curve_keys[key + 1];
float length;
const float3 axis = normalize_len(v2 - v1, &length);
if (length > 1e-6f) {
@@ -93,13 +94,14 @@ BoundBox BVHUnaligned::compute_aligned_prim_boundbox(const BVHReference &prim,
const Object *object = objects_[prim.prim_object()];
const int packed_type = prim.prim_type();
const int type = (packed_type & PRIMITIVE_ALL);
- if (type & PRIMITIVE_CURVE) {
+ /* No motion blur curves here, as we can't fit them to aligned boxes well. */
+ if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_CURVE_THICK)) {
const int curve_index = prim.prim_index();
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
- const Mesh *mesh = object->mesh;
- const Mesh::Curve &curve = mesh->get_curve(curve_index);
+ const Hair *hair = static_cast<const Hair *>(object->geometry);
+ const Hair::Curve &curve = hair->get_curve(curve_index);
curve.bounds_grow(
- segment, &mesh->curve_keys[0], &mesh->curve_radius[0], aligned_space, bounds);
+ segment, &hair->curve_keys[0], &hair->curve_radius[0], aligned_space, bounds);
}
else {
bounds = prim.bounds().transformed(&aligned_space);
diff --git a/intern/cycles/cmake/external_libs.cmake b/intern/cycles/cmake/external_libs.cmake
index 5bf681792ca..b09f442bd16 100644
--- a/intern/cycles/cmake/external_libs.cmake
+++ b/intern/cycles/cmake/external_libs.cmake
@@ -133,9 +133,9 @@ if(CYCLES_STANDALONE_REPOSITORY)
set(BOOST_DEFINITIONS "-DBOOST_ALL_NO_LIB")
####
- # embree
+ # Embree
if(WITH_CYCLES_EMBREE)
- find_package(embree 3.2.4 REQUIRED)
+ find_package(Embree 3.8.0 REQUIRED)
endif()
####
diff --git a/intern/cycles/cmake/macros.cmake b/intern/cycles/cmake/macros.cmake
index 0efd8bb7ea8..13328a8b6bf 100644
--- a/intern/cycles/cmake/macros.cmake
+++ b/intern/cycles/cmake/macros.cmake
@@ -8,8 +8,64 @@ endfunction()
macro(cycles_add_library target library_deps)
add_library(${target} ${ARGN})
- if(NOT ("${library_deps}" STREQUAL ""))
- target_link_libraries(${target} "${library_deps}")
+
+ # On Windows certain libraries have two sets of binaries: one for debug builds and one for
+ # release builds. The root of this requirement lies in the ABI, but that's outside of the
+ # scope of this comment.
+ #
+ # CMake has a native way of dealing with this, which is specifying what build type the
+ # libraries are provided for:
+ #
+ #   target_link_libraries(target optimized|debug|general <libraries>)
+ #
+ # The build type is to be provided as a separate argument to the function.
+ #
+ # CMake's variables for libraries will contain the build type in such cases. For example:
+ #
+ #   set(FOO_LIBRARIES optimized libfoo.lib debug libfoo_d.lib)
+ #
+ # Complications start with a single argument for library_deps: all the elements are put
+ # into a list, so "${FOO_LIBRARIES}" becomes "optimized;libfoo.lib;debug;libfoo_d.lib".
+ # This makes it impossible to pass it as-is to target_link_libraries(), since the build type
+ # keywords would be treated as libraries to be linked against, causing missing-library
+ # errors for optimized.lib.
+ #
+ # What this code does is traverse library_deps and extract information about whether a
+ # library is to be provided as general, debug or optimized. It is a little state machine
+ # which keeps track of which build type the library is to be provided for:
+ #
+ # - If the word "debug" or "optimized" is found, the next element in the list is expected to
+ #   be a library, which will be passed to target_link_libraries() under the corresponding
+ #   build type.
+ #
+ # - If there is no "debug" or "optimized", the library is used for all build types.
+ #
+ # NOTE: If separate libraries for debug and release are needed, every library in the list
+ # must be prefixed explicitly.
+ #
+ # Use: "optimized libfoo optimized libbar debug libfoo_d debug libbar_d"
+ # NOT: "optimized libfoo libbar debug libfoo_d libbar_d"
+ #
+ # TODO(sergey): This is the same as in Blender's own CMake. Find a way to avoid the
+ # duplication somehow, in a way which still allows building Cycles standalone.
+ if(NOT "${library_deps}" STREQUAL "")
+ set(next_library_mode "")
+ foreach(library ${library_deps})
+ string(TOLOWER "${library}" library_lower)
+ if(("${library_lower}" STREQUAL "optimized") OR
+ ("${library_lower}" STREQUAL "debug"))
+ set(next_library_mode "${library_lower}")
+ else()
+ if("${next_library_mode}" STREQUAL "optimized")
+ target_link_libraries(${target} optimized ${library})
+ elseif("${next_library_mode}" STREQUAL "debug")
+ target_link_libraries(${target} debug ${library})
+ else()
+ target_link_libraries(${target} ${library})
+ endif()
+ set(next_library_mode "")
+ endif()
+ endforeach()
endif()
+
cycles_set_solution_folder(${target})
endmacro()
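
The pairing rule described in the comment above is easiest to see in isolation. The following standalone C++ sketch (illustrative only, not part of the patch) mirrors the same little state machine: a "debug" or "optimized" token sets the mode for the single element that follows it, and everything else is linked for all build types.

    #include <iostream>
    #include <string>
    #include <vector>

    // Illustrative only: mimics the CMake loop above. A "debug" or "optimized"
    // token applies to the *next* element only; anything else is a library that
    // is linked under the pending mode, or under all build types if none is set.
    int main()
    {
      const std::vector<std::string> deps = {
          "optimized", "libfoo.lib", "debug", "libfoo_d.lib", "libcommon.lib"};

      std::string next_mode;
      for (const std::string &token : deps) {
        if (token == "optimized" || token == "debug") {
          next_mode = token;
        }
        else {
          const std::string mode = next_mode.empty() ? "general" : next_mode;
          std::cout << "link " << token << " (" << mode << ")\n";
          next_mode.clear();
        }
      }
      return 0;
    }
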
diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt
index 75f4a72bee3..ca366722eb7 100644
--- a/intern/cycles/device/CMakeLists.txt
+++ b/intern/cycles/device/CMakeLists.txt
@@ -29,17 +29,22 @@ set(SRC
device_memory.cpp
device_multi.cpp
device_opencl.cpp
+ device_optix.cpp
device_split_kernel.cpp
device_task.cpp
)
+set(SRC_CUDA
+ cuda/device_cuda.h
+ cuda/device_cuda_impl.cpp
+)
+
set(SRC_OPENCL
- opencl/opencl.h
+ opencl/device_opencl.h
+ opencl/device_opencl_impl.cpp
opencl/memory_manager.h
-
- opencl/opencl_split.cpp
- opencl/opencl_util.cpp
opencl/memory_manager.cpp
+ opencl/opencl_util.cpp
)
if(WITH_CYCLES_NETWORK)
@@ -59,7 +64,9 @@ set(SRC_HEADERS
)
set(LIB
-
+ cycles_render
+ cycles_kernel
+ cycles_util
)
if(WITH_CUDA_DYNLOAD)
@@ -77,16 +84,34 @@ if(WITH_CYCLES_NETWORK)
add_definitions(-DWITH_NETWORK)
endif()
if(WITH_CYCLES_DEVICE_OPENCL)
+ list(APPEND LIB
+ extern_clew
+ )
add_definitions(-DWITH_OPENCL)
endif()
if(WITH_CYCLES_DEVICE_CUDA)
add_definitions(-DWITH_CUDA)
endif()
+if(WITH_CYCLES_DEVICE_OPTIX)
+ add_definitions(-DWITH_OPTIX)
+endif()
if(WITH_CYCLES_DEVICE_MULTI)
add_definitions(-DWITH_MULTI)
endif()
+if(WITH_OPENIMAGEDENOISE)
+ add_definitions(-DWITH_OPENIMAGEDENOISE)
+ add_definitions(-DOIDN_STATIC_LIB)
+ list(APPEND INC_SYS
+ ${OPENIMAGEDENOISE_INCLUDE_DIRS}
+ )
+ list(APPEND LIB
+ ${OPENIMAGEDENOISE_LIBRARIES}
+ ${TBB_LIBRARIES}
+ )
+endif()
+
include_directories(${INC})
include_directories(SYSTEM ${INC_SYS})
-cycles_add_library(cycles_device "${LIB}" ${SRC} ${SRC_OPENCL} ${SRC_HEADERS})
+cycles_add_library(cycles_device "${LIB}" ${SRC} ${SRC_CUDA} ${SRC_OPENCL} ${SRC_HEADERS})
diff --git a/intern/cycles/device/cuda/device_cuda.h b/intern/cycles/device/cuda/device_cuda.h
new file mode 100644
index 00000000000..e5e3e24165d
--- /dev/null
+++ b/intern/cycles/device/cuda/device_cuda.h
@@ -0,0 +1,269 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef WITH_CUDA
+
+# include "device/device.h"
+# include "device/device_denoising.h"
+# include "device/device_split_kernel.h"
+
+# include "util/util_map.h"
+# include "util/util_task.h"
+
+# ifdef WITH_CUDA_DYNLOAD
+# include "cuew.h"
+# else
+# include "util/util_opengl.h"
+# include <cuda.h>
+# include <cudaGL.h>
+# endif
+
+CCL_NAMESPACE_BEGIN
+
+class CUDASplitKernel;
+
+class CUDADevice : public Device {
+
+ friend class CUDASplitKernelFunction;
+ friend class CUDASplitKernel;
+ friend class CUDAContextScope;
+
+ public:
+ DedicatedTaskPool task_pool;
+ CUdevice cuDevice;
+ CUcontext cuContext;
+ CUmodule cuModule, cuFilterModule;
+ size_t device_texture_headroom;
+ size_t device_working_headroom;
+ bool move_texture_to_host;
+ size_t map_host_used;
+ size_t map_host_limit;
+ int can_map_host;
+ int pitch_alignment;
+ int cuDevId;
+ int cuDevArchitecture;
+ bool first_error;
+ CUDASplitKernel *split_kernel;
+
+ struct CUDAMem {
+ CUDAMem() : texobject(0), array(0), use_mapped_host(false)
+ {
+ }
+
+ CUtexObject texobject;
+ CUarray array;
+
+ /* If true, mapped host memory in shared_pointer is being used. */
+ bool use_mapped_host;
+ };
+ typedef map<device_memory *, CUDAMem> CUDAMemMap;
+ CUDAMemMap cuda_mem_map;
+
+ struct PixelMem {
+ GLuint cuPBO;
+ CUgraphicsResource cuPBOresource;
+ GLuint cuTexId;
+ int w, h;
+ };
+ map<device_ptr, PixelMem> pixel_mem_map;
+
+ /* Bindless Textures */
+ device_vector<TextureInfo> texture_info;
+ bool need_texture_info;
+
+ /* Kernels */
+ struct {
+ bool loaded;
+
+ CUfunction adaptive_stopping;
+ CUfunction adaptive_filter_x;
+ CUfunction adaptive_filter_y;
+ CUfunction adaptive_scale_samples;
+ int adaptive_num_threads_per_block;
+ } functions;
+
+ static bool have_precompiled_kernels();
+
+ virtual bool show_samples() const override;
+
+ virtual BVHLayoutMask get_bvh_layout_mask() const override;
+
+ void set_error(const string &error) override;
+
+ CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_);
+
+ virtual ~CUDADevice();
+
+ bool support_device(const DeviceRequestedFeatures & /*requested_features*/);
+
+ bool check_peer_access(Device *peer_device) override;
+
+ bool use_adaptive_compilation();
+
+ bool use_split_kernel();
+
+ virtual string compile_kernel_get_common_cflags(
+ const DeviceRequestedFeatures &requested_features, bool filter = false, bool split = false);
+
+ string compile_kernel(const DeviceRequestedFeatures &requested_features,
+ const char *name,
+ const char *base = "cuda",
+ bool force_ptx = false);
+
+ virtual bool load_kernels(const DeviceRequestedFeatures &requested_features) override;
+
+ void load_functions();
+
+ void reserve_local_memory(const DeviceRequestedFeatures &requested_features);
+
+ void init_host_memory();
+
+ void load_texture_info();
+
+ void move_textures_to_host(size_t size, bool for_texture);
+
+ CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
+
+ void generic_copy_to(device_memory &mem);
+
+ void generic_free(device_memory &mem);
+
+ void mem_alloc(device_memory &mem) override;
+
+ void mem_copy_to(device_memory &mem) override;
+
+ void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override;
+
+ void mem_zero(device_memory &mem) override;
+
+ void mem_free(device_memory &mem) override;
+
+ device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) override;
+
+ virtual void const_copy_to(const char *name, void *host, size_t size) override;
+
+ void global_alloc(device_memory &mem);
+
+ void global_free(device_memory &mem);
+
+ void tex_alloc(device_texture &mem);
+
+ void tex_free(device_texture &mem);
+
+ bool denoising_non_local_means(device_ptr image_ptr,
+ device_ptr guide_ptr,
+ device_ptr variance_ptr,
+ device_ptr out_ptr,
+ DenoisingTask *task);
+
+ bool denoising_construct_transform(DenoisingTask *task);
+
+ bool denoising_accumulate(device_ptr color_ptr,
+ device_ptr color_variance_ptr,
+ device_ptr scale_ptr,
+ int frame,
+ DenoisingTask *task);
+
+ bool denoising_solve(device_ptr output_ptr, DenoisingTask *task);
+
+ bool denoising_combine_halves(device_ptr a_ptr,
+ device_ptr b_ptr,
+ device_ptr mean_ptr,
+ device_ptr variance_ptr,
+ int r,
+ int4 rect,
+ DenoisingTask *task);
+
+ bool denoising_divide_shadow(device_ptr a_ptr,
+ device_ptr b_ptr,
+ device_ptr sample_variance_ptr,
+ device_ptr sv_variance_ptr,
+ device_ptr buffer_variance_ptr,
+ DenoisingTask *task);
+
+ bool denoising_get_feature(int mean_offset,
+ int variance_offset,
+ device_ptr mean_ptr,
+ device_ptr variance_ptr,
+ float scale,
+ DenoisingTask *task);
+
+ bool denoising_write_feature(int out_offset,
+ device_ptr from_ptr,
+ device_ptr buffer_ptr,
+ DenoisingTask *task);
+
+ bool denoising_detect_outliers(device_ptr image_ptr,
+ device_ptr variance_ptr,
+ device_ptr depth_ptr,
+ device_ptr output_ptr,
+ DenoisingTask *task);
+
+ void denoise(RenderTile &rtile, DenoisingTask &denoising);
+
+ void adaptive_sampling_filter(uint filter_sample,
+ WorkTile *wtile,
+ CUdeviceptr d_wtile,
+ CUstream stream = 0);
+ void adaptive_sampling_post(RenderTile &rtile,
+ WorkTile *wtile,
+ CUdeviceptr d_wtile,
+ CUstream stream = 0);
+
+ void render(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles);
+
+ void film_convert(DeviceTask &task,
+ device_ptr buffer,
+ device_ptr rgba_byte,
+ device_ptr rgba_half);
+
+ void shader(DeviceTask &task);
+
+ CUdeviceptr map_pixels(device_ptr mem);
+
+ void unmap_pixels(device_ptr mem);
+
+ void pixels_alloc(device_memory &mem);
+
+ void pixels_copy_from(device_memory &mem, int y, int w, int h);
+
+ void pixels_free(device_memory &mem);
+
+ void draw_pixels(device_memory &mem,
+ int y,
+ int w,
+ int h,
+ int width,
+ int height,
+ int dx,
+ int dy,
+ int dw,
+ int dh,
+ bool transparent,
+ const DeviceDrawParams &draw_params) override;
+
+ void thread_run(DeviceTask &task);
+
+ virtual void task_add(DeviceTask &task) override;
+
+ virtual void task_wait() override;
+
+ virtual void task_cancel() override;
+};
+
+CCL_NAMESPACE_END
+
+#endif
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp
new file mode 100644
index 00000000000..3a2eb8df95b
--- /dev/null
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -0,0 +1,2683 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef WITH_CUDA
+
+# include <climits>
+# include <limits.h>
+# include <stdio.h>
+# include <stdlib.h>
+# include <string.h>
+
+# include "device/cuda/device_cuda.h"
+# include "device/device_intern.h"
+# include "device/device_split_kernel.h"
+
+# include "render/buffers.h"
+
+# include "kernel/filter/filter_defines.h"
+
+# include "util/util_debug.h"
+# include "util/util_foreach.h"
+# include "util/util_logging.h"
+# include "util/util_map.h"
+# include "util/util_md5.h"
+# include "util/util_opengl.h"
+# include "util/util_path.h"
+# include "util/util_string.h"
+# include "util/util_system.h"
+# include "util/util_time.h"
+# include "util/util_types.h"
+# include "util/util_windows.h"
+
+# include "kernel/split/kernel_split_data_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+# ifndef WITH_CUDA_DYNLOAD
+
+/* Transparently implement some functions, so the majority of the file does not need
+ * to worry about the difference between dynamically loaded and linked CUDA at all.
+ */
+
+namespace {
+
+const char *cuewErrorString(CUresult result)
+{
+ /* We can only give the error code here without major code duplication, which
+ * should be enough since dynamic loading is only being disabled by folks
+ * who know what they're doing anyway.
+ *
+ * NOTE: Avoid calling from several threads.
+ */
+ static string error;
+ error = string_printf("%d", result);
+ return error.c_str();
+}
+
+const char *cuewCompilerPath()
+{
+ return CYCLES_CUDA_NVCC_EXECUTABLE;
+}
+
+int cuewCompilerVersion()
+{
+ return (CUDA_VERSION / 100) + (CUDA_VERSION % 100 / 10);
+}
+
+} /* namespace */
+# endif /* WITH_CUDA_DYNLOAD */
+
+class CUDADevice;
+
+class CUDASplitKernel : public DeviceSplitKernel {
+ CUDADevice *device;
+
+ public:
+ explicit CUDASplitKernel(CUDADevice *device);
+
+ virtual uint64_t state_buffer_size(device_memory &kg, device_memory &data, size_t num_threads);
+
+ virtual bool enqueue_split_kernel_data_init(const KernelDimensions &dim,
+ RenderTile &rtile,
+ int num_global_elements,
+ device_memory &kernel_globals,
+ device_memory &kernel_data_,
+ device_memory &split_data,
+ device_memory &ray_state,
+ device_memory &queue_index,
+ device_memory &use_queues_flag,
+ device_memory &work_pool_wgs);
+
+ virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name,
+ const DeviceRequestedFeatures &);
+ virtual int2 split_kernel_local_size();
+ virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask &task);
+};
+
+/* Utility to push/pop CUDA context. */
+class CUDAContextScope {
+ public:
+ CUDAContextScope(CUDADevice *device);
+ ~CUDAContextScope();
+
+ private:
+ CUDADevice *device;
+};
+
+bool CUDADevice::have_precompiled_kernels()
+{
+ string cubins_path = path_get("lib");
+ return path_exists(cubins_path);
+}
+
+bool CUDADevice::show_samples() const
+{
+ /* The CUDADevice only processes one tile at a time, so showing samples is fine. */
+ return true;
+}
+
+BVHLayoutMask CUDADevice::get_bvh_layout_mask() const
+{
+ return BVH_LAYOUT_BVH2;
+}
+
+void CUDADevice::set_error(const string &error)
+{
+ Device::set_error(error);
+
+ if (first_error) {
+ fprintf(stderr, "\nRefer to the Cycles GPU rendering documentation for possible solutions:\n");
+ fprintf(stderr,
+ "https://docs.blender.org/manual/en/latest/render/cycles/gpu_rendering.html\n\n");
+ first_error = false;
+ }
+}
+
+# define cuda_assert(stmt) \
+ { \
+ CUresult result = stmt; \
+ if (result != CUDA_SUCCESS) { \
+ const char *name = cuewErrorString(result); \
+ set_error(string_printf("%s in %s (device_cuda_impl.cpp:%d)", name, #stmt, __LINE__)); \
+ } \
+ } \
+ (void)0
+
+CUDADevice::CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_)
+ : Device(info, stats, profiler, background_), texture_info(this, "__texture_info", MEM_GLOBAL)
+{
+ first_error = true;
+ background = background_;
+
+ cuDevId = info.num;
+ cuDevice = 0;
+ cuContext = 0;
+
+ cuModule = 0;
+ cuFilterModule = 0;
+
+ split_kernel = NULL;
+
+ need_texture_info = false;
+
+ device_texture_headroom = 0;
+ device_working_headroom = 0;
+ move_texture_to_host = false;
+ map_host_limit = 0;
+ map_host_used = 0;
+ can_map_host = 0;
+ pitch_alignment = 0;
+
+ functions.loaded = false;
+
+ /* Initialize CUDA. */
+ CUresult result = cuInit(0);
+ if (result != CUDA_SUCCESS) {
+ set_error(string_printf("Failed to initialize CUDA runtime (%s)", cuewErrorString(result)));
+ return;
+ }
+
+ /* Setup device and context. */
+ result = cuDeviceGet(&cuDevice, cuDevId);
+ if (result != CUDA_SUCCESS) {
+ set_error(string_printf("Failed to get CUDA device handle from ordinal (%s)",
+ cuewErrorString(result)));
+ return;
+ }
+
+ /* CU_CTX_MAP_HOST for mapping host memory when out of device memory.
+ * CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render,
+ * so we can predict which memory to map to host. */
+ cuda_assert(
+ cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
+
+ cuda_assert(cuDeviceGetAttribute(
+ &pitch_alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuDevice));
+
+ unsigned int ctx_flags = CU_CTX_LMEM_RESIZE_TO_MAX;
+ if (can_map_host) {
+ ctx_flags |= CU_CTX_MAP_HOST;
+ init_host_memory();
+ }
+
+ /* Create context. */
+ if (background) {
+ result = cuCtxCreate(&cuContext, ctx_flags, cuDevice);
+ }
+ else {
+ result = cuGLCtxCreate(&cuContext, ctx_flags, cuDevice);
+
+ if (result != CUDA_SUCCESS) {
+ result = cuCtxCreate(&cuContext, ctx_flags, cuDevice);
+ background = true;
+ }
+ }
+
+ if (result != CUDA_SUCCESS) {
+ set_error(string_printf("Failed to create CUDA context (%s)", cuewErrorString(result)));
+ return;
+ }
+
+ int major, minor;
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
+ cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
+ cuDevArchitecture = major * 100 + minor * 10;
+
+ /* Pop context set by cuCtxCreate. */
+ cuCtxPopCurrent(NULL);
+}
+
+CUDADevice::~CUDADevice()
+{
+ task_pool.cancel();
+
+ delete split_kernel;
+
+ texture_info.free();
+
+ cuda_assert(cuCtxDestroy(cuContext));
+}
+
+bool CUDADevice::support_device(const DeviceRequestedFeatures & /*requested_features*/)
+{
+ int major, minor;
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
+ cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
+
+ /* We only support sm_30 and above */
+ if (major < 3) {
+ set_error(string_printf(
+ "CUDA backend requires compute capability 3.0 or up, but found %d.%d.", major, minor));
+ return false;
+ }
+
+ return true;
+}
+
+bool CUDADevice::check_peer_access(Device *peer_device)
+{
+ if (peer_device == this) {
+ return false;
+ }
+ if (peer_device->info.type != DEVICE_CUDA && peer_device->info.type != DEVICE_OPTIX) {
+ return false;
+ }
+
+ CUDADevice *const peer_device_cuda = static_cast<CUDADevice *>(peer_device);
+
+ int can_access = 0;
+ cuda_assert(cuDeviceCanAccessPeer(&can_access, cuDevice, peer_device_cuda->cuDevice));
+ if (can_access == 0) {
+ return false;
+ }
+
+ // Ensure array access over the link is possible as well (for 3D textures)
+ cuda_assert(cuDeviceGetP2PAttribute(&can_access,
+ CU_DEVICE_P2P_ATTRIBUTE_ARRAY_ACCESS_ACCESS_SUPPORTED,
+ cuDevice,
+ peer_device_cuda->cuDevice));
+ if (can_access == 0) {
+ return false;
+ }
+
+ // Enable peer access in both directions
+ {
+ const CUDAContextScope scope(this);
+ CUresult result = cuCtxEnablePeerAccess(peer_device_cuda->cuContext, 0);
+ if (result != CUDA_SUCCESS) {
+ set_error(string_printf("Failed to enable peer access on CUDA context (%s)",
+ cuewErrorString(result)));
+ return false;
+ }
+ }
+ {
+ const CUDAContextScope scope(peer_device_cuda);
+ CUresult result = cuCtxEnablePeerAccess(cuContext, 0);
+ if (result != CUDA_SUCCESS) {
+ set_error(string_printf("Failed to enable peer access on CUDA context (%s)",
+ cuewErrorString(result)));
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool CUDADevice::use_adaptive_compilation()
+{
+ return DebugFlags().cuda.adaptive_compile;
+}
+
+bool CUDADevice::use_split_kernel()
+{
+ return DebugFlags().cuda.split_kernel;
+}
+
+/* Common NVCC flags which stay the same regardless of shading model or
+ * kernel sources MD5, and only depend on compiler or compilation settings.
+ */
+string CUDADevice::compile_kernel_get_common_cflags(
+ const DeviceRequestedFeatures &requested_features, bool filter, bool split)
+{
+ const int machine = system_cpu_bits();
+ const string source_path = path_get("source");
+ const string include_path = source_path;
+ string cflags = string_printf(
+ "-m%d "
+ "--ptxas-options=\"-v\" "
+ "--use_fast_math "
+ "-DNVCC "
+ "-I\"%s\"",
+ machine,
+ include_path.c_str());
+ if (!filter && use_adaptive_compilation()) {
+ cflags += " " + requested_features.get_build_options();
+ }
+ const char *extra_cflags = getenv("CYCLES_CUDA_EXTRA_CFLAGS");
+ if (extra_cflags) {
+ cflags += string(" ") + string(extra_cflags);
+ }
+# ifdef WITH_CYCLES_DEBUG
+ cflags += " -D__KERNEL_DEBUG__";
+# endif
+
+ if (split) {
+ cflags += " -D__SPLIT__";
+ }
+
+ return cflags;
+}
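
For a 64-bit build with adaptive compilation disabled and no CYCLES_CUDA_EXTRA_CFLAGS set, the flags assembled above reduce to a fixed string plus the include path. A small sketch (illustrative only; the include path is a made-up placeholder) that reproduces the formatting:

    #include <cstdio>
    #include <string>

    // Illustrative only: reproduces the formatting of
    // compile_kernel_get_common_cflags() for a 64-bit build with an assumed
    // include path; adaptive compilation and extra cflags are omitted.
    int main()
    {
      const int machine = 64;
      const std::string include_path = "/path/to/cycles/source";  // Hypothetical path.
      char cflags[512];
      std::snprintf(cflags, sizeof(cflags),
                    "-m%d --ptxas-options=\"-v\" --use_fast_math -DNVCC -I\"%s\"",
                    machine, include_path.c_str());
      std::puts(cflags);
      return 0;
    }
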
+
+string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_features,
+ const char *name,
+ const char *base,
+ bool force_ptx)
+{
+ /* Compute kernel name. */
+ int major, minor;
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
+ cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
+
+ /* Attempt to use kernel provided with Blender. */
+ if (!use_adaptive_compilation()) {
+ if (!force_ptx) {
+ const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
+ VLOG(1) << "Testing for pre-compiled kernel " << cubin << ".";
+ if (path_exists(cubin)) {
+ VLOG(1) << "Using precompiled kernel.";
+ return cubin;
+ }
+ }
+
+ /* The driver can JIT-compile PTX generated for older generations, so find the closest one. */
+ int ptx_major = major, ptx_minor = minor;
+ while (ptx_major >= 3) {
+ const string ptx = path_get(
+ string_printf("lib/%s_compute_%d%d.ptx", name, ptx_major, ptx_minor));
+ VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
+ if (path_exists(ptx)) {
+ VLOG(1) << "Using precompiled kernel.";
+ return ptx;
+ }
+
+ if (ptx_minor > 0) {
+ ptx_minor--;
+ }
+ else {
+ ptx_major--;
+ ptx_minor = 9;
+ }
+ }
+ }
+
+ /* Try to use locally compiled kernel. */
+ string source_path = path_get("source");
+ const string source_md5 = path_files_md5_hash(source_path);
+
+ /* We include cflags into the md5 so that changing the CUDA toolkit or other
+ * compiler command line arguments makes sure the cubin gets re-built.
+ */
+ string common_cflags = compile_kernel_get_common_cflags(
+ requested_features, strstr(name, "filter") != NULL, strstr(name, "split") != NULL);
+ const string kernel_md5 = util_md5_string(source_md5 + common_cflags);
+
+ const char *const kernel_ext = force_ptx ? "ptx" : "cubin";
+ const char *const kernel_arch = force_ptx ? "compute" : "sm";
+ const string cubin_file = string_printf(
+ "cycles_%s_%s_%d%d_%s.%s", name, kernel_arch, major, minor, kernel_md5.c_str(), kernel_ext);
+ const string cubin = path_cache_get(path_join("kernels", cubin_file));
+ VLOG(1) << "Testing for locally compiled kernel " << cubin << ".";
+ if (path_exists(cubin)) {
+ VLOG(1) << "Using locally compiled kernel.";
+ return cubin;
+ }
+
+# ifdef _WIN32
+ if (!use_adaptive_compilation() && have_precompiled_kernels()) {
+ if (major < 3) {
+ set_error(
+ string_printf("CUDA backend requires compute capability 3.0 or up, but found %d.%d. "
+ "Your GPU is not supported.",
+ major,
+ minor));
+ }
+ else {
+ set_error(
+ string_printf("CUDA binary kernel for this graphics card compute "
+ "capability (%d.%d) not found.",
+ major,
+ minor));
+ }
+ return string();
+ }
+# endif
+
+ /* Compile. */
+ const char *const nvcc = cuewCompilerPath();
+ if (nvcc == NULL) {
+ set_error(
+ "CUDA nvcc compiler not found. "
+ "Install CUDA toolkit in default location.");
+ return string();
+ }
+
+ const int nvcc_cuda_version = cuewCompilerVersion();
+ VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << nvcc_cuda_version << ".";
+ if (nvcc_cuda_version < 80) {
+ printf(
+ "Unsupported CUDA version %d.%d detected, "
+ "you need CUDA 8.0 or newer.\n",
+ nvcc_cuda_version / 10,
+ nvcc_cuda_version % 10);
+ return string();
+ }
+ else if (!(nvcc_cuda_version == 101 || nvcc_cuda_version == 102)) {
+ printf(
+ "CUDA version %d.%d detected, build may succeed but only "
+ "CUDA 10.1 and 10.2 are officially supported.\n",
+ nvcc_cuda_version / 10,
+ nvcc_cuda_version % 10);
+ }
+
+ double starttime = time_dt();
+
+ path_create_directories(cubin);
+
+ source_path = path_join(path_join(source_path, "kernel"),
+ path_join("kernels", path_join(base, string_printf("%s.cu", name))));
+
+ string command = string_printf(
+ "\"%s\" "
+ "-arch=%s_%d%d "
+ "--%s \"%s\" "
+ "-o \"%s\" "
+ "%s",
+ nvcc,
+ kernel_arch,
+ major,
+ minor,
+ kernel_ext,
+ source_path.c_str(),
+ cubin.c_str(),
+ common_cflags.c_str());
+
+ printf("Compiling CUDA kernel ...\n%s\n", command.c_str());
+
+# ifdef _WIN32
+ command = "call " + command;
+# endif
+ if (system(command.c_str()) != 0) {
+ set_error(
+ "Failed to execute compilation command, "
+ "see console for details.");
+ return string();
+ }
+
+ /* Verify if compilation succeeded */
+ if (!path_exists(cubin)) {
+ set_error(
+ "CUDA kernel compilation failed, "
+ "see console for details.");
+ return string();
+ }
+
+ printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);
+
+ return cubin;
+}
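
The PTX fallback loop above probes compute_XY files from the device's own compute capability downwards until it reaches compute_30. As an illustration only (assuming a hypothetical 7.5 device and the default "kernel" name), the probe order can be reproduced with this standalone sketch:

    #include <cstdio>

    // Illustrative only: prints the compute_XY candidates probed by the PTX
    // fallback loop in compile_kernel(), for an assumed 7.5 device.
    int main()
    {
      int major = 7, minor = 5;
      while (major >= 3) {
        std::printf("lib/kernel_compute_%d%d.ptx\n", major, minor);
        if (minor > 0) {
          minor--;
        }
        else {
          major--;
          minor = 9;
        }
      }
      return 0;
    }
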
+
+bool CUDADevice::load_kernels(const DeviceRequestedFeatures &requested_features)
+{
+ /* TODO(sergey): Support kernels re-load for CUDA devices.
+ *
+ * Currently re-loading kernel will invalidate memory pointers,
+ * causing problems in cuCtxSynchronize.
+ */
+ if (cuFilterModule && cuModule) {
+ VLOG(1) << "Skipping kernel reload, not currently supported.";
+ return true;
+ }
+
+ /* check if cuda init succeeded */
+ if (cuContext == 0)
+ return false;
+
+ /* check if GPU is supported */
+ if (!support_device(requested_features))
+ return false;
+
+ /* get kernel */
+ const char *kernel_name = use_split_kernel() ? "kernel_split" : "kernel";
+ string cubin = compile_kernel(requested_features, kernel_name);
+ if (cubin.empty())
+ return false;
+
+ const char *filter_name = "filter";
+ string filter_cubin = compile_kernel(requested_features, filter_name);
+ if (filter_cubin.empty())
+ return false;
+
+ /* open module */
+ CUDAContextScope scope(this);
+
+ string cubin_data;
+ CUresult result;
+
+ if (path_read_text(cubin, cubin_data))
+ result = cuModuleLoadData(&cuModule, cubin_data.c_str());
+ else
+ result = CUDA_ERROR_FILE_NOT_FOUND;
+
+ if (result != CUDA_SUCCESS)
+ set_error(string_printf(
+ "Failed to load CUDA kernel from '%s' (%s)", cubin.c_str(), cuewErrorString(result)));
+
+ if (path_read_text(filter_cubin, cubin_data))
+ result = cuModuleLoadData(&cuFilterModule, cubin_data.c_str());
+ else
+ result = CUDA_ERROR_FILE_NOT_FOUND;
+
+ if (result != CUDA_SUCCESS)
+ set_error(string_printf("Failed to load CUDA kernel from '%s' (%s)",
+ filter_cubin.c_str(),
+ cuewErrorString(result)));
+
+ if (result == CUDA_SUCCESS) {
+ reserve_local_memory(requested_features);
+ }
+
+ load_functions();
+
+ return (result == CUDA_SUCCESS);
+}
+
+void CUDADevice::load_functions()
+{
+ /* TODO: load all functions here. */
+ if (functions.loaded) {
+ return;
+ }
+ functions.loaded = true;
+
+ cuda_assert(cuModuleGetFunction(
+ &functions.adaptive_stopping, cuModule, "kernel_cuda_adaptive_stopping"));
+ cuda_assert(cuModuleGetFunction(
+ &functions.adaptive_filter_x, cuModule, "kernel_cuda_adaptive_filter_x"));
+ cuda_assert(cuModuleGetFunction(
+ &functions.adaptive_filter_y, cuModule, "kernel_cuda_adaptive_filter_y"));
+ cuda_assert(cuModuleGetFunction(
+ &functions.adaptive_scale_samples, cuModule, "kernel_cuda_adaptive_scale_samples"));
+
+ cuda_assert(cuFuncSetCacheConfig(functions.adaptive_stopping, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(functions.adaptive_filter_x, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(functions.adaptive_filter_y, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(functions.adaptive_scale_samples, CU_FUNC_CACHE_PREFER_L1));
+
+ int unused_min_blocks;
+ cuda_assert(cuOccupancyMaxPotentialBlockSize(&unused_min_blocks,
+ &functions.adaptive_num_threads_per_block,
+ functions.adaptive_scale_samples,
+ NULL,
+ 0,
+ 0));
+}
+
+void CUDADevice::reserve_local_memory(const DeviceRequestedFeatures &requested_features)
+{
+ if (use_split_kernel()) {
+ /* Split kernel mostly uses global memory and adaptive compilation,
+ * so it is difficult to predict how much local memory is needed currently. */
+ return;
+ }
+
+ /* Together with CU_CTX_LMEM_RESIZE_TO_MAX, this reserves local memory
+ * needed for kernel launches, so that we can reliably figure out when
+ * to allocate scene data in mapped host memory. */
+ CUDAContextScope scope(this);
+
+ size_t total = 0, free_before = 0, free_after = 0;
+ cuMemGetInfo(&free_before, &total);
+
+ /* Get kernel function. */
+ CUfunction cuRender;
+
+ if (requested_features.use_baking) {
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_bake"));
+ }
+ else if (requested_features.use_integrator_branched) {
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_branched_path_trace"));
+ }
+ else {
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_path_trace"));
+ }
+
+ cuda_assert(cuFuncSetCacheConfig(cuRender, CU_FUNC_CACHE_PREFER_L1));
+
+ int min_blocks, num_threads_per_block;
+ cuda_assert(
+ cuOccupancyMaxPotentialBlockSize(&min_blocks, &num_threads_per_block, cuRender, NULL, 0, 0));
+
+ /* Launch the kernel; using just 1 block appears sufficient to reserve
+ * memory for all multiprocessors. It would still be good to do this in
+ * parallel for the multi GPU case to make it faster. */
+ CUdeviceptr d_work_tiles = 0;
+ uint total_work_size = 0;
+
+ void *args[] = {&d_work_tiles, &total_work_size};
+
+ cuda_assert(cuLaunchKernel(cuRender, 1, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
+
+ cuda_assert(cuCtxSynchronize());
+
+ cuMemGetInfo(&free_after, &total);
+ VLOG(1) << "Local memory reserved " << string_human_readable_number(free_before - free_after)
+ << " bytes. (" << string_human_readable_size(free_before - free_after) << ")";
+
+# if 0
+ /* For testing mapped host memory, fill up device memory. */
+ const size_t keep_mb = 1024;
+
+ while (free_after > keep_mb * 1024 * 1024LL) {
+ CUdeviceptr tmp;
+ cuda_assert(cuMemAlloc(&tmp, 10 * 1024 * 1024LL));
+ cuMemGetInfo(&free_after, &total);
+ }
+# endif
+}
+
+void CUDADevice::init_host_memory()
+{
+ /* Limit amount of host mapped memory, because allocating too much can
+ * cause system instability. Leave at least half or 4 GB of system
+ * memory free, whichever is smaller. */
+ size_t default_limit = 4 * 1024 * 1024 * 1024LL;
+ size_t system_ram = system_physical_ram();
+
+ if (system_ram > 0) {
+ if (system_ram / 2 > default_limit) {
+ map_host_limit = system_ram - default_limit;
+ }
+ else {
+ map_host_limit = system_ram / 2;
+ }
+ }
+ else {
+ VLOG(1) << "Mapped host memory disabled, failed to get system RAM";
+ map_host_limit = 0;
+ }
+
+ /* Amount of device memory to keep free after texture memory
+ * and working memory allocations respectively. We set the working
+ * memory limit headroom lower so that some space is left after all
+ * texture memory allocations. */
+ device_working_headroom = 32 * 1024 * 1024LL; // 32MB
+ device_texture_headroom = 128 * 1024 * 1024LL; // 128MB
+
+ VLOG(1) << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
+ << " bytes. (" << string_human_readable_size(map_host_limit) << ")";
+}
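
As a worked example of this policy: with 16 GiB of system RAM, half (8 GiB) exceeds the 4 GiB default, so map_host_limit becomes 16 - 4 = 12 GiB; with 6 GiB of RAM, half (3 GiB) is smaller, so the limit is 3 GiB. A standalone sketch of the same arithmetic (illustrative only):

    #include <cstdint>
    #include <cstdio>

    // Illustrative only: same policy as init_host_memory(). Leave at least
    // half of the system RAM or 4 GiB free, whichever is smaller.
    static uint64_t map_host_limit_for(uint64_t system_ram)
    {
      const uint64_t default_limit = 4ull * 1024 * 1024 * 1024;
      if (system_ram == 0) {
        return 0;  // Unknown RAM size: disable mapped host memory.
      }
      return (system_ram / 2 > default_limit) ? system_ram - default_limit : system_ram / 2;
    }

    int main()
    {
      const uint64_t GiB = 1024ull * 1024 * 1024;
      std::printf("16 GiB -> %llu GiB\n",
                  (unsigned long long)(map_host_limit_for(16 * GiB) / GiB));
      std::printf(" 6 GiB -> %llu GiB\n",
                  (unsigned long long)(map_host_limit_for(6 * GiB) / GiB));
      return 0;
    }
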
+
+void CUDADevice::load_texture_info()
+{
+ if (need_texture_info) {
+ texture_info.copy_to_device();
+ need_texture_info = false;
+ }
+}
+
+void CUDADevice::move_textures_to_host(size_t size, bool for_texture)
+{
+ /* Break out of recursive call, which can happen when moving memory on a multi device. */
+ static bool any_device_moving_textures_to_host = false;
+ if (any_device_moving_textures_to_host) {
+ return;
+ }
+
+ /* Signal to reallocate textures in host memory only. */
+ move_texture_to_host = true;
+
+ while (size > 0) {
+ /* Find suitable memory allocation to move. */
+ device_memory *max_mem = NULL;
+ size_t max_size = 0;
+ bool max_is_image = false;
+
+ foreach (CUDAMemMap::value_type &pair, cuda_mem_map) {
+ device_memory &mem = *pair.first;
+ CUDAMem *cmem = &pair.second;
+
+ /* Can only move textures allocated on this device (and not those from peer devices).
+ * And need to ignore memory that is already on the host. */
+ if (!mem.is_resident(this) || cmem->use_mapped_host) {
+ continue;
+ }
+
+ bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
+ (&mem != &texture_info);
+ bool is_image = is_texture && (mem.data_height > 1);
+
+ /* Can't move this type of memory. */
+ if (!is_texture || cmem->array) {
+ continue;
+ }
+
+ /* For other textures, only move image textures. */
+ if (for_texture && !is_image) {
+ continue;
+ }
+
+ /* Try to move largest allocation, prefer moving images. */
+ if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
+ max_is_image = is_image;
+ max_size = mem.device_size;
+ max_mem = &mem;
+ }
+ }
+
+ /* Move to host memory. This part is mutex protected since
+ * multiple CUDA devices could be moving the memory. The
+ * first one will do it, and the rest will adopt the pointer. */
+ if (max_mem) {
+ VLOG(1) << "Move memory from device to host: " << max_mem->name;
+
+ static thread_mutex move_mutex;
+ thread_scoped_lock lock(move_mutex);
+
+ any_device_moving_textures_to_host = true;
+
+ /* Potentially need to call back into multi device, so pointer mapping
+ * and peer devices are updated. This is also necessary since the device
+ * pointer may just be a key here, so cannot be accessed and freed directly.
+ * Unfortunately it does mean that memory is reallocated on all other
+ * devices as well, which is potentially dangerous when still in use (since
+ * a thread rendering on another device would only be caught in this mutex
+ * if it so happens to do an allocation at the same time as well). */
+ max_mem->device_copy_to();
+ size = (max_size >= size) ? 0 : size - max_size;
+
+ any_device_moving_textures_to_host = false;
+ }
+ else {
+ break;
+ }
+ }
+
+ /* Unset flag before texture info is reloaded, since it should stay in device memory. */
+ move_texture_to_host = false;
+
+ /* Update texture info array with new pointers. */
+ load_texture_info();
+}
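
The candidate chosen in each iteration above follows a simple greedy rule: prefer image textures over other textures, and among equals prefer the larger allocation. A minimal standalone sketch of just that comparison, using illustrative types rather than the Cycles ones:

    #include <cstddef>
    #include <vector>

    // Illustrative only: greedy choice used when deciding which texture to
    // move to host memory first. Prefer images; break ties by size.
    struct Alloc {
      bool is_image;
      size_t size;
    };

    static const Alloc *pick_texture_to_move(const std::vector<Alloc> &allocs)
    {
      const Alloc *best = nullptr;
      bool best_is_image = false;
      size_t best_size = 0;
      for (const Alloc &a : allocs) {
        if (a.is_image > best_is_image || (a.is_image == best_is_image && a.size > best_size)) {
          best = &a;
          best_is_image = a.is_image;
          best_size = a.size;
        }
      }
      return best;  // Null if nothing is movable.
    }

    int main()
    {
      std::vector<Alloc> allocs = {{false, 512}, {true, 64}, {true, 256}};
      const Alloc *best = pick_texture_to_move(allocs);
      return (best && best->is_image && best->size == 256) ? 0 : 1;
    }
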
+
+CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_padding)
+{
+ CUDAContextScope scope(this);
+
+ CUdeviceptr device_pointer = 0;
+ size_t size = mem.memory_size() + pitch_padding;
+
+ CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY;
+ const char *status = "";
+
+ /* First try allocating in device memory, respecting headroom. We make
+ * an exception for texture info. It is small and frequently accessed,
+ * so treat it as working memory.
+ *
+ * If there is not enough room for working memory, we will try to move
+ * textures to host memory, assuming the performance impact would have
+ * been worse for working memory. */
+ bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
+ bool is_image = is_texture && (mem.data_height > 1);
+
+ size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
+
+ size_t total = 0, free = 0;
+ cuMemGetInfo(&free, &total);
+
+ /* Move textures to host memory if needed. */
+ if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
+ move_textures_to_host(size + headroom - free, is_texture);
+ cuMemGetInfo(&free, &total);
+ }
+
+ /* Allocate in device memory. */
+ if (!move_texture_to_host && (size + headroom) < free) {
+ mem_alloc_result = cuMemAlloc(&device_pointer, size);
+ if (mem_alloc_result == CUDA_SUCCESS) {
+ status = " in device memory";
+ }
+ }
+
+ /* Fall back to mapped host memory if needed and possible. */
+
+ void *shared_pointer = 0;
+
+ if (mem_alloc_result != CUDA_SUCCESS && can_map_host) {
+ if (mem.shared_pointer) {
+ /* Another device already allocated host memory. */
+ mem_alloc_result = CUDA_SUCCESS;
+ shared_pointer = mem.shared_pointer;
+ }
+ else if (map_host_used + size < map_host_limit) {
+ /* Allocate host memory ourselves. */
+ mem_alloc_result = cuMemHostAlloc(
+ &shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
+
+ assert((mem_alloc_result == CUDA_SUCCESS && shared_pointer != 0) ||
+ (mem_alloc_result != CUDA_SUCCESS && shared_pointer == 0));
+ }
+
+ if (mem_alloc_result == CUDA_SUCCESS) {
+ cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, shared_pointer, 0));
+ map_host_used += size;
+ status = " in host memory";
+ }
+ }
+
+ if (mem_alloc_result != CUDA_SUCCESS) {
+ status = " failed, out of device and host memory";
+ set_error("System is out of GPU and shared host memory");
+ }
+
+ if (mem.name) {
+ VLOG(1) << "Buffer allocate: " << mem.name << ", "
+ << string_human_readable_number(mem.memory_size()) << " bytes. ("
+ << string_human_readable_size(mem.memory_size()) << ")" << status;
+ }
+
+ mem.device_pointer = (device_ptr)device_pointer;
+ mem.device_size = size;
+ stats.mem_alloc(size);
+
+ if (!mem.device_pointer) {
+ return NULL;
+ }
+
+ /* Insert into map of allocations. */
+ CUDAMem *cmem = &cuda_mem_map[&mem];
+ if (shared_pointer != 0) {
+ /* Replace host pointer with our host allocation. Only works if
+ * CUDA memory layout is the same and has no pitch padding. Also
+ * does not work if we move textures to host during a render,
+ * since other devices might be using the memory. */
+
+ if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
+ mem.host_pointer != shared_pointer) {
+ memcpy(shared_pointer, mem.host_pointer, size);
+
+ /* A call to device_memory::host_free() should be preceded by
+ * a call to device_memory::device_free() for host memory
+ * allocated by a device to be handled properly. Two exceptions
+ * are here and a call in OptiXDevice::generic_alloc(), where
+ * the current host memory can be assumed to be allocated by
+ * device_memory::host_alloc(), not by a device. */
+
+ mem.host_free();
+ mem.host_pointer = shared_pointer;
+ }
+ mem.shared_pointer = shared_pointer;
+ mem.shared_counter++;
+ cmem->use_mapped_host = true;
+ }
+ else {
+ cmem->use_mapped_host = false;
+ }
+
+ return cmem;
+}
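
Stripped of the CUDA calls and the texture-eviction step, the placement decision in generic_alloc() boils down to: device memory while size plus headroom still fits, otherwise mapped host memory if the device supports it and the host-map budget allows. A simplified standalone sketch (illustrative only; it omits the move_texture_to_host handling):

    #include <cstddef>
    #include <cstdio>

    // Illustrative only: the placement decision in generic_alloc(), without the
    // CUDA calls and without the move_texture_to_host / texture-eviction logic.
    enum class Placement { Device, MappedHost, Failed };

    static Placement choose_placement(size_t size,
                                      size_t headroom,
                                      size_t free_device,
                                      bool can_map_host,
                                      size_t map_host_used,
                                      size_t map_host_limit)
    {
      if (size + headroom < free_device) {
        return Placement::Device;  // Fits in device memory with headroom to spare.
      }
      if (can_map_host && map_host_used + size < map_host_limit) {
        return Placement::MappedHost;  // Fall back to host memory mapped into the device.
      }
      return Placement::Failed;  // Out of both device and shared host memory.
    }

    int main()
    {
      const size_t MiB = 1024 * 1024;
      Placement p = choose_placement(512 * MiB, 32 * MiB, 2048 * MiB, true, 0, 3072 * MiB);
      std::printf("placement: %d\n", (int)p);
      return 0;
    }
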
+
+void CUDADevice::generic_copy_to(device_memory &mem)
+{
+ if (!mem.host_pointer || !mem.device_pointer) {
+ return;
+ }
+
+ /* If use_mapped_host of mem is false, the current device only uses device memory allocated by
+ * cuMemAlloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
+ * mem.host_pointer. */
+ if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
+ const CUDAContextScope scope(this);
+ cuda_assert(
+ cuMemcpyHtoD((CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size()));
+ }
+}
+
+void CUDADevice::generic_free(device_memory &mem)
+{
+ if (mem.device_pointer) {
+ CUDAContextScope scope(this);
+ const CUDAMem &cmem = cuda_mem_map[&mem];
+
+ /* If cmem.use_mapped_host is true, reference counting is used
+ * to safely free a mapped host memory. */
+
+ if (cmem.use_mapped_host) {
+ assert(mem.shared_pointer);
+ if (mem.shared_pointer) {
+ assert(mem.shared_counter > 0);
+ if (--mem.shared_counter == 0) {
+ if (mem.host_pointer == mem.shared_pointer) {
+ mem.host_pointer = 0;
+ }
+ cuMemFreeHost(mem.shared_pointer);
+ mem.shared_pointer = 0;
+ }
+ }
+ map_host_used -= mem.device_size;
+ }
+ else {
+ /* Free device memory. */
+ cuda_assert(cuMemFree(mem.device_pointer));
+ }
+
+ stats.mem_free(mem.device_size);
+ mem.device_pointer = 0;
+ mem.device_size = 0;
+
+ cuda_mem_map.erase(cuda_mem_map.find(&mem));
+ }
+}
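
Mapped host allocations that are shared between devices are released through the reference count above: only the last device to free its mapping actually releases the host memory. A minimal standalone sketch of that last-one-out pattern, with plain malloc/free standing in for the CUDA host allocation:

    #include <cstdio>
    #include <cstdlib>

    // Illustrative only: last-one-out-frees semantics for a host allocation
    // shared by several devices, mirroring mem.shared_counter in generic_free().
    struct SharedHostAlloc {
      void *pointer = nullptr;
      int counter = 0;
    };

    static void release(SharedHostAlloc &alloc)
    {
      if (alloc.counter > 0 && --alloc.counter == 0) {
        std::free(alloc.pointer);  // Stand-in for cuMemFreeHost().
        alloc.pointer = nullptr;
      }
    }

    int main()
    {
      SharedHostAlloc alloc;
      alloc.pointer = std::malloc(64);
      alloc.counter = 2;  // Two devices adopted the same host allocation.

      release(alloc);  // First device: memory stays alive.
      release(alloc);  // Second device: memory is actually freed.

      std::printf("pointer is %s\n", alloc.pointer ? "still alive" : "freed");
      return 0;
    }
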
+
+void CUDADevice::mem_alloc(device_memory &mem)
+{
+ if (mem.type == MEM_PIXELS && !background) {
+ pixels_alloc(mem);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ assert(!"mem_alloc not supported for textures.");
+ }
+ else if (mem.type == MEM_GLOBAL) {
+ assert(!"mem_alloc not supported for global memory.");
+ }
+ else {
+ generic_alloc(mem);
+ }
+}
+
+void CUDADevice::mem_copy_to(device_memory &mem)
+{
+ if (mem.type == MEM_PIXELS) {
+ assert(!"mem_copy_to not supported for pixels.");
+ }
+ else if (mem.type == MEM_GLOBAL) {
+ global_free(mem);
+ global_alloc(mem);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ tex_free((device_texture &)mem);
+ tex_alloc((device_texture &)mem);
+ }
+ else {
+ if (!mem.device_pointer) {
+ generic_alloc(mem);
+ }
+
+ generic_copy_to(mem);
+ }
+}
+
+void CUDADevice::mem_copy_from(device_memory &mem, int y, int w, int h, int elem)
+{
+ if (mem.type == MEM_PIXELS && !background) {
+ pixels_copy_from(mem, y, w, h);
+ }
+ else if (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) {
+ assert(!"mem_copy_from not supported for textures.");
+ }
+ else if (mem.host_pointer) {
+ const size_t size = elem * w * h;
+ const size_t offset = elem * y * w;
+
+ if (mem.device_pointer) {
+ const CUDAContextScope scope(this);
+ cuda_assert(cuMemcpyDtoH(
+ (char *)mem.host_pointer + offset, (CUdeviceptr)mem.device_pointer + offset, size));
+ }
+ else {
+ memset((char *)mem.host_pointer + offset, 0, size);
+ }
+ }
+}
+
+void CUDADevice::mem_zero(device_memory &mem)
+{
+ if (!mem.device_pointer) {
+ mem_alloc(mem);
+ }
+ if (!mem.device_pointer) {
+ return;
+ }
+
+ /* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
+ * regardless of mem.host_pointer and mem.shared_pointer. */
+ if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
+ const CUDAContextScope scope(this);
+ cuda_assert(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size()));
+ }
+ else if (mem.host_pointer) {
+ memset(mem.host_pointer, 0, mem.memory_size());
+ }
+}
+
+void CUDADevice::mem_free(device_memory &mem)
+{
+ if (mem.type == MEM_PIXELS && !background) {
+ pixels_free(mem);
+ }
+ else if (mem.type == MEM_GLOBAL) {
+ global_free(mem);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ tex_free((device_texture &)mem);
+ }
+ else {
+ generic_free(mem);
+ }
+}
+
+device_ptr CUDADevice::mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/)
+{
+ return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
+}
+
+void CUDADevice::const_copy_to(const char *name, void *host, size_t size)
+{
+ CUDAContextScope scope(this);
+ CUdeviceptr mem;
+ size_t bytes;
+
+ cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name));
+ // assert(bytes == size);
+ cuda_assert(cuMemcpyHtoD(mem, host, size));
+}
+
+void CUDADevice::global_alloc(device_memory &mem)
+{
+ if (mem.is_resident(this)) {
+ generic_alloc(mem);
+ generic_copy_to(mem);
+ }
+
+ const_copy_to(mem.name, &mem.device_pointer, sizeof(mem.device_pointer));
+}
+
+void CUDADevice::global_free(device_memory &mem)
+{
+ if (mem.is_resident(this) && mem.device_pointer) {
+ generic_free(mem);
+ }
+}
+
+void CUDADevice::tex_alloc(device_texture &mem)
+{
+ CUDAContextScope scope(this);
+
+ /* General variables for both architectures */
+ string bind_name = mem.name;
+ size_t dsize = datatype_size(mem.data_type);
+ size_t size = mem.memory_size();
+
+ CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP;
+ switch (mem.info.extension) {
+ case EXTENSION_REPEAT:
+ address_mode = CU_TR_ADDRESS_MODE_WRAP;
+ break;
+ case EXTENSION_EXTEND:
+ address_mode = CU_TR_ADDRESS_MODE_CLAMP;
+ break;
+ case EXTENSION_CLIP:
+ address_mode = CU_TR_ADDRESS_MODE_BORDER;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ CUfilter_mode filter_mode;
+ if (mem.info.interpolation == INTERPOLATION_CLOSEST) {
+ filter_mode = CU_TR_FILTER_MODE_POINT;
+ }
+ else {
+ filter_mode = CU_TR_FILTER_MODE_LINEAR;
+ }
+
+ /* Image Texture Storage */
+ CUarray_format_enum format;
+ switch (mem.data_type) {
+ case TYPE_UCHAR:
+ format = CU_AD_FORMAT_UNSIGNED_INT8;
+ break;
+ case TYPE_UINT16:
+ format = CU_AD_FORMAT_UNSIGNED_INT16;
+ break;
+ case TYPE_UINT:
+ format = CU_AD_FORMAT_UNSIGNED_INT32;
+ break;
+ case TYPE_INT:
+ format = CU_AD_FORMAT_SIGNED_INT32;
+ break;
+ case TYPE_FLOAT:
+ format = CU_AD_FORMAT_FLOAT;
+ break;
+ case TYPE_HALF:
+ format = CU_AD_FORMAT_HALF;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ CUDAMem *cmem = NULL;
+ CUarray array_3d = NULL;
+ size_t src_pitch = mem.data_width * dsize * mem.data_elements;
+ size_t dst_pitch = src_pitch;
+
+ if (!mem.is_resident(this)) {
+ cmem = &cuda_mem_map[&mem];
+ cmem->texobject = 0;
+
+ if (mem.data_depth > 1) {
+ array_3d = (CUarray)mem.device_pointer;
+ cmem->array = array_3d;
+ }
+ else if (mem.data_height > 0) {
+ dst_pitch = align_up(src_pitch, pitch_alignment);
+ }
+ }
+ else if (mem.data_depth > 1) {
+ /* 3D texture using array, there is no API for linear memory. */
+ CUDA_ARRAY3D_DESCRIPTOR desc;
+
+ desc.Width = mem.data_width;
+ desc.Height = mem.data_height;
+ desc.Depth = mem.data_depth;
+ desc.Format = format;
+ desc.NumChannels = mem.data_elements;
+ desc.Flags = 0;
+
+ VLOG(1) << "Array 3D allocate: " << mem.name << ", "
+ << string_human_readable_number(mem.memory_size()) << " bytes. ("
+ << string_human_readable_size(mem.memory_size()) << ")";
+
+ cuda_assert(cuArray3DCreate(&array_3d, &desc));
+
+ if (!array_3d) {
+ return;
+ }
+
+ CUDA_MEMCPY3D param;
+ memset(&param, 0, sizeof(param));
+ param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
+ param.dstArray = array_3d;
+ param.srcMemoryType = CU_MEMORYTYPE_HOST;
+ param.srcHost = mem.host_pointer;
+ param.srcPitch = src_pitch;
+ param.WidthInBytes = param.srcPitch;
+ param.Height = mem.data_height;
+ param.Depth = mem.data_depth;
+
+ cuda_assert(cuMemcpy3D(&param));
+
+ mem.device_pointer = (device_ptr)array_3d;
+ mem.device_size = size;
+ stats.mem_alloc(size);
+
+ cmem = &cuda_mem_map[&mem];
+ cmem->texobject = 0;
+ cmem->array = array_3d;
+ }
+ else if (mem.data_height > 0) {
+ /* 2D texture, using pitch aligned linear memory. */
+ dst_pitch = align_up(src_pitch, pitch_alignment);
+ size_t dst_size = dst_pitch * mem.data_height;
+
+ cmem = generic_alloc(mem, dst_size - mem.memory_size());
+ if (!cmem) {
+ return;
+ }
+
+ CUDA_MEMCPY2D param;
+ memset(&param, 0, sizeof(param));
+ param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
+ param.dstDevice = mem.device_pointer;
+ param.dstPitch = dst_pitch;
+ param.srcMemoryType = CU_MEMORYTYPE_HOST;
+ param.srcHost = mem.host_pointer;
+ param.srcPitch = src_pitch;
+ param.WidthInBytes = param.srcPitch;
+ param.Height = mem.data_height;
+
+ cuda_assert(cuMemcpy2DUnaligned(&param));
+ }
+ else {
+ /* 1D texture, using linear memory. */
+ cmem = generic_alloc(mem);
+ if (!cmem) {
+ return;
+ }
+
+ cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size));
+ }
+
+ /* Kepler+, bindless textures. */
+ CUDA_RESOURCE_DESC resDesc;
+ memset(&resDesc, 0, sizeof(resDesc));
+
+ if (array_3d) {
+ resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
+ resDesc.res.array.hArray = array_3d;
+ resDesc.flags = 0;
+ }
+ else if (mem.data_height > 0) {
+ resDesc.resType = CU_RESOURCE_TYPE_PITCH2D;
+ resDesc.res.pitch2D.devPtr = mem.device_pointer;
+ resDesc.res.pitch2D.format = format;
+ resDesc.res.pitch2D.numChannels = mem.data_elements;
+ resDesc.res.pitch2D.height = mem.data_height;
+ resDesc.res.pitch2D.width = mem.data_width;
+ resDesc.res.pitch2D.pitchInBytes = dst_pitch;
+ }
+ else {
+ resDesc.resType = CU_RESOURCE_TYPE_LINEAR;
+ resDesc.res.linear.devPtr = mem.device_pointer;
+ resDesc.res.linear.format = format;
+ resDesc.res.linear.numChannels = mem.data_elements;
+ resDesc.res.linear.sizeInBytes = mem.device_size;
+ }
+
+ CUDA_TEXTURE_DESC texDesc;
+ memset(&texDesc, 0, sizeof(texDesc));
+ texDesc.addressMode[0] = address_mode;
+ texDesc.addressMode[1] = address_mode;
+ texDesc.addressMode[2] = address_mode;
+ texDesc.filterMode = filter_mode;
+ texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
+
+ cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
+
+ /* Resize once */
+ const uint slot = mem.slot;
+ if (slot >= texture_info.size()) {
+ /* Allocate some slots in advance, to reduce the number
+ * of re-allocations. */
+ texture_info.resize(slot + 128);
+ }
+
+ /* Set mapping and tag that we need to (re-)upload to the device. */
+ texture_info[slot] = mem.info;
+ texture_info[slot].data = (uint64_t)cmem->texobject;
+ need_texture_info = true;
+}
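
The slot handling at the end of tex_alloc() grows texture_info in chunks of 128 entries so that registering many textures does not resize the array every time. A standalone sketch of that growth policy (illustrative only):

    #include <cstdio>
    #include <vector>

    // Illustrative only: grow-in-chunks policy used for the texture_info array.
    // Resizing to "slot + 128" amortizes reallocations across many textures.
    static void ensure_slot(std::vector<int> &info, unsigned slot)
    {
      if (slot >= info.size()) {
        info.resize(slot + 128);
      }
    }

    int main()
    {
      std::vector<int> info;
      for (unsigned slot = 0; slot < 300; slot++) {
        ensure_slot(info, slot);
      }
      std::printf("array grew to %zu entries for 300 slots\n", info.size());
      return 0;
    }
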
+
+void CUDADevice::tex_free(device_texture &mem)
+{
+ if (mem.device_pointer) {
+ CUDAContextScope scope(this);
+ const CUDAMem &cmem = cuda_mem_map[&mem];
+
+ if (cmem.texobject) {
+ /* Free bindless texture. */
+ cuTexObjectDestroy(cmem.texobject);
+ }
+
+ if (!mem.is_resident(this)) {
+ /* Do not free memory here, since it was allocated on a different device. */
+ cuda_mem_map.erase(cuda_mem_map.find(&mem));
+ }
+ else if (cmem.array) {
+ /* Free array. */
+ cuArrayDestroy(cmem.array);
+ stats.mem_free(mem.device_size);
+ mem.device_pointer = 0;
+ mem.device_size = 0;
+
+ cuda_mem_map.erase(cuda_mem_map.find(&mem));
+ }
+ else {
+ generic_free(mem);
+ }
+ }
+}
+
+# define CUDA_GET_BLOCKSIZE(func, w, h) \
+ int threads_per_block; \
+ cuda_assert( \
+ cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
+ int threads = (int)sqrt((float)threads_per_block); \
+ int xblocks = ((w) + threads - 1) / threads; \
+ int yblocks = ((h) + threads - 1) / threads;
+
+# define CUDA_LAUNCH_KERNEL(func, args) \
+ cuda_assert(cuLaunchKernel(func, xblocks, yblocks, 1, threads, threads, 1, 0, 0, args, 0));
+
+/* Similar to the above, but for 1-dimensional blocks. */
+# define CUDA_GET_BLOCKSIZE_1D(func, w, h) \
+ int threads_per_block; \
+ cuda_assert( \
+ cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
+ int xblocks = ((w) + threads_per_block - 1) / threads_per_block; \
+ int yblocks = h;
+
+# define CUDA_LAUNCH_KERNEL_1D(func, args) \
+ cuda_assert(cuLaunchKernel(func, xblocks, yblocks, 1, threads_per_block, 1, 1, 0, 0, args, 0));
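
As a worked example of CUDA_GET_BLOCKSIZE: for a kernel limited to 1024 threads per block, threads = sqrt(1024) = 32, so a 1920x1080 area launches xblocks = ceil(1920/32) = 60 and yblocks = ceil(1080/32) = 34. The same arithmetic as a standalone sketch (the 1024 limit is an assumption for illustration):

    #include <cmath>
    #include <cstdio>

    // Illustrative only: the arithmetic behind CUDA_GET_BLOCKSIZE, with an
    // assumed limit of 1024 threads per block and a 1920x1080 area.
    int main()
    {
      const int threads_per_block = 1024;
      const int w = 1920, h = 1080;

      const int threads = (int)std::sqrt((float)threads_per_block);  // 32
      const int xblocks = (w + threads - 1) / threads;               // 60
      const int yblocks = (h + threads - 1) / threads;               // 34

      std::printf("%d threads per side, %d x %d blocks\n", threads, xblocks, yblocks);
      return 0;
    }
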
+
+bool CUDADevice::denoising_non_local_means(device_ptr image_ptr,
+ device_ptr guide_ptr,
+ device_ptr variance_ptr,
+ device_ptr out_ptr,
+ DenoisingTask *task)
+{
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ int stride = task->buffer.stride;
+ int w = task->buffer.width;
+ int h = task->buffer.h;
+ int r = task->nlm_state.r;
+ int f = task->nlm_state.f;
+ float a = task->nlm_state.a;
+ float k_2 = task->nlm_state.k_2;
+
+ int pass_stride = task->buffer.pass_stride;
+ int num_shifts = (2 * r + 1) * (2 * r + 1);
+ int channel_offset = task->nlm_state.is_color ? task->buffer.pass_stride : 0;
+ int frame_offset = 0;
+
+ if (have_error())
+ return false;
+
+ CUdeviceptr difference = (CUdeviceptr)task->buffer.temporary_mem.device_pointer;
+ CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts;
+ CUdeviceptr weightAccum = difference + 2 * sizeof(float) * pass_stride * num_shifts;
+ CUdeviceptr scale_ptr = 0;
+
+ cuda_assert(cuMemsetD8(weightAccum, 0, sizeof(float) * pass_stride));
+ cuda_assert(cuMemsetD8(out_ptr, 0, sizeof(float) * pass_stride));
+
+ {
+ CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMUpdateOutput;
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMCalcDifference, cuFilterModule, "kernel_cuda_filter_nlm_calc_difference"));
+ cuda_assert(cuModuleGetFunction(&cuNLMBlur, cuFilterModule, "kernel_cuda_filter_nlm_blur"));
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMCalcWeight, cuFilterModule, "kernel_cuda_filter_nlm_calc_weight"));
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMUpdateOutput, cuFilterModule, "kernel_cuda_filter_nlm_update_output"));
+
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMUpdateOutput, CU_FUNC_CACHE_PREFER_L1));
+
+ CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference, w * h, num_shifts);
+
+ void *calc_difference_args[] = {&guide_ptr,
+ &variance_ptr,
+ &scale_ptr,
+ &difference,
+ &w,
+ &h,
+ &stride,
+ &pass_stride,
+ &r,
+ &channel_offset,
+ &frame_offset,
+ &a,
+ &k_2};
+ void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
+ void *calc_weight_args[] = {
+ &blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
+ void *update_output_args[] = {&blurDifference,
+ &image_ptr,
+ &out_ptr,
+ &weightAccum,
+ &w,
+ &h,
+ &stride,
+ &pass_stride,
+ &channel_offset,
+ &r,
+ &f};
+
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMUpdateOutput, update_output_args);
+ }
+
+ {
+ CUfunction cuNLMNormalize;
+ cuda_assert(
+ cuModuleGetFunction(&cuNLMNormalize, cuFilterModule, "kernel_cuda_filter_nlm_normalize"));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMNormalize, CU_FUNC_CACHE_PREFER_L1));
+ void *normalize_args[] = {&out_ptr, &weightAccum, &w, &h, &stride};
+ CUDA_GET_BLOCKSIZE(cuNLMNormalize, w, h);
+ CUDA_LAUNCH_KERNEL(cuNLMNormalize, normalize_args);
+ cuda_assert(cuCtxSynchronize());
+ }
+
+ return !have_error();
+}
+
+bool CUDADevice::denoising_construct_transform(DenoisingTask *task)
+{
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterConstructTransform;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterConstructTransform, cuFilterModule, "kernel_cuda_filter_construct_transform"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterConstructTransform, CU_FUNC_CACHE_PREFER_SHARED));
+ CUDA_GET_BLOCKSIZE(cuFilterConstructTransform, task->storage.w, task->storage.h);
+
+ void *args[] = {&task->buffer.mem.device_pointer,
+ &task->tile_info_mem.device_pointer,
+ &task->storage.transform.device_pointer,
+ &task->storage.rank.device_pointer,
+ &task->filter_area,
+ &task->rect,
+ &task->radius,
+ &task->pca_threshold,
+ &task->buffer.pass_stride,
+ &task->buffer.frame_stride,
+ &task->buffer.use_time};
+ CUDA_LAUNCH_KERNEL(cuFilterConstructTransform, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+}
+
+bool CUDADevice::denoising_accumulate(device_ptr color_ptr,
+ device_ptr color_variance_ptr,
+ device_ptr scale_ptr,
+ int frame,
+ DenoisingTask *task)
+{
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ int r = task->radius;
+ int f = 4;
+ float a = 1.0f;
+ float k_2 = task->nlm_k_2;
+
+ int w = task->reconstruction_state.source_w;
+ int h = task->reconstruction_state.source_h;
+ int stride = task->buffer.stride;
+ int frame_offset = frame * task->buffer.frame_stride;
+ int t = task->tile_info->frames[frame];
+
+ int pass_stride = task->buffer.pass_stride;
+ int num_shifts = (2 * r + 1) * (2 * r + 1);
+
+ if (have_error())
+ return false;
+
+ CUdeviceptr difference = (CUdeviceptr)task->buffer.temporary_mem.device_pointer;
+ CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts;
+
+ CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian;
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMCalcDifference, cuFilterModule, "kernel_cuda_filter_nlm_calc_difference"));
+ cuda_assert(cuModuleGetFunction(&cuNLMBlur, cuFilterModule, "kernel_cuda_filter_nlm_blur"));
+ cuda_assert(
+ cuModuleGetFunction(&cuNLMCalcWeight, cuFilterModule, "kernel_cuda_filter_nlm_calc_weight"));
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMConstructGramian, cuFilterModule, "kernel_cuda_filter_nlm_construct_gramian"));
+
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMConstructGramian, CU_FUNC_CACHE_PREFER_SHARED));
+
+ CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference,
+ task->reconstruction_state.source_w * task->reconstruction_state.source_h,
+ num_shifts);
+
+ void *calc_difference_args[] = {&color_ptr,
+ &color_variance_ptr,
+ &scale_ptr,
+ &difference,
+ &w,
+ &h,
+ &stride,
+ &pass_stride,
+ &r,
+ &pass_stride,
+ &frame_offset,
+ &a,
+ &k_2};
+ void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
+ void *calc_weight_args[] = {&blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
+ void *construct_gramian_args[] = {&t,
+ &blurDifference,
+ &task->buffer.mem.device_pointer,
+ &task->storage.transform.device_pointer,
+ &task->storage.rank.device_pointer,
+ &task->storage.XtWX.device_pointer,
+ &task->storage.XtWY.device_pointer,
+ &task->reconstruction_state.filter_window,
+ &w,
+ &h,
+ &stride,
+ &pass_stride,
+ &r,
+ &f,
+ &frame_offset,
+ &task->buffer.use_time};
+
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+}
+
+bool CUDADevice::denoising_solve(device_ptr output_ptr, DenoisingTask *task)
+{
+ CUfunction cuFinalize;
+ cuda_assert(cuModuleGetFunction(&cuFinalize, cuFilterModule, "kernel_cuda_filter_finalize"));
+ cuda_assert(cuFuncSetCacheConfig(cuFinalize, CU_FUNC_CACHE_PREFER_L1));
+ void *finalize_args[] = {&output_ptr,
+ &task->storage.rank.device_pointer,
+ &task->storage.XtWX.device_pointer,
+ &task->storage.XtWY.device_pointer,
+ &task->filter_area,
+ &task->reconstruction_state.buffer_params.x,
+ &task->render_buffer.samples};
+ CUDA_GET_BLOCKSIZE(
+ cuFinalize, task->reconstruction_state.source_w, task->reconstruction_state.source_h);
+ CUDA_LAUNCH_KERNEL(cuFinalize, finalize_args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+}
+
+bool CUDADevice::denoising_combine_halves(device_ptr a_ptr,
+ device_ptr b_ptr,
+ device_ptr mean_ptr,
+ device_ptr variance_ptr,
+ int r,
+ int4 rect,
+ DenoisingTask *task)
+{
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterCombineHalves;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterCombineHalves, cuFilterModule, "kernel_cuda_filter_combine_halves"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterCombineHalves, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(
+ cuFilterCombineHalves, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
+
+ void *args[] = {&mean_ptr, &variance_ptr, &a_ptr, &b_ptr, &rect, &r};
+ CUDA_LAUNCH_KERNEL(cuFilterCombineHalves, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+}
+
+bool CUDADevice::denoising_divide_shadow(device_ptr a_ptr,
+ device_ptr b_ptr,
+ device_ptr sample_variance_ptr,
+ device_ptr sv_variance_ptr,
+ device_ptr buffer_variance_ptr,
+ DenoisingTask *task)
+{
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterDivideShadow;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterDivideShadow, cuFilterModule, "kernel_cuda_filter_divide_shadow"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterDivideShadow, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(
+ cuFilterDivideShadow, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
+
+ void *args[] = {&task->render_buffer.samples,
+ &task->tile_info_mem.device_pointer,
+ &a_ptr,
+ &b_ptr,
+ &sample_variance_ptr,
+ &sv_variance_ptr,
+ &buffer_variance_ptr,
+ &task->rect,
+ &task->render_buffer.pass_stride,
+ &task->render_buffer.offset};
+ CUDA_LAUNCH_KERNEL(cuFilterDivideShadow, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+}
+
+bool CUDADevice::denoising_get_feature(int mean_offset,
+ int variance_offset,
+ device_ptr mean_ptr,
+ device_ptr variance_ptr,
+ float scale,
+ DenoisingTask *task)
+{
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterGetFeature;
+ cuda_assert(
+ cuModuleGetFunction(&cuFilterGetFeature, cuFilterModule, "kernel_cuda_filter_get_feature"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterGetFeature, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(cuFilterGetFeature, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
+
+ void *args[] = {&task->render_buffer.samples,
+ &task->tile_info_mem.device_pointer,
+ &mean_offset,
+ &variance_offset,
+ &mean_ptr,
+ &variance_ptr,
+ &scale,
+ &task->rect,
+ &task->render_buffer.pass_stride,
+ &task->render_buffer.offset};
+ CUDA_LAUNCH_KERNEL(cuFilterGetFeature, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+}
+
+bool CUDADevice::denoising_write_feature(int out_offset,
+ device_ptr from_ptr,
+ device_ptr buffer_ptr,
+ DenoisingTask *task)
+{
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterWriteFeature;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterWriteFeature, cuFilterModule, "kernel_cuda_filter_write_feature"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterWriteFeature, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(cuFilterWriteFeature, task->filter_area.z, task->filter_area.w);
+
+ void *args[] = {&task->render_buffer.samples,
+ &task->reconstruction_state.buffer_params,
+ &task->filter_area,
+ &from_ptr,
+ &buffer_ptr,
+ &out_offset,
+ &task->rect};
+ CUDA_LAUNCH_KERNEL(cuFilterWriteFeature, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+}
+
+bool CUDADevice::denoising_detect_outliers(device_ptr image_ptr,
+ device_ptr variance_ptr,
+ device_ptr depth_ptr,
+ device_ptr output_ptr,
+ DenoisingTask *task)
+{
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterDetectOutliers;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterDetectOutliers, cuFilterModule, "kernel_cuda_filter_detect_outliers"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterDetectOutliers, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(
+ cuFilterDetectOutliers, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
+
+ void *args[] = {
+ &image_ptr, &variance_ptr, &depth_ptr, &output_ptr, &task->rect, &task->buffer.pass_stride};
+
+ CUDA_LAUNCH_KERNEL(cuFilterDetectOutliers, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+}
+
+void CUDADevice::denoise(RenderTile &rtile, DenoisingTask &denoising)
+{
+ denoising.functions.construct_transform = function_bind(
+ &CUDADevice::denoising_construct_transform, this, &denoising);
+ denoising.functions.accumulate = function_bind(
+ &CUDADevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising);
+ denoising.functions.solve = function_bind(&CUDADevice::denoising_solve, this, _1, &denoising);
+ denoising.functions.divide_shadow = function_bind(
+ &CUDADevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
+ denoising.functions.non_local_means = function_bind(
+ &CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
+ denoising.functions.combine_halves = function_bind(
+ &CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
+ denoising.functions.get_feature = function_bind(
+ &CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, _5, &denoising);
+ denoising.functions.write_feature = function_bind(
+ &CUDADevice::denoising_write_feature, this, _1, _2, _3, &denoising);
+ denoising.functions.detect_outliers = function_bind(
+ &CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
+
+ denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
+ denoising.render_buffer.samples = rtile.sample;
+ denoising.buffer.gpu_temporary_mem = true;
+
+ denoising.run_denoising(rtile);
+}
+
+void CUDADevice::adaptive_sampling_filter(uint filter_sample,
+ WorkTile *wtile,
+ CUdeviceptr d_wtile,
+ CUstream stream)
+{
+ const int num_threads_per_block = functions.adaptive_num_threads_per_block;
+
+  /* These are a series of tiny kernels because there is no grid synchronization
+   * from within a kernel, so the work is split across multiple kernel launches. */
+ uint total_work_size = wtile->h * wtile->w;
+ void *args2[] = {&d_wtile, &filter_sample, &total_work_size};
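+  /* The same argument block is reused for all three launches below; only
+   * total_work_size is updated in between. */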
+ uint num_blocks = divide_up(total_work_size, num_threads_per_block);
+ cuda_assert(cuLaunchKernel(functions.adaptive_stopping,
+ num_blocks,
+ 1,
+ 1,
+ num_threads_per_block,
+ 1,
+ 1,
+ 0,
+ stream,
+ args2,
+ 0));
+ total_work_size = wtile->h;
+ num_blocks = divide_up(total_work_size, num_threads_per_block);
+ cuda_assert(cuLaunchKernel(functions.adaptive_filter_x,
+ num_blocks,
+ 1,
+ 1,
+ num_threads_per_block,
+ 1,
+ 1,
+ 0,
+ stream,
+ args2,
+ 0));
+ total_work_size = wtile->w;
+ num_blocks = divide_up(total_work_size, num_threads_per_block);
+ cuda_assert(cuLaunchKernel(functions.adaptive_filter_y,
+ num_blocks,
+ 1,
+ 1,
+ num_threads_per_block,
+ 1,
+ 1,
+ 0,
+ stream,
+ args2,
+ 0));
+}
+
+void CUDADevice::adaptive_sampling_post(RenderTile &rtile,
+ WorkTile *wtile,
+ CUdeviceptr d_wtile,
+ CUstream stream)
+{
+ const int num_threads_per_block = functions.adaptive_num_threads_per_block;
+ uint total_work_size = wtile->h * wtile->w;
+
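+  /* Rescale pixels that stopped sampling early so their accumulated values
+   * match the final sample count, as in the CPU adaptive_sampling_post. */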
+ void *args[] = {&d_wtile, &rtile.start_sample, &rtile.sample, &total_work_size};
+ uint num_blocks = divide_up(total_work_size, num_threads_per_block);
+ cuda_assert(cuLaunchKernel(functions.adaptive_scale_samples,
+ num_blocks,
+ 1,
+ 1,
+ num_threads_per_block,
+ 1,
+ 1,
+ 0,
+ stream,
+ args,
+ 0));
+}
+
+void CUDADevice::render(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles)
+{
+ scoped_timer timer(&rtile.buffers->render_time);
+
+ if (have_error())
+ return;
+
+ CUDAContextScope scope(this);
+ CUfunction cuRender;
+
+ /* Get kernel function. */
+ if (rtile.task == RenderTile::BAKE) {
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_bake"));
+ }
+ else if (task.integrator_branched) {
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_branched_path_trace"));
+ }
+ else {
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_path_trace"));
+ }
+
+ if (have_error()) {
+ return;
+ }
+
+ cuda_assert(cuFuncSetCacheConfig(cuRender, CU_FUNC_CACHE_PREFER_L1));
+
+ /* Allocate work tile. */
+ work_tiles.alloc(1);
+
+ WorkTile *wtile = work_tiles.data();
+ wtile->x = rtile.x;
+ wtile->y = rtile.y;
+ wtile->w = rtile.w;
+ wtile->h = rtile.h;
+ wtile->offset = rtile.offset;
+ wtile->stride = rtile.stride;
+ wtile->buffer = (float *)(CUdeviceptr)rtile.buffer;
+
+ /* Prepare work size. More step samples render faster, but for now we
+ * remain conservative for GPUs connected to a display to avoid driver
+ * timeouts and display freezing. */
+ int min_blocks, num_threads_per_block;
+ cuda_assert(
+ cuOccupancyMaxPotentialBlockSize(&min_blocks, &num_threads_per_block, cuRender, NULL, 0, 0));
+ if (!info.display_device) {
+ min_blocks *= 8;
+ }
+
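+  /* Pick enough samples per launch to keep roughly min_blocks *
+   * num_threads_per_block threads busy across the tile. */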
+ uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h);
+ if (task.adaptive_sampling.use) {
+ step_samples = task.adaptive_sampling.align_static_samples(step_samples);
+ }
+
+ /* Render all samples. */
+ int start_sample = rtile.start_sample;
+ int end_sample = rtile.start_sample + rtile.num_samples;
+
+ for (int sample = start_sample; sample < end_sample; sample += step_samples) {
+ /* Setup and copy work tile to device. */
+ wtile->start_sample = sample;
+ wtile->num_samples = min(step_samples, end_sample - sample);
+ work_tiles.copy_to_device();
+
+ CUdeviceptr d_work_tiles = (CUdeviceptr)work_tiles.device_pointer;
+ uint total_work_size = wtile->w * wtile->h * wtile->num_samples;
+ uint num_blocks = divide_up(total_work_size, num_threads_per_block);
+
+ /* Launch kernel. */
+ void *args[] = {&d_work_tiles, &total_work_size};
+
+ cuda_assert(
+ cuLaunchKernel(cuRender, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
+
+ /* Run the adaptive sampling kernels at selected samples aligned to step samples. */
+ uint filter_sample = sample + wtile->num_samples - 1;
+ if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(filter_sample)) {
+ adaptive_sampling_filter(filter_sample, wtile, d_work_tiles);
+ }
+
+ cuda_assert(cuCtxSynchronize());
+
+ /* Update progress. */
+ rtile.sample = sample + wtile->num_samples;
+ task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples);
+
+ if (task.get_cancel()) {
+ if (task.need_finish_queue == false)
+ break;
+ }
+ }
+
+ /* Finalize adaptive sampling. */
+ if (task.adaptive_sampling.use) {
+ CUdeviceptr d_work_tiles = (CUdeviceptr)work_tiles.device_pointer;
+ adaptive_sampling_post(rtile, wtile, d_work_tiles);
+ cuda_assert(cuCtxSynchronize());
+ task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples);
+ }
+}
+
+void CUDADevice::film_convert(DeviceTask &task,
+ device_ptr buffer,
+ device_ptr rgba_byte,
+ device_ptr rgba_half)
+{
+ if (have_error())
+ return;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilmConvert;
+ CUdeviceptr d_rgba = map_pixels((rgba_byte) ? rgba_byte : rgba_half);
+ CUdeviceptr d_buffer = (CUdeviceptr)buffer;
+
+ /* get kernel function */
+ if (rgba_half) {
+ cuda_assert(
+ cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_half_float"));
+ }
+ else {
+ cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte"));
+ }
+
+ float sample_scale = 1.0f / (task.sample + 1);
+
+ /* pass in parameters */
+ void *args[] = {&d_rgba,
+ &d_buffer,
+ &sample_scale,
+ &task.x,
+ &task.y,
+ &task.w,
+ &task.h,
+ &task.offset,
+ &task.stride};
+
+ /* launch kernel */
+ int threads_per_block;
+ cuda_assert(cuFuncGetAttribute(
+ &threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuFilmConvert));
+
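+  /* Use a roughly square 2D thread block within the kernel's maximum threads
+   * per block. */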
+ int xthreads = (int)sqrt(threads_per_block);
+ int ythreads = (int)sqrt(threads_per_block);
+ int xblocks = (task.w + xthreads - 1) / xthreads;
+ int yblocks = (task.h + ythreads - 1) / ythreads;
+
+ cuda_assert(cuFuncSetCacheConfig(cuFilmConvert, CU_FUNC_CACHE_PREFER_L1));
+
+ cuda_assert(cuLaunchKernel(cuFilmConvert,
+ xblocks,
+ yblocks,
+ 1, /* blocks */
+ xthreads,
+ ythreads,
+ 1, /* threads */
+ 0,
+ 0,
+ args,
+ 0));
+
+ unmap_pixels((rgba_byte) ? rgba_byte : rgba_half);
+
+ cuda_assert(cuCtxSynchronize());
+}
+
+void CUDADevice::shader(DeviceTask &task)
+{
+ if (have_error())
+ return;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuShader;
+ CUdeviceptr d_input = (CUdeviceptr)task.shader_input;
+ CUdeviceptr d_output = (CUdeviceptr)task.shader_output;
+
+ /* get kernel function */
+ if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
+ cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_displace"));
+ }
+ else {
+ cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_background"));
+ }
+
+ /* do tasks in smaller chunks, so we can cancel it */
+ const int shader_chunk_size = 65536;
+ const int start = task.shader_x;
+ const int end = task.shader_x + task.shader_w;
+ int offset = task.offset;
+
+ bool canceled = false;
+ for (int sample = 0; sample < task.num_samples && !canceled; sample++) {
+ for (int shader_x = start; shader_x < end; shader_x += shader_chunk_size) {
+ int shader_w = min(shader_chunk_size, end - shader_x);
+
+ /* pass in parameters */
+ void *args[8];
+ int arg = 0;
+ args[arg++] = &d_input;
+ args[arg++] = &d_output;
+ args[arg++] = &task.shader_eval_type;
+ if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
+ args[arg++] = &task.shader_filter;
+ }
+ args[arg++] = &shader_x;
+ args[arg++] = &shader_w;
+ args[arg++] = &offset;
+ args[arg++] = &sample;
+
+ /* launch kernel */
+ int threads_per_block;
+ cuda_assert(cuFuncGetAttribute(
+ &threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
+
+ int xblocks = (shader_w + threads_per_block - 1) / threads_per_block;
+
+ cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuLaunchKernel(cuShader,
+ xblocks,
+ 1,
+ 1, /* blocks */
+ threads_per_block,
+ 1,
+ 1, /* threads */
+ 0,
+ 0,
+ args,
+ 0));
+
+ cuda_assert(cuCtxSynchronize());
+
+ if (task.get_cancel()) {
+ canceled = true;
+ break;
+ }
+ }
+
+ task.update_progress(NULL);
+ }
+}
+
+CUdeviceptr CUDADevice::map_pixels(device_ptr mem)
+{
+ if (!background) {
+ PixelMem pmem = pixel_mem_map[mem];
+ CUdeviceptr buffer;
+
+ size_t bytes;
+ cuda_assert(cuGraphicsMapResources(1, &pmem.cuPBOresource, 0));
+ cuda_assert(cuGraphicsResourceGetMappedPointer(&buffer, &bytes, pmem.cuPBOresource));
+
+ return buffer;
+ }
+
+ return (CUdeviceptr)mem;
+}
+
+void CUDADevice::unmap_pixels(device_ptr mem)
+{
+ if (!background) {
+ PixelMem pmem = pixel_mem_map[mem];
+
+ cuda_assert(cuGraphicsUnmapResources(1, &pmem.cuPBOresource, 0));
+ }
+}
+
+void CUDADevice::pixels_alloc(device_memory &mem)
+{
+ PixelMem pmem;
+
+ pmem.w = mem.data_width;
+ pmem.h = mem.data_height;
+
+ CUDAContextScope scope(this);
+
+ glGenBuffers(1, &pmem.cuPBO);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
+ if (mem.data_type == TYPE_HALF)
+ glBufferData(
+ GL_PIXEL_UNPACK_BUFFER, pmem.w * pmem.h * sizeof(GLhalf) * 4, NULL, GL_DYNAMIC_DRAW);
+ else
+ glBufferData(
+ GL_PIXEL_UNPACK_BUFFER, pmem.w * pmem.h * sizeof(uint8_t) * 4, NULL, GL_DYNAMIC_DRAW);
+
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+ glActiveTexture(GL_TEXTURE0);
+ glGenTextures(1, &pmem.cuTexId);
+ glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
+ if (mem.data_type == TYPE_HALF)
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, pmem.w, pmem.h, 0, GL_RGBA, GL_HALF_FLOAT, NULL);
+ else
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ glBindTexture(GL_TEXTURE_2D, 0);
+
+ CUresult result = cuGraphicsGLRegisterBuffer(
+ &pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
+
+ if (result == CUDA_SUCCESS) {
+ mem.device_pointer = pmem.cuTexId;
+ pixel_mem_map[mem.device_pointer] = pmem;
+
+ mem.device_size = mem.memory_size();
+ stats.mem_alloc(mem.device_size);
+
+ return;
+ }
+ else {
+ /* failed to register buffer, fallback to no interop */
+ glDeleteBuffers(1, &pmem.cuPBO);
+ glDeleteTextures(1, &pmem.cuTexId);
+
+ background = true;
+ }
+}
+
+void CUDADevice::pixels_copy_from(device_memory &mem, int y, int w, int h)
+{
+ PixelMem pmem = pixel_mem_map[mem.device_pointer];
+
+ CUDAContextScope scope(this);
+
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
+ uchar *pixels = (uchar *)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY);
+ size_t offset = sizeof(uchar) * 4 * y * w;
+ memcpy((uchar *)mem.host_pointer + offset, pixels + offset, sizeof(uchar) * 4 * w * h);
+ glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+}
+
+void CUDADevice::pixels_free(device_memory &mem)
+{
+ if (mem.device_pointer) {
+ PixelMem pmem = pixel_mem_map[mem.device_pointer];
+
+ CUDAContextScope scope(this);
+
+ cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource));
+ glDeleteBuffers(1, &pmem.cuPBO);
+ glDeleteTextures(1, &pmem.cuTexId);
+
+ pixel_mem_map.erase(pixel_mem_map.find(mem.device_pointer));
+ mem.device_pointer = 0;
+
+ stats.mem_free(mem.device_size);
+ mem.device_size = 0;
+ }
+}
+
+void CUDADevice::draw_pixels(device_memory &mem,
+ int y,
+ int w,
+ int h,
+ int width,
+ int height,
+ int dx,
+ int dy,
+ int dw,
+ int dh,
+ bool transparent,
+ const DeviceDrawParams &draw_params)
+{
+ assert(mem.type == MEM_PIXELS);
+
+ if (!background) {
+ const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL);
+ PixelMem pmem = pixel_mem_map[mem.device_pointer];
+ float *vpointer;
+
+ CUDAContextScope scope(this);
+
+    /* for multi devices, this assumes the inefficient approach of allocating
+     * all pixels on the device even though we only render to a subset */
+ size_t offset = 4 * y * w;
+
+ if (mem.data_type == TYPE_HALF)
+ offset *= sizeof(GLhalf);
+ else
+ offset *= sizeof(uint8_t);
+
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
+ glActiveTexture(GL_TEXTURE0);
+ glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
+ if (mem.data_type == TYPE_HALF) {
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_HALF_FLOAT, (void *)offset);
+ }
+ else {
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, (void *)offset);
+ }
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+ if (transparent) {
+ glEnable(GL_BLEND);
+ glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
+ }
+
+ GLint shader_program;
+ if (use_fallback_shader) {
+ if (!bind_fallback_display_space_shader(dw, dh)) {
+ return;
+ }
+ shader_program = fallback_shader_program;
+ }
+ else {
+ draw_params.bind_display_space_shader_cb();
+ glGetIntegerv(GL_CURRENT_PROGRAM, &shader_program);
+ }
+
+ if (!vertex_buffer) {
+ glGenBuffers(1, &vertex_buffer);
+ }
+
+ glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
+ /* invalidate old contents -
+ * avoids stalling if buffer is still waiting in queue to be rendered */
+ glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
+
+ vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
+
+ if (vpointer) {
+ /* texture coordinate - vertex pair */
+ vpointer[0] = 0.0f;
+ vpointer[1] = 0.0f;
+ vpointer[2] = dx;
+ vpointer[3] = dy;
+
+ vpointer[4] = (float)w / (float)pmem.w;
+ vpointer[5] = 0.0f;
+ vpointer[6] = (float)width + dx;
+ vpointer[7] = dy;
+
+ vpointer[8] = (float)w / (float)pmem.w;
+ vpointer[9] = (float)h / (float)pmem.h;
+ vpointer[10] = (float)width + dx;
+ vpointer[11] = (float)height + dy;
+
+ vpointer[12] = 0.0f;
+ vpointer[13] = (float)h / (float)pmem.h;
+ vpointer[14] = dx;
+ vpointer[15] = (float)height + dy;
+
+ glUnmapBuffer(GL_ARRAY_BUFFER);
+ }
+
+ GLuint vertex_array_object;
+ GLuint position_attribute, texcoord_attribute;
+
+ glGenVertexArrays(1, &vertex_array_object);
+ glBindVertexArray(vertex_array_object);
+
+ texcoord_attribute = glGetAttribLocation(shader_program, "texCoord");
+ position_attribute = glGetAttribLocation(shader_program, "pos");
+
+ glEnableVertexAttribArray(texcoord_attribute);
+ glEnableVertexAttribArray(position_attribute);
+
+ glVertexAttribPointer(
+ texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
+ glVertexAttribPointer(position_attribute,
+ 2,
+ GL_FLOAT,
+ GL_FALSE,
+ 4 * sizeof(float),
+ (const GLvoid *)(sizeof(float) * 2));
+
+ glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+ if (use_fallback_shader) {
+ glUseProgram(0);
+ }
+ else {
+ draw_params.unbind_display_space_shader_cb();
+ }
+
+ if (transparent) {
+ glDisable(GL_BLEND);
+ }
+
+ glBindTexture(GL_TEXTURE_2D, 0);
+
+ return;
+ }
+
+ Device::draw_pixels(mem, y, w, h, width, height, dx, dy, dw, dh, transparent, draw_params);
+}
+
+void CUDADevice::thread_run(DeviceTask &task)
+{
+ CUDAContextScope scope(this);
+
+ if (task.type == DeviceTask::RENDER) {
+ DeviceRequestedFeatures requested_features;
+ if (use_split_kernel()) {
+ if (split_kernel == NULL) {
+ split_kernel = new CUDASplitKernel(this);
+ split_kernel->load_kernels(requested_features);
+ }
+ }
+
+ device_vector<WorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY);
+
+ /* keep rendering tiles until done */
+ RenderTile tile;
+ DenoisingTask denoising(this, task);
+
+ while (task.acquire_tile(this, tile, task.tile_types)) {
+ if (tile.task == RenderTile::PATH_TRACE) {
+ if (use_split_kernel()) {
+ device_only_memory<uchar> void_buffer(this, "void_buffer");
+ split_kernel->path_trace(task, tile, void_buffer, void_buffer);
+ }
+ else {
+ render(task, tile, work_tiles);
+ }
+ }
+ else if (tile.task == RenderTile::BAKE) {
+ render(task, tile, work_tiles);
+ }
+ else if (tile.task == RenderTile::DENOISE) {
+ tile.sample = tile.start_sample + tile.num_samples;
+
+ denoise(tile, denoising);
+
+ task.update_progress(&tile, tile.w * tile.h);
+ }
+
+ task.release_tile(tile);
+
+ if (task.get_cancel()) {
+ if (task.need_finish_queue == false)
+ break;
+ }
+ }
+
+ work_tiles.free();
+ }
+ else if (task.type == DeviceTask::SHADER) {
+ shader(task);
+
+ cuda_assert(cuCtxSynchronize());
+ }
+ else if (task.type == DeviceTask::DENOISE_BUFFER) {
+ RenderTile tile;
+ tile.x = task.x;
+ tile.y = task.y;
+ tile.w = task.w;
+ tile.h = task.h;
+ tile.buffer = task.buffer;
+ tile.sample = task.sample + task.num_samples;
+ tile.num_samples = task.num_samples;
+ tile.start_sample = task.sample;
+ tile.offset = task.offset;
+ tile.stride = task.stride;
+ tile.buffers = task.buffers;
+
+ DenoisingTask denoising(this, task);
+ denoise(tile, denoising);
+ task.update_progress(&tile, tile.w * tile.h);
+ }
+}
+
+void CUDADevice::task_add(DeviceTask &task)
+{
+ CUDAContextScope scope(this);
+
+ /* Load texture info. */
+ load_texture_info();
+
+ /* Synchronize all memory copies before executing task. */
+ cuda_assert(cuCtxSynchronize());
+
+ if (task.type == DeviceTask::FILM_CONVERT) {
+ /* must be done in main thread due to opengl access */
+ film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
+ }
+ else {
+ task_pool.push([=] {
+ DeviceTask task_copy = task;
+ thread_run(task_copy);
+ });
+ }
+}
+
+void CUDADevice::task_wait()
+{
+ task_pool.wait();
+}
+
+void CUDADevice::task_cancel()
+{
+ task_pool.cancel();
+}
+
+/* redefine the cuda_assert macro so it can be used outside of the CUDADevice class
+ * now that the definition of that class is complete
+ */
+# undef cuda_assert
+# define cuda_assert(stmt) \
+ { \
+ CUresult result = stmt; \
+ if (result != CUDA_SUCCESS) { \
+ const char *name = cuewErrorString(result); \
+ device->set_error( \
+ string_printf("%s in %s (device_cuda_impl.cpp:%d)", name, #stmt, __LINE__)); \
+ } \
+ } \
+ (void)0
+
+/* CUDA context scope. */
+
+CUDAContextScope::CUDAContextScope(CUDADevice *device) : device(device)
+{
+ cuda_assert(cuCtxPushCurrent(device->cuContext));
+}
+
+CUDAContextScope::~CUDAContextScope()
+{
+ cuda_assert(cuCtxPopCurrent(NULL));
+}
+
+/* split kernel */
+
+class CUDASplitKernelFunction : public SplitKernelFunction {
+ CUDADevice *device;
+ CUfunction func;
+
+ public:
+ CUDASplitKernelFunction(CUDADevice *device, CUfunction func) : device(device), func(func)
+ {
+ }
+
+ /* enqueue the kernel, returns false if there is an error */
+ bool enqueue(const KernelDimensions &dim, device_memory & /*kg*/, device_memory & /*data*/)
+ {
+ return enqueue(dim, NULL);
+ }
+
+ /* enqueue the kernel, returns false if there is an error */
+ bool enqueue(const KernelDimensions &dim, void *args[])
+ {
+ if (device->have_error())
+ return false;
+
+ CUDAContextScope scope(device);
+
+ /* we ignore dim.local_size for now, as this is faster */
+ int threads_per_block;
+ cuda_assert(
+ cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func));
+
+ int xblocks = (dim.global_size[0] * dim.global_size[1] + threads_per_block - 1) /
+ threads_per_block;
+
+ cuda_assert(cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1));
+
+ cuda_assert(cuLaunchKernel(func,
+ xblocks,
+ 1,
+ 1, /* blocks */
+ threads_per_block,
+ 1,
+ 1, /* threads */
+ 0,
+ 0,
+ args,
+ 0));
+
+ return !device->have_error();
+ }
+};
+
+CUDASplitKernel::CUDASplitKernel(CUDADevice *device) : DeviceSplitKernel(device), device(device)
+{
+}
+
+uint64_t CUDASplitKernel::state_buffer_size(device_memory & /*kg*/,
+ device_memory & /*data*/,
+ size_t num_threads)
+{
+ CUDAContextScope scope(device);
+
+ device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE);
+ size_buffer.alloc(1);
+ size_buffer.zero_to_device();
+
+ uint threads = num_threads;
+ CUdeviceptr d_size = (CUdeviceptr)size_buffer.device_pointer;
+
+ struct args_t {
+ uint *num_threads;
+ CUdeviceptr *size;
+ };
+
+ args_t args = {&threads, &d_size};
+
+ CUfunction state_buffer_size;
+ cuda_assert(
+ cuModuleGetFunction(&state_buffer_size, device->cuModule, "kernel_cuda_state_buffer_size"));
+
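+  /* Single-thread launch that writes the split state size required for
+   * num_threads into size_buffer. */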
+ cuda_assert(cuLaunchKernel(state_buffer_size, 1, 1, 1, 1, 1, 1, 0, 0, (void **)&args, 0));
+
+ size_buffer.copy_from_device(0, 1, 1);
+ size_t size = size_buffer[0];
+ size_buffer.free();
+
+ return size;
+}
+
+bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions &dim,
+ RenderTile &rtile,
+ int num_global_elements,
+ device_memory & /*kernel_globals*/,
+ device_memory & /*kernel_data*/,
+ device_memory &split_data,
+ device_memory &ray_state,
+ device_memory &queue_index,
+ device_memory &use_queues_flag,
+ device_memory &work_pool_wgs)
+{
+ CUDAContextScope scope(device);
+
+ CUdeviceptr d_split_data = (CUdeviceptr)split_data.device_pointer;
+ CUdeviceptr d_ray_state = (CUdeviceptr)ray_state.device_pointer;
+ CUdeviceptr d_queue_index = (CUdeviceptr)queue_index.device_pointer;
+ CUdeviceptr d_use_queues_flag = (CUdeviceptr)use_queues_flag.device_pointer;
+ CUdeviceptr d_work_pool_wgs = (CUdeviceptr)work_pool_wgs.device_pointer;
+
+ CUdeviceptr d_buffer = (CUdeviceptr)rtile.buffer;
+
+ int end_sample = rtile.start_sample + rtile.num_samples;
+ int queue_size = dim.global_size[0] * dim.global_size[1];
+
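+  /* Argument struct matching the parameter order of
+   * kernel_cuda_path_trace_data_init; each field points at the corresponding
+   * launch argument. */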
+ struct args_t {
+ CUdeviceptr *split_data_buffer;
+ int *num_elements;
+ CUdeviceptr *ray_state;
+ int *start_sample;
+ int *end_sample;
+ int *sx;
+ int *sy;
+ int *sw;
+ int *sh;
+ int *offset;
+ int *stride;
+ CUdeviceptr *queue_index;
+ int *queuesize;
+ CUdeviceptr *use_queues_flag;
+ CUdeviceptr *work_pool_wgs;
+ int *num_samples;
+ CUdeviceptr *buffer;
+ };
+
+ args_t args = {&d_split_data,
+ &num_global_elements,
+ &d_ray_state,
+ &rtile.start_sample,
+ &end_sample,
+ &rtile.x,
+ &rtile.y,
+ &rtile.w,
+ &rtile.h,
+ &rtile.offset,
+ &rtile.stride,
+ &d_queue_index,
+ &queue_size,
+ &d_use_queues_flag,
+ &d_work_pool_wgs,
+ &rtile.num_samples,
+ &d_buffer};
+
+ CUfunction data_init;
+ cuda_assert(
+ cuModuleGetFunction(&data_init, device->cuModule, "kernel_cuda_path_trace_data_init"));
+ if (device->have_error()) {
+ return false;
+ }
+
+ CUDASplitKernelFunction(device, data_init).enqueue(dim, (void **)&args);
+
+ return !device->have_error();
+}
+
+SplitKernelFunction *CUDASplitKernel::get_split_kernel_function(const string &kernel_name,
+ const DeviceRequestedFeatures &)
+{
+ const CUDAContextScope scope(device);
+
+ CUfunction func;
+ const CUresult result = cuModuleGetFunction(
+ &func, device->cuModule, (string("kernel_cuda_") + kernel_name).data());
+ if (result != CUDA_SUCCESS) {
+ device->set_error(string_printf("Could not find kernel \"kernel_cuda_%s\" in module (%s)",
+ kernel_name.data(),
+ cuewErrorString(result)));
+ return NULL;
+ }
+
+ return new CUDASplitKernelFunction(device, func);
+}
+
+int2 CUDASplitKernel::split_kernel_local_size()
+{
+ return make_int2(32, 1);
+}
+
+int2 CUDASplitKernel::split_kernel_global_size(device_memory &kg,
+ device_memory &data,
+ DeviceTask & /*task*/)
+{
+ CUDAContextScope scope(device);
+ size_t free;
+ size_t total;
+
+ cuda_assert(cuMemGetInfo(&free, &total));
+
+ VLOG(1) << "Maximum device allocation size: " << string_human_readable_number(free)
+ << " bytes. (" << string_human_readable_size(free) << ").";
+
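+  /* Use at most half of the free device memory for the split state, and round
+   * the launch dimensions down to multiples of 32 and 16. */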
+ size_t num_elements = max_elements_for_max_buffer_size(kg, data, free / 2);
+ size_t side = round_down((int)sqrt(num_elements), 32);
+ int2 global_size = make_int2(side, round_down(num_elements / side, 16));
+ VLOG(1) << "Global size: " << global_size << ".";
+ return global_size;
+}
+
+CCL_NAMESPACE_END
+
+#endif
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index 16a68e8b855..407f73e8451 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -25,11 +25,11 @@
#include "util/util_logging.h"
#include "util/util_math.h"
#include "util/util_opengl.h"
-#include "util/util_time.h"
+#include "util/util_string.h"
#include "util/util_system.h"
+#include "util/util_time.h"
#include "util/util_types.h"
#include "util/util_vector.h"
-#include "util/util_string.h"
CCL_NAMESPACE_BEGIN
@@ -38,6 +38,7 @@ bool Device::need_devices_update = true;
thread_mutex Device::device_mutex;
vector<DeviceInfo> Device::opencl_devices;
vector<DeviceInfo> Device::cuda_devices;
+vector<DeviceInfo> Device::optix_devices;
vector<DeviceInfo> Device::cpu_devices;
vector<DeviceInfo> Device::network_devices;
uint Device::devices_initialized_mask = 0;
@@ -76,7 +77,7 @@ std::ostream &operator<<(std::ostream &os, const DeviceRequestedFeatures &reques
/* Device */
-Device::~Device()
+Device::~Device() noexcept(false)
{
if (!background) {
if (vertex_buffer != 0) {
@@ -208,13 +209,13 @@ bool Device::bind_fallback_display_space_shader(const float width, const float h
glUseProgram(fallback_shader_program);
image_texture_location = glGetUniformLocation(fallback_shader_program, "image_texture");
if (image_texture_location < 0) {
- LOG(ERROR) << "Shader doesn't containt the 'image_texture' uniform.";
+ LOG(ERROR) << "Shader doesn't contain the 'image_texture' uniform.";
return false;
}
fullscreen_location = glGetUniformLocation(fallback_shader_program, "fullscreen");
if (fullscreen_location < 0) {
- LOG(ERROR) << "Shader doesn't containt the 'fullscreen' uniform.";
+ LOG(ERROR) << "Shader doesn't contain the 'fullscreen' uniform.";
return false;
}
@@ -287,7 +288,8 @@ void Device::draw_pixels(device_memory &rgba,
}
glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
- /* invalidate old contents - avoids stalling if buffer is still waiting in queue to be rendered */
+ /* invalidate old contents - avoids stalling if buffer is still waiting in queue to be rendered
+ */
glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
float *vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
@@ -364,6 +366,15 @@ void Device::draw_pixels(device_memory &rgba,
Device *Device::create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
{
+#ifdef WITH_MULTI
+ if (!info.multi_devices.empty()) {
+ /* Always create a multi device when info contains multiple devices.
+ * This is done so that the type can still be e.g. DEVICE_CPU to indicate
+ * that it is a homogeneous collection of devices, which simplifies checks. */
+ return device_multi_create(info, stats, profiler, background);
+ }
+#endif
+
Device *device;
switch (info.type) {
@@ -378,9 +389,12 @@ Device *Device::create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool
device = NULL;
break;
#endif
-#ifdef WITH_MULTI
- case DEVICE_MULTI:
- device = device_multi_create(info, stats, profiler, background);
+#ifdef WITH_OPTIX
+ case DEVICE_OPTIX:
+ if (device_optix_init())
+ device = device_optix_create(info, stats, profiler, background);
+ else
+ device = NULL;
break;
#endif
#ifdef WITH_NETWORK
@@ -409,6 +423,8 @@ DeviceType Device::type_from_string(const char *name)
return DEVICE_CPU;
else if (strcmp(name, "CUDA") == 0)
return DEVICE_CUDA;
+ else if (strcmp(name, "OPTIX") == 0)
+ return DEVICE_OPTIX;
else if (strcmp(name, "OPENCL") == 0)
return DEVICE_OPENCL;
else if (strcmp(name, "NETWORK") == 0)
@@ -425,6 +441,8 @@ string Device::string_from_type(DeviceType type)
return "CPU";
else if (type == DEVICE_CUDA)
return "CUDA";
+ else if (type == DEVICE_OPTIX)
+ return "OPTIX";
else if (type == DEVICE_OPENCL)
return "OPENCL";
else if (type == DEVICE_NETWORK)
@@ -442,6 +460,9 @@ vector<DeviceType> Device::available_types()
#ifdef WITH_CUDA
types.push_back(DEVICE_CUDA);
#endif
+#ifdef WITH_OPTIX
+ types.push_back(DEVICE_OPTIX);
+#endif
#ifdef WITH_OPENCL
types.push_back(DEVICE_OPENCL);
#endif
@@ -473,15 +494,31 @@ vector<DeviceInfo> Device::available_devices(uint mask)
}
#endif
-#ifdef WITH_CUDA
- if (mask & DEVICE_MASK_CUDA) {
+#if defined(WITH_CUDA) || defined(WITH_OPTIX)
+ if (mask & (DEVICE_MASK_CUDA | DEVICE_MASK_OPTIX)) {
if (!(devices_initialized_mask & DEVICE_MASK_CUDA)) {
if (device_cuda_init()) {
device_cuda_info(cuda_devices);
}
devices_initialized_mask |= DEVICE_MASK_CUDA;
}
- foreach (DeviceInfo &info, cuda_devices) {
+ if (mask & DEVICE_MASK_CUDA) {
+ foreach (DeviceInfo &info, cuda_devices) {
+ devices.push_back(info);
+ }
+ }
+ }
+#endif
+
+#ifdef WITH_OPTIX
+ if (mask & DEVICE_MASK_OPTIX) {
+ if (!(devices_initialized_mask & DEVICE_MASK_OPTIX)) {
+ if (device_optix_init()) {
+ device_optix_info(cuda_devices, optix_devices);
+ }
+ devices_initialized_mask |= DEVICE_MASK_OPTIX;
+ }
+ foreach (DeviceInfo &info, optix_devices) {
devices.push_back(info);
}
}
@@ -555,15 +592,18 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
}
DeviceInfo info;
- info.type = DEVICE_MULTI;
+ info.type = subdevices.front().type;
info.id = "MULTI";
info.description = "Multi Device";
info.num = 0;
info.has_half_images = true;
info.has_volume_decoupled = true;
+ info.has_adaptive_stop_per_sample = true;
info.has_osl = true;
info.has_profiling = true;
+ info.has_peer_memory = false;
+ info.denoisers = DENOISER_ALL;
foreach (const DeviceInfo &device, subdevices) {
/* Ensure CPU device does not slow down GPU. */
@@ -593,11 +633,22 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
info.multi_devices.push_back(device);
}
+ /* Create unique ID for this combination of devices. */
+ info.id += device.id;
+
+ /* Set device type to MULTI if subdevices are not of a common type. */
+ if (device.type != info.type) {
+ info.type = DEVICE_MULTI;
+ }
+
/* Accumulate device info. */
info.has_half_images &= device.has_half_images;
info.has_volume_decoupled &= device.has_volume_decoupled;
+ info.has_adaptive_stop_per_sample &= device.has_adaptive_stop_per_sample;
info.has_osl &= device.has_osl;
info.has_profiling &= device.has_profiling;
+ info.has_peer_memory |= device.has_peer_memory;
+ info.denoisers &= device.denoisers;
}
return info;
@@ -612,9 +663,61 @@ void Device::free_memory()
{
devices_initialized_mask = 0;
cuda_devices.free_memory();
+ optix_devices.free_memory();
opencl_devices.free_memory();
cpu_devices.free_memory();
network_devices.free_memory();
}
+/* DeviceInfo */
+
+void DeviceInfo::add_denoising_devices(DenoiserType denoiser_type)
+{
+ assert(denoising_devices.empty());
+
+ if (denoiser_type == DENOISER_OPTIX && type != DEVICE_OPTIX) {
+ vector<DeviceInfo> optix_devices = Device::available_devices(DEVICE_MASK_OPTIX);
+ if (!optix_devices.empty()) {
+ /* Convert to a special multi device with separate denoising devices. */
+ if (multi_devices.empty()) {
+ multi_devices.push_back(*this);
+ }
+
+ /* Try to use the same physical devices for denoising. */
+ for (const DeviceInfo &cuda_device : multi_devices) {
+ if (cuda_device.type == DEVICE_CUDA) {
+ for (const DeviceInfo &optix_device : optix_devices) {
+ if (cuda_device.num == optix_device.num) {
+ id += optix_device.id;
+ denoising_devices.push_back(optix_device);
+ break;
+ }
+ }
+ }
+ }
+
+ if (denoising_devices.empty()) {
+ /* Simply use the first available OptiX device. */
+ const DeviceInfo optix_device = optix_devices.front();
+ id += optix_device.id; /* Uniquely identify this special multi device. */
+ denoising_devices.push_back(optix_device);
+ }
+
+ denoisers = denoiser_type;
+ }
+ }
+ else if (denoiser_type == DENOISER_OPENIMAGEDENOISE && type != DEVICE_CPU) {
+ /* Convert to a special multi device with separate denoising devices. */
+ if (multi_devices.empty()) {
+ multi_devices.push_back(*this);
+ }
+
+ /* Add CPU denoising devices. */
+ DeviceInfo cpu_device = Device::available_devices(DEVICE_MASK_CPU).front();
+ denoising_devices.push_back(cpu_device);
+
+ denoisers = denoiser_type;
+ }
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 15a0ceb4a19..115b05e3911 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -27,13 +27,14 @@
#include "util/util_list.h"
#include "util/util_stats.h"
#include "util/util_string.h"
-#include "util/util_thread.h"
#include "util/util_texture.h"
+#include "util/util_thread.h"
#include "util/util_types.h"
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
+class BVH;
class Progress;
class RenderTile;
@@ -45,13 +46,15 @@ enum DeviceType {
DEVICE_OPENCL,
DEVICE_CUDA,
DEVICE_NETWORK,
- DEVICE_MULTI
+ DEVICE_MULTI,
+ DEVICE_OPTIX,
};
enum DeviceTypeMask {
DEVICE_MASK_CPU = (1 << DEVICE_CPU),
DEVICE_MASK_OPENCL = (1 << DEVICE_OPENCL),
DEVICE_MASK_CUDA = (1 << DEVICE_CUDA),
+ DEVICE_MASK_OPTIX = (1 << DEVICE_OPTIX),
DEVICE_MASK_NETWORK = (1 << DEVICE_NETWORK),
DEVICE_MASK_ALL = ~0
};
@@ -72,14 +75,18 @@ class DeviceInfo {
string description;
string id; /* used for user preferences, should stay fixed with changing hardware config */
int num;
- bool display_device; /* GPU is used as a display device. */
- bool has_half_images; /* Support half-float textures. */
- bool has_volume_decoupled; /* Decoupled volume shading. */
- bool has_osl; /* Support Open Shading Language. */
- bool use_split_kernel; /* Use split or mega kernel. */
- bool has_profiling; /* Supports runtime collection of profiling info. */
+ bool display_device; /* GPU is used as a display device. */
+ bool has_half_images; /* Support half-float textures. */
+ bool has_volume_decoupled; /* Decoupled volume shading. */
+ bool has_adaptive_stop_per_sample; /* Per-sample adaptive sampling stopping. */
+ bool has_osl; /* Support Open Shading Language. */
+ bool use_split_kernel; /* Use split or mega kernel. */
+ bool has_profiling; /* Supports runtime collection of profiling info. */
+ bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
+ DenoiserTypeMask denoisers; /* Supported denoiser types. */
int cpu_threads;
vector<DeviceInfo> multi_devices;
+ vector<DeviceInfo> denoising_devices;
DeviceInfo()
{
@@ -90,9 +97,12 @@ class DeviceInfo {
display_device = false;
has_half_images = false;
has_volume_decoupled = false;
+ has_adaptive_stop_per_sample = false;
has_osl = false;
use_split_kernel = false;
has_profiling = false;
+ has_peer_memory = false;
+ denoisers = DENOISER_NONE;
}
bool operator==(const DeviceInfo &info)
@@ -102,6 +112,9 @@ class DeviceInfo {
(type == info.type && num == info.num && description == info.description));
return id == info.id;
}
+
+ /* Add additional devices needed for the specified denoiser. */
+ void add_denoising_devices(DenoiserType denoiser_type);
};
class DeviceRequestedFeatures {
@@ -124,6 +137,7 @@ class DeviceRequestedFeatures {
/* BVH/sampling kernel features. */
bool use_hair;
+ bool use_hair_thick;
bool use_object_motion;
bool use_camera_motion;
@@ -170,6 +184,7 @@ class DeviceRequestedFeatures {
max_nodes_group = 0;
nodes_features = 0;
use_hair = false;
+ use_hair_thick = false;
use_object_motion = false;
use_camera_motion = false;
use_baking = false;
@@ -192,6 +207,7 @@ class DeviceRequestedFeatures {
max_nodes_group == requested_features.max_nodes_group &&
nodes_features == requested_features.nodes_features &&
use_hair == requested_features.use_hair &&
+ use_hair_thick == requested_features.use_hair_thick &&
use_object_motion == requested_features.use_object_motion &&
use_camera_motion == requested_features.use_camera_motion &&
use_baking == requested_features.use_baking &&
@@ -311,7 +327,8 @@ class Device {
virtual void mem_free_sub_ptr(device_ptr /*ptr*/){};
public:
- virtual ~Device();
+ /* noexcept needed to silence TBB warning. */
+ virtual ~Device() noexcept(false);
/* info */
DeviceInfo info;
@@ -380,7 +397,11 @@ class Device {
}
/* tasks */
- virtual int get_split_task_count(DeviceTask &task) = 0;
+ virtual int get_split_task_count(DeviceTask &)
+ {
+ return 1;
+ }
+
virtual void task_add(DeviceTask &task) = 0;
virtual void task_wait() = 0;
virtual void task_cancel() = 0;
@@ -399,6 +420,12 @@ class Device {
bool transparent,
const DeviceDrawParams &draw_params);
+ /* acceleration structure building */
+ virtual bool build_optix_bvh(BVH *)
+ {
+ return false;
+ }
+
#ifdef WITH_NETWORK
/* networking */
void server_run();
@@ -412,11 +439,22 @@ class Device {
{
return 0;
}
- virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/)
+ virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTileNeighbors & /*neighbors*/)
+ {
+ }
+ virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTileNeighbors & /*neighbors*/)
{
}
- virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/)
+
+ virtual bool is_resident(device_ptr /*key*/, Device *sub_device)
+ {
+ /* Memory is always resident if this is not a multi device, regardless of whether the pointer
+ * is valid or not (since it may not have been allocated yet). */
+ return sub_device == this;
+ }
+ virtual bool check_peer_access(Device * /*peer_device*/)
{
+ return false;
}
/* static */
@@ -456,6 +494,7 @@ class Device {
static bool need_types_update, need_devices_update;
static thread_mutex device_mutex;
static vector<DeviceInfo> cuda_devices;
+ static vector<DeviceInfo> optix_devices;
static vector<DeviceInfo> opencl_devices;
static vector<DeviceInfo> cpu_devices;
static vector<DeviceInfo> network_devices;
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 837a8186064..ee3a3ddea64 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -29,16 +29,19 @@
#include "device/device_intern.h"
#include "device/device_split_kernel.h"
+// clang-format off
#include "kernel/kernel.h"
#include "kernel/kernel_compat_cpu.h"
#include "kernel/kernel_types.h"
#include "kernel/split/kernel_split_data.h"
#include "kernel/kernel_globals.h"
+#include "kernel/kernel_adaptive_sampling.h"
#include "kernel/filter/filter.h"
#include "kernel/osl/osl_shader.h"
#include "kernel/osl/osl_globals.h"
+// clang-format on
#include "render/buffers.h"
#include "render/coverage.h"
@@ -48,10 +51,12 @@
#include "util/util_function.h"
#include "util/util_logging.h"
#include "util/util_map.h"
+#include "util/util_openimagedenoise.h"
#include "util/util_opengl.h"
#include "util/util_optimization.h"
#include "util/util_progress.h"
#include "util/util_system.h"
+#include "util/util_task.h"
#include "util/util_thread.h"
CCL_NAMESPACE_BEGIN
@@ -114,6 +119,12 @@ template<typename F> class KernelFunctions {
architecture_name = "SSE2";
kernel = kernel_sse2;
}
+#else
+ {
+    /* Dummy block to prevent the `if` below from becoming part of the
+     * conditional chain when WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
+     * is not defined. */
+ }
#endif
if (strcmp(architecture_name, logged_architecture) != 0) {
@@ -152,7 +163,7 @@ class CPUSplitKernel : public DeviceSplitKernel {
virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name,
const DeviceRequestedFeatures &);
virtual int2 split_kernel_local_size();
- virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask *task);
+ virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask &task);
virtual uint64_t state_buffer_size(device_memory &kg, device_memory &data, size_t num_threads);
};
@@ -167,6 +178,11 @@ class CPUDevice : public Device {
#ifdef WITH_OSL
OSLGlobals osl_globals;
#endif
+#ifdef WITH_OPENIMAGEDENOISE
+ oidn::DeviceRef oidn_device;
+ oidn::FilterRef oidn_filter;
+#endif
+ thread_spin_lock oidn_task_lock;
bool use_split_kernel;
@@ -179,6 +195,7 @@ class CPUDevice : public Device {
convert_to_byte_kernel;
KernelFunctions<void (*)(KernelGlobals *, uint4 *, float4 *, int, int, int, int, int)>
shader_kernel;
+ KernelFunctions<void (*)(KernelGlobals *, float *, int, int, int, int, int)> bake_kernel;
KernelFunctions<void (*)(
int, TileInfo *, int, int, float *, float *, float *, float *, float *, int *, int, int)>
@@ -255,12 +272,13 @@ class CPUDevice : public Device {
CPUDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_)
: Device(info_, stats_, profiler_, background_),
- texture_info(this, "__texture_info", MEM_TEXTURE),
+ texture_info(this, "__texture_info", MEM_GLOBAL),
#define REGISTER_KERNEL(name) name##_kernel(KERNEL_FUNCTIONS(name))
REGISTER_KERNEL(path_trace),
REGISTER_KERNEL(convert_to_half_float),
REGISTER_KERNEL(convert_to_byte),
REGISTER_KERNEL(shader),
+ REGISTER_KERNEL(bake),
REGISTER_KERNEL(filter_divide_shadow),
REGISTER_KERNEL(filter_get_feature),
REGISTER_KERNEL(filter_write_feature),
@@ -311,13 +329,17 @@ class CPUDevice : public Device {
REGISTER_SPLIT_KERNEL(next_iteration_setup);
REGISTER_SPLIT_KERNEL(indirect_subsurface);
REGISTER_SPLIT_KERNEL(buffer_update);
+ REGISTER_SPLIT_KERNEL(adaptive_stopping);
+ REGISTER_SPLIT_KERNEL(adaptive_filter_x);
+ REGISTER_SPLIT_KERNEL(adaptive_filter_y);
+ REGISTER_SPLIT_KERNEL(adaptive_adjust_samples);
#undef REGISTER_SPLIT_KERNEL
#undef KERNEL_FUNCTIONS
}
~CPUDevice()
{
- task_pool.stop();
+ task_pool.cancel();
texture_info.free();
}
@@ -329,12 +351,6 @@ class CPUDevice : public Device {
virtual BVHLayoutMask get_bvh_layout_mask() const
{
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
- if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) {
- bvh_layout_mask |= BVH_LAYOUT_BVH4;
- }
- if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) {
- bvh_layout_mask |= BVH_LAYOUT_BVH8;
- }
#ifdef WITH_EMBREE
bvh_layout_mask |= BVH_LAYOUT_EMBREE;
#endif /* WITH_EMBREE */
@@ -354,6 +370,9 @@ class CPUDevice : public Device {
if (mem.type == MEM_TEXTURE) {
assert(!"mem_alloc not supported for textures.");
}
+ else if (mem.type == MEM_GLOBAL) {
+ assert(!"mem_alloc not supported for global memory.");
+ }
else {
if (mem.name) {
VLOG(1) << "Buffer allocate: " << mem.name << ", "
@@ -378,9 +397,13 @@ class CPUDevice : public Device {
void mem_copy_to(device_memory &mem)
{
- if (mem.type == MEM_TEXTURE) {
- tex_free(mem);
- tex_alloc(mem);
+ if (mem.type == MEM_GLOBAL) {
+ global_free(mem);
+ global_alloc(mem);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ tex_free((device_texture &)mem);
+ tex_alloc((device_texture &)mem);
}
else if (mem.type == MEM_PIXELS) {
assert(!"mem_copy_to not supported for pixels.");
@@ -412,8 +435,11 @@ class CPUDevice : public Device {
void mem_free(device_memory &mem)
{
- if (mem.type == MEM_TEXTURE) {
- tex_free(mem);
+ if (mem.type == MEM_GLOBAL) {
+ global_free(mem);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ tex_free((device_texture &)mem);
}
else if (mem.device_pointer) {
if (mem.type == MEM_DEVICE_ONLY) {
@@ -435,51 +461,50 @@ class CPUDevice : public Device {
kernel_const_copy(&kernel_globals, name, host, size);
}
- void tex_alloc(device_memory &mem)
+ void global_alloc(device_memory &mem)
{
- VLOG(1) << "Texture allocate: " << mem.name << ", "
+ VLOG(1) << "Global memory allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
- if (mem.interpolation == INTERPOLATION_NONE) {
- /* Data texture. */
- kernel_tex_copy(&kernel_globals, mem.name, mem.host_pointer, mem.data_size);
- }
- else {
- /* Image Texture. */
- int flat_slot = 0;
- if (string_startswith(mem.name, "__tex_image")) {
- int pos = string(mem.name).rfind("_");
- flat_slot = atoi(mem.name + pos + 1);
- }
- else {
- assert(0);
- }
+ kernel_global_memory_copy(&kernel_globals, mem.name, mem.host_pointer, mem.data_size);
- if (flat_slot >= texture_info.size()) {
- /* Allocate some slots in advance, to reduce amount
- * of re-allocations. */
- texture_info.resize(flat_slot + 128);
- }
-
- TextureInfo &info = texture_info[flat_slot];
- info.data = (uint64_t)mem.host_pointer;
- info.cl_buffer = 0;
- info.interpolation = mem.interpolation;
- info.extension = mem.extension;
- info.width = mem.data_width;
- info.height = mem.data_height;
- info.depth = mem.data_depth;
+ mem.device_pointer = (device_ptr)mem.host_pointer;
+ mem.device_size = mem.memory_size();
+ stats.mem_alloc(mem.device_size);
+ }
- need_texture_info = true;
+ void global_free(device_memory &mem)
+ {
+ if (mem.device_pointer) {
+ mem.device_pointer = 0;
+ stats.mem_free(mem.device_size);
+ mem.device_size = 0;
}
+ }
+
+ void tex_alloc(device_texture &mem)
+ {
+ VLOG(1) << "Texture allocate: " << mem.name << ", "
+ << string_human_readable_number(mem.memory_size()) << " bytes. ("
+ << string_human_readable_size(mem.memory_size()) << ")";
mem.device_pointer = (device_ptr)mem.host_pointer;
mem.device_size = mem.memory_size();
stats.mem_alloc(mem.device_size);
+
+ const uint slot = mem.slot;
+ if (slot >= texture_info.size()) {
+ /* Allocate some slots in advance, to reduce amount of re-allocations. */
+ texture_info.resize(slot + 128);
+ }
+
+ texture_info[slot] = mem.info;
+ texture_info[slot].data = (uint64_t)mem.host_pointer;
+ need_texture_info = true;
}
- void tex_free(device_memory &mem)
+ void tex_free(device_texture &mem)
{
if (mem.device_pointer) {
mem.device_pointer = 0;
@@ -498,25 +523,18 @@ class CPUDevice : public Device {
#endif
}
- void thread_run(DeviceTask *task)
+ void thread_run(DeviceTask &task)
{
- if (task->type == DeviceTask::RENDER) {
- thread_render(*task);
- }
- else if (task->type == DeviceTask::FILM_CONVERT)
- thread_film_convert(*task);
- else if (task->type == DeviceTask::SHADER)
- thread_shader(*task);
+ if (task.type == DeviceTask::RENDER)
+ thread_render(task);
+ else if (task.type == DeviceTask::SHADER)
+ thread_shader(task);
+ else if (task.type == DeviceTask::FILM_CONVERT)
+ thread_film_convert(task);
+ else if (task.type == DeviceTask::DENOISE_BUFFER)
+ thread_denoise(task);
}
- class CPUDeviceTask : public DeviceTask {
- public:
- CPUDeviceTask(CPUDevice *device, DeviceTask &task) : DeviceTask(task)
- {
- run = function_bind(&CPUDevice::thread_run, device, this);
- }
- };
-
bool denoising_non_local_means(device_ptr image_ptr,
device_ptr guide_ptr,
device_ptr variance_ptr,
@@ -811,7 +829,63 @@ class CPUDevice : public Device {
return true;
}
- void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
+ bool adaptive_sampling_filter(KernelGlobals *kg, RenderTile &tile, int sample)
+ {
+ WorkTile wtile;
+ wtile.x = tile.x;
+ wtile.y = tile.y;
+ wtile.w = tile.w;
+ wtile.h = tile.h;
+ wtile.offset = tile.offset;
+ wtile.stride = tile.stride;
+ wtile.buffer = (float *)tile.buffer;
+
+ /* For CPU we do adaptive stopping per sample so we can stop earlier, but
+ * for combined CPU + GPU rendering we match the GPU and do it per tile
+ * after a given number of sample steps. */
+ if (!kernel_data.integrator.adaptive_stop_per_sample) {
+ for (int y = wtile.y; y < wtile.y + wtile.h; ++y) {
+ for (int x = wtile.x; x < wtile.x + wtile.w; ++x) {
+ const int index = wtile.offset + x + y * wtile.stride;
+ float *buffer = wtile.buffer + index * kernel_data.film.pass_stride;
+ kernel_do_adaptive_stopping(kg, buffer, sample);
+ }
+ }
+ }
+
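+    /* Run the row and column filter kernels; if none of them report any
+     * remaining work, the whole tile has converged and sampling can stop. */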
+ bool any = false;
+ for (int y = wtile.y; y < wtile.y + wtile.h; ++y) {
+ any |= kernel_do_adaptive_filter_x(kg, y, &wtile);
+ }
+ for (int x = wtile.x; x < wtile.x + wtile.w; ++x) {
+ any |= kernel_do_adaptive_filter_y(kg, x, &wtile);
+ }
+ return (!any);
+ }
+
+ void adaptive_sampling_post(const RenderTile &tile, KernelGlobals *kg)
+ {
+ float *render_buffer = (float *)tile.buffer;
+ for (int y = tile.y; y < tile.y + tile.h; y++) {
+ for (int x = tile.x; x < tile.x + tile.w; x++) {
+ int index = tile.offset + x + y * tile.stride;
+ ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride;
+ if (buffer[kernel_data.film.pass_sample_count] < 0.0f) {
+ buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count];
+ float sample_multiplier = tile.sample / max((float)tile.start_sample + 1.0f,
+ buffer[kernel_data.film.pass_sample_count]);
+ if (sample_multiplier != 1.0f) {
+ kernel_adaptive_post_adjust(kg, buffer, sample_multiplier);
+ }
+ }
+ else {
+ kernel_adaptive_post_adjust(kg, buffer, tile.sample / (tile.sample - 1.0f));
+ }
+ }
+ }
+ }
+
+ void render(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
{
const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE;
@@ -835,25 +909,262 @@ class CPUDevice : public Device {
break;
}
- for (int y = tile.y; y < tile.y + tile.h; y++) {
- for (int x = tile.x; x < tile.x + tile.w; x++) {
- if (use_coverage) {
- coverage.init_pixel(x, y);
+ if (tile.task == RenderTile::PATH_TRACE) {
+ for (int y = tile.y; y < tile.y + tile.h; y++) {
+ for (int x = tile.x; x < tile.x + tile.w; x++) {
+ if (use_coverage) {
+ coverage.init_pixel(x, y);
+ }
+ path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
+ }
+ }
+ }
+ else {
+ for (int y = tile.y; y < tile.y + tile.h; y++) {
+ for (int x = tile.x; x < tile.x + tile.w; x++) {
+ bake_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
}
- path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
}
}
-
tile.sample = sample + 1;
+ if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(sample)) {
+ const bool stop = adaptive_sampling_filter(kg, tile, sample);
+ if (stop) {
+ const int num_progress_samples = end_sample - sample;
+ tile.sample = end_sample;
+ task.update_progress(&tile, tile.w * tile.h * num_progress_samples);
+ break;
+ }
+ }
+
task.update_progress(&tile, tile.w * tile.h);
}
if (use_coverage) {
coverage.finalize();
}
+
+ if (task.adaptive_sampling.use) {
+ adaptive_sampling_post(tile, kg);
+ }
+ }
+
+ void denoise_openimagedenoise_buffer(DeviceTask &task,
+ float *buffer,
+ const size_t offset,
+ const size_t stride,
+ const size_t x,
+ const size_t y,
+ const size_t w,
+ const size_t h,
+ const float scale)
+ {
+#ifdef WITH_OPENIMAGEDENOISE
+ assert(openimagedenoise_supported());
+
+  /* Only run one denoise at a time: OpenImageDenoise is already multithreaded
+   * for full buffers, and for tiled rendering creating multiple devices and
+   * filters would be slow and memory hungry as well.
+ *
+ * TODO: optimize tiled rendering case, by batching together denoising of many
+ * tiles somehow? */
+ static thread_mutex mutex;
+ thread_scoped_lock lock(mutex);
+
+ /* Create device and filter, cached for reuse. */
+ if (!oidn_device) {
+ oidn_device = oidn::newDevice();
+ oidn_device.commit();
+ }
+ if (!oidn_filter) {
+ oidn_filter = oidn_device.newFilter("RT");
+ oidn_filter.set("hdr", true);
+ oidn_filter.set("srgb", false);
+ }
+
+ /* Set images with appropriate stride for our interleaved pass storage. */
+ struct {
+ const char *name;
+ const int offset;
+ const bool scale;
+ const bool use;
+ array<float> scaled_buffer;
+ } passes[] = {{"color", task.pass_denoising_data + DENOISING_PASS_COLOR, false, true},
+ {"albedo",
+ task.pass_denoising_data + DENOISING_PASS_ALBEDO,
+ true,
+ task.denoising.input_passes >= DENOISER_INPUT_RGB_ALBEDO},
+ {"normal",
+ task.pass_denoising_data + DENOISING_PASS_NORMAL,
+ true,
+ task.denoising.input_passes >= DENOISER_INPUT_RGB_ALBEDO_NORMAL},
+ {"output", 0, false, true},
+ { NULL,
+ 0 }};
+
+ for (int i = 0; passes[i].name; i++) {
+ if (!passes[i].use) {
+ continue;
+ }
+
+ const int64_t pixel_offset = offset + x + y * stride;
+ const int64_t buffer_offset = (pixel_offset * task.pass_stride + passes[i].offset);
+ const int64_t pixel_stride = task.pass_stride;
+ const int64_t row_stride = stride * pixel_stride;
+
+ if (passes[i].scale && scale != 1.0f) {
+ /* Normalize albedo and normal passes as they are scaled by the number of samples.
+ * For the color passes OIDN will perform auto-exposure making it unnecessary. */
+ array<float> &scaled_buffer = passes[i].scaled_buffer;
+ scaled_buffer.resize(w * h * 3);
+
+ for (int y = 0; y < h; y++) {
+ const float *pass_row = buffer + buffer_offset + y * row_stride;
+ float *scaled_row = scaled_buffer.data() + y * w * 3;
+
+ for (int x = 0; x < w; x++) {
+ scaled_row[x * 3 + 0] = pass_row[x * pixel_stride + 0] * scale;
+ scaled_row[x * 3 + 1] = pass_row[x * pixel_stride + 1] * scale;
+ scaled_row[x * 3 + 2] = pass_row[x * pixel_stride + 2] * scale;
+ }
+ }
+
+ oidn_filter.setImage(
+ passes[i].name, scaled_buffer.data(), oidn::Format::Float3, w, h, 0, 0, 0);
+ }
+ else {
+ oidn_filter.setImage(passes[i].name,
+ buffer + buffer_offset,
+ oidn::Format::Float3,
+ w,
+ h,
+ 0,
+ pixel_stride * sizeof(float),
+ row_stride * sizeof(float));
+ }
+ }
+
+ /* Execute filter. */
+ oidn_filter.commit();
+ oidn_filter.execute();
+#else
+ (void)task;
+ (void)buffer;
+ (void)offset;
+ (void)stride;
+ (void)x;
+ (void)y;
+ (void)w;
+ (void)h;
+ (void)scale;
+#endif
}
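For reference, a standalone sketch of the OpenImageDenoise calls made above, reduced to a single interleaved RGB float buffer; it assumes OIDN 1.x headers and omits the albedo/normal passes, pass strides and the Cycles buffer layout.

    // Not Cycles code: a bare-bones OIDN "RT" filter invocation.
    #include <OpenImageDenoise/oidn.hpp>
    #include <cstdio>
    #include <vector>

    int main()
    {
      const int width = 64, height = 64;
      std::vector<float> color(width * height * 3, 0.5f);
      std::vector<float> output(width * height * 3, 0.0f);

      /* Device and filter are expensive to create, which is why the code above
       * caches them in oidn_device / oidn_filter. */
      oidn::DeviceRef device = oidn::newDevice();
      device.commit();

      oidn::FilterRef filter = device.newFilter("RT");
      filter.setImage("color", color.data(), oidn::Format::Float3, width, height);
      filter.setImage("output", output.data(), oidn::Format::Float3, width, height);
      filter.set("hdr", true); /* input is linear HDR radiance */
      filter.commit();
      filter.execute();

      const char *error_message;
      if (device.getError(error_message) != oidn::Error::None) {
        fprintf(stderr, "OIDN error: %s\n", error_message);
        return 1;
      }
      return 0;
    }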
- void denoise(DenoisingTask &denoising, RenderTile &tile)
+ void denoise_openimagedenoise(DeviceTask &task, RenderTile &rtile)
+ {
+ if (task.type == DeviceTask::DENOISE_BUFFER) {
+ /* Copy pixels from compute device to CPU (no-op for CPU device). */
+ rtile.buffers->buffer.copy_from_device();
+
+ denoise_openimagedenoise_buffer(task,
+ (float *)rtile.buffer,
+ rtile.offset,
+ rtile.stride,
+ rtile.x,
+ rtile.y,
+ rtile.w,
+ rtile.h,
+ 1.0f / rtile.sample);
+
+    /* TODO: it may be possible to avoid this copy, but we have to ensure that
+     * when other code copies data from the device it doesn't overwrite the
+     * denoiser buffers. */
+ rtile.buffers->buffer.copy_to_device();
+ }
+ else {
+ /* Per-tile denoising. */
+ rtile.sample = rtile.start_sample + rtile.num_samples;
+ const float scale = 1.0f / rtile.sample;
+ const float invscale = rtile.sample;
+ const size_t pass_stride = task.pass_stride;
+
+ /* Map neighboring tiles into one buffer for denoising. */
+ RenderTileNeighbors neighbors(rtile);
+ task.map_neighbor_tiles(neighbors, this);
+ RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER];
+ rtile = center_tile;
+
+      /* Calculate the size of the tile to denoise (including overlap). The
+       * overlap size was chosen empirically: OpenImageDenoise specifies an
+       * overlap of 128 pixels, but that is significantly bigger than the
+       * typical tile size. */
+ const int4 rect = rect_clip(rect_expand(center_tile.bounds(), 64), neighbors.bounds());
+ const int2 rect_size = make_int2(rect.z - rect.x, rect.w - rect.y);
+
+ /* Adjacent tiles are in separate memory regions, copy into single buffer. */
+ array<float> merged(rect_size.x * rect_size.y * task.pass_stride);
+
+ for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
+ RenderTile &ntile = neighbors.tiles[i];
+ if (!ntile.buffer) {
+ continue;
+ }
+
+ const int xmin = max(ntile.x, rect.x);
+ const int ymin = max(ntile.y, rect.y);
+ const int xmax = min(ntile.x + ntile.w, rect.z);
+ const int ymax = min(ntile.y + ntile.h, rect.w);
+
+ const size_t tile_offset = ntile.offset + xmin + ymin * ntile.stride;
+ const float *tile_buffer = (float *)ntile.buffer + tile_offset * pass_stride;
+
+ const size_t merged_stride = rect_size.x;
+ const size_t merged_offset = (xmin - rect.x) + (ymin - rect.y) * merged_stride;
+ float *merged_buffer = merged.data() + merged_offset * pass_stride;
+
+ for (int y = ymin; y < ymax; y++) {
+ for (int x = 0; x < pass_stride * (xmax - xmin); x++) {
+ merged_buffer[x] = tile_buffer[x] * scale;
+ }
+ tile_buffer += ntile.stride * pass_stride;
+ merged_buffer += merged_stride * pass_stride;
+ }
+ }
+
+ /* Denoise */
+ denoise_openimagedenoise_buffer(
+ task, merged.data(), 0, rect_size.x, 0, 0, rect_size.x, rect_size.y, 1.0f);
+
+ /* Copy back result from merged buffer. */
+ RenderTile &ntile = neighbors.target;
+ if (ntile.buffer) {
+ const int xmin = max(ntile.x, rect.x);
+ const int ymin = max(ntile.y, rect.y);
+ const int xmax = min(ntile.x + ntile.w, rect.z);
+ const int ymax = min(ntile.y + ntile.h, rect.w);
+
+ const size_t tile_offset = ntile.offset + xmin + ymin * ntile.stride;
+ float *tile_buffer = (float *)ntile.buffer + tile_offset * pass_stride;
+
+ const size_t merged_stride = rect_size.x;
+ const size_t merged_offset = (xmin - rect.x) + (ymin - rect.y) * merged_stride;
+ const float *merged_buffer = merged.data() + merged_offset * pass_stride;
+
+ for (int y = ymin; y < ymax; y++) {
+ for (int x = 0; x < pass_stride * (xmax - xmin); x += pass_stride) {
+ tile_buffer[x + 0] = merged_buffer[x + 0] * invscale;
+ tile_buffer[x + 1] = merged_buffer[x + 1] * invscale;
+ tile_buffer[x + 2] = merged_buffer[x + 2] * invscale;
+ }
+ tile_buffer += ntile.stride * pass_stride;
+ merged_buffer += merged_stride * pass_stride;
+ }
+ }
+
+ task.unmap_neighbor_tiles(neighbors, this);
+ }
+ }
+
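The overlap computation above relies on Cycles' int4 rectangle helpers; the sketch below restates the same geometry with an explicit struct (xmin, ymin, xmax, ymax) and is not the actual util_rect.h implementation.

    // Stand-in rectangle helpers matching the semantics used above.
    #include <algorithm>
    #include <cstdio>

    struct Rect {
      int xmin, ymin, xmax, ymax;
    };

    // Grow a rectangle by `d` pixels on every side (the denoising overlap).
    static Rect rect_expand(const Rect &r, int d)
    {
      return {r.xmin - d, r.ymin - d, r.xmax + d, r.ymax + d};
    }

    // Clip a rectangle against the bounds of the mapped neighbor tiles.
    static Rect rect_clip(const Rect &r, const Rect &bounds)
    {
      return {std::max(r.xmin, bounds.xmin),
              std::max(r.ymin, bounds.ymin),
              std::min(r.xmax, bounds.xmax),
              std::min(r.ymax, bounds.ymax)};
    }

    int main()
    {
      const Rect center_tile = {128, 128, 192, 192};  /* 64x64 tile */
      const Rect neighbor_bounds = {64, 64, 256, 256};

      /* 64 pixel overlap, clipped to what the neighbors actually provide. */
      const Rect rect = rect_clip(rect_expand(center_tile, 64), neighbor_bounds);
      printf("denoise region: (%d,%d)-(%d,%d)\n", rect.xmin, rect.ymin, rect.xmax, rect.ymax);
      return 0;
    }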
+ void denoise_nlm(DenoisingTask &denoising, RenderTile &tile)
{
ProfilingHelper profiling(denoising.profiler, PROFILING_DENOISING);
@@ -881,7 +1192,7 @@ class CPUDevice : public Device {
denoising.render_buffer.samples = tile.sample;
denoising.buffer.gpu_temporary_mem = false;
- denoising.run_denoising(&tile);
+ denoising.run_denoising(tile);
}
void thread_render(DeviceTask &task)
@@ -911,22 +1222,46 @@ class CPUDevice : public Device {
}
}
- RenderTile tile;
- DenoisingTask denoising(this, task);
- denoising.profiler = &kg->profiler;
+ /* NLM denoiser. */
+ DenoisingTask *denoising = NULL;
+
+ /* OpenImageDenoise: we can only denoise with one thread at a time, so to
+ * avoid waiting with mutex locks in the denoiser, we let only a single
+ * thread acquire denoising tiles. */
+ uint tile_types = task.tile_types;
+ bool hold_denoise_lock = false;
+ if ((tile_types & RenderTile::DENOISE) && task.denoising.type == DENOISER_OPENIMAGEDENOISE) {
+      if (oidn_task_lock.try_lock()) {
+        hold_denoise_lock = true;
+      }
+      else {
+        tile_types &= ~RenderTile::DENOISE;
+      }
+ }
- while (task.acquire_tile(this, tile)) {
+ RenderTile tile;
+ while (task.acquire_tile(this, tile, tile_types)) {
if (tile.task == RenderTile::PATH_TRACE) {
if (use_split_kernel) {
device_only_memory<uchar> void_buffer(this, "void_buffer");
- split_kernel->path_trace(&task, tile, kgbuffer, void_buffer);
+ split_kernel->path_trace(task, tile, kgbuffer, void_buffer);
}
else {
- path_trace(task, tile, kg);
+ render(task, tile, kg);
}
}
+ else if (tile.task == RenderTile::BAKE) {
+ render(task, tile, kg);
+ }
else if (tile.task == RenderTile::DENOISE) {
- denoise(denoising, tile);
+ if (task.denoising.type == DENOISER_OPENIMAGEDENOISE) {
+ denoise_openimagedenoise(task, tile);
+ }
+ else if (task.denoising.type == DENOISER_NLM) {
+ if (denoising == NULL) {
+ denoising = new DenoisingTask(this, task);
+ denoising->profiler = &kg->profiler;
+ }
+ denoise_nlm(*denoising, tile);
+ }
task.update_progress(&tile, tile.w * tile.h);
}
@@ -938,12 +1273,50 @@ class CPUDevice : public Device {
}
}
+ if (hold_denoise_lock) {
+ oidn_task_lock.unlock();
+ }
+
profiler.remove_state(&kg->profiler);
thread_kernel_globals_free((KernelGlobals *)kgbuffer.device_pointer);
kg->~KernelGlobals();
kgbuffer.free();
delete split_kernel;
+ delete denoising;
+ }
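A small sketch of the single-denoiser-thread pattern in thread_render() above, using a plain std::mutex and a simplified tile-type bitmask; the worker function and tile types here are invented for illustration.

    // Only one worker may hold the denoise lock; the others drop the DENOISE
    // bit instead of blocking, and keep path tracing.
    #include <cstdio>
    #include <mutex>

    enum TileType { PATH_TRACE = 1 << 0, DENOISE = 1 << 1 };

    static std::mutex oidn_task_lock;

    static void worker(int thread_id, unsigned tile_types)
    {
      bool hold_denoise_lock = false;

      if (tile_types & DENOISE) {
        if (oidn_task_lock.try_lock()) {
          hold_denoise_lock = true;
        }
        else {
          tile_types &= ~DENOISE;
        }
      }

      printf("thread %d will %sdenoise\n", thread_id, (tile_types & DENOISE) ? "" : "not ");

      /* ... acquire and process tiles matching tile_types ... */

      if (hold_denoise_lock) {
        oidn_task_lock.unlock();
      }
    }

    int main()
    {
      worker(0, PATH_TRACE | DENOISE);
      worker(1, PATH_TRACE | DENOISE);
      return 0;
    }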
+
+ void thread_denoise(DeviceTask &task)
+ {
+ RenderTile tile;
+ tile.x = task.x;
+ tile.y = task.y;
+ tile.w = task.w;
+ tile.h = task.h;
+ tile.buffer = task.buffer;
+ tile.sample = task.sample + task.num_samples;
+ tile.num_samples = task.num_samples;
+ tile.start_sample = task.sample;
+ tile.offset = task.offset;
+ tile.stride = task.stride;
+ tile.buffers = task.buffers;
+
+ if (task.denoising.type == DENOISER_OPENIMAGEDENOISE) {
+ denoise_openimagedenoise(task, tile);
+ }
+ else {
+ DenoisingTask denoising(this, task);
+
+ ProfilingState denoising_profiler_state;
+ profiler.add_state(&denoising_profiler_state);
+ denoising.profiler = &denoising_profiler_state;
+
+ denoise_nlm(denoising, tile);
+
+ profiler.remove_state(&denoising_profiler_state);
+ }
+
+ task.update_progress(&tile, tile.w * tile.h);
}
void thread_film_convert(DeviceTask &task)
@@ -978,14 +1351,11 @@ class CPUDevice : public Device {
void thread_shader(DeviceTask &task)
{
- KernelGlobals kg = kernel_globals;
+ KernelGlobals *kg = new KernelGlobals(thread_kernel_globals_init());
-#ifdef WITH_OSL
- OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
-#endif
for (int sample = 0; sample < task.num_samples; sample++) {
for (int x = task.shader_x; x < task.shader_x + task.shader_w; x++)
- shader_kernel()(&kg,
+ shader_kernel()(kg,
(uint4 *)task.shader_input,
(float4 *)task.shader_output,
task.shader_eval_type,
@@ -1000,9 +1370,8 @@ class CPUDevice : public Device {
task.update_progress(NULL);
}
-#ifdef WITH_OSL
- OSLShader::thread_free(&kg);
-#endif
+ thread_kernel_globals_free(kg);
+ delete kg;
}
int get_split_task_count(DeviceTask &task)
@@ -1021,13 +1390,24 @@ class CPUDevice : public Device {
/* split task into smaller ones */
list<DeviceTask> tasks;
- if (task.type == DeviceTask::SHADER)
+ if (task.type == DeviceTask::DENOISE_BUFFER &&
+ task.denoising.type == DENOISER_OPENIMAGEDENOISE) {
+    /* Denoise the entire buffer at once with OIDN, which has its own threading. */
+ tasks.push_back(task);
+ }
+ else if (task.type == DeviceTask::SHADER) {
task.split(tasks, info.cpu_threads, 256);
- else
+ }
+ else {
task.split(tasks, info.cpu_threads);
+ }
- foreach (DeviceTask &task, tasks)
- task_pool.push(new CPUDeviceTask(this, task));
+ foreach (DeviceTask &task, tasks) {
+ task_pool.push([=] {
+ DeviceTask task_copy = task;
+ thread_run(task_copy);
+ });
+ }
}
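The lambda in task_add() above captures the split task by value so it stays valid after the local tasks list is destroyed, and hands a mutable copy to thread_run(). A sketch of the same pattern with a hypothetical std::function-based pool:

    // TaskPool::push here is a std::function stand-in, not the Cycles TaskPool API.
    #include <cstdio>
    #include <functional>
    #include <list>
    #include <vector>

    struct Task {
      int first_sample, num_samples;
    };

    struct TaskPool {
      std::vector<std::function<void()>> queue;

      void push(std::function<void()> fn)
      {
        queue.push_back(std::move(fn));
      }

      void wait_work()
      {
        /* A real pool would run these on worker threads. */
        for (auto &fn : queue) {
          fn();
        }
        queue.clear();
      }
    };

    static void thread_run(Task &task)
    {
      printf("rendering samples %d..%d\n", task.first_sample, task.first_sample + task.num_samples - 1);
    }

    int main()
    {
      TaskPool pool;
      std::list<Task> tasks = {{0, 16}, {16, 16}};

      for (const Task &task : tasks) {
        /* Capture by value: the copy outlives `tasks`, and a mutable local copy
         * is handed to thread_run(), which takes a non-const reference. */
        pool.push([=] {
          Task task_copy = task;
          thread_run(task_copy);
        });
      }

      pool.wait_work();
      return 0;
    }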
void task_wait()
@@ -1192,7 +1572,7 @@ int2 CPUSplitKernel::split_kernel_local_size()
int2 CPUSplitKernel::split_kernel_global_size(device_memory & /*kg*/,
device_memory & /*data*/,
- DeviceTask * /*task*/)
+ DeviceTask & /*task*/)
{
return make_int2(1, 1);
}
@@ -1220,9 +1600,14 @@ void device_cpu_info(vector<DeviceInfo> &devices)
info.id = "CPU";
info.num = 0;
info.has_volume_decoupled = true;
+ info.has_adaptive_stop_per_sample = true;
info.has_osl = true;
info.has_half_images = true;
info.has_profiling = true;
+ info.denoisers = DENOISER_NLM;
+ if (openimagedenoise_supported()) {
+ info.denoisers |= DENOISER_OPENIMAGEDENOISE;
+ }
devices.insert(devices.begin(), info);
}
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 68bc3bd4045..d9ffcceb06e 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -14,2530 +14,21 @@
* limitations under the License.
*/
-#include <climits>
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+#ifdef WITH_CUDA
-#include "device/device.h"
-#include "device/device_denoising.h"
-#include "device/device_intern.h"
-#include "device/device_split_kernel.h"
+# include "device/cuda/device_cuda.h"
+# include "device/device.h"
+# include "device/device_intern.h"
-#include "render/buffers.h"
-
-#include "kernel/filter/filter_defines.h"
-
-#ifdef WITH_CUDA_DYNLOAD
-# include "cuew.h"
-#else
-# include "util/util_opengl.h"
-# include <cuda.h>
-# include <cudaGL.h>
-#endif
-#include "util/util_debug.h"
-#include "util/util_foreach.h"
-#include "util/util_logging.h"
-#include "util/util_map.h"
-#include "util/util_md5.h"
-#include "util/util_opengl.h"
-#include "util/util_path.h"
-#include "util/util_string.h"
-#include "util/util_system.h"
-#include "util/util_types.h"
-#include "util/util_time.h"
-
-#include "kernel/split/kernel_split_data_types.h"
+# include "util/util_logging.h"
+# include "util/util_string.h"
+# include "util/util_windows.h"
CCL_NAMESPACE_BEGIN
-#ifndef WITH_CUDA_DYNLOAD
-
-/* Transparently implement some functions, so the majority of the file does not
- * need to worry about the difference between dynamically loaded and linked CUDA
- * at all. */
-
-namespace {
-
-const char *cuewErrorString(CUresult result)
-{
- /* We can only give error code here without major code duplication, that
- * should be enough since dynamic loading is only being disabled by folks
- * who knows what they're doing anyway.
- *
- * NOTE: Avoid call from several threads.
- */
- static string error;
- error = string_printf("%d", result);
- return error.c_str();
-}
-
-const char *cuewCompilerPath()
-{
- return CYCLES_CUDA_NVCC_EXECUTABLE;
-}
-
-int cuewCompilerVersion()
-{
- return (CUDA_VERSION / 100) + (CUDA_VERSION % 100 / 10);
-}
-
-} /* namespace */
-#endif /* WITH_CUDA_DYNLOAD */
-
-class CUDADevice;
-
-class CUDASplitKernel : public DeviceSplitKernel {
- CUDADevice *device;
-
- public:
- explicit CUDASplitKernel(CUDADevice *device);
-
- virtual uint64_t state_buffer_size(device_memory &kg, device_memory &data, size_t num_threads);
-
- virtual bool enqueue_split_kernel_data_init(const KernelDimensions &dim,
- RenderTile &rtile,
- int num_global_elements,
- device_memory &kernel_globals,
- device_memory &kernel_data_,
- device_memory &split_data,
- device_memory &ray_state,
- device_memory &queue_index,
- device_memory &use_queues_flag,
- device_memory &work_pool_wgs);
-
- virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name,
- const DeviceRequestedFeatures &);
- virtual int2 split_kernel_local_size();
- virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask *task);
-};
-
-/* Utility to push/pop CUDA context. */
-class CUDAContextScope {
- public:
- CUDAContextScope(CUDADevice *device);
- ~CUDAContextScope();
-
- private:
- CUDADevice *device;
-};
-
-class CUDADevice : public Device {
- public:
- DedicatedTaskPool task_pool;
- CUdevice cuDevice;
- CUcontext cuContext;
- CUmodule cuModule, cuFilterModule;
- size_t device_texture_headroom;
- size_t device_working_headroom;
- bool move_texture_to_host;
- size_t map_host_used;
- size_t map_host_limit;
- int can_map_host;
- int cuDevId;
- int cuDevArchitecture;
- bool first_error;
- CUDASplitKernel *split_kernel;
-
- struct CUDAMem {
- CUDAMem() : texobject(0), array(0), map_host_pointer(0), free_map_host(false)
- {
- }
-
- CUtexObject texobject;
- CUarray array;
- void *map_host_pointer;
- bool free_map_host;
- };
- typedef map<device_memory *, CUDAMem> CUDAMemMap;
- CUDAMemMap cuda_mem_map;
-
- struct PixelMem {
- GLuint cuPBO;
- CUgraphicsResource cuPBOresource;
- GLuint cuTexId;
- int w, h;
- };
- map<device_ptr, PixelMem> pixel_mem_map;
-
- /* Bindless Textures */
- device_vector<TextureInfo> texture_info;
- bool need_texture_info;
-
- CUdeviceptr cuda_device_ptr(device_ptr mem)
- {
- return (CUdeviceptr)mem;
- }
-
- static bool have_precompiled_kernels()
- {
- string cubins_path = path_get("lib");
- return path_exists(cubins_path);
- }
-
- virtual bool show_samples() const
- {
- /* The CUDADevice only processes one tile at a time, so showing samples is fine. */
- return true;
- }
-
- virtual BVHLayoutMask get_bvh_layout_mask() const
- {
- return BVH_LAYOUT_BVH2;
- }
-
- /*#ifdef NDEBUG
-#define cuda_abort()
-#else
-#define cuda_abort() abort()
-#endif*/
- void cuda_error_documentation()
- {
- if (first_error) {
- fprintf(stderr,
- "\nRefer to the Cycles GPU rendering documentation for possible solutions:\n");
- fprintf(stderr,
- "https://docs.blender.org/manual/en/dev/render/cycles/gpu_rendering.html\n\n");
- first_error = false;
- }
- }
-
-#define cuda_assert(stmt) \
- { \
- CUresult result = stmt; \
-\
- if (result != CUDA_SUCCESS) { \
- string message = string_printf( \
- "CUDA error: %s in %s, line %d", cuewErrorString(result), #stmt, __LINE__); \
- if (error_msg == "") \
- error_msg = message; \
- fprintf(stderr, "%s\n", message.c_str()); \
- /*cuda_abort();*/ \
- cuda_error_documentation(); \
- } \
- } \
- (void)0
-
- bool cuda_error_(CUresult result, const string &stmt)
- {
- if (result == CUDA_SUCCESS)
- return false;
-
- string message = string_printf("CUDA error at %s: %s", stmt.c_str(), cuewErrorString(result));
- if (error_msg == "")
- error_msg = message;
- fprintf(stderr, "%s\n", message.c_str());
- cuda_error_documentation();
- return true;
- }
-
-#define cuda_error(stmt) cuda_error_(stmt, #stmt)
-
- void cuda_error_message(const string &message)
- {
- if (error_msg == "")
- error_msg = message;
- fprintf(stderr, "%s\n", message.c_str());
- cuda_error_documentation();
- }
-
- CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_)
- : Device(info, stats, profiler, background_),
- texture_info(this, "__texture_info", MEM_TEXTURE)
- {
- first_error = true;
- background = background_;
-
- cuDevId = info.num;
- cuDevice = 0;
- cuContext = 0;
-
- cuModule = 0;
- cuFilterModule = 0;
-
- split_kernel = NULL;
-
- need_texture_info = false;
-
- device_texture_headroom = 0;
- device_working_headroom = 0;
- move_texture_to_host = false;
- map_host_limit = 0;
- map_host_used = 0;
- can_map_host = 0;
-
-    /* Initialize CUDA. */
- if (cuda_error(cuInit(0)))
- return;
-
- /* Setup device and context. */
- if (cuda_error(cuDeviceGet(&cuDevice, cuDevId)))
- return;
-
- /* CU_CTX_MAP_HOST for mapping host memory when out of device memory.
- * CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render,
- * so we can predict which memory to map to host. */
- cuda_assert(
- cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
-
- unsigned int ctx_flags = CU_CTX_LMEM_RESIZE_TO_MAX;
- if (can_map_host) {
- ctx_flags |= CU_CTX_MAP_HOST;
- init_host_memory();
- }
-
- /* Create context. */
- CUresult result;
-
- if (background) {
- result = cuCtxCreate(&cuContext, ctx_flags, cuDevice);
- }
- else {
- result = cuGLCtxCreate(&cuContext, ctx_flags, cuDevice);
-
- if (result != CUDA_SUCCESS) {
- result = cuCtxCreate(&cuContext, ctx_flags, cuDevice);
- background = true;
- }
- }
-
- if (cuda_error_(result, "cuCtxCreate"))
- return;
-
- int major, minor;
- cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
- cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
- cuDevArchitecture = major * 100 + minor * 10;
-
- /* Pop context set by cuCtxCreate. */
- cuCtxPopCurrent(NULL);
- }
-
- ~CUDADevice()
- {
- task_pool.stop();
-
- delete split_kernel;
-
- texture_info.free();
-
- cuda_assert(cuCtxDestroy(cuContext));
- }
-
- bool support_device(const DeviceRequestedFeatures & /*requested_features*/)
- {
- int major, minor;
- cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
- cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
-
-    /* We only support sm_30 and above. */
- if (major < 3) {
- cuda_error_message(string_printf(
- "CUDA device supported only with compute capability 3.0 or up, found %d.%d.",
- major,
- minor));
- return false;
- }
-
- return true;
- }
-
- bool use_adaptive_compilation()
- {
- return DebugFlags().cuda.adaptive_compile;
- }
-
- bool use_split_kernel()
- {
- return DebugFlags().cuda.split_kernel;
- }
-
-  /* Common NVCC flags which stay the same regardless of shading model or
-   * kernel sources MD5, and only depend on the compiler or compilation settings.
- */
- string compile_kernel_get_common_cflags(const DeviceRequestedFeatures &requested_features,
- bool filter = false,
- bool split = false)
- {
- const int machine = system_cpu_bits();
- const string source_path = path_get("source");
- const string include_path = source_path;
- string cflags = string_printf(
- "-m%d "
- "--ptxas-options=\"-v\" "
- "--use_fast_math "
- "-DNVCC "
- "-I\"%s\"",
- machine,
- include_path.c_str());
- if (!filter && use_adaptive_compilation()) {
- cflags += " " + requested_features.get_build_options();
- }
- const char *extra_cflags = getenv("CYCLES_CUDA_EXTRA_CFLAGS");
- if (extra_cflags) {
- cflags += string(" ") + string(extra_cflags);
- }
-#ifdef WITH_CYCLES_DEBUG
- cflags += " -D__KERNEL_DEBUG__";
-#endif
-
- if (split) {
- cflags += " -D__SPLIT__";
- }
-
- return cflags;
- }
-
- bool compile_check_compiler()
- {
- const char *nvcc = cuewCompilerPath();
- if (nvcc == NULL) {
- cuda_error_message(
- "CUDA nvcc compiler not found. "
- "Install CUDA toolkit in default location.");
- return false;
- }
- const int cuda_version = cuewCompilerVersion();
- VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << cuda_version << ".";
- const int major = cuda_version / 10, minor = cuda_version % 10;
- if (cuda_version == 0) {
- cuda_error_message("CUDA nvcc compiler version could not be parsed.");
- return false;
- }
- if (cuda_version < 80) {
- printf(
- "Unsupported CUDA version %d.%d detected, "
- "you need CUDA 8.0 or newer.\n",
- major,
- minor);
- return false;
- }
- else if (cuda_version != 101) {
- printf(
- "CUDA version %d.%d detected, build may succeed but only "
- "CUDA 10.1 is officially supported.\n",
- major,
- minor);
- }
- return true;
- }
-
- string compile_kernel(const DeviceRequestedFeatures &requested_features,
- bool filter = false,
- bool split = false)
- {
- const char *name, *source;
- if (filter) {
- name = "filter";
- source = "filter.cu";
- }
- else if (split) {
- name = "kernel_split";
- source = "kernel_split.cu";
- }
- else {
- name = "kernel";
- source = "kernel.cu";
- }
- /* Compute cubin name. */
- int major, minor;
- cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
- cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
-
- /* Attempt to use kernel provided with Blender. */
- if (!use_adaptive_compilation()) {
- const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
- VLOG(1) << "Testing for pre-compiled kernel " << cubin << ".";
- if (path_exists(cubin)) {
- VLOG(1) << "Using precompiled kernel.";
- return cubin;
- }
- }
-
- const string common_cflags = compile_kernel_get_common_cflags(
- requested_features, filter, split);
-
- /* Try to use locally compiled kernel. */
- const string source_path = path_get("source");
- const string kernel_md5 = path_files_md5_hash(source_path);
-
-    /* We include cflags in the MD5, so that changing the CUDA toolkit or other
-     * compiler command line arguments makes sure the cubin gets re-built.
- */
- const string cubin_md5 = util_md5_string(kernel_md5 + common_cflags);
-
- const string cubin_file = string_printf(
- "cycles_%s_sm%d%d_%s.cubin", name, major, minor, cubin_md5.c_str());
- const string cubin = path_cache_get(path_join("kernels", cubin_file));
- VLOG(1) << "Testing for locally compiled kernel " << cubin << ".";
- if (path_exists(cubin)) {
- VLOG(1) << "Using locally compiled kernel.";
- return cubin;
- }
-
-#ifdef _WIN32
- if (have_precompiled_kernels()) {
- if (major < 3) {
- cuda_error_message(
- string_printf("CUDA device requires compute capability 3.0 or up, "
- "found %d.%d. Your GPU is not supported.",
- major,
- minor));
- }
- else {
- cuda_error_message(
- string_printf("CUDA binary kernel for this graphics card compute "
- "capability (%d.%d) not found.",
- major,
- minor));
- }
- return "";
- }
-#endif
-
- /* Compile. */
- if (!compile_check_compiler()) {
- return "";
- }
- const char *nvcc = cuewCompilerPath();
- const string kernel = path_join(path_join(source_path, "kernel"),
- path_join("kernels", path_join("cuda", source)));
- double starttime = time_dt();
- printf("Compiling CUDA kernel ...\n");
-
- path_create_directories(cubin);
-
- string command = string_printf(
- "\"%s\" "
- "-arch=sm_%d%d "
- "--cubin \"%s\" "
- "-o \"%s\" "
- "%s ",
- nvcc,
- major,
- minor,
- kernel.c_str(),
- cubin.c_str(),
- common_cflags.c_str());
-
- printf("%s\n", command.c_str());
-
- if (system(command.c_str()) == -1) {
- cuda_error_message(
- "Failed to execute compilation command, "
- "see console for details.");
- return "";
- }
-
-    /* Verify that compilation succeeded. */
- if (!path_exists(cubin)) {
- cuda_error_message(
- "CUDA kernel compilation failed, "
- "see console for details.");
- return "";
- }
-
- printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);
-
- return cubin;
- }
-
- bool load_kernels(const DeviceRequestedFeatures &requested_features)
- {
- /* TODO(sergey): Support kernels re-load for CUDA devices.
- *
- * Currently re-loading kernel will invalidate memory pointers,
- * causing problems in cuCtxSynchronize.
- */
- if (cuFilterModule && cuModule) {
- VLOG(1) << "Skipping kernel reload, not currently supported.";
- return true;
- }
-
- /* check if cuda init succeeded */
- if (cuContext == 0)
- return false;
-
- /* check if GPU is supported */
- if (!support_device(requested_features))
- return false;
-
- /* get kernel */
- string cubin = compile_kernel(requested_features, false, use_split_kernel());
- if (cubin == "")
- return false;
-
- string filter_cubin = compile_kernel(requested_features, true, false);
- if (filter_cubin == "")
- return false;
-
- /* open module */
- CUDAContextScope scope(this);
-
- string cubin_data;
- CUresult result;
-
- if (path_read_text(cubin, cubin_data))
- result = cuModuleLoadData(&cuModule, cubin_data.c_str());
- else
- result = CUDA_ERROR_FILE_NOT_FOUND;
-
- if (cuda_error_(result, "cuModuleLoad"))
- cuda_error_message(string_printf("Failed loading CUDA kernel %s.", cubin.c_str()));
-
- if (path_read_text(filter_cubin, cubin_data))
- result = cuModuleLoadData(&cuFilterModule, cubin_data.c_str());
- else
- result = CUDA_ERROR_FILE_NOT_FOUND;
-
- if (cuda_error_(result, "cuModuleLoad"))
- cuda_error_message(string_printf("Failed loading CUDA kernel %s.", filter_cubin.c_str()));
-
- if (result == CUDA_SUCCESS) {
- reserve_local_memory(requested_features);
- }
-
- return (result == CUDA_SUCCESS);
- }
-
- void reserve_local_memory(const DeviceRequestedFeatures &requested_features)
- {
- if (use_split_kernel()) {
-      /* The split kernel mostly uses global memory and adaptive compilation,
-       * so it is currently difficult to predict how much is needed. */
- return;
- }
-
- /* Together with CU_CTX_LMEM_RESIZE_TO_MAX, this reserves local memory
- * needed for kernel launches, so that we can reliably figure out when
- * to allocate scene data in mapped host memory. */
- CUDAContextScope scope(this);
-
- size_t total = 0, free_before = 0, free_after = 0;
- cuMemGetInfo(&free_before, &total);
-
- /* Get kernel function. */
- CUfunction cuPathTrace;
-
- if (requested_features.use_integrator_branched) {
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
- }
- else {
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
- }
-
- cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
-
- int min_blocks, num_threads_per_block;
- cuda_assert(cuOccupancyMaxPotentialBlockSize(
- &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
-
-    /* Launch the kernel; using just 1 block appears sufficient to reserve
-     * memory for all multiprocessors. It would still be good to do this in
-     * parallel for the multi-GPU case to make it faster. */
- CUdeviceptr d_work_tiles = 0;
- uint total_work_size = 0;
-
- void *args[] = {&d_work_tiles, &total_work_size};
-
- cuda_assert(cuLaunchKernel(cuPathTrace, 1, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
-
- cuda_assert(cuCtxSynchronize());
-
- cuMemGetInfo(&free_after, &total);
- VLOG(1) << "Local memory reserved " << string_human_readable_number(free_before - free_after)
- << " bytes. (" << string_human_readable_size(free_before - free_after) << ")";
-
-#if 0
- /* For testing mapped host memory, fill up device memory. */
- const size_t keep_mb = 1024;
-
- while(free_after > keep_mb * 1024 * 1024LL) {
- CUdeviceptr tmp;
- cuda_assert(cuMemAlloc(&tmp, 10 * 1024 * 1024LL));
- cuMemGetInfo(&free_after, &total);
- }
-#endif
- }
-
- void init_host_memory()
- {
-    /* Limit the amount of host mapped memory, because allocating too much can
-     * cause system instability. Leave at least half of system memory or 4 GB
-     * free, whichever is smaller. */
- size_t default_limit = 4 * 1024 * 1024 * 1024LL;
- size_t system_ram = system_physical_ram();
-
- if (system_ram > 0) {
- if (system_ram / 2 > default_limit) {
- map_host_limit = system_ram - default_limit;
- }
- else {
- map_host_limit = system_ram / 2;
- }
- }
- else {
- VLOG(1) << "Mapped host memory disabled, failed to get system RAM";
- map_host_limit = 0;
- }
-
-    /* Amount of device memory to keep free after texture memory
-     * and working memory allocations respectively. We set the working
-     * memory headroom lower so that some space is left after all
-     * texture memory allocations. */
- device_working_headroom = 32 * 1024 * 1024LL; // 32MB
- device_texture_headroom = 128 * 1024 * 1024LL; // 128MB
-
- VLOG(1) << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
- << " bytes. (" << string_human_readable_size(map_host_limit) << ")";
- }
-
- void load_texture_info()
- {
- if (need_texture_info) {
- texture_info.copy_to_device();
- need_texture_info = false;
- }
- }
-
- void move_textures_to_host(size_t size, bool for_texture)
- {
- /* Signal to reallocate textures in host memory only. */
- move_texture_to_host = true;
-
- while (size > 0) {
- /* Find suitable memory allocation to move. */
- device_memory *max_mem = NULL;
- size_t max_size = 0;
- bool max_is_image = false;
-
- foreach (CUDAMemMap::value_type &pair, cuda_mem_map) {
- device_memory &mem = *pair.first;
- CUDAMem *cmem = &pair.second;
-
- bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info);
- bool is_image = is_texture && (mem.data_height > 1);
-
- /* Can't move this type of memory. */
- if (!is_texture || cmem->array) {
- continue;
- }
-
- /* Already in host memory. */
- if (cmem->map_host_pointer) {
- continue;
- }
-
- /* For other textures, only move image textures. */
- if (for_texture && !is_image) {
- continue;
- }
-
- /* Try to move largest allocation, prefer moving images. */
- if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
- max_is_image = is_image;
- max_size = mem.device_size;
- max_mem = &mem;
- }
- }
-
- /* Move to host memory. This part is mutex protected since
- * multiple CUDA devices could be moving the memory. The
- * first one will do it, and the rest will adopt the pointer. */
- if (max_mem) {
- VLOG(1) << "Move memory from device to host: " << max_mem->name;
-
- static thread_mutex move_mutex;
- thread_scoped_lock lock(move_mutex);
-
-        /* Preserve the original device pointer; in the multi-device case we
-         * can't change it because the pointer mapping would break. */
- device_ptr prev_pointer = max_mem->device_pointer;
- size_t prev_size = max_mem->device_size;
-
- tex_free(*max_mem);
- tex_alloc(*max_mem);
- size = (max_size >= size) ? 0 : size - max_size;
-
- max_mem->device_pointer = prev_pointer;
- max_mem->device_size = prev_size;
- }
- else {
- break;
- }
- }
-
- /* Update texture info array with new pointers. */
- load_texture_info();
-
- move_texture_to_host = false;
- }
-
- CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0)
- {
- CUDAContextScope scope(this);
-
- CUdeviceptr device_pointer = 0;
- size_t size = mem.memory_size() + pitch_padding;
-
- CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY;
- const char *status = "";
-
- /* First try allocating in device memory, respecting headroom. We make
- * an exception for texture info. It is small and frequently accessed,
- * so treat it as working memory.
- *
- * If there is not enough room for working memory, we will try to move
- * textures to host memory, assuming the performance impact would have
- * been worse for working memory. */
- bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info);
- bool is_image = is_texture && (mem.data_height > 1);
-
- size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
-
- size_t total = 0, free = 0;
- cuMemGetInfo(&free, &total);
-
- /* Move textures to host memory if needed. */
- if (!move_texture_to_host && !is_image && (size + headroom) >= free) {
- move_textures_to_host(size + headroom - free, is_texture);
- cuMemGetInfo(&free, &total);
- }
-
- /* Allocate in device memory. */
- if (!move_texture_to_host && (size + headroom) < free) {
- mem_alloc_result = cuMemAlloc(&device_pointer, size);
- if (mem_alloc_result == CUDA_SUCCESS) {
- status = " in device memory";
- }
- }
-
- /* Fall back to mapped host memory if needed and possible. */
- void *map_host_pointer = 0;
- bool free_map_host = false;
-
- if (mem_alloc_result != CUDA_SUCCESS && can_map_host &&
- map_host_used + size < map_host_limit) {
- if (mem.shared_pointer) {
- /* Another device already allocated host memory. */
- mem_alloc_result = CUDA_SUCCESS;
- map_host_pointer = mem.shared_pointer;
- }
- else {
- /* Allocate host memory ourselves. */
- mem_alloc_result = cuMemHostAlloc(
- &map_host_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
- mem.shared_pointer = map_host_pointer;
- free_map_host = true;
- }
-
- if (mem_alloc_result == CUDA_SUCCESS) {
- cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, mem.shared_pointer, 0));
- map_host_used += size;
- status = " in host memory";
-
- /* Replace host pointer with our host allocation. Only works if
- * CUDA memory layout is the same and has no pitch padding. Also
- * does not work if we move textures to host during a render,
- * since other devices might be using the memory. */
- if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
- mem.host_pointer != mem.shared_pointer) {
- memcpy(mem.shared_pointer, mem.host_pointer, size);
- mem.host_free();
- mem.host_pointer = mem.shared_pointer;
- }
- }
- else {
- status = " failed, out of host memory";
- }
- }
- else if (mem_alloc_result != CUDA_SUCCESS) {
- status = " failed, out of device and host memory";
- }
-
- if (mem_alloc_result != CUDA_SUCCESS) {
- cuda_assert(mem_alloc_result);
- }
-
- if (mem.name) {
- VLOG(1) << "Buffer allocate: " << mem.name << ", "
- << string_human_readable_number(mem.memory_size()) << " bytes. ("
- << string_human_readable_size(mem.memory_size()) << ")" << status;
- }
-
- mem.device_pointer = (device_ptr)device_pointer;
- mem.device_size = size;
- stats.mem_alloc(size);
-
- if (!mem.device_pointer) {
- return NULL;
- }
-
- /* Insert into map of allocations. */
- CUDAMem *cmem = &cuda_mem_map[&mem];
- cmem->map_host_pointer = map_host_pointer;
- cmem->free_map_host = free_map_host;
- return cmem;
- }
-
- void generic_copy_to(device_memory &mem)
- {
- if (mem.host_pointer && mem.device_pointer) {
- CUDAContextScope scope(this);
-
- if (mem.host_pointer != mem.shared_pointer) {
- cuda_assert(cuMemcpyHtoD(
- cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size()));
- }
- }
- }
-
- void generic_free(device_memory &mem)
- {
- if (mem.device_pointer) {
- CUDAContextScope scope(this);
- const CUDAMem &cmem = cuda_mem_map[&mem];
-
- if (cmem.map_host_pointer) {
- /* Free host memory. */
- if (cmem.free_map_host) {
- cuMemFreeHost(cmem.map_host_pointer);
- if (mem.host_pointer == mem.shared_pointer) {
- mem.host_pointer = 0;
- }
- mem.shared_pointer = 0;
- }
-
- map_host_used -= mem.device_size;
- }
- else {
- /* Free device memory. */
- cuMemFree(mem.device_pointer);
- }
-
- stats.mem_free(mem.device_size);
- mem.device_pointer = 0;
- mem.device_size = 0;
-
- cuda_mem_map.erase(cuda_mem_map.find(&mem));
- }
- }
-
- void mem_alloc(device_memory &mem)
- {
- if (mem.type == MEM_PIXELS && !background) {
- pixels_alloc(mem);
- }
- else if (mem.type == MEM_TEXTURE) {
- assert(!"mem_alloc not supported for textures.");
- }
- else {
- generic_alloc(mem);
- }
- }
-
- void mem_copy_to(device_memory &mem)
- {
- if (mem.type == MEM_PIXELS) {
- assert(!"mem_copy_to not supported for pixels.");
- }
- else if (mem.type == MEM_TEXTURE) {
- tex_free(mem);
- tex_alloc(mem);
- }
- else {
- if (!mem.device_pointer) {
- generic_alloc(mem);
- }
-
- generic_copy_to(mem);
- }
- }
-
- void mem_copy_from(device_memory &mem, int y, int w, int h, int elem)
- {
- if (mem.type == MEM_PIXELS && !background) {
- pixels_copy_from(mem, y, w, h);
- }
- else if (mem.type == MEM_TEXTURE) {
- assert(!"mem_copy_from not supported for textures.");
- }
- else {
- CUDAContextScope scope(this);
- size_t offset = elem * y * w;
- size_t size = elem * w * h;
-
- if (mem.host_pointer && mem.device_pointer) {
- cuda_assert(cuMemcpyDtoH(
- (uchar *)mem.host_pointer + offset, (CUdeviceptr)(mem.device_pointer + offset), size));
- }
- else if (mem.host_pointer) {
- memset((char *)mem.host_pointer + offset, 0, size);
- }
- }
- }
-
- void mem_zero(device_memory &mem)
- {
- if (!mem.device_pointer) {
- mem_alloc(mem);
- }
-
- if (mem.host_pointer) {
- memset(mem.host_pointer, 0, mem.memory_size());
- }
-
- if (mem.device_pointer && (!mem.host_pointer || mem.host_pointer != mem.shared_pointer)) {
- CUDAContextScope scope(this);
- cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size()));
- }
- }
-
- void mem_free(device_memory &mem)
- {
- if (mem.type == MEM_PIXELS && !background) {
- pixels_free(mem);
- }
- else if (mem.type == MEM_TEXTURE) {
- tex_free(mem);
- }
- else {
- generic_free(mem);
- }
- }
-
- virtual device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/)
- {
- return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
- }
-
- void const_copy_to(const char *name, void *host, size_t size)
- {
- CUDAContextScope scope(this);
- CUdeviceptr mem;
- size_t bytes;
-
- cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name));
- //assert(bytes == size);
- cuda_assert(cuMemcpyHtoD(mem, host, size));
- }
-
- void tex_alloc(device_memory &mem)
- {
- CUDAContextScope scope(this);
-
- /* General variables for both architectures */
- string bind_name = mem.name;
- size_t dsize = datatype_size(mem.data_type);
- size_t size = mem.memory_size();
-
- CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP;
- switch (mem.extension) {
- case EXTENSION_REPEAT:
- address_mode = CU_TR_ADDRESS_MODE_WRAP;
- break;
- case EXTENSION_EXTEND:
- address_mode = CU_TR_ADDRESS_MODE_CLAMP;
- break;
- case EXTENSION_CLIP:
- address_mode = CU_TR_ADDRESS_MODE_BORDER;
- break;
- default:
- assert(0);
- break;
- }
-
- CUfilter_mode filter_mode;
- if (mem.interpolation == INTERPOLATION_CLOSEST) {
- filter_mode = CU_TR_FILTER_MODE_POINT;
- }
- else {
- filter_mode = CU_TR_FILTER_MODE_LINEAR;
- }
-
- /* Data Storage */
- if (mem.interpolation == INTERPOLATION_NONE) {
- generic_alloc(mem);
- generic_copy_to(mem);
-
- CUdeviceptr cumem;
- size_t cubytes;
-
- cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str()));
-
- if (cubytes == 8) {
- /* 64 bit device pointer */
- uint64_t ptr = mem.device_pointer;
- cuda_assert(cuMemcpyHtoD(cumem, (void *)&ptr, cubytes));
- }
- else {
- /* 32 bit device pointer */
- uint32_t ptr = (uint32_t)mem.device_pointer;
- cuda_assert(cuMemcpyHtoD(cumem, (void *)&ptr, cubytes));
- }
- return;
- }
-
- /* Image Texture Storage */
- CUarray_format_enum format;
- switch (mem.data_type) {
- case TYPE_UCHAR:
- format = CU_AD_FORMAT_UNSIGNED_INT8;
- break;
- case TYPE_UINT16:
- format = CU_AD_FORMAT_UNSIGNED_INT16;
- break;
- case TYPE_UINT:
- format = CU_AD_FORMAT_UNSIGNED_INT32;
- break;
- case TYPE_INT:
- format = CU_AD_FORMAT_SIGNED_INT32;
- break;
- case TYPE_FLOAT:
- format = CU_AD_FORMAT_FLOAT;
- break;
- case TYPE_HALF:
- format = CU_AD_FORMAT_HALF;
- break;
- default:
- assert(0);
- return;
- }
-
- CUDAMem *cmem = NULL;
- CUarray array_3d = NULL;
- size_t src_pitch = mem.data_width * dsize * mem.data_elements;
- size_t dst_pitch = src_pitch;
-
- if (mem.data_depth > 1) {
- /* 3D texture using array, there is no API for linear memory. */
- CUDA_ARRAY3D_DESCRIPTOR desc;
-
- desc.Width = mem.data_width;
- desc.Height = mem.data_height;
- desc.Depth = mem.data_depth;
- desc.Format = format;
- desc.NumChannels = mem.data_elements;
- desc.Flags = 0;
-
- VLOG(1) << "Array 3D allocate: " << mem.name << ", "
- << string_human_readable_number(mem.memory_size()) << " bytes. ("
- << string_human_readable_size(mem.memory_size()) << ")";
-
- cuda_assert(cuArray3DCreate(&array_3d, &desc));
-
- if (!array_3d) {
- return;
- }
-
- CUDA_MEMCPY3D param;
- memset(&param, 0, sizeof(param));
- param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
- param.dstArray = array_3d;
- param.srcMemoryType = CU_MEMORYTYPE_HOST;
- param.srcHost = mem.host_pointer;
- param.srcPitch = src_pitch;
- param.WidthInBytes = param.srcPitch;
- param.Height = mem.data_height;
- param.Depth = mem.data_depth;
-
- cuda_assert(cuMemcpy3D(&param));
-
- mem.device_pointer = (device_ptr)array_3d;
- mem.device_size = size;
- stats.mem_alloc(size);
-
- cmem = &cuda_mem_map[&mem];
- cmem->texobject = 0;
- cmem->array = array_3d;
- }
- else if (mem.data_height > 0) {
- /* 2D texture, using pitch aligned linear memory. */
- int alignment = 0;
- cuda_assert(
- cuDeviceGetAttribute(&alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuDevice));
- dst_pitch = align_up(src_pitch, alignment);
- size_t dst_size = dst_pitch * mem.data_height;
-
- cmem = generic_alloc(mem, dst_size - mem.memory_size());
- if (!cmem) {
- return;
- }
-
- CUDA_MEMCPY2D param;
- memset(&param, 0, sizeof(param));
- param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
- param.dstDevice = mem.device_pointer;
- param.dstPitch = dst_pitch;
- param.srcMemoryType = CU_MEMORYTYPE_HOST;
- param.srcHost = mem.host_pointer;
- param.srcPitch = src_pitch;
- param.WidthInBytes = param.srcPitch;
- param.Height = mem.data_height;
-
- cuda_assert(cuMemcpy2DUnaligned(&param));
- }
- else {
- /* 1D texture, using linear memory. */
- cmem = generic_alloc(mem);
- if (!cmem) {
- return;
- }
-
- cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size));
- }
-
- /* Kepler+, bindless textures. */
- int flat_slot = 0;
- if (string_startswith(mem.name, "__tex_image")) {
- int pos = string(mem.name).rfind("_");
- flat_slot = atoi(mem.name + pos + 1);
- }
- else {
- assert(0);
- }
-
- CUDA_RESOURCE_DESC resDesc;
- memset(&resDesc, 0, sizeof(resDesc));
-
- if (array_3d) {
- resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
- resDesc.res.array.hArray = array_3d;
- resDesc.flags = 0;
- }
- else if (mem.data_height > 0) {
- resDesc.resType = CU_RESOURCE_TYPE_PITCH2D;
- resDesc.res.pitch2D.devPtr = mem.device_pointer;
- resDesc.res.pitch2D.format = format;
- resDesc.res.pitch2D.numChannels = mem.data_elements;
- resDesc.res.pitch2D.height = mem.data_height;
- resDesc.res.pitch2D.width = mem.data_width;
- resDesc.res.pitch2D.pitchInBytes = dst_pitch;
- }
- else {
- resDesc.resType = CU_RESOURCE_TYPE_LINEAR;
- resDesc.res.linear.devPtr = mem.device_pointer;
- resDesc.res.linear.format = format;
- resDesc.res.linear.numChannels = mem.data_elements;
- resDesc.res.linear.sizeInBytes = mem.device_size;
- }
-
- CUDA_TEXTURE_DESC texDesc;
- memset(&texDesc, 0, sizeof(texDesc));
- texDesc.addressMode[0] = address_mode;
- texDesc.addressMode[1] = address_mode;
- texDesc.addressMode[2] = address_mode;
- texDesc.filterMode = filter_mode;
- texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
-
- cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
-
- /* Resize once */
- if (flat_slot >= texture_info.size()) {
- /* Allocate some slots in advance, to reduce amount
- * of re-allocations. */
- texture_info.resize(flat_slot + 128);
- }
-
-    /* Set mapping and tag that we need to (re-)upload to the device. */
- TextureInfo &info = texture_info[flat_slot];
- info.data = (uint64_t)cmem->texobject;
- info.cl_buffer = 0;
- info.interpolation = mem.interpolation;
- info.extension = mem.extension;
- info.width = mem.data_width;
- info.height = mem.data_height;
- info.depth = mem.data_depth;
- need_texture_info = true;
- }
-
- void tex_free(device_memory &mem)
- {
- if (mem.device_pointer) {
- CUDAContextScope scope(this);
- const CUDAMem &cmem = cuda_mem_map[&mem];
-
- if (cmem.texobject) {
- /* Free bindless texture. */
- cuTexObjectDestroy(cmem.texobject);
- }
-
- if (cmem.array) {
- /* Free array. */
- cuArrayDestroy(cmem.array);
- stats.mem_free(mem.device_size);
- mem.device_pointer = 0;
- mem.device_size = 0;
-
- cuda_mem_map.erase(cuda_mem_map.find(&mem));
- }
- else {
- generic_free(mem);
- }
- }
- }
-
-#define CUDA_GET_BLOCKSIZE(func, w, h) \
- int threads_per_block; \
- cuda_assert( \
- cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
- int threads = (int)sqrt((float)threads_per_block); \
- int xblocks = ((w) + threads - 1) / threads; \
- int yblocks = ((h) + threads - 1) / threads;
-
-#define CUDA_LAUNCH_KERNEL(func, args) \
- cuda_assert(cuLaunchKernel(func, xblocks, yblocks, 1, threads, threads, 1, 0, 0, args, 0));
-
-/* Similar to the above, but for 1-dimensional blocks. */
-#define CUDA_GET_BLOCKSIZE_1D(func, w, h) \
- int threads_per_block; \
- cuda_assert( \
- cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
- int xblocks = ((w) + threads_per_block - 1) / threads_per_block; \
- int yblocks = h;
-
-#define CUDA_LAUNCH_KERNEL_1D(func, args) \
- cuda_assert(cuLaunchKernel(func, xblocks, yblocks, 1, threads_per_block, 1, 1, 0, 0, args, 0));
-
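The CUDA_GET_BLOCKSIZE macro above boils down to the usual square-block grid computation; written out as plain arithmetic (with threads_per_block hard-coded instead of queried via cuFuncGetAttribute) it looks like this.

    // Grid/block sizing for a 2D launch covering a w x h image.
    #include <cmath>
    #include <cstdio>

    int main()
    {
      const int threads_per_block = 1024; /* typical CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK */
      const int w = 1920, h = 1080;

      /* Square 2D blocks: 32x32 threads for a 1024-thread limit. */
      const int threads = (int)sqrt((float)threads_per_block);
      const int xblocks = (w + threads - 1) / threads;
      const int yblocks = (h + threads - 1) / threads;

      printf("launch %dx%d blocks of %dx%d threads\n", xblocks, yblocks, threads, threads);
      return 0;
    }

The 1D variant skips the square root and simply divides the work size by threads_per_block.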
- bool denoising_non_local_means(device_ptr image_ptr,
- device_ptr guide_ptr,
- device_ptr variance_ptr,
- device_ptr out_ptr,
- DenoisingTask *task)
- {
- if (have_error())
- return false;
-
- CUDAContextScope scope(this);
-
- int stride = task->buffer.stride;
- int w = task->buffer.width;
- int h = task->buffer.h;
- int r = task->nlm_state.r;
- int f = task->nlm_state.f;
- float a = task->nlm_state.a;
- float k_2 = task->nlm_state.k_2;
-
- int pass_stride = task->buffer.pass_stride;
- int num_shifts = (2 * r + 1) * (2 * r + 1);
- int channel_offset = task->nlm_state.is_color ? task->buffer.pass_stride : 0;
- int frame_offset = 0;
-
- if (have_error())
- return false;
-
- CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer);
- CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts;
- CUdeviceptr weightAccum = difference + 2 * sizeof(float) * pass_stride * num_shifts;
- CUdeviceptr scale_ptr = 0;
-
- cuda_assert(cuMemsetD8(weightAccum, 0, sizeof(float) * pass_stride));
- cuda_assert(cuMemsetD8(out_ptr, 0, sizeof(float) * pass_stride));
-
- {
- CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMUpdateOutput;
- cuda_assert(cuModuleGetFunction(
- &cuNLMCalcDifference, cuFilterModule, "kernel_cuda_filter_nlm_calc_difference"));
- cuda_assert(cuModuleGetFunction(&cuNLMBlur, cuFilterModule, "kernel_cuda_filter_nlm_blur"));
- cuda_assert(cuModuleGetFunction(
- &cuNLMCalcWeight, cuFilterModule, "kernel_cuda_filter_nlm_calc_weight"));
- cuda_assert(cuModuleGetFunction(
- &cuNLMUpdateOutput, cuFilterModule, "kernel_cuda_filter_nlm_update_output"));
-
- cuda_assert(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuFuncSetCacheConfig(cuNLMUpdateOutput, CU_FUNC_CACHE_PREFER_L1));
-
- CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference, w * h, num_shifts);
-
- void *calc_difference_args[] = {&guide_ptr,
- &variance_ptr,
- &scale_ptr,
- &difference,
- &w,
- &h,
- &stride,
- &pass_stride,
- &r,
- &channel_offset,
- &frame_offset,
- &a,
- &k_2};
- void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
- void *calc_weight_args[] = {
- &blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
- void *update_output_args[] = {&blurDifference,
- &image_ptr,
- &out_ptr,
- &weightAccum,
- &w,
- &h,
- &stride,
- &pass_stride,
- &channel_offset,
- &r,
- &f};
-
- CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMUpdateOutput, update_output_args);
- }
-
- {
- CUfunction cuNLMNormalize;
- cuda_assert(cuModuleGetFunction(
- &cuNLMNormalize, cuFilterModule, "kernel_cuda_filter_nlm_normalize"));
- cuda_assert(cuFuncSetCacheConfig(cuNLMNormalize, CU_FUNC_CACHE_PREFER_L1));
- void *normalize_args[] = {&out_ptr, &weightAccum, &w, &h, &stride};
- CUDA_GET_BLOCKSIZE(cuNLMNormalize, w, h);
- CUDA_LAUNCH_KERNEL(cuNLMNormalize, normalize_args);
- cuda_assert(cuCtxSynchronize());
- }
-
- return !have_error();
- }
-
- bool denoising_construct_transform(DenoisingTask *task)
- {
- if (have_error())
- return false;
-
- CUDAContextScope scope(this);
-
- CUfunction cuFilterConstructTransform;
- cuda_assert(cuModuleGetFunction(
- &cuFilterConstructTransform, cuFilterModule, "kernel_cuda_filter_construct_transform"));
- cuda_assert(cuFuncSetCacheConfig(cuFilterConstructTransform, CU_FUNC_CACHE_PREFER_SHARED));
- CUDA_GET_BLOCKSIZE(cuFilterConstructTransform, task->storage.w, task->storage.h);
-
- void *args[] = {&task->buffer.mem.device_pointer,
- &task->tile_info_mem.device_pointer,
- &task->storage.transform.device_pointer,
- &task->storage.rank.device_pointer,
- &task->filter_area,
- &task->rect,
- &task->radius,
- &task->pca_threshold,
- &task->buffer.pass_stride,
- &task->buffer.frame_stride,
- &task->buffer.use_time};
- CUDA_LAUNCH_KERNEL(cuFilterConstructTransform, args);
- cuda_assert(cuCtxSynchronize());
-
- return !have_error();
- }
-
- bool denoising_accumulate(device_ptr color_ptr,
- device_ptr color_variance_ptr,
- device_ptr scale_ptr,
- int frame,
- DenoisingTask *task)
- {
- if (have_error())
- return false;
-
- CUDAContextScope scope(this);
-
- int r = task->radius;
- int f = 4;
- float a = 1.0f;
- float k_2 = task->nlm_k_2;
-
- int w = task->reconstruction_state.source_w;
- int h = task->reconstruction_state.source_h;
- int stride = task->buffer.stride;
- int frame_offset = frame * task->buffer.frame_stride;
- int t = task->tile_info->frames[frame];
-
- int pass_stride = task->buffer.pass_stride;
- int num_shifts = (2 * r + 1) * (2 * r + 1);
-
- if (have_error())
- return false;
-
- CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer);
- CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts;
-
- CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian;
- cuda_assert(cuModuleGetFunction(
- &cuNLMCalcDifference, cuFilterModule, "kernel_cuda_filter_nlm_calc_difference"));
- cuda_assert(cuModuleGetFunction(&cuNLMBlur, cuFilterModule, "kernel_cuda_filter_nlm_blur"));
- cuda_assert(cuModuleGetFunction(
- &cuNLMCalcWeight, cuFilterModule, "kernel_cuda_filter_nlm_calc_weight"));
- cuda_assert(cuModuleGetFunction(
- &cuNLMConstructGramian, cuFilterModule, "kernel_cuda_filter_nlm_construct_gramian"));
-
- cuda_assert(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuFuncSetCacheConfig(cuNLMConstructGramian, CU_FUNC_CACHE_PREFER_SHARED));
-
- CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference,
- task->reconstruction_state.source_w *
- task->reconstruction_state.source_h,
- num_shifts);
-
- void *calc_difference_args[] = {&color_ptr,
- &color_variance_ptr,
- &scale_ptr,
- &difference,
- &w,
- &h,
- &stride,
- &pass_stride,
- &r,
- &pass_stride,
- &frame_offset,
- &a,
- &k_2};
- void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
- void *calc_weight_args[] = {
- &blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
- void *construct_gramian_args[] = {&t,
- &blurDifference,
- &task->buffer.mem.device_pointer,
- &task->storage.transform.device_pointer,
- &task->storage.rank.device_pointer,
- &task->storage.XtWX.device_pointer,
- &task->storage.XtWY.device_pointer,
- &task->reconstruction_state.filter_window,
- &w,
- &h,
- &stride,
- &pass_stride,
- &r,
- &f,
- &frame_offset,
- &task->buffer.use_time};
-
- CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args);
- cuda_assert(cuCtxSynchronize());
-
- return !have_error();
- }
-
- bool denoising_solve(device_ptr output_ptr, DenoisingTask *task)
- {
- CUfunction cuFinalize;
- cuda_assert(cuModuleGetFunction(&cuFinalize, cuFilterModule, "kernel_cuda_filter_finalize"));
- cuda_assert(cuFuncSetCacheConfig(cuFinalize, CU_FUNC_CACHE_PREFER_L1));
- void *finalize_args[] = {&output_ptr,
- &task->storage.rank.device_pointer,
- &task->storage.XtWX.device_pointer,
- &task->storage.XtWY.device_pointer,
- &task->filter_area,
- &task->reconstruction_state.buffer_params.x,
- &task->render_buffer.samples};
- CUDA_GET_BLOCKSIZE(
- cuFinalize, task->reconstruction_state.source_w, task->reconstruction_state.source_h);
- CUDA_LAUNCH_KERNEL(cuFinalize, finalize_args);
- cuda_assert(cuCtxSynchronize());
-
- return !have_error();
- }
-
- bool denoising_combine_halves(device_ptr a_ptr,
- device_ptr b_ptr,
- device_ptr mean_ptr,
- device_ptr variance_ptr,
- int r,
- int4 rect,
- DenoisingTask *task)
- {
- if (have_error())
- return false;
-
- CUDAContextScope scope(this);
-
- CUfunction cuFilterCombineHalves;
- cuda_assert(cuModuleGetFunction(
- &cuFilterCombineHalves, cuFilterModule, "kernel_cuda_filter_combine_halves"));
- cuda_assert(cuFuncSetCacheConfig(cuFilterCombineHalves, CU_FUNC_CACHE_PREFER_L1));
- CUDA_GET_BLOCKSIZE(
- cuFilterCombineHalves, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
-
- void *args[] = {&mean_ptr, &variance_ptr, &a_ptr, &b_ptr, &rect, &r};
- CUDA_LAUNCH_KERNEL(cuFilterCombineHalves, args);
- cuda_assert(cuCtxSynchronize());
-
- return !have_error();
- }
-
- bool denoising_divide_shadow(device_ptr a_ptr,
- device_ptr b_ptr,
- device_ptr sample_variance_ptr,
- device_ptr sv_variance_ptr,
- device_ptr buffer_variance_ptr,
- DenoisingTask *task)
- {
- if (have_error())
- return false;
-
- CUDAContextScope scope(this);
-
- CUfunction cuFilterDivideShadow;
- cuda_assert(cuModuleGetFunction(
- &cuFilterDivideShadow, cuFilterModule, "kernel_cuda_filter_divide_shadow"));
- cuda_assert(cuFuncSetCacheConfig(cuFilterDivideShadow, CU_FUNC_CACHE_PREFER_L1));
- CUDA_GET_BLOCKSIZE(
- cuFilterDivideShadow, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
-
- void *args[] = {&task->render_buffer.samples,
- &task->tile_info_mem.device_pointer,
- &a_ptr,
- &b_ptr,
- &sample_variance_ptr,
- &sv_variance_ptr,
- &buffer_variance_ptr,
- &task->rect,
- &task->render_buffer.pass_stride,
- &task->render_buffer.offset};
- CUDA_LAUNCH_KERNEL(cuFilterDivideShadow, args);
- cuda_assert(cuCtxSynchronize());
-
- return !have_error();
- }
-
- bool denoising_get_feature(int mean_offset,
- int variance_offset,
- device_ptr mean_ptr,
- device_ptr variance_ptr,
- float scale,
- DenoisingTask *task)
- {
- if (have_error())
- return false;
-
- CUDAContextScope scope(this);
-
- CUfunction cuFilterGetFeature;
- cuda_assert(cuModuleGetFunction(
- &cuFilterGetFeature, cuFilterModule, "kernel_cuda_filter_get_feature"));
- cuda_assert(cuFuncSetCacheConfig(cuFilterGetFeature, CU_FUNC_CACHE_PREFER_L1));
- CUDA_GET_BLOCKSIZE(
- cuFilterGetFeature, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
-
- void *args[] = {&task->render_buffer.samples,
- &task->tile_info_mem.device_pointer,
- &mean_offset,
- &variance_offset,
- &mean_ptr,
- &variance_ptr,
- &scale,
- &task->rect,
- &task->render_buffer.pass_stride,
- &task->render_buffer.offset};
- CUDA_LAUNCH_KERNEL(cuFilterGetFeature, args);
- cuda_assert(cuCtxSynchronize());
-
- return !have_error();
- }
-
- bool denoising_write_feature(int out_offset,
- device_ptr from_ptr,
- device_ptr buffer_ptr,
- DenoisingTask *task)
- {
- if (have_error())
- return false;
-
- CUDAContextScope scope(this);
-
- CUfunction cuFilterWriteFeature;
- cuda_assert(cuModuleGetFunction(
- &cuFilterWriteFeature, cuFilterModule, "kernel_cuda_filter_write_feature"));
- cuda_assert(cuFuncSetCacheConfig(cuFilterWriteFeature, CU_FUNC_CACHE_PREFER_L1));
- CUDA_GET_BLOCKSIZE(cuFilterWriteFeature, task->filter_area.z, task->filter_area.w);
-
- void *args[] = {&task->render_buffer.samples,
- &task->reconstruction_state.buffer_params,
- &task->filter_area,
- &from_ptr,
- &buffer_ptr,
- &out_offset,
- &task->rect};
- CUDA_LAUNCH_KERNEL(cuFilterWriteFeature, args);
- cuda_assert(cuCtxSynchronize());
-
- return !have_error();
- }
-
- bool denoising_detect_outliers(device_ptr image_ptr,
- device_ptr variance_ptr,
- device_ptr depth_ptr,
- device_ptr output_ptr,
- DenoisingTask *task)
- {
- if (have_error())
- return false;
-
- CUDAContextScope scope(this);
-
- CUfunction cuFilterDetectOutliers;
- cuda_assert(cuModuleGetFunction(
- &cuFilterDetectOutliers, cuFilterModule, "kernel_cuda_filter_detect_outliers"));
- cuda_assert(cuFuncSetCacheConfig(cuFilterDetectOutliers, CU_FUNC_CACHE_PREFER_L1));
- CUDA_GET_BLOCKSIZE(
- cuFilterDetectOutliers, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
-
- void *args[] = {&image_ptr,
- &variance_ptr,
- &depth_ptr,
- &output_ptr,
- &task->rect,
- &task->buffer.pass_stride};
-
- CUDA_LAUNCH_KERNEL(cuFilterDetectOutliers, args);
- cuda_assert(cuCtxSynchronize());
-
- return !have_error();
- }
-
- void denoise(RenderTile &rtile, DenoisingTask &denoising)
- {
- denoising.functions.construct_transform = function_bind(
- &CUDADevice::denoising_construct_transform, this, &denoising);
- denoising.functions.accumulate = function_bind(
- &CUDADevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising);
- denoising.functions.solve = function_bind(&CUDADevice::denoising_solve, this, _1, &denoising);
- denoising.functions.divide_shadow = function_bind(
- &CUDADevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
- denoising.functions.non_local_means = function_bind(
- &CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
- denoising.functions.combine_halves = function_bind(
- &CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
- denoising.functions.get_feature = function_bind(
- &CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, _5, &denoising);
- denoising.functions.write_feature = function_bind(
- &CUDADevice::denoising_write_feature, this, _1, _2, _3, &denoising);
- denoising.functions.detect_outliers = function_bind(
- &CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
-
- denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
- denoising.render_buffer.samples = rtile.sample;
- denoising.buffer.gpu_temporary_mem = true;
-
- denoising.run_denoising(&rtile);
- }
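
The table above routes each generic denoising step to a CUDA implementation via function_bind, which behaves like std::bind: the device and task pointers are bound up front and the placeholders forward the per-call arguments. A minimal standalone sketch of that pattern follows, using illustrative names (ToyDevice, ToyTask) rather than the real Cycles types.

#include <cstdio>
#include <functional>

struct ToyTask; /* forward declaration so the device method can take a task pointer */

struct ToyDevice {
  bool get_feature(int mean_offset, int variance_offset, ToyTask *task);
};

struct ToyTask {
  /* Per-task function table, filled in by the device before the task runs. */
  std::function<bool(int, int)> get_feature;
};

bool ToyDevice::get_feature(int mean_offset, int variance_offset, ToyTask * /*task*/)
{
  printf("get_feature(%d, %d)\n", mean_offset, variance_offset);
  return true;
}

int main()
{
  ToyDevice device;
  ToyTask task;

  using namespace std::placeholders;
  /* Placeholders _1/_2 forward the per-call arguments; the task pointer is bound up front,
   * mirroring function_bind(&CUDADevice::denoising_get_feature, this, _1, _2, ..., &denoising). */
  task.get_feature = std::bind(&ToyDevice::get_feature, &device, _1, _2, &task);

  return task.get_feature(3, 4) ? 0 : 1;
}
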
-
- void path_trace(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles)
- {
- scoped_timer timer(&rtile.buffers->render_time);
-
- if (have_error())
- return;
-
- CUDAContextScope scope(this);
- CUfunction cuPathTrace;
-
- /* Get kernel function. */
- if (task.integrator_branched) {
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
- }
- else {
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
- }
-
- if (have_error()) {
- return;
- }
-
- cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
-
- /* Allocate work tile. */
- work_tiles.alloc(1);
-
- WorkTile *wtile = work_tiles.data();
- wtile->x = rtile.x;
- wtile->y = rtile.y;
- wtile->w = rtile.w;
- wtile->h = rtile.h;
- wtile->offset = rtile.offset;
- wtile->stride = rtile.stride;
- wtile->buffer = (float *)cuda_device_ptr(rtile.buffer);
-
- /* Prepare work size. More step samples render faster, but for now we
- * remain conservative for GPUs connected to a display to avoid driver
- * timeouts and display freezing. */
- int min_blocks, num_threads_per_block;
- cuda_assert(cuOccupancyMaxPotentialBlockSize(
- &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
- if (!info.display_device) {
- min_blocks *= 8;
- }
-
- uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h);
-
- /* Render all samples. */
- int start_sample = rtile.start_sample;
- int end_sample = rtile.start_sample + rtile.num_samples;
-
- for (int sample = start_sample; sample < end_sample; sample += step_samples) {
- /* Setup and copy work tile to device. */
- wtile->start_sample = sample;
- wtile->num_samples = min(step_samples, end_sample - sample);
- work_tiles.copy_to_device();
-
- CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
- uint total_work_size = wtile->w * wtile->h * wtile->num_samples;
- uint num_blocks = divide_up(total_work_size, num_threads_per_block);
-
- /* Launch kernel. */
- void *args[] = {&d_work_tiles, &total_work_size};
-
- cuda_assert(cuLaunchKernel(
- cuPathTrace, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
-
- cuda_assert(cuCtxSynchronize());
-
- /* Update progress. */
- rtile.sample = sample + wtile->num_samples;
- task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples);
-
- if (task.get_cancel()) {
- if (task.need_finish_queue == false)
- break;
- }
- }
- }
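
The step-sample heuristic above sizes each launch so the GPU is saturated at least once per kernel invocation, scaled up eightfold when no display is attached (longer launches are safe without a watchdog). Below is a standalone restatement with hypothetical occupancy and tile numbers; the real values come from cuOccupancyMaxPotentialBlockSize at runtime.

#include <cstdio>

static unsigned divide_up(unsigned x, unsigned y)
{
  return (x + y - 1) / y;
}

int main()
{
  const bool display_device = false;
  unsigned min_blocks = 40;             /* hypothetical result of cuOccupancyMaxPotentialBlockSize */
  const unsigned threads_per_block = 256;
  const unsigned tile_w = 256, tile_h = 256;

  if (!display_device) {
    min_blocks *= 8; /* allow longer launches when no display is attached */
  }

  const unsigned step_samples = divide_up(min_blocks * threads_per_block, tile_w * tile_h);
  printf("samples per kernel launch: %u\n", step_samples); /* 2 for these numbers */
  return 0;
}
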
-
- void film_convert(DeviceTask &task,
- device_ptr buffer,
- device_ptr rgba_byte,
- device_ptr rgba_half)
- {
- if (have_error())
- return;
-
- CUDAContextScope scope(this);
-
- CUfunction cuFilmConvert;
- CUdeviceptr d_rgba = map_pixels((rgba_byte) ? rgba_byte : rgba_half);
- CUdeviceptr d_buffer = cuda_device_ptr(buffer);
-
- /* get kernel function */
- if (rgba_half) {
- cuda_assert(
- cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_half_float"));
- }
- else {
- cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte"));
- }
-
- float sample_scale = 1.0f / (task.sample + 1);
-
- /* pass in parameters */
- void *args[] = {&d_rgba,
- &d_buffer,
- &sample_scale,
- &task.x,
- &task.y,
- &task.w,
- &task.h,
- &task.offset,
- &task.stride};
-
- /* launch kernel */
- int threads_per_block;
- cuda_assert(cuFuncGetAttribute(
- &threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuFilmConvert));
-
- int xthreads = (int)sqrt(threads_per_block);
- int ythreads = (int)sqrt(threads_per_block);
- int xblocks = (task.w + xthreads - 1) / xthreads;
- int yblocks = (task.h + ythreads - 1) / ythreads;
-
- cuda_assert(cuFuncSetCacheConfig(cuFilmConvert, CU_FUNC_CACHE_PREFER_L1));
-
- cuda_assert(cuLaunchKernel(cuFilmConvert,
- xblocks,
- yblocks,
- 1, /* blocks */
- xthreads,
- ythreads,
- 1, /* threads */
- 0,
- 0,
- args,
- 0));
-
- unmap_pixels((rgba_byte) ? rgba_byte : rgba_half);
-
- cuda_assert(cuCtxSynchronize());
- }
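
The launch configuration above turns the kernel's per-block thread limit into a near-square 2D block and covers the tile with ceil-divided blocks. A worked example with a hypothetical 1024-thread limit and a 1920x1080 region:

#include <cmath>
#include <cstdio>

int main()
{
  const int threads_per_block = 1024; /* hypothetical CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK */
  const int w = 1920, h = 1080;       /* region to convert */

  const int xthreads = (int)sqrt((double)threads_per_block); /* 32 */
  const int ythreads = xthreads;
  const int xblocks = (w + xthreads - 1) / xthreads; /* 60 */
  const int yblocks = (h + ythreads - 1) / ythreads; /* 34 */

  printf("grid %dx%d of %dx%d threads\n", xblocks, yblocks, xthreads, ythreads);
  return 0;
}
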
-
- void shader(DeviceTask &task)
- {
- if (have_error())
- return;
-
- CUDAContextScope scope(this);
-
- CUfunction cuShader;
- CUdeviceptr d_input = cuda_device_ptr(task.shader_input);
- CUdeviceptr d_output = cuda_device_ptr(task.shader_output);
-
- /* get kernel function */
- if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
- cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_bake"));
- }
- else if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
- cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_displace"));
- }
- else {
- cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_background"));
- }
-
- /* do tasks in smaller chunks, so we can cancel it */
- const int shader_chunk_size = 65536;
- const int start = task.shader_x;
- const int end = task.shader_x + task.shader_w;
- int offset = task.offset;
-
- bool canceled = false;
- for (int sample = 0; sample < task.num_samples && !canceled; sample++) {
- for (int shader_x = start; shader_x < end; shader_x += shader_chunk_size) {
- int shader_w = min(shader_chunk_size, end - shader_x);
-
- /* pass in parameters */
- void *args[8];
- int arg = 0;
- args[arg++] = &d_input;
- args[arg++] = &d_output;
- args[arg++] = &task.shader_eval_type;
- if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
- args[arg++] = &task.shader_filter;
- }
- args[arg++] = &shader_x;
- args[arg++] = &shader_w;
- args[arg++] = &offset;
- args[arg++] = &sample;
-
- /* launch kernel */
- int threads_per_block;
- cuda_assert(cuFuncGetAttribute(
- &threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
-
- int xblocks = (shader_w + threads_per_block - 1) / threads_per_block;
-
- cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuLaunchKernel(cuShader,
- xblocks,
- 1,
- 1, /* blocks */
- threads_per_block,
- 1,
- 1, /* threads */
- 0,
- 0,
- args,
- 0));
-
- cuda_assert(cuCtxSynchronize());
-
- if (task.get_cancel()) {
- canceled = true;
- break;
- }
- }
-
- task.update_progress(NULL);
- }
- }
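
shader() above walks the evaluation range in 65536-element chunks so cancellation is only checked between kernel launches, keeping each launch short. A device-agnostic sketch of that loop shape, with illustrative sizes and a stand-in cancel flag:

#include <algorithm>
#include <atomic>
#include <cstdio>

int main()
{
  const int chunk_size = 65536;
  const int start = 0, width = 200000;
  std::atomic<bool> cancel(false);

  for (int x = start; x < start + width; x += chunk_size) {
    const int w = std::min(chunk_size, start + width - x);
    printf("evaluate shaders for [%d, %d)\n", x, x + w); /* kernel launch would go here */

    if (cancel.load()) {
      break; /* checked between chunks, like task.get_cancel() above */
    }
  }
  return 0;
}
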
-
- CUdeviceptr map_pixels(device_ptr mem)
- {
- if (!background) {
- PixelMem pmem = pixel_mem_map[mem];
- CUdeviceptr buffer;
-
- size_t bytes;
- cuda_assert(cuGraphicsMapResources(1, &pmem.cuPBOresource, 0));
- cuda_assert(cuGraphicsResourceGetMappedPointer(&buffer, &bytes, pmem.cuPBOresource));
-
- return buffer;
- }
-
- return cuda_device_ptr(mem);
- }
-
- void unmap_pixels(device_ptr mem)
- {
- if (!background) {
- PixelMem pmem = pixel_mem_map[mem];
-
- cuda_assert(cuGraphicsUnmapResources(1, &pmem.cuPBOresource, 0));
- }
- }
-
- void pixels_alloc(device_memory &mem)
- {
- PixelMem pmem;
-
- pmem.w = mem.data_width;
- pmem.h = mem.data_height;
-
- CUDAContextScope scope(this);
-
- glGenBuffers(1, &pmem.cuPBO);
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
- if (mem.data_type == TYPE_HALF)
- glBufferData(
- GL_PIXEL_UNPACK_BUFFER, pmem.w * pmem.h * sizeof(GLhalf) * 4, NULL, GL_DYNAMIC_DRAW);
- else
- glBufferData(
- GL_PIXEL_UNPACK_BUFFER, pmem.w * pmem.h * sizeof(uint8_t) * 4, NULL, GL_DYNAMIC_DRAW);
-
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-
- glActiveTexture(GL_TEXTURE0);
- glGenTextures(1, &pmem.cuTexId);
- glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
- if (mem.data_type == TYPE_HALF)
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, pmem.w, pmem.h, 0, GL_RGBA, GL_HALF_FLOAT, NULL);
- else
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
- glBindTexture(GL_TEXTURE_2D, 0);
-
- CUresult result = cuGraphicsGLRegisterBuffer(
- &pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
-
- if (result == CUDA_SUCCESS) {
- mem.device_pointer = pmem.cuTexId;
- pixel_mem_map[mem.device_pointer] = pmem;
-
- mem.device_size = mem.memory_size();
- stats.mem_alloc(mem.device_size);
-
- return;
- }
- else {
- /* failed to register buffer, fallback to no interop */
- glDeleteBuffers(1, &pmem.cuPBO);
- glDeleteTextures(1, &pmem.cuTexId);
-
- background = true;
- }
- }
-
- void pixels_copy_from(device_memory &mem, int y, int w, int h)
- {
- PixelMem pmem = pixel_mem_map[mem.device_pointer];
-
- CUDAContextScope scope(this);
-
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
- uchar *pixels = (uchar *)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY);
- size_t offset = sizeof(uchar) * 4 * y * w;
- memcpy((uchar *)mem.host_pointer + offset, pixels + offset, sizeof(uchar) * 4 * w * h);
- glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
- }
-
- void pixels_free(device_memory &mem)
- {
- if (mem.device_pointer) {
- PixelMem pmem = pixel_mem_map[mem.device_pointer];
-
- CUDAContextScope scope(this);
-
- cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource));
- glDeleteBuffers(1, &pmem.cuPBO);
- glDeleteTextures(1, &pmem.cuTexId);
-
- pixel_mem_map.erase(pixel_mem_map.find(mem.device_pointer));
- mem.device_pointer = 0;
-
- stats.mem_free(mem.device_size);
- mem.device_size = 0;
- }
- }
-
- void draw_pixels(device_memory &mem,
- int y,
- int w,
- int h,
- int width,
- int height,
- int dx,
- int dy,
- int dw,
- int dh,
- bool transparent,
- const DeviceDrawParams &draw_params)
- {
- assert(mem.type == MEM_PIXELS);
-
- if (!background) {
- const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL);
- PixelMem pmem = pixel_mem_map[mem.device_pointer];
- float *vpointer;
-
- CUDAContextScope scope(this);
-
- /* for multi devices, this assumes the inefficient method that we allocate
- * all pixels on the device even though we only render to a subset */
- size_t offset = 4 * y * w;
-
- if (mem.data_type == TYPE_HALF)
- offset *= sizeof(GLhalf);
- else
- offset *= sizeof(uint8_t);
-
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
- glActiveTexture(GL_TEXTURE0);
- glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
- if (mem.data_type == TYPE_HALF) {
- glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_HALF_FLOAT, (void *)offset);
- }
- else {
- glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, (void *)offset);
- }
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-
- if (transparent) {
- glEnable(GL_BLEND);
- glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
- }
-
- GLint shader_program;
- if (use_fallback_shader) {
- if (!bind_fallback_display_space_shader(dw, dh)) {
- return;
- }
- shader_program = fallback_shader_program;
- }
- else {
- draw_params.bind_display_space_shader_cb();
- glGetIntegerv(GL_CURRENT_PROGRAM, &shader_program);
- }
-
- if (!vertex_buffer) {
- glGenBuffers(1, &vertex_buffer);
- }
-
- glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
- /* invalidate old contents - avoids stalling if buffer is still waiting in queue to be rendered */
- glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
-
- vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
-
- if (vpointer) {
- /* texture coordinate - vertex pair */
- vpointer[0] = 0.0f;
- vpointer[1] = 0.0f;
- vpointer[2] = dx;
- vpointer[3] = dy;
-
- vpointer[4] = (float)w / (float)pmem.w;
- vpointer[5] = 0.0f;
- vpointer[6] = (float)width + dx;
- vpointer[7] = dy;
-
- vpointer[8] = (float)w / (float)pmem.w;
- vpointer[9] = (float)h / (float)pmem.h;
- vpointer[10] = (float)width + dx;
- vpointer[11] = (float)height + dy;
-
- vpointer[12] = 0.0f;
- vpointer[13] = (float)h / (float)pmem.h;
- vpointer[14] = dx;
- vpointer[15] = (float)height + dy;
-
- glUnmapBuffer(GL_ARRAY_BUFFER);
- }
-
- GLuint vertex_array_object;
- GLuint position_attribute, texcoord_attribute;
-
- glGenVertexArrays(1, &vertex_array_object);
- glBindVertexArray(vertex_array_object);
-
- texcoord_attribute = glGetAttribLocation(shader_program, "texCoord");
- position_attribute = glGetAttribLocation(shader_program, "pos");
-
- glEnableVertexAttribArray(texcoord_attribute);
- glEnableVertexAttribArray(position_attribute);
-
- glVertexAttribPointer(
- texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
- glVertexAttribPointer(position_attribute,
- 2,
- GL_FLOAT,
- GL_FALSE,
- 4 * sizeof(float),
- (const GLvoid *)(sizeof(float) * 2));
-
- glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
-
- if (use_fallback_shader) {
- glUseProgram(0);
- }
- else {
- draw_params.unbind_display_space_shader_cb();
- }
-
- if (transparent) {
- glDisable(GL_BLEND);
- }
-
- glBindTexture(GL_TEXTURE_2D, 0);
-
- return;
- }
-
- Device::draw_pixels(mem, y, w, h, width, height, dx, dy, dw, dh, transparent, draw_params);
- }
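
For reference, the interleaved vertex buffer filled above is a single triangle-fan quad of four (u, v, x, y) vertices with a 4-float stride: texCoord reads floats 0-1 and pos reads floats 2-3. The snippet below restates the same values outside of GL, with placeholder numbers and no GL calls.

#include <cstdio>

int main()
{
  /* Placeholder values mirroring draw_pixels(): dx/dy/width/height describe the draw
   * rectangle, u and v are w / pmem.w and h / pmem.h (used fraction of the PBO texture). */
  const float dx = 0.0f, dy = 0.0f, width = 960.0f, height = 540.0f;
  const float u = 1.0f, v = 1.0f;

  const float quad[4][4] = {
      /*  u     v     x            y */
      {0.0f, 0.0f, dx, dy},
      {u, 0.0f, dx + width, dy},
      {u, v, dx + width, dy + height},
      {0.0f, v, dx, dy + height},
  };

  for (int i = 0; i < 4; i++) {
    printf("uv=(%g, %g) pos=(%g, %g)\n", quad[i][0], quad[i][1], quad[i][2], quad[i][3]);
  }
  return 0;
}
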
-
- void thread_run(DeviceTask *task)
- {
- CUDAContextScope scope(this);
-
- if (task->type == DeviceTask::RENDER) {
- DeviceRequestedFeatures requested_features;
- if (use_split_kernel()) {
- if (split_kernel == NULL) {
- split_kernel = new CUDASplitKernel(this);
- split_kernel->load_kernels(requested_features);
- }
- }
-
- device_vector<WorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY);
-
- /* keep rendering tiles until done */
- RenderTile tile;
- DenoisingTask denoising(this, *task);
-
- while (task->acquire_tile(this, tile)) {
- if (tile.task == RenderTile::PATH_TRACE) {
- if (use_split_kernel()) {
- device_only_memory<uchar> void_buffer(this, "void_buffer");
- split_kernel->path_trace(task, tile, void_buffer, void_buffer);
- }
- else {
- path_trace(*task, tile, work_tiles);
- }
- }
- else if (tile.task == RenderTile::DENOISE) {
- tile.sample = tile.start_sample + tile.num_samples;
-
- denoise(tile, denoising);
-
- task->update_progress(&tile, tile.w * tile.h);
- }
-
- task->release_tile(tile);
-
- if (task->get_cancel()) {
- if (task->need_finish_queue == false)
- break;
- }
- }
-
- work_tiles.free();
- }
- else if (task->type == DeviceTask::SHADER) {
- shader(*task);
-
- cuda_assert(cuCtxSynchronize());
- }
- }
-
- class CUDADeviceTask : public DeviceTask {
- public:
- CUDADeviceTask(CUDADevice *device, DeviceTask &task) : DeviceTask(task)
- {
- run = function_bind(&CUDADevice::thread_run, device, this);
- }
- };
-
- int get_split_task_count(DeviceTask & /*task*/)
- {
- return 1;
- }
-
- void task_add(DeviceTask &task)
- {
- CUDAContextScope scope(this);
-
- /* Load texture info. */
- load_texture_info();
-
- /* Synchronize all memory copies before executing task. */
- cuda_assert(cuCtxSynchronize());
-
- if (task.type == DeviceTask::FILM_CONVERT) {
- /* must be done in main thread due to opengl access */
- film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
- }
- else {
- task_pool.push(new CUDADeviceTask(this, task));
- }
- }
-
- void task_wait()
- {
- task_pool.wait();
- }
-
- void task_cancel()
- {
- task_pool.cancel();
- }
-
- friend class CUDASplitKernelFunction;
- friend class CUDASplitKernel;
- friend class CUDAContextScope;
-};
-
-/* redefine the cuda_assert macro so it can be used outside of the CUDADevice class
- * now that the definition of that class is complete
- */
-#undef cuda_assert
-#define cuda_assert(stmt) \
- { \
- CUresult result = stmt; \
-\
- if (result != CUDA_SUCCESS) { \
- string message = string_printf("CUDA error: %s in %s", cuewErrorString(result), #stmt); \
- if (device->error_msg == "") \
- device->error_msg = message; \
- fprintf(stderr, "%s\n", message.c_str()); \
- /*cuda_abort();*/ \
- device->cuda_error_documentation(); \
- } \
- } \
- (void)0
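
The redefined macro above follows the usual driver-API error-check shape: evaluate the statement once, translate the CUresult, then record and log the error without aborting so rendering can continue in a degraded state. A simplified generic sketch (not the Cycles macro; it uses cuGetErrorName instead of cuew and needs the CUDA driver library to build):

#include <cstdio>
#include <cuda.h>

#define CU_CHECK(stmt) \
  do { \
    CUresult result_ = (stmt); \
    if (result_ != CUDA_SUCCESS) { \
      const char *name_ = NULL; \
      cuGetErrorName(result_, &name_); \
      fprintf(stderr, "CUDA error: %s in %s\n", name_ ? name_ : "unknown", #stmt); \
    } \
  } while (0)

int main()
{
  CU_CHECK(cuInit(0)); /* example use */
  return 0;
}
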
-
-/* CUDA context scope. */
-
-CUDAContextScope::CUDAContextScope(CUDADevice *device) : device(device)
-{
- cuda_assert(cuCtxPushCurrent(device->cuContext));
-}
-
-CUDAContextScope::~CUDAContextScope()
-{
- cuda_assert(cuCtxPopCurrent(NULL));
-}
-
-/* split kernel */
-
-class CUDASplitKernelFunction : public SplitKernelFunction {
- CUDADevice *device;
- CUfunction func;
-
- public:
- CUDASplitKernelFunction(CUDADevice *device, CUfunction func) : device(device), func(func)
- {
- }
-
- /* enqueue the kernel, returns false if there is an error */
- bool enqueue(const KernelDimensions &dim, device_memory & /*kg*/, device_memory & /*data*/)
- {
- return enqueue(dim, NULL);
- }
-
- /* enqueue the kernel, returns false if there is an error */
- bool enqueue(const KernelDimensions &dim, void *args[])
- {
- if (device->have_error())
- return false;
-
- CUDAContextScope scope(device);
-
- /* we ignore dim.local_size for now, as this is faster */
- int threads_per_block;
- cuda_assert(
- cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func));
-
- int xblocks = (dim.global_size[0] * dim.global_size[1] + threads_per_block - 1) /
- threads_per_block;
-
- cuda_assert(cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1));
-
- cuda_assert(cuLaunchKernel(func,
- xblocks,
- 1,
- 1, /* blocks */
- threads_per_block,
- 1,
- 1, /* threads */
- 0,
- 0,
- args,
- 0));
-
- return !device->have_error();
- }
-};
-
-CUDASplitKernel::CUDASplitKernel(CUDADevice *device) : DeviceSplitKernel(device), device(device)
-{
-}
-
-uint64_t CUDASplitKernel::state_buffer_size(device_memory & /*kg*/,
- device_memory & /*data*/,
- size_t num_threads)
-{
- CUDAContextScope scope(device);
-
- device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE);
- size_buffer.alloc(1);
- size_buffer.zero_to_device();
-
- uint threads = num_threads;
- CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer);
-
- struct args_t {
- uint *num_threads;
- CUdeviceptr *size;
- };
-
- args_t args = {&threads, &d_size};
-
- CUfunction state_buffer_size;
- cuda_assert(
- cuModuleGetFunction(&state_buffer_size, device->cuModule, "kernel_cuda_state_buffer_size"));
-
- cuda_assert(cuLaunchKernel(state_buffer_size, 1, 1, 1, 1, 1, 1, 0, 0, (void **)&args, 0));
-
- size_buffer.copy_from_device(0, 1, 1);
- size_t size = size_buffer[0];
- size_buffer.free();
-
- return size;
-}
-
-bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions &dim,
- RenderTile &rtile,
- int num_global_elements,
- device_memory & /*kernel_globals*/,
- device_memory & /*kernel_data*/,
- device_memory &split_data,
- device_memory &ray_state,
- device_memory &queue_index,
- device_memory &use_queues_flag,
- device_memory &work_pool_wgs)
-{
- CUDAContextScope scope(device);
-
- CUdeviceptr d_split_data = device->cuda_device_ptr(split_data.device_pointer);
- CUdeviceptr d_ray_state = device->cuda_device_ptr(ray_state.device_pointer);
- CUdeviceptr d_queue_index = device->cuda_device_ptr(queue_index.device_pointer);
- CUdeviceptr d_use_queues_flag = device->cuda_device_ptr(use_queues_flag.device_pointer);
- CUdeviceptr d_work_pool_wgs = device->cuda_device_ptr(work_pool_wgs.device_pointer);
-
- CUdeviceptr d_buffer = device->cuda_device_ptr(rtile.buffer);
-
- int end_sample = rtile.start_sample + rtile.num_samples;
- int queue_size = dim.global_size[0] * dim.global_size[1];
-
- struct args_t {
- CUdeviceptr *split_data_buffer;
- int *num_elements;
- CUdeviceptr *ray_state;
- int *start_sample;
- int *end_sample;
- int *sx;
- int *sy;
- int *sw;
- int *sh;
- int *offset;
- int *stride;
- CUdeviceptr *queue_index;
- int *queuesize;
- CUdeviceptr *use_queues_flag;
- CUdeviceptr *work_pool_wgs;
- int *num_samples;
- CUdeviceptr *buffer;
- };
-
- args_t args = {&d_split_data,
- &num_global_elements,
- &d_ray_state,
- &rtile.start_sample,
- &end_sample,
- &rtile.x,
- &rtile.y,
- &rtile.w,
- &rtile.h,
- &rtile.offset,
- &rtile.stride,
- &d_queue_index,
- &queue_size,
- &d_use_queues_flag,
- &d_work_pool_wgs,
- &rtile.num_samples,
- &d_buffer};
-
- CUfunction data_init;
- cuda_assert(
- cuModuleGetFunction(&data_init, device->cuModule, "kernel_cuda_path_trace_data_init"));
- if (device->have_error()) {
- return false;
- }
-
- CUDASplitKernelFunction(device, data_init).enqueue(dim, (void **)&args);
-
- return !device->have_error();
-}
-
-SplitKernelFunction *CUDASplitKernel::get_split_kernel_function(const string &kernel_name,
- const DeviceRequestedFeatures &)
-{
- CUDAContextScope scope(device);
- CUfunction func;
-
- cuda_assert(
- cuModuleGetFunction(&func, device->cuModule, (string("kernel_cuda_") + kernel_name).data()));
- if (device->have_error()) {
- device->cuda_error_message(
- string_printf("kernel \"kernel_cuda_%s\" not found in module", kernel_name.data()));
- return NULL;
- }
-
- return new CUDASplitKernelFunction(device, func);
-}
-
-int2 CUDASplitKernel::split_kernel_local_size()
-{
- return make_int2(32, 1);
-}
-
-int2 CUDASplitKernel::split_kernel_global_size(device_memory &kg,
- device_memory &data,
- DeviceTask * /*task*/)
-{
- CUDAContextScope scope(device);
- size_t free;
- size_t total;
-
- cuda_assert(cuMemGetInfo(&free, &total));
-
- VLOG(1) << "Maximum device allocation size: " << string_human_readable_number(free)
- << " bytes. (" << string_human_readable_size(free) << ").";
-
- size_t num_elements = max_elements_for_max_buffer_size(kg, data, free / 2);
- size_t side = round_down((int)sqrt(num_elements), 32);
- int2 global_size = make_int2(side, round_down(num_elements / side, 16));
- VLOG(1) << "Global size: " << global_size << ".";
- return global_size;
-}
-
bool device_cuda_init()
{
-#ifdef WITH_CUDA_DYNLOAD
+# ifdef WITH_CUDA_DYNLOAD
static bool initialized = false;
static bool result = false;
@@ -2552,7 +43,6 @@ bool device_cuda_init()
VLOG(1) << "Found precompiled kernels";
result = true;
}
-# ifndef _WIN32
else if (cuewCompilerPath() != NULL) {
VLOG(1) << "Found CUDA compiler " << cuewCompilerPath();
result = true;
@@ -2561,7 +51,6 @@ bool device_cuda_init()
VLOG(1) << "Neither precompiled kernels nor CUDA compiler was found,"
<< " unable to use CUDA";
}
-# endif
}
else {
VLOG(1) << "CUEW initialization failed: "
@@ -2570,9 +59,9 @@ bool device_cuda_init()
}
return result;
-#else /* WITH_CUDA_DYNLOAD */
+# else /* WITH_CUDA_DYNLOAD */
return true;
-#endif /* WITH_CUDA_DYNLOAD */
+# endif /* WITH_CUDA_DYNLOAD */
}
Device *device_cuda_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
@@ -2582,7 +71,7 @@ Device *device_cuda_create(DeviceInfo &info, Stats &stats, Profiler &profiler, b
static CUresult device_cuda_safe_init()
{
-#ifdef _WIN32
+# ifdef _WIN32
__try {
return cuInit(0);
}
@@ -2593,9 +82,9 @@ static CUresult device_cuda_safe_init()
}
return CUDA_ERROR_NO_DEVICE;
-#else
+# else
return cuInit(0);
-#endif
+# endif
}
void device_cuda_info(vector<DeviceInfo> &devices)
@@ -2640,6 +129,17 @@ void device_cuda_info(vector<DeviceInfo> &devices)
info.has_half_images = (major >= 3);
info.has_volume_decoupled = false;
+ info.has_adaptive_stop_per_sample = false;
+ info.denoisers = DENOISER_NLM;
+
+ /* Check if the device has P2P access to any other device in the system. */
+ for (int peer_num = 0; peer_num < count && !info.has_peer_memory; peer_num++) {
+ if (num != peer_num) {
+ int can_access = 0;
+ cuDeviceCanAccessPeer(&can_access, num, peer_num);
+ info.has_peer_memory = (can_access != 0);
+ }
+ }
int pci_location[3] = {0, 0, 0};
cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num);
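
The peer-memory check added above asks the driver whether any other device in the system can access this device's memory. A standalone sketch of the same query using plain CUDA driver-API calls (error handling mostly omitted for brevity):

#include <cstdio>
#include <cuda.h>

int main()
{
  if (cuInit(0) != CUDA_SUCCESS) {
    return 1;
  }

  int count = 0;
  cuDeviceGetCount(&count);

  for (int num = 0; num < count; num++) {
    CUdevice device;
    cuDeviceGet(&device, num);

    bool has_peer_memory = false;
    for (int peer_num = 0; peer_num < count && !has_peer_memory; peer_num++) {
      if (num == peer_num) {
        continue;
      }
      CUdevice peer;
      cuDeviceGet(&peer, peer_num);

      int can_access = 0;
      cuDeviceCanAccessPeer(&can_access, device, peer);
      has_peer_memory = (can_access != 0);
    }
    printf("device %d: peer memory %s\n", num, has_peer_memory ? "available" : "not available");
  }
  return 0;
}
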
@@ -2658,6 +158,14 @@ void device_cuda_info(vector<DeviceInfo> &devices)
cuDeviceGetAttribute(&timeout_attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num);
cuDeviceGetAttribute(&preempt_attr, CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, num);
+ /* The CUDA driver reports compute preemption as not being available on
+ * Windows 10 even when it is, due to an issue in application profiles.
+ * Detect case where we expect it to be available and override. */
+ if (preempt_attr == 0 && (major >= 6) && system_windows_version_at_least(10, 17134)) {
+ VLOG(1) << "Assuming device has compute preemption on Windows 10.";
+ preempt_attr = 1;
+ }
+
if (timeout_attr && !preempt_attr) {
VLOG(1) << "Device is recognized as display.";
info.description += " (Display)";
@@ -2665,6 +173,7 @@ void device_cuda_info(vector<DeviceInfo> &devices)
display_devices.push_back(info);
}
else {
+ VLOG(1) << "Device has compute preemption or is not used for display.";
devices.push_back(info);
}
VLOG(1) << "Added device \"" << name << "\" with id \"" << info.id << "\".";
@@ -2698,13 +207,13 @@ string device_cuda_capabilities()
}
capabilities += string("\t") + name + "\n";
int value;
-#define GET_ATTR(attr) \
- { \
- if (cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_##attr, num) == CUDA_SUCCESS) { \
- capabilities += string_printf("\t\tCU_DEVICE_ATTRIBUTE_" #attr "\t\t\t%d\n", value); \
+# define GET_ATTR(attr) \
+ { \
+ if (cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_##attr, num) == CUDA_SUCCESS) { \
+ capabilities += string_printf("\t\tCU_DEVICE_ATTRIBUTE_" #attr "\t\t\t%d\n", value); \
+ } \
} \
- } \
- (void)0
+ (void)0
/* TODO(sergey): Strip all attributes which are not useful for us
* or does not depend on the driver.
*/
@@ -2795,7 +304,7 @@ string device_cuda_capabilities()
GET_ATTR(MANAGED_MEMORY);
GET_ATTR(MULTI_GPU_BOARD);
GET_ATTR(MULTI_GPU_BOARD_GROUP_ID);
-#undef GET_ATTR
+# undef GET_ATTR
capabilities += "\n";
}
@@ -2803,3 +312,5 @@ string device_cuda_capabilities()
}
CCL_NAMESPACE_END
+
+#endif
diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp
index 05a7fb8ae4d..38c42d15cab 100644
--- a/intern/cycles/device/device_denoising.cpp
+++ b/intern/cycles/device/device_denoising.cpp
@@ -56,8 +56,8 @@ DenoisingTask::DenoisingTask(Device *device, const DeviceTask &task)
tile_info->frames[i] = task.denoising_frames[i - 1];
}
- write_passes = task.denoising_write_passes;
- do_filter = task.denoising_do_filter;
+ do_prefilter = task.denoising.store_passes && task.denoising.type == DENOISER_NLM;
+ do_filter = task.denoising.use && task.denoising.type == DENOISER_NLM;
}
DenoisingTask::~DenoisingTask()
@@ -71,29 +71,30 @@ DenoisingTask::~DenoisingTask()
tile_info_mem.free();
}
-void DenoisingTask::set_render_buffer(RenderTile *rtiles)
+void DenoisingTask::set_render_buffer(RenderTileNeighbors &neighbors)
{
- for (int i = 0; i < 9; i++) {
- tile_info->offsets[i] = rtiles[i].offset;
- tile_info->strides[i] = rtiles[i].stride;
- tile_info->buffers[i] = rtiles[i].buffer;
+ for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
+ RenderTile &rtile = neighbors.tiles[i];
+ tile_info->offsets[i] = rtile.offset;
+ tile_info->strides[i] = rtile.stride;
+ tile_info->buffers[i] = rtile.buffer;
}
- tile_info->x[0] = rtiles[3].x;
- tile_info->x[1] = rtiles[4].x;
- tile_info->x[2] = rtiles[5].x;
- tile_info->x[3] = rtiles[5].x + rtiles[5].w;
- tile_info->y[0] = rtiles[1].y;
- tile_info->y[1] = rtiles[4].y;
- tile_info->y[2] = rtiles[7].y;
- tile_info->y[3] = rtiles[7].y + rtiles[7].h;
-
- target_buffer.offset = rtiles[9].offset;
- target_buffer.stride = rtiles[9].stride;
- target_buffer.ptr = rtiles[9].buffer;
-
- if (write_passes && rtiles[9].buffers) {
+ tile_info->x[0] = neighbors.tiles[3].x;
+ tile_info->x[1] = neighbors.tiles[4].x;
+ tile_info->x[2] = neighbors.tiles[5].x;
+ tile_info->x[3] = neighbors.tiles[5].x + neighbors.tiles[5].w;
+ tile_info->y[0] = neighbors.tiles[1].y;
+ tile_info->y[1] = neighbors.tiles[4].y;
+ tile_info->y[2] = neighbors.tiles[7].y;
+ tile_info->y[3] = neighbors.tiles[7].y + neighbors.tiles[7].h;
+
+ target_buffer.offset = neighbors.target.offset;
+ target_buffer.stride = neighbors.target.stride;
+ target_buffer.ptr = neighbors.target.buffer;
+
+ if (do_prefilter && neighbors.target.buffers) {
target_buffer.denoising_output_offset =
- rtiles[9].buffers->params.get_denoising_prefiltered_offset();
+ neighbors.target.buffers->params.get_denoising_prefiltered_offset();
}
else {
target_buffer.denoising_output_offset = 0;
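
The index arithmetic above implies a fixed 3x3 neighbor layout around the tile being denoised, plus a separate target tile for the output; restated as a comment for readability (inferred from the bounds taken from tiles 1, 3, 5 and 7):

/* Neighbor layout implied by set_render_buffer():
 *
 *   0 1 2      x bounds span tiles 3..5 (left edge of 3 to right edge of 5),
 *   3 4 5      y bounds span tiles 1..7 (top edge of 1 to bottom edge of 7),
 *   6 7 8      tile 4 is the tile being denoised, neighbors.target gets the output.
 */
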
@@ -104,13 +105,14 @@ void DenoisingTask::set_render_buffer(RenderTile *rtiles)
void DenoisingTask::setup_denoising_buffer()
{
- /* Expand filter_area by radius pixels and clamp the result to the extent of the neighboring tiles */
+ /* Expand filter_area by radius pixels and clamp the result to the extent of the neighboring
+ * tiles */
rect = rect_from_shape(filter_area.x, filter_area.y, filter_area.z, filter_area.w);
rect = rect_expand(rect, radius);
rect = rect_clip(rect,
make_int4(tile_info->x[0], tile_info->y[0], tile_info->x[3], tile_info->y[3]));
- buffer.use_intensity = write_passes || (tile_info->num_frames > 1);
+ buffer.use_intensity = do_prefilter || (tile_info->num_frames > 1);
buffer.passes = buffer.use_intensity ? 15 : 14;
buffer.width = rect.z - rect.x;
buffer.stride = align_up(buffer.width, 4);
@@ -149,16 +151,19 @@ void DenoisingTask::prefilter_shadowing()
device_sub_ptr buffer_var(buffer.mem, 5 * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr filtered_var(buffer.mem, 6 * buffer.pass_stride, buffer.pass_stride);
- /* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the sample variance and the buffer variance. */
+ /* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the
+ * sample variance and the buffer variance. */
functions.divide_shadow(*unfiltered_a, *unfiltered_b, *sample_var, *sample_var_var, *buffer_var);
- /* Smooth the (generally pretty noisy) buffer variance using the spatial information from the sample variance. */
+ /* Smooth the (generally pretty noisy) buffer variance using the spatial information from the
+ * sample variance. */
nlm_state.set_parameters(6, 3, 4.0f, 1.0f, false);
functions.non_local_means(*buffer_var, *sample_var, *sample_var_var, *filtered_var);
/* Reuse memory, the previous data isn't needed anymore. */
device_ptr filtered_a = *buffer_var, filtered_b = *sample_var;
- /* Use the smoothed variance to filter the two shadow half images using each other for weight calculation. */
+ /* Use the smoothed variance to filter the two shadow half images using each other for weight
+ * calculation. */
nlm_state.set_parameters(5, 3, 1.0f, 0.25f, false);
functions.non_local_means(*unfiltered_a, *unfiltered_b, *filtered_var, filtered_a);
functions.non_local_means(*unfiltered_b, *unfiltered_a, *filtered_var, filtered_b);
@@ -210,12 +215,12 @@ void DenoisingTask::prefilter_color()
int num_color_passes = 3;
device_only_memory<float> temporary_color(device, "denoising temporary color");
- temporary_color.alloc_to_device(3 * buffer.pass_stride, false);
+ temporary_color.alloc_to_device(6 * buffer.pass_stride, false);
for (int pass = 0; pass < num_color_passes; pass++) {
device_sub_ptr color_pass(temporary_color, pass * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr color_var_pass(
- buffer.mem, variance_to[pass] * buffer.pass_stride, buffer.pass_stride);
+ temporary_color, (pass + 3) * buffer.pass_stride, buffer.pass_stride);
functions.get_feature(mean_from[pass],
variance_from[pass],
*color_pass,
@@ -316,12 +321,11 @@ void DenoisingTask::reconstruct()
functions.solve(target_buffer.ptr);
}
-void DenoisingTask::run_denoising(RenderTile *tile)
+void DenoisingTask::run_denoising(RenderTile &tile)
{
- RenderTile rtiles[10];
- rtiles[4] = *tile;
- functions.map_neighbor_tiles(rtiles);
- set_render_buffer(rtiles);
+ RenderTileNeighbors neighbors(tile);
+ functions.map_neighbor_tiles(neighbors);
+ set_render_buffer(neighbors);
setup_denoising_buffer();
@@ -339,11 +343,11 @@ void DenoisingTask::run_denoising(RenderTile *tile)
reconstruct();
}
- if (write_passes) {
+ if (do_prefilter) {
write_buffer();
}
- functions.unmap_neighbor_tiles(rtiles);
+ functions.unmap_neighbor_tiles(neighbors);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h
index bd1d0193dbd..2c0dc23b44a 100644
--- a/intern/cycles/device/device_denoising.h
+++ b/intern/cycles/device/device_denoising.h
@@ -60,7 +60,7 @@ class DenoisingTask {
int4 rect;
int4 filter_area;
- bool write_passes;
+ bool do_prefilter;
bool do_filter;
struct DeviceFunctions {
@@ -102,8 +102,8 @@ class DenoisingTask {
device_ptr output_ptr)>
detect_outliers;
function<bool(int out_offset, device_ptr frop_ptr, device_ptr buffer_ptr)> write_feature;
- function<void(RenderTile *rtiles)> map_neighbor_tiles;
- function<void(RenderTile *rtiles)> unmap_neighbor_tiles;
+ function<void(RenderTileNeighbors &neighbors)> map_neighbor_tiles;
+ function<void(RenderTileNeighbors &neighbors)> unmap_neighbor_tiles;
} functions;
/* Stores state of the current Reconstruction operation,
@@ -154,7 +154,7 @@ class DenoisingTask {
DenoisingTask(Device *device, const DeviceTask &task);
~DenoisingTask();
- void run_denoising(RenderTile *tile);
+ void run_denoising(RenderTile &tile);
struct DenoiseBuffers {
int pass_stride;
@@ -179,7 +179,7 @@ class DenoisingTask {
protected:
Device *device;
- void set_render_buffer(RenderTile *rtiles);
+ void set_render_buffer(RenderTileNeighbors &neighbors);
void setup_denoising_buffer();
void prefilter_shadowing();
void prefilter_features();
diff --git a/intern/cycles/device/device_intern.h b/intern/cycles/device/device_intern.h
index c393a3f9cda..94d63e8f333 100644
--- a/intern/cycles/device/device_intern.h
+++ b/intern/cycles/device/device_intern.h
@@ -17,9 +17,15 @@
#ifndef __DEVICE_INTERN_H__
#define __DEVICE_INTERN_H__
+#include "util/util_string.h"
+#include "util/util_vector.h"
+
CCL_NAMESPACE_BEGIN
class Device;
+class DeviceInfo;
+class Profiler;
+class Stats;
Device *device_cpu_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
bool device_opencl_init();
@@ -27,6 +33,9 @@ Device *device_opencl_create(DeviceInfo &info, Stats &stats, Profiler &profiler,
bool device_opencl_compile_kernel(const vector<string> &parameters);
bool device_cuda_init();
Device *device_cuda_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
+bool device_optix_init();
+Device *device_optix_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
+
Device *device_network_create(DeviceInfo &info,
Stats &stats,
Profiler &profiler,
@@ -36,6 +45,7 @@ Device *device_multi_create(DeviceInfo &info, Stats &stats, Profiler &profiler,
void device_cpu_info(vector<DeviceInfo> &devices);
void device_opencl_info(vector<DeviceInfo> &devices);
void device_cuda_info(vector<DeviceInfo> &devices);
+void device_optix_info(const vector<DeviceInfo> &cuda_devices, vector<DeviceInfo> &devices);
void device_network_info(vector<DeviceInfo> &devices);
string device_cpu_capabilities();
diff --git a/intern/cycles/device/device_memory.cpp b/intern/cycles/device/device_memory.cpp
index 859535307f4..8064d50d31f 100644
--- a/intern/cycles/device/device_memory.cpp
+++ b/intern/cycles/device/device_memory.cpp
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "device/device.h"
#include "device/device_memory.h"
+#include "device/device.h"
CCL_NAMESPACE_BEGIN
@@ -31,17 +31,18 @@ device_memory::device_memory(Device *device, const char *name, MemoryType type)
data_depth(0),
type(type),
name(name),
- interpolation(INTERPOLATION_NONE),
- extension(EXTENSION_REPEAT),
device(device),
device_pointer(0),
host_pointer(0),
- shared_pointer(0)
+ shared_pointer(0),
+ shared_counter(0)
{
}
device_memory::~device_memory()
{
+ assert(shared_pointer == 0);
+ assert(shared_counter == 0);
}
void *device_memory::host_alloc(size_t size)
@@ -73,7 +74,7 @@ void device_memory::host_free()
void device_memory::device_alloc()
{
- assert(!device_pointer && type != MEM_TEXTURE);
+ assert(!device_pointer && type != MEM_TEXTURE && type != MEM_GLOBAL);
device->mem_alloc(*this);
}
@@ -93,7 +94,7 @@ void device_memory::device_copy_to()
void device_memory::device_copy_from(int y, int w, int h, int elem)
{
- assert(type != MEM_TEXTURE && type != MEM_READ_ONLY);
+ assert(type != MEM_TEXTURE && type != MEM_READ_ONLY && type != MEM_GLOBAL);
device->mem_copy_from(*this, y, w, h, elem);
}
@@ -124,6 +125,11 @@ void device_memory::restore_device()
device_pointer = original_device_ptr;
}
+bool device_memory::is_resident(Device *sub_device) const
+{
+ return device->is_resident(device_pointer, sub_device);
+}
+
/* Device Sub Ptr */
device_sub_ptr::device_sub_ptr(device_memory &mem, int offset, int size) : device(mem.device)
@@ -136,4 +142,93 @@ device_sub_ptr::~device_sub_ptr()
device->mem_free_sub_ptr(ptr);
}
+/* Device Texture */
+
+device_texture::device_texture(Device *device,
+ const char *name,
+ const uint slot,
+ ImageDataType image_data_type,
+ InterpolationType interpolation,
+ ExtensionType extension)
+ : device_memory(device, name, MEM_TEXTURE), slot(slot)
+{
+ switch (image_data_type) {
+ case IMAGE_DATA_TYPE_FLOAT4:
+ data_type = TYPE_FLOAT;
+ data_elements = 4;
+ break;
+ case IMAGE_DATA_TYPE_FLOAT:
+ data_type = TYPE_FLOAT;
+ data_elements = 1;
+ break;
+ case IMAGE_DATA_TYPE_BYTE4:
+ data_type = TYPE_UCHAR;
+ data_elements = 4;
+ break;
+ case IMAGE_DATA_TYPE_BYTE:
+ data_type = TYPE_UCHAR;
+ data_elements = 1;
+ break;
+ case IMAGE_DATA_TYPE_HALF4:
+ data_type = TYPE_HALF;
+ data_elements = 4;
+ break;
+ case IMAGE_DATA_TYPE_HALF:
+ data_type = TYPE_HALF;
+ data_elements = 1;
+ break;
+ case IMAGE_DATA_TYPE_USHORT4:
+ data_type = TYPE_UINT16;
+ data_elements = 4;
+ break;
+ case IMAGE_DATA_TYPE_USHORT:
+ data_type = TYPE_UINT16;
+ data_elements = 1;
+ break;
+ case IMAGE_DATA_NUM_TYPES:
+ assert(0);
+ return;
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.data_type = image_data_type;
+ info.interpolation = interpolation;
+ info.extension = extension;
+}
+
+device_texture::~device_texture()
+{
+ device_free();
+ host_free();
+}
+
+/* Host memory allocation. */
+void *device_texture::alloc(const size_t width, const size_t height, const size_t depth)
+{
+ const size_t new_size = size(width, height, depth);
+
+ if (new_size != data_size) {
+ device_free();
+ host_free();
+ host_pointer = host_alloc(data_elements * datatype_size(data_type) * new_size);
+ assert(device_pointer == 0);
+ }
+
+ data_size = new_size;
+ data_width = width;
+ data_height = height;
+ data_depth = depth;
+
+ info.width = width;
+ info.height = height;
+ info.depth = depth;
+
+ return host_pointer;
+}
+
+void device_texture::copy_to_device()
+{
+ device_copy_to();
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h
index f50184efba7..32654e62a6f 100644
--- a/intern/cycles/device/device_memory.h
+++ b/intern/cycles/device/device_memory.h
@@ -23,6 +23,7 @@
#include "util/util_array.h"
#include "util/util_half.h"
+#include "util/util_string.h"
#include "util/util_texture.h"
#include "util/util_types.h"
#include "util/util_vector.h"
@@ -31,7 +32,14 @@ CCL_NAMESPACE_BEGIN
class Device;
-enum MemoryType { MEM_READ_ONLY, MEM_READ_WRITE, MEM_DEVICE_ONLY, MEM_TEXTURE, MEM_PIXELS };
+enum MemoryType {
+ MEM_READ_ONLY,
+ MEM_READ_WRITE,
+ MEM_DEVICE_ONLY,
+ MEM_GLOBAL,
+ MEM_TEXTURE,
+ MEM_PIXELS
+};
/* Supported Data Types */
@@ -208,29 +216,32 @@ class device_memory {
size_t data_depth;
MemoryType type;
const char *name;
- InterpolationType interpolation;
- ExtensionType extension;
/* Pointers. */
Device *device;
device_ptr device_pointer;
void *host_pointer;
void *shared_pointer;
+ /* reference counter for shared_pointer */
+ int shared_counter;
virtual ~device_memory();
void swap_device(Device *new_device, size_t new_device_size, device_ptr new_device_ptr);
void restore_device();
+ bool is_resident(Device *sub_device) const;
+
protected:
friend class CUDADevice;
+ friend class OptiXDevice;
/* Only create through subclasses. */
device_memory(Device *device, const char *name, MemoryType type);
/* No copying allowed. */
- device_memory(const device_memory &);
- device_memory &operator=(const device_memory &);
+ device_memory(const device_memory &) = delete;
+ device_memory &operator=(const device_memory &) = delete;
/* Host allocation on the device. All host_pointer memory should be
* allocated with these functions, for devices that support using
@@ -307,7 +318,7 @@ template<typename T> class device_only_memory : public device_memory {
* in and copied to the device with copy_to_device(). Or alternatively
* allocated and set to zero on the device with zero_to_device().
*
- * When using memory type MEM_TEXTURE, a pointer to this memory will be
+ * When using memory type MEM_GLOBAL, a pointer to this memory will be
* automatically attached to kernel globals, using the provided name
* matching an entry in kernel_textures.h. */
@@ -424,6 +435,11 @@ template<typename T> class device_vector : public device_memory {
device_copy_to();
}
+ void copy_from_device()
+ {
+ device_copy_from(0, data_width, data_height, sizeof(T));
+ }
+
void copy_from_device(int y, int w, int h)
{
device_copy_from(y, w, h, sizeof(T));
@@ -495,6 +511,33 @@ class device_sub_ptr {
device_ptr ptr;
};
+/* Device Texture
+ *
+ * 2D or 3D image texture memory. */
+
+class device_texture : public device_memory {
+ public:
+ device_texture(Device *device,
+ const char *name,
+ const uint slot,
+ ImageDataType image_data_type,
+ InterpolationType interpolation,
+ ExtensionType extension);
+ ~device_texture();
+
+ void *alloc(const size_t width, const size_t height, const size_t depth = 0);
+ void copy_to_device();
+
+ uint slot;
+ TextureInfo info;
+
+ protected:
+ size_t size(const size_t width, const size_t height, const size_t depth)
+ {
+ return width * ((height == 0) ? 1 : height) * ((depth == 0) ? 1 : depth);
+ }
+};
+
CCL_NAMESPACE_END
#endif /* __DEVICE_MEMORY_H__ */
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index 4a40e106115..9ea8782d0f0 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include <stdlib.h>
#include <sstream>
+#include <stdlib.h>
#include "device/device.h"
#include "device/device_intern.h"
@@ -34,30 +34,87 @@ CCL_NAMESPACE_BEGIN
class MultiDevice : public Device {
public:
struct SubDevice {
- explicit SubDevice(Device *device_) : device(device_)
- {
- }
-
+ Stats stats;
Device *device;
map<device_ptr, device_ptr> ptr_map;
+ int peer_island_index = -1;
};
- list<SubDevice> devices;
+ list<SubDevice> devices, denoising_devices;
device_ptr unique_key;
+ vector<vector<SubDevice *>> peer_islands;
+ bool matching_rendering_and_denoising_devices;
MultiDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_)
: Device(info, stats, profiler, background_), unique_key(1)
{
foreach (DeviceInfo &subinfo, info.multi_devices) {
- Device *device = Device::create(subinfo, sub_stats_, profiler, background);
-
/* Always add CPU devices at the back since GPU devices can change
* host memory pointers, which CPU uses as device pointer. */
+ SubDevice *sub;
if (subinfo.type == DEVICE_CPU) {
- devices.push_back(SubDevice(device));
+ devices.emplace_back();
+ sub = &devices.back();
}
else {
- devices.push_front(SubDevice(device));
+ devices.emplace_front();
+ sub = &devices.front();
+ }
+
+ /* The pointer to 'sub->stats' will stay valid even after new devices
+ * are added, since 'devices' is a linked list. */
+ sub->device = Device::create(subinfo, sub->stats, profiler, background);
+ }
+
+ foreach (DeviceInfo &subinfo, info.denoising_devices) {
+ denoising_devices.emplace_front();
+ SubDevice *sub = &denoising_devices.front();
+
+ sub->device = Device::create(subinfo, sub->stats, profiler, background);
+ }
+
+ /* Build a list of peer islands for the available render devices */
+ foreach (SubDevice &sub, devices) {
+      /* First ensure that every device is in at least one peer island */

+ if (sub.peer_island_index < 0) {
+ peer_islands.emplace_back();
+ sub.peer_island_index = (int)peer_islands.size() - 1;
+ peer_islands[sub.peer_island_index].push_back(&sub);
+ }
+
+ if (!info.has_peer_memory) {
+ continue;
+ }
+
+ /* Second check peer access between devices and fill up the islands accordingly */
+ foreach (SubDevice &peer_sub, devices) {
+ if (peer_sub.peer_island_index < 0 &&
+ peer_sub.device->info.type == sub.device->info.type &&
+ peer_sub.device->check_peer_access(sub.device)) {
+ peer_sub.peer_island_index = sub.peer_island_index;
+ peer_islands[sub.peer_island_index].push_back(&peer_sub);
+ }
+ }
+ }
+
+ /* Try to re-use memory when denoising and render devices use the same physical devices
+ * (e.g. OptiX denoising and CUDA rendering device pointing to the same GPU).
+     * Ordering has to match as well, so that 'DeviceTask::split' behaves consistently. */
+ matching_rendering_and_denoising_devices = denoising_devices.empty() ||
+ (devices.size() == denoising_devices.size());
+ if (matching_rendering_and_denoising_devices) {
+ for (list<SubDevice>::iterator device_it = devices.begin(),
+ denoising_device_it = denoising_devices.begin();
+ device_it != devices.end() && denoising_device_it != denoising_devices.end();
+ ++device_it, ++denoising_device_it) {
+ const DeviceInfo &info = device_it->device->info;
+ const DeviceInfo &denoising_info = denoising_device_it->device->info;
+ if ((info.type != DEVICE_CUDA && info.type != DEVICE_OPTIX) ||
+ (denoising_info.type != DEVICE_CUDA && denoising_info.type != DEVICE_OPTIX) ||
+ info.num != denoising_info.num) {
+ matching_rendering_and_denoising_devices = false;
+ break;
+ }
}
}
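
The island construction above first gives every device its own island and then merges devices that report peer access into the first compatible island. A minimal standalone sketch of that grouping logic, with an illustrative can_access() stand-in for Device::check_peer_access():

#include <cstdio>
#include <vector>

struct Dev {
  int type;
  int island = -1;
};

static bool can_access(const Dev &a, const Dev &b)
{
  return a.type == b.type; /* stand-in for the real peer-access check */
}

int main()
{
  std::vector<Dev> devs = {{0}, {0}, {1}}; /* two devices of one type, one of another */
  std::vector<std::vector<int>> islands;

  for (size_t i = 0; i < devs.size(); i++) {
    /* Every device ends up in at least one island. */
    if (devs[i].island < 0) {
      devs[i].island = (int)islands.size();
      islands.push_back({(int)i});
    }
    /* Pull unassigned peers into this device's island. */
    for (size_t j = 0; j < devs.size(); j++) {
      if (devs[j].island < 0 && can_access(devs[i], devs[j])) {
        devs[j].island = devs[i].island;
        islands[devs[i].island].push_back((int)j);
      }
    }
  }

  printf("%zu islands for %zu devices\n", islands.size(), devs.size()); /* 2 for 3 */
  return 0;
}
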
@@ -80,17 +137,18 @@ class MultiDevice : public Device {
{
foreach (SubDevice &sub, devices)
delete sub.device;
+ foreach (SubDevice &sub, denoising_devices)
+ delete sub.device;
}
const string &error_message()
{
- foreach (SubDevice &sub, devices) {
- if (sub.device->error_message() != "") {
- if (error_msg == "")
- error_msg = sub.device->error_message();
- break;
- }
- }
+ error_msg.clear();
+
+ foreach (SubDevice &sub, devices)
+ error_msg += sub.device->error_message();
+ foreach (SubDevice &sub, denoising_devices)
+ error_msg += sub.device->error_message();
return error_msg;
}
@@ -118,6 +176,15 @@ class MultiDevice : public Device {
if (!sub.device->load_kernels(requested_features))
return false;
+ if (requested_features.use_denoising) {
+ /* Only need denoising feature, everything else is unused. */
+ DeviceRequestedFeatures denoising_features;
+ denoising_features.use_denoising = true;
+ foreach (SubDevice &sub, denoising_devices)
+ if (!sub.device->load_kernels(denoising_features))
+ return false;
+ }
+
return true;
}
@@ -127,6 +194,12 @@ class MultiDevice : public Device {
if (!sub.device->wait_for_availability(requested_features))
return false;
+ if (requested_features.use_denoising) {
+ foreach (SubDevice &sub, denoising_devices)
+ if (!sub.device->wait_for_availability(requested_features))
+ return false;
+ }
+
return true;
}
@@ -150,20 +223,104 @@ class MultiDevice : public Device {
break;
}
}
+
return result;
}
+ bool build_optix_bvh(BVH *bvh)
+ {
+ /* Broadcast acceleration structure build to all render devices */
+ foreach (SubDevice &sub, devices) {
+ if (!sub.device->build_optix_bvh(bvh))
+ return false;
+ }
+ return true;
+ }
+
+ virtual void *osl_memory()
+ {
+ if (devices.size() > 1) {
+ return NULL;
+ }
+ return devices.front().device->osl_memory();
+ }
+
+ bool is_resident(device_ptr key, Device *sub_device)
+ {
+ foreach (SubDevice &sub, devices) {
+ if (sub.device == sub_device) {
+ return find_matching_mem_device(key, sub)->device == sub_device;
+ }
+ }
+ return false;
+ }
+
+ SubDevice *find_matching_mem_device(device_ptr key, SubDevice &sub)
+ {
+ assert(key != 0 && (sub.peer_island_index >= 0 || sub.ptr_map.find(key) != sub.ptr_map.end()));
+
+ /* Get the memory owner of this key (first try current device, then peer devices) */
+ SubDevice *owner_sub = &sub;
+ if (owner_sub->ptr_map.find(key) == owner_sub->ptr_map.end()) {
+ foreach (SubDevice *island_sub, peer_islands[sub.peer_island_index]) {
+ if (island_sub != owner_sub &&
+ island_sub->ptr_map.find(key) != island_sub->ptr_map.end()) {
+ owner_sub = island_sub;
+ }
+ }
+ }
+ return owner_sub;
+ }
+
+ SubDevice *find_suitable_mem_device(device_ptr key, const vector<SubDevice *> &island)
+ {
+ assert(!island.empty());
+
+ /* Get the memory owner of this key or the device with the lowest memory usage when new */
+ SubDevice *owner_sub = island.front();
+ foreach (SubDevice *island_sub, island) {
+ if (key ? (island_sub->ptr_map.find(key) != island_sub->ptr_map.end()) :
+ (island_sub->device->stats.mem_used < owner_sub->device->stats.mem_used)) {
+ owner_sub = island_sub;
+ }
+ }
+ return owner_sub;
+ }
+
+ inline device_ptr find_matching_mem(device_ptr key, SubDevice &sub)
+ {
+ return find_matching_mem_device(key, sub)->ptr_map[key];
+ }
+
void mem_alloc(device_memory &mem)
{
device_ptr key = unique_key++;
- foreach (SubDevice &sub, devices) {
- mem.device = sub.device;
- mem.device_pointer = 0;
- mem.device_size = 0;
+ if (mem.type == MEM_PIXELS) {
+ /* Always allocate pixels memory on all devices
+ * This is necessary to ensure PBOs are registered everywhere, which FILM_CONVERT uses */
+ foreach (SubDevice &sub, devices) {
+ mem.device = sub.device;
+ mem.device_pointer = 0;
+ mem.device_size = 0;
- sub.device->mem_alloc(mem);
- sub.ptr_map[key] = mem.device_pointer;
+ sub.device->mem_alloc(mem);
+ sub.ptr_map[key] = mem.device_pointer;
+ }
+ }
+ else {
+ assert(mem.type == MEM_READ_ONLY || mem.type == MEM_READ_WRITE ||
+ mem.type == MEM_DEVICE_ONLY);
+ /* The remaining memory types can be distributed across devices */
+ foreach (const vector<SubDevice *> &island, peer_islands) {
+ SubDevice *owner_sub = find_suitable_mem_device(key, island);
+ mem.device = owner_sub->device;
+ mem.device_pointer = 0;
+ mem.device_size = 0;
+
+ owner_sub->device->mem_alloc(mem);
+ owner_sub->ptr_map[key] = mem.device_pointer;
+ }
}
mem.device = this;
@@ -177,13 +334,36 @@ class MultiDevice : public Device {
device_ptr key = (existing_key) ? existing_key : unique_key++;
size_t existing_size = mem.device_size;
- foreach (SubDevice &sub, devices) {
- mem.device = sub.device;
- mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
- mem.device_size = existing_size;
+ /* The tile buffers are allocated on each device (see below), so copy to all of them */
+ if (strcmp(mem.name, "RenderBuffers") == 0) {
+ foreach (SubDevice &sub, devices) {
+ mem.device = sub.device;
+ mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
+ mem.device_size = existing_size;
- sub.device->mem_copy_to(mem);
- sub.ptr_map[key] = mem.device_pointer;
+ sub.device->mem_copy_to(mem);
+ sub.ptr_map[key] = mem.device_pointer;
+ }
+ }
+ else {
+ foreach (const vector<SubDevice *> &island, peer_islands) {
+ SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
+ mem.device = owner_sub->device;
+ mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
+ mem.device_size = existing_size;
+
+ owner_sub->device->mem_copy_to(mem);
+ owner_sub->ptr_map[key] = mem.device_pointer;
+
+ if (mem.type == MEM_GLOBAL || mem.type == MEM_TEXTURE) {
+ /* Need to create texture objects and update pointer in kernel globals on all devices */
+ foreach (SubDevice *island_sub, island) {
+ if (island_sub != owner_sub) {
+ island_sub->device->mem_copy_to(mem);
+ }
+ }
+ }
+ }
}
mem.device = this;
@@ -200,10 +380,11 @@ class MultiDevice : public Device {
int sy = y + i * sub_h;
int sh = (i == (int)devices.size() - 1) ? h - sub_h * i : sub_h;
- mem.device = sub.device;
- mem.device_pointer = sub.ptr_map[key];
+ SubDevice *owner_sub = find_matching_mem_device(key, sub);
+ mem.device = owner_sub->device;
+ mem.device_pointer = owner_sub->ptr_map[key];
- sub.device->mem_copy_from(mem, sy, w, sh, elem);
+ owner_sub->device->mem_copy_from(mem, sy, w, sh, elem);
i++;
}
@@ -217,13 +398,48 @@ class MultiDevice : public Device {
device_ptr key = (existing_key) ? existing_key : unique_key++;
size_t existing_size = mem.device_size;
- foreach (SubDevice &sub, devices) {
- mem.device = sub.device;
- mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
- mem.device_size = existing_size;
+ /* This is a hack to only allocate the tile buffers on denoising devices
+ * Similarly the tile buffers also need to be allocated separately on all devices so any
+ * overlap rendered for denoising does not interfere with each other */
+ if (strcmp(mem.name, "RenderBuffers") == 0) {
+ vector<device_ptr> device_pointers;
+ device_pointers.reserve(devices.size());
+
+ foreach (SubDevice &sub, devices) {
+ mem.device = sub.device;
+ mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
+ mem.device_size = existing_size;
+
+ sub.device->mem_zero(mem);
+ sub.ptr_map[key] = mem.device_pointer;
- sub.device->mem_zero(mem);
- sub.ptr_map[key] = mem.device_pointer;
+ device_pointers.push_back(mem.device_pointer);
+ }
+ foreach (SubDevice &sub, denoising_devices) {
+ if (matching_rendering_and_denoising_devices) {
+ sub.ptr_map[key] = device_pointers.front();
+ device_pointers.erase(device_pointers.begin());
+ }
+ else {
+ mem.device = sub.device;
+ mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
+ mem.device_size = existing_size;
+
+ sub.device->mem_zero(mem);
+ sub.ptr_map[key] = mem.device_pointer;
+ }
+ }
+ }
+ else {
+ foreach (const vector<SubDevice *> &island, peer_islands) {
+ SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
+ mem.device = owner_sub->device;
+ mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
+ mem.device_size = existing_size;
+
+ owner_sub->device->mem_zero(mem);
+ owner_sub->ptr_map[key] = mem.device_pointer;
+ }
}
mem.device = this;
@@ -236,13 +452,49 @@ class MultiDevice : public Device {
device_ptr key = mem.device_pointer;
size_t existing_size = mem.device_size;
- foreach (SubDevice &sub, devices) {
- mem.device = sub.device;
- mem.device_pointer = sub.ptr_map[key];
- mem.device_size = existing_size;
+ /* Free memory that was allocated for all devices (see above) on each device */
+ if (strcmp(mem.name, "RenderBuffers") == 0 || mem.type == MEM_PIXELS) {
+ foreach (SubDevice &sub, devices) {
+ mem.device = sub.device;
+ mem.device_pointer = sub.ptr_map[key];
+ mem.device_size = existing_size;
- sub.device->mem_free(mem);
- sub.ptr_map.erase(sub.ptr_map.find(key));
+ sub.device->mem_free(mem);
+ sub.ptr_map.erase(sub.ptr_map.find(key));
+ }
+ foreach (SubDevice &sub, denoising_devices) {
+ if (matching_rendering_and_denoising_devices) {
+ sub.ptr_map.erase(key);
+ }
+ else {
+ mem.device = sub.device;
+ mem.device_pointer = sub.ptr_map[key];
+ mem.device_size = existing_size;
+
+ sub.device->mem_free(mem);
+ sub.ptr_map.erase(sub.ptr_map.find(key));
+ }
+ }
+ }
+ else {
+ foreach (const vector<SubDevice *> &island, peer_islands) {
+ SubDevice *owner_sub = find_matching_mem_device(key, *island.front());
+ mem.device = owner_sub->device;
+ mem.device_pointer = owner_sub->ptr_map[key];
+ mem.device_size = existing_size;
+
+ owner_sub->device->mem_free(mem);
+ owner_sub->ptr_map.erase(owner_sub->ptr_map.find(key));
+
+ if (mem.type == MEM_TEXTURE) {
+ /* Free texture objects on all devices */
+ foreach (SubDevice *island_sub, island) {
+ if (island_sub != owner_sub) {
+ island_sub->device->mem_free(mem);
+ }
+ }
+ }
+ }
}
mem.device = this;
@@ -270,6 +522,8 @@ class MultiDevice : public Device {
bool transparent,
const DeviceDrawParams &draw_params)
{
+ assert(rgba.type == MEM_PIXELS);
+
device_ptr key = rgba.device_pointer;
int i = 0, sub_h = h / devices.size();
int sub_height = height / devices.size();
@@ -292,10 +546,21 @@ class MultiDevice : public Device {
void map_tile(Device *sub_device, RenderTile &tile)
{
+ if (!tile.buffer) {
+ return;
+ }
+
foreach (SubDevice &sub, devices) {
if (sub.device == sub_device) {
- if (tile.buffer)
- tile.buffer = sub.ptr_map[tile.buffer];
+ tile.buffer = find_matching_mem(tile.buffer, sub);
+ return;
+ }
+ }
+
+ foreach (SubDevice &sub, denoising_devices) {
+ if (sub.device == sub_device) {
+ tile.buffer = sub.ptr_map[tile.buffer];
+ return;
}
}
}
@@ -310,13 +575,31 @@ class MultiDevice : public Device {
i++;
}
+ foreach (SubDevice &sub, denoising_devices) {
+ if (sub.device == sub_device)
+ return i;
+ i++;
+ }
+
return -1;
}
- void map_neighbor_tiles(Device *sub_device, RenderTile *tiles)
+ void map_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors)
{
- for (int i = 0; i < 9; i++) {
- if (!tiles[i].buffers) {
+ for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
+ RenderTile &tile = neighbors.tiles[i];
+
+ if (!tile.buffers) {
+ continue;
+ }
+
+ device_vector<float> &mem = tile.buffers->buffer;
+ tile.buffer = mem.device_pointer;
+
+ if (mem.device == this && matching_rendering_and_denoising_devices) {
+ /* Skip unnecessary copies in viewport mode (buffer covers the
+ * whole image), but still need to fix up the tile device pointer. */
+ map_tile(sub_device, tile);
continue;
}
@@ -324,46 +607,63 @@ class MultiDevice : public Device {
* to the current device now, for the duration of the denoising task.
* Note that this temporarily modifies the RenderBuffers and calls
* the device, so this function is not thread safe. */
- device_vector<float> &mem = tiles[i].buffers->buffer;
if (mem.device != sub_device) {
/* Only copy from device to host once. This is faster, but
* also required for the case where a CPU thread is denoising
* a tile rendered on the GPU. In that case we have to avoid
- * overwriting the buffer being denoised by the CPU thread. */
- if (!tiles[i].buffers->map_neighbor_copied) {
- tiles[i].buffers->map_neighbor_copied = true;
- mem.copy_from_device(0, mem.data_size, 1);
+ * overwriting the buffer being denoised by the CPU thread. */
+ if (!tile.buffers->map_neighbor_copied) {
+ tile.buffers->map_neighbor_copied = true;
+ mem.copy_from_device();
}
- mem.swap_device(sub_device, 0, 0);
+ if (mem.device == this) {
+ /* Can re-use memory if tile is already allocated on the sub device. */
+ map_tile(sub_device, tile);
+ mem.swap_device(sub_device, mem.device_size, tile.buffer);
+ }
+ else {
+ mem.swap_device(sub_device, 0, 0);
+ }
mem.copy_to_device();
- tiles[i].buffer = mem.device_pointer;
- tiles[i].device_size = mem.device_size;
+
+ tile.buffer = mem.device_pointer;
+ tile.device_size = mem.device_size;
mem.restore_device();
}
}
}
- void unmap_neighbor_tiles(Device *sub_device, RenderTile *tiles)
+ void unmap_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors)
{
+ RenderTile &target_tile = neighbors.target;
+ device_vector<float> &mem = target_tile.buffers->buffer;
+
+ if (mem.device == this && matching_rendering_and_denoising_devices) {
+ return;
+ }
+
/* Copy denoised result back to the host. */
- device_vector<float> &mem = tiles[9].buffers->buffer;
- mem.swap_device(sub_device, tiles[9].device_size, tiles[9].buffer);
- mem.copy_from_device(0, mem.data_size, 1);
+ mem.swap_device(sub_device, target_tile.device_size, target_tile.buffer);
+ mem.copy_from_device();
mem.restore_device();
+
/* Copy denoised result to the original device. */
mem.copy_to_device();
- for (int i = 0; i < 9; i++) {
- if (!tiles[i].buffers) {
+ for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
+ RenderTile &tile = neighbors.tiles[i];
+ if (!tile.buffers) {
continue;
}
- device_vector<float> &mem = tiles[i].buffers->buffer;
- if (mem.device != sub_device) {
- mem.swap_device(sub_device, tiles[i].device_size, tiles[i].buffer);
+ device_vector<float> &mem = tile.buffers->buffer;
+
+ if (mem.device != sub_device && mem.device != this) {
+ /* Free up memory again if it was allocated for the copy above. */
+ mem.swap_device(sub_device, tile.device_size, tile.buffer);
sub_device->mem_free(mem);
mem.restore_device();
}
@@ -388,26 +688,50 @@ class MultiDevice : public Device {
void task_add(DeviceTask &task)
{
+ list<SubDevice> task_devices = devices;
+ if (!denoising_devices.empty()) {
+ if (task.type == DeviceTask::DENOISE_BUFFER) {
+ /* Denoising tasks should be redirected to the denoising devices entirely. */
+ task_devices = denoising_devices;
+ }
+ else if (task.type == DeviceTask::RENDER && (task.tile_types & RenderTile::DENOISE)) {
+ const uint tile_types = task.tile_types;
+ /* For normal rendering tasks, only redirect the denoising part to the denoising devices.
+ * There is no need to split the task here, since all devices pull their work through 'acquire_tile'. */
+ task.tile_types = RenderTile::DENOISE;
+ foreach (SubDevice &sub, denoising_devices) {
+ sub.device->task_add(task);
+ }
+ /* Rendering itself should still be executed on the rendering devices. */
+ task.tile_types = tile_types ^ RenderTile::DENOISE;
+ }
+ }
+
list<DeviceTask> tasks;
- task.split(tasks, devices.size());
+ task.split(tasks, task_devices.size());
- foreach (SubDevice &sub, devices) {
+ foreach (SubDevice &sub, task_devices) {
if (!tasks.empty()) {
DeviceTask subtask = tasks.front();
tasks.pop_front();
if (task.buffer)
- subtask.buffer = sub.ptr_map[task.buffer];
+ subtask.buffer = find_matching_mem(task.buffer, sub);
if (task.rgba_byte)
subtask.rgba_byte = sub.ptr_map[task.rgba_byte];
if (task.rgba_half)
subtask.rgba_half = sub.ptr_map[task.rgba_half];
if (task.shader_input)
- subtask.shader_input = sub.ptr_map[task.shader_input];
+ subtask.shader_input = find_matching_mem(task.shader_input, sub);
if (task.shader_output)
- subtask.shader_output = sub.ptr_map[task.shader_output];
+ subtask.shader_output = find_matching_mem(task.shader_output, sub);
sub.device->task_add(subtask);
+
+ if (task.buffers && task.buffers->buffer.device == this) {
+ /* Synchronize access to RenderBuffers, since 'map_neighbor_tiles' is not thread-safe. */
+ sub.device->task_wait();
+ }
}
}
}
@@ -416,16 +740,17 @@ class MultiDevice : public Device {
{
foreach (SubDevice &sub, devices)
sub.device->task_wait();
+ foreach (SubDevice &sub, denoising_devices)
+ sub.device->task_wait();
}
void task_cancel()
{
foreach (SubDevice &sub, devices)
sub.device->task_cancel();
+ foreach (SubDevice &sub, denoising_devices)
+ sub.device->task_cancel();
}
-
- protected:
- Stats sub_stats_;
};
Device *device_multi_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp
index 80334ad8f22..8904b517e92 100644
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -14,9 +14,9 @@
* limitations under the License.
*/
+#include "device/device_network.h"
#include "device/device.h"
#include "device/device_intern.h"
-#include "device/device_network.h"
#include "util/util_foreach.h"
#include "util/util_logging.h"
@@ -311,7 +311,9 @@ void device_network_info(vector<DeviceInfo> &devices)
/* todo: get this info from device */
info.has_volume_decoupled = false;
+ info.has_adaptive_stop_per_sample = false;
info.has_osl = false;
+ info.denoisers = DENOISER_NONE;
devices.push_back(info);
}
diff --git a/intern/cycles/device/device_network.h b/intern/cycles/device/device_network.h
index 5b69b815cc6..e74c4508ab6 100644
--- a/intern/cycles/device/device_network.h
+++ b/intern/cycles/device/device_network.h
@@ -19,19 +19,19 @@
#ifdef WITH_NETWORK
-# include <boost/archive/text_iarchive.hpp>
-# include <boost/archive/text_oarchive.hpp>
# include <boost/archive/binary_iarchive.hpp>
# include <boost/archive/binary_oarchive.hpp>
+# include <boost/archive/text_iarchive.hpp>
+# include <boost/archive/text_oarchive.hpp>
# include <boost/array.hpp>
# include <boost/asio.hpp>
# include <boost/bind.hpp>
# include <boost/serialization/vector.hpp>
# include <boost/thread.hpp>
+# include <deque>
# include <iostream>
# include <sstream>
-# include <deque>
# include "render/buffers.h"
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 99a8d2438d6..39b9ef70192 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -16,8 +16,8 @@
#ifdef WITH_OPENCL
-# include "device/opencl/opencl.h"
-
+# include "device/opencl/device_opencl.h"
+# include "device/device.h"
# include "device/device_intern.h"
# include "util/util_foreach.h"
@@ -119,6 +119,8 @@ void device_opencl_info(vector<DeviceInfo> &devices)
info.display_device = true;
info.use_split_kernel = true;
info.has_volume_decoupled = false;
+ info.has_adaptive_stop_per_sample = false;
+ info.denoisers = DENOISER_NLM;
info.id = id;
/* Check OpenCL extensions */
@@ -136,8 +138,8 @@ string device_opencl_capabilities()
}
string result = "";
string error_msg = ""; /* Only used by opencl_assert(), but in the future
- * it could also be nicely reported to the console.
- */
+ * it could also be nicely reported to the console.
+ */
cl_uint num_platforms = 0;
opencl_assert(device_opencl_get_num_platforms_safe(&num_platforms));
if (num_platforms == 0) {
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
new file mode 100644
index 00000000000..1cc45983565
--- /dev/null
+++ b/intern/cycles/device/device_optix.cpp
@@ -0,0 +1,1770 @@
+/*
+ * Copyright 2019, NVIDIA Corporation.
+ * Copyright 2019, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef WITH_OPTIX
+
+# include "bvh/bvh.h"
+# include "device/cuda/device_cuda.h"
+# include "device/device_denoising.h"
+# include "device/device_intern.h"
+# include "render/buffers.h"
+# include "render/hair.h"
+# include "render/mesh.h"
+# include "render/object.h"
+# include "render/scene.h"
+# include "util/util_debug.h"
+# include "util/util_logging.h"
+# include "util/util_md5.h"
+# include "util/util_path.h"
+# include "util/util_time.h"
+
+# ifdef WITH_CUDA_DYNLOAD
+# include <cuew.h>
+// Do not use CUDA SDK headers when using CUEW
+# define OPTIX_DONT_INCLUDE_CUDA
+# endif
+# include <optix_function_table_definition.h>
+# include <optix_stubs.h>
+
+// TODO(pmours): Disable this once drivers have native support
+# define OPTIX_DENOISER_NO_PIXEL_STRIDE 1
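+// While this is enabled, the selected input passes are packed into a tightly strided float3
+// buffer (see kernel_cuda_filter_convert_to_rgb below) and unpacked again after denoising.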
+
+CCL_NAMESPACE_BEGIN
+
+/* Make sure this stays in sync with kernel_globals.h */
+struct ShaderParams {
+ uint4 *input;
+ float4 *output;
+ int type;
+ int filter;
+ int sx;
+ int offset;
+ int sample;
+};
+struct KernelParams {
+ WorkTile tile;
+ KernelData data;
+ ShaderParams shader;
+# define KERNEL_TEX(type, name) const type *name;
+# include "kernel/kernel_textures.h"
+# undef KERNEL_TEX
+};
+
+# define check_result_cuda(stmt) \
+ { \
+ CUresult res = stmt; \
+ if (res != CUDA_SUCCESS) { \
+ const char *name; \
+ cuGetErrorName(res, &name); \
+ set_error(string_printf("%s in %s (device_optix.cpp:%d)", name, #stmt, __LINE__)); \
+ return; \
+ } \
+ } \
+ (void)0
+# define check_result_cuda_ret(stmt) \
+ { \
+ CUresult res = stmt; \
+ if (res != CUDA_SUCCESS) { \
+ const char *name; \
+ cuGetErrorName(res, &name); \
+ set_error(string_printf("%s in %s (device_optix.cpp:%d)", name, #stmt, __LINE__)); \
+ return false; \
+ } \
+ } \
+ (void)0
+
+# define check_result_optix(stmt) \
+ { \
+ enum OptixResult res = stmt; \
+ if (res != OPTIX_SUCCESS) { \
+ const char *name = optixGetErrorName(res); \
+ set_error(string_printf("%s in %s (device_optix.cpp:%d)", name, #stmt, __LINE__)); \
+ return; \
+ } \
+ } \
+ (void)0
+# define check_result_optix_ret(stmt) \
+ { \
+ enum OptixResult res = stmt; \
+ if (res != OPTIX_SUCCESS) { \
+ const char *name = optixGetErrorName(res); \
+ set_error(string_printf("%s in %s (device_optix.cpp:%d)", name, #stmt, __LINE__)); \
+ return false; \
+ } \
+ } \
+ (void)0
+
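+// Helper to launch one of the CUDA filter kernels over a w x h domain: the function is looked
+// up in cuFilterModule, a roughly square thread block is derived from the kernel's maximum
+// threads per block, and the grid size is computed from the requested width and height.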
+# define launch_filter_kernel(func_name, w, h, args) \
+ { \
+ CUfunction func; \
+ check_result_cuda_ret(cuModuleGetFunction(&func, cuFilterModule, func_name)); \
+ check_result_cuda_ret(cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1)); \
+ int threads; \
+ check_result_cuda_ret( \
+ cuFuncGetAttribute(&threads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
+ threads = (int)sqrt((float)threads); \
+ int xblocks = ((w) + threads - 1) / threads; \
+ int yblocks = ((h) + threads - 1) / threads; \
+ check_result_cuda_ret( \
+ cuLaunchKernel(func, xblocks, yblocks, 1, threads, threads, 1, 0, 0, args, 0)); \
+ } \
+ (void)0
+
+class OptiXDevice : public CUDADevice {
+
+ // List of OptiX program groups
+ enum {
+ PG_RGEN,
+ PG_MISS,
+ PG_HITD, // Default hit group
+ PG_HITS, // __SHADOW_RECORD_ALL__ hit group
+ PG_HITL, // __BVH_LOCAL__ hit group (only used for triangles)
+# if OPTIX_ABI_VERSION >= 36
+ PG_HITD_MOTION,
+ PG_HITS_MOTION,
+# endif
+# ifdef WITH_CYCLES_DEBUG
+ PG_EXCP,
+# endif
+ PG_BAKE, // kernel_bake_evaluate
+ PG_DISP, // kernel_displace_evaluate
+ PG_BACK, // kernel_background_evaluate
+ NUM_PROGRAM_GROUPS
+ };
+
+ // List of OptiX pipelines
+ enum { PIP_PATH_TRACE, PIP_SHADER_EVAL, NUM_PIPELINES };
+
+ // A single shader binding table entry
+ struct SbtRecord {
+ char header[OPTIX_SBT_RECORD_HEADER_SIZE];
+ };
+
+ // Information stored about CUDA memory allocations
+ struct CUDAMem {
+ bool free_map_host = false;
+ CUarray array = NULL;
+ CUtexObject texobject = 0;
+ bool use_mapped_host = false;
+ };
+
+ // Helper class to manage current CUDA context
+ struct CUDAContextScope {
+ CUDAContextScope(CUcontext ctx)
+ {
+ cuCtxPushCurrent(ctx);
+ }
+ ~CUDAContextScope()
+ {
+ cuCtxPopCurrent(NULL);
+ }
+ };
+
+ // Use a pool with multiple threads to support launches with multiple CUDA streams
+ TaskPool task_pool;
+
+ vector<CUstream> cuda_stream;
+ OptixDeviceContext context = NULL;
+
+ OptixModule optix_module = NULL; // All necessary OptiX kernels are in one module
+ OptixModule builtin_modules[2] = {};
+ OptixPipeline pipelines[NUM_PIPELINES] = {};
+
+ bool motion_blur = false;
+ device_vector<SbtRecord> sbt_data;
+ device_only_memory<KernelParams> launch_params;
+ vector<CUdeviceptr> as_mem;
+ OptixTraversableHandle tlas_handle = 0;
+
+ OptixDenoiser denoiser = NULL;
+ device_only_memory<unsigned char> denoiser_state;
+ int denoiser_input_passes = 0;
+
+ public:
+ OptiXDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_)
+ : CUDADevice(info_, stats_, profiler_, background_),
+ sbt_data(this, "__sbt", MEM_READ_ONLY),
+ launch_params(this, "__params"),
+ denoiser_state(this, "__denoiser_state")
+ {
+ // Store number of CUDA streams in device info
+ info.cpu_threads = DebugFlags().optix.cuda_streams;
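+ // One CUDA stream and launch-parameter slot is created per thread below, so this value also
+ // determines how many tiles can be rendered in parallel on this device.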
+
+ // Make the CUDA context current
+ if (!cuContext) {
+ return; // Do not initialize if CUDA context creation failed already
+ }
+ const CUDAContextScope scope(cuContext);
+
+ // Create OptiX context for this device
+ OptixDeviceContextOptions options = {};
+# ifdef WITH_CYCLES_LOGGING
+ options.logCallbackLevel = 4; // Fatal = 1, Error = 2, Warning = 3, Print = 4
+ options.logCallbackFunction =
+ [](unsigned int level, const char *, const char *message, void *) {
+ switch (level) {
+ case 1:
+ LOG_IF(FATAL, VLOG_IS_ON(1)) << message;
+ break;
+ case 2:
+ LOG_IF(ERROR, VLOG_IS_ON(1)) << message;
+ break;
+ case 3:
+ LOG_IF(WARNING, VLOG_IS_ON(1)) << message;
+ break;
+ case 4:
+ LOG_IF(INFO, VLOG_IS_ON(1)) << message;
+ break;
+ }
+ };
+# endif
+ check_result_optix(optixDeviceContextCreate(cuContext, &options, &context));
+# ifdef WITH_CYCLES_LOGGING
+ check_result_optix(optixDeviceContextSetLogCallback(
+ context, options.logCallbackFunction, options.logCallbackData, options.logCallbackLevel));
+# endif
+
+ // Create launch streams
+ cuda_stream.resize(info.cpu_threads);
+ for (int i = 0; i < info.cpu_threads; ++i)
+ check_result_cuda(cuStreamCreate(&cuda_stream[i], CU_STREAM_NON_BLOCKING));
+
+ // Fix weird compiler bug that assigns wrong size
+ launch_params.data_elements = sizeof(KernelParams);
+ // Allocate launch parameter buffer memory on device
+ launch_params.alloc_to_device(info.cpu_threads);
+ }
+ ~OptiXDevice()
+ {
+ // Stop processing any more tasks
+ task_pool.cancel();
+
+ // Make CUDA context current
+ const CUDAContextScope scope(cuContext);
+
+ // Free all acceleration structures
+ for (CUdeviceptr mem : as_mem) {
+ cuMemFree(mem);
+ }
+
+ sbt_data.free();
+ texture_info.free();
+ launch_params.free();
+ denoiser_state.free();
+
+ // Unload modules
+ if (optix_module != NULL)
+ optixModuleDestroy(optix_module);
+ for (unsigned int i = 0; i < 2; ++i)
+ if (builtin_modules[i] != NULL)
+ optixModuleDestroy(builtin_modules[i]);
+ for (unsigned int i = 0; i < NUM_PIPELINES; ++i)
+ if (pipelines[i] != NULL)
+ optixPipelineDestroy(pipelines[i]);
+
+ // Destroy launch streams
+ for (CUstream stream : cuda_stream)
+ cuStreamDestroy(stream);
+
+ if (denoiser != NULL)
+ optixDenoiserDestroy(denoiser);
+
+ optixDeviceContextDestroy(context);
+ }
+
+ private:
+ bool show_samples() const override
+ {
+ // Only show samples if not rendering multiple tiles in parallel
+ return info.cpu_threads == 1;
+ }
+
+ BVHLayoutMask get_bvh_layout_mask() const override
+ {
+ // OptiX has its own internal acceleration structure format
+ return BVH_LAYOUT_OPTIX;
+ }
+
+ string compile_kernel_get_common_cflags(const DeviceRequestedFeatures &requested_features,
+ bool filter,
+ bool /*split*/) override
+ {
+ // Split kernel is not supported in OptiX
+ string common_cflags = CUDADevice::compile_kernel_get_common_cflags(
+ requested_features, filter, false);
+
+ // Add OptiX SDK include directory to include paths
+ const char *optix_sdk_path = getenv("OPTIX_ROOT_DIR");
+ if (optix_sdk_path) {
+ common_cflags += string_printf(" -I\"%s/include\"", optix_sdk_path);
+ }
+
+ return common_cflags;
+ }
+
+ bool load_kernels(const DeviceRequestedFeatures &requested_features) override
+ {
+ if (have_error()) {
+ // Abort early if context creation failed already
+ return false;
+ }
+
+ // Load CUDA modules because we need some of the utility kernels
+ if (!CUDADevice::load_kernels(requested_features)) {
+ return false;
+ }
+
+ // Disable baking for now, since its kernel is not well-suited for inlining and is very slow
+ if (requested_features.use_baking) {
+ set_error("OptiX backend does not support baking yet");
+ return false;
+ }
+ // Disable shader raytracing support for now, since continuation callables are slow
+ if (requested_features.use_shader_raytrace) {
+ set_error("OptiX backend does not support 'Ambient Occlusion' and 'Bevel' shader nodes yet");
+ return false;
+ }
+
+ const CUDAContextScope scope(cuContext);
+
+ // Unload existing OptiX module and pipelines first
+ if (optix_module != NULL) {
+ optixModuleDestroy(optix_module);
+ optix_module = NULL;
+ }
+ for (unsigned int i = 0; i < 2; ++i) {
+ if (builtin_modules[i] != NULL) {
+ optixModuleDestroy(builtin_modules[i]);
+ builtin_modules[i] = NULL;
+ }
+ }
+ for (unsigned int i = 0; i < NUM_PIPELINES; ++i) {
+ if (pipelines[i] != NULL) {
+ optixPipelineDestroy(pipelines[i]);
+ pipelines[i] = NULL;
+ }
+ }
+
+ OptixModuleCompileOptions module_options;
+ module_options.maxRegisterCount = 0; // Do not set an explicit register limit
+# ifdef WITH_CYCLES_DEBUG
+ module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0;
+ module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
+# else
+ module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;
+ module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
+# endif
+ OptixPipelineCompileOptions pipeline_options;
+ // Default to no motion blur and two-level graph, since it is the fastest option
+ pipeline_options.usesMotionBlur = false;
+ pipeline_options.traversableGraphFlags =
+ OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING;
+ pipeline_options.numPayloadValues = 6;
+ pipeline_options.numAttributeValues = 2; // u, v
+# ifdef WITH_CYCLES_DEBUG
+ pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW |
+ OPTIX_EXCEPTION_FLAG_TRACE_DEPTH;
+# else
+ pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE;
+# endif
+ pipeline_options.pipelineLaunchParamsVariableName = "__params"; // See kernel_globals.h
+
+# if OPTIX_ABI_VERSION >= 36
+ pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE;
+ if (requested_features.use_hair) {
+ if (DebugFlags().optix.curves_api && requested_features.use_hair_thick) {
+ pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE;
+ }
+ else {
+ pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
+ }
+ }
+# endif
+
+ // Keep track of whether motion blur is enabled, so motion can be enabled/disabled in BVH builds.
+ // This is necessary since objects may be reported to have motion if the Vector pass is
+ // active, but may still need to be rendered without motion blur if motion blur itself is off.
+ motion_blur = requested_features.use_object_motion;
+
+ if (motion_blur) {
+ pipeline_options.usesMotionBlur = true;
+ // Motion blur can insert motion transforms into the traversal graph
+ // It is no longer a two-level graph then, so need to set flags to allow any configuration
+ pipeline_options.traversableGraphFlags = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY;
+ }
+
+ { // Load and compile PTX module with OptiX kernels
+ string ptx_data, ptx_filename = path_get("lib/kernel_optix.ptx");
+ if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) {
+ if (!getenv("OPTIX_ROOT_DIR")) {
+ set_error(
+ "Missing OPTIX_ROOT_DIR environment variable (which must be set with the path to "
+ "the Optix SDK to be able to compile Optix kernels on demand).");
+ return false;
+ }
+ ptx_filename = compile_kernel(requested_features, "kernel_optix", "optix", true);
+ }
+ if (ptx_filename.empty() || !path_read_text(ptx_filename, ptx_data)) {
+ set_error("Failed to load OptiX kernel from '" + ptx_filename + "'");
+ return false;
+ }
+
+ check_result_optix_ret(optixModuleCreateFromPTX(context,
+ &module_options,
+ &pipeline_options,
+ ptx_data.data(),
+ ptx_data.size(),
+ nullptr,
+ 0,
+ &optix_module));
+ }
+
+ // Create program groups
+ OptixProgramGroup groups[NUM_PROGRAM_GROUPS] = {};
+ OptixProgramGroupDesc group_descs[NUM_PROGRAM_GROUPS] = {};
+ OptixProgramGroupOptions group_options = {}; // There are no options currently
+ group_descs[PG_RGEN].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
+ group_descs[PG_RGEN].raygen.module = optix_module;
+ // Ignore branched integrator for now (see "requested_features.use_integrator_branched")
+ group_descs[PG_RGEN].raygen.entryFunctionName = "__raygen__kernel_optix_path_trace";
+ group_descs[PG_MISS].kind = OPTIX_PROGRAM_GROUP_KIND_MISS;
+ group_descs[PG_MISS].miss.module = optix_module;
+ group_descs[PG_MISS].miss.entryFunctionName = "__miss__kernel_optix_miss";
+ group_descs[PG_HITD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
+ group_descs[PG_HITD].hitgroup.moduleCH = optix_module;
+ group_descs[PG_HITD].hitgroup.entryFunctionNameCH = "__closesthit__kernel_optix_hit";
+ group_descs[PG_HITD].hitgroup.moduleAH = optix_module;
+ group_descs[PG_HITD].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_visibility_test";
+ group_descs[PG_HITS].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
+ group_descs[PG_HITS].hitgroup.moduleAH = optix_module;
+ group_descs[PG_HITS].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_shadow_all_hit";
+
+ if (requested_features.use_hair) {
+ group_descs[PG_HITD].hitgroup.moduleIS = optix_module;
+ group_descs[PG_HITS].hitgroup.moduleIS = optix_module;
+
+ // Add curve intersection programs
+ if (requested_features.use_hair_thick) {
+ // The intersection programs needed for thick hair are slower, and using them also slows
+ // down ribbons. Ideally this should not be needed.
+ group_descs[PG_HITD].hitgroup.entryFunctionNameIS = "__intersection__curve_all";
+ group_descs[PG_HITS].hitgroup.entryFunctionNameIS = "__intersection__curve_all";
+ }
+ else {
+ group_descs[PG_HITD].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon";
+ group_descs[PG_HITS].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon";
+ }
+
+# if OPTIX_ABI_VERSION >= 36
+ if (DebugFlags().optix.curves_api && requested_features.use_hair_thick) {
+ OptixBuiltinISOptions builtin_options;
+ builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
+ builtin_options.usesMotionBlur = false;
+
+ check_result_optix_ret(optixBuiltinISModuleGet(
+ context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[0]));
+
+ group_descs[PG_HITD].hitgroup.moduleIS = builtin_modules[0];
+ group_descs[PG_HITD].hitgroup.entryFunctionNameIS = nullptr;
+ group_descs[PG_HITS].hitgroup.moduleIS = builtin_modules[0];
+ group_descs[PG_HITS].hitgroup.entryFunctionNameIS = nullptr;
+
+ if (motion_blur) {
+ builtin_options.usesMotionBlur = true;
+
+ check_result_optix_ret(optixBuiltinISModuleGet(
+ context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[1]));
+
+ group_descs[PG_HITD_MOTION] = group_descs[PG_HITD];
+ group_descs[PG_HITD_MOTION].hitgroup.moduleIS = builtin_modules[1];
+ group_descs[PG_HITS_MOTION] = group_descs[PG_HITS];
+ group_descs[PG_HITS_MOTION].hitgroup.moduleIS = builtin_modules[1];
+ }
+ }
+# endif
+ }
+
+ if (requested_features.use_subsurface || requested_features.use_shader_raytrace) {
+ // Add hit group for local intersections
+ group_descs[PG_HITL].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
+ group_descs[PG_HITL].hitgroup.moduleAH = optix_module;
+ group_descs[PG_HITL].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_local_hit";
+ }
+
+# ifdef WITH_CYCLES_DEBUG
+ group_descs[PG_EXCP].kind = OPTIX_PROGRAM_GROUP_KIND_EXCEPTION;
+ group_descs[PG_EXCP].exception.module = optix_module;
+ group_descs[PG_EXCP].exception.entryFunctionName = "__exception__kernel_optix_exception";
+# endif
+
+ if (requested_features.use_baking) {
+ group_descs[PG_BAKE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
+ group_descs[PG_BAKE].raygen.module = optix_module;
+ group_descs[PG_BAKE].raygen.entryFunctionName = "__raygen__kernel_optix_bake";
+ }
+
+ if (requested_features.use_true_displacement) {
+ group_descs[PG_DISP].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
+ group_descs[PG_DISP].raygen.module = optix_module;
+ group_descs[PG_DISP].raygen.entryFunctionName = "__raygen__kernel_optix_displace";
+ }
+
+ if (requested_features.use_background_light) {
+ group_descs[PG_BACK].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
+ group_descs[PG_BACK].raygen.module = optix_module;
+ group_descs[PG_BACK].raygen.entryFunctionName = "__raygen__kernel_optix_background";
+ }
+
+ check_result_optix_ret(optixProgramGroupCreate(
+ context, group_descs, NUM_PROGRAM_GROUPS, &group_options, nullptr, 0, groups));
+
+ // Get program stack sizes
+ OptixStackSizes stack_size[NUM_PROGRAM_GROUPS] = {};
+ // Set up SBT, which in this case is used only to select between different programs
+ sbt_data.alloc(NUM_PROGRAM_GROUPS);
+ memset(sbt_data.host_pointer, 0, sizeof(SbtRecord) * NUM_PROGRAM_GROUPS);
+ for (unsigned int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
+ check_result_optix_ret(optixSbtRecordPackHeader(groups[i], &sbt_data[i]));
+ check_result_optix_ret(optixProgramGroupGetStackSize(groups[i], &stack_size[i]));
+ }
+ sbt_data.copy_to_device(); // Upload SBT to device
+
+ // Calculate maximum trace continuation stack size
+ unsigned int trace_css = stack_size[PG_HITD].cssCH;
+ // This is based on the maximum of closest-hit and any-hit/intersection programs
+ trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH);
+ trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH);
+ trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH);
+# if OPTIX_ABI_VERSION >= 36
+ trace_css = std::max(trace_css,
+ stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH);
+ trace_css = std::max(trace_css,
+ stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH);
+# endif
+
+ OptixPipelineLinkOptions link_options;
+ link_options.maxTraceDepth = 1;
+# ifdef WITH_CYCLES_DEBUG
+ link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
+# else
+ link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
+# endif
+# if OPTIX_ABI_VERSION < 24
+ link_options.overrideUsesMotionBlur = motion_blur;
+# endif
+
+ { // Create path tracing pipeline
+ OptixProgramGroup pipeline_groups[] = {
+ groups[PG_RGEN],
+ groups[PG_MISS],
+ groups[PG_HITD],
+ groups[PG_HITS],
+ groups[PG_HITL],
+# if OPTIX_ABI_VERSION >= 36
+ groups[PG_HITD_MOTION],
+ groups[PG_HITS_MOTION],
+# endif
+# ifdef WITH_CYCLES_DEBUG
+ groups[PG_EXCP],
+# endif
+ };
+ check_result_optix_ret(
+ optixPipelineCreate(context,
+ &pipeline_options,
+ &link_options,
+ pipeline_groups,
+ (sizeof(pipeline_groups) / sizeof(pipeline_groups[0])),
+ nullptr,
+ 0,
+ &pipelines[PIP_PATH_TRACE]));
+
+ // Combine ray generation and trace continuation stack size
+ const unsigned int css = stack_size[PG_RGEN].cssRG + link_options.maxTraceDepth * trace_css;
+
+ // Set stack size depending on pipeline options
+ check_result_optix_ret(
+ optixPipelineSetStackSize(pipelines[PIP_PATH_TRACE], 0, 0, css, (motion_blur ? 3 : 2)));
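+ // The last argument is the maximum traversable graph depth: instance and geometry levels,
+ // plus one extra level for motion transforms when motion blur is enabled.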
+ }
+
+ // Only need to create shader evaluation pipeline if one of these features is used:
+ const bool use_shader_eval_pipeline = requested_features.use_baking ||
+ requested_features.use_background_light ||
+ requested_features.use_true_displacement;
+
+ if (use_shader_eval_pipeline) { // Create shader evaluation pipeline
+ OptixProgramGroup pipeline_groups[] = {
+ groups[PG_BAKE],
+ groups[PG_DISP],
+ groups[PG_BACK],
+ groups[PG_MISS],
+ groups[PG_HITD],
+ groups[PG_HITS],
+ groups[PG_HITL],
+# if OPTIX_ABI_VERSION >= 36
+ groups[PG_HITD_MOTION],
+ groups[PG_HITS_MOTION],
+# endif
+# ifdef WITH_CYCLES_DEBUG
+ groups[PG_EXCP],
+# endif
+ };
+ check_result_optix_ret(
+ optixPipelineCreate(context,
+ &pipeline_options,
+ &link_options,
+ pipeline_groups,
+ (sizeof(pipeline_groups) / sizeof(pipeline_groups[0])),
+ nullptr,
+ 0,
+ &pipelines[PIP_SHADER_EVAL]));
+
+ // Calculate continuation stack size based on the maximum of all ray generation stack sizes
+ const unsigned int css = std::max(stack_size[PG_BAKE].cssRG,
+ std::max(stack_size[PG_DISP].cssRG,
+ stack_size[PG_BACK].cssRG)) +
+ link_options.maxTraceDepth * trace_css;
+
+ check_result_optix_ret(optixPipelineSetStackSize(
+ pipelines[PIP_SHADER_EVAL], 0, 0, css, (pipeline_options.usesMotionBlur ? 3 : 2)));
+ }
+
+ // Clean up program group objects
+ for (unsigned int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
+ optixProgramGroupDestroy(groups[i]);
+ }
+
+ return true;
+ }
+
+ void thread_run(DeviceTask &task, int thread_index) // Main task entry point
+ {
+ if (have_error())
+ return; // Abort early if there was an error previously
+
+ if (task.type == DeviceTask::RENDER) {
+ if (thread_index != 0) {
+ // Only execute denoising in a single thread (see also 'task_add')
+ task.tile_types &= ~RenderTile::DENOISE;
+ }
+
+ RenderTile tile;
+ while (task.acquire_tile(this, tile, task.tile_types)) {
+ if (tile.task == RenderTile::PATH_TRACE)
+ launch_render(task, tile, thread_index);
+ else if (tile.task == RenderTile::DENOISE)
+ launch_denoise(task, tile);
+ task.release_tile(tile);
+ if (task.get_cancel() && !task.need_finish_queue)
+ break; // User requested cancellation
+ else if (have_error())
+ break; // Abort rendering when encountering an error
+ }
+ }
+ else if (task.type == DeviceTask::SHADER) {
+ launch_shader_eval(task, thread_index);
+ }
+ else if (task.type == DeviceTask::DENOISE_BUFFER) {
+ // Set up a single tile that covers the whole task and denoise it
+ RenderTile tile;
+ tile.x = task.x;
+ tile.y = task.y;
+ tile.w = task.w;
+ tile.h = task.h;
+ tile.buffer = task.buffer;
+ tile.num_samples = task.num_samples;
+ tile.start_sample = task.sample;
+ tile.offset = task.offset;
+ tile.stride = task.stride;
+ tile.buffers = task.buffers;
+
+ launch_denoise(task, tile);
+ }
+ }
+
+ void launch_render(DeviceTask &task, RenderTile &rtile, int thread_index)
+ {
+ assert(thread_index < launch_params.data_size);
+
+ // Keep track of total render time of this tile
+ const scoped_timer timer(&rtile.buffers->render_time);
+
+ WorkTile wtile;
+ wtile.x = rtile.x;
+ wtile.y = rtile.y;
+ wtile.w = rtile.w;
+ wtile.h = rtile.h;
+ wtile.offset = rtile.offset;
+ wtile.stride = rtile.stride;
+ wtile.buffer = (float *)rtile.buffer;
+
+ const int end_sample = rtile.start_sample + rtile.num_samples;
+ // Keep this number reasonable to avoid running into TDRs (GPU driver timeouts and resets)
+ int step_samples = (info.display_device ? 8 : 32);
+ if (task.adaptive_sampling.use) {
+ step_samples = task.adaptive_sampling.align_static_samples(step_samples);
+ }
+
+ // Offset into launch params buffer so that streams use separate data
+ device_ptr launch_params_ptr = launch_params.device_pointer +
+ thread_index * launch_params.data_elements;
+
+ const CUDAContextScope scope(cuContext);
+
+ for (int sample = rtile.start_sample; sample < end_sample; sample += step_samples) {
+ // Copy work tile information to device
+ wtile.num_samples = min(step_samples, end_sample - sample);
+ wtile.start_sample = sample;
+ device_ptr d_wtile_ptr = launch_params_ptr + offsetof(KernelParams, tile);
+ check_result_cuda(
+ cuMemcpyHtoDAsync(d_wtile_ptr, &wtile, sizeof(wtile), cuda_stream[thread_index]));
+
+ OptixShaderBindingTable sbt_params = {};
+ sbt_params.raygenRecord = sbt_data.device_pointer + PG_RGEN * sizeof(SbtRecord);
+# ifdef WITH_CYCLES_DEBUG
+ sbt_params.exceptionRecord = sbt_data.device_pointer + PG_EXCP * sizeof(SbtRecord);
+# endif
+ sbt_params.missRecordBase = sbt_data.device_pointer + PG_MISS * sizeof(SbtRecord);
+ sbt_params.missRecordStrideInBytes = sizeof(SbtRecord);
+ sbt_params.missRecordCount = 1;
+ sbt_params.hitgroupRecordBase = sbt_data.device_pointer + PG_HITD * sizeof(SbtRecord);
+ sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord);
+# if OPTIX_ABI_VERSION >= 36
+ sbt_params.hitgroupRecordCount = 5; // PG_HITD(_MOTION), PG_HITS(_MOTION), PG_HITL
+# else
+ sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITS, PG_HITL
+# endif
+
+ // Launch the ray generation program
+ check_result_optix(optixLaunch(pipelines[PIP_PATH_TRACE],
+ cuda_stream[thread_index],
+ launch_params_ptr,
+ launch_params.data_elements,
+ &sbt_params,
+ // Launch with samples close to each other for better locality
+ wtile.w * wtile.num_samples,
+ wtile.h,
+ 1));
+
+ // Run the adaptive sampling kernels at selected samples aligned to step samples.
+ uint filter_sample = wtile.start_sample + wtile.num_samples - 1;
+ if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(filter_sample)) {
+ adaptive_sampling_filter(filter_sample, &wtile, d_wtile_ptr, cuda_stream[thread_index]);
+ }
+
+ // Wait for launch to finish
+ check_result_cuda(cuStreamSynchronize(cuda_stream[thread_index]));
+
+ // Update current sample, so it is displayed correctly
+ rtile.sample = wtile.start_sample + wtile.num_samples;
+ // Update task progress after the kernel completed rendering
+ task.update_progress(&rtile, wtile.w * wtile.h * wtile.num_samples);
+
+ if (task.get_cancel() && !task.need_finish_queue)
+ return; // Cancel rendering
+ }
+
+ // Finalize adaptive sampling
+ if (task.adaptive_sampling.use) {
+ device_ptr d_wtile_ptr = launch_params_ptr + offsetof(KernelParams, tile);
+ adaptive_sampling_post(rtile, &wtile, d_wtile_ptr, cuda_stream[thread_index]);
+ check_result_cuda(cuStreamSynchronize(cuda_stream[thread_index]));
+ task.update_progress(&rtile, rtile.w * rtile.h * wtile.num_samples);
+ }
+ }
+
+ bool launch_denoise(DeviceTask &task, RenderTile &rtile)
+ {
+ // Update current sample (for display and NLM denoising task)
+ rtile.sample = rtile.start_sample + rtile.num_samples;
+
+ // Make CUDA context current now, since it is used for both denoising tasks
+ const CUDAContextScope scope(cuContext);
+
+ // Choose between OptiX and NLM denoising
+ if (task.denoising.type == DENOISER_OPTIX) {
+ // Map neighboring tiles onto this device, with indices laid out as follows,
+ // where index 4 is the center tile and index 9 is the target for the result:
+ // 0 1 2
+ // 3 4 5
+ // 6 7 8 9
+ RenderTileNeighbors neighbors(rtile);
+ task.map_neighbor_tiles(neighbors, this);
+ RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER];
+ RenderTile &target_tile = neighbors.target;
+ rtile = center_tile; // Tile may have been modified by mapping code
+
+ // Calculate size of the tile to denoise (including overlap)
+ int4 rect = center_tile.bounds();
+ // Overlap between tiles has to be at least 64 pixels
+ // TODO(pmours): Query this value from OptiX
+ rect = rect_expand(rect, 64);
+ int4 clip_rect = neighbors.bounds();
+ rect = rect_clip(rect, clip_rect);
+ int2 rect_size = make_int2(rect.z - rect.x, rect.w - rect.y);
+ int2 overlap_offset = make_int2(rtile.x - rect.x, rtile.y - rect.y);
+
+ // Calculate byte offsets and strides
+ int pixel_stride = task.pass_stride * (int)sizeof(float);
+ int pixel_offset = (rtile.offset + rtile.x + rtile.y * rtile.stride) * pixel_stride;
+ const int pass_offset[3] = {
+ (task.pass_denoising_data + DENOISING_PASS_COLOR) * (int)sizeof(float),
+ (task.pass_denoising_data + DENOISING_PASS_ALBEDO) * (int)sizeof(float),
+ (task.pass_denoising_data + DENOISING_PASS_NORMAL) * (int)sizeof(float)};
+
+ // Start with the current tile pointer offset
+ int input_stride = pixel_stride;
+ device_ptr input_ptr = rtile.buffer + pixel_offset;
+
+ // Copy tile data into a common buffer if necessary
+ device_only_memory<float> input(this, "denoiser input");
+ device_vector<TileInfo> tile_info_mem(this, "denoiser tile info", MEM_READ_WRITE);
+
+ bool contiguous_memory = true;
+ for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
+ if (neighbors.tiles[i].buffer && neighbors.tiles[i].buffer != rtile.buffer) {
+ contiguous_memory = false;
+ }
+ }
+
+ if (contiguous_memory) {
+ // Tiles are in continous memory, so can just subtract overlap offset
+ input_ptr -= (overlap_offset.x + overlap_offset.y * rtile.stride) * pixel_stride;
+ // Stride covers the whole width of the image and not just a single tile
+ input_stride *= rtile.stride;
+ }
+ else {
+ // Adjacent tiles are in separate memory regions, so need to copy them into a single one
+ input.alloc_to_device(rect_size.x * rect_size.y * task.pass_stride);
+ // Start with the new input buffer
+ input_ptr = input.device_pointer;
+ // Stride covers the width of the new input buffer, which includes tile width and overlap
+ input_stride *= rect_size.x;
+
+ TileInfo *tile_info = tile_info_mem.alloc(1);
+ for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
+ tile_info->offsets[i] = neighbors.tiles[i].offset;
+ tile_info->strides[i] = neighbors.tiles[i].stride;
+ tile_info->buffers[i] = neighbors.tiles[i].buffer;
+ }
+ tile_info->x[0] = neighbors.tiles[3].x;
+ tile_info->x[1] = neighbors.tiles[4].x;
+ tile_info->x[2] = neighbors.tiles[5].x;
+ tile_info->x[3] = neighbors.tiles[5].x + neighbors.tiles[5].w;
+ tile_info->y[0] = neighbors.tiles[1].y;
+ tile_info->y[1] = neighbors.tiles[4].y;
+ tile_info->y[2] = neighbors.tiles[7].y;
+ tile_info->y[3] = neighbors.tiles[7].y + neighbors.tiles[7].h;
+ tile_info_mem.copy_to_device();
+
+ void *args[] = {
+ &input.device_pointer, &tile_info_mem.device_pointer, &rect.x, &task.pass_stride};
+ launch_filter_kernel("kernel_cuda_filter_copy_input", rect_size.x, rect_size.y, args);
+ }
+
+# if OPTIX_DENOISER_NO_PIXEL_STRIDE
+ device_only_memory<float> input_rgb(this, "denoiser input rgb");
+ input_rgb.alloc_to_device(rect_size.x * rect_size.y * 3 * task.denoising.input_passes);
+
+ void *input_args[] = {&input_rgb.device_pointer,
+ &input_ptr,
+ &rect_size.x,
+ &rect_size.y,
+ &input_stride,
+ &task.pass_stride,
+ const_cast<int *>(pass_offset),
+ &task.denoising.input_passes,
+ &rtile.sample};
+ launch_filter_kernel(
+ "kernel_cuda_filter_convert_to_rgb", rect_size.x, rect_size.y, input_args);
+
+ input_ptr = input_rgb.device_pointer;
+ pixel_stride = 3 * sizeof(float);
+ input_stride = rect_size.x * pixel_stride;
+# endif
+
+ const bool recreate_denoiser = (denoiser == NULL) ||
+ (task.denoising.input_passes != denoiser_input_passes);
+ if (recreate_denoiser) {
+ // Destroy existing handle before creating new one
+ if (denoiser != NULL) {
+ optixDenoiserDestroy(denoiser);
+ }
+
+ // Create OptiX denoiser handle on demand when it is first used
+ OptixDenoiserOptions denoiser_options;
+ assert(task.denoising.input_passes >= 1 && task.denoising.input_passes <= 3);
+ denoiser_options.inputKind = static_cast<OptixDenoiserInputKind>(
+ OPTIX_DENOISER_INPUT_RGB + (task.denoising.input_passes - 1));
+# if OPTIX_ABI_VERSION < 28
+ denoiser_options.pixelFormat = OPTIX_PIXEL_FORMAT_FLOAT3;
+# endif
+ check_result_optix_ret(optixDenoiserCreate(context, &denoiser_options, &denoiser));
+ check_result_optix_ret(
+ optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, NULL, 0));
+
+ // OptiX denoiser handle was created with the requested number of input passes
+ denoiser_input_passes = task.denoising.input_passes;
+ }
+
+ OptixDenoiserSizes sizes = {};
+ check_result_optix_ret(
+ optixDenoiserComputeMemoryResources(denoiser, rect_size.x, rect_size.y, &sizes));
+
+# if OPTIX_ABI_VERSION < 28
+ const size_t scratch_size = sizes.recommendedScratchSizeInBytes;
+# else
+ const size_t scratch_size = sizes.withOverlapScratchSizeInBytes;
+# endif
+ const size_t scratch_offset = sizes.stateSizeInBytes;
+
+ // Allocate denoiser state if tile size has changed since last setup
+ if (recreate_denoiser || (denoiser_state.data_width != rect_size.x ||
+ denoiser_state.data_height != rect_size.y)) {
+ denoiser_state.alloc_to_device(scratch_offset + scratch_size);
+
+ // Initialize denoiser state for the current tile size
+ check_result_optix_ret(optixDenoiserSetup(denoiser,
+ 0,
+ rect_size.x,
+ rect_size.y,
+ denoiser_state.device_pointer,
+ scratch_offset,
+ denoiser_state.device_pointer + scratch_offset,
+ scratch_size));
+
+ denoiser_state.data_width = rect_size.x;
+ denoiser_state.data_height = rect_size.y;
+ }
+
+ // Set up input and output layer information
+ OptixImage2D input_layers[3] = {};
+ OptixImage2D output_layers[1] = {};
+
+ for (int i = 0; i < 3; ++i) {
+# if OPTIX_DENOISER_NO_PIXEL_STRIDE
+ input_layers[i].data = input_ptr + (rect_size.x * rect_size.y * pixel_stride * i);
+# else
+ input_layers[i].data = input_ptr + pass_offset[i];
+# endif
+ input_layers[i].width = rect_size.x;
+ input_layers[i].height = rect_size.y;
+ input_layers[i].rowStrideInBytes = input_stride;
+ input_layers[i].pixelStrideInBytes = pixel_stride;
+ input_layers[i].format = OPTIX_PIXEL_FORMAT_FLOAT3;
+ }
+
+# if OPTIX_DENOISER_NO_PIXEL_STRIDE
+ output_layers[0].data = input_ptr;
+ output_layers[0].width = rect_size.x;
+ output_layers[0].height = rect_size.y;
+ output_layers[0].rowStrideInBytes = input_stride;
+ output_layers[0].pixelStrideInBytes = pixel_stride;
+ int2 output_offset = overlap_offset;
+ overlap_offset = make_int2(0, 0); // Not supported by denoiser API, so apply manually
+# else
+ output_layers[0].data = target_tile.buffer + pixel_offset;
+ output_layers[0].width = target_tile.w;
+ output_layers[0].height = target_tile.h;
+ output_layers[0].rowStrideInBytes = target_tile.stride * pixel_stride;
+ output_layers[0].pixelStrideInBytes = pixel_stride;
+# endif
+ output_layers[0].format = OPTIX_PIXEL_FORMAT_FLOAT3;
+
+ // Finally run the denoiser
+ OptixDenoiserParams params = {}; // All parameters are disabled/zero
+ check_result_optix_ret(optixDenoiserInvoke(denoiser,
+ 0,
+ &params,
+ denoiser_state.device_pointer,
+ scratch_offset,
+ input_layers,
+ task.denoising.input_passes,
+ overlap_offset.x,
+ overlap_offset.y,
+ output_layers,
+ denoiser_state.device_pointer + scratch_offset,
+ scratch_size));
+
+# if OPTIX_DENOISER_NO_PIXEL_STRIDE
+ void *output_args[] = {&input_ptr,
+ &target_tile.buffer,
+ &output_offset.x,
+ &output_offset.y,
+ &rect_size.x,
+ &rect_size.y,
+ &target_tile.x,
+ &target_tile.y,
+ &target_tile.w,
+ &target_tile.h,
+ &target_tile.offset,
+ &target_tile.stride,
+ &task.pass_stride,
+ &rtile.sample};
+ launch_filter_kernel(
+ "kernel_cuda_filter_convert_from_rgb", target_tile.w, target_tile.h, output_args);
+# endif
+
+ check_result_cuda_ret(cuStreamSynchronize(0));
+
+ task.unmap_neighbor_tiles(neighbors, this);
+ }
+ else {
+ // Run CUDA denoising kernels
+ DenoisingTask denoising(this, task);
+ CUDADevice::denoise(rtile, denoising);
+ }
+
+ // Update task progress after the denoiser completed processing
+ task.update_progress(&rtile, rtile.w * rtile.h);
+
+ return true;
+ }
+
+ void launch_shader_eval(DeviceTask &task, int thread_index)
+ {
+ unsigned int rgen_index = PG_BACK;
+ if (task.shader_eval_type >= SHADER_EVAL_BAKE)
+ rgen_index = PG_BAKE;
+ if (task.shader_eval_type == SHADER_EVAL_DISPLACE)
+ rgen_index = PG_DISP;
+
+ const CUDAContextScope scope(cuContext);
+
+ device_ptr launch_params_ptr = launch_params.device_pointer +
+ thread_index * launch_params.data_elements;
+
+ for (int sample = 0; sample < task.num_samples; ++sample) {
+ ShaderParams params;
+ params.input = (uint4 *)task.shader_input;
+ params.output = (float4 *)task.shader_output;
+ params.type = task.shader_eval_type;
+ params.filter = task.shader_filter;
+ params.sx = task.shader_x;
+ params.offset = task.offset;
+ params.sample = sample;
+
+ check_result_cuda(cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParams, shader),
+ &params,
+ sizeof(params),
+ cuda_stream[thread_index]));
+
+ OptixShaderBindingTable sbt_params = {};
+ sbt_params.raygenRecord = sbt_data.device_pointer + rgen_index * sizeof(SbtRecord);
+# ifdef WITH_CYCLES_DEBUG
+ sbt_params.exceptionRecord = sbt_data.device_pointer + PG_EXCP * sizeof(SbtRecord);
+# endif
+ sbt_params.missRecordBase = sbt_data.device_pointer + PG_MISS * sizeof(SbtRecord);
+ sbt_params.missRecordStrideInBytes = sizeof(SbtRecord);
+ sbt_params.missRecordCount = 1;
+ sbt_params.hitgroupRecordBase = sbt_data.device_pointer + PG_HITD * sizeof(SbtRecord);
+ sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord);
+# if OPTIX_ABI_VERSION >= 36
+ sbt_params.hitgroupRecordCount = 5; // PG_HITD(_MOTION), PG_HITS(_MOTION), PG_HITL
+# else
+ sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITS, PG_HITL
+# endif
+
+ check_result_optix(optixLaunch(pipelines[PIP_SHADER_EVAL],
+ cuda_stream[thread_index],
+ launch_params_ptr,
+ launch_params.data_elements,
+ &sbt_params,
+ task.shader_w,
+ 1,
+ 1));
+
+ check_result_cuda(cuStreamSynchronize(cuda_stream[thread_index]));
+
+ task.update_progress(NULL);
+ }
+ }
+
+ bool build_optix_bvh(const OptixBuildInput &build_input,
+ uint16_t num_motion_steps,
+ OptixTraversableHandle &out_handle)
+ {
+ out_handle = 0;
+
+ const CUDAContextScope scope(cuContext);
+
+ // Compute memory usage
+ OptixAccelBufferSizes sizes = {};
+ OptixAccelBuildOptions options;
+ options.operation = OPTIX_BUILD_OPERATION_BUILD;
+ if (background) {
+ // Prefer best performance and lowest memory consumption in background
+ options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE | OPTIX_BUILD_FLAG_ALLOW_COMPACTION;
+ }
+ else {
+ // Prefer fast updates in viewport
+ options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_BUILD;
+ }
+
+ options.motionOptions.numKeys = num_motion_steps;
+ options.motionOptions.flags = OPTIX_MOTION_FLAG_START_VANISH | OPTIX_MOTION_FLAG_END_VANISH;
+ options.motionOptions.timeBegin = 0.0f;
+ options.motionOptions.timeEnd = 1.0f;
+
+ check_result_optix_ret(
+ optixAccelComputeMemoryUsage(context, &options, &build_input, 1, &sizes));
+
+ // Allocate required output buffers
+ device_only_memory<char> temp_mem(this, "temp_build_mem");
+ temp_mem.alloc_to_device(align_up(sizes.tempSizeInBytes, 8) + 8);
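+ // The extra 8 bytes at the end of the temporary buffer hold the emitted compacted-size
+ // property (see 'compacted_size_prop' below).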
+ if (!temp_mem.device_pointer)
+ return false; // Make sure temporary memory allocation succeeded
+
+ // Move textures to host memory if there is not enough room
+ size_t size = 0, free = 0;
+ cuMemGetInfo(&free, &size);
+ size = sizes.outputSizeInBytes + device_working_headroom;
+ if (size >= free && can_map_host) {
+ move_textures_to_host(size - free, false);
+ }
+
+ CUdeviceptr out_data = 0;
+ check_result_cuda_ret(cuMemAlloc(&out_data, sizes.outputSizeInBytes));
+ as_mem.push_back(out_data);
+
+ // Finally build the acceleration structure
+ OptixAccelEmitDesc compacted_size_prop;
+ compacted_size_prop.type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE;
+ // A tiny space was allocated for this property at the end of the temporary buffer above
+ // Make sure this pointer is 8-byte aligned
+ compacted_size_prop.result = align_up(temp_mem.device_pointer + sizes.tempSizeInBytes, 8);
+
+ check_result_optix_ret(optixAccelBuild(context,
+ NULL,
+ &options,
+ &build_input,
+ 1,
+ temp_mem.device_pointer,
+ sizes.tempSizeInBytes,
+ out_data,
+ sizes.outputSizeInBytes,
+ &out_handle,
+ background ? &compacted_size_prop : NULL,
+ background ? 1 : 0));
+
+ // Wait for all operations to finish
+ check_result_cuda_ret(cuStreamSynchronize(NULL));
+
+ // Compact acceleration structure to save memory (do not do this in viewport for faster builds)
+ if (background) {
+ uint64_t compacted_size = sizes.outputSizeInBytes;
+ check_result_cuda_ret(
+ cuMemcpyDtoH(&compacted_size, compacted_size_prop.result, sizeof(compacted_size)));
+
+ // Temporary memory is no longer needed, so free it now to make space
+ temp_mem.free();
+
+ // There is no point compacting if the size does not change
+ if (compacted_size < sizes.outputSizeInBytes) {
+ CUdeviceptr compacted_data = 0;
+ if (cuMemAlloc(&compacted_data, compacted_size) != CUDA_SUCCESS)
+ // Do not compact if memory allocation for compacted acceleration structure fails
+ // Can just use the uncompacted one then, so succeed here regardless
+ return true;
+ as_mem.push_back(compacted_data);
+
+ check_result_optix_ret(optixAccelCompact(
+ context, NULL, out_handle, compacted_data, compacted_size, &out_handle));
+
+ // Wait for compaction to finish
+ check_result_cuda_ret(cuStreamSynchronize(NULL));
+
+ // Free uncompacted acceleration structure
+ cuMemFree(out_data);
+ as_mem.erase(as_mem.end() - 2); // Remove 'out_data' from 'as_mem' array
+ }
+ }
+
+ return true;
+ }
+
+ bool build_optix_bvh(BVH *bvh) override
+ {
+ assert(bvh->params.top_level);
+
+ unsigned int num_instances = 0;
+ unordered_map<Geometry *, OptixTraversableHandle> geometry;
+ geometry.reserve(bvh->geometry.size());
+
+ // Free all previous acceleration structures
+ for (CUdeviceptr mem : as_mem) {
+ cuMemFree(mem);
+ }
+ as_mem.clear();
+
+ // Build bottom level acceleration structures (BLAS)
+ // Note: Always keep this logic in sync with bvh_optix.cpp!
+ for (Object *ob : bvh->objects) {
+ // Skip geometry for which acceleration structure already exists
+ Geometry *geom = ob->geometry;
+ if (geometry.find(geom) != geometry.end())
+ continue;
+
+ if (geom->type == Geometry::HAIR) {
+ // Build BLAS for curve primitives
+ Hair *const hair = static_cast<Hair *const>(ob->geometry);
+ if (hair->num_curves() == 0) {
+ continue;
+ }
+
+ const size_t num_segments = hair->num_segments();
+
+ size_t num_motion_steps = 1;
+ Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if (motion_blur && hair->use_motion_blur && motion_keys) {
+ num_motion_steps = hair->motion_steps;
+ }
+
+ device_vector<OptixAabb> aabb_data(this, "temp_aabb_data", MEM_READ_ONLY);
+# if OPTIX_ABI_VERSION >= 36
+ device_vector<int> index_data(this, "temp_index_data", MEM_READ_ONLY);
+ device_vector<float4> vertex_data(this, "temp_vertex_data", MEM_READ_ONLY);
+ // Four control points for each curve segment
+ const size_t num_vertices = num_segments * 4;
+ if (DebugFlags().optix.curves_api && hair->curve_shape == CURVE_THICK) {
+ index_data.alloc(num_segments);
+ vertex_data.alloc(num_vertices * num_motion_steps);
+ }
+ else
+# endif
+ aabb_data.alloc(num_segments * num_motion_steps);
+
+ // Get AABBs for each motion step
+ for (size_t step = 0; step < num_motion_steps; ++step) {
+ // The center step for motion vertices is not stored in the attribute
+ const float3 *keys = hair->curve_keys.data();
+ size_t center_step = (num_motion_steps - 1) / 2;
+ if (step != center_step) {
+ size_t attr_offset = (step > center_step) ? step - 1 : step;
+ // Technically this is a float4 array, but sizeof(float3) is the same as sizeof(float4)
+ keys = motion_keys->data_float3() + attr_offset * hair->curve_keys.size();
+ }
+
+ for (size_t j = 0, i = 0; j < hair->num_curves(); ++j) {
+ const Hair::Curve curve = hair->get_curve(j);
+
+ for (int segment = 0; segment < curve.num_segments(); ++segment, ++i) {
+# if OPTIX_ABI_VERSION >= 36
+ if (DebugFlags().optix.curves_api && hair->curve_shape == CURVE_THICK) {
+ int k0 = curve.first_key + segment;
+ int k1 = k0 + 1;
+ int ka = max(k0 - 1, curve.first_key);
+ int kb = min(k1 + 1, curve.first_key + curve.num_keys - 1);
+
+ const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x);
+ const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y);
+ const float4 pz = make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z);
+ const float4 pw = make_float4(hair->curve_radius[ka],
+ hair->curve_radius[k0],
+ hair->curve_radius[k1],
+ hair->curve_radius[kb]);
+
+ // Convert Catmull-Rom data to B-spline control points
+ static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f;
+ static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f;
+ static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f;
+ static const float4 cr2bsp3 = make_float4(-2, +5, -4, +7) / 6.f;
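+ // ka/k0/k1/kb act as Catmull-Rom control points; the cr2bsp rows convert them to the
+ // equivalent cubic B-spline control points expected by the ROUND_CUBIC_BSPLINE primitive.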
+
+ index_data[i] = i * 4;
+ float4 *const v = vertex_data.data() + step * num_vertices + index_data[i];
+ v[0] = make_float4(
+ dot(cr2bsp0, px), dot(cr2bsp0, py), dot(cr2bsp0, pz), dot(cr2bsp0, pw));
+ v[1] = make_float4(
+ dot(cr2bsp1, px), dot(cr2bsp1, py), dot(cr2bsp1, pz), dot(cr2bsp1, pw));
+ v[2] = make_float4(
+ dot(cr2bsp2, px), dot(cr2bsp2, py), dot(cr2bsp2, pz), dot(cr2bsp2, pw));
+ v[3] = make_float4(
+ dot(cr2bsp3, px), dot(cr2bsp3, py), dot(cr2bsp3, pz), dot(cr2bsp3, pw));
+ }
+ else
+# endif
+ {
+ BoundBox bounds = BoundBox::empty;
+ curve.bounds_grow(segment, keys, hair->curve_radius.data(), bounds);
+
+ const size_t index = step * num_segments + i;
+ aabb_data[index].minX = bounds.min.x;
+ aabb_data[index].minY = bounds.min.y;
+ aabb_data[index].minZ = bounds.min.z;
+ aabb_data[index].maxX = bounds.max.x;
+ aabb_data[index].maxY = bounds.max.y;
+ aabb_data[index].maxZ = bounds.max.z;
+ }
+ }
+ }
+ }
+
+ // Upload AABB data to GPU
+ aabb_data.copy_to_device();
+# if OPTIX_ABI_VERSION >= 36
+ index_data.copy_to_device();
+ vertex_data.copy_to_device();
+# endif
+
+ vector<device_ptr> aabb_ptrs;
+ aabb_ptrs.reserve(num_motion_steps);
+# if OPTIX_ABI_VERSION >= 36
+ vector<device_ptr> width_ptrs;
+ vector<device_ptr> vertex_ptrs;
+ width_ptrs.reserve(num_motion_steps);
+ vertex_ptrs.reserve(num_motion_steps);
+# endif
+ for (size_t step = 0; step < num_motion_steps; ++step) {
+ aabb_ptrs.push_back(aabb_data.device_pointer + step * num_segments * sizeof(OptixAabb));
+# if OPTIX_ABI_VERSION >= 36
+ const device_ptr base_ptr = vertex_data.device_pointer +
+ step * num_vertices * sizeof(float4);
+ width_ptrs.push_back(base_ptr + 3 * sizeof(float)); // Offset to the w (radius) component
+ vertex_ptrs.push_back(base_ptr);
+# endif
+ }
+
+ // Force a single any-hit call, so shadow record-all behavior works correctly
+ unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
+ OptixBuildInput build_input = {};
+# if OPTIX_ABI_VERSION >= 36
+ if (DebugFlags().optix.curves_api && hair->curve_shape == CURVE_THICK) {
+ build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES;
+ build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
+ build_input.curveArray.numPrimitives = num_segments;
+ build_input.curveArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
+ build_input.curveArray.numVertices = num_vertices;
+ build_input.curveArray.vertexStrideInBytes = sizeof(float4);
+ build_input.curveArray.widthBuffers = (CUdeviceptr *)width_ptrs.data();
+ build_input.curveArray.widthStrideInBytes = sizeof(float4);
+ build_input.curveArray.indexBuffer = (CUdeviceptr)index_data.device_pointer;
+ build_input.curveArray.indexStrideInBytes = sizeof(int);
+ build_input.curveArray.flag = build_flags;
+ build_input.curveArray.primitiveIndexOffset = hair->optix_prim_offset;
+ }
+ else
+# endif
+ {
+ // Disable visibility test any-hit program, since it is already checked during
+ // intersection. Those trace calls that require anyhit can force it with a ray flag.
+ build_flags |= OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT;
+
+ build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
+# if OPTIX_ABI_VERSION < 23
+ build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
+ build_input.aabbArray.numPrimitives = num_segments;
+ build_input.aabbArray.strideInBytes = sizeof(OptixAabb);
+ build_input.aabbArray.flags = &build_flags;
+ build_input.aabbArray.numSbtRecords = 1;
+ build_input.aabbArray.primitiveIndexOffset = hair->optix_prim_offset;
+# else
+ build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
+ build_input.customPrimitiveArray.numPrimitives = num_segments;
+ build_input.customPrimitiveArray.strideInBytes = sizeof(OptixAabb);
+ build_input.customPrimitiveArray.flags = &build_flags;
+ build_input.customPrimitiveArray.numSbtRecords = 1;
+ build_input.customPrimitiveArray.primitiveIndexOffset = hair->optix_prim_offset;
+# endif
+ }
+
+ // Allocate memory for new BLAS and build it
+ OptixTraversableHandle handle;
+ if (build_optix_bvh(build_input, num_motion_steps, handle)) {
+ geometry.insert({ob->geometry, handle});
+ }
+ else {
+ return false;
+ }
+ }
+ else if (geom->type == Geometry::MESH) {
+ // Build BLAS for triangle primitives
+ Mesh *const mesh = static_cast<Mesh *const>(ob->geometry);
+ if (mesh->num_triangles() == 0) {
+ continue;
+ }
+
+ const size_t num_verts = mesh->verts.size();
+
+ size_t num_motion_steps = 1;
+ Attribute *motion_keys = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if (motion_blur && mesh->use_motion_blur && motion_keys) {
+ num_motion_steps = mesh->motion_steps;
+ }
+
+ device_vector<int> index_data(this, "temp_index_data", MEM_READ_ONLY);
+ index_data.alloc(mesh->triangles.size());
+ memcpy(index_data.data(), mesh->triangles.data(), mesh->triangles.size() * sizeof(int));
+ device_vector<float3> vertex_data(this, "temp_vertex_data", MEM_READ_ONLY);
+ vertex_data.alloc(num_verts * num_motion_steps);
+
+ for (size_t step = 0; step < num_motion_steps; ++step) {
+ const float3 *verts = mesh->verts.data();
+
+ size_t center_step = (num_motion_steps - 1) / 2;
+ // The center step for motion vertices is not stored in the attribute
+ if (step != center_step) {
+ verts = motion_keys->data_float3() +
+ (step > center_step ? step - 1 : step) * num_verts;
+ }
+
+ memcpy(vertex_data.data() + num_verts * step, verts, num_verts * sizeof(float3));
+ }
+
+ // Upload triangle data to GPU
+ index_data.copy_to_device();
+ vertex_data.copy_to_device();
+
+ vector<device_ptr> vertex_ptrs;
+ vertex_ptrs.reserve(num_motion_steps);
+ for (size_t step = 0; step < num_motion_steps; ++step) {
+ vertex_ptrs.push_back(vertex_data.device_pointer + num_verts * step * sizeof(float3));
+ }
+
+ // Force a single any-hit call, so shadow record-all behavior works correctly
+ unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
+ OptixBuildInput build_input = {};
+ build_input.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES;
+ build_input.triangleArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
+ build_input.triangleArray.numVertices = num_verts;
+ build_input.triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3;
+ build_input.triangleArray.vertexStrideInBytes = sizeof(float3);
+ build_input.triangleArray.indexBuffer = index_data.device_pointer;
+ build_input.triangleArray.numIndexTriplets = mesh->num_triangles();
+ build_input.triangleArray.indexFormat = OPTIX_INDICES_FORMAT_UNSIGNED_INT3;
+ build_input.triangleArray.indexStrideInBytes = 3 * sizeof(int);
+ build_input.triangleArray.flags = &build_flags;
+ // The SBT does not store per primitive data since Cycles already allocates separate
+ // buffers for that purpose. OptiX does not allow this to be zero though, so just pass in
+ // one and rely on that having the same meaning in this case.
+ build_input.triangleArray.numSbtRecords = 1;
+ build_input.triangleArray.primitiveIndexOffset = mesh->optix_prim_offset;
+
+ // Allocate memory for new BLAS and build it
+ OptixTraversableHandle handle;
+ if (build_optix_bvh(build_input, num_motion_steps, handle)) {
+ geometry.insert({ob->geometry, handle});
+ }
+ else {
+ return false;
+ }
+ }
+ }
+
+ // Fill instance descriptions
+ device_vector<OptixAabb> aabbs(this, "tlas_aabbs", MEM_READ_ONLY);
+ aabbs.alloc(bvh->objects.size());
+ device_vector<OptixInstance> instances(this, "tlas_instances", MEM_READ_ONLY);
+ instances.alloc(bvh->objects.size());
+
+ for (Object *ob : bvh->objects) {
+ // Skip non-traceable objects
+ if (!ob->is_traceable())
+ continue;
+
+ // Create separate instance for triangle/curve meshes of an object
+ auto handle_it = geometry.find(ob->geometry);
+ if (handle_it == geometry.end()) {
+ continue;
+ }
+ OptixTraversableHandle handle = handle_it->second;
+
+ OptixAabb &aabb = aabbs[num_instances];
+ aabb.minX = ob->bounds.min.x;
+ aabb.minY = ob->bounds.min.y;
+ aabb.minZ = ob->bounds.min.z;
+ aabb.maxX = ob->bounds.max.x;
+ aabb.maxY = ob->bounds.max.y;
+ aabb.maxZ = ob->bounds.max.z;
+
+ OptixInstance &instance = instances[num_instances++];
+ memset(&instance, 0, sizeof(instance));
+
+ // Clear transform to identity matrix
+ instance.transform[0] = 1.0f;
+ instance.transform[5] = 1.0f;
+ instance.transform[10] = 1.0f;
+
+ // Set user instance ID to object index
+ instance.instanceId = ob->get_device_index();
+
+ // Have to have at least one bit set in the mask, or else the instance would always be culled
+ instance.visibilityMask = 1;
+
+ if (ob->geometry->has_volume) {
+ // Volumes have a special bit set in the visibility mask so a trace can mask only volumes
+ instance.visibilityMask |= 2;
+ }
+
+ if (ob->geometry->type == Geometry::HAIR) {
+ // Same applies to curves (so they can be skipped in local trace calls)
+ instance.visibilityMask |= 4;
+
+# if OPTIX_ABI_VERSION >= 36
+ if (motion_blur && ob->geometry->has_motion_blur() && DebugFlags().optix.curves_api &&
+ static_cast<const Hair *>(ob->geometry)->curve_shape == CURVE_THICK) {
+ // Select between motion blur and non-motion blur built-in intersection module
+ instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
+ }
+# endif
+ }
+
+ // Insert motion traversable if object has motion
+ if (motion_blur && ob->use_motion()) {
+ size_t motion_keys = max(ob->motion.size(), 2) - 2;
+ size_t motion_transform_size = sizeof(OptixSRTMotionTransform) +
+ motion_keys * sizeof(OptixSRTData);
+
+ const CUDAContextScope scope(cuContext);
+
+ CUdeviceptr motion_transform_gpu = 0;
+ check_result_cuda_ret(cuMemAlloc(&motion_transform_gpu, motion_transform_size));
+ as_mem.push_back(motion_transform_gpu);
+
+ // Allocate host side memory for motion transform and fill it with transform data
+ OptixSRTMotionTransform &motion_transform = *reinterpret_cast<OptixSRTMotionTransform *>(
+ new uint8_t[motion_transform_size]);
+ motion_transform.child = handle;
+ motion_transform.motionOptions.numKeys = ob->motion.size();
+ motion_transform.motionOptions.flags = OPTIX_MOTION_FLAG_NONE;
+ motion_transform.motionOptions.timeBegin = 0.0f;
+ motion_transform.motionOptions.timeEnd = 1.0f;
+
+ OptixSRTData *const srt_data = motion_transform.srtData;
+ array<DecomposedTransform> decomp(ob->motion.size());
+ transform_motion_decompose(decomp.data(), ob->motion.data(), ob->motion.size());
+
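+ // DecomposedTransform layout, as consumed below: x holds the rotation
+ // quaternion, y.xyz the translation, y.w/z.w/w.w the scale diagonal and
+ // z.x/z.y/w.x the shear terms.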
+ for (size_t i = 0; i < ob->motion.size(); ++i) {
+ // Scale
+ srt_data[i].sx = decomp[i].y.w; // scale.x.x
+ srt_data[i].sy = decomp[i].z.w; // scale.y.y
+ srt_data[i].sz = decomp[i].w.w; // scale.z.z
+
+ // Shear
+ srt_data[i].a = decomp[i].z.x; // scale.x.y
+ srt_data[i].b = decomp[i].z.y; // scale.x.z
+ srt_data[i].c = decomp[i].w.x; // scale.y.z
+ assert(decomp[i].z.z == 0.0f); // scale.y.x
+ assert(decomp[i].w.y == 0.0f); // scale.z.x
+ assert(decomp[i].w.z == 0.0f); // scale.z.y
+
+ // Pivot point
+ srt_data[i].pvx = 0.0f;
+ srt_data[i].pvy = 0.0f;
+ srt_data[i].pvz = 0.0f;
+
+ // Rotation
+ srt_data[i].qx = decomp[i].x.x;
+ srt_data[i].qy = decomp[i].x.y;
+ srt_data[i].qz = decomp[i].x.z;
+ srt_data[i].qw = decomp[i].x.w;
+
+ // Translation
+ srt_data[i].tx = decomp[i].y.x;
+ srt_data[i].ty = decomp[i].y.y;
+ srt_data[i].tz = decomp[i].y.z;
+ }
+
+ // Upload motion transform to GPU
+ cuMemcpyHtoD(motion_transform_gpu, &motion_transform, motion_transform_size);
+ delete[] reinterpret_cast<uint8_t *>(&motion_transform);
+
+ // Disable instance transform if object uses motion transform already
+ instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
+
+ // Get traversable handle to motion transform
+ optixConvertPointerToTraversableHandle(context,
+ motion_transform_gpu,
+ OPTIX_TRAVERSABLE_TYPE_SRT_MOTION_TRANSFORM,
+ &instance.traversableHandle);
+ }
+ else {
+ instance.traversableHandle = handle;
+
+ if (ob->geometry->is_instanced()) {
+ // Set transform matrix
+ memcpy(instance.transform, &ob->tfm, sizeof(instance.transform));
+ }
+ else {
+ // Disable instance transform if geometry already has it applied to vertex data
+ instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
+ // Non-instanced objects read ID from prim_object, so
+ // distinguish them from instanced objects with high bit set
+ instance.instanceId |= 0x800000;
+ }
+ }
+ }
+
+ // Upload instance descriptions
+ aabbs.resize(num_instances);
+ aabbs.copy_to_device();
+ instances.resize(num_instances);
+ instances.copy_to_device();
+
+ // Build top-level acceleration structure (TLAS)
+ OptixBuildInput build_input = {};
+ build_input.type = OPTIX_BUILD_INPUT_TYPE_INSTANCES;
+ build_input.instanceArray.instances = instances.device_pointer;
+ build_input.instanceArray.numInstances = num_instances;
+ build_input.instanceArray.aabbs = aabbs.device_pointer;
+ build_input.instanceArray.numAabbs = num_instances;
+
+ return build_optix_bvh(build_input, 0, tlas_handle);
+ }
+
+ void const_copy_to(const char *name, void *host, size_t size) override
+ {
+ // Set constant memory for CUDA module
+ // TODO(pmours): This is only used for tonemapping (see 'film_convert').
+ // Could be removed by moving those functions to filter CUDA module.
+ CUDADevice::const_copy_to(name, host, size);
+
+ if (strcmp(name, "__data") == 0) {
+ assert(size <= sizeof(KernelData));
+
+ // Fix traversable handle on multi-device setups
+ KernelData *const data = (KernelData *)host;
+ *(OptixTraversableHandle *)&data->bvh.scene = tlas_handle;
+
+ update_launch_params(offsetof(KernelParams, data), host, size);
+ return;
+ }
+
+ // Update data storage pointers in launch parameters
+# define KERNEL_TEX(data_type, tex_name) \
+ if (strcmp(name, #tex_name) == 0) { \
+ update_launch_params(offsetof(KernelParams, tex_name), host, size); \
+ return; \
+ }
+# include "kernel/kernel_textures.h"
+# undef KERNEL_TEX
+ }
+
+ void update_launch_params(size_t offset, void *data, size_t data_size)
+ {
+ const CUDAContextScope scope(cuContext);
+
+ for (int i = 0; i < info.cpu_threads; ++i)
+ check_result_cuda(
+ cuMemcpyHtoD(launch_params.device_pointer + i * launch_params.data_elements + offset,
+ data,
+ data_size));
+ }
+
+ void task_add(DeviceTask &task) override
+ {
+ // Upload texture information to device if it has changed since last launch
+ load_texture_info();
+
+ if (task.type == DeviceTask::FILM_CONVERT) {
+ // Execute in main thread because of OpenGL access
+ film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
+ return;
+ }
+
+ if (task.type == DeviceTask::DENOISE_BUFFER) {
+ // Execute denoising in a single thread (e.g. to avoid race conditions during creation)
+ task_pool.push([=] {
+ DeviceTask task_copy = task;
+ thread_run(task_copy, 0);
+ });
+ return;
+ }
+
+ // Split task into smaller ones
+ list<DeviceTask> tasks;
+ task.split(tasks, info.cpu_threads);
+
+ // Queue tasks in internal task pool
+ int task_index = 0;
+ for (DeviceTask &task : tasks) {
+ task_pool.push([=] {
+ // Using task index parameter instead of thread index, since number of CUDA streams may
+ // differ from number of threads
+ DeviceTask task_copy = task;
+ thread_run(task_copy, task_index);
+ });
+ task_index++;
+ }
+ }
+
+ void task_wait() override
+ {
+ // Wait for all queued tasks to finish
+ task_pool.wait_work();
+ }
+
+ void task_cancel() override
+ {
+ // Cancel any remaining tasks in the internal pool
+ task_pool.cancel();
+ }
+};
+
+bool device_optix_init()
+{
+ if (g_optixFunctionTable.optixDeviceContextCreate != NULL)
+ return true; // Already initialized function table
+
+ // Need to initialize CUDA as well
+ if (!device_cuda_init())
+ return false;
+
+ const OptixResult result = optixInit();
+
+ if (result == OPTIX_ERROR_UNSUPPORTED_ABI_VERSION) {
+ VLOG(1) << "OptiX initialization failed because driver does not support ABI version "
+ << OPTIX_ABI_VERSION;
+ return false;
+ }
+ else if (result != OPTIX_SUCCESS) {
+ VLOG(1) << "OptiX initialization failed with error code " << (unsigned int)result;
+ return false;
+ }
+
+ // Loaded OptiX successfully!
+ return true;
+}
+
+void device_optix_info(const vector<DeviceInfo> &cuda_devices, vector<DeviceInfo> &devices)
+{
+ devices.reserve(cuda_devices.size());
+
+ // Simply add all supported CUDA devices as OptiX devices again
+ for (DeviceInfo info : cuda_devices) {
+ assert(info.type == DEVICE_CUDA);
+
+ int major;
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, info.num);
+ if (major < 5) {
+ continue; // Only Maxwell and up are supported by OptiX
+ }
+
+ info.type = DEVICE_OPTIX;
+ info.id += "_OptiX";
+ info.denoisers |= DENOISER_OPTIX;
+
+ devices.push_back(info);
+ }
+}
+
+Device *device_optix_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
+{
+ return new OptiXDevice(info, stats, profiler, background);
+}
+
+CCL_NAMESPACE_END
+
+#endif
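The curve path above converts each Catmull-Rom hair segment into B-spline control points before handing it to OptiX's built-in curve primitive. The following standalone sketch mirrors that conversion outside of Cycles; float4 and dot() are simplified stand-ins for the engine's own math types, and the key layout follows the code above (xyz = position, w = radius).

// Standalone sketch of the Catmull-Rom -> B-spline conversion used for the
// OptiX curve build input above. Types are simplified stand-ins.
#include <cstdio>

struct float4 {
  float x, y, z, w;
};

static float dot(const float4 &a, const float4 &b)
{
  return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
}

// key[0..3] = ka, k0, k1, kb with xyz = position and w = radius.
// out[0..3] are the four B-spline control points for the segment k0..k1.
static void catmull_rom_to_bspline(const float4 key[4], float4 out[4])
{
  static const float4 cr2bsp[4] = {
      {+7 / 6.0f, -4 / 6.0f, +5 / 6.0f, -2 / 6.0f},
      {-2 / 6.0f, 11 / 6.0f, -4 / 6.0f, +1 / 6.0f},
      {+1 / 6.0f, -4 / 6.0f, 11 / 6.0f, -2 / 6.0f},
      {-2 / 6.0f, +5 / 6.0f, -4 / 6.0f, +7 / 6.0f},
  };

  // Gather each component across the four keys, then take one dot product
  // per output control point, exactly as the device code does per segment.
  const float4 px = {key[0].x, key[1].x, key[2].x, key[3].x};
  const float4 py = {key[0].y, key[1].y, key[2].y, key[3].y};
  const float4 pz = {key[0].z, key[1].z, key[2].z, key[3].z};
  const float4 pw = {key[0].w, key[1].w, key[2].w, key[3].w};

  for (int i = 0; i < 4; i++) {
    out[i] = {dot(cr2bsp[i], px), dot(cr2bsp[i], py), dot(cr2bsp[i], pz), dot(cr2bsp[i], pw)};
  }
}

int main()
{
  // A straight segment with constant radius stays straight with the same radius.
  const float4 keys[4] = {{0, 0, 0, 1}, {1, 0, 0, 1}, {2, 0, 0, 1}, {3, 0, 0, 1}};
  float4 cp[4];
  catmull_rom_to_bspline(keys, cp);
  for (int i = 0; i < 4; i++) {
    printf("cp[%d] = (%g, %g, %g) radius %g\n", i, cp[i].x, cp[i].y, cp[i].z, cp[i].w);
  }
  return 0;
}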
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index 42e597a34d7..4c288f60c16 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -55,6 +55,10 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device)
kernel_next_iteration_setup = NULL;
kernel_indirect_subsurface = NULL;
kernel_buffer_update = NULL;
+ kernel_adaptive_stopping = NULL;
+ kernel_adaptive_filter_x = NULL;
+ kernel_adaptive_filter_y = NULL;
+ kernel_adaptive_adjust_samples = NULL;
}
DeviceSplitKernel::~DeviceSplitKernel()
@@ -83,6 +87,10 @@ DeviceSplitKernel::~DeviceSplitKernel()
delete kernel_next_iteration_setup;
delete kernel_indirect_subsurface;
delete kernel_buffer_update;
+ delete kernel_adaptive_stopping;
+ delete kernel_adaptive_filter_x;
+ delete kernel_adaptive_filter_y;
+ delete kernel_adaptive_adjust_samples;
}
bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures &requested_features)
@@ -114,6 +122,10 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures &requested_fe
LOAD_KERNEL(next_iteration_setup);
LOAD_KERNEL(indirect_subsurface);
LOAD_KERNEL(buffer_update);
+ LOAD_KERNEL(adaptive_stopping);
+ LOAD_KERNEL(adaptive_filter_x);
+ LOAD_KERNEL(adaptive_filter_y);
+ LOAD_KERNEL(adaptive_adjust_samples);
#undef LOAD_KERNEL
@@ -133,7 +145,7 @@ size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory &kg,
return max_buffer_size / size_per_element;
}
-bool DeviceSplitKernel::path_trace(DeviceTask *task,
+bool DeviceSplitKernel::path_trace(DeviceTask &task,
RenderTile &tile,
device_memory &kgbuffer,
device_memory &kernel_data)
@@ -202,13 +214,21 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
/* initial guess to start rolling average */
const int initial_num_samples = 1;
/* approx number of samples per second */
- int samples_per_second = (avg_time_per_sample > 0.0) ?
- int(double(time_multiplier) / avg_time_per_sample) + 1 :
- initial_num_samples;
+ const int samples_per_second = (avg_time_per_sample > 0.0) ?
+ int(double(time_multiplier) / avg_time_per_sample) + 1 :
+ initial_num_samples;
RenderTile subtile = tile;
subtile.start_sample = tile.sample;
- subtile.num_samples = min(samples_per_second,
+ subtile.num_samples = samples_per_second;
+
+ if (task.adaptive_sampling.use) {
+ subtile.num_samples = task.adaptive_sampling.align_dynamic_samples(subtile.start_sample,
+ subtile.num_samples);
+ }
+
+ /* Don't go beyond requested number of samples. */
+ subtile.num_samples = min(subtile.num_samples,
tile.start_sample + tile.num_samples - tile.sample);
if (device->have_error()) {
@@ -266,7 +286,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(buffer_update, global_size, local_size);
- if (task->get_cancel() && cancel_time == DBL_MAX) {
+ if (task.get_cancel() && cancel_time == DBL_MAX) {
/* Wait up to twice as many seconds for current samples to finish
* to avoid artifacts in render result from ending too soon.
*/
@@ -302,6 +322,23 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
}
}
+ int filter_sample = tile.sample + subtile.num_samples - 1;
+ if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(filter_sample)) {
+ size_t buffer_size[2];
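+ /* Launch dimensions: adaptive_stopping covers the whole tile (w x h), while
+  * filter_x is launched over tile.h work items and filter_y over tile.w,
+  * presumably sweeping a full row (or column) per work item. */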
+ buffer_size[0] = round_up(tile.w, local_size[0]);
+ buffer_size[1] = round_up(tile.h, local_size[1]);
+ kernel_adaptive_stopping->enqueue(
+ KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data);
+ buffer_size[0] = round_up(tile.h, local_size[0]);
+ buffer_size[1] = round_up(1, local_size[1]);
+ kernel_adaptive_filter_x->enqueue(
+ KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data);
+ buffer_size[0] = round_up(tile.w, local_size[0]);
+ buffer_size[1] = round_up(1, local_size[1]);
+ kernel_adaptive_filter_y->enqueue(
+ KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data);
+ }
+
double time_per_sample = ((time_dt() - start_time) / subtile.num_samples);
if (avg_time_per_sample == 0.0) {
@@ -315,15 +352,37 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
#undef ENQUEUE_SPLIT_KERNEL
tile.sample += subtile.num_samples;
- task->update_progress(&tile, tile.w * tile.h * subtile.num_samples);
+ task.update_progress(&tile, tile.w * tile.h * subtile.num_samples);
time_multiplier = min(time_multiplier << 1, 10);
- if (task->get_cancel()) {
+ if (task.get_cancel()) {
return true;
}
}
+ if (task.adaptive_sampling.use) {
+ /* Reset the start samples. */
+ RenderTile subtile = tile;
+ subtile.start_sample = tile.start_sample;
+ subtile.num_samples = tile.sample - tile.start_sample;
+ enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size),
+ subtile,
+ num_global_elements,
+ kgbuffer,
+ kernel_data,
+ split_data,
+ ray_state,
+ queue_index,
+ use_queues_flag,
+ work_pool_wgs);
+ size_t buffer_size[2];
+ buffer_size[0] = round_up(tile.w, local_size[0]);
+ buffer_size[1] = round_up(tile.h, local_size[1]);
+ kernel_adaptive_adjust_samples->enqueue(
+ KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data);
+ }
+
return true;
}
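For orientation, the sample budgeting the reworked path_trace() loop performs on each iteration can be summarized outside the split-kernel machinery as in the sketch below; the Budget struct is a simplified stand-in for the RenderTile/AdaptiveSampling state, while the arithmetic mirrors the hunk above (rolling samples-per-second estimate, alignment to the adaptive filter interval, then clamping to the remaining samples).

// Sketch of the per-iteration sample budget used by the split-kernel path tracer.
#include <algorithm>
#include <cstdio>

// Simplified stand-in for the RenderTile / AdaptiveSampling state involved.
struct Budget {
  int tile_start_sample;  // first sample of the tile
  int tile_num_samples;   // total samples requested for the tile
  int tile_sample;        // absolute index of the next sample to render
  bool adaptive_use;
  int adaptive_step;      // adaptive filtering interval
};

// Mirrors AdaptiveSampling::align_dynamic_samples() from device_task.cpp.
static int align_dynamic_samples(int adaptive_step, int offset, int samples)
{
  samples += offset;
  if (samples > adaptive_step) {
    while (samples % adaptive_step != 0) {
      samples--;
    }
  }
  return std::max(samples - offset, 1);
}

static int samples_this_iteration(const Budget &b, double avg_time_per_sample, int time_multiplier)
{
  // Rolling estimate of how many samples fit into the current time slice
  // (initial guess of 1 before any timing data exists).
  int num = (avg_time_per_sample > 0.0) ?
                int(double(time_multiplier) / avg_time_per_sample) + 1 :
                1;

  // Align so that the slice ends on an adaptive-filtering boundary.
  if (b.adaptive_use) {
    num = align_dynamic_samples(b.adaptive_step, b.tile_sample, num);
  }

  // Never go beyond the number of samples requested for this tile.
  return std::min(num, b.tile_start_sample + b.tile_num_samples - b.tile_sample);
}

int main()
{
  const Budget b = {0, 16, 5, true, 4};
  // Estimate of 9 samples, aligned down to 7 so that sample index 11 ends a
  // filter interval, well within the 11 samples still remaining for the tile.
  printf("%d\n", samples_this_iteration(b, 0.25, 2));  // prints 7
  return 0;
}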
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index c9fb2ac844f..07a21b10299 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -27,7 +27,7 @@ CCL_NAMESPACE_BEGIN
* Since some bytes may be needed for aligning chunks of memory;
* This is the amount of memory that we dedicate for that purpose.
*/
-#define DATA_ALLOCATION_MEM_FACTOR 5000000 //5MB
+#define DATA_ALLOCATION_MEM_FACTOR 5000000 // 5MB
/* Types used for split kernel */
@@ -75,6 +75,10 @@ class DeviceSplitKernel {
SplitKernelFunction *kernel_next_iteration_setup;
SplitKernelFunction *kernel_indirect_subsurface;
SplitKernelFunction *kernel_buffer_update;
+ SplitKernelFunction *kernel_adaptive_stopping;
+ SplitKernelFunction *kernel_adaptive_filter_x;
+ SplitKernelFunction *kernel_adaptive_filter_y;
+ SplitKernelFunction *kernel_adaptive_adjust_samples;
/* Global memory variables [porting]; this memory is used for
* co-operation between different kernels; data written by one
@@ -105,7 +109,7 @@ class DeviceSplitKernel {
virtual ~DeviceSplitKernel();
bool load_kernels(const DeviceRequestedFeatures &requested_features);
- bool path_trace(DeviceTask *task,
+ bool path_trace(DeviceTask &task,
RenderTile &rtile,
device_memory &kgbuffer,
device_memory &kernel_data);
@@ -133,7 +137,7 @@ class DeviceSplitKernel {
virtual int2 split_kernel_local_size() = 0;
virtual int2 split_kernel_global_size(device_memory &kg,
device_memory &data,
- DeviceTask *task) = 0;
+ DeviceTask &task) = 0;
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp
index 376ad06a734..6e7c184c6c9 100644
--- a/intern/cycles/device/device_task.cpp
+++ b/intern/cycles/device/device_task.cpp
@@ -44,12 +44,13 @@ DeviceTask::DeviceTask(Type type_)
shader_eval_type(0),
shader_filter(0),
shader_x(0),
- shader_w(0)
+ shader_w(0),
+ buffers(nullptr)
{
last_update_time = time_dt();
}
-int DeviceTask::get_subtask_count(int num, int max_size)
+int DeviceTask::get_subtask_count(int num, int max_size) const
{
if (max_size != 0) {
int max_size_num;
@@ -77,7 +78,7 @@ int DeviceTask::get_subtask_count(int num, int max_size)
return num;
}
-void DeviceTask::split(list<DeviceTask> &tasks, int num, int max_size)
+void DeviceTask::split(list<DeviceTask> &tasks, int num, int max_size) const
{
num = get_subtask_count(num, max_size);
@@ -115,7 +116,7 @@ void DeviceTask::split(list<DeviceTask> &tasks, int num, int max_size)
void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples)
{
- if ((type != RENDER) && (type != SHADER))
+ if (type == FILM_CONVERT)
return;
if (update_progress_sample) {
@@ -136,4 +137,58 @@ void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples)
}
}
+/* Adaptive Sampling */
+
+AdaptiveSampling::AdaptiveSampling() : use(true), adaptive_step(0), min_samples(0)
+{
+}
+
+/* Render samples in steps that align with the adaptive filtering. */
+int AdaptiveSampling::align_static_samples(int samples) const
+{
+ if (samples > adaptive_step) {
+ /* Make multiple of adaptive_step. */
+ while (samples % adaptive_step != 0) {
+ samples--;
+ }
+ }
+ else if (samples < adaptive_step) {
+ /* Make divisor of adaptive_step. */
+ while (adaptive_step % samples != 0) {
+ samples--;
+ }
+ }
+
+ return max(samples, 1);
+}
+
+/* Render samples in steps that align with the adaptive filtering, with the
+ * suggested number of samples dynamically changing. */
+int AdaptiveSampling::align_dynamic_samples(int offset, int samples) const
+{
+ /* Round down so that we end up on a multiple of adaptive_step. */
+ samples += offset;
+
+ if (samples > adaptive_step) {
+ /* Make multiple of adaptive_step. */
+ while (samples % adaptive_step != 0) {
+ samples--;
+ }
+ }
+
+ samples -= offset;
+
+ return max(samples, 1);
+}
+
+bool AdaptiveSampling::need_filter(int sample) const
+{
+ if (sample > min_samples) {
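+ /* Bit-mask test, equivalent to (sample % adaptive_step) == adaptive_step - 1,
+  * as long as adaptive_step is a power of two. */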
+ return (sample & (adaptive_step - 1)) == (adaptive_step - 1);
+ }
+ else {
+ return false;
+ }
+}
+
CCL_NAMESPACE_END
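A short worked example of the alignment helpers defined above, as a standalone sketch; adaptive_step = 4 is an illustrative value that also satisfies the power-of-two assumption behind the bit mask in need_filter().

// Worked example for AdaptiveSampling::align_static_samples() / need_filter().
#include <algorithm>
#include <cstdio>

static int align_static_samples(int adaptive_step, int samples)
{
  if (samples > adaptive_step) {
    while (samples % adaptive_step != 0) {
      samples--;  // round down to a multiple of adaptive_step
    }
  }
  else if (samples < adaptive_step) {
    while (adaptive_step % samples != 0) {
      samples--;  // round down to a divisor of adaptive_step
    }
  }
  return std::max(samples, 1);
}

static bool need_filter(int adaptive_step, int min_samples, int sample)
{
  // Matches every adaptive_step-th sample, assuming adaptive_step is a power of two.
  return sample > min_samples && (sample & (adaptive_step - 1)) == (adaptive_step - 1);
}

int main()
{
  const int step = 4, min_samples = 0;

  printf("%d\n", align_static_samples(step, 10));     // 8: largest multiple of 4 <= 10
  printf("%d\n", align_static_samples(step, 3));      // 2: largest divisor of 4 <= 3
  printf("%d\n", need_filter(step, min_samples, 7));  // 1: 7 & 3 == 3, filter runs
  printf("%d\n", need_filter(step, min_samples, 8));  // 0: 8 & 3 == 0, no filtering
  return 0;
}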
diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h
index 5cc2e5e25db..fd380788282 100644
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -21,7 +21,6 @@
#include "util/util_function.h"
#include "util/util_list.h"
-#include "util/util_task.h"
CCL_NAMESPACE_BEGIN
@@ -30,37 +29,106 @@ CCL_NAMESPACE_BEGIN
class Device;
class RenderBuffers;
class RenderTile;
+class RenderTileNeighbors;
class Tile;
+enum DenoiserType {
+ DENOISER_NLM = 1,
+ DENOISER_OPTIX = 2,
+ DENOISER_OPENIMAGEDENOISE = 4,
+ DENOISER_NUM,
+
+ DENOISER_NONE = 0,
+ DENOISER_ALL = ~0,
+};
+
+enum DenoiserInput {
+ DENOISER_INPUT_RGB = 1,
+ DENOISER_INPUT_RGB_ALBEDO = 2,
+ DENOISER_INPUT_RGB_ALBEDO_NORMAL = 3,
+
+ DENOISER_INPUT_NUM,
+};
+
+typedef int DenoiserTypeMask;
+
class DenoiseParams {
public:
- /* Pixel radius for neighbouring pixels to take into account. */
+ /* Apply denoiser to image. */
+ bool use;
+ /* Output denoising data passes (possibly without applying the denoiser). */
+ bool store_passes;
+
+ /* Denoiser type. */
+ DenoiserType type;
+
+ /* Viewport start sample. */
+ int start_sample;
+
+ /** Native Denoiser **/
+
+ /* Pixel radius for neighboring pixels to take into account. */
int radius;
/* Controls neighbor pixel weighting for the denoising filter. */
float strength;
/* Preserve more or less detail based on feature passes. */
float feature_strength;
- /* When removing pixels that don't carry information, use a relative threshold instead of an absolute one. */
+ /* When removing pixels that don't carry information,
+ * use a relative threshold instead of an absolute one. */
bool relative_pca;
/* How many frames before and after the current center frame are included. */
int neighbor_frames;
/* Clamp the input to the range of +-1e8. Should be enough for any legitimate data. */
bool clamp_input;
+ /** OIDN/OptiX Denoiser **/
+
+ /* Passes handed over to the OIDN/OptiX denoiser (default is color + albedo + normal). */
+ DenoiserInput input_passes;
+
DenoiseParams()
{
+ use = false;
+ store_passes = false;
+
+ type = DENOISER_NLM;
+
radius = 8;
strength = 0.5f;
feature_strength = 0.5f;
relative_pca = false;
neighbor_frames = 2;
clamp_input = true;
+
+ input_passes = DENOISER_INPUT_RGB_ALBEDO_NORMAL;
+
+ start_sample = 0;
+ }
+
+ /* Test if a denoising task needs to run, also to prefilter passes for the native
+ * denoiser when we are not applying denoising to the combined image. */
+ bool need_denoising_task() const
+ {
+ return (use || (store_passes && type == DENOISER_NLM));
}
};
-class DeviceTask : public Task {
+class AdaptiveSampling {
public:
- typedef enum { RENDER, FILM_CONVERT, SHADER } Type;
+ AdaptiveSampling();
+
+ int align_static_samples(int samples) const;
+ int align_dynamic_samples(int offset, int samples) const;
+ bool need_filter(int sample) const;
+
+ bool use;
+ int adaptive_step;
+ int min_samples;
+};
+
+class DeviceTask {
+ public:
+ typedef enum { RENDER, FILM_CONVERT, SHADER, DENOISE_BUFFER } Type;
Type type;
int x, y, w, h;
@@ -77,30 +145,28 @@ class DeviceTask : public Task {
int shader_filter;
int shader_x, shader_w;
- int passes_size;
+ RenderBuffers *buffers;
explicit DeviceTask(Type type = RENDER);
- int get_subtask_count(int num, int max_size = 0);
- void split(list<DeviceTask> &tasks, int num, int max_size = 0);
+ int get_subtask_count(int num, int max_size = 0) const;
+ void split(list<DeviceTask> &tasks, int num, int max_size = 0) const;
void update_progress(RenderTile *rtile, int pixel_samples = -1);
- function<bool(Device *device, RenderTile &)> acquire_tile;
+ function<bool(Device *device, RenderTile &, uint)> acquire_tile;
function<void(long, int)> update_progress_sample;
function<void(RenderTile &)> update_tile_sample;
function<void(RenderTile &)> release_tile;
function<bool()> get_cancel;
- function<void(RenderTile *, Device *)> map_neighbor_tiles;
- function<void(RenderTile *, Device *)> unmap_neighbor_tiles;
+ function<void(RenderTileNeighbors &, Device *)> map_neighbor_tiles;
+ function<void(RenderTileNeighbors &, Device *)> unmap_neighbor_tiles;
+ uint tile_types;
DenoiseParams denoising;
bool denoising_from_render;
vector<int> denoising_frames;
- bool denoising_do_filter;
- bool denoising_write_passes;
-
int pass_stride;
int frame_stride;
int target_pass_stride;
@@ -109,7 +175,7 @@ class DeviceTask : public Task {
bool need_finish_queue;
bool integrator_branched;
- int2 requested_tile_size;
+ AdaptiveSampling adaptive_sampling;
protected:
double last_update_time;
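Since the DenoiserType values above are distinct bits, a device's supported denoisers fit into a single DenoiserTypeMask. A minimal illustrative sketch follows; the enum values are copied from the header, the rest is hypothetical usage.

// Minimal sketch of DenoiserType used as a bit mask (values copied from the header).
#include <cstdio>

enum DenoiserType {
  DENOISER_NLM = 1,
  DENOISER_OPTIX = 2,
  DENOISER_OPENIMAGEDENOISE = 4,

  DENOISER_NONE = 0,
  DENOISER_ALL = ~0,
};

typedef int DenoiserTypeMask;

int main()
{
  DenoiserTypeMask supported = DENOISER_NLM;
  supported |= DENOISER_OPTIX;  // e.g. what device_optix_info() adds per device

  printf("OptiX denoising available: %d\n", (supported & DENOISER_OPTIX) != 0);
  printf("OIDN denoising available:  %d\n", (supported & DENOISER_OPENIMAGEDENOISE) != 0);
  return 0;
}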
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/device_opencl.h
index e7bafa0b8a8..e0140996cf0 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/device_opencl.h
@@ -23,6 +23,7 @@
# include "util/util_map.h"
# include "util/util_param.h"
# include "util/util_string.h"
+# include "util/util_task.h"
# include "clew.h"
@@ -88,9 +89,12 @@ class OpenCLInfo {
static bool device_supported(const string &platform_name, const cl_device_id device_id);
static bool platform_version_check(cl_platform_id platform, string *error = NULL);
static bool device_version_check(cl_device_id device, string *error = NULL);
+ static bool get_device_version(cl_device_id device,
+ int *r_major,
+ int *r_minor,
+ string *error = NULL);
static string get_hardware_id(const string &platform_name, cl_device_id device_id);
- static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
- bool force_all = false);
+ static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices);
/* ** Some handy shortcuts to low level cl*GetInfo() functions. ** */
@@ -255,6 +259,8 @@ class OpenCLDevice : public Device {
TaskPool load_required_kernel_task_pool;
/* Task pool for optional kernels (feature kernels during foreground rendering) */
TaskPool load_kernel_task_pool;
+ std::atomic<int> load_kernel_num_compiling;
+
cl_context cxContext;
cl_command_queue cqCommandQueue;
cl_platform_id cpPlatform;
@@ -358,8 +364,8 @@ class OpenCLDevice : public Device {
OpenCLSplitPrograms(OpenCLDevice *device);
~OpenCLSplitPrograms();
- /* Load the kernels and put the created kernels in the given `programs`
- * paramter. */
+ /* Load the kernels and put the created kernels in the given
+ * `programs` parameter. */
void load_kernels(vector<OpenCLProgram *> &programs,
const DeviceRequestedFeatures &requested_features,
bool is_preview = false);
@@ -381,7 +387,6 @@ class OpenCLDevice : public Device {
ConstMemMap const_mem_map;
MemMap mem_map;
- device_ptr null_mem;
bool device_initialized;
string platform_name;
@@ -429,8 +434,10 @@ class OpenCLDevice : public Device {
int mem_sub_ptr_alignment();
void const_copy_to(const char *name, void *host, size_t size);
- void tex_alloc(device_memory &mem);
- void tex_free(device_memory &mem);
+ void global_alloc(device_memory &mem);
+ void global_free(device_memory &mem);
+ void tex_alloc(device_texture &mem);
+ void tex_free(device_texture &mem);
size_t global_size_round_up(int group_size, int global_size);
void enqueue_kernel(cl_kernel kernel,
@@ -446,17 +453,11 @@ class OpenCLDevice : public Device {
device_ptr rgba_byte,
device_ptr rgba_half);
void shader(DeviceTask &task);
+ void update_adaptive(DeviceTask &task, RenderTile &tile, int sample);
+ void bake(DeviceTask &task, RenderTile &tile);
void denoise(RenderTile &tile, DenoisingTask &denoising);
- class OpenCLDeviceTask : public DeviceTask {
- public:
- OpenCLDeviceTask(OpenCLDevice *device, DeviceTask &task) : DeviceTask(task)
- {
- run = function_bind(&OpenCLDevice::thread_run, device, this);
- }
- };
-
int get_split_task_count(DeviceTask & /*task*/)
{
return 1;
@@ -464,7 +465,10 @@ class OpenCLDevice : public Device {
void task_add(DeviceTask &task)
{
- task_pool.push(new OpenCLDeviceTask(this, task));
+ task_pool.push([=] {
+ DeviceTask task_copy = task;
+ thread_run(task_copy);
+ });
}
void task_wait()
@@ -477,7 +481,7 @@ class OpenCLDevice : public Device {
task_pool.cancel();
}
- void thread_run(DeviceTask *task);
+ void thread_run(DeviceTask &task);
virtual BVHLayoutMask get_bvh_layout_mask() const
{
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp
index 70b1a643044..e851749949d 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/device_opencl_impl.cpp
@@ -16,7 +16,7 @@
#ifdef WITH_OPENCL
-# include "device/opencl/opencl.h"
+# include "device/opencl/device_opencl.h"
# include "kernel/kernel_types.h"
# include "kernel/split/kernel_split_data_types.h"
@@ -56,7 +56,11 @@ static const string SPLIT_BUNDLE_KERNELS =
"enqueue_inactive "
"next_iteration_setup "
"indirect_subsurface "
- "buffer_update";
+ "buffer_update "
+ "adaptive_stopping "
+ "adaptive_filter_x "
+ "adaptive_filter_y "
+ "adaptive_adjust_samples";
const string OpenCLDevice::get_opencl_program_name(const string &kernel_name)
{
@@ -194,7 +198,7 @@ string OpenCLDevice::get_build_options(const DeviceRequestedFeatures &requested_
DeviceRequestedFeatures features(requested_features);
enable_default_features(features);
- /* Always turn off baking at this point. Baking is only usefull when building the bake kernel.
+ /* Always turn off baking at this point. Baking is only useful when building the bake kernel.
* This also makes sure that the kernels that are built during baking can be reused
* when not doing any baking. */
features.use_baking = false;
@@ -253,19 +257,19 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels(
/* Ordered with most complex kernels first, to reduce overall compile time. */
ADD_SPLIT_KERNEL_PROGRAM(subsurface_scatter);
+ ADD_SPLIT_KERNEL_PROGRAM(direct_lighting);
+ ADD_SPLIT_KERNEL_PROGRAM(indirect_background);
if (requested_features.use_volume || is_preview) {
ADD_SPLIT_KERNEL_PROGRAM(do_volume);
}
+ ADD_SPLIT_KERNEL_PROGRAM(shader_eval);
+ ADD_SPLIT_KERNEL_PROGRAM(lamp_emission);
+ ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao);
ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_dl);
ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_ao);
- ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao);
- ADD_SPLIT_KERNEL_PROGRAM(lamp_emission);
- ADD_SPLIT_KERNEL_PROGRAM(direct_lighting);
- ADD_SPLIT_KERNEL_PROGRAM(indirect_background);
- ADD_SPLIT_KERNEL_PROGRAM(shader_eval);
/* Quick kernels bundled in a single program to reduce overhead of starting
- * Blender processes. */
+ * Blender processes. */
program_split = OpenCLDevice::OpenCLProgram(
device,
"split_bundle",
@@ -283,6 +287,10 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels(
ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(next_iteration_setup);
ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(indirect_subsurface);
ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(buffer_update);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_stopping);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_filter_x);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_filter_y);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_adjust_samples);
programs.push_back(&program_split);
# undef ADD_SPLIT_KERNEL_PROGRAM
@@ -534,7 +542,7 @@ class OpenCLSplitKernel : public DeviceSplitKernel {
virtual int2 split_kernel_global_size(device_memory &kg,
device_memory &data,
- DeviceTask * /*task*/)
+ DeviceTask & /*task*/)
{
cl_device_type type = OpenCLInfo::get_device_type(device->cdDevice);
/* Use small global size on CPU devices as it seems to be much faster. */
@@ -602,16 +610,16 @@ void OpenCLDevice::opencl_assert_err(cl_int err, const char *where)
OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
: Device(info, stats, profiler, background),
+ load_kernel_num_compiling(0),
kernel_programs(this),
preview_programs(this),
memory_manager(this),
- texture_info(this, "__texture_info", MEM_TEXTURE)
+ texture_info(this, "__texture_info", MEM_GLOBAL)
{
cpPlatform = NULL;
cdDevice = NULL;
cxContext = NULL;
cqCommandQueue = NULL;
- null_mem = 0;
device_initialized = false;
textures_need_update = true;
use_preview_kernels = !background;
@@ -662,35 +670,27 @@ OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, b
return;
}
- null_mem = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_ONLY, 1, NULL, &ciErr);
- if (opencl_error(ciErr)) {
- opencl_error("OpenCL: Error creating memory buffer for NULL");
- return;
- }
-
- /* Allocate this right away so that texture_info is placed at offset 0 in the device memory buffers */
+ /* Allocate this right away so that texture_info
+ * is placed at offset 0 in the device memory buffers. */
texture_info.resize(1);
memory_manager.alloc("texture_info", texture_info);
device_initialized = true;
split_kernel = new OpenCLSplitKernel(this);
- if (!background) {
+ if (use_preview_kernels) {
load_preview_kernels();
}
}
OpenCLDevice::~OpenCLDevice()
{
- task_pool.stop();
- load_required_kernel_task_pool.stop();
- load_kernel_task_pool.stop();
+ task_pool.cancel();
+ load_required_kernel_task_pool.cancel();
+ load_kernel_task_pool.cancel();
memory_manager.free();
- if (null_mem)
- clReleaseMemObject(CL_MEM_PTR(null_mem));
-
ConstMemMap::iterator mt;
for (mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) {
delete mt->second;
@@ -799,7 +799,11 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures &requested_feature
* internally within a single process. */
foreach (OpenCLProgram *program, programs) {
if (!program->load()) {
- load_kernel_task_pool.push(function_bind(&OpenCLProgram::compile, program));
+ load_kernel_num_compiling++;
+ load_kernel_task_pool.push([=] {
+ program->compile();
+ load_kernel_num_compiling--;
+ });
}
}
return true;
@@ -869,7 +873,7 @@ bool OpenCLDevice::wait_for_availability(const DeviceRequestedFeatures &requeste
* Better to check on device level than per kernel as mixing preview and
* non-preview kernels does not work due to different data types */
if (use_preview_kernels) {
- use_preview_kernels = !load_kernel_task_pool.finished();
+ use_preview_kernels = load_kernel_num_compiling.load() > 0;
}
}
return split_kernel->load_kernels(requested_features);
@@ -896,7 +900,7 @@ DeviceKernelStatus OpenCLDevice::get_active_kernel_switch_state()
return DEVICE_KERNEL_USING_FEATURE_KERNEL;
}
- bool other_kernels_finished = load_kernel_task_pool.finished();
+ bool other_kernels_finished = load_kernel_num_compiling.load() == 0;
if (use_preview_kernels) {
if (other_kernels_finished) {
return DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE;
@@ -946,7 +950,7 @@ void OpenCLDevice::mem_alloc(device_memory &mem)
cl_mem_flags mem_flag;
void *mem_ptr = NULL;
- if (mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE)
+ if (mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL)
mem_flag = CL_MEM_READ_ONLY;
else
mem_flag = CL_MEM_READ_WRITE;
@@ -961,7 +965,7 @@ void OpenCLDevice::mem_alloc(device_memory &mem)
opencl_assert_err(ciErr, "clCreateBuffer");
}
else {
- mem.device_pointer = null_mem;
+ mem.device_pointer = 0;
}
stats.mem_alloc(size);
@@ -970,9 +974,13 @@ void OpenCLDevice::mem_alloc(device_memory &mem)
void OpenCLDevice::mem_copy_to(device_memory &mem)
{
- if (mem.type == MEM_TEXTURE) {
- tex_free(mem);
- tex_alloc(mem);
+ if (mem.type == MEM_GLOBAL) {
+ global_free(mem);
+ global_alloc(mem);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ tex_free((device_texture &)mem);
+ tex_alloc((device_texture &)mem);
}
else {
if (!mem.device_pointer) {
@@ -1078,12 +1086,15 @@ void OpenCLDevice::mem_zero(device_memory &mem)
void OpenCLDevice::mem_free(device_memory &mem)
{
- if (mem.type == MEM_TEXTURE) {
- tex_free(mem);
+ if (mem.type == MEM_GLOBAL) {
+ global_free(mem);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ tex_free((device_texture &)mem);
}
else {
if (mem.device_pointer) {
- if (mem.device_pointer != null_mem) {
+ if (mem.device_pointer != 0) {
opencl_assert(clReleaseMemObject(CL_MEM_PTR(mem.device_pointer)));
}
mem.device_pointer = 0;
@@ -1102,7 +1113,7 @@ int OpenCLDevice::mem_sub_ptr_alignment()
device_ptr OpenCLDevice::mem_alloc_sub_ptr(device_memory &mem, int offset, int size)
{
cl_mem_flags mem_flag;
- if (mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE)
+ if (mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL)
mem_flag = CL_MEM_READ_ONLY;
else
mem_flag = CL_MEM_READ_WRITE;
@@ -1119,7 +1130,7 @@ device_ptr OpenCLDevice::mem_alloc_sub_ptr(device_memory &mem, int offset, int s
void OpenCLDevice::mem_free_sub_ptr(device_ptr device_pointer)
{
- if (device_pointer && device_pointer != null_mem) {
+ if (device_pointer != 0) {
opencl_assert(clReleaseMemObject(CL_MEM_PTR(device_pointer)));
}
}
@@ -1142,20 +1153,21 @@ void OpenCLDevice::const_copy_to(const char *name, void *host, size_t size)
data->copy_to_device();
}
-void OpenCLDevice::tex_alloc(device_memory &mem)
+void OpenCLDevice::global_alloc(device_memory &mem)
{
- VLOG(1) << "Texture allocate: " << mem.name << ", "
+ VLOG(1) << "Global memory allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
memory_manager.alloc(mem.name, mem);
- /* Set the pointer to non-null to keep code that inspects its value from thinking its unallocated. */
+ /* Set the pointer to non-null to keep code that inspects its value from thinking it is
+ * unallocated. */
mem.device_pointer = 1;
textures[mem.name] = &mem;
textures_need_update = true;
}
-void OpenCLDevice::tex_free(device_memory &mem)
+void OpenCLDevice::global_free(device_memory &mem)
{
if (mem.device_pointer) {
mem.device_pointer = 0;
@@ -1173,6 +1185,25 @@ void OpenCLDevice::tex_free(device_memory &mem)
}
}
+void OpenCLDevice::tex_alloc(device_texture &mem)
+{
+ VLOG(1) << "Texture allocate: " << mem.name << ", "
+ << string_human_readable_number(mem.memory_size()) << " bytes. ("
+ << string_human_readable_size(mem.memory_size()) << ")";
+
+ memory_manager.alloc(mem.name, mem);
+ /* Set the pointer to non-null to keep code that inspects its value from thinking it is
+ * unallocated. */
+ mem.device_pointer = 1;
+ textures[mem.name] = &mem;
+ textures_need_update = true;
+}
+
+void OpenCLDevice::tex_free(device_texture &mem)
+{
+ global_free(mem);
+}
+
size_t OpenCLDevice::global_size_round_up(int group_size, int global_size)
{
int r = global_size % group_size;
@@ -1237,8 +1268,7 @@ void OpenCLDevice::set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const cha
ptr = CL_MEM_PTR(i->second);
}
else {
- /* work around NULL not working, even though the spec says otherwise */
- ptr = CL_MEM_PTR(null_mem);
+ ptr = 0;
}
opencl_assert(clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void *)&ptr));
@@ -1274,10 +1304,10 @@ void OpenCLDevice::flush_texture_buffers()
foreach (TexturesMap::value_type &tex, textures) {
string name = tex.first;
+ device_memory *mem = tex.second;
- if (string_startswith(name, "__tex_image")) {
- int pos = name.rfind("_");
- int id = atoi(name.data() + pos + 1);
+ if (mem->type == MEM_TEXTURE) {
+ const uint id = ((device_texture *)mem)->slot;
texture_slots.push_back(texture_slot_t(name, num_data_slots + id));
num_slots = max(num_slots, num_data_slots + id + 1);
}
@@ -1290,22 +1320,20 @@ void OpenCLDevice::flush_texture_buffers()
/* Fill in descriptors */
foreach (texture_slot_t &slot, texture_slots) {
+ device_memory *mem = textures[slot.name];
TextureInfo &info = texture_info[slot.slot];
MemoryManager::BufferDescriptor desc = memory_manager.get_descriptor(slot.name);
- info.data = desc.offset;
- info.cl_buffer = desc.device_buffer;
-
- if (string_startswith(slot.name, "__tex_image")) {
- device_memory *mem = textures[slot.name];
- info.width = mem->data_width;
- info.height = mem->data_height;
- info.depth = mem->data_depth;
-
- info.interpolation = mem->interpolation;
- info.extension = mem->extension;
+ if (mem->type == MEM_TEXTURE) {
+ info = ((device_texture *)mem)->info;
+ }
+ else {
+ memset(&info, 0, sizeof(TextureInfo));
}
+
+ info.data = desc.offset;
+ info.cl_buffer = desc.device_buffer;
}
/* Force write of descriptors. */
@@ -1313,26 +1341,20 @@ void OpenCLDevice::flush_texture_buffers()
memory_manager.alloc("texture_info", texture_info);
}
-void OpenCLDevice::thread_run(DeviceTask *task)
+void OpenCLDevice::thread_run(DeviceTask &task)
{
flush_texture_buffers();
- if (task->type == DeviceTask::FILM_CONVERT) {
- film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
- }
- else if (task->type == DeviceTask::SHADER) {
- shader(*task);
- }
- else if (task->type == DeviceTask::RENDER) {
+ if (task.type == DeviceTask::RENDER) {
RenderTile tile;
- DenoisingTask denoising(this, *task);
+ DenoisingTask denoising(this, task);
/* Allocate buffer for kernel globals */
device_only_memory<KernelGlobalsDummy> kgbuffer(this, "kernel_globals");
kgbuffer.alloc_to_device(1);
/* Keep rendering tiles until done. */
- while (task->acquire_tile(this, tile)) {
+ while (task.acquire_tile(this, tile, task.tile_types)) {
if (tile.task == RenderTile::PATH_TRACE) {
assert(tile.task == RenderTile::PATH_TRACE);
scoped_timer timer(&tile.buffers->render_time);
@@ -1350,17 +1372,44 @@ void OpenCLDevice::thread_run(DeviceTask *task)
*/
clFinish(cqCommandQueue);
}
+ else if (tile.task == RenderTile::BAKE) {
+ bake(task, tile);
+ }
else if (tile.task == RenderTile::DENOISE) {
tile.sample = tile.start_sample + tile.num_samples;
denoise(tile, denoising);
- task->update_progress(&tile, tile.w * tile.h);
+ task.update_progress(&tile, tile.w * tile.h);
}
- task->release_tile(tile);
+ task.release_tile(tile);
}
kgbuffer.free();
}
+ else if (task.type == DeviceTask::SHADER) {
+ shader(task);
+ }
+ else if (task.type == DeviceTask::FILM_CONVERT) {
+ film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
+ }
+ else if (task.type == DeviceTask::DENOISE_BUFFER) {
+ RenderTile tile;
+ tile.x = task.x;
+ tile.y = task.y;
+ tile.w = task.w;
+ tile.h = task.h;
+ tile.buffer = task.buffer;
+ tile.sample = task.sample + task.num_samples;
+ tile.num_samples = task.num_samples;
+ tile.start_sample = task.sample;
+ tile.offset = task.offset;
+ tile.stride = task.stride;
+ tile.buffers = task.buffers;
+
+ DenoisingTask denoising(this, task);
+ denoise(tile, denoising);
+ task.update_progress(&tile, tile.w * tile.h);
+ }
}
void OpenCLDevice::film_convert(DeviceTask &task,
@@ -1801,7 +1850,7 @@ void OpenCLDevice::denoise(RenderTile &rtile, DenoisingTask &denoising)
denoising.render_buffer.samples = rtile.sample;
denoising.buffer.gpu_temporary_mem = true;
- denoising.run_denoising(&rtile);
+ denoising.run_denoising(rtile);
}
void OpenCLDevice::shader(DeviceTask &task)
@@ -1817,10 +1866,7 @@ void OpenCLDevice::shader(DeviceTask &task)
cl_int d_offset = task.offset;
OpenCLDevice::OpenCLProgram *program = &background_program;
- if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
- program = &bake_program;
- }
- else if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
+ if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
program = &displace_program;
}
program->wait_for_availability();
@@ -1851,10 +1897,90 @@ void OpenCLDevice::shader(DeviceTask &task)
}
}
+void OpenCLDevice::bake(DeviceTask &task, RenderTile &rtile)
+{
+ scoped_timer timer(&rtile.buffers->render_time);
+
+ /* Cast arguments to cl types. */
+ cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
+ cl_mem d_buffer = CL_MEM_PTR(rtile.buffer);
+ cl_int d_x = rtile.x;
+ cl_int d_y = rtile.y;
+ cl_int d_w = rtile.w;
+ cl_int d_h = rtile.h;
+ cl_int d_offset = rtile.offset;
+ cl_int d_stride = rtile.stride;
+
+ bake_program.wait_for_availability();
+ cl_kernel kernel = bake_program();
+
+ cl_uint start_arg_index = kernel_set_args(kernel, 0, d_data, d_buffer);
+
+ set_kernel_arg_buffers(kernel, &start_arg_index);
+
+ start_arg_index += kernel_set_args(
+ kernel, start_arg_index, d_x, d_y, d_w, d_h, d_offset, d_stride);
+
+ int start_sample = rtile.start_sample;
+ int end_sample = rtile.start_sample + rtile.num_samples;
+
+ for (int sample = start_sample; sample < end_sample; sample++) {
+ if (task.get_cancel()) {
+ if (task.need_finish_queue == false)
+ break;
+ }
+
+ kernel_set_args(kernel, start_arg_index, sample);
+
+ enqueue_kernel(kernel, d_w, d_h);
+
+ rtile.sample = sample + 1;
+
+ task.update_progress(&rtile, rtile.w * rtile.h);
+ }
+
+ clFinish(cqCommandQueue);
+}
+
+static bool kernel_build_opencl_2(cl_device_id cdDevice)
+{
+ /* Build with OpenCL 2.0 if available; this improves performance
+ * with AMD OpenCL drivers on Windows and Linux (legacy drivers).
+ * Note that OpenCL selects the highest 1.x version by default,
+ * only for 2.0 do we need the explicit compiler flag. */
+ int version_major, version_minor;
+ if (OpenCLInfo::get_device_version(cdDevice, &version_major, &version_minor)) {
+ if (version_major >= 2) {
+ /* This appears to trigger a driver bug in Radeon RX cards with certain
+ * driver versions, so don't use OpenCL 2.0 for those. */
+ string device_name = OpenCLInfo::get_readable_device_name(cdDevice);
+ if (string_startswith(device_name, "Radeon RX 4") ||
+ string_startswith(device_name, "Radeon (TM) RX 4") ||
+ string_startswith(device_name, "Radeon RX 5") ||
+ string_startswith(device_name, "Radeon (TM) RX 5")) {
+ char version[256] = "";
+ int driver_major, driver_minor;
+ clGetDeviceInfo(cdDevice, CL_DEVICE_VERSION, sizeof(version), &version, NULL);
+ if (sscanf(version, "OpenCL 2.0 AMD-APP (%d.%d)", &driver_major, &driver_minor) == 2) {
+ return !(driver_major == 3075 && driver_minor <= 12);
+ }
+ }
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
string OpenCLDevice::kernel_build_options(const string *debug_src)
{
string build_options = "-cl-no-signed-zeros -cl-mad-enable ";
+ if (kernel_build_opencl_2(cdDevice)) {
+ build_options += "-cl-std=CL2.0 ";
+ }
+
if (platform_name == "NVIDIA CUDA") {
build_options +=
"-D__KERNEL_OPENCL_NVIDIA__ "
diff --git a/intern/cycles/device/opencl/memory_manager.cpp b/intern/cycles/device/opencl/memory_manager.cpp
index f85aadce1c2..0285dc969ec 100644
--- a/intern/cycles/device/opencl/memory_manager.cpp
+++ b/intern/cycles/device/opencl/memory_manager.cpp
@@ -18,7 +18,7 @@
# include "util/util_foreach.h"
-# include "device/opencl/opencl.h"
+# include "device/opencl/device_opencl.h"
# include "device/opencl/memory_manager.h"
CCL_NAMESPACE_BEGIN
@@ -64,6 +64,9 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDevice *device)
total_size += alloc_size;
}
+ /* Always allocate non-empty buffer, NULL pointers cause problems with some drivers. */
+ total_size = max(total_size, 16);
+
if (need_realloc) {
cl_ulong max_buffer_size;
clGetDeviceInfo(
@@ -251,7 +254,7 @@ void MemoryManager::set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg)
device->kernel_set_args(kernel, (*narg)++, *device_buffer.buffer);
}
else {
- device->kernel_set_args(kernel, (*narg)++, device->null_mem);
+ device->kernel_set_args(kernel, (*narg)++);
}
}
}
diff --git a/intern/cycles/device/opencl/memory_manager.h b/intern/cycles/device/opencl/memory_manager.h
index 2fbc97a0756..23624f837a6 100644
--- a/intern/cycles/device/opencl/memory_manager.h
+++ b/intern/cycles/device/opencl/memory_manager.h
@@ -19,8 +19,8 @@
#include "device/device.h"
#include "util/util_map.h"
-#include "util/util_vector.h"
#include "util/util_string.h"
+#include "util/util_vector.h"
#include "clew.h"
diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp
index cc40ad42b06..b8b07cf2947 100644
--- a/intern/cycles/device/opencl/opencl_util.cpp
+++ b/intern/cycles/device/opencl/opencl_util.cpp
@@ -16,15 +16,16 @@
#ifdef WITH_OPENCL
-# include "device/opencl/opencl.h"
# include "device/device_intern.h"
+# include "device/opencl/device_opencl.h"
# include "util/util_debug.h"
# include "util/util_logging.h"
# include "util/util_md5.h"
# include "util/util_path.h"
-# include "util/util_time.h"
+# include "util/util_semaphore.h"
# include "util/util_system.h"
+# include "util/util_time.h"
using std::cerr;
using std::endl;
@@ -390,8 +391,27 @@ static void escape_python_string(string &str)
string_replace(str, "'", "\'");
}
+static int opencl_compile_process_limit()
+{
+ /* Limit the number of concurrent compile processes, with a heuristic based
+ * on total physical RAM and an estimate of the memory needed when compiling
+ * with all Cycles features enabled.
+ *
+ * This is somewhat arbitrary as we don't know the actual available RAM or
+ * how much memory the kernel compilation will need depending on the features,
+ * but it is better than not limiting at all. */
+ static const int64_t GB = 1024LL * 1024LL * 1024LL;
+ static const int64_t process_memory = 2 * GB;
+ static const int64_t base_memory = 2 * GB;
+ static const int64_t system_memory = system_physical_ram();
+ static const int64_t process_limit = (system_memory - base_memory) / process_memory;
+
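+  /* For example, 32 GB of physical RAM yields (32 - 2) / 2 = 15 concurrent
+   * compile processes, while small machines fall back to the minimum of 1. */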
+ return max((int)process_limit, 1);
+}
+
bool OpenCLDevice::OpenCLProgram::compile_separate(const string &clbin)
{
+ /* Construct arguments. */
vector<string> args;
args.push_back("--background");
args.push_back("--factory-startup");
@@ -419,14 +439,23 @@ bool OpenCLDevice::OpenCLProgram::compile_separate(const string &clbin)
kernel_file_escaped.c_str(),
clbin_escaped.c_str()));
- double starttime = time_dt();
+ /* Limit number of concurrent processes compiling. */
+ static thread_counting_semaphore semaphore(opencl_compile_process_limit());
+ semaphore.acquire();
+
+ /* Compile. */
+ const double starttime = time_dt();
add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false);
add_log(string("Build flags: ") + kernel_build_options, true);
- if (!system_call_self(args) || !path_exists(clbin)) {
+ const bool success = system_call_self(args);
+ const double elapsed = time_dt() - starttime;
+
+ semaphore.release();
+
+ if (!success || !path_exists(clbin)) {
return false;
}
- double elapsed = time_dt() - starttime;
add_log(
string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed),
false);
@@ -619,15 +648,16 @@ void OpenCLDevice::OpenCLProgram::compile()
debug_src = &clsrc;
}
- /* If binary kernel exists already, try use it. */
- if (compile_separate(clbin)) {
+ if (DebugFlags().running_inside_blender && compile_separate(clbin)) {
add_log(string("Built and loaded program from ") + clbin + ".", true);
loaded = true;
}
else {
- add_log(string("Separate-process building of ") + clbin +
- " failed, will fall back to regular building.",
- true);
+ if (DebugFlags().running_inside_blender) {
+ add_log(string("Separate-process building of ") + clbin +
+ " failed, will fall back to regular building.",
+ true);
+ }
/* If does not exist or loading binary failed, compile kernel. */
if (!compile_kernel(debug_src)) {
@@ -746,7 +776,11 @@ bool OpenCLInfo::device_supported(const string &platform_name, const cl_device_i
}
VLOG(3) << "OpenCL driver version " << driver_major << "." << driver_minor;
- /* It is possible tyo have Iris GPU on AMD/Apple OpenCL framework
+ if (getenv("CYCLES_OPENCL_TEST")) {
+ return true;
+ }
+
+ /* It is possible to have Iris GPU on AMD/Apple OpenCL framework
* (aka, it will not be on Intel framework). This isn't supported
* and needs an explicit blacklist.
*/
@@ -805,18 +839,30 @@ bool OpenCLInfo::platform_version_check(cl_platform_id platform, string *error)
return true;
}
-bool OpenCLInfo::device_version_check(cl_device_id device, string *error)
+bool OpenCLInfo::get_device_version(cl_device_id device, int *r_major, int *r_minor, string *error)
{
- const int req_major = 1, req_minor = 1;
- int major, minor;
char version[256];
clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, sizeof(version), &version, NULL);
- if (sscanf(version, "OpenCL C %d.%d", &major, &minor) < 2) {
+ if (sscanf(version, "OpenCL C %d.%d", r_major, r_minor) < 2) {
if (error != NULL) {
*error = string_printf("OpenCL: failed to parse OpenCL C version string (%s).", version);
}
return false;
}
+ if (error != NULL) {
+ *error = "";
+ }
+ return true;
+}
+
+bool OpenCLInfo::device_version_check(cl_device_id device, string *error)
+{
+ const int req_major = 1, req_minor = 1;
+ int major, minor;
+ if (!get_device_version(device, &major, &minor, error)) {
+ return false;
+ }
+
if (!((major == req_major && minor >= req_minor) || (major > req_major))) {
if (error != NULL) {
*error = string_printf("OpenCL: C version 1.1 or later required, found %d.%d", major, minor);
@@ -857,7 +903,7 @@ string OpenCLInfo::get_hardware_id(const string &platform_name, cl_device_id dev
return "";
}
-void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices, bool force_all)
+void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices)
{
const cl_device_type device_type = OpenCLInfo::device_type();
static bool first_time = true;
@@ -923,7 +969,7 @@ void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices
FIRST_VLOG(2) << "Ignoring device " << device_name << " due to old compiler version.";
continue;
}
- if (force_all || device_supported(platform_name, device_id)) {
+ if (device_supported(platform_name, device_id)) {
cl_device_type device_type;
if (!get_device_type(device_id, &device_type, &error)) {
FIRST_VLOG(2) << "Ignoring device " << device_name
diff --git a/intern/cycles/graph/CMakeLists.txt b/intern/cycles/graph/CMakeLists.txt
index c6c46941598..9ff1c5b98c6 100644
--- a/intern/cycles/graph/CMakeLists.txt
+++ b/intern/cycles/graph/CMakeLists.txt
@@ -17,7 +17,7 @@ set(SRC_HEADERS
)
set(LIB
-
+ cycles_util
)
include_directories(${INC})
diff --git a/intern/cycles/graph/node.cpp b/intern/cycles/graph/node.cpp
index fc7daaeeaa6..c437c6fda1e 100644
--- a/intern/cycles/graph/node.cpp
+++ b/intern/cycles/graph/node.cpp
@@ -133,7 +133,7 @@ void Node::set(const SocketType &input, const Transform &value)
void Node::set(const SocketType &input, Node *value)
{
- assert(input.type == SocketType::TRANSFORM);
+ assert(input.type == SocketType::NODE);
get_socket_value<Node *>(this, input) = value;
}
@@ -213,7 +213,7 @@ float Node::get_float(const SocketType &input) const
float2 Node::get_float2(const SocketType &input) const
{
- assert(input.type == SocketType::FLOAT);
+ assert(input.type == SocketType::POINT2);
return get_socket_value<float2>(this, input);
}
@@ -272,7 +272,7 @@ const array<float> &Node::get_float_array(const SocketType &input) const
const array<float2> &Node::get_float2_array(const SocketType &input) const
{
- assert(input.type == SocketType::FLOAT_ARRAY);
+ assert(input.type == SocketType::POINT2_ARRAY);
return get_socket_value<array<float2>>(this, input);
}
@@ -313,7 +313,9 @@ void Node::set_default_value(const SocketType &socket)
{
const void *src = socket.default_value;
void *dst = ((char *)this) + socket.struct_offset;
- memcpy(dst, src, socket.size());
+ if (socket.size() > 0) {
+ memcpy(dst, src, socket.size());
+ }
}
template<typename T>
@@ -667,4 +669,14 @@ size_t Node::get_total_size_in_bytes() const
return total_size;
}
+bool Node::is_a(const NodeType *type_)
+{
+ for (const NodeType *base = type; base; base = base->base) {
+ if (base == type_) {
+ return true;
+ }
+ }
+ return false;
+}
+
CCL_NAMESPACE_END
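
As a quick illustration of the new Node::is_a() base-chain test, a sketch only (the node class and cast below are hypothetical, not taken from this patch):

  /* Assuming a node class registered through the usual NODE_DECLARE/NODE_DEFINE
   * macros, exposing its registered type as MyNode::node_type. */
  if (node->is_a(MyNode::node_type)) {
    /* Matches nodes of type MyNode and of any type derived from it, since
     * is_a() walks the NodeType::base chain added in node_type.h below. */
    MyNode *my_node = static_cast<MyNode *>(node);
    (void)my_node;
  }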
diff --git a/intern/cycles/graph/node.h b/intern/cycles/graph/node.h
index 226c49b387a..4473b8aca28 100644
--- a/intern/cycles/graph/node.h
+++ b/intern/cycles/graph/node.h
@@ -33,7 +33,7 @@ struct Transform;
struct Node {
explicit Node(const NodeType *type, ustring name = ustring());
- virtual ~Node();
+ virtual ~Node() = 0;
/* set values */
void set(const SocketType &input, bool value);
@@ -94,6 +94,9 @@ struct Node {
/* Get total size of this node. */
size_t get_total_size_in_bytes() const;
+ /* Type testing, taking into account base classes. */
+ bool is_a(const NodeType *type);
+
ustring name;
const NodeType *type;
};
diff --git a/intern/cycles/graph/node_type.cpp b/intern/cycles/graph/node_type.cpp
index f46d4e48026..0283ed7c817 100644
--- a/intern/cycles/graph/node_type.cpp
+++ b/intern/cycles/graph/node_type.cpp
@@ -135,8 +135,13 @@ bool SocketType::is_float3(Type type)
/* Node Type */
-NodeType::NodeType(Type type_) : type(type_)
+NodeType::NodeType(Type type, const NodeType *base) : type(type), base(base)
{
+ if (base) {
+ /* Inherit sockets. */
+ inputs = base->inputs;
+ outputs = base->outputs;
+ }
}
NodeType::~NodeType()
@@ -209,7 +214,7 @@ unordered_map<ustring, NodeType, ustringHash> &NodeType::types()
return _types;
}
-NodeType *NodeType::add(const char *name_, CreateFunc create_, Type type_)
+NodeType *NodeType::add(const char *name_, CreateFunc create_, Type type_, const NodeType *base_)
{
ustring name(name_);
@@ -219,7 +224,7 @@ NodeType *NodeType::add(const char *name_, CreateFunc create_, Type type_)
return NULL;
}
- types()[name] = NodeType(type_);
+ types()[name] = NodeType(type_, base_);
NodeType *type = &types()[name];
type->name = name;
diff --git a/intern/cycles/graph/node_type.h b/intern/cycles/graph/node_type.h
index e9496a42658..a79d44b82f3 100644
--- a/intern/cycles/graph/node_type.h
+++ b/intern/cycles/graph/node_type.h
@@ -103,7 +103,7 @@ struct SocketType {
struct NodeType {
enum Type { NONE, SHADER };
- explicit NodeType(Type type = NONE);
+ explicit NodeType(Type type = NONE, const NodeType *base = NULL);
~NodeType();
void register_input(ustring name,
@@ -124,11 +124,15 @@ struct NodeType {
ustring name;
Type type;
+ const NodeType *base;
vector<SocketType, std::allocator<SocketType>> inputs;
vector<SocketType, std::allocator<SocketType>> outputs;
CreateFunc create;
- static NodeType *add(const char *name, CreateFunc create, Type type = NONE);
+ static NodeType *add(const char *name,
+ CreateFunc create,
+ Type type = NONE,
+ const NodeType *base = NULL);
static const NodeType *find(ustring name);
static unordered_map<ustring, NodeType, ustringHash> &types();
};
@@ -148,6 +152,14 @@ struct NodeType {
} \
template<typename T> const NodeType *structname::register_type()
+#define NODE_ABSTRACT_DECLARE \
+ template<typename T> static const NodeType *register_base_type(); \
+ static const NodeType *node_base_type;
+
+#define NODE_ABSTRACT_DEFINE(structname) \
+ const NodeType *structname::node_base_type = structname::register_base_type<structname>(); \
+ template<typename T> const NodeType *structname::register_base_type()
+
/* Sock Definition Macros */
#define SOCKET_OFFSETOF(T, name) (((char *)&(((T *)1)->name)) - (char *)1)
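
A condensed sketch of what the new base parameter enables when registering types directly through NodeType::add() (the names, the NULL create callback, and the factory function are hypothetical; real node types go through the NODE_*_DEFINE macros above):

  static Node *create_my_node(const NodeType *type); /* hypothetical factory */

  /* Abstract base type, registered here with no create callback. */
  static const NodeType *my_base_type = NodeType::add("my_base", NULL, NodeType::NONE);

  /* Derived type: the NodeType constructor copies my_base_type's inputs and
   * outputs, and Node::is_a(my_base_type) is true for nodes of this type. */
  static NodeType *my_node_type = NodeType::add(
      "my_node", create_my_node, NodeType::NONE, my_base_type);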
diff --git a/intern/cycles/graph/node_xml.cpp b/intern/cycles/graph/node_xml.cpp
index a96970cc904..d333400cc4a 100644
--- a/intern/cycles/graph/node_xml.cpp
+++ b/intern/cycles/graph/node_xml.cpp
@@ -200,7 +200,7 @@ void xml_read_node(XMLReader &reader, Node *node, xml_node xml_node)
map<ustring, Node *>::iterator it = reader.node_map.find(value);
if (it != reader.node_map.end()) {
Node *value_node = it->second;
- if (value_node->type == *(socket.node_type))
+ if (value_node->is_a(*(socket.node_type)))
node->set(socket, it->second);
}
break;
@@ -215,7 +215,7 @@ void xml_read_node(XMLReader &reader, Node *node, xml_node xml_node)
map<ustring, Node *>::iterator it = reader.node_map.find(ustring(tokens[i]));
if (it != reader.node_map.end()) {
Node *value_node = it->second;
- value[i] = (value_node->type == *(socket.node_type)) ? value_node : NULL;
+ value[i] = (value_node->is_a(*(socket.node_type))) ? value_node : NULL;
}
else {
value[i] = NULL;
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 8a8fee108ae..5533eeb006d 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -36,6 +36,10 @@ set(SRC_CUDA_KERNELS
)
set(SRC_OPENCL_KERNELS
+ kernels/opencl/kernel_adaptive_stopping.cl
+ kernels/opencl/kernel_adaptive_filter_x.cl
+ kernels/opencl/kernel_adaptive_filter_y.cl
+ kernels/opencl/kernel_adaptive_adjust_samples.cl
kernels/opencl/kernel_bake.cl
kernels/opencl/kernel_base.cl
kernels/opencl/kernel_displace.cl
@@ -64,6 +68,10 @@ set(SRC_OPENCL_KERNELS
kernels/opencl/filter.cl
)
+set(SRC_OPTIX_KERNELS
+ kernels/optix/kernel_optix.cu
+)
+
set(SRC_BVH_HEADERS
bvh/bvh.h
bvh/bvh_nodes.h
@@ -73,28 +81,18 @@ set(SRC_BVH_HEADERS
bvh/bvh_types.h
bvh/bvh_volume.h
bvh/bvh_volume_all.h
- bvh/qbvh_nodes.h
- bvh/qbvh_shadow_all.h
- bvh/qbvh_local.h
- bvh/qbvh_traversal.h
- bvh/qbvh_volume.h
- bvh/qbvh_volume_all.h
- bvh/obvh_nodes.h
- bvh/obvh_shadow_all.h
- bvh/obvh_local.h
- bvh/obvh_traversal.h
- bvh/obvh_volume.h
- bvh/obvh_volume_all.h
bvh/bvh_embree.h
)
set(SRC_HEADERS
kernel_accumulate.h
+ kernel_adaptive_sampling.h
kernel_bake.h
kernel_camera.h
kernel_color.h
kernel_compat_cpu.h
kernel_compat_cuda.h
+ kernel_compat_optix.h
kernel_compat_opencl.h
kernel_differential.h
kernel_emission.h
@@ -103,6 +101,8 @@ set(SRC_HEADERS
kernel_id_passes.h
kernel_jitter.h
kernel_light.h
+ kernel_light_background.h
+ kernel_light_common.h
kernel_math.h
kernel_montecarlo.h
kernel_passes.h
@@ -124,6 +124,7 @@ set(SRC_HEADERS
kernel_types.h
kernel_volume.h
kernel_work_stealing.h
+ kernel_write_passes.h
)
set(SRC_KERNELS_CPU_HEADERS
@@ -140,6 +141,9 @@ set(SRC_KERNELS_CUDA_HEADERS
kernels/cuda/kernel_cuda_image.h
)
+set(SRC_KERNELS_OPTIX_HEADERS
+)
+
set(SRC_KERNELS_OPENCL_HEADERS
kernels/opencl/kernel_split_function.h
kernels/opencl/kernel_opencl_image.h
@@ -168,17 +172,19 @@ set(SRC_CLOSURE_HEADERS
closure/volume.h
closure/bsdf_principled_diffuse.h
closure/bsdf_principled_sheen.h
- closure/bsdf_hair_principled.h
+ closure/bsdf_hair_principled.h
)
set(SRC_SVM_HEADERS
svm/svm.h
svm/svm_ao.h
+ svm/svm_aov.h
svm/svm_attribute.h
svm/svm_bevel.h
svm/svm_blackbody.h
svm/svm_bump.h
svm/svm_camera.h
+ svm/svm_clamp.h
svm/svm_closure.h
svm/svm_convert.h
svm/svm_checker.h
@@ -198,7 +204,9 @@ set(SRC_SVM_HEADERS
svm/svm_invert.h
svm/svm_light_path.h
svm/svm_magic.h
+ svm/svm_map_range.h
svm/svm_mapping.h
+ svm/svm_mapping_util.h
svm/svm_math.h
svm/svm_math_util.h
svm/svm_mix.h
@@ -212,13 +220,16 @@ set(SRC_SVM_HEADERS
svm/svm_sepcomb_vector.h
svm/svm_sky.h
svm/svm_tex_coord.h
- svm/svm_texture.h
+ svm/svm_fractal_noise.h
svm/svm_types.h
svm/svm_value.h
+ svm/svm_vector_rotate.h
svm/svm_vector_transform.h
svm/svm_voronoi.h
svm/svm_voxel.h
svm/svm_wave.h
+ svm/svm_white_noise.h
+ svm/svm_vertex_color.h
)
set(SRC_GEOM_HEADERS
@@ -308,6 +319,10 @@ set(SRC_UTIL_HEADERS
)
set(SRC_SPLIT_HEADERS
+ split/kernel_adaptive_adjust_samples.h
+ split/kernel_adaptive_filter_x.h
+ split/kernel_adaptive_filter_y.h
+ split/kernel_adaptive_stopping.h
split/kernel_branched.h
split/kernel_buffer_update.h
split/kernel_data_init.h
@@ -350,11 +365,11 @@ if(WITH_CYCLES_CUDA_BINARIES)
set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")
# warn for other versions
- if(CUDA_VERSION MATCHES "101")
+ if((CUDA_VERSION MATCHES "101") OR (CUDA_VERSION MATCHES "102"))
else()
message(WARNING
"CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, "
- "build may succeed but only CUDA 10.1 is officially supported")
+ "build may succeed but only CUDA 10.1 and 10.2 are officially supported")
endif()
# build for each arch
@@ -376,11 +391,20 @@ if(WITH_CYCLES_CUDA_BINARIES)
set(cuda_cubins)
macro(CYCLES_CUDA_KERNEL_ADD arch prev_arch name flags sources experimental)
- set(cuda_cubin ${name}_${arch}.cubin)
+ if(${arch} MATCHES "compute_.*")
+ set(format "ptx")
+ else()
+ set(format "cubin")
+ endif()
+ set(cuda_file ${name}_${arch}.${format})
set(kernel_sources ${sources})
if(NOT ${prev_arch} STREQUAL "none")
- set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin)
+ if(${prev_arch} MATCHES "compute_.*")
+ set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.ptx)
+ else()
+ set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin)
+ endif()
endif()
set(cuda_kernel_src "/kernels/cuda/${name}.cu")
@@ -393,7 +417,7 @@ if(WITH_CYCLES_CUDA_BINARIES)
-I ${CMAKE_CURRENT_SOURCE_DIR}/..
-I ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda
--use_fast_math
- -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin})
+ -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_file})
if(${experimental})
set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__)
@@ -418,7 +442,7 @@ if(WITH_CYCLES_CUDA_BINARIES)
endif()
add_custom_command(
- OUTPUT ${cuda_cubin}
+ OUTPUT ${cuda_file}
COMMAND ${CUBIN_CC_ENV}
"$<TARGET_FILE:cycles_cubin_cc>"
-target ${CUDA_ARCH}
@@ -429,18 +453,18 @@ if(WITH_CYCLES_CUDA_BINARIES)
DEPENDS ${kernel_sources} cycles_cubin_cc)
else()
add_custom_command(
- OUTPUT ${cuda_cubin}
+ OUTPUT ${cuda_file}
COMMAND ${CUDA_NVCC_EXECUTABLE}
-arch=${arch}
${CUDA_NVCC_FLAGS}
- --cubin
+ --${format}
${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
--ptxas-options="-v"
${cuda_flags}
DEPENDS ${kernel_sources})
endif()
- delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
- list(APPEND cuda_cubins ${cuda_cubin})
+ delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file}" ${CYCLES_INSTALL_PATH}/lib)
+ list(APPEND cuda_cubins ${cuda_file})
unset(cuda_debug_flags)
endmacro()
@@ -471,6 +495,89 @@ if(WITH_CYCLES_CUDA_BINARIES)
cycles_set_solution_folder(cycles_kernel_cuda)
endif()
+# OptiX PTX modules
+
+if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
+ foreach(input ${SRC_OPTIX_KERNELS})
+ get_filename_component(input_we ${input} NAME_WE)
+
+ set(output "${CMAKE_CURRENT_BINARY_DIR}/${input_we}.ptx")
+ set(cuda_flags
+ -I "${OPTIX_INCLUDE_DIR}"
+ -I "${CMAKE_CURRENT_SOURCE_DIR}/.."
+ -I "${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda"
+ --use_fast_math
+ -o ${output})
+
+ if(WITH_CYCLES_DEBUG)
+ set(cuda_flags ${cuda_flags}
+ -D __KERNEL_DEBUG__)
+ endif()
+ if(WITH_CYCLES_CUBIN_COMPILER)
+
+ # Needed to find libnvrtc-builtins.so. Can't do it from inside
+ # cycles_cubin_cc since the env variable is read before main()
+ if(APPLE)
+ set(CUBIN_CC_ENV ${CMAKE_COMMAND}
+ -E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib")
+ elseif(UNIX)
+ set(CUBIN_CC_ENV ${CMAKE_COMMAND}
+ -E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64")
+ endif()
+
+ add_custom_command(
+ OUTPUT ${output}
+ DEPENDS
+ ${input}
+ ${SRC_HEADERS}
+ ${SRC_KERNELS_CUDA_HEADERS}
+ ${SRC_KERNELS_OPTIX_HEADERS}
+ ${SRC_BVH_HEADERS}
+ ${SRC_SVM_HEADERS}
+ ${SRC_GEOM_HEADERS}
+ ${SRC_CLOSURE_HEADERS}
+ ${SRC_UTIL_HEADERS}
+ COMMAND ${CUBIN_CC_ENV}
+ "$<TARGET_FILE:cycles_cubin_cc>"
+ -target 52
+ -ptx
+ -i ${CMAKE_CURRENT_SOURCE_DIR}/${input}
+ ${cuda_flags}
+ -v
+ -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}"
+ DEPENDS ${kernel_sources} cycles_cubin_cc)
+ else()
+ add_custom_command(
+ OUTPUT
+ ${output}
+ DEPENDS
+ ${input}
+ ${SRC_HEADERS}
+ ${SRC_KERNELS_CUDA_HEADERS}
+ ${SRC_KERNELS_OPTIX_HEADERS}
+ ${SRC_BVH_HEADERS}
+ ${SRC_SVM_HEADERS}
+ ${SRC_GEOM_HEADERS}
+ ${SRC_CLOSURE_HEADERS}
+ ${SRC_UTIL_HEADERS}
+ COMMAND
+ ${CUDA_NVCC_EXECUTABLE}
+ --ptx
+ -arch=sm_52
+ ${cuda_flags}
+ ${input}
+ WORKING_DIRECTORY
+ "${CMAKE_CURRENT_SOURCE_DIR}")
+ endif()
+ list(APPEND optix_ptx ${output})
+
+ delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${output}" ${CYCLES_INSTALL_PATH}/lib)
+ endforeach()
+
+ add_custom_target(cycles_kernel_optix ALL DEPENDS ${optix_ptx})
+ cycles_set_solution_folder(cycles_kernel_optix)
+endif()
+
# OSL module
if(WITH_CYCLES_OSL)
@@ -486,6 +593,19 @@ endif()
include_directories(${INC})
include_directories(SYSTEM ${INC_SYS})
+if(WITH_COMPILER_ASAN)
+ if(CMAKE_COMPILER_IS_GNUCC AND (NOT WITH_CYCLES_KERNEL_ASAN))
+ # GCC hangs compiling the big kernel files with asan and release, so disable by default.
+ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-sanitize=all")
+ set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-sanitize=vptr")
+ elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
+ # With OSL, Cycles disables RTTI in some modules, which then breaks at linking
+ # when trying to use the vptr sanitizer (included in the general 'undefined' option).
+ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-sanitize=vptr")
+ set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-sanitize=vptr")
+ endif()
+endif()
+
set_source_files_properties(kernels/cpu/kernel.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_split.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/filter.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}")
@@ -517,10 +637,12 @@ endif()
cycles_add_library(cycles_kernel "${LIB}"
${SRC_CPU_KERNELS}
${SRC_CUDA_KERNELS}
+ ${SRC_OPTIX_KERNELS}
${SRC_OPENCL_KERNELS}
${SRC_HEADERS}
${SRC_KERNELS_CPU_HEADERS}
${SRC_KERNELS_CUDA_HEADERS}
+ ${SRC_KERNELS_OPTIX_HEADERS}
${SRC_KERNELS_OPENCL_HEADERS}
${SRC_BVH_HEADERS}
${SRC_CLOSURE_HEADERS}
@@ -530,25 +652,42 @@ cycles_add_library(cycles_kernel "${LIB}"
${SRC_SPLIT_HEADERS}
)
+source_group("bvh" FILES ${SRC_BVH_HEADERS})
+source_group("closure" FILES ${SRC_CLOSURE_HEADERS})
+source_group("filter" FILES ${SRC_FILTER_HEADERS})
+source_group("geom" FILES ${SRC_GEOM_HEADERS})
+source_group("kernel" FILES ${SRC_HEADERS})
+source_group("kernel\\split" FILES ${SRC_SPLIT_HEADERS})
+source_group("kernels\\cpu" FILES ${SRC_CPU_KERNELS} ${SRC_KERNELS_CPU_HEADERS})
+source_group("kernels\\cuda" FILES ${SRC_CUDA_KERNELS} ${SRC_KERNELS_CUDA_HEADERS})
+source_group("kernels\\opencl" FILES ${SRC_OPENCL_KERNELS} ${SRC_KERNELS_OPENCL_HEADERS})
+source_group("kernels\\optix" FILES ${SRC_OPTIX_KERNELS} ${SRC_KERNELS_OPTIX_HEADERS})
+source_group("svm" FILES ${SRC_SVM_HEADERS})
+
if(WITH_CYCLES_CUDA)
add_dependencies(cycles_kernel cycles_kernel_cuda)
endif()
+if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
+ add_dependencies(cycles_kernel cycles_kernel_optix)
+endif()
# OpenCL kernel
-#set(KERNEL_PREPROCESSED ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl)
-#add_custom_command(
-# OUTPUT ${KERNEL_PREPROCESSED}
-# COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -o ${KERNEL_PREPROCESSED}
-# DEPENDS ${SRC_KERNEL} ${SRC_UTIL_HEADERS})
-#add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED})
-#delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel)
+# set(KERNEL_PREPROCESSED ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl)
+# add_custom_command(
+# OUTPUT ${KERNEL_PREPROCESSED}
+# COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -o ${KERNEL_PREPROCESSED}
+# DEPENDS ${SRC_KERNEL} ${SRC_UTIL_HEADERS})
+# add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED})
+# delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_OPENCL_KERNELS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CUDA_KERNELS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_OPTIX_KERNELS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/optix)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNELS_OPENCL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNELS_CUDA_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNELS_OPTIX_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/optix)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_BVH_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/bvh)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/closure)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_FILTER_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/filter)
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index 13e72ed299f..3049f243ae9 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -33,136 +33,108 @@ CCL_NAMESPACE_BEGIN
#include "kernel/bvh/bvh_types.h"
-/* Common QBVH functions. */
-#ifdef __QBVH__
-# include "kernel/bvh/qbvh_nodes.h"
-# ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_nodes.h"
-# endif
-#endif
+#ifndef __KERNEL_OPTIX__
/* Regular BVH traversal */
-#include "kernel/bvh/bvh_nodes.h"
+# include "kernel/bvh/bvh_nodes.h"
-#define BVH_FUNCTION_NAME bvh_intersect
-#define BVH_FUNCTION_FEATURES 0
-#include "kernel/bvh/bvh_traversal.h"
-
-#if defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING
+# define BVH_FUNCTION_NAME bvh_intersect
+# define BVH_FUNCTION_FEATURES 0
# include "kernel/bvh/bvh_traversal.h"
-#endif
-#if defined(__HAIR__)
-# define BVH_FUNCTION_NAME bvh_intersect_hair
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_HAIR_MINIMUM_WIDTH
-# include "kernel/bvh/bvh_traversal.h"
-#endif
+# if defined(__HAIR__)
+# define BVH_FUNCTION_NAME bvh_intersect_hair
+# define BVH_FUNCTION_FEATURES BVH_HAIR
+# include "kernel/bvh/bvh_traversal.h"
+# endif
-#if defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION
-# include "kernel/bvh/bvh_traversal.h"
-#endif
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_motion
+# define BVH_FUNCTION_FEATURES BVH_MOTION
+# include "kernel/bvh/bvh_traversal.h"
+# endif
-#if defined(__HAIR__) && defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_hair_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_HAIR_MINIMUM_WIDTH | BVH_MOTION
-# include "kernel/bvh/bvh_traversal.h"
-#endif
+# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_hair_motion
+# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION
+# include "kernel/bvh/bvh_traversal.h"
+# endif
/* Subsurface scattering BVH traversal */
-#if defined(__BVH_LOCAL__)
-# define BVH_FUNCTION_NAME bvh_intersect_local
-# define BVH_FUNCTION_FEATURES BVH_HAIR
-# include "kernel/bvh/bvh_local.h"
-
-# if defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_local_motion
-# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
+# if defined(__BVH_LOCAL__)
+# define BVH_FUNCTION_NAME bvh_intersect_local
+# define BVH_FUNCTION_FEATURES BVH_HAIR
# include "kernel/bvh/bvh_local.h"
-# endif
-#endif /* __BVH_LOCAL__ */
-/* Volume BVH traversal */
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_local_motion
+# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
+# include "kernel/bvh/bvh_local.h"
+# endif
+# endif /* __BVH_LOCAL__ */
-#if defined(__VOLUME__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume
-# define BVH_FUNCTION_FEATURES BVH_HAIR
-# include "kernel/bvh/bvh_volume.h"
+/* Volume BVH traversal */
-# if defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
+# if defined(__VOLUME__)
+# define BVH_FUNCTION_NAME bvh_intersect_volume
+# define BVH_FUNCTION_FEATURES BVH_HAIR
# include "kernel/bvh/bvh_volume.h"
-# endif
-# if defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR
-# include "kernel/bvh/bvh_volume.h"
-# endif
-#endif /* __VOLUME__ */
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
+# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
+# include "kernel/bvh/bvh_volume.h"
+# endif
+# endif /* __VOLUME__ */
/* Record all intersections - Shadow BVH traversal */
-#if defined(__SHADOW_RECORD_ALL__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all
-# define BVH_FUNCTION_FEATURES 0
-# include "kernel/bvh/bvh_shadow_all.h"
-
-# if defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING
+# if defined(__SHADOW_RECORD_ALL__)
+# define BVH_FUNCTION_NAME bvh_intersect_shadow_all
+# define BVH_FUNCTION_FEATURES 0
# include "kernel/bvh/bvh_shadow_all.h"
-# endif
-
-# if defined(__HAIR__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
-# include "kernel/bvh/bvh_shadow_all.h"
-# endif
-# if defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION
-# include "kernel/bvh/bvh_shadow_all.h"
-# endif
-
-# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION
-# include "kernel/bvh/bvh_shadow_all.h"
-# endif
-#endif /* __SHADOW_RECORD_ALL__ */
+# if defined(__HAIR__)
+# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
+# define BVH_FUNCTION_FEATURES BVH_HAIR
+# include "kernel/bvh/bvh_shadow_all.h"
+# endif
+
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
+# define BVH_FUNCTION_FEATURES BVH_MOTION
+# include "kernel/bvh/bvh_shadow_all.h"
+# endif
+
+# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
+# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION
+# include "kernel/bvh/bvh_shadow_all.h"
+# endif
+# endif /* __SHADOW_RECORD_ALL__ */
/* Record all intersections - Volume BVH traversal */
-#if defined(__VOLUME_RECORD_ALL__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_all
-# define BVH_FUNCTION_FEATURES BVH_HAIR
-# include "kernel/bvh/bvh_volume_all.h"
-
-# if defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
+# if defined(__VOLUME_RECORD_ALL__)
+# define BVH_FUNCTION_NAME bvh_intersect_volume_all
+# define BVH_FUNCTION_FEATURES BVH_HAIR
# include "kernel/bvh/bvh_volume_all.h"
-# endif
-# if defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR
-# include "kernel/bvh/bvh_volume_all.h"
-# endif
-#endif /* __VOLUME_RECORD_ALL__ */
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
+# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
+# include "kernel/bvh/bvh_volume_all.h"
+# endif
+# endif /* __VOLUME_RECORD_ALL__ */
-#undef BVH_FEATURE
-#undef BVH_NAME_JOIN
-#undef BVH_NAME_EVAL
-#undef BVH_FUNCTION_FULL_NAME
+# undef BVH_FEATURE
+# undef BVH_NAME_JOIN
+# undef BVH_NAME_EVAL
+# undef BVH_FUNCTION_FULL_NAME
+
+#endif /* __KERNEL_OPTIX__ */
ccl_device_inline bool scene_intersect_valid(const Ray *ray)
{
@@ -173,31 +145,65 @@ ccl_device_inline bool scene_intersect_valid(const Ray *ray)
* such cases.
* From production scenes so far it seems it's enough to test first element
* only.
+ * Scene intersection may also be called with empty rays for conditional trace
+ * calls that evaluate to false, so filter those out.
*/
- return isfinite(ray->P.x);
+ return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f;
}
-/* Note: ray is passed by value to work around a possible CUDA compiler bug. */
ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
- const Ray ray,
+ const Ray *ray,
const uint visibility,
- Intersection *isect,
- uint *lcg_state,
- float difl,
- float extmax)
+ Intersection *isect)
{
PROFILING_INIT(kg, PROFILING_INTERSECT);
- if (!scene_intersect_valid(&ray)) {
+#ifdef __KERNEL_OPTIX__
+ uint p0 = 0;
+ uint p1 = 0;
+ uint p2 = 0;
+ uint p3 = 0;
+ uint p4 = visibility;
+ uint p5 = PRIMITIVE_NONE;
+
+ optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
+ ray->P,
+ ray->D,
+ 0.0f,
+ ray->t,
+ ray->time,
+ 0xF,
+ OPTIX_RAY_FLAG_NONE,
+ 0, // SBT offset for PG_HITD
+ 0,
+ 0,
+ p0,
+ p1,
+ p2,
+ p3,
+ p4,
+ p5);
+
+ isect->t = __uint_as_float(p0);
+ isect->u = __uint_as_float(p1);
+ isect->v = __uint_as_float(p2);
+ isect->prim = p3;
+ isect->object = p4;
+ isect->type = p5;
+
+ return p5 != PRIMITIVE_NONE;
+#else /* __KERNEL_OPTIX__ */
+ if (!scene_intersect_valid(ray)) {
return false;
}
-#ifdef __EMBREE__
+
+# ifdef __EMBREE__
if (kernel_data.bvh.scene) {
- isect->t = ray.t;
+ isect->t = ray->t;
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
IntersectContext rtc_ctx(&ctx);
RTCRayHit ray_hit;
- kernel_embree_setup_rayhit(ray, ray_hit, visibility);
+ kernel_embree_setup_rayhit(*ray, ray_hit, visibility);
rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit);
if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID &&
ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) {
@@ -206,46 +212,33 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
}
return false;
}
-#endif /* __EMBREE__ */
-#ifdef __OBJECT_MOTION__
+# endif /* __EMBREE__ */
+
+# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
-# ifdef __HAIR__
- if (kernel_data.bvh.have_curves)
- return bvh_intersect_hair_motion(kg, &ray, isect, visibility, lcg_state, difl, extmax);
-# endif /* __HAIR__ */
+# ifdef __HAIR__
+ if (kernel_data.bvh.have_curves) {
+ return bvh_intersect_hair_motion(kg, ray, isect, visibility);
+ }
+# endif /* __HAIR__ */
- return bvh_intersect_motion(kg, &ray, isect, visibility);
+ return bvh_intersect_motion(kg, ray, isect, visibility);
}
-#endif /* __OBJECT_MOTION__ */
-
-#ifdef __HAIR__
- if (kernel_data.bvh.have_curves)
- return bvh_intersect_hair(kg, &ray, isect, visibility, lcg_state, difl, extmax);
-#endif /* __HAIR__ */
-
-#ifdef __KERNEL_CPU__
-
-# ifdef __INSTANCING__
- if (kernel_data.bvh.have_instancing)
- return bvh_intersect_instancing(kg, &ray, isect, visibility);
-# endif /* __INSTANCING__ */
+# endif /* __OBJECT_MOTION__ */
- return bvh_intersect(kg, &ray, isect, visibility);
-#else /* __KERNEL_CPU__ */
-
-# ifdef __INSTANCING__
- return bvh_intersect_instancing(kg, &ray, isect, visibility);
-# else
- return bvh_intersect(kg, &ray, isect, visibility);
-# endif /* __INSTANCING__ */
+# ifdef __HAIR__
+ if (kernel_data.bvh.have_curves) {
+ return bvh_intersect_hair(kg, ray, isect, visibility);
+ }
+# endif /* __HAIR__ */
-#endif /* __KERNEL_CPU__ */
+ return bvh_intersect(kg, ray, isect, visibility);
+#endif /* __KERNEL_OPTIX__ */
}
#ifdef __BVH_LOCAL__
-/* Note: ray is passed by value to work around a possible CUDA compiler bug. */
ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
- const Ray ray,
+ const Ray *ray,
LocalIntersection *local_isect,
int local_object,
uint *lcg_state,
@@ -253,33 +246,74 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
{
PROFILING_INIT(kg, PROFILING_INTERSECT_LOCAL);
- if (!scene_intersect_valid(&ray)) {
- local_isect->num_hits = 0;
+# ifdef __KERNEL_OPTIX__
+ uint p0 = ((uint64_t)lcg_state) & 0xFFFFFFFF;
+ uint p1 = (((uint64_t)lcg_state) >> 32) & 0xFFFFFFFF;
+ uint p2 = ((uint64_t)local_isect) & 0xFFFFFFFF;
+ uint p3 = (((uint64_t)local_isect) >> 32) & 0xFFFFFFFF;
+ uint p4 = local_object;
+ // Set to zero on miss or if the ray is aborted, so it can be used as the return value
+ uint p5 = max_hits;
+
+ if (local_isect) {
+ local_isect->num_hits = 0; // Initialize hit count to zero
+ }
+ optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
+ ray->P,
+ ray->D,
+ 0.0f,
+ ray->t,
+ ray->time,
+ // Skip curves
+ 0x3,
+ // Need to always call into __anyhit__kernel_optix_local_hit
+ OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
+ 2, // SBT offset for PG_HITL
+ 0,
+ 0,
+ p0,
+ p1,
+ p2,
+ p3,
+ p4,
+ p5);
+
+ return p5;
+# else /* __KERNEL_OPTIX__ */
+ if (!scene_intersect_valid(ray)) {
+ if (local_isect) {
+ local_isect->num_hits = 0;
+ }
return false;
}
-# ifdef __EMBREE__
+
+# ifdef __EMBREE__
if (kernel_data.bvh.scene) {
- CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SSS);
+ const bool has_bvh = !(kernel_tex_fetch(__object_flag, local_object) &
+ SD_OBJECT_TRANSFORM_APPLIED);
+ CCLIntersectContext ctx(
+ kg, has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL);
ctx.lcg_state = lcg_state;
ctx.max_hits = max_hits;
- ctx.ss_isect = local_isect;
- local_isect->num_hits = 0;
- ctx.sss_object_id = local_object;
+ ctx.local_isect = local_isect;
+ if (local_isect) {
+ local_isect->num_hits = 0;
+ }
+ ctx.local_object_id = local_object;
IntersectContext rtc_ctx(&ctx);
RTCRay rtc_ray;
- kernel_embree_setup_ray(ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);
-
- /* Get the Embree scene for this intersection. */
- RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2);
- if (geom) {
- float3 P = ray.P;
- float3 dir = ray.D;
- float3 idir = ray.D;
- const int object_flag = kernel_tex_fetch(__object_flag, local_object);
- if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+ kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);
+
+ /* If this object has its own BVH, use it. */
+ if (has_bvh) {
+ RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2);
+ if (geom) {
+ float3 P = ray->P;
+ float3 dir = ray->D;
+ float3 idir = ray->D;
Transform ob_itfm;
rtc_ray.tfar = bvh_instance_motion_push(
- kg, local_object, &ray, &P, &dir, &idir, ray.t, &ob_itfm);
+ kg, local_object, ray, &P, &dir, &idir, ray->t, &ob_itfm);
/* bvh_instance_motion_push() returns the inverse transform but
* it's not needed here. */
(void)ob_itfm;
@@ -290,22 +324,30 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
rtc_ray.dir_x = dir.x;
rtc_ray.dir_y = dir.y;
rtc_ray.dir_z = dir.z;
+ RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom);
+ kernel_assert(scene);
+ if (scene) {
+ rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray);
+ }
}
- RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom);
- if (scene) {
- rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray);
- }
+ }
+ else {
+ rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
}
- return local_isect->num_hits > 0;
+ /* rtcOccluded1 sets tfar to -inf if a hit was found. */
+ return (local_isect && local_isect->num_hits > 0) || (rtc_ray.tfar < 0);
}
-# endif /* __EMBREE__ */
-# ifdef __OBJECT_MOTION__
+# endif /* __EMBREE__ */
+
+# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
- return bvh_intersect_local_motion(kg, &ray, local_isect, local_object, lcg_state, max_hits);
+ return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
}
-# endif /* __OBJECT_MOTION__ */
- return bvh_intersect_local(kg, &ray, local_isect, local_object, lcg_state, max_hits);
+# endif /* __OBJECT_MOTION__ */
+ return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
+# endif /* __KERNEL_OPTIX__ */
}
#endif
@@ -319,11 +361,41 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
{
PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW_ALL);
+# ifdef __KERNEL_OPTIX__
+ uint p0 = ((uint64_t)isect) & 0xFFFFFFFF;
+ uint p1 = (((uint64_t)isect) >> 32) & 0xFFFFFFFF;
+ uint p3 = max_hits;
+ uint p4 = visibility;
+ uint p5 = false;
+
+ *num_hits = 0; // Initialize hit count to zero
+ optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
+ ray->P,
+ ray->D,
+ 0.0f,
+ ray->t,
+ ray->time,
+ 0xF,
+ // Need to always call into __anyhit__kernel_optix_shadow_all_hit
+ OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
+ 1, // SBT offset for PG_HITS
+ 0,
+ 0,
+ p0,
+ p1,
+ *num_hits,
+ p3,
+ p4,
+ p5);
+
+ return p5;
+# else /* __KERNEL_OPTIX__ */
if (!scene_intersect_valid(ray)) {
*num_hits = 0;
return false;
}
-# ifdef __EMBREE__
+
+# ifdef __EMBREE__
if (kernel_data.bvh.scene) {
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
ctx.isect_s = isect;
@@ -331,7 +403,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
ctx.num_hits = 0;
IntersectContext rtc_ctx(&ctx);
RTCRay rtc_ray;
- kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_SHADOW);
+ kernel_embree_setup_ray(*ray, rtc_ray, visibility);
rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
if (ctx.num_hits > max_hits) {
@@ -340,32 +412,28 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
*num_hits = ctx.num_hits;
return rtc_ray.tfar == -INFINITY;
}
-# endif
-# ifdef __OBJECT_MOTION__
+# endif /* __EMBREE__ */
+
+# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
-# ifdef __HAIR__
+# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair_motion(kg, ray, isect, visibility, max_hits, num_hits);
}
-# endif /* __HAIR__ */
+# endif /* __HAIR__ */
return bvh_intersect_shadow_all_motion(kg, ray, isect, visibility, max_hits, num_hits);
}
-# endif /* __OBJECT_MOTION__ */
+# endif /* __OBJECT_MOTION__ */
-# ifdef __HAIR__
+# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair(kg, ray, isect, visibility, max_hits, num_hits);
}
-# endif /* __HAIR__ */
-
-# ifdef __INSTANCING__
- if (kernel_data.bvh.have_instancing) {
- return bvh_intersect_shadow_all_instancing(kg, ray, isect, visibility, max_hits, num_hits);
- }
-# endif /* __INSTANCING__ */
+# endif /* __HAIR__ */
return bvh_intersect_shadow_all(kg, ray, isect, visibility, max_hits, num_hits);
+# endif /* __KERNEL_OPTIX__ */
}
#endif /* __SHADOW_RECORD_ALL__ */
@@ -377,27 +445,54 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
{
PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME);
+# ifdef __KERNEL_OPTIX__
+ uint p0 = 0;
+ uint p1 = 0;
+ uint p2 = 0;
+ uint p3 = 0;
+ uint p4 = visibility;
+ uint p5 = PRIMITIVE_NONE;
+
+ optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
+ ray->P,
+ ray->D,
+ 0.0f,
+ ray->t,
+ ray->time,
+ // Skip everything but volumes
+ 0x2,
+ OPTIX_RAY_FLAG_NONE,
+ 0, // SBT offset for PG_HITD
+ 0,
+ 0,
+ p0,
+ p1,
+ p2,
+ p3,
+ p4,
+ p5);
+
+ isect->t = __uint_as_float(p0);
+ isect->u = __uint_as_float(p1);
+ isect->v = __uint_as_float(p2);
+ isect->prim = p3;
+ isect->object = p4;
+ isect->type = p5;
+
+ return p5 != PRIMITIVE_NONE;
+# else /* __KERNEL_OPTIX__ */
if (!scene_intersect_valid(ray)) {
return false;
}
-# ifdef __OBJECT_MOTION__
+
+# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
return bvh_intersect_volume_motion(kg, ray, isect, visibility);
}
-# endif /* __OBJECT_MOTION__ */
-# ifdef __KERNEL_CPU__
-# ifdef __INSTANCING__
- if (kernel_data.bvh.have_instancing)
- return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
-# endif /* __INSTANCING__ */
- return bvh_intersect_volume(kg, ray, isect, visibility);
-# else /* __KERNEL_CPU__ */
-# ifdef __INSTANCING__
- return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
-# else
+# endif /* __OBJECT_MOTION__ */
+
return bvh_intersect_volume(kg, ray, isect, visibility);
-# endif /* __INSTANCING__ */
-# endif /* __KERNEL_CPU__ */
+# endif /* __KERNEL_OPTIX__ */
}
#endif /* __VOLUME__ */
@@ -413,6 +508,7 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
if (!scene_intersect_valid(ray)) {
return false;
}
+
# ifdef __EMBREE__
if (kernel_data.bvh.scene) {
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
@@ -423,18 +519,16 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
RTCRay rtc_ray;
kernel_embree_setup_ray(*ray, rtc_ray, visibility);
rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
- return rtc_ray.tfar == -INFINITY;
+ return ctx.num_hits;
}
-# endif
+# endif /* __EMBREE__ */
+
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
}
# endif /* __OBJECT_MOTION__ */
-# ifdef __INSTANCING__
- if (kernel_data.bvh.have_instancing)
- return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits, visibility);
-# endif /* __INSTANCING__ */
+
return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
}
#endif /* __VOLUME_RECORD_ALL__ */
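
The OptiX paths above pass 64-bit pointers to optixTrace() by splitting them across pairs of 32-bit payload registers; a condensed sketch of the round trip (the device-side unpacking shown here is illustrative, not the actual kernel_optix.cu code):

  /* Trace side (see scene_intersect_local above): split the pointer. */
  uint p0 = ((uint64_t)local_isect) & 0xFFFFFFFF;
  uint p1 = (((uint64_t)local_isect) >> 32) & 0xFFFFFFFF;
  /* ...p0 and p1 are passed as the first two payload arguments of optixTrace()... */

  /* Hit-program side: reassemble the pointer from the two payload registers. */
  LocalIntersection *local_isect = (LocalIntersection *)(
      ((uint64_t)optixGetPayload_1() << 32) | optixGetPayload_0());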
diff --git a/intern/cycles/kernel/bvh/bvh_embree.h b/intern/cycles/kernel/bvh/bvh_embree.h
index 661bba54fd4..ca637288bee 100644
--- a/intern/cycles/kernel/bvh/bvh_embree.h
+++ b/intern/cycles/kernel/bvh/bvh_embree.h
@@ -17,9 +17,12 @@
#include <embree3/rtcore_ray.h>
#include <embree3/rtcore_scene.h>
+// clang-format off
#include "kernel/kernel_compat_cpu.h"
#include "kernel/split/kernel_split_data_types.h"
#include "kernel/kernel_globals.h"
+// clang-format on
+
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
@@ -28,9 +31,9 @@ struct CCLIntersectContext {
typedef enum {
RAY_REGULAR = 0,
RAY_SHADOW_ALL = 1,
- RAY_SSS = 2,
- RAY_VOLUME_ALL = 3,
-
+ RAY_LOCAL = 2,
+ RAY_SSS = 3,
+ RAY_VOLUME_ALL = 4,
} RayType;
KernelGlobals *kg;
@@ -42,8 +45,8 @@ struct CCLIntersectContext {
int num_hits;
/* for SSS Rays: */
- LocalIntersection *ss_isect;
- int sss_object_id;
+ LocalIntersection *local_isect;
+ int local_object_id;
uint *lcg_state;
CCLIntersectContext(KernelGlobals *kg_, RayType type_)
@@ -53,8 +56,8 @@ struct CCLIntersectContext {
max_hits = 1;
num_hits = 0;
isect_s = NULL;
- ss_isect = NULL;
- sss_object_id = -1;
+ local_isect = NULL;
+ local_object_id = -1;
lcg_state = NULL;
}
};
@@ -121,11 +124,11 @@ ccl_device_inline void kernel_embree_convert_hit(KernelGlobals *kg,
isect->type = kernel_tex_fetch(__prim_type, isect->prim);
}
-ccl_device_inline void kernel_embree_convert_local_hit(KernelGlobals *kg,
- const RTCRay *ray,
- const RTCHit *hit,
- Intersection *isect,
- int local_object_id)
+ccl_device_inline void kernel_embree_convert_sss_hit(KernelGlobals *kg,
+ const RTCRay *ray,
+ const RTCHit *hit,
+ Intersection *isect,
+ int local_object_id)
{
isect->u = 1.0f - hit->v - hit->u;
isect->v = hit->u;
diff --git a/intern/cycles/kernel/bvh/bvh_local.h b/intern/cycles/kernel/bvh/bvh_local.h
index 7a069ef1108..4006c9c1632 100644
--- a/intern/cycles/kernel/bvh/bvh_local.h
+++ b/intern/cycles/kernel/bvh/bvh_local.h
@@ -17,13 +17,6 @@
* limitations under the License.
*/
-#ifdef __QBVH__
-# include "kernel/bvh/qbvh_local.h"
-# ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_local.h"
-# endif
-#endif
-
#if BVH_FEATURE(BVH_HAIR)
# define NODE_INTERSECT bvh_node_intersect
#else
@@ -88,26 +81,6 @@ ccl_device_inline
object = local_object;
}
-#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
-# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect_t);
-# endif
- shuffle_swap_t shufflexyz[3];
-
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif
-
/* traversal loop */
do {
do {
@@ -117,33 +90,16 @@ ccl_device_inline
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-#if !defined(__KERNEL_SSE2__)
traverse_mask = NODE_INTERSECT(kg,
P,
-# if BVH_FEATURE(BVH_HAIR)
+#if BVH_FEATURE(BVH_HAIR)
dir,
-# endif
+#endif
idir,
isect_t,
node_addr,
PATH_RAY_ALL_VISIBILITY,
dist);
-#else // __KERNEL_SSE2__
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- PATH_RAY_ALL_VISIBILITY,
- dist);
-#endif // __KERNEL_SSE2__
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
@@ -247,20 +203,7 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
uint *lcg_state,
int max_hits)
{
- switch (kernel_data.bvh.bvh_layout) {
-#ifdef __KERNEL_AVX2__
- case BVH_LAYOUT_BVH8:
- return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
-#endif
-#ifdef __QBVH__
- case BVH_LAYOUT_BVH4:
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
-#endif
- case BVH_LAYOUT_BVH2:
- return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
- }
- kernel_assert(!"Should not happen");
- return false;
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
}
#undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h
index 042630121c8..5367bdb633c 100644
--- a/intern/cycles/kernel/bvh/bvh_nodes.h
+++ b/intern/cycles/kernel/bvh/bvh_nodes.h
@@ -28,7 +28,6 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals *k
return space;
}
-#if !defined(__KERNEL_SSE2__)
ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
const float3 P,
const float3 idir,
@@ -39,7 +38,9 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
{
/* fetch node data */
+#ifdef __VISIBILITY_FLAG__
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+#endif
float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1);
float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2);
float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3);
@@ -66,74 +67,13 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
dist[0] = c0min;
dist[1] = c1min;
-# ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
(((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
-# else
+#else
return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 2 : 0);
-# endif
-}
-
-ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
- const float3 P,
- const float3 idir,
- const float t,
- const float difl,
- const float extmax,
- const int node_addr,
- const uint visibility,
- float dist[2])
-{
-
- /* fetch node data */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1);
- float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2);
- float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3);
-
- /* intersect ray against child nodes */
- float c0lox = (node0.x - P.x) * idir.x;
- float c0hix = (node0.z - P.x) * idir.x;
- float c0loy = (node1.x - P.y) * idir.y;
- float c0hiy = (node1.z - P.y) * idir.y;
- float c0loz = (node2.x - P.z) * idir.z;
- float c0hiz = (node2.z - P.z) * idir.z;
- float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
- float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
-
- float c1lox = (node0.y - P.x) * idir.x;
- float c1hix = (node0.w - P.x) * idir.x;
- float c1loy = (node1.y - P.y) * idir.y;
- float c1hiy = (node1.w - P.y) * idir.y;
- float c1loz = (node2.y - P.z) * idir.z;
- float c1hiz = (node2.w - P.z) * idir.z;
- float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
- float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
-
- if (difl != 0.0f) {
- float hdiff = 1.0f + difl;
- float ldiff = 1.0f - difl;
- if (__float_as_int(cnodes.z) & PATH_RAY_CURVE) {
- c0min = max(ldiff * c0min, c0min - extmax);
- c0max = min(hdiff * c0max, c0max + extmax);
- }
- if (__float_as_int(cnodes.w) & PATH_RAY_CURVE) {
- c1min = max(ldiff * c1min, c1min - extmax);
- c1max = min(hdiff * c1max, c1max + extmax);
- }
- }
-
- dist[0] = c0min;
- dist[1] = c1min;
-
-# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
- (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
-# else
- return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 2 : 0);
-# endif
+#endif
}
ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals *kg,
@@ -162,41 +102,6 @@ ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals *kg
return tnear <= tfar;
}
-ccl_device_forceinline bool bvh_unaligned_node_intersect_child_robust(KernelGlobals *kg,
- const float3 P,
- const float3 dir,
- const float t,
- const float difl,
- int node_addr,
- int child,
- float dist[2])
-{
- Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child);
- float3 aligned_dir = transform_direction(&space, dir);
- float3 aligned_P = transform_point(&space, P);
- float3 nrdir = -bvh_inverse_direction(aligned_dir);
- float3 tLowerXYZ = aligned_P * nrdir;
- float3 tUpperXYZ = tLowerXYZ - nrdir;
- const float near_x = min(tLowerXYZ.x, tUpperXYZ.x);
- const float near_y = min(tLowerXYZ.y, tUpperXYZ.y);
- const float near_z = min(tLowerXYZ.z, tUpperXYZ.z);
- const float far_x = max(tLowerXYZ.x, tUpperXYZ.x);
- const float far_y = max(tLowerXYZ.y, tUpperXYZ.y);
- const float far_z = max(tLowerXYZ.z, tUpperXYZ.z);
- const float tnear = max4(0.0f, near_x, near_y, near_z);
- const float tfar = min4(t, far_x, far_y, far_z);
- *dist = tnear;
- if (difl != 0.0f) {
- /* TODO(sergey): Same as for QBVH, needs a proper use. */
- const float round_down = 1.0f - difl;
- const float round_up = 1.0f + difl;
- return round_down * tnear <= round_up * tfar;
- }
- else {
- return tnear <= tfar;
- }
-}
-
ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
const float3 P,
const float3 dir,
@@ -207,51 +112,21 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
float dist[2])
{
int mask = 0;
+#ifdef __VISIBILITY_FLAG__
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+#endif
if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
-# ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
if ((__float_as_uint(cnodes.x) & visibility))
-# endif
+#endif
{
mask |= 1;
}
}
if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) {
-# ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
if ((__float_as_uint(cnodes.y) & visibility))
-# endif
- {
- mask |= 2;
- }
- }
- return mask;
-}
-
-ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
- const float3 P,
- const float3 dir,
- const float3 idir,
- const float t,
- const float difl,
- const float extmax,
- const int node_addr,
- const uint visibility,
- float dist[2])
-{
- int mask = 0;
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- if (bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 0, &dist[0])) {
-# ifdef __VISIBILITY_FLAG__
- if ((__float_as_uint(cnodes.x) & visibility))
-# endif
- {
- mask |= 1;
- }
- }
- if (bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 1, &dist[1])) {
-# ifdef __VISIBILITY_FLAG__
- if ((__float_as_uint(cnodes.y) & visibility))
-# endif
+#endif
{
mask |= 2;
}
@@ -276,307 +151,3 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist);
}
}
-
-ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
- const float3 P,
- const float3 dir,
- const float3 idir,
- const float t,
- const float difl,
- const float extmax,
- const int node_addr,
- const uint visibility,
- float dist[2])
-{
- float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
- if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return bvh_unaligned_node_intersect_robust(
- kg, P, dir, idir, t, difl, extmax, node_addr, visibility, dist);
- }
- else {
- return bvh_aligned_node_intersect_robust(
- kg, P, idir, t, difl, extmax, node_addr, visibility, dist);
- }
-}
-#else /* !defined(__KERNEL_SSE2__) */
-
-int ccl_device_forceinline bvh_aligned_node_intersect(KernelGlobals *kg,
- const float3 &P,
- const float3 &dir,
- const ssef &tsplat,
- const ssef Psplat[3],
- const ssef idirsplat[3],
- const shuffle_swap_t shufflexyz[3],
- const int node_addr,
- const uint visibility,
- float dist[2])
-{
- /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
-
- /* fetch node data */
- const ssef *bvh_nodes = (ssef *)kg->__bvh_nodes.data + node_addr;
-
- /* intersect ray against child nodes */
- const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
- const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
- const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
-
- /* calculate { c0min, c1min, -c0max, -c1max} */
- ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
- const ssef tminmax = minmax ^ pn;
- const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
-
- dist[0] = tminmax[0];
- dist[1] = tminmax[1];
-
- int mask = movemask(lrhit);
-
-# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
- return cmask;
-# else
- return mask & 3;
-# endif
-}
-
-ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
- const float3 &P,
- const float3 &dir,
- const ssef &tsplat,
- const ssef Psplat[3],
- const ssef idirsplat[3],
- const shuffle_swap_t shufflexyz[3],
- const float difl,
- const float extmax,
- const int nodeAddr,
- const uint visibility,
- float dist[2])
-{
- /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
-
- /* fetch node data */
- const ssef *bvh_nodes = (ssef *)kg->__bvh_nodes.data + nodeAddr;
-
- /* intersect ray against child nodes */
- const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
- const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
- const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
-
- /* calculate { c0min, c1min, -c0max, -c1max} */
- ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
- const ssef tminmax = minmax ^ pn;
-
- if (difl != 0.0f) {
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr + 0);
- float4 *tminmaxview = (float4 *)&tminmax;
- float &c0min = tminmaxview->x, &c1min = tminmaxview->y;
- float &c0max = tminmaxview->z, &c1max = tminmaxview->w;
- float hdiff = 1.0f + difl;
- float ldiff = 1.0f - difl;
- if (__float_as_int(cnodes.x) & PATH_RAY_CURVE) {
- c0min = max(ldiff * c0min, c0min - extmax);
- c0max = min(hdiff * c0max, c0max + extmax);
- }
- if (__float_as_int(cnodes.y) & PATH_RAY_CURVE) {
- c1min = max(ldiff * c1min, c1min - extmax);
- c1max = min(hdiff * c1max, c1max + extmax);
- }
- }
-
- const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
-
- dist[0] = tminmax[0];
- dist[1] = tminmax[1];
-
- int mask = movemask(lrhit);
-
-# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr + 0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
- return cmask;
-# else
- return mask & 3;
-# endif
-}
-
-ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
- const float3 P,
- const float3 dir,
- const ssef &isect_near,
- const ssef &isect_far,
- const int node_addr,
- const uint visibility,
- float dist[2])
-{
- Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
- Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
-
- float3 aligned_dir0 = transform_direction(&space0, dir),
- aligned_dir1 = transform_direction(&space1, dir);
- float3 aligned_P0 = transform_point(&space0, P), aligned_P1 = transform_point(&space1, P);
- float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
- nrdir1 = -bvh_inverse_direction(aligned_dir1);
-
- ssef lower_x = ssef(aligned_P0.x * nrdir0.x, aligned_P1.x * nrdir1.x, 0.0f, 0.0f),
- lower_y = ssef(aligned_P0.y * nrdir0.y, aligned_P1.y * nrdir1.y, 0.0f, 0.0f),
- lower_z = ssef(aligned_P0.z * nrdir0.z, aligned_P1.z * nrdir1.z, 0.0f, 0.0f);
-
- ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
- upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
- upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
-
- ssef tnear_x = min(lower_x, upper_x);
- ssef tnear_y = min(lower_y, upper_y);
- ssef tnear_z = min(lower_z, upper_z);
- ssef tfar_x = max(lower_x, upper_x);
- ssef tfar_y = max(lower_y, upper_y);
- ssef tfar_z = max(lower_z, upper_z);
-
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- sseb vmask = tnear <= tfar;
- dist[0] = tnear.f[0];
- dist[1] = tnear.f[1];
-
- int mask = (int)movemask(vmask);
-
-# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
- return cmask;
-# else
- return mask & 3;
-# endif
-}
-
-ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
- const float3 P,
- const float3 dir,
- const ssef &isect_near,
- const ssef &isect_far,
- const float difl,
- const int node_addr,
- const uint visibility,
- float dist[2])
-{
- Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
- Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
-
- float3 aligned_dir0 = transform_direction(&space0, dir),
- aligned_dir1 = transform_direction(&space1, dir);
- float3 aligned_P0 = transform_point(&space0, P), aligned_P1 = transform_point(&space1, P);
- float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
- nrdir1 = -bvh_inverse_direction(aligned_dir1);
-
- ssef lower_x = ssef(aligned_P0.x * nrdir0.x, aligned_P1.x * nrdir1.x, 0.0f, 0.0f),
- lower_y = ssef(aligned_P0.y * nrdir0.y, aligned_P1.y * nrdir1.y, 0.0f, 0.0f),
- lower_z = ssef(aligned_P0.z * nrdir0.z, aligned_P1.z * nrdir1.z, 0.0f, 0.0f);
-
- ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
- upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
- upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
-
- ssef tnear_x = min(lower_x, upper_x);
- ssef tnear_y = min(lower_y, upper_y);
- ssef tnear_z = min(lower_z, upper_z);
- ssef tfar_x = max(lower_x, upper_x);
- ssef tfar_y = max(lower_y, upper_y);
- ssef tfar_z = max(lower_z, upper_z);
-
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- sseb vmask;
- if (difl != 0.0f) {
- const float round_down = 1.0f - difl;
- const float round_up = 1.0f + difl;
- vmask = round_down * tnear <= round_up * tfar;
- }
- else {
- vmask = tnear <= tfar;
- }
-
- dist[0] = tnear.f[0];
- dist[1] = tnear.f[1];
-
- int mask = (int)movemask(vmask);
-
-# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
- return cmask;
-# else
- return mask & 3;
-# endif
-}
-
-ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
- const float3 &P,
- const float3 &dir,
- const ssef &isect_near,
- const ssef &isect_far,
- const ssef &tsplat,
- const ssef Psplat[3],
- const ssef idirsplat[3],
- const shuffle_swap_t shufflexyz[3],
- const int node_addr,
- const uint visibility,
- float dist[2])
-{
- float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
- if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return bvh_unaligned_node_intersect(
- kg, P, dir, isect_near, isect_far, node_addr, visibility, dist);
- }
- else {
- return bvh_aligned_node_intersect(
- kg, P, dir, tsplat, Psplat, idirsplat, shufflexyz, node_addr, visibility, dist);
- }
-}
-
-ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
- const float3 &P,
- const float3 &dir,
- const ssef &isect_near,
- const ssef &isect_far,
- const ssef &tsplat,
- const ssef Psplat[3],
- const ssef idirsplat[3],
- const shuffle_swap_t shufflexyz[3],
- const float difl,
- const float extmax,
- const int node_addr,
- const uint visibility,
- float dist[2])
-{
- float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
- if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return bvh_unaligned_node_intersect_robust(
- kg, P, dir, isect_near, isect_far, difl, node_addr, visibility, dist);
- }
- else {
- return bvh_aligned_node_intersect_robust(kg,
- P,
- dir,
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- difl,
- extmax,
- node_addr,
- visibility,
- dist);
- }
-}
-#endif /* !defined(__KERNEL_SSE2__) */
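Note: the SSE node-intersection helpers removed above all reduce to the same two-child slab test (the unaligned variants simply transform the ray into each child's space first). The following is a minimal standalone sketch of that test, written scalar for clarity; `Aabb`, `ChildPair` and `node_intersect` are invented names, not Cycles API.

#include <algorithm>
#include <cstdint>

struct Aabb {
  float lo[3], hi[3];
};

struct ChildPair {
  Aabb bounds[2];
  uint32_t visibility[2]; /* per-child visibility bits, as in __VISIBILITY_FLAG__ */
};

/* Returns a 2-bit mask: bit 0 set if the ray hits child 0, bit 1 for child 1.
 * P is the ray origin, idir the per-axis inverse direction, [t_near, t_far]
 * the active ray interval, `visibility` the ray's visibility flags. */
static int node_intersect(const ChildPair &node,
                          const float P[3],
                          const float idir[3],
                          float t_near,
                          float t_far,
                          uint32_t visibility)
{
  int mask = 0;
  for (int c = 0; c < 2; c++) {
    float tn = t_near, tf = t_far;
    for (int a = 0; a < 3; a++) {
      /* Slab test: entry/exit distances along this axis. */
      const float t0 = (node.bounds[c].lo[a] - P[a]) * idir[a];
      const float t1 = (node.bounds[c].hi[a] - P[a]) * idir[a];
      tn = std::max(tn, std::min(t0, t1));
      tf = std::min(tf, std::max(t0, t1));
    }
    if (tn <= tf && (node.visibility[c] & visibility)) {
      mask |= 1 << c;
    }
  }
  return mask;
}

The SSE version evaluates both children in one set of vector operations; the scalar loop above is the same arithmetic, one child at a time.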
diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index b362779549c..dccd257d2de 100644
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -17,13 +17,6 @@
* limitations under the License.
*/
-#ifdef __QBVH__
-# include "kernel/bvh/qbvh_shadow_all.h"
-# ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_shadow_all.h"
-# endif
-#endif
-
#if BVH_FEATURE(BVH_HAIR)
# define NODE_INTERSECT bvh_node_intersect
#else
@@ -34,7 +27,6 @@
* enabled/disabled. This way we can compile optimized versions for each case
* without new features slowing things down.
*
- * BVH_INSTANCING: object instancing
* BVH_HAIR: hair curve rendering
* BVH_MOTION: motion blur rendering
*/
@@ -76,33 +68,11 @@ ccl_device_inline
Transform ob_itfm;
#endif
-#if BVH_FEATURE(BVH_INSTANCING)
int num_hits_in_instance = 0;
-#endif
*num_hits = 0;
isect_array->t = tmax;
-#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
-# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect_t);
-# endif
- shuffle_swap_t shufflexyz[3];
-
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif /* __KERNEL_SSE2__ */
-
/* traversal loop */
do {
do {
@@ -112,33 +82,16 @@ ccl_device_inline
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-#if !defined(__KERNEL_SSE2__)
traverse_mask = NODE_INTERSECT(kg,
P,
-# if BVH_FEATURE(BVH_HAIR)
+#if BVH_FEATURE(BVH_HAIR)
dir,
-# endif
+#endif
idir,
isect_t,
node_addr,
visibility,
dist);
-#else // __KERNEL_SSE2__
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- visibility,
- dist);
-#endif // __KERNEL_SSE2__
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
@@ -174,9 +127,7 @@ ccl_device_inline
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
int prim_addr = __float_as_int(leaf.x);
-#if BVH_FEATURE(BVH_INSTANCING)
if (prim_addr >= 0) {
-#endif
const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
const uint p_type = type & PRIMITIVE_ALL;
@@ -207,37 +158,13 @@ ccl_device_inline
}
#endif
#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
+ case PRIMITIVE_CURVE_THICK:
+ case PRIMITIVE_MOTION_CURVE_THICK:
+ case PRIMITIVE_CURVE_RIBBON:
+ case PRIMITIVE_MOTION_CURVE_RIBBON: {
const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
- if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(kg,
- isect_array,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type,
- NULL,
- 0,
- 0);
- }
- else {
- hit = curve_intersect(kg,
- isect_array,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type,
- NULL,
- 0,
- 0);
- }
+ hit = curve_intersect(
+ kg, isect_array, P, dir, visibility, object, prim_addr, ray->time, curve_type);
break;
}
#endif
@@ -282,9 +209,7 @@ ccl_device_inline
/* move on to next entry in intersections array */
isect_array++;
(*num_hits)++;
-#if BVH_FEATURE(BVH_INSTANCING)
num_hits_in_instance++;
-#endif
isect_array->t = isect_t;
}
@@ -292,32 +217,19 @@ ccl_device_inline
prim_addr++;
}
}
-#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-# else
+#else
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-# endif
+#endif
num_hits_in_instance = 0;
isect_array->t = isect_t;
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect_t);
-# endif
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
-
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
@@ -325,10 +237,8 @@ ccl_device_inline
node_addr = kernel_tex_fetch(__object_node, object);
}
}
-#endif /* FEATURE(BVH_INSTANCING) */
} while (node_addr != ENTRYPOINT_SENTINEL);
-#if BVH_FEATURE(BVH_INSTANCING)
if (stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
@@ -336,11 +246,11 @@ ccl_device_inline
if (num_hits_in_instance) {
float t_fac;
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
+#else
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
+#endif
/* scale isect->t to adjust for instancing */
for (int i = 0; i < num_hits_in_instance; i++) {
@@ -348,33 +258,20 @@ ccl_device_inline
}
}
else {
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-# else
+#else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-# endif
+#endif
}
isect_t = tmax;
isect_array->t = isect_t;
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect_t);
-# endif
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
-
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
-#endif /* FEATURE(BVH_INSTANCING) */
} while (node_addr != ENTRYPOINT_SENTINEL);
return false;
@@ -387,20 +284,7 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
const uint max_hits,
uint *num_hits)
{
- switch (kernel_data.bvh.bvh_layout) {
-#ifdef __KERNEL_AVX2__
- case BVH_LAYOUT_BVH8:
- return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
-#endif
-#ifdef __QBVH__
- case BVH_LAYOUT_BVH4:
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
-#endif
- case BVH_LAYOUT_BVH2:
- return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
- }
- kernel_assert(!"Should not happen");
- return false;
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
}
#undef BVH_FUNCTION_NAME
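Note: the shadow-all traversal kept by this hunk records hits while inside an instance and then rescales their distances on instance pop, because those distances were measured along the object-space ray. A minimal sketch of that rescale follows; the `Transform` struct and helper names are stand-ins for the Cycles types, and `hit_t` is assumed to point just past the most recently recorded hit (mirroring `isect_array`).

#include <cmath>

struct Transform {
  float m[3][4]; /* row-major 3x4 affine matrix, world-to-object */
};

static void transform_direction(const Transform &t, const float d[3], float out[3])
{
  for (int i = 0; i < 3; i++) {
    out[i] = t.m[i][0] * d[0] + t.m[i][1] * d[1] + t.m[i][2] * d[2];
  }
}

/* Scale the last `num_hits_in_instance` recorded hit distances from object
 * space back to world space, as the traversal above does after popping. */
static void rescale_instance_hits(float *hit_t,
                                  int num_hits_in_instance,
                                  const Transform &world_to_object,
                                  const float world_dir[3])
{
  float object_dir[3];
  transform_direction(world_to_object, world_dir, object_dir);
  const float len = std::sqrt(object_dir[0] * object_dir[0] +
                              object_dir[1] * object_dir[1] +
                              object_dir[2] * object_dir[2]);
  const float t_fac = 1.0f / len;
  for (int i = 0; i < num_hits_in_instance; i++) {
    hit_t[-i - 1] *= t_fac; /* hits were appended just before the cursor */
  }
}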
diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h
index 34a06d003bb..8b2699ab807 100644
--- a/intern/cycles/kernel/bvh/bvh_traversal.h
+++ b/intern/cycles/kernel/bvh/bvh_traversal.h
@@ -17,42 +17,24 @@
* limitations under the License.
*/
-#ifdef __QBVH__
-# include "kernel/bvh/qbvh_traversal.h"
-#endif
-#ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_traversal.h"
-#endif
-
#if BVH_FEATURE(BVH_HAIR)
# define NODE_INTERSECT bvh_node_intersect
-# define NODE_INTERSECT_ROBUST bvh_node_intersect_robust
#else
# define NODE_INTERSECT bvh_aligned_node_intersect
-# define NODE_INTERSECT_ROBUST bvh_aligned_node_intersect_robust
#endif
/* This is a template BVH traversal function, where various features can be
* enabled/disabled. This way we can compile optimized versions for each case
* without new features slowing things down.
*
- * BVH_INSTANCING: object instancing
* BVH_HAIR: hair curve rendering
- * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
* BVH_MOTION: motion blur rendering
*/
ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
const Ray *ray,
Intersection *isect,
- const uint visibility
-#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- ,
- uint *lcg_state,
- float difl,
- float extmax
-#endif
-)
+ const uint visibility)
{
/* todo:
* - test if pushing distance on the stack helps (for non shadow rays)
@@ -87,26 +69,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
BVH_DEBUG_INIT();
-#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
-# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect->t);
-# endif
- shuffle_swap_t shufflexyz[3];
-
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif
-
/* traversal loop */
do {
do {
@@ -116,75 +78,18 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-#if !defined(__KERNEL_SSE2__)
-# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- if (difl != 0.0f) {
- traverse_mask = NODE_INTERSECT_ROBUST(kg,
- P,
-# if BVH_FEATURE(BVH_HAIR)
- dir,
-# endif
- idir,
- isect->t,
- difl,
- extmax,
- node_addr,
- visibility,
- dist);
- }
- else
-# endif
{
traverse_mask = NODE_INTERSECT(kg,
P,
-# if BVH_FEATURE(BVH_HAIR)
+#if BVH_FEATURE(BVH_HAIR)
dir,
-# endif
+#endif
idir,
isect->t,
node_addr,
visibility,
dist);
}
-#else // __KERNEL_SSE2__
-# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- if (difl != 0.0f) {
- traverse_mask = NODE_INTERSECT_ROBUST(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- difl,
- extmax,
- node_addr,
- visibility,
- dist);
- }
- else
-# endif
- {
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- visibility,
- dist);
- }
-#endif // __KERNEL_SSE2__
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
@@ -221,9 +126,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
int prim_addr = __float_as_int(leaf.x);
-#if BVH_FEATURE(BVH_INSTANCING)
if (prim_addr >= 0) {
-#endif
const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
@@ -239,17 +142,8 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
/* shadow ray early termination */
-#if defined(__KERNEL_SSE2__)
if (visibility & PATH_RAY_SHADOW_OPAQUE)
return true;
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-#else
- if (visibility & PATH_RAY_SHADOW_OPAQUE)
- return true;
-#endif
}
}
break;
@@ -262,71 +156,28 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
if (motion_triangle_intersect(
kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
/* shadow ray early termination */
-# if defined(__KERNEL_SSE2__)
- if (visibility & PATH_RAY_SHADOW_OPAQUE)
- return true;
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-# else
if (visibility & PATH_RAY_SHADOW_OPAQUE)
return true;
-# endif
}
}
break;
}
#endif /* BVH_FEATURE(BVH_MOTION) */
#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
+ case PRIMITIVE_CURVE_THICK:
+ case PRIMITIVE_MOTION_CURVE_THICK:
+ case PRIMITIVE_CURVE_RIBBON:
+ case PRIMITIVE_MOTION_CURVE_RIBBON: {
for (; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_INTERSECTION();
const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
- bool hit;
- if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(kg,
- isect,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type,
- lcg_state,
- difl,
- extmax);
- }
- else {
- hit = curve_intersect(kg,
- isect,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type,
- lcg_state,
- difl,
- extmax);
- }
+ const bool hit = curve_intersect(
+ kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type);
if (hit) {
/* shadow ray early termination */
-# if defined(__KERNEL_SSE2__)
if (visibility & PATH_RAY_SHADOW_OPAQUE)
return true;
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-# else
- if (visibility & PATH_RAY_SHADOW_OPAQUE)
- return true;
-# endif
}
}
break;
@@ -334,30 +185,16 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#endif /* BVH_FEATURE(BVH_HAIR) */
}
}
-#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
+#else
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
+#endif
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
@@ -368,38 +205,22 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
BVH_DEBUG_NEXT_INSTANCE();
}
}
-#endif /* FEATURE(BVH_INSTANCING) */
} while (node_addr != ENTRYPOINT_SENTINEL);
-#if BVH_FEATURE(BVH_INSTANCING)
if (stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* instance pop */
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
+#else
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
+#endif
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
-#endif /* FEATURE(BVH_INSTANCING) */
} while (node_addr != ENTRYPOINT_SENTINEL);
return (isect->prim != PRIM_NONE);
@@ -408,62 +229,11 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
const Ray *ray,
Intersection *isect,
- const uint visibility
-#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- ,
- uint *lcg_state,
- float difl,
- float extmax
-#endif
-)
+ const uint visibility)
{
- switch (kernel_data.bvh.bvh_layout) {
-#ifdef __KERNEL_AVX2__
- case BVH_LAYOUT_BVH8:
- return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
- ray,
- isect,
- visibility
-# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- ,
- lcg_state,
- difl,
- extmax
-# endif
- );
-#endif
-#ifdef __QBVH__
- case BVH_LAYOUT_BVH4:
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
- ray,
- isect,
- visibility
-# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- ,
- lcg_state,
- difl,
- extmax
-# endif
- );
-#endif /* __QBVH__ */
- case BVH_LAYOUT_BVH2:
- return BVH_FUNCTION_FULL_NAME(BVH)(kg,
- ray,
- isect,
- visibility
-#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- ,
- lcg_state,
- difl,
- extmax
-#endif
- );
- }
- kernel_assert(!"Should not happen");
- return false;
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
}
#undef BVH_FUNCTION_NAME
#undef BVH_FUNCTION_FEATURES
#undef NODE_INTERSECT
-#undef NODE_INTERSECT_ROBUST
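Note: the comment retained in this file ("various features can be enabled/disabled... compile optimized versions for each case") describes the feature-template pattern that survives the patch. Cycles does this at preprocessor level by re-including the traversal body under different `BVH_FUNCTION_NAME` / feature defines; the sketch below is an illustrative single-file approximation that generates variants with a macro and lets the compiler fold the constant feature test, not the kernel's actual mechanism.

#include <cstdio>

enum { BVH_MOTION_BIT = 1, BVH_HAIR_BIT = 2 };

/* Each expansion compiles a dedicated variant; a disabled feature's branch
 * is dead code the compiler removes, so it costs nothing at run time. */
#define DEFINE_TRAVERSAL_VARIANT(name, features) \
  static const char *name(void) \
  { \
    if ((features) & BVH_HAIR_BIT) { \
      return #name ": curve intersection compiled in"; \
    } \
    return #name ": triangles only"; \
  }

DEFINE_TRAVERSAL_VARIANT(bvh_intersect, 0)
DEFINE_TRAVERSAL_VARIANT(bvh_intersect_hair, BVH_HAIR_BIT)
DEFINE_TRAVERSAL_VARIANT(bvh_intersect_hair_motion, BVH_HAIR_BIT | BVH_MOTION_BIT)

int main(void)
{
  std::printf("%s\n%s\n%s\n",
              bvh_intersect(),
              bvh_intersect_hair(),
              bvh_intersect_hair_motion());
  return 0;
}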
diff --git a/intern/cycles/kernel/bvh/bvh_types.h b/intern/cycles/kernel/bvh/bvh_types.h
index 16f3b03f842..b173568266b 100644
--- a/intern/cycles/kernel/bvh/bvh_types.h
+++ b/intern/cycles/kernel/bvh/bvh_types.h
@@ -31,14 +31,10 @@ CCL_NAMESPACE_BEGIN
/* 64 object BVH + 64 mesh BVH + 64 object node splitting */
#define BVH_STACK_SIZE 192
-#define BVH_QSTACK_SIZE 384
-#define BVH_OSTACK_SIZE 768
/* BVH intersection function variations */
-#define BVH_INSTANCING 1
-#define BVH_MOTION 2
-#define BVH_HAIR 4
-#define BVH_HAIR_MINIMUM_WIDTH 8
+#define BVH_MOTION 1
+#define BVH_HAIR 2
#define BVH_NAME_JOIN(x, y) x##_##y
#define BVH_NAME_EVAL(x, y) BVH_NAME_JOIN(x, y)
diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h
index c83b0d783f4..1f2ea47269b 100644
--- a/intern/cycles/kernel/bvh/bvh_volume.h
+++ b/intern/cycles/kernel/bvh/bvh_volume.h
@@ -17,13 +17,6 @@
* limitations under the License.
*/
-#ifdef __QBVH__
-# include "kernel/bvh/qbvh_volume.h"
-# ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_volume.h"
-# endif
-#endif
-
#if BVH_FEATURE(BVH_HAIR)
# define NODE_INTERSECT bvh_node_intersect
#else
@@ -34,7 +27,6 @@
* various features can be enabled/disabled. This way we can compile optimized
* versions for each case without new features slowing things down.
*
- * BVH_INSTANCING: object instancing
* BVH_MOTION: motion blur rendering
*/
@@ -79,26 +71,6 @@ ccl_device_inline
isect->prim = PRIM_NONE;
isect->object = OBJECT_NONE;
-#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
-# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect->t);
-# endif
- shuffle_swap_t shufflexyz[3];
-
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif
-
/* traversal loop */
do {
do {
@@ -108,33 +80,16 @@ ccl_device_inline
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-#if !defined(__KERNEL_SSE2__)
traverse_mask = NODE_INTERSECT(kg,
P,
-# if BVH_FEATURE(BVH_HAIR)
+#if BVH_FEATURE(BVH_HAIR)
dir,
-# endif
+#endif
idir,
isect->t,
node_addr,
visibility,
dist);
-#else // __KERNEL_SSE2__
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- visibility,
- dist);
-#endif // __KERNEL_SSE2__
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
@@ -170,9 +125,7 @@ ccl_device_inline
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
int prim_addr = __float_as_int(leaf.x);
-#if BVH_FEATURE(BVH_INSTANCING)
if (prim_addr >= 0) {
-#endif
const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
@@ -222,31 +175,17 @@ ccl_device_inline
}
}
}
-#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME) {
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
+#else
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
+#endif
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
@@ -262,38 +201,22 @@ ccl_device_inline
}
}
}
-#endif /* FEATURE(BVH_INSTANCING) */
} while (node_addr != ENTRYPOINT_SENTINEL);
-#if BVH_FEATURE(BVH_INSTANCING)
if (stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* instance pop */
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
+#else
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
+#endif
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
-#endif /* FEATURE(BVH_MOTION) */
} while (node_addr != ENTRYPOINT_SENTINEL);
return (isect->prim != PRIM_NONE);
@@ -304,20 +227,7 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
Intersection *isect,
const uint visibility)
{
- switch (kernel_data.bvh.bvh_layout) {
-#ifdef __KERNEL_AVX2__
- case BVH_LAYOUT_BVH8:
- return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect, visibility);
-#endif
-#ifdef __QBVH__
- case BVH_LAYOUT_BVH4:
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect, visibility);
-#endif
- case BVH_LAYOUT_BVH2:
- return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
- }
- kernel_assert(!"Should not happen");
- return false;
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
}
#undef BVH_FUNCTION_NAME
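Note: the volume traversal kept by this hunk only descends into an instance when the object is flagged as containing a volume. A short sketch of that filter, with invented container types (the kernel reads the flag from the `__object_flag` texture instead):

#include <cstdint>
#include <vector>

enum : uint32_t { HAS_VOLUME_FLAG = 1u << 0 }; /* stand-in for SD_OBJECT_HAS_VOLUME */

struct InstanceLeaf {
  int object;    /* object index */
  int root_node; /* BVH root of the instanced geometry */
};

/* Returns the node to continue traversal with, or -1 to skip the instance. */
static int maybe_push_volume_instance(const InstanceLeaf &leaf,
                                      const std::vector<uint32_t> &object_flags,
                                      std::vector<int> &traversal_stack)
{
  if (!(object_flags[leaf.object] & HAS_VOLUME_FLAG)) {
    return -1; /* purely-surface object: volume rays do not descend */
  }
  traversal_stack.push_back(-1); /* sentinel, like ENTRYPOINT_SENTINEL above */
  return leaf.root_node;
}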
diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h
index ae8c4d12e8a..a8664cc4331 100644
--- a/intern/cycles/kernel/bvh/bvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/bvh_volume_all.h
@@ -17,13 +17,6 @@
* limitations under the License.
*/
-#ifdef __QBVH__
-# include "kernel/bvh/qbvh_volume_all.h"
-# ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_volume_all.h"
-# endif
-#endif
-
#if BVH_FEATURE(BVH_HAIR)
# define NODE_INTERSECT bvh_node_intersect
#else
@@ -34,7 +27,6 @@
* various features can be enabled/disabled. This way we can compile optimized
* versions for each case without new features slowing things down.
*
- * BVH_INSTANCING: object instancing
* BVH_MOTION: motion blur rendering
*/
@@ -76,33 +68,11 @@ ccl_device_inline
Transform ob_itfm;
#endif
-#if BVH_FEATURE(BVH_INSTANCING)
int num_hits_in_instance = 0;
-#endif
uint num_hits = 0;
isect_array->t = tmax;
-#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
-# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect_t);
-# endif
- shuffle_swap_t shufflexyz[3];
-
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif /* __KERNEL_SSE2__ */
-
/* traversal loop */
do {
do {
@@ -112,33 +82,16 @@ ccl_device_inline
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-#if !defined(__KERNEL_SSE2__)
traverse_mask = NODE_INTERSECT(kg,
P,
-# if BVH_FEATURE(BVH_HAIR)
+#if BVH_FEATURE(BVH_HAIR)
dir,
-# endif
+#endif
idir,
isect_t,
node_addr,
visibility,
dist);
-#else // __KERNEL_SSE2__
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- visibility,
- dist);
-#endif // __KERNEL_SSE2__
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
@@ -174,9 +127,7 @@ ccl_device_inline
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
int prim_addr = __float_as_int(leaf.x);
-#if BVH_FEATURE(BVH_INSTANCING)
if (prim_addr >= 0) {
-#endif
const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
bool hit;
@@ -204,25 +155,21 @@ ccl_device_inline
/* Move on to next entry in intersections array. */
isect_array++;
num_hits++;
-#if BVH_FEATURE(BVH_INSTANCING)
num_hits_in_instance++;
-#endif
isect_array->t = isect_t;
if (num_hits == max_hits) {
-#if BVH_FEATURE(BVH_INSTANCING)
if (object != OBJECT_NONE) {
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
+#else
Transform itfm = object_fetch_transform(
kg, object, OBJECT_INVERSE_TRANSFORM);
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
+#endif
for (int i = 0; i < num_hits_in_instance; i++) {
(isect_array - i - 1)->t *= t_fac;
}
}
-#endif /* BVH_FEATURE(BVH_INSTANCING) */
return num_hits;
}
}
@@ -248,25 +195,21 @@ ccl_device_inline
/* Move on to next entry in intersections array. */
isect_array++;
num_hits++;
-# if BVH_FEATURE(BVH_INSTANCING)
num_hits_in_instance++;
-# endif
isect_array->t = isect_t;
if (num_hits == max_hits) {
-# if BVH_FEATURE(BVH_INSTANCING)
if (object != OBJECT_NONE) {
-# if BVH_FEATURE(BVH_MOTION)
+# if BVH_FEATURE(BVH_MOTION)
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
+# else
Transform itfm = object_fetch_transform(
kg, object, OBJECT_INVERSE_TRANSFORM);
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
+# endif
for (int i = 0; i < num_hits_in_instance; i++) {
(isect_array - i - 1)->t *= t_fac;
}
}
-# endif /* BVH_FEATURE(BVH_INSTANCING) */
return num_hits;
}
}
@@ -279,35 +222,21 @@ ccl_device_inline
}
}
}
-#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME) {
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
isect_t = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-# else
+#else
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-# endif
+#endif
num_hits_in_instance = 0;
isect_array->t = isect_t;
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect_t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
-
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
@@ -322,55 +251,39 @@ ccl_device_inline
}
}
}
-#endif /* FEATURE(BVH_INSTANCING) */
} while (node_addr != ENTRYPOINT_SENTINEL);
-#if BVH_FEATURE(BVH_INSTANCING)
if (stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* Instance pop. */
if (num_hits_in_instance) {
float t_fac;
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
+#else
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
+#endif
/* Scale isect->t to adjust for instancing. */
for (int i = 0; i < num_hits_in_instance; i++) {
(isect_array - i - 1)->t *= t_fac;
}
}
else {
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-# else
+#else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-# endif
+#endif
}
isect_t = tmax;
isect_array->t = isect_t;
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect_t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
-
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
-#endif /* FEATURE(BVH_INSTANCING) */
} while (node_addr != ENTRYPOINT_SENTINEL);
return num_hits;
@@ -382,20 +295,7 @@ ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
const uint max_hits,
const uint visibility)
{
- switch (kernel_data.bvh.bvh_layout) {
-#ifdef __KERNEL_AVX2__
- case BVH_LAYOUT_BVH8:
- return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect_array, max_hits, visibility);
-#endif
-#ifdef __QBVH__
- case BVH_LAYOUT_BVH4:
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect_array, max_hits, visibility);
-#endif
- case BVH_LAYOUT_BVH2:
- return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, max_hits, visibility);
- }
- kernel_assert(!"Should not happen");
- return 0;
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, max_hits, visibility);
}
#undef BVH_FUNCTION_NAME
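Note: bvh_volume_all accumulates every volume hit into a caller-provided array and stops once `max_hits` is reached, as seen in the retained code above. A trivial standalone sketch of that accumulation (the `Hit` struct is illustrative):

#include <cstdint>

struct Hit {
  float t;
  int prim;
  int object;
};

/* Append one hit; returns false once traversal should stop because the
 * output array is full (mirrors the `num_hits == max_hits` check). */
static bool record_hit(Hit *hits, uint32_t &num_hits, uint32_t max_hits,
                       float t, int prim, int object)
{
  hits[num_hits] = {t, prim, object};
  num_hits++;
  return num_hits < max_hits;
}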
diff --git a/intern/cycles/kernel/bvh/obvh_local.h b/intern/cycles/kernel/bvh/obvh_local.h
deleted file mode 100644
index e6bb548bc5b..00000000000
--- a/intern/cycles/kernel/bvh/obvh_local.h
+++ /dev/null
@@ -1,398 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for subsurface scattering, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT obvh_node_intersect
-#else
-# define NODE_INTERSECT obvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
- const Ray *ray,
- LocalIntersection *local_isect,
- int local_object,
- uint *lcg_state,
- int max_hits)
-{
- /* Traversal stack in CUDA thread-local memory. */
- OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_tex_fetch(__object_node, local_object);
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = ray->t;
-
- if (local_isect != NULL) {
- local_isect->num_hits = 0;
- }
- kernel_assert((local_isect == NULL) == (max_hits == 0));
-
- const int object_flag = kernel_tex_fetch(__object_flag, local_object);
- if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
- isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-#else
- isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
-#endif
- object = local_object;
- }
-
- avxf tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
- avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#endif
- avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- avxf dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- avxf cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
-
- /* Five children are hit, push all onto stack and sort 5
- * stack items, continue with closest child
- */
- r = __bscf(child_mask);
- int c4 = __float_as_int(cnodes[r]);
- float d4 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
- /* Six children are hit, push all onto stack and sort 6
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c5 = __float_as_int(cnodes[r]);
- float d5 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
-
- /* Seven children are hit, push all onto stack and sort 7
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c6 = __float_as_int(cnodes[r]);
- float d6 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
- /* Eight children are hit, push all onto stack and sort 8
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c7 = __float_as_int(cnodes[r]);
- float d7 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c7;
- traversal_stack[stack_ptr].dist = d7;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6],
- &traversal_stack[stack_ptr - 7]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
- int prim_addr = __float_as_int(leaf.x);
-
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- /* Intersect ray against primitive, */
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (triangle_intersect_local(kg,
- local_isect,
- P,
- dir,
- object,
- local_object,
- prim_addr,
- isect_t,
- lcg_state,
- max_hits)) {
- return true;
- }
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- /* Intersect ray against primitive. */
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (motion_triangle_intersect_local(kg,
- local_isect,
- P,
- dir,
- ray->time,
- object,
- local_object,
- prim_addr,
- isect_t,
- lcg_state,
- max_hits)) {
- return true;
- }
- }
- break;
- }
-#endif
- default:
- break;
- }
- }
- } while (node_addr != ENTRYPOINT_SENTINEL);
- } while (node_addr != ENTRYPOINT_SENTINEL);
- return false;
-}
-
-#undef NODE_INTERSECT
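Note: the deleted OBVH traversal above walks an 8-wide node by pulling child indices out of a hit mask with `__bscf`, continuing with the nearest child and pushing the rest (sorted) onto the stack. The sketch below shows the same selection logic with a portable bit scan; `StackItem` and `select_child` are invented names, and `__builtin_ctz` is a GCC/Clang builtin used as a stand-in for `__bscf`.

#include <algorithm>
#include <cassert>
#include <utility>

struct StackItem {
  int addr;
  float dist;
};

static int pop_lowest_bit(int &mask)
{
  const int r = __builtin_ctz(mask); /* index of the lowest set bit */
  mask &= mask - 1;                  /* clear it */
  return r;
}

/* child_addr/child_dist hold one entry per wide-node child (up to 8).
 * Returns the address to continue with; the remaining hit children are
 * pushed on the stack so the nearest of them is popped first. */
static int select_child(int child_mask,
                        const int child_addr[8],
                        const float child_dist[8],
                        StackItem *stack,
                        int &stack_ptr)
{
  assert(child_mask != 0);
  int r = pop_lowest_bit(child_mask);
  int best_addr = child_addr[r];
  float best_dist = child_dist[r];
  const int first_pushed = stack_ptr + 1;
  while (child_mask != 0) {
    r = pop_lowest_bit(child_mask);
    int addr = child_addr[r];
    float dist = child_dist[r];
    if (dist < best_dist) {
      std::swap(addr, best_addr);
      std::swap(dist, best_dist);
    }
    stack[++stack_ptr] = {addr, dist};
  }
  /* Sort only the children pushed by this node, farthest first, which is
   * what the unrolled obvh_stack_sort calls achieve for 3..8 hits. */
  std::sort(stack + first_pushed, stack + stack_ptr + 1,
            [](const StackItem &a, const StackItem &b) { return a.dist > b.dist; });
  return best_addr;
}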
diff --git a/intern/cycles/kernel/bvh/obvh_nodes.h b/intern/cycles/kernel/bvh/obvh_nodes.h
deleted file mode 100644
index 6831562cade..00000000000
--- a/intern/cycles/kernel/bvh/obvh_nodes.h
+++ /dev/null
@@ -1,591 +0,0 @@
-/*
- * Copyright 2011-2014, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * Aligned nodes intersection AVX code is adopted from Embree,
- */
-
-struct OBVHStackItem {
- int addr;
- float dist;
-};
-
-ccl_device_inline void obvh_near_far_idx_calc(const float3 &idir,
- int *ccl_restrict near_x,
- int *ccl_restrict near_y,
- int *ccl_restrict near_z,
- int *ccl_restrict far_x,
- int *ccl_restrict far_y,
- int *ccl_restrict far_z)
-
-{
-#ifdef __KERNEL_SSE__
- *near_x = 0;
- *far_x = 1;
- *near_y = 2;
- *far_y = 3;
- *near_z = 4;
- *far_z = 5;
-
- const size_t mask = movemask(ssef(idir.m128));
-
- const int mask_x = mask & 1;
- const int mask_y = (mask & 2) >> 1;
- const int mask_z = (mask & 4) >> 2;
-
- *near_x += mask_x;
- *far_x -= mask_x;
- *near_y += mask_y;
- *far_y -= mask_y;
- *near_z += mask_z;
- *far_z -= mask_z;
-#else
- if (idir.x >= 0.0f) {
- *near_x = 0;
- *far_x = 1;
- }
- else {
- *near_x = 1;
- *far_x = 0;
- }
- if (idir.y >= 0.0f) {
- *near_y = 2;
- *far_y = 3;
- }
- else {
- *near_y = 3;
- *far_y = 2;
- }
- if (idir.z >= 0.0f) {
- *near_z = 4;
- *far_z = 5;
- }
- else {
- *near_z = 5;
- *far_z = 4;
- }
-#endif
-}
-
-ccl_device_inline void obvh_item_swap(OBVHStackItem *ccl_restrict a, OBVHStackItem *ccl_restrict b)
-{
- OBVHStackItem tmp = *a;
- *a = *b;
- *b = tmp;
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
- OBVHStackItem *ccl_restrict s2,
- OBVHStackItem *ccl_restrict s3)
-{
- if (s2->dist < s1->dist) {
- obvh_item_swap(s2, s1);
- }
- if (s3->dist < s2->dist) {
- obvh_item_swap(s3, s2);
- }
- if (s2->dist < s1->dist) {
- obvh_item_swap(s2, s1);
- }
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
- OBVHStackItem *ccl_restrict s2,
- OBVHStackItem *ccl_restrict s3,
- OBVHStackItem *ccl_restrict s4)
-{
- if (s2->dist < s1->dist) {
- obvh_item_swap(s2, s1);
- }
- if (s4->dist < s3->dist) {
- obvh_item_swap(s4, s3);
- }
- if (s3->dist < s1->dist) {
- obvh_item_swap(s3, s1);
- }
- if (s4->dist < s2->dist) {
- obvh_item_swap(s4, s2);
- }
- if (s3->dist < s2->dist) {
- obvh_item_swap(s3, s2);
- }
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
- OBVHStackItem *ccl_restrict s2,
- OBVHStackItem *ccl_restrict s3,
- OBVHStackItem *ccl_restrict s4,
- OBVHStackItem *ccl_restrict s5)
-{
- obvh_stack_sort(s1, s2, s3, s4);
- if (s5->dist < s4->dist) {
- obvh_item_swap(s4, s5);
- if (s4->dist < s3->dist) {
- obvh_item_swap(s3, s4);
- if (s3->dist < s2->dist) {
- obvh_item_swap(s2, s3);
- if (s2->dist < s1->dist) {
- obvh_item_swap(s1, s2);
- }
- }
- }
- }
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
- OBVHStackItem *ccl_restrict s2,
- OBVHStackItem *ccl_restrict s3,
- OBVHStackItem *ccl_restrict s4,
- OBVHStackItem *ccl_restrict s5,
- OBVHStackItem *ccl_restrict s6)
-{
- obvh_stack_sort(s1, s2, s3, s4, s5);
- if (s6->dist < s5->dist) {
- obvh_item_swap(s5, s6);
- if (s5->dist < s4->dist) {
- obvh_item_swap(s4, s5);
- if (s4->dist < s3->dist) {
- obvh_item_swap(s3, s4);
- if (s3->dist < s2->dist) {
- obvh_item_swap(s2, s3);
- if (s2->dist < s1->dist) {
- obvh_item_swap(s1, s2);
- }
- }
- }
- }
- }
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
- OBVHStackItem *ccl_restrict s2,
- OBVHStackItem *ccl_restrict s3,
- OBVHStackItem *ccl_restrict s4,
- OBVHStackItem *ccl_restrict s5,
- OBVHStackItem *ccl_restrict s6,
- OBVHStackItem *ccl_restrict s7)
-{
- obvh_stack_sort(s1, s2, s3, s4, s5, s6);
- if (s7->dist < s6->dist) {
- obvh_item_swap(s6, s7);
- if (s6->dist < s5->dist) {
- obvh_item_swap(s5, s6);
- if (s5->dist < s4->dist) {
- obvh_item_swap(s4, s5);
- if (s4->dist < s3->dist) {
- obvh_item_swap(s3, s4);
- if (s3->dist < s2->dist) {
- obvh_item_swap(s2, s3);
- if (s2->dist < s1->dist) {
- obvh_item_swap(s1, s2);
- }
- }
- }
- }
- }
- }
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
- OBVHStackItem *ccl_restrict s2,
- OBVHStackItem *ccl_restrict s3,
- OBVHStackItem *ccl_restrict s4,
- OBVHStackItem *ccl_restrict s5,
- OBVHStackItem *ccl_restrict s6,
- OBVHStackItem *ccl_restrict s7,
- OBVHStackItem *ccl_restrict s8)
-{
- obvh_stack_sort(s1, s2, s3, s4, s5, s6, s7);
- if (s8->dist < s7->dist) {
- obvh_item_swap(s7, s8);
- if (s7->dist < s6->dist) {
- obvh_item_swap(s6, s7);
- if (s6->dist < s5->dist) {
- obvh_item_swap(s5, s6);
- if (s5->dist < s4->dist) {
- obvh_item_swap(s4, s5);
- if (s4->dist < s3->dist) {
- obvh_item_swap(s3, s4);
- if (s3->dist < s2->dist) {
- obvh_item_swap(s2, s3);
- if (s2->dist < s1->dist) {
- obvh_item_swap(s1, s2);
- }
- }
- }
- }
- }
- }
- }
-}
-
-/* Axis-aligned nodes intersection */
-
-ccl_device_inline int obvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
- const avxf &isect_near,
- const avxf &isect_far,
-#ifdef __KERNEL_AVX2__
- const avx3f &org_idir,
-#else
- const avx3f &org,
-#endif
- const avx3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- avxf *ccl_restrict dist)
-{
- const int offset = node_addr + 2;
-#ifdef __KERNEL_AVX2__
- const avxf tnear_x = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, org_idir.x);
- const avxf tnear_y = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, org_idir.y);
- const avxf tnear_z = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, org_idir.z);
- const avxf tfar_x = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, org_idir.x);
- const avxf tfar_y = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, org_idir.y);
- const avxf tfar_z = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, org_idir.z);
-
- const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near);
- const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far);
- const avxb vmask = tnear <= tfar;
- int mask = (int)movemask(vmask);
- *dist = tnear;
- return mask;
-#else
- return 0;
-#endif
-}
-
-ccl_device_inline int obvh_aligned_node_intersect_robust(KernelGlobals *ccl_restrict kg,
- const avxf &isect_near,
- const avxf &isect_far,
-#ifdef __KERNEL_AVX2__
- const avx3f &P_idir,
-#else
- const avx3f &P,
-#endif
- const avx3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- const float difl,
- avxf *ccl_restrict dist)
-{
- const int offset = node_addr + 2;
-#ifdef __KERNEL_AVX2__
- const avxf tnear_x = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, P_idir.x);
- const avxf tfar_x = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, P_idir.x);
- const avxf tnear_y = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, P_idir.y);
- const avxf tfar_y = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, P_idir.y);
- const avxf tnear_z = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, P_idir.z);
- const avxf tfar_z = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, P_idir.z);
-
- const float round_down = 1.0f - difl;
- const float round_up = 1.0f + difl;
- const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near);
- const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far);
- const avxb vmask = round_down * tnear <= round_up * tfar;
- int mask = (int)movemask(vmask);
- *dist = tnear;
- return mask;
-#else
- return 0;
-#endif
-}
-
-/* Unaligned nodes intersection */
-
-ccl_device_inline int obvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg,
- const avxf &isect_near,
- const avxf &isect_far,
-#ifdef __KERNEL_AVX2__
- const avx3f &org_idir,
-#endif
- const avx3f &org,
- const avx3f &dir,
- const avx3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- avxf *ccl_restrict dist)
-{
- const int offset = node_addr;
- const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 2);
- const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 4);
- const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 6);
-
- const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 8);
- const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 10);
- const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 12);
-
- const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 14);
- const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 16);
- const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 18);
-
- const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 20);
- const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 22);
- const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 24);
-
- const avxf aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
- aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
- aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
-
- const avxf aligned_P_x = org.x * tfm_x_x + org.y * tfm_x_y + org.z * tfm_x_z + tfm_t_x,
- aligned_P_y = org.x * tfm_y_x + org.y * tfm_y_y + org.z * tfm_y_z + tfm_t_y,
- aligned_P_z = org.x * tfm_z_x + org.y * tfm_z_y + org.z * tfm_z_z + tfm_t_z;
-
- const avxf neg_one(-1.0f);
- const avxf nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
- nrdir_z = neg_one / aligned_dir_z;
-
- const avxf tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
- tlower_z = aligned_P_z * nrdir_z;
-
- const avxf tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
- tupper_z = tlower_z - nrdir_z;
-
- const avxf tnear_x = min(tlower_x, tupper_x);
- const avxf tnear_y = min(tlower_y, tupper_y);
- const avxf tnear_z = min(tlower_z, tupper_z);
- const avxf tfar_x = max(tlower_x, tupper_x);
- const avxf tfar_y = max(tlower_y, tupper_y);
- const avxf tfar_z = max(tlower_z, tupper_z);
- const avxf tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const avxf tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const avxb vmask = tnear <= tfar;
- *dist = tnear;
- return movemask(vmask);
-}
-
-ccl_device_inline int obvh_unaligned_node_intersect_robust(KernelGlobals *ccl_restrict kg,
- const avxf &isect_near,
- const avxf &isect_far,
-#ifdef __KERNEL_AVX2__
- const avx3f &P_idir,
-#endif
- const avx3f &P,
- const avx3f &dir,
- const avx3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- const float difl,
- avxf *ccl_restrict dist)
-{
- const int offset = node_addr;
- const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 2);
- const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 4);
- const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 6);
-
- const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 8);
- const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 10);
- const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 12);
-
- const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 14);
- const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 16);
- const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 18);
-
- const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 20);
- const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 22);
- const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 24);
-
- const avxf aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
- aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
- aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
-
- const avxf aligned_P_x = P.x * tfm_x_x + P.y * tfm_x_y + P.z * tfm_x_z + tfm_t_x,
- aligned_P_y = P.x * tfm_y_x + P.y * tfm_y_y + P.z * tfm_y_z + tfm_t_y,
- aligned_P_z = P.x * tfm_z_x + P.y * tfm_z_y + P.z * tfm_z_z + tfm_t_z;
-
- const avxf neg_one(-1.0f);
- const avxf nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
- nrdir_z = neg_one / aligned_dir_z;
-
- const avxf tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
- tlower_z = aligned_P_z * nrdir_z;
-
- const avxf tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
- tupper_z = tlower_z - nrdir_z;
-
- const float round_down = 1.0f - difl;
- const float round_up = 1.0f + difl;
-
- const avxf tnear_x = min(tlower_x, tupper_x);
- const avxf tnear_y = min(tlower_y, tupper_y);
- const avxf tnear_z = min(tlower_z, tupper_z);
- const avxf tfar_x = max(tlower_x, tupper_x);
- const avxf tfar_y = max(tlower_y, tupper_y);
- const avxf tfar_z = max(tlower_z, tupper_z);
-
- const avxf tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const avxf tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const avxb vmask = round_down * tnear <= round_up * tfar;
- *dist = tnear;
- return movemask(vmask);
-}
-
-/* Intersectors wrappers.
- *
- * They'll check node type and call appropriate intersection code.
- */
-
-ccl_device_inline int obvh_node_intersect(KernelGlobals *ccl_restrict kg,
- const avxf &isect_near,
- const avxf &isect_far,
-#ifdef __KERNEL_AVX2__
- const avx3f &org_idir,
-#endif
- const avx3f &org,
- const avx3f &dir,
- const avx3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- avxf *ccl_restrict dist)
-{
- const int offset = node_addr;
- const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
- if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return obvh_unaligned_node_intersect(kg,
- isect_near,
- isect_far,
-#ifdef __KERNEL_AVX2__
- org_idir,
-#endif
- org,
- dir,
- idir,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- dist);
- }
- else {
- return obvh_aligned_node_intersect(kg,
- isect_near,
- isect_far,
-#ifdef __KERNEL_AVX2__
- org_idir,
-#else
- org,
-#endif
- idir,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- dist);
- }
-}
-
-ccl_device_inline int obvh_node_intersect_robust(KernelGlobals *ccl_restrict kg,
- const avxf &isect_near,
- const avxf &isect_far,
-#ifdef __KERNEL_AVX2__
- const avx3f &P_idir,
-#endif
- const avx3f &P,
- const avx3f &dir,
- const avx3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- const float difl,
- avxf *ccl_restrict dist)
-{
- const int offset = node_addr;
- const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
- if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return obvh_unaligned_node_intersect_robust(kg,
- isect_near,
- isect_far,
-#ifdef __KERNEL_AVX2__
- P_idir,
-#endif
- P,
- dir,
- idir,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- difl,
- dist);
- }
- else {
- return obvh_aligned_node_intersect_robust(kg,
- isect_near,
- isect_far,
-#ifdef __KERNEL_AVX2__
- P_idir,
-#else
- P,
-#endif
- idir,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- difl,
- dist);
- }
-}
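
The eight-wide intersectors above evaluate a slab test for all children of a node at once. As a point of reference, here is a minimal scalar sketch of that test; the `Bounds` and `slab_hit` names are hypothetical and not part of the Cycles sources, but the per-axis near/far selection by direction sign is the same idea the `near_x`/`far_x` offsets encode above.

/* Minimal scalar sketch of the slab test (illustrative only). */
#include <algorithm>

struct Bounds {
  float lower[3];
  float upper[3];
};

/* Per axis, the sign of the inverse direction decides which bound acts as the
 * near plane; the box is hit when the accumulated near distance does not
 * exceed the accumulated far distance. */
static bool slab_hit(const float P[3], const float idir[3],
                     const Bounds &b, float t_min, float t_max)
{
  for (int axis = 0; axis < 3; ++axis) {
    const bool negative = (idir[axis] < 0.0f);
    const float near_bound = negative ? b.upper[axis] : b.lower[axis];
    const float far_bound = negative ? b.lower[axis] : b.upper[axis];
    t_min = std::max(t_min, (near_bound - P[axis]) * idir[axis]);
    t_max = std::min(t_max, (far_bound - P[axis]) * idir[axis]);
  }
  return t_min <= t_max;
}
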
diff --git a/intern/cycles/kernel/bvh/obvh_shadow_all.h b/intern/cycles/kernel/bvh/obvh_shadow_all.h
deleted file mode 100644
index 98efb003788..00000000000
--- a/intern/cycles/kernel/bvh/obvh_shadow_all.h
+++ /dev/null
@@ -1,670 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function, where various features can be
- * enabled/disabled. This way we can compile optimized versions for each case
- * without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_HAIR: hair curve rendering
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT obvh_node_intersect
-#else
-# define NODE_INTERSECT obvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect_array,
- const int skip_object,
- const uint max_hits,
- uint *num_hits)
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- *num_hits = 0;
- isect_array->t = tmax;
-
-#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
-#endif
-
- avxf tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
- avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#endif
- avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- (void)inodes;
-
- if (false
-#ifdef __VISIBILITY_FLAG__
- || ((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0)
-#endif
-#if BVH_FEATURE(BVH_MOTION)
- || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
-#endif
- ) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- avxf dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- avxf cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for three or more hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
-
- /* Five children are hit, push all onto stack and sort 5
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c4 = __float_as_int(cnodes[r]);
- float d4 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Six children are hit, push all onto stack and sort 6
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c5 = __float_as_int(cnodes[r]);
- float d5 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
-
- /* Seven children are hit, push all onto stack and sort 7
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c6 = __float_as_int(cnodes[r]);
- float d6 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Eight children are hit, push all onto stack and sort 8
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c7 = __float_as_int(cnodes[r]);
- float d7 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c7;
- traversal_stack[stack_ptr].dist = d7;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6],
- &traversal_stack[stack_ptr - 7]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-#ifdef __VISIBILITY_FLAG__
- if ((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-#endif
-
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- if (p_type == PRIMITIVE_TRIANGLE) {
- int prim_count = prim_addr2 - prim_addr;
- if (prim_count < 3) {
- while (prim_addr < prim_addr2) {
- kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) ==
- p_type);
- int hit = triangle_intersect(
- kg, isect_array, P, dir, PATH_RAY_SHADOW, object, prim_addr);
- /* Shadow ray early termination. */
- if (hit) {
- /* detect if this surface has a shader with transparent shadows */
-
- /* todo: optimize so primitive visibility flag indicates if
- * the primitive has a transparent shadow shader? */
- int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
- int shader = 0;
-
-#ifdef __HAIR__
- if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
-#endif
- {
- shader = kernel_tex_fetch(__tri_shader, prim);
- }
-#ifdef __HAIR__
- else {
- float4 str = kernel_tex_fetch(__curves, prim);
- shader = __float_as_int(str.z);
- }
-#endif
- int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-
- /* if no transparent shadows, all light is blocked */
- if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
- return true;
- }
- /* if maximum number of hits reached, block all light */
- else if (*num_hits == max_hits) {
- return true;
- }
-
- /* move on to next entry in intersections array */
- isect_array++;
- (*num_hits)++;
-#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-#endif
-
- isect_array->t = isect_t;
- }
-
- prim_addr++;
- } //while
- }
- else {
- kernel_assert((kernel_tex_fetch(__prim_type, (prim_addr)) & PRIMITIVE_ALL) ==
- p_type);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- int *nhiptr = &num_hits_in_instance;
-#else
- int nhi = 0;
- int *nhiptr = &nhi;
-#endif
-
- int result = triangle_intersect8(kg,
- &isect_array,
- P,
- dir,
- PATH_RAY_SHADOW,
- object,
- prim_addr,
- prim_count,
- num_hits,
- max_hits,
- nhiptr,
- isect_t);
- if (result == 2) {
- return true;
- }
- } // prim_count
- } // PRIMITIVE_TRIANGLE
- else {
- while (prim_addr < prim_addr2) {
- kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
-
-#ifdef __SHADOW_TRICKS__
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- if (tri_object == skip_object) {
- ++prim_addr;
- continue;
- }
-#endif
-
- bool hit;
-
- /* todo: specialized intersect functions which don't fill in
- * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
- * might give a few % performance improvement */
-
- switch (p_type) {
-
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- hit = motion_triangle_intersect(
- kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, prim_addr);
- break;
- }
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
- if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(kg,
- isect_array,
- P,
- dir,
- PATH_RAY_SHADOW,
- object,
- prim_addr,
- ray->time,
- curve_type,
- NULL,
- 0,
- 0);
- }
- else {
- hit = curve_intersect(kg,
- isect_array,
- P,
- dir,
- PATH_RAY_SHADOW,
- object,
- prim_addr,
- ray->time,
- curve_type,
- NULL,
- 0,
- 0);
- }
- break;
- }
-#endif
- default: {
- hit = false;
- break;
- }
- }
-
- /* Shadow ray early termination. */
- if (hit) {
- /* detect if this surface has a shader with transparent shadows */
-
- /* todo: optimize so primitive visibility flag indicates if
- * the primitive has a transparent shadow shader? */
- int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
- int shader = 0;
-
-#ifdef __HAIR__
- if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
-#endif
- {
- shader = kernel_tex_fetch(__tri_shader, prim);
- }
-#ifdef __HAIR__
- else {
- float4 str = kernel_tex_fetch(__curves, prim);
- shader = __float_as_int(str.z);
- }
-#endif
- int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-
- /* if no transparent shadows, all light is blocked */
- if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
- return true;
- }
- /* if maximum number of hits reached, block all light */
- else if (*num_hits == max_hits) {
- return true;
- }
-
- /* move on to next entry in intersections array */
- isect_array++;
- (*num_hits)++;
-#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-#endif
-
- isect_array->t = isect_t;
- }
-
- prim_addr++;
- } //while prim
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-
-# if BVH_FEATURE(BVH_MOTION)
- isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-# else
- isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-# endif
-
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
- if (num_hits_in_instance) {
- float t_fac;
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
- /* Scale isect->t to adjust for instancing. */
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
- else {
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-# endif
- }
-
- isect_t = tmax;
- isect_array->t = isect_t;
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return false;
-}
-
-#undef NODE_INTERSECT
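
The transparent-shadow handling in the function above applies the same decision in both the triangle and the curve paths: terminate as soon as an opaque surface is hit or the hit limit is reached, otherwise record the hit and keep traversing. A condensed sketch of that policy follows, using hypothetical names rather than the Cycles API.

/* Sketch of the record-or-terminate policy for shadow rays (illustrative only). */
enum ShadowResult { SHADOW_BLOCKED, SHADOW_RECORDED };

static ShadowResult record_shadow_hit(bool has_transparent_shadow,
                                      unsigned num_hits, unsigned max_hits)
{
  if (!has_transparent_shadow) {
    return SHADOW_BLOCKED; /* opaque surface: all light is blocked */
  }
  if (num_hits == max_hits) {
    return SHADOW_BLOCKED; /* hit limit reached: treat as fully blocked */
  }
  return SHADOW_RECORDED; /* keep the hit and continue traversal */
}
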
diff --git a/intern/cycles/kernel/bvh/obvh_traversal.h b/intern/cycles/kernel/bvh/obvh_traversal.h
deleted file mode 100644
index 86b1de48aaa..00000000000
--- a/intern/cycles/kernel/bvh/obvh_traversal.h
+++ /dev/null
@@ -1,620 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function, where various features can be
- * enabled/disabled. This way we can compile optimized versions for each case
- * without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_HAIR: hair curve rendering
- * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT obvh_node_intersect
-# define NODE_INTERSECT_ROBUST obvh_node_intersect_robust
-#else
-# define NODE_INTERSECT obvh_aligned_node_intersect
-# define NODE_INTERSECT_ROBUST obvh_aligned_node_intersect_robust
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility
-#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- ,
- uint *lcg_state,
- float difl,
- float extmax
-#endif
-)
-{
- /* Traversal stack in CUDA thread-local memory. */
- OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
- traversal_stack[0].dist = -FLT_MAX;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
- float node_dist = -FLT_MAX;
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
-
- BVH_DEBUG_INIT();
- avxf tnear(0.0f), tfar(ray->t);
-#if BVH_FEATURE(BVH_HAIR)
- avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#endif
- avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- avx3f P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- avx3f org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- (void)inodes;
-
- if (UNLIKELY(node_dist > isect->t)
-#if BVH_FEATURE(BVH_MOTION)
- || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
-#endif
-#ifdef __VISIBILITY_FLAG__
- || (__float_as_uint(inodes.x) & visibility) == 0
-#endif
- ) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- int child_mask;
- avxf dist;
-
- BVH_DEBUG_NEXT_NODE();
-
-#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- if (difl != 0.0f) {
- /* NOTE: We extend all the child bounding boxes instead of fetching
- * and checking visibility flags for each of them.
- *
- * Need to test if doing the opposite would be any faster.
- */
- child_mask = NODE_INTERSECT_ROBUST(kg,
- tnear,
- tfar,
-# ifdef __KERNEL_AVX2__
- P_idir4,
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-# endif
-# if BVH_FEATURE(BVH_HAIR)
- dir4,
-# endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- difl,
- &dist);
- }
- else
-#endif /* BVH_HAIR_MINIMUM_WIDTH */
- {
- child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
- }
-
- if (child_mask != 0) {
- avxf cnodes;
- /* TODO(sergey): Investigate whether moving cnodes upwards
- * gives a speedup (will be different cache pattern but will
- * avoid extra check here).
- */
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- float d0 = ((float *)&dist)[r];
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- node_dist = d0;
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- node_dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- node_dist = d0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for three or more hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
-
- /* Five children are hit, push all onto stack and sort 5
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c4 = __float_as_int(cnodes[r]);
- float d4 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- /* Six children are hit, push all onto stack and sort 6
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c5 = __float_as_int(cnodes[r]);
- float d5 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
-
- /* Seven children are hit, push all onto stack and sort 7
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c6 = __float_as_int(cnodes[r]);
- float d6 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- /* Eight children are hit, push all onto stack and sort 8
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c7 = __float_as_int(cnodes[r]);
- float d7 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c7;
- traversal_stack[stack_ptr].dist = d7;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6],
- &traversal_stack[stack_ptr - 7]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
-#ifdef __VISIBILITY_FLAG__
- if (UNLIKELY((node_dist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0)))
-#else
- if (UNLIKELY((node_dist > isect->t)))
-#endif
- {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- int prim_count = prim_addr2 - prim_addr;
- if (prim_count < 3) {
- for (; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
- tfar = avxf(isect->t);
- /* Shadow ray early termination. */
- if (visibility == PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- } //for
- }
- else {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (triangle_intersect8(kg,
- &isect,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- prim_count,
- 0,
- 0,
- NULL,
- 0.0f)) {
- tfar = avxf(isect->t);
- if (visibility == PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- } //prim count
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (motion_triangle_intersect(
- kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
- tfar = avxf(isect->t);
- /* Shadow ray early termination. */
- if (visibility == PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_MOTION) */
-#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
- kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
- bool hit;
- if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(kg,
- isect,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type,
- lcg_state,
- difl,
- extmax);
- }
- else {
- hit = curve_intersect(kg,
- isect,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type,
- lcg_state,
- difl,
- extmax);
- }
- if (hit) {
- tfar = avxf(isect->t);
- /* Shadow ray early termination. */
- if (visibility == PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_HAIR) */
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-
-# if BVH_FEATURE(BVH_MOTION)
- qbvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
-# else
- qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
-# endif
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
- traversal_stack[stack_ptr].dist = -FLT_MAX;
-
- node_addr = kernel_tex_fetch(__object_node, object);
-
- BVH_DEBUG_NEXT_INSTANCE();
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
-# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return (isect->prim != PRIM_NONE);
-}
-
-#undef NODE_INTERSECT
-#undef NODE_INTERSECT_ROBUST
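
The `__bscf` calls in the traversal above peel one set bit at a time off `child_mask` to obtain the index of the next intersected child. Below is a portable sketch of the same idiom; it assumes the GCC/Clang `__builtin_ctz` builtin rather than the Cycles macro.

/* Sketch: iterate over the set bits of an 8-wide hit mask (illustrative only). */
static void visit_hit_children(unsigned child_mask, const float child_dist[8])
{
  while (child_mask != 0) {
    const int r = __builtin_ctz(child_mask); /* index of the lowest set bit */
    child_mask &= child_mask - 1u;           /* clear that bit */
    /* A real traversal would now descend into or push child r, keyed on
     * child_dist[r]; here we only demonstrate the mask iteration. */
    (void)child_dist[r];
  }
}
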
diff --git a/intern/cycles/kernel/bvh/obvh_volume.h b/intern/cycles/kernel/bvh/obvh_volume.h
deleted file mode 100644
index fb41ae783ab..00000000000
--- a/intern/cycles/kernel/bvh/obvh_volume.h
+++ /dev/null
@@ -1,480 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for volumes, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT obvh_node_intersect
-#else
-# define NODE_INTERSECT obvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility)
-{
- /* Traversal stack in CUDA thread-local memory. */
- OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
-
- avxf tnear(0.0f), tfar(ray->t);
-#if BVH_FEATURE(BVH_HAIR)
- avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#endif
- avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-
-#ifdef __VISIBILITY_FLAG__
- if ((__float_as_uint(inodes.x) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-#endif
-
- avxf dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- avxf cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for three or more hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
-
- /* Five children are hit, push all onto stack and sort 5
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c4 = __float_as_int(cnodes[r]);
- float d4 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Six children are hit, push all onto stack and sort 6
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c5 = __float_as_int(cnodes[r]);
- float d5 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
-
- /* Seven children are hit, push all onto stack and sort 7
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c6 = __float_as_int(cnodes[r]);
- float d6 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Eight children are hit, push all onto stack and sort 8
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c7 = __float_as_int(cnodes[r]);
- float d7 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c7;
- traversal_stack[stack_ptr].dist = d7;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6],
- &traversal_stack[stack_ptr - 7]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
- if ((__float_as_uint(leaf.z) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (p_type) {
- case PRIMITIVE_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- motion_triangle_intersect(
- kg, isect, P, dir, ray->time, visibility, object, prim_addr);
- }
- break;
- }
-#endif
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
- if (object_flag & SD_OBJECT_HAS_VOLUME) {
-# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
- isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* Pop. */
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
-# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return (isect->prim != PRIM_NONE);
-}
-
-#undef NODE_INTERSECT
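
`obvh_near_far_idx_calc`, used throughout the traversals above, precomputes once per ray (and again after every instance push or pop) which packed bound row acts as the near or far plane for each axis. The sketch below shows that selection with an assumed 0..5 row layout (x/y/z lower bounds followed by x/y/z upper bounds); the actual node layout in Cycles differs, so treat the offsets as purely illustrative.

/* Sketch of near/far row selection from the inverse ray direction. */
static void near_far_idx(const float idir[3], int near_idx[3], int far_idx[3])
{
  for (int axis = 0; axis < 3; ++axis) {
    if (idir[axis] >= 0.0f) {
      near_idx[axis] = axis;     /* lower-bound row becomes the near plane */
      far_idx[axis] = axis + 3;  /* upper-bound row becomes the far plane */
    }
    else {
      near_idx[axis] = axis + 3; /* signs flipped: upper bound is nearer */
      far_idx[axis] = axis;
    }
  }
}
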
diff --git a/intern/cycles/kernel/bvh/obvh_volume_all.h b/intern/cycles/kernel/bvh/obvh_volume_all.h
deleted file mode 100644
index 56e2afd4a11..00000000000
--- a/intern/cycles/kernel/bvh/obvh_volume_all.h
+++ /dev/null
@@ -1,551 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for volumes, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT obvh_node_intersect
-#else
-# define NODE_INTERSECT obvh_aligned_node_intersect
-#endif
-
-ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect_array,
- const uint max_hits,
- const uint visibility)
-{
- /* Traversal stack in CUDA thread-local memory. */
- OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- uint num_hits = 0;
- isect_array->t = tmax;
-
-#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
-#endif
-
- avxf tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
- avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#endif
- avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-
-#ifdef __VISIBILITY_FLAG__
- if ((__float_as_uint(inodes.x) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-#endif
-
- avxf dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- avxf cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for three or more hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
-
- /* Five children are hit, push all onto stack and sort 5
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c4 = __float_as_int(cnodes[r]);
- float d4 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Six children are hit, push all onto stack and sort 6
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c5 = __float_as_int(cnodes[r]);
- float d5 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
-
- /* Seven children are hit, push all onto stack and sort 7
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c6 = __float_as_int(cnodes[r]);
- float d6 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Eight children are hit, push all onto stack and sort 8
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c7 = __float_as_int(cnodes[r]);
- float d7 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c7;
- traversal_stack[stack_ptr].dist = d7;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6],
- &traversal_stack[stack_ptr - 7]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
- if ((__float_as_uint(leaf.z) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
- bool hit;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (p_type) {
- case PRIMITIVE_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-              /* Only consider primitives from volume objects. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
- if (hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
-#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-#endif
- isect_array->t = isect_t;
- if (num_hits == max_hits) {
-#if BVH_FEATURE(BVH_INSTANCING)
-# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
- Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
-#endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-              /* Only consider primitives from volume objects. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- hit = motion_triangle_intersect(
- kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
- if (hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
-# if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-# endif
- isect_array->t = isect_t;
- if (num_hits == max_hits) {
-# if BVH_FEATURE(BVH_INSTANCING)
-# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
- Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
-# endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
-#endif
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
- if (object_flag & SD_OBJECT_HAS_VOLUME) {
-# if BVH_FEATURE(BVH_MOTION)
- isect_t = bvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-# else
- isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-# endif
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect_t);
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* Pop. */
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
- }
- }
-#endif /* BVH_FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
- if (num_hits_in_instance) {
- float t_fac;
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
- /* Scale isect->t to adjust for instancing. */
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
- else {
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-# endif
- }
-
- isect_t = tmax;
- isect_array->t = isect_t;
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* BVH_FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return num_hits;
-}
-
-#undef NODE_INTERSECT
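
The early return above, taken when max_hits is reached while inside an instance, multiplies every hit recorded since the instance push by a common factor t_fac before returning. A minimal standalone sketch of that back-scaling step follows (plain C++, not Cycles code; the Intersection struct and the t_fac value are made-up stand-ins, and the real factor comes from how the instance transform scales the ray direction). It illustrates why the (isect_array - i - 1) indexing touches only the hits from the current instance: isect_array is advanced once per recorded hit, so it always points at the next free slot.

#include <cstdio>

struct Intersection {
  float t;
};

int main()
{
  /* isect_array points at the next free slot, as it does in the traversal
   * loop after the last "isect_array++". */
  Intersection hits[8] = {{1.0f}, {3.0f}, {4.5f}, {0.0f}};
  Intersection *isect_array = hits + 3;

  const int num_hits_in_instance = 2; /* The two most recent hits were recorded inside an instance. */
  const float t_fac = 0.5f;           /* Stand-in for 1 / len(transformed dir). */

  /* Walk backwards over just those hits and rescale their distances. */
  for (int i = 0; i < num_hits_in_instance; i++) {
    (isect_array - i - 1)->t *= t_fac;
  }

  for (int i = 0; i < 3; i++) {
    std::printf("hit %d: t = %g\n", i, hits[i].t);
  }
  return 0;
}
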
diff --git a/intern/cycles/kernel/bvh/qbvh_local.h b/intern/cycles/kernel/bvh/qbvh_local.h
deleted file mode 100644
index b21f79bd3a0..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_local.h
+++ /dev/null
@@ -1,291 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for finding local intersections
- * around the shading point, for subsurface scattering and bevel. We disable
- * various features for performance, and for instanced objects avoid traversing
- * other parts of the scene.
- *
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT qbvh_node_intersect
-#else
-# define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
- const Ray *ray,
- LocalIntersection *local_isect,
- int local_object,
- uint *lcg_state,
- int max_hits)
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps (for non shadow rays).
- * - Separate version for shadow rays.
- * - Likely and unlikely for if() statements.
- * - SSE for hair.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_tex_fetch(__object_node, local_object);
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = ray->t;
-
- if (local_isect != NULL) {
- local_isect->num_hits = 0;
- }
- kernel_assert((local_isect == NULL) == (max_hits == 0));
-
- const int object_flag = kernel_tex_fetch(__object_flag, local_object);
- if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
- isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-#else
- isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
-#endif
- object = local_object;
- }
-
- ssef tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- ssef dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- float4 cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
- int prim_addr = __float_as_int(leaf.x);
-
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
-            /* Intersect ray against primitive. */
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (triangle_intersect_local(kg,
- local_isect,
- P,
- dir,
- object,
- local_object,
- prim_addr,
- isect_t,
- lcg_state,
- max_hits)) {
- return true;
- }
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- /* Intersect ray against primitive. */
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (motion_triangle_intersect_local(kg,
- local_isect,
- P,
- dir,
- ray->time,
- object,
- local_object,
- prim_addr,
- isect_t,
- lcg_state,
- max_hits)) {
- return true;
- }
- }
- break;
- }
-#endif
- default:
- break;
- }
- }
- } while (node_addr != ENTRYPOINT_SENTINEL);
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return false;
-}
-
-#undef NODE_INTERSECT
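
All of these traversal templates extract hit children from child_mask one slot at a time with __bscf, which returns the index of the lowest set bit and clears it. The short standalone sketch below (plain C++, not Cycles code; it assumes a C++20 compiler for std::countr_zero) splits that intrinsic into a bit scan plus an explicit clear, which is all it does.

#include <bit>
#include <cstdio>

int main()
{
  unsigned child_mask = 0b1011u; /* Children 0, 1 and 3 were hit. */
  while (child_mask != 0) {
    const int r = std::countr_zero(child_mask); /* Index of the lowest set bit. */
    child_mask &= child_mask - 1u;              /* Clear that bit. */
    std::printf("visit child slot %d\n", r);
  }
  return 0;
}

Because the mask comes from a per-lane vector comparison, the returned slot r also indexes the per-child distance lane, which is why the traversal code reads ((float *)&dist)[r].
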
diff --git a/intern/cycles/kernel/bvh/qbvh_nodes.h b/intern/cycles/kernel/bvh/qbvh_nodes.h
deleted file mode 100644
index 7c1d8c8c72e..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_nodes.h
+++ /dev/null
@@ -1,516 +0,0 @@
-/*
- * Copyright 2011-2014, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
 - * Aligned node intersection SSE code is adapted from Embree.
- */
-
-struct QBVHStackItem {
- int addr;
- float dist;
-};
-
-ccl_device_inline void qbvh_near_far_idx_calc(const float3 &idir,
- int *ccl_restrict near_x,
- int *ccl_restrict near_y,
- int *ccl_restrict near_z,
- int *ccl_restrict far_x,
- int *ccl_restrict far_y,
- int *ccl_restrict far_z)
-
-{
-#ifdef __KERNEL_SSE__
- *near_x = 0;
- *far_x = 1;
- *near_y = 2;
- *far_y = 3;
- *near_z = 4;
- *far_z = 5;
-
- const size_t mask = movemask(ssef(idir.m128));
-
- const int mask_x = mask & 1;
- const int mask_y = (mask & 2) >> 1;
- const int mask_z = (mask & 4) >> 2;
-
- *near_x += mask_x;
- *far_x -= mask_x;
- *near_y += mask_y;
- *far_y -= mask_y;
- *near_z += mask_z;
- *far_z -= mask_z;
-#else
- if (idir.x >= 0.0f) {
- *near_x = 0;
- *far_x = 1;
- }
- else {
- *near_x = 1;
- *far_x = 0;
- }
- if (idir.y >= 0.0f) {
- *near_y = 2;
- *far_y = 3;
- }
- else {
- *near_y = 3;
- *far_y = 2;
- }
- if (idir.z >= 0.0f) {
- *near_z = 4;
- *far_z = 5;
- }
- else {
- *near_z = 5;
- *far_z = 4;
- }
-#endif
-}
-
-/* TODO(sergey): Investigate if using intrinsics helps for both
- * stack item swap and float comparison.
- */
-ccl_device_inline void qbvh_item_swap(QBVHStackItem *ccl_restrict a, QBVHStackItem *ccl_restrict b)
-{
- QBVHStackItem tmp = *a;
- *a = *b;
- *b = tmp;
-}
-
-ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
- QBVHStackItem *ccl_restrict s2,
- QBVHStackItem *ccl_restrict s3)
-{
- if (s2->dist < s1->dist) {
- qbvh_item_swap(s2, s1);
- }
- if (s3->dist < s2->dist) {
- qbvh_item_swap(s3, s2);
- }
- if (s2->dist < s1->dist) {
- qbvh_item_swap(s2, s1);
- }
-}
-
-ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
- QBVHStackItem *ccl_restrict s2,
- QBVHStackItem *ccl_restrict s3,
- QBVHStackItem *ccl_restrict s4)
-{
- if (s2->dist < s1->dist) {
- qbvh_item_swap(s2, s1);
- }
- if (s4->dist < s3->dist) {
- qbvh_item_swap(s4, s3);
- }
- if (s3->dist < s1->dist) {
- qbvh_item_swap(s3, s1);
- }
- if (s4->dist < s2->dist) {
- qbvh_item_swap(s4, s2);
- }
- if (s3->dist < s2->dist) {
- qbvh_item_swap(s3, s2);
- }
-}
-
-/* Axis-aligned nodes intersection */
-
-//ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
-static int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
- const ssef &isect_near,
- const ssef &isect_far,
-#ifdef __KERNEL_AVX2__
- const sse3f &org_idir,
-#else
- const sse3f &org,
-#endif
- const sse3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- ssef *ccl_restrict dist)
-{
- const int offset = node_addr + 1;
-#ifdef __KERNEL_AVX2__
- const ssef tnear_x = msub(
- kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x), idir.x, org_idir.x);
- const ssef tnear_y = msub(
- kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y), idir.y, org_idir.y);
- const ssef tnear_z = msub(
- kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z), idir.z, org_idir.z);
- const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x), idir.x, org_idir.x);
- const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y), idir.y, org_idir.y);
- const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z), idir.z, org_idir.z);
-#else
- const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x) - org.x) * idir.x;
- const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y) - org.y) * idir.y;
- const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z) - org.z) * idir.z;
- const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x) - org.x) * idir.x;
- const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y) - org.y) * idir.y;
- const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z) - org.z) * idir.z;
-#endif
-
-#ifdef __KERNEL_SSE41__
- const ssef tnear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, isect_near));
- const ssef tfar = mini(mini(tfar_x, tfar_y), mini(tfar_z, isect_far));
- const sseb vmask = cast(tnear) > cast(tfar);
- int mask = (int)movemask(vmask) ^ 0xf;
-#else
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const sseb vmask = tnear <= tfar;
- int mask = (int)movemask(vmask);
-#endif
- *dist = tnear;
- return mask;
-}
-
-ccl_device_inline int qbvh_aligned_node_intersect_robust(KernelGlobals *ccl_restrict kg,
- const ssef &isect_near,
- const ssef &isect_far,
-#ifdef __KERNEL_AVX2__
- const sse3f &P_idir,
-#else
- const sse3f &P,
-#endif
- const sse3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- const float difl,
- ssef *ccl_restrict dist)
-{
- const int offset = node_addr + 1;
-#ifdef __KERNEL_AVX2__
- const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x), idir.x, P_idir.x);
- const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y), idir.y, P_idir.y);
- const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z), idir.z, P_idir.z);
- const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x), idir.x, P_idir.x);
- const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y), idir.y, P_idir.y);
- const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z), idir.z, P_idir.z);
-#else
- const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x) - P.x) * idir.x;
- const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y) - P.y) * idir.y;
- const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z) - P.z) * idir.z;
- const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x) - P.x) * idir.x;
- const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y) - P.y) * idir.y;
- const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z) - P.z) * idir.z;
-#endif
-
- const float round_down = 1.0f - difl;
- const float round_up = 1.0f + difl;
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const sseb vmask = round_down * tnear <= round_up * tfar;
- *dist = tnear;
- return (int)movemask(vmask);
-}
-
-/* Unaligned nodes intersection */
-
-ccl_device_inline int qbvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg,
- const ssef &isect_near,
- const ssef &isect_far,
-#ifdef __KERNEL_AVX2__
- const sse3f &org_idir,
-#endif
- const sse3f &org,
- const sse3f &dir,
- const sse3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- ssef *ccl_restrict dist)
-{
- const int offset = node_addr;
- const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 1);
- const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 2);
- const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 3);
-
- const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 4);
- const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 5);
- const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 6);
-
- const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 7);
- const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 8);
- const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 9);
-
- const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 10);
- const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 11);
- const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 12);
-
- const ssef aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
- aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
- aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
-
- const ssef aligned_P_x = org.x * tfm_x_x + org.y * tfm_x_y + org.z * tfm_x_z + tfm_t_x,
- aligned_P_y = org.x * tfm_y_x + org.y * tfm_y_y + org.z * tfm_y_z + tfm_t_y,
- aligned_P_z = org.x * tfm_z_x + org.y * tfm_z_y + org.z * tfm_z_z + tfm_t_z;
-
- const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f);
- const ssef nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
- nrdir_z = neg_one / aligned_dir_z;
-
- const ssef tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
- tlower_z = aligned_P_z * nrdir_z;
-
- const ssef tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
- tupper_z = tlower_z - nrdir_z;
-
-#ifdef __KERNEL_SSE41__
- const ssef tnear_x = mini(tlower_x, tupper_x);
- const ssef tnear_y = mini(tlower_y, tupper_y);
- const ssef tnear_z = mini(tlower_z, tupper_z);
- const ssef tfar_x = maxi(tlower_x, tupper_x);
- const ssef tfar_y = maxi(tlower_y, tupper_y);
- const ssef tfar_z = maxi(tlower_z, tupper_z);
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const sseb vmask = tnear <= tfar;
- *dist = tnear;
- return movemask(vmask);
-#else
- const ssef tnear_x = min(tlower_x, tupper_x);
- const ssef tnear_y = min(tlower_y, tupper_y);
- const ssef tnear_z = min(tlower_z, tupper_z);
- const ssef tfar_x = max(tlower_x, tupper_x);
- const ssef tfar_y = max(tlower_y, tupper_y);
- const ssef tfar_z = max(tlower_z, tupper_z);
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const sseb vmask = tnear <= tfar;
- *dist = tnear;
- return movemask(vmask);
-#endif
-}
-
-ccl_device_inline int qbvh_unaligned_node_intersect_robust(KernelGlobals *ccl_restrict kg,
- const ssef &isect_near,
- const ssef &isect_far,
-#ifdef __KERNEL_AVX2__
- const sse3f &P_idir,
-#endif
- const sse3f &P,
- const sse3f &dir,
- const sse3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- const float difl,
- ssef *ccl_restrict dist)
-{
- const int offset = node_addr;
- const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 1);
- const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 2);
- const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 3);
-
- const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 4);
- const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 5);
- const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 6);
-
- const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 7);
- const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 8);
- const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 9);
-
- const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 10);
- const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 11);
- const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 12);
-
- const ssef aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
- aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
- aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
-
- const ssef aligned_P_x = P.x * tfm_x_x + P.y * tfm_x_y + P.z * tfm_x_z + tfm_t_x,
- aligned_P_y = P.x * tfm_y_x + P.y * tfm_y_y + P.z * tfm_y_z + tfm_t_y,
- aligned_P_z = P.x * tfm_z_x + P.y * tfm_z_y + P.z * tfm_z_z + tfm_t_z;
-
- const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f);
- const ssef nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
- nrdir_z = neg_one / aligned_dir_z;
-
- const ssef tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
- tlower_z = aligned_P_z * nrdir_z;
-
- const ssef tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
- tupper_z = tlower_z - nrdir_z;
-
- const float round_down = 1.0f - difl;
- const float round_up = 1.0f + difl;
-
-#ifdef __KERNEL_SSE41__
- const ssef tnear_x = mini(tlower_x, tupper_x);
- const ssef tnear_y = mini(tlower_y, tupper_y);
- const ssef tnear_z = mini(tlower_z, tupper_z);
- const ssef tfar_x = maxi(tlower_x, tupper_x);
- const ssef tfar_y = maxi(tlower_y, tupper_y);
- const ssef tfar_z = maxi(tlower_z, tupper_z);
-#else
- const ssef tnear_x = min(tlower_x, tupper_x);
- const ssef tnear_y = min(tlower_y, tupper_y);
- const ssef tnear_z = min(tlower_z, tupper_z);
- const ssef tfar_x = max(tlower_x, tupper_x);
- const ssef tfar_y = max(tlower_y, tupper_y);
- const ssef tfar_z = max(tlower_z, tupper_z);
-#endif
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const sseb vmask = round_down * tnear <= round_up * tfar;
- *dist = tnear;
- return movemask(vmask);
-}
-
-/* Intersector wrappers.
- *
- * They check the node type and call the appropriate intersection code.
- */
-
-ccl_device_inline int qbvh_node_intersect(KernelGlobals *ccl_restrict kg,
- const ssef &isect_near,
- const ssef &isect_far,
-#ifdef __KERNEL_AVX2__
- const sse3f &org_idir,
-#endif
- const sse3f &org,
- const sse3f &dir,
- const sse3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- ssef *ccl_restrict dist)
-{
- const int offset = node_addr;
- const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
- if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return qbvh_unaligned_node_intersect(kg,
- isect_near,
- isect_far,
-#ifdef __KERNEL_AVX2__
- org_idir,
-#endif
- org,
- dir,
- idir,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- dist);
- }
- else {
- return qbvh_aligned_node_intersect(kg,
- isect_near,
- isect_far,
-#ifdef __KERNEL_AVX2__
- org_idir,
-#else
- org,
-#endif
- idir,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- dist);
- }
-}
-
-ccl_device_inline int qbvh_node_intersect_robust(KernelGlobals *ccl_restrict kg,
- const ssef &isect_near,
- const ssef &isect_far,
-#ifdef __KERNEL_AVX2__
- const sse3f &P_idir,
-#endif
- const sse3f &P,
- const sse3f &dir,
- const sse3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- const float difl,
- ssef *ccl_restrict dist)
-{
- const int offset = node_addr;
- const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
- if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return qbvh_unaligned_node_intersect_robust(kg,
- isect_near,
- isect_far,
-#ifdef __KERNEL_AVX2__
- P_idir,
-#endif
- P,
- dir,
- idir,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- difl,
- dist);
- }
- else {
- return qbvh_aligned_node_intersect_robust(kg,
- isect_near,
- isect_far,
-#ifdef __KERNEL_AVX2__
- P_idir,
-#else
- P,
-#endif
- idir,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- difl,
- dist);
- }
-}
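
qbvh_near_far_idx_calc and qbvh_aligned_node_intersect together are a four-wide version of the classic slab test: the sign of each direction component decides which of the two stored bounds acts as the near plane, then the entry and exit distances are clamped against the current ray interval. Below is a scalar, single-box sketch of the same idea (plain C++, not Cycles code; the Aabb layout is a simplified stand-in for the node's SoA child bounds).

#include <algorithm>
#include <cstdio>

struct Aabb {
  float lo[3];
  float hi[3];
};

static bool intersect_aabb(const float P[3],
                           const float idir[3],
                           float tmin,
                           float tmax,
                           const Aabb &box,
                           float *t_near_out)
{
  float tnear = tmin, tfar = tmax;
  for (int axis = 0; axis < 3; axis++) {
    /* A negative direction swaps which bound acts as the near plane,
     * mirroring qbvh_near_far_idx_calc. */
    const float lo = (idir[axis] >= 0.0f) ? box.lo[axis] : box.hi[axis];
    const float hi = (idir[axis] >= 0.0f) ? box.hi[axis] : box.lo[axis];
    tnear = std::max(tnear, (lo - P[axis]) * idir[axis]);
    tfar = std::min(tfar, (hi - P[axis]) * idir[axis]);
  }
  *t_near_out = tnear;
  return tnear <= tfar;
}

int main()
{
  const float P[3] = {0.0f, 0.0f, 0.0f};
  const float idir[3] = {1.0f, 1.0f, 1.0f}; /* 1 / direction, for direction (1, 1, 1). */
  const Aabb box = {{1.0f, 1.0f, 1.0f}, {2.0f, 2.0f, 2.0f}};
  float t_near = 0.0f;
  if (intersect_aabb(P, idir, 0.0f, 1e30f, box, &t_near)) {
    std::printf("hit at t = %g\n", t_near);
  }
  return 0;
}

The SSE code performs this same computation with each ssef lane holding one of the four children, so a single comparison yields the child hit mask.
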
diff --git a/intern/cycles/kernel/bvh/qbvh_shadow_all.h b/intern/cycles/kernel/bvh/qbvh_shadow_all.h
deleted file mode 100644
index 49e607bfbd0..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_shadow_all.h
+++ /dev/null
@@ -1,459 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function, where various features can be
- * enabled/disabled. This way we can compile optimized versions for each case
- * without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_HAIR: hair curve rendering
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT qbvh_node_intersect
-#else
-# define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect_array,
- const uint visibility,
- const uint max_hits,
- uint *num_hits)
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- *num_hits = 0;
- isect_array->t = tmax;
-
-#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
-#endif
-
- ssef tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- (void)inodes;
-
- if (false
-#ifdef __VISIBILITY_FLAG__
- || ((__float_as_uint(inodes.x) & visibility) == 0)
-#endif
-#if BVH_FEATURE(BVH_MOTION)
- || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
-#endif
- ) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ssef dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- float4 cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-#ifdef __VISIBILITY_FLAG__
- if ((__float_as_uint(leaf.z) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-#endif
-
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- while (prim_addr < prim_addr2) {
- kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
- bool hit;
-
-                /* TODO: specialized intersect functions which don't fill in
-                 * isect unless needed, and which check SD_HAS_TRANSPARENT_SHADOW,
-                 * might give a few % performance improvement. */
-
- switch (p_type) {
- case PRIMITIVE_TRIANGLE: {
- hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- hit = motion_triangle_intersect(
- kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
- break;
- }
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
- if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(kg,
- isect_array,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type,
- NULL,
- 0,
- 0);
- }
- else {
- hit = curve_intersect(kg,
- isect_array,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type,
- NULL,
- 0,
- 0);
- }
- break;
- }
-#endif
- default: {
- hit = false;
- break;
- }
- }
-
- /* Shadow ray early termination. */
- if (hit) {
-                  /* Detect if this surface has a shader with transparent shadows. */
-
-                  /* TODO: optimize so the primitive visibility flag indicates
-                   * whether the primitive has a transparent shadow shader? */
- int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
- int shader = 0;
-
-#ifdef __HAIR__
- if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
-#endif
- {
- shader = kernel_tex_fetch(__tri_shader, prim);
- }
-#ifdef __HAIR__
- else {
- float4 str = kernel_tex_fetch(__curves, prim);
- shader = __float_as_int(str.z);
- }
-#endif
- int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-
- /* if no transparent shadows, all light is blocked */
- if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
- return true;
- }
- /* if maximum number of hits reached, block all light */
- else if (*num_hits == max_hits) {
- return true;
- }
-
- /* move on to next entry in intersections array */
- isect_array++;
- (*num_hits)++;
-#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-#endif
-
- isect_array->t = isect_t;
- }
-
- prim_addr++;
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-
-# if BVH_FEATURE(BVH_MOTION)
- isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-# else
- isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-# endif
-
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- }
-#endif /* BVH_FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
- if (num_hits_in_instance) {
- float t_fac;
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
- /* Scale isect->t to adjust for instancing. */
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
- else {
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-# endif
- }
-
- isect_t = tmax;
- isect_array->t = isect_t;
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* BVH_FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return false;
-}
-
-#undef NODE_INTERSECT
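
The leaf loop above records transparent hits into isect_array and stops early either when a hit's shader has no transparent shadow or when max_hits entries have been collected. That policy can be summarized with a small standalone sketch (plain C++, not Cycles code; the Hit struct and its transparent_shadow flag are made-up stand-ins for the SD_HAS_TRANSPARENT_SHADOW shader-flag lookup).

#include <cstdio>
#include <vector>

struct Hit {
  float t;
  bool transparent_shadow;
};

/* Returns true if all light is blocked, false if the caller should shade the
 * recorded hits to accumulate transparency. */
static bool shadow_blocked_all(const std::vector<Hit> &hits_along_ray,
                               unsigned max_hits,
                               std::vector<Hit> *recorded)
{
  for (const Hit &hit : hits_along_ray) {
    if (!hit.transparent_shadow) {
      return true; /* Opaque surface: light fully blocked. */
    }
    if (recorded->size() == max_hits) {
      return true; /* Too many transparent hits: give up and block. */
    }
    recorded->push_back(hit);
  }
  return false;
}

int main()
{
  std::vector<Hit> hits = {{0.5f, true}, {1.2f, true}, {2.0f, false}};
  std::vector<Hit> recorded;
  const bool blocked = shadow_blocked_all(hits, 8, &recorded);
  std::printf("blocked=%d, recorded=%zu transparent hits\n", blocked, recorded.size());
  return 0;
}
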
diff --git a/intern/cycles/kernel/bvh/qbvh_traversal.h b/intern/cycles/kernel/bvh/qbvh_traversal.h
deleted file mode 100644
index 9ee0f7b5933..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_traversal.h
+++ /dev/null
@@ -1,483 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function, where various features can be
- * enabled/disabled. This way we can compile optimized versions for each case
- * without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_HAIR: hair curve rendering
- * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT qbvh_node_intersect
-# define NODE_INTERSECT_ROBUST qbvh_node_intersect_robust
-#else
-# define NODE_INTERSECT qbvh_aligned_node_intersect
-# define NODE_INTERSECT_ROBUST qbvh_aligned_node_intersect_robust
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility
-#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- ,
- uint *lcg_state,
- float difl,
- float extmax
-#endif
-)
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps (for non shadow rays).
- * - Separate version for shadow rays.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
- traversal_stack[0].dist = -FLT_MAX;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
- float node_dist = -FLT_MAX;
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
-
- BVH_DEBUG_INIT();
-
- ssef tnear(0.0f), tfar(ray->t);
-#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- (void)inodes;
-
- if (UNLIKELY(node_dist > isect->t)
-#if BVH_FEATURE(BVH_MOTION)
- || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
-#endif
-#ifdef __VISIBILITY_FLAG__
- || (__float_as_uint(inodes.x) & visibility) == 0
-#endif
- ) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- int child_mask;
- ssef dist;
-
- BVH_DEBUG_NEXT_NODE();
-
-#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- if (difl != 0.0f) {
-          /* NOTE: We extend all the child bounding boxes instead of fetching
-           * and checking visibility flags for each of them.
-           *
-           * Need to test if doing the opposite would be any faster.
-           */
- child_mask = NODE_INTERSECT_ROBUST(kg,
- tnear,
- tfar,
-# ifdef __KERNEL_AVX2__
- P_idir4,
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-# endif
-# if BVH_FEATURE(BVH_HAIR)
- dir4,
-# endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- difl,
- &dist);
- }
- else
-#endif /* BVH_HAIR_MINIMUM_WIDTH */
- {
- child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
- }
-
- if (child_mask != 0) {
- float4 cnodes;
- /* TODO(sergey): Investigate whether moving cnodes upwards
- * gives a speedup (will be different cache pattern but will
- * avoid extra check here).
- */
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- float d0 = ((float *)&dist)[r];
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- node_dist = d0;
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- node_dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- node_dist = d0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
-#ifdef __VISIBILITY_FLAG__
- if (UNLIKELY((node_dist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0)))
-#else
- if (UNLIKELY((node_dist > isect->t)))
-#endif
- {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
- tfar = ssef(isect->t);
- /* Shadow ray early termination. */
- if (visibility & PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (motion_triangle_intersect(
- kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
- tfar = ssef(isect->t);
- /* Shadow ray early termination. */
- if (visibility & PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_MOTION) */
-#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
- kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
- bool hit;
- if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(kg,
- isect,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type,
- lcg_state,
- difl,
- extmax);
- }
- else {
- hit = curve_intersect(kg,
- isect,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type,
- lcg_state,
- difl,
- extmax);
- }
- if (hit) {
- tfar = ssef(isect->t);
- /* Shadow ray early termination. */
- if (visibility & PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_HAIR) */
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-
-# if BVH_FEATURE(BVH_MOTION)
- qbvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
-# else
- qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
-# endif
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
- traversal_stack[stack_ptr].dist = -FLT_MAX;
-
- node_addr = kernel_tex_fetch(__object_node, object);
-
- BVH_DEBUG_NEXT_INSTANCE();
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
-# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return (isect->prim != PRIM_NONE);
-}
-
-#undef NODE_INTERSECT
-#undef NODE_INTERSECT_ROBUST
diff --git a/intern/cycles/kernel/bvh/qbvh_volume.h b/intern/cycles/kernel/bvh/qbvh_volume.h
deleted file mode 100644
index e4eaed04467..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_volume.h
+++ /dev/null
@@ -1,367 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for volumes, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT qbvh_node_intersect
-#else
-# define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility)
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
-
- ssef tnear(0.0f), tfar(ray->t);
-#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-
-#ifdef __VISIBILITY_FLAG__
- if ((__float_as_uint(inodes.x) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-#endif
-
- ssef dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- float4 cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
- if ((__float_as_uint(leaf.z) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (p_type) {
- case PRIMITIVE_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- motion_triangle_intersect(
- kg, isect, P, dir, ray->time, visibility, object, prim_addr);
- }
- break;
- }
-#endif
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
- if (object_flag & SD_OBJECT_HAS_VOLUME) {
-# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
- isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* Pop. */
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
-# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return (isect->prim != PRIM_NONE);
-}
-
-#undef NODE_INTERSECT
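
A standalone sketch (hypothetical names, not part of the kernel or of the diff above) of the nearest-first stack ordering that the deleted 4-wide traversal relies on: when three or four children of a node are hit, they are all pushed and the top stack entries are reordered by distance so the closest child is popped and visited first. The real qbvh_stack_sort() serves the same purpose (plus a 4-item variant); this is only an illustration of the idea.

    #include <algorithm>
    #include <cstdio>

    struct DemoStackItem {
      int addr;   /* node address */
      float dist; /* entry distance along the ray */
    };

    /* Reorder three stack items so that *top holds the smallest distance. */
    static void demo_stack_sort3(DemoStackItem *top, DemoStackItem *mid, DemoStackItem *bot)
    {
      if (mid->dist < top->dist) std::swap(*top, *mid);
      if (bot->dist < top->dist) std::swap(*top, *bot);
      if (bot->dist < mid->dist) std::swap(*mid, *bot);
    }

    int main()
    {
      /* Stack grows upward: index 2 is the top and is popped first. */
      DemoStackItem stack[3] = {{40, 7.5f}, {17, 2.0f}, {23, 4.25f}};
      demo_stack_sort3(&stack[2], &stack[1], &stack[0]);

      for (int ptr = 2; ptr >= 0; --ptr) {
        /* Pops in order of increasing distance: addr 17, then 23, then 40. */
        printf("pop addr=%d dist=%.2f\n", stack[ptr].addr, stack[ptr].dist);
      }
      return 0;
    }

Visiting the nearest child first makes it more likely that isect->t shrinks early, so farther subtrees pushed below it can be culled by the node_dist > isect->t test without being descended.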
diff --git a/intern/cycles/kernel/bvh/qbvh_volume_all.h b/intern/cycles/kernel/bvh/qbvh_volume_all.h
deleted file mode 100644
index eddc48c487e..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_volume_all.h
+++ /dev/null
@@ -1,444 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for volumes, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT qbvh_node_intersect
-#else
-# define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect_array,
- const uint max_hits,
- const uint visibility)
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- uint num_hits = 0;
- isect_array->t = tmax;
-
-#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
-#endif
-
- ssef tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-
-#ifdef __VISIBILITY_FLAG__
- if ((__float_as_uint(inodes.x) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-#endif
-
- ssef dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- float4 cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
- if ((__float_as_uint(leaf.z) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
- bool hit;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (p_type) {
- case PRIMITIVE_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
- if (hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
-#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-#endif
- isect_array->t = isect_t;
- if (num_hits == max_hits) {
-#if BVH_FEATURE(BVH_INSTANCING)
- if (object != OBJECT_NONE) {
-# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
- Transform itfm = object_fetch_transform(
- kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
-#endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- hit = motion_triangle_intersect(
- kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
- if (hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
-# if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-# endif
- isect_array->t = isect_t;
- if (num_hits == max_hits) {
-# if BVH_FEATURE(BVH_INSTANCING)
- if (object != OBJECT_NONE) {
-# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
- Transform itfm = object_fetch_transform(
- kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
-# endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
-#endif
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
- if (object_flag & SD_OBJECT_HAS_VOLUME) {
-# if BVH_FEATURE(BVH_MOTION)
- isect_t = bvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-# else
- isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-# endif
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect_t);
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* Pop. */
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
- if (num_hits_in_instance) {
- float t_fac;
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
- /* Scale isect->t to adjust for instancing. */
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
- else {
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-# endif
- }
-
- isect_t = tmax;
- isect_array->t = isect_t;
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return num_hits;
-}
-
-#undef NODE_INTERSECT
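
One way to read the "Scale isect->t to adjust for instancing" step in the deleted traversals above; this is a reconstruction of the geometry, under the assumption that dir and itfm at that point still denote the world-space ray direction and the world-to-object transform. If the world-space ray is X(t) = P + t\,d and the instance push applies the world-to-object transform M (linear part only for the direction) and renormalizes, the same point is reached at a rescaled parameter:

    X'(t') = M P + t' \frac{M d}{\lVert M d \rVert}, \qquad t' = t\,\lVert M d \rVert,
    \quad\text{so}\quad t = \frac{t'}{\lVert M d \rVert}.

Under that assumption, this is the factor t_fac = 1.0f / len(transform_direction(&itfm, dir)) that the code multiplies into every hit recorded inside the instance on the instance pop, so all entries of isect_array end up parameterized along the same world-space ray.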
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index 5e26f90a878..6070fd983f5 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+// clang-format off
#include "kernel/closure/bsdf_ashikhmin_velvet.h"
#include "kernel/closure/bsdf_diffuse.h"
#include "kernel/closure/bsdf_oren_nayar.h"
@@ -32,6 +33,7 @@
#include "kernel/closure/bsdf_principled_sheen.h"
#include "kernel/closure/bssrdf.h"
#include "kernel/closure/volume.h"
+// clang-format on
CCL_NAMESPACE_BEGIN
@@ -73,6 +75,40 @@ ccl_device_inline float bsdf_get_roughness_squared(const ShaderClosure *sc)
return bsdf_get_specular_roughness_squared(sc);
}
+/* An additional term to smooth illumination on grazing angles when using bump mapping.
+ * Based on "Taming the Shadow Terminator" by Matt Jen-Yuan Chiang,
+ * Yining Karl Li and Brent Burley. */
+ccl_device_inline float bump_shadowing_term(float3 Ng, float3 N, float3 I)
+{
+ float g = safe_divide(dot(Ng, I), dot(N, I) * dot(Ng, N));
+
+ /* If the incoming light is on the unshadowed side, return full brightness. */
+ if (g >= 1.0f) {
+ return 1.0f;
+ }
+
+ /* If the incoming light points away from the surface, return black. */
+ if (g < 0.0f) {
+ return 0.0f;
+ }
+
+ /* Return smoothed value to avoid discontinuity at perpendicular angle. */
+ float g2 = sqr(g);
+ return -g2 * g + g2 + g;
+}
+
+/* Shadow terminator workaround, taken from Appleseed.
+ * Original code is under the MIT License
+ * Copyright (c) 2019 Francois Beaune, The appleseedhq Organization */
+ccl_device_inline float shift_cos_in(float cos_in, const float frequency_multiplier)
+{
+ cos_in = min(cos_in, 1.0f);
+
+ const float angle = fast_acosf(cos_in);
+ const float val = max(cosf(angle * frequency_multiplier), 0.0f) / cos_in;
+ return val;
+}
+
ccl_device_inline int bsdf_sample(KernelGlobals *kg,
ShaderData *sd,
const ShaderClosure *sc,
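
Written out, the two correction factors added in the hunk above behave as follows (a restatement of the code, with g and c shorthand for the quantities computed in bump_shadowing_term() and shift_cos_in()):

    g = \frac{N_g \cdot I}{(N \cdot I)\,(N_g \cdot N)}, \qquad
    G(g) = \begin{cases} 0 & g < 0 \\ -g^3 + g^2 + g & 0 \le g < 1 \\ 1 & g \ge 1 \end{cases}

    s(c, f) = \frac{\max(\cos(f \arccos c),\, 0)}{c}

G is C^1-continuous at g = 1 (G(1) = 1, G'(1) = 0), which is why the cubic is used instead of a hard cutoff. The terminator offset s scales the incoming angle rather than the cosine and is only applied when the per-object frequency_multiplier f exceeds 1.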
@@ -83,13 +119,16 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
differential3 *domega_in,
float *pdf)
{
+ /* For curves use the smooth normal, particularly for ribbons the geometric
+ * normal gives too much darkening otherwise. */
int label;
+ const float3 Ng = (sd->type & PRIMITIVE_ALL_CURVE) ? sc->N : sd->Ng;
switch (sc->type) {
case CLOSURE_BSDF_DIFFUSE_ID:
case CLOSURE_BSDF_BSSRDF_ID:
label = bsdf_diffuse_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -104,7 +143,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
#ifdef __SVM__
case CLOSURE_BSDF_OREN_NAYAR_ID:
label = bsdf_oren_nayar_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -119,7 +158,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
# ifdef __OSL__
case CLOSURE_BSDF_PHONG_RAMP_ID:
label = bsdf_phong_ramp_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -133,7 +172,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
label = bsdf_diffuse_ramp_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -148,7 +187,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
# endif
case CLOSURE_BSDF_TRANSLUCENT_ID:
label = bsdf_translucent_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -162,7 +201,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_REFLECTION_ID:
label = bsdf_reflection_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -176,7 +215,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_REFRACTION_ID:
label = bsdf_refraction_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -190,7 +229,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_TRANSPARENT_ID:
label = bsdf_transparent_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -205,12 +244,10 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
case CLOSURE_BSDF_MICROFACET_GGX_ID:
case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
label = bsdf_microfacet_ggx_sample(kg,
sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -226,7 +263,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
label = bsdf_microfacet_multi_ggx_sample(kg,
sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -243,7 +280,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
label = bsdf_microfacet_multi_ggx_glass_sample(kg,
sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -257,11 +294,10 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
&sd->lcg_state);
break;
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
label = bsdf_microfacet_beckmann_sample(kg,
sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -274,9 +310,8 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
- case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
label = bsdf_ashikhmin_shirley_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -290,7 +325,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
label = bsdf_ashikhmin_velvet_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -304,7 +339,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_DIFFUSE_TOON_ID:
label = bsdf_diffuse_toon_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -318,7 +353,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_GLOSSY_TOON_ID:
label = bsdf_glossy_toon_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -332,7 +367,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
label = bsdf_hair_reflection_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -346,7 +381,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
label = bsdf_hair_transmission_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -366,7 +401,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
label = bsdf_principled_diffuse_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -380,7 +415,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
label = bsdf_principled_sheen_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -424,6 +459,19 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
}
}
}
+ else {
+ /* Shadow terminator offset. */
+ const float frequency_multiplier =
+ kernel_tex_fetch(__objects, sd->object).shadow_terminator_offset;
+ if (frequency_multiplier > 1.0f) {
+ *eval *= shift_cos_in(dot(*omega_in, sc->N), frequency_multiplier);
+ }
+ if (label & LABEL_DIFFUSE) {
+ if (!isequal_float3(sc->N, sd->N)) {
+ *eval *= bump_shadowing_term((label & LABEL_TRANSMIT) ? -sd->N : sd->N, sc->N, *omega_in);
+ }
+ }
+ }
return label;
}
@@ -440,9 +488,12 @@ ccl_device_inline
const float3 omega_in,
float *pdf)
{
+ /* For curves use the smooth normal, particularly for ribbons the geometric
+ * normal gives too much darkening otherwise. */
+ const float3 Ng = (sd->type & PRIMITIVE_ALL_CURVE) ? sd->N : sd->Ng;
float3 eval;
- if (dot(sd->Ng, omega_in) >= 0.0f) {
+ if (dot(Ng, omega_in) >= 0.0f) {
switch (sc->type) {
case CLOSURE_BSDF_DIFFUSE_ID:
case CLOSURE_BSDF_BSSRDF_ID:
@@ -475,8 +526,6 @@ ccl_device_inline
case CLOSURE_BSDF_MICROFACET_GGX_ID:
case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
eval = bsdf_microfacet_ggx_eval_reflect(sc, sd->I, omega_in, pdf);
break;
@@ -490,12 +539,10 @@ ccl_device_inline
sc, sd->I, omega_in, pdf, &sd->lcg_state);
break;
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
eval = bsdf_microfacet_beckmann_eval_reflect(sc, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
- case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
eval = bsdf_ashikhmin_shirley_eval_reflect(sc, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
@@ -535,6 +582,17 @@ ccl_device_inline
eval = make_float3(0.0f, 0.0f, 0.0f);
break;
}
+ if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
+ if (!isequal_float3(sc->N, sd->N)) {
+ eval *= bump_shadowing_term(sd->N, sc->N, omega_in);
+ }
+ }
+ /* Shadow terminator offset. */
+ const float frequency_multiplier =
+ kernel_tex_fetch(__objects, sd->object).shadow_terminator_offset;
+ if (frequency_multiplier > 1.0f) {
+ eval *= shift_cos_in(dot(omega_in, sc->N), frequency_multiplier);
+ }
}
else {
switch (sc->type) {
@@ -561,8 +619,6 @@ ccl_device_inline
case CLOSURE_BSDF_MICROFACET_GGX_ID:
case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
eval = bsdf_microfacet_ggx_eval_transmit(sc, sd->I, omega_in, pdf);
break;
@@ -576,12 +632,10 @@ ccl_device_inline
sc, sd->I, omega_in, pdf, &sd->lcg_state);
break;
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
eval = bsdf_microfacet_beckmann_eval_transmit(sc, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
- case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
eval = bsdf_ashikhmin_shirley_eval_transmit(sc, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
@@ -621,6 +675,11 @@ ccl_device_inline
eval = make_float3(0.0f, 0.0f, 0.0f);
break;
}
+ if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
+ if (!isequal_float3(sc->N, sd->N)) {
+ eval *= bump_shadowing_term(-sd->N, sc->N, omega_in);
+ }
+ }
}
return eval;
@@ -640,18 +699,14 @@ ccl_device void bsdf_blur(KernelGlobals *kg, ShaderClosure *sc, float roughness)
case CLOSURE_BSDF_MICROFACET_GGX_ID:
case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
bsdf_microfacet_ggx_blur(sc, roughness);
break;
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
bsdf_microfacet_beckmann_blur(sc, roughness);
break;
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
- case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
bsdf_ashikhmin_shirley_blur(sc, roughness);
break;
case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
@@ -680,18 +735,14 @@ ccl_device bool bsdf_merge(ShaderClosure *a, ShaderClosure *b)
case CLOSURE_BSDF_MICROFACET_GGX_ID:
case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
- case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
return bsdf_microfacet_merge(a, b);
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
return bsdf_ashikhmin_velvet_merge(a, b);

diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
index b3b1c37748d..0d50172a907 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
@@ -34,18 +34,9 @@ CCL_NAMESPACE_BEGIN
ccl_device int bsdf_ashikhmin_shirley_setup(MicrofacetBsdf *bsdf)
{
bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
- bsdf->alpha_y = bsdf->alpha_x;
-
- bsdf->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID;
- return SD_BSDF | SD_BSDF_HAS_EVAL;
-}
-
-ccl_device int bsdf_ashikhmin_shirley_aniso_setup(MicrofacetBsdf *bsdf)
-{
- bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f);
- bsdf->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID;
+ bsdf->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID;
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
@@ -85,15 +76,11 @@ ccl_device_forceinline float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderCl
float HdotI = fmaxf(fabsf(dot(H, I)), 1e-6f);
float HdotN = fmaxf(dot(H, N), 1e-6f);
- float pump =
- 1.0f /
- fmaxf(
- 1e-6f,
- (HdotI *
- fmaxf(
- NdotO,
- NdotI))); /* pump from original paper (first derivative disc., but cancels the HdotI in the pdf nicely) */
- /*float pump = 1.0f / fmaxf(1e-4f, ((NdotO + NdotI) * (NdotO*NdotI))); */ /* pump from d-brdf paper */
+ /* pump from original paper
+ * (first derivative disc., but cancels the HdotI in the pdf nicely) */
+ float pump = 1.0f / fmaxf(1e-6f, (HdotI * fmaxf(NdotO, NdotI)));
+ /* pump from d-brdf paper */
+ /*float pump = 1.0f / fmaxf(1e-4f, ((NdotO + NdotI) * (NdotO*NdotI))); */
float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x);
float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y);
@@ -105,9 +92,8 @@ ccl_device_forceinline float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderCl
float norm = (n_x + 1.0f) / (8.0f * M_PI_F);
out = NdotO * norm * lobe * pump;
- *pdf =
- norm * lobe /
- HdotI; /* this is p_h / 4(H.I) (conversion from 'wh measure' to 'wi measure', eq. 8 in paper) */
+ /* this is p_h / 4(H.I) (conversion from 'wh measure' to 'wi measure', eq. 8 in paper). */
+ *pdf = norm * lobe / HdotI;
}
else {
/* anisotropic */
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
index 8122bcc1424..3d3f20edab3 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
@@ -42,6 +42,8 @@ typedef ccl_addr_space struct VelvetBsdf {
float invsigma2;
} VelvetBsdf;
+static_assert(sizeof(ShaderClosure) >= sizeof(VelvetBsdf), "VelvetBsdf is too large!");
+
ccl_device int bsdf_ashikhmin_velvet_setup(VelvetBsdf *bsdf)
{
float sigma = fmaxf(bsdf->sigma, 0.01f);
diff --git a/intern/cycles/kernel/closure/bsdf_diffuse.h b/intern/cycles/kernel/closure/bsdf_diffuse.h
index 76b50548455..ea604ed0311 100644
--- a/intern/cycles/kernel/closure/bsdf_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_diffuse.h
@@ -39,6 +39,8 @@ typedef ccl_addr_space struct DiffuseBsdf {
SHADER_CLOSURE_BASE;
} DiffuseBsdf;
+static_assert(sizeof(ShaderClosure) >= sizeof(DiffuseBsdf), "DiffuseBsdf is too large!");
+
/* DIFFUSE */
ccl_device int bsdf_diffuse_setup(DiffuseBsdf *bsdf)
diff --git a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
index 9d13eb8d4e0..aa62c1c7ceb 100644
--- a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
+++ b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
@@ -43,6 +43,8 @@ typedef ccl_addr_space struct DiffuseRampBsdf {
float3 *colors;
} DiffuseRampBsdf;
+static_assert(sizeof(ShaderClosure) >= sizeof(DiffuseRampBsdf), "DiffuseRampBsdf is too large!");
+
ccl_device float3 bsdf_diffuse_ramp_get_color(const float3 colors[8], float pos)
{
int MAXCOLORS = 8;
diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h
index 6b2a9a97d30..7ca9424b815 100644
--- a/intern/cycles/kernel/closure/bsdf_hair.h
+++ b/intern/cycles/kernel/closure/bsdf_hair.h
@@ -44,6 +44,8 @@ typedef ccl_addr_space struct HairBsdf {
float offset;
} HairBsdf;
+static_assert(sizeof(ShaderClosure) >= sizeof(HairBsdf), "HairBsdf is too large!");
+
ccl_device int bsdf_hair_reflection_setup(HairBsdf *bsdf)
{
bsdf->type = CLOSURE_BSDF_HAIR_REFLECTION_ID;
@@ -224,7 +226,7 @@ ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc,
fast_sincosf(phi, &sinphi, &cosphi);
*omega_in = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg;
- //differentials - TODO: find a better approximation for the reflective bounce
+ // differentials - TODO: find a better approximation for the reflective bounce
#ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx;
*domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy;
@@ -285,7 +287,7 @@ ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc,
fast_sincosf(phi, &sinphi, &cosphi);
*omega_in = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg;
- //differentials - TODO: find a better approximation for the transmission bounce
+ // differentials - TODO: find a better approximation for the transmission bounce
#ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx;
*domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy;
diff --git a/intern/cycles/kernel/closure/bsdf_hair_principled.h b/intern/cycles/kernel/closure/bsdf_hair_principled.h
index a4bba2fbf6c..389bd62ba68 100644
--- a/intern/cycles/kernel/closure/bsdf_hair_principled.h
+++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h
@@ -60,7 +60,8 @@ ccl_device_inline float cos_from_sin(const float s)
return safe_sqrtf(1.0f - s * s);
}
-/* Gives the change in direction in the normal plane for the given angles and p-th-order scattering. */
+/* Gives the change in direction in the normal plane for the given angles and p-th-order
+ * scattering. */
ccl_device_inline float delta_phi(int p, float gamma_o, float gamma_t)
{
return 2.0f * p * gamma_t - 2.0f * gamma_o + p * M_PI_F;
@@ -205,9 +206,6 @@ ccl_device int bsdf_principled_hair_setup(ShaderData *sd, PrincipledHairBSDF *bs
float3 X = safe_normalize(sd->dPdu);
float3 Y = safe_normalize(cross(X, sd->I));
float3 Z = safe_normalize(cross(X, Y));
- /* TODO: the solution below works where sd->Ng is the normal
- * pointing from the center of the curve to the shading point.
- * It doesn't work for triangles, see https://developer.blender.org/T43625 */
/* h -1..0..1 means the ray goes from grazing the hair, to hitting it at
* the center, to grazing the other edge. This is the sine of the angle
@@ -215,7 +213,9 @@ ccl_device int bsdf_principled_hair_setup(ShaderData *sd, PrincipledHairBSDF *bs
/* TODO: we convert this value to a cosine later and discard the sign, so
* we could probably save some operations. */
- float h = dot(cross(sd->Ng, X), Z);
+ float h = (sd->type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) ?
+ -sd->v :
+ dot(cross(sd->Ng, X), Z);
kernel_assert(fabsf(h) < 1.0f + 1e-4f);
kernel_assert(isfinite3_safe(Y));
@@ -492,6 +492,36 @@ ccl_device void bsdf_principled_hair_blur(ShaderClosure *sc, float roughness)
bsdf->m0_roughness = fmaxf(roughness, bsdf->m0_roughness);
}
+/* Hair Albedo */
+
+ccl_device_inline float bsdf_principled_hair_albedo_roughness_scale(
+ const float azimuthal_roughness)
+{
+ const float x = azimuthal_roughness;
+ return (((((0.245f * x) + 5.574f) * x - 10.73f) * x + 2.532f) * x - 0.215f) * x + 5.969f;
+}
+
+ccl_device float3 bsdf_principled_hair_albedo(ShaderClosure *sc)
+{
+ PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)sc;
+ return exp3(-sqrt(bsdf->sigma) * bsdf_principled_hair_albedo_roughness_scale(bsdf->v));
+}
+
+ccl_device_inline float3
+bsdf_principled_hair_sigma_from_reflectance(const float3 color, const float azimuthal_roughness)
+{
+ const float3 sigma = log3(color) /
+ bsdf_principled_hair_albedo_roughness_scale(azimuthal_roughness);
+ return sigma * sigma;
+}
+
+ccl_device_inline float3 bsdf_principled_hair_sigma_from_concentration(const float eumelanin,
+ const float pheomelanin)
+{
+ return eumelanin * make_float3(0.506f, 0.841f, 1.653f) +
+ pheomelanin * make_float3(0.343f, 0.733f, 1.924f);
+}
+
CCL_NAMESPACE_END
#endif /* __BSDF_HAIR_PRINCIPLED_H__ */
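
Restated as formulas (taken directly from the functions added above), with v the azimuthal roughness, C the per-channel reflectance, and S(v) the quintic fit returned by bsdf_principled_hair_albedo_roughness_scale():

    S(v) = 0.245 v^5 + 5.574 v^4 - 10.73 v^3 + 2.532 v^2 - 0.215 v + 5.969

    A(\sigma, v) = \exp\!\big(-\sqrt{\sigma}\, S(v)\big), \qquad
    \sigma_{\text{reflectance}}(C, v) = \left(\frac{\ln C}{S(v)}\right)^{\!2}, \qquad
    \sigma_{\text{melanin}}(e, p) = e\,(0.506, 0.841, 1.653) + p\,(0.343, 0.733, 1.924)

The reflectance inversion is exact per channel: substituting \sigma_{\text{reflectance}} back into A recovers C, since \ln C \le 0 for C \le 1. The melanin mapping is a linear combination of the eumelanin (e) and pheomelanin (p) concentrations.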
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index b4da3123f28..d9e81535b62 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -37,6 +37,7 @@ CCL_NAMESPACE_BEGIN
typedef ccl_addr_space struct MicrofacetExtra {
float3 color, cspec0;
+ float3 fresnel_color;
float clearcoat;
} MicrofacetExtra;
@@ -48,6 +49,8 @@ typedef ccl_addr_space struct MicrofacetBsdf {
float3 T;
} MicrofacetBsdf;
+static_assert(sizeof(ShaderClosure) >= sizeof(MicrofacetBsdf), "MicrofacetBsdf is too large!");
+
/* Beckmann and GGX microfacet importance sampling. */
ccl_device_inline void microfacet_beckmann_sample_slopes(KernelGlobals *kg,
@@ -253,9 +256,7 @@ ccl_device_forceinline float3 reflection_color(const MicrofacetBsdf *bsdf, float
{
float3 F = make_float3(1.0f, 1.0f, 1.0f);
bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID ||
- bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID ||
- bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID);
-
+ bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID);
if (use_fresnel) {
float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
@@ -274,6 +275,22 @@ ccl_device_forceinline float D_GTR1(float NdotH, float alpha)
return (alpha2 - 1.0f) / (M_PI_F * logf(alpha2) * t);
}
+ccl_device_forceinline void bsdf_microfacet_fresnel_color(const ShaderData *sd,
+ MicrofacetBsdf *bsdf)
+{
+ kernel_assert(CLOSURE_IS_BSDF_MICROFACET_FRESNEL(bsdf->type));
+
+ float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+ bsdf->extra->fresnel_color = interpolate_fresnel_color(
+ sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0);
+
+ if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
+ bsdf->extra->fresnel_color *= 0.25f * bsdf->extra->clearcoat;
+ }
+
+ bsdf->sample_weight *= average(bsdf->extra->fresnel_color);
+}
+
/* GGX microfacet with Smith shadow-masking from:
*
* Microfacet Models for Refraction through Rough Surfaces
@@ -292,46 +309,46 @@ ccl_device int bsdf_microfacet_ggx_setup(MicrofacetBsdf *bsdf)
bsdf->extra = NULL;
bsdf->alpha_x = saturate(bsdf->alpha_x);
- bsdf->alpha_y = bsdf->alpha_x;
+ bsdf->alpha_y = saturate(bsdf->alpha_y);
bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ID;
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
-ccl_device int bsdf_microfacet_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
+/* Required to maintain OSL interface. */
+ccl_device int bsdf_microfacet_ggx_isotropic_setup(MicrofacetBsdf *bsdf)
{
- bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
- bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
- bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+ bsdf->alpha_y = bsdf->alpha_x;
- float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
- float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
- bsdf->sample_weight *= F;
+ return bsdf_microfacet_ggx_setup(bsdf);
+}
+
+ccl_device int bsdf_microfacet_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
+{
+ bsdf->extra->cspec0 = saturate3(bsdf->extra->cspec0);
bsdf->alpha_x = saturate(bsdf->alpha_x);
- bsdf->alpha_y = bsdf->alpha_x;
+ bsdf->alpha_y = saturate(bsdf->alpha_y);
bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID;
+ bsdf_microfacet_fresnel_color(sd, bsdf);
+
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device int bsdf_microfacet_ggx_clearcoat_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
{
- bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
- bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
- bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
-
- float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
- float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
- bsdf->sample_weight *= 0.25f * bsdf->extra->clearcoat * F;
+ bsdf->extra->cspec0 = saturate3(bsdf->extra->cspec0);
bsdf->alpha_x = saturate(bsdf->alpha_x);
bsdf->alpha_y = bsdf->alpha_x;
bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID;
+ bsdf_microfacet_fresnel_color(sd, bsdf);
+
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
@@ -350,36 +367,6 @@ ccl_device bool bsdf_microfacet_merge(const ShaderClosure *a, const ShaderClosur
(bsdf_a->extra->clearcoat == bsdf_b->extra->clearcoat)));
}
-ccl_device int bsdf_microfacet_ggx_aniso_setup(MicrofacetBsdf *bsdf)
-{
- bsdf->extra = NULL;
-
- bsdf->alpha_x = saturate(bsdf->alpha_x);
- bsdf->alpha_y = saturate(bsdf->alpha_y);
-
- bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID;
-
- return SD_BSDF | SD_BSDF_HAS_EVAL;
-}
-
-ccl_device int bsdf_microfacet_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
-{
- bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
- bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
- bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
-
- float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
- float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
- bsdf->sample_weight *= F;
-
- bsdf->alpha_x = saturate(bsdf->alpha_x);
- bsdf->alpha_y = saturate(bsdf->alpha_y);
-
- bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID;
-
- return SD_BSDF | SD_BSDF_HAS_EVAL;
-}
-
ccl_device int bsdf_microfacet_ggx_refraction_setup(MicrofacetBsdf *bsdf)
{
bsdf->extra = NULL;
@@ -629,8 +616,7 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg,
*eval = make_float3(1e6f, 1e6f, 1e6f);
bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID ||
- bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID ||
- bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID);
+ bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID);
/* if fresnel is used, calculate the color with reflection_color(...) */
if (use_fresnel) {
@@ -804,19 +790,18 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg,
ccl_device int bsdf_microfacet_beckmann_setup(MicrofacetBsdf *bsdf)
{
bsdf->alpha_x = saturate(bsdf->alpha_x);
- bsdf->alpha_y = bsdf->alpha_x;
+ bsdf->alpha_y = saturate(bsdf->alpha_y);
bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ID;
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
-ccl_device int bsdf_microfacet_beckmann_aniso_setup(MicrofacetBsdf *bsdf)
+/* Required to maintain OSL interface. */
+ccl_device int bsdf_microfacet_beckmann_isotropic_setup(MicrofacetBsdf *bsdf)
{
- bsdf->alpha_x = saturate(bsdf->alpha_x);
- bsdf->alpha_y = saturate(bsdf->alpha_y);
+ bsdf->alpha_y = bsdf->alpha_x;
- bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID;
- return SD_BSDF | SD_BSDF_HAS_EVAL;
+ return bsdf_microfacet_beckmann_setup(bsdf);
}
ccl_device int bsdf_microfacet_beckmann_refraction_setup(MicrofacetBsdf *bsdf)
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
index 2cc1a9c5299..9795c8da065 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
@@ -16,7 +16,8 @@
CCL_NAMESPACE_BEGIN
-/* Most of the code is based on the supplemental implementations from https://eheitzresearch.wordpress.com/240-2/. */
+/* Most of the code is based on the supplemental implementations from
+ * https://eheitzresearch.wordpress.com/240-2/. */
/* === GGX Microfacet distribution functions === */
@@ -80,7 +81,8 @@ ccl_device_forceinline float2 mf_sampleP22_11(const float cosI,
return make_float2(slopeX, -slopeY);
}
-/* Visible normal sampling for the GGX distribution (based on page 7 of the supplemental implementation). */
+/* Visible normal sampling for the GGX distribution
+ * (based on page 7 of the supplemental implementation). */
ccl_device_forceinline float3 mf_sample_vndf(const float3 wi,
const float2 alpha,
const float randx,
@@ -134,7 +136,8 @@ ccl_device_forceinline float3 mf_eval_phase_glossy(const float3 w,
return make_float3(phase, phase, phase);
}
-/* Phase function for dielectric transmissive materials, including both reflection and refraction according to the dielectric fresnel term. */
+/* Phase function for dielectric transmissive materials, including both reflection and refraction
+ * according to the dielectric fresnel term. */
ccl_device_forceinline float3 mf_sample_phase_glass(
const float3 wi, const float eta, const float3 wm, const float randV, bool *outside)
{
@@ -227,7 +230,8 @@ ccl_device_forceinline float mf_G1(const float3 w, const float C1, const float l
return powf(C1, lambda);
}
-/* Sampling from the visible height distribution (based on page 17 of the supplemental implementation). */
+/* Sampling from the visible height distribution (based on page 17 of the supplemental
+ * implementation). */
ccl_device_forceinline bool mf_sample_height(
const float3 w, float *h, float *C1, float *G1, float *lambda, const float U)
{
@@ -254,7 +258,8 @@ ccl_device_forceinline bool mf_sample_height(
}
/* === PDF approximations for the different phase functions. ===
- * As explained in bsdf_microfacet_multi_impl.h, using approximations with MIS still produces an unbiased result. */
+ * As explained in bsdf_microfacet_multi_impl.h, using approximations with MIS still produces an
+ * unbiased result. */
/* Approximation for the albedo of the single-scattering GGX distribution,
* the missing energy is then approximated as a diffuse reflection for the PDF. */
@@ -342,7 +347,8 @@ ccl_device_forceinline float mf_glass_pdf(const float3 wi,
}
}
-/* === Actual random walk implementations, one version of mf_eval and mf_sample per phase function. === */
+/* === Actual random walk implementations === */
+/* One version of mf_eval and mf_sample per phase function. */
#define MF_NAME_JOIN(x, y) x##_##y
#define MF_NAME_EVAL(x, y) MF_NAME_JOIN(x, y)
@@ -372,17 +378,13 @@ ccl_device int bsdf_microfacet_multi_ggx_common_setup(MicrofacetBsdf *bsdf)
{
bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f);
- bsdf->extra->color.x = saturate(bsdf->extra->color.x);
- bsdf->extra->color.y = saturate(bsdf->extra->color.y);
- bsdf->extra->color.z = saturate(bsdf->extra->color.z);
- bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
- bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
- bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+ bsdf->extra->color = saturate3(bsdf->extra->color);
+ bsdf->extra->cspec0 = saturate3(bsdf->extra->cspec0);
return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
}
-ccl_device int bsdf_microfacet_multi_ggx_aniso_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_multi_ggx_setup(MicrofacetBsdf *bsdf)
{
if (is_zero(bsdf->T))
bsdf->T = make_float3(1.0f, 0.0f, 0.0f);
@@ -392,39 +394,14 @@ ccl_device int bsdf_microfacet_multi_ggx_aniso_setup(MicrofacetBsdf *bsdf)
return bsdf_microfacet_multi_ggx_common_setup(bsdf);
}
-ccl_device int bsdf_microfacet_multi_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf,
- const ShaderData *sd)
+ccl_device int bsdf_microfacet_multi_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
{
if (is_zero(bsdf->T))
bsdf->T = make_float3(1.0f, 0.0f, 0.0f);
bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID;
- float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
- float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
- bsdf->sample_weight *= F;
-
- return bsdf_microfacet_multi_ggx_common_setup(bsdf);
-}
-
-ccl_device int bsdf_microfacet_multi_ggx_setup(MicrofacetBsdf *bsdf)
-{
- bsdf->alpha_y = bsdf->alpha_x;
-
- bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID;
-
- return bsdf_microfacet_multi_ggx_common_setup(bsdf);
-}
-
-ccl_device int bsdf_microfacet_multi_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
-{
- bsdf->alpha_y = bsdf->alpha_x;
-
- bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID;
-
- float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
- float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
- bsdf->sample_weight *= F;
+ bsdf_microfacet_fresnel_color(sd, bsdf);
return bsdf_microfacet_multi_ggx_common_setup(bsdf);
}
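The Fresnel-aware setup functions above now defer to a shared bsdf_microfacet_fresnel_color() helper instead of repeating the F0/average-Fresnel weighting inline. A hypothetical sketch of such a helper, assembled only from the removed lines (the real helper lives in the kernel headers and may store additional per-closure state):

/* Hypothetical sketch, not the kernel's definition. */
ccl_device_inline void bsdf_microfacet_fresnel_color(const ShaderData *sd, MicrofacetBsdf *bsdf)
{
  const float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
  const float F = average(
      interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
  bsdf->sample_weight *= F;
}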
@@ -562,9 +539,7 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_setup(MicrofacetBsdf *bsdf)
bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
bsdf->alpha_y = bsdf->alpha_x;
bsdf->ior = max(0.0f, bsdf->ior);
- bsdf->extra->color.x = saturate(bsdf->extra->color.x);
- bsdf->extra->color.y = saturate(bsdf->extra->color.y);
- bsdf->extra->color.z = saturate(bsdf->extra->color.z);
+ bsdf->extra->color = saturate3(bsdf->extra->color);
bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID;
@@ -577,18 +552,12 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsd
bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
bsdf->alpha_y = bsdf->alpha_x;
bsdf->ior = max(0.0f, bsdf->ior);
- bsdf->extra->color.x = saturate(bsdf->extra->color.x);
- bsdf->extra->color.y = saturate(bsdf->extra->color.y);
- bsdf->extra->color.z = saturate(bsdf->extra->color.z);
- bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
- bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
- bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+ bsdf->extra->color = saturate3(bsdf->extra->color);
+ bsdf->extra->cspec0 = saturate3(bsdf->extra->cspec0);
bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID;
- float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
- float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
- bsdf->sample_weight *= F;
+ bsdf_microfacet_fresnel_color(sd, bsdf);
return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
}
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
index 79247ee8057..04d9b22d7d2 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
@@ -16,14 +16,14 @@
/* Evaluate the BSDF from wi to wo.
* Evaluation is split into the analytical single-scattering BSDF and the multi-scattering BSDF,
- * which is evaluated stochastically through a random walk. At each bounce (except for the first one),
- * the amount of reflection from here towards wo is evaluated before bouncing again.
+ * which is evaluated stochastically through a random walk. At each bounce (except for the first
+ * one), the amount of reflection from here towards wo is evaluated before bouncing again.
*
- * Because of the random walk, the evaluation is not deterministic, but its expected value is equal to
- * the correct BSDF, which is enough for Monte-Carlo rendering. The PDF also can't be determined
- * analytically, so the single-scattering PDF plus a diffuse term to account for the multi-scattered
- * energy is used. In combination with MIS, that is enough to produce an unbiased result, although
- * the balance heuristic isn't necessarily optimal anymore.
+ * Because of the random walk, the evaluation is not deterministic, but its expected value is equal
+ * to the correct BSDF, which is enough for Monte-Carlo rendering. The PDF also can't be determined
+ * analytically, so the single-scattering PDF plus a diffuse term to account for the
+ * multi-scattered energy is used. In combination with MIS, that is enough to produce an unbiased
+ * result, although the balance heuristic isn't necessarily optimal anymore.
*/
ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi,
float3 wo,
@@ -36,7 +36,8 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi,
bool use_fresnel,
const float3 cspec0)
{
- /* Evaluating for a shallower incoming direction produces less noise, and the properties of the BSDF guarantee reciprocity. */
+ /* Evaluating for a shallower incoming direction produces less noise, and the properties of the
+ * BSDF guarantee reciprocity. */
bool swapped = false;
#ifdef MF_MULTI_GLASS
if (wi.z * wo.z < 0.0f) {
@@ -180,9 +181,9 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi,
return eval;
}
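The documentation block above makes two claims worth unpacking: the random-walk evaluation is noisy, but its expected value equals the true BSDF, and that alone is sufficient for a Monte Carlo renderer. A small standalone illustration of that property on a toy integrand (nothing from the kernel): multiplying a deterministic evaluator by an independent random factor with expectation 1 leaves the limit of the Monte Carlo estimate unchanged.

#include <cstdio>
#include <random>

int main()
{
  std::mt19937 rng(42);
  std::uniform_real_distribution<double> u(0.0, 1.0);

  const int N = 1000000;
  double sum_exact = 0.0, sum_noisy = 0.0;
  for (int i = 0; i < N; i++) {
    const double x = u(rng);
    const double f = x * x;            /* stand-in for the true BSDF value */
    const double noise = 2.0 * u(rng); /* independent factor with expectation 1 */
    sum_exact += f;
    sum_noisy += f * noise; /* noisy, but unbiased, evaluation */
  }
  /* Both estimates converge to the integral of x^2 over [0, 1], i.e. 1/3. */
  printf("exact: %f  noisy: %f\n", sum_exact / N, sum_noisy / N);
  return 0;
}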
-/* Perform a random walk on the microsurface starting from wi, returning the direction in which the walk
- * escaped the surface in wo. The function returns the throughput between wi and wo.
- * Without reflection losses due to coloring or fresnel absorption in conductors, the sampling is optimal.
+/* Perform a random walk on the microsurface starting from wi, returning the direction in which the
+ * walk escaped the surface in wo. The function returns the throughput between wi and wo. Without
+ * reflection losses due to coloring or fresnel absorption in conductors, the sampling is optimal.
*/
ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi,
float3 *wo,
diff --git a/intern/cycles/kernel/closure/bsdf_oren_nayar.h b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
index 104ed5b2818..41e5736bf49 100644
--- a/intern/cycles/kernel/closure/bsdf_oren_nayar.h
+++ b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
@@ -27,6 +27,8 @@ typedef ccl_addr_space struct OrenNayarBsdf {
float b;
} OrenNayarBsdf;
+static_assert(sizeof(ShaderClosure) >= sizeof(OrenNayarBsdf), "OrenNayarBsdf is too large!");
+
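These static_assert additions (repeated for each closure below) encode a memory-layout invariant: closures are stored in fixed-size ShaderClosure slots, so any closure struct that outgrows the slot must fail at compile time rather than corrupt neighboring memory at run time. A minimal standalone illustration of the pattern; Slot and SheenClosure are placeholder names, not kernel types:

#include <cstddef>

/* Fixed-size storage slot, analogous to ShaderClosure. */
struct Slot {
  int type;
  float weight[3];
  float pad[12]; /* reserves room for the largest closure */
};

/* One specific closure that must fit inside a Slot. */
struct SheenClosure {
  int type;
  float weight[3];
  float avg_value;
};

/* Compilation breaks as soon as SheenClosure outgrows the slot. */
static_assert(sizeof(Slot) >= sizeof(SheenClosure), "SheenClosure is too large!");

int main()
{
  return 0;
}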
ccl_device float3 bsdf_oren_nayar_get_intensity(const ShaderClosure *sc,
float3 n,
float3 v,
diff --git a/intern/cycles/kernel/closure/bsdf_phong_ramp.h b/intern/cycles/kernel/closure/bsdf_phong_ramp.h
index b6fd0e68681..cf5484383f2 100644
--- a/intern/cycles/kernel/closure/bsdf_phong_ramp.h
+++ b/intern/cycles/kernel/closure/bsdf_phong_ramp.h
@@ -44,6 +44,8 @@ typedef ccl_addr_space struct PhongRampBsdf {
float3 *colors;
} PhongRampBsdf;
+static_assert(sizeof(ShaderClosure) >= sizeof(PhongRampBsdf), "PhongRampBsdf is too large!");
+
ccl_device float3 bsdf_phong_ramp_get_color(const float3 colors[8], float pos)
{
int MAXCOLORS = 8;
diff --git a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
index d7795974ef5..43646aaeb5b 100644
--- a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
@@ -30,6 +30,9 @@ typedef ccl_addr_space struct PrincipledDiffuseBsdf {
float roughness;
} PrincipledDiffuseBsdf;
+static_assert(sizeof(ShaderClosure) >= sizeof(PrincipledDiffuseBsdf),
+ "PrincipledDiffuseBsdf is too large!");
+
ccl_device float3 calculate_principled_diffuse_brdf(
const PrincipledDiffuseBsdf *bsdf, float3 N, float3 V, float3 L, float3 H, float *pdf)
{
diff --git a/intern/cycles/kernel/closure/bsdf_principled_sheen.h b/intern/cycles/kernel/closure/bsdf_principled_sheen.h
index bc522095b3b..3707de29d73 100644
--- a/intern/cycles/kernel/closure/bsdf_principled_sheen.h
+++ b/intern/cycles/kernel/closure/bsdf_principled_sheen.h
@@ -26,10 +26,26 @@ CCL_NAMESPACE_BEGIN
typedef ccl_addr_space struct PrincipledSheenBsdf {
SHADER_CLOSURE_BASE;
+ float avg_value;
} PrincipledSheenBsdf;
-ccl_device float3 calculate_principled_sheen_brdf(
- const PrincipledSheenBsdf *bsdf, float3 N, float3 V, float3 L, float3 H, float *pdf)
+static_assert(sizeof(ShaderClosure) >= sizeof(PrincipledSheenBsdf),
+ "PrincipledSheenBsdf is too large!");
+
+ccl_device_inline float calculate_avg_principled_sheen_brdf(float3 N, float3 I)
+{
+ /* To compute the average, we set the half-vector to the normal, resulting in
+ * NdotI = NdotL = NdotV = LdotH */
+ float NdotI = dot(N, I);
+ if (NdotI < 0.0f) {
+ return 0.0f;
+ }
+
+ return schlick_fresnel(NdotI) * NdotI;
+}
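The averaging trick in the comment above (setting H = N so that NdotI = NdotL = NdotV = LdotH) reduces the sheen response to schlick_fresnel(NdotI) * NdotI, which the setup function below folds into sample_weight. A standalone numeric illustration using the same (1 - u)^5 Schlick term that appears later in bsdf_util.h; everything here is a toy reimplementation, not kernel code:

#include <cstdio>

/* Schlick's approximation, matching the kernel's schlick_fresnel(): (1 - u)^5. */
static float schlick_fresnel(float u)
{
  const float m = 1.0f - u;
  const float m2 = m * m;
  return m2 * m2 * m;
}

/* Average sheen response with H = N, so every dot product collapses to N.I. */
static float avg_principled_sheen(float NdotI)
{
  return (NdotI < 0.0f) ? 0.0f : schlick_fresnel(NdotI) * NdotI;
}

int main()
{
  /* Grazing angles keep a noticeable weight, near-normal incidence almost none. */
  const float angles[] = {0.1f, 0.5f, 0.9f};
  for (int i = 0; i < 3; i++) {
    printf("N.I = %.1f -> avg = %.4f\n", angles[i], avg_principled_sheen(angles[i]));
  }
  return 0;
}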
+
+ccl_device float3
+calculate_principled_sheen_brdf(float3 N, float3 V, float3 L, float3 H, float *pdf)
{
float NdotL = dot(N, L);
float NdotV = dot(N, V);
@@ -46,9 +62,11 @@ ccl_device float3 calculate_principled_sheen_brdf(
return make_float3(value, value, value);
}
-ccl_device int bsdf_principled_sheen_setup(PrincipledSheenBsdf *bsdf)
+ccl_device int bsdf_principled_sheen_setup(const ShaderData *sd, PrincipledSheenBsdf *bsdf)
{
bsdf->type = CLOSURE_BSDF_PRINCIPLED_SHEEN_ID;
+ bsdf->avg_value = calculate_avg_principled_sheen_brdf(bsdf->N, sd->I);
+ bsdf->sample_weight *= bsdf->avg_value;
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
@@ -66,7 +84,7 @@ ccl_device float3 bsdf_principled_sheen_eval_reflect(const ShaderClosure *sc,
if (dot(N, omega_in) > 0.0f) {
*pdf = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
- return calculate_principled_sheen_brdf(bsdf, N, V, L, H, pdf);
+ return calculate_principled_sheen_brdf(N, V, L, H, pdf);
}
else {
*pdf = 0.0f;
@@ -104,7 +122,7 @@ ccl_device int bsdf_principled_sheen_sample(const ShaderClosure *sc,
if (dot(Ng, *omega_in) > 0) {
float3 H = normalize(I + *omega_in);
- *eval = calculate_principled_sheen_brdf(bsdf, N, I, *omega_in, H, pdf);
+ *eval = calculate_principled_sheen_brdf(N, I, *omega_in, H, pdf);
#ifdef __RAY_DIFFERENTIALS__
// TODO: find a better approximation for the diffuse bounce
diff --git a/intern/cycles/kernel/closure/bsdf_toon.h b/intern/cycles/kernel/closure/bsdf_toon.h
index f37fd228087..cc5de21ed0e 100644
--- a/intern/cycles/kernel/closure/bsdf_toon.h
+++ b/intern/cycles/kernel/closure/bsdf_toon.h
@@ -42,6 +42,8 @@ typedef ccl_addr_space struct ToonBsdf {
float smooth;
} ToonBsdf;
+static_assert(sizeof(ShaderClosure) >= sizeof(ToonBsdf), "ToonBsdf is too large!");
+
/* DIFFUSE TOON */
ccl_device int bsdf_diffuse_toon_setup(ToonBsdf *bsdf)
diff --git a/intern/cycles/kernel/closure/bsdf_util.h b/intern/cycles/kernel/closure/bsdf_util.h
index a9a27edd7de..a73dee1b045 100644
--- a/intern/cycles/kernel/closure/bsdf_util.h
+++ b/intern/cycles/kernel/closure/bsdf_util.h
@@ -134,20 +134,6 @@ ccl_device float schlick_fresnel(float u)
return m2 * m2 * m; // pow(m, 5)
}
-ccl_device float smooth_step(float edge0, float edge1, float x)
-{
- float result;
- if (x < edge0)
- result = 0.0f;
- else if (x >= edge1)
- result = 1.0f;
- else {
- float t = (x - edge0) / (edge1 - edge0);
- result = (3.0f - 2.0f * t) * (t * t);
- }
- return result;
-}
-
/* Calculate the fresnel color which is a blend between white and the F0 color (cspec0) */
ccl_device_forceinline float3
interpolate_fresnel_color(float3 L, float3 H, float ior, float F0, float3 cspec0)
@@ -155,7 +141,7 @@ interpolate_fresnel_color(float3 L, float3 H, float ior, float F0, float3 cspec0
/* Calculate the fresnel interpolation factor
* The value from fresnel_dielectric_cos(...) has to be normalized because
* the cspec0 keeps the F0 color
- */
+ */
float F0_norm = 1.0f / (1.0f - F0);
float FH = (fresnel_dielectric_cos(dot(L, H), ior) - F0) * F0_norm;
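Spelled out, the normalization above remaps the dielectric Fresnel value from its range [F0, 1] at this IOR back to [0, 1]:

  FH = (fresnel_dielectric_cos(dot(L, H), ior) - F0) / (1 - F0)

so FH is 0 at normal incidence and approaches 1 at grazing angles. The blend itself is not visible in this hunk, but per the function's own comment it amounts to interpolating cspec0 toward white, roughly cspec0 * (1 - FH) + white * FH.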
diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h
index 57804eca269..4d88a822821 100644
--- a/intern/cycles/kernel/closure/bssrdf.h
+++ b/intern/cycles/kernel/closure/bssrdf.h
@@ -30,6 +30,8 @@ typedef ccl_addr_space struct Bssrdf {
float channels;
} Bssrdf;
+static_assert(sizeof(ShaderClosure) >= sizeof(Bssrdf), "Bssrdf is too large!");
+
/* Planar Truncated Gaussian
*
* Note how this is different from the typical gaussian, this one integrates
@@ -224,12 +226,12 @@ ccl_device float bssrdf_burley_eval(const float d, float r)
if (r >= Rm)
return 0.0f;
- /* Burley refletance profile, equation (3).
+ /* Burley reflectance profile, equation (3).
*
* NOTES:
* - Surface albedo is already included into sc->weight, no need to
* multiply by this term here.
- * - This is normalized diffuse model, so the equation is mutliplied
+ * - This is normalized diffuse model, so the equation is multiplied
* by 2*pi, which also matches cdf().
*/
float exp_r_3_d = expf(-r / (3.0f * d));
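For reference, Burley's normalized diffusion profile (the "equation (3)" cited above) is

  R(r) = (exp(-r / d) + exp(-r / (3 * d))) / (8 * pi * d * r)

with the surface albedo folded into the closure weight as the note says. Multiplying by 2 * pi * r, which is what matching cdf() implies, leaves a radial density of (exp(-r / d) + exp(-r / (3 * d))) / (4 * d). Computing exp_r_3_d = exp(-r / (3 * d)) first is convenient because exp(-r / d) is simply that value cubed; the remainder of the evaluation lies outside this hunk.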
@@ -450,7 +452,8 @@ ccl_device void bssrdf_sample(const ShaderClosure *sc, float xi, float *r, float
else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) {
bssrdf_gaussian_sample(radius, xi, r, h);
}
- else { /*if(bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/
+ else { /* if (bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID ||
+ * bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID) */
bssrdf_burley_sample(radius, xi, r, h);
}
}
@@ -466,7 +469,8 @@ ccl_device float bssrdf_channel_pdf(const Bssrdf *bssrdf, float radius, float r)
else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) {
return bssrdf_gaussian_pdf(radius, r);
}
- else { /*if(bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/
+ else { /* if (bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID ||
+ * bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/
return bssrdf_burley_pdf(radius, r);
}
}
diff --git a/intern/cycles/kernel/closure/volume.h b/intern/cycles/kernel/closure/volume.h
index 473bc0e8a82..1430f712701 100644
--- a/intern/cycles/kernel/closure/volume.h
+++ b/intern/cycles/kernel/closure/volume.h
@@ -40,6 +40,9 @@ typedef ccl_addr_space struct HenyeyGreensteinVolume {
float g;
} HenyeyGreensteinVolume;
+static_assert(sizeof(ShaderClosure) >= sizeof(HenyeyGreensteinVolume),
+ "HenyeyGreensteinVolume is too large!");
+
/* Given cosine between rays, return probability density that a photon bounces
* to that direction. The g parameter controls how different it is from the
* uniform sphere. g=0 uniform diffuse-like, g=1 close to sharp single ray. */
diff --git a/intern/cycles/kernel/filter/filter_features.h b/intern/cycles/kernel/filter/filter_features.h
index 809ccfe8be6..8a2af957146 100644
--- a/intern/cycles/kernel/filter/filter_features.h
+++ b/intern/cycles/kernel/filter/filter_features.h
@@ -18,8 +18,9 @@ CCL_NAMESPACE_BEGIN
#define ccl_get_feature(buffer, pass) (buffer)[(pass)*pass_stride]
-/* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y).+ * pixel_buffer always points to the current pixel in the first pass.
- * Repeat the loop for every secondary frame if there are any. */
+/* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y).+ * pixel_buffer always
+ * points to the current pixel in the first pass. Repeat the loop for every secondary frame if
+ * there are any. */
#define FOR_PIXEL_WINDOW \
for (int frame = 0; frame < tile_info->num_frames; frame++) { \
pixel.z = tile_info->frames[frame]; \
diff --git a/intern/cycles/kernel/filter/filter_features_sse.h b/intern/cycles/kernel/filter/filter_features_sse.h
index 1e0d6e93453..59d4ace2bef 100644
--- a/intern/cycles/kernel/filter/filter_features_sse.h
+++ b/intern/cycles/kernel/filter/filter_features_sse.h
@@ -20,8 +20,8 @@ CCL_NAMESPACE_BEGIN
/* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y), 4 at a time.
* pixel_buffer always points to the first of the 4 current pixel in the first pass.
- * x4 and y4 contain the coordinates of the four pixels, active_pixels contains a mask that's set for all pixels within the window.
- * Repeat the loop for every secondary frame if there are any. */
+ * x4 and y4 contain the coordinates of the four pixels, active_pixels contains a mask that's set
+ * for all pixels within the window. Repeat the loop for every secondary frame if there are any. */
#define FOR_PIXEL_WINDOW_SSE \
for (int frame = 0; frame < tile_info->num_frames; frame++) { \
pixel.z = tile_info->frames[frame]; \
@@ -109,7 +109,6 @@ ccl_device_inline void filter_calculate_scale_sse(float4 *scale, bool use_time)
scale[2] = rcp(max(reduce_max(scale[2]), make_float4(0.01f)));
if (use_time) {
scale[10] = rcp(max(reduce_max(scale[6]), make_float4(0.01f)));
- ;
}
scale[6] = rcp(max(reduce_max(scale[4]), make_float4(0.01f)));
scale[7] = scale[8] = scale[9] = rcp(max(reduce_max(sqrt(scale[5])), make_float4(0.01f)));
diff --git a/intern/cycles/kernel/filter/filter_nlm_cpu.h b/intern/cycles/kernel/filter/filter_nlm_cpu.h
index a94266a8786..24200c29203 100644
--- a/intern/cycles/kernel/filter/filter_nlm_cpu.h
+++ b/intern/cycles/kernel/filter/filter_nlm_cpu.h
@@ -197,7 +197,8 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx,
bool use_time)
{
int4 clip_area = rect_clip(rect, filter_window);
- /* fy and fy are in filter-window-relative coordinates, while x and y are in feature-window-relative coordinates. */
+ /* fy and fy are in filter-window-relative coordinates,
+ * while x and y are in feature-window-relative coordinates. */
for (int y = clip_area.y; y < clip_area.w; y++) {
for (int x = clip_area.x; x < clip_area.z; x++) {
const int low = max(rect.x, x - f);
diff --git a/intern/cycles/kernel/filter/filter_prefilter.h b/intern/cycles/kernel/filter/filter_prefilter.h
index 8211311313d..97cecba190e 100644
--- a/intern/cycles/kernel/filter/filter_prefilter.h
+++ b/intern/cycles/kernel/filter/filter_prefilter.h
@@ -16,14 +16,19 @@
CCL_NAMESPACE_BEGIN
-/* First step of the shadow prefiltering, performs the shadow division and stores all data
+/**
+ * First step of the shadow prefiltering, performs the shadow division and stores all data
* in a nice and easy rectangular array that can be passed to the NLM filter.
*
* Calculates:
- * unfiltered: Contains the two half images of the shadow feature pass
- * sampleVariance: The sample-based variance calculated in the kernel. Note: This calculation is biased in general, and especially here since the variance of the ratio can only be approximated.
- * sampleVarianceV: Variance of the sample variance estimation, quite noisy (since it's essentially the buffer variance of the two variance halves)
- * bufferVariance: The buffer-based variance of the shadow feature. Unbiased, but quite noisy.
+ * \param unfiltered: Contains the two half images of the shadow feature pass
+ * \param sampleVariance: The sample-based variance calculated in the kernel.
+ * Note: This calculation is biased in general,
+ * and especially here since the variance of the ratio can only be approximated.
+ * \param sampleVarianceV: Variance of the sample variance estimation, quite noisy
+ * (since it's essentially the buffer variance of the two variance halves)
+ * \param bufferVariance: The buffer-based variance of the shadow feature.
+ * Unbiased, but quite noisy.
*/
ccl_device void kernel_filter_divide_shadow(int sample,
CCL_FILTER_TILE_INFO,
@@ -140,25 +145,34 @@ ccl_device void kernel_filter_write_feature(int sample,
combined_buffer[out_offset] = from[idx];
}
+#define GET_COLOR(image) \
+ make_float3(image[idx], image[idx + pass_stride], image[idx + 2 * pass_stride])
+#define SET_COLOR(image, color) \
+ image[idx] = color.x; \
+ image[idx + pass_stride] = color.y; \
+ image[idx + 2 * pass_stride] = color.z
+
ccl_device void kernel_filter_detect_outliers(int x,
int y,
- ccl_global float *image,
- ccl_global float *variance,
+ ccl_global float *in,
+ ccl_global float *variance_out,
ccl_global float *depth,
- ccl_global float *out,
+ ccl_global float *image_out,
int4 rect,
int pass_stride)
{
int buffer_w = align_up(rect.z - rect.x, 4);
+ ccl_global float *image_in = in;
+ ccl_global float *variance_in = in + 3 * pass_stride;
+
int n = 0;
float values[25];
float pixel_variance, max_variance = 0.0f;
for (int y1 = max(y - 2, rect.y); y1 < min(y + 3, rect.w); y1++) {
for (int x1 = max(x - 2, rect.x); x1 < min(x + 3, rect.z); x1++) {
int idx = (y1 - rect.y) * buffer_w + (x1 - rect.x);
- float3 color = make_float3(
- image[idx], image[idx + pass_stride], image[idx + 2 * pass_stride]);
+ float3 color = GET_COLOR(image_in);
color = max(color, make_float3(0.0f, 0.0f, 0.0f));
float L = average(color);
@@ -176,8 +190,7 @@ ccl_device void kernel_filter_detect_outliers(int x,
values[i] = L;
n++;
- float3 pixel_var = make_float3(
- variance[idx], variance[idx + pass_stride], variance[idx + 2 * pass_stride]);
+ float3 pixel_var = GET_COLOR(variance_in);
float var = average(pixel_var);
if ((x1 == x) && (y1 == y)) {
pixel_variance = (pixel_var.x < 0.0f || pixel_var.y < 0.0f || pixel_var.z < 0.0f) ? -1.0f :
@@ -192,8 +205,12 @@ ccl_device void kernel_filter_detect_outliers(int x,
max_variance += 1e-4f;
int idx = (y - rect.y) * buffer_w + (x - rect.x);
- float3 color = make_float3(image[idx], image[idx + pass_stride], image[idx + 2 * pass_stride]);
+
+ float3 color = GET_COLOR(image_in);
+ float3 variance = GET_COLOR(variance_in);
color = max(color, make_float3(0.0f, 0.0f, 0.0f));
+ variance = max(variance, make_float3(0.0f, 0.0f, 0.0f));
+
float L = average(color);
float ref = 2.0f * values[(int)(n * 0.75f)];
@@ -204,36 +221,43 @@ ccl_device void kernel_filter_detect_outliers(int x,
if (L > ref) {
/* The pixel appears to be an outlier.
- * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is that the pixel
- * should actually be at the reference value:
- * If the reference is within the 3-sigma interval, the pixel is assumed to be a statistical outlier.
- * Otherwise, it is very unlikely that the pixel should be darker, which indicates a legitimate highlight.
+ * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is
+ * that the pixel should actually be at the reference value: If the reference is within the
+ * 3-sigma interval, the pixel is assumed to be a statistical outlier. Otherwise, it is very
+ * unlikely that the pixel should be darker, which indicates a legitimate highlight.
*/
if (pixel_variance < 0.0f || pixel_variance > 9.0f * max_variance) {
depth[idx] = -depth[idx];
color *= ref / L;
- variance[idx] = variance[idx + pass_stride] = variance[idx + 2 * pass_stride] = max_variance;
+ variance = make_float3(max_variance, max_variance, max_variance);
}
else {
float stddev = sqrtf(pixel_variance);
if (L - 3 * stddev < ref) {
/* The pixel is an outlier, so negate the depth value to mark it as one.
- * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */
+ * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM
+ * weights. */
depth[idx] = -depth[idx];
float fac = ref / L;
color *= fac;
- variance[idx] *= fac * fac;
- variance[idx + pass_stride] *= fac * fac;
- variance[idx + 2 * pass_stride] *= fac * fac;
+ variance *= sqr(fac);
}
}
}
- out[idx] = color.x;
- out[idx + pass_stride] = color.y;
- out[idx + 2 * pass_stride] = color.z;
+
+ /* Apply log(1+x) transform to compress highlights and avoid halos in the denoised results.
+ * Variance is transformed accordingly - the derivative of the transform is 1/(1+x), so we
+ * scale by the square of that (since we have variance instead of standard deviation). */
+ color = color_highlight_compress(color, &variance);
+
+ SET_COLOR(image_out, color);
+ SET_COLOR(variance_out, variance);
}
+#undef GET_COLOR
+#undef SET_COLOR
+
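The highlight compression pair referenced above (color_highlight_compress() here, color_highlight_uncompress() in kernel_filter_finalize further down) is a log(1 + x) transform with first-order variance propagation. A standalone per-channel sketch of the described math; the kernel's helpers operate on float3 and are not shown in this diff, so the bodies below are illustrative only:

#include <cmath>
#include <cstdio>

/* Forward transform: y = log(1 + x). Its derivative is 1 / (1 + x), so the
 * variance is scaled by the square of that factor (first-order propagation). */
static void highlight_compress(float *color, float *variance)
{
  const float scale = 1.0f / (1.0f + *color);
  *variance *= scale * scale;
  *color = logf(1.0f + *color);
}

/* Inverse transform applied after denoising: x = exp(y) - 1. */
static float highlight_uncompress(float color)
{
  return expf(color) - 1.0f;
}

int main()
{
  float c = 50.0f, v = 4.0f; /* a bright, noisy highlight */
  highlight_compress(&c, &v);
  printf("compressed: %f (variance %f)\n", c, v);
  printf("round trip: %f\n", highlight_uncompress(c));
  return 0;
}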
/* Combine A/B buffers.
* Calculates the combined mean and the buffer variance. */
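For two independent half buffers a and b of the same pixel, each with per-pixel variance s^2, the combined mean (a + b) / 2 has variance s^2 / 2 while E[(a - b)^2] = 2 * s^2, so (a - b)^2 / 4 is an unbiased (if noisy) per-pixel estimate of the combined mean's variance. That is the general split-buffer identity this comment refers to; the kernel's exact expression is not shown in this hunk.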
ccl_device void kernel_filter_combine_halves(int x,
diff --git a/intern/cycles/kernel/filter/filter_reconstruction.h b/intern/cycles/kernel/filter/filter_reconstruction.h
index 850f20584da..17941689ad5 100644
--- a/intern/cycles/kernel/filter/filter_reconstruction.h
+++ b/intern/cycles/kernel/filter/filter_reconstruction.h
@@ -108,7 +108,7 @@ ccl_device_inline void kernel_filter_finalize(int x,
/* The weighted average of pixel colors (essentially, the NLM-filtered image).
* In case the solution of the linear model fails due to numerical issues or
- * returns non-sensical negative values, fall back to this value. */
+ * returns nonsensical negative values, fall back to this value. */
float3 mean_color = XtWY[0] / XtWX[0];
math_trimatrix_vec3_solve(XtWX, XtWY, (*rank) + 1, stride);
@@ -119,8 +119,8 @@ ccl_device_inline void kernel_filter_finalize(int x,
final_color = mean_color;
}
- /* Clamp pixel value to positive values. */
- final_color = max(final_color, make_float3(0.0f, 0.0f, 0.0f));
+ /* Clamp pixel value to positive values and reverse the highlight compression transform. */
+ final_color = color_highlight_uncompress(max(final_color, make_float3(0.0f, 0.0f, 0.0f)));
ccl_global float *combined_buffer = buffer + (y * buffer_params.y + x + buffer_params.x) *
buffer_params.z;
diff --git a/intern/cycles/kernel/filter/filter_transform.h b/intern/cycles/kernel/filter/filter_transform.h
index 69e3c7c458d..880a661214e 100644
--- a/intern/cycles/kernel/filter/filter_transform.h
+++ b/intern/cycles/kernel/filter/filter_transform.h
@@ -55,7 +55,8 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff
math_vector_scale(feature_means, 1.0f / num_pixels, num_features);
- /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */
+ /* === Scale the shifted feature passes to a range of [-1; 1] ===
+ * Will be baked into the transform later. */
float feature_scale[DENOISE_FEATURES];
math_vector_zero(feature_scale, num_features);
@@ -69,8 +70,9 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff
filter_calculate_scale(feature_scale, use_time);
/* === Generate the feature transformation. ===
- * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space
- * which generally has fewer dimensions. This mainly helps to prevent overfitting. */
+ * This transformation maps the num_features-dimensional feature space to a reduced feature
+ * (r-feature) space which generally has fewer dimensions.
+ * This mainly helps to prevent over-fitting. */
float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES];
math_matrix_zero(feature_matrix, num_features);
FOR_PIXEL_WINDOW
@@ -83,7 +85,7 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff
math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, 1);
*rank = 0;
- /* Prevent overfitting when a small window is used. */
+ /* Prevent over-fitting when a small window is used. */
int max_rank = min(num_features, num_pixels / 3);
if (pca_threshold < 0.0f) {
float threshold_energy = 0.0f;
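The pca_threshold < 0.0f branch visible above appears to pick the reduced rank from the eigenvalue energy (threshold_energy) rather than a fixed cutoff. A standalone sketch of that general idea, keeping the largest eigenvalues until they cover a requested fraction of the total energy; the function name and the exact stopping rule below are illustrative, not the kernel's:

#include <cstdio>

/* Keep eigenvalues, largest first, until they account for the requested
 * fraction of the total energy; the remaining dimensions are dropped. */
static int select_rank_by_energy(const float *eigenvalues, int n, float keep_fraction)
{
  float total = 0.0f;
  for (int i = 0; i < n; i++) {
    total += eigenvalues[i];
  }
  const float target = keep_fraction * total;

  float accumulated = 0.0f;
  int rank = 0;
  while (rank < n && accumulated < target) {
    accumulated += eigenvalues[rank++];
  }
  return rank;
}

int main()
{
  /* Eigenvalues sorted in decreasing order, as the decomposition produces them. */
  const float eigenvalues[] = {5.0f, 2.0f, 1.0f, 0.2f, 0.05f};
  printf("rank = %d\n", select_rank_by_energy(eigenvalues, 5, 0.95f));
  return 0;
}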
diff --git a/intern/cycles/kernel/filter/filter_transform_gpu.h b/intern/cycles/kernel/filter/filter_transform_gpu.h
index 89cddfd927f..adc85881fe5 100644
--- a/intern/cycles/kernel/filter/filter_transform_gpu.h
+++ b/intern/cycles/kernel/filter/filter_transform_gpu.h
@@ -61,7 +61,8 @@ ccl_device void kernel_filter_construct_transform(const ccl_global float *ccl_re
math_vector_scale(feature_means, 1.0f / num_pixels, num_features);
- /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */
+ /* === Scale the shifted feature passes to a range of [-1; 1] ===
+ * Will be baked into the transform later. */
float feature_scale[DENOISE_FEATURES];
math_vector_zero(feature_scale, num_features);
@@ -75,8 +76,9 @@ ccl_device void kernel_filter_construct_transform(const ccl_global float *ccl_re
filter_calculate_scale(feature_scale, use_time);
/* === Generate the feature transformation. ===
- * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space
- * which generally has fewer dimensions. This mainly helps to prevent overfitting. */
+ * This transformation maps the num_features-dimensional feature space to a reduced feature
+ * (r-feature) space which generally has fewer dimensions.
+ * This mainly helps to prevent overfitting. */
float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES];
math_matrix_zero(feature_matrix, num_features);
FOR_PIXEL_WINDOW
diff --git a/intern/cycles/kernel/filter/filter_transform_sse.h b/intern/cycles/kernel/filter/filter_transform_sse.h
index 22397b292db..5a124b5d73b 100644
--- a/intern/cycles/kernel/filter/filter_transform_sse.h
+++ b/intern/cycles/kernel/filter/filter_transform_sse.h
@@ -58,7 +58,8 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff
feature_means[i] = reduce_add(feature_means[i]) * pixel_scale;
}
- /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */
+ /* === Scale the shifted feature passes to a range of [-1; 1] ===
+ * Will be baked into the transform later. */
float4 feature_scale[DENOISE_FEATURES];
math_vector_zero_sse(feature_scale, num_features);
FOR_PIXEL_WINDOW_SSE
@@ -72,8 +73,9 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff
filter_calculate_scale_sse(feature_scale, use_time);
/* === Generate the feature transformation. ===
- * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space
- * which generally has fewer dimensions. This mainly helps to prevent overfitting. */
+ * This transformation maps the num_features-dimensional feature space to a reduced feature
+ * (r-feature) space which generally has fewer dimensions.
+ * This mainly helps to prevent over-fitting. */
float4 feature_matrix_sse[DENOISE_FEATURES * DENOISE_FEATURES];
math_matrix_zero_sse(feature_matrix_sse, num_features);
FOR_PIXEL_WINDOW_SSE
diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h
index e81c1b781c8..5ff4d5f7053 100644
--- a/intern/cycles/kernel/geom/geom.h
+++ b/intern/cycles/kernel/geom/geom.h
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+// clang-format off
#include "kernel/geom/geom_attribute.h"
#include "kernel/geom/geom_object.h"
#ifdef __PATCH_EVAL__
@@ -30,3 +31,4 @@
#include "kernel/geom/geom_curve_intersect.h"
#include "kernel/geom/geom_volume.h"
#include "kernel/geom/geom_primitive.h"
+// clang-format on
diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h
index 456608bfa22..e1b0e6fb81c 100644
--- a/intern/cycles/kernel/geom/geom_attribute.h
+++ b/intern/cycles/kernel/geom/geom_attribute.h
@@ -29,17 +29,11 @@ ccl_device_inline uint subd_triangle_patch(KernelGlobals *kg, const ShaderData *
ccl_device_inline uint attribute_primitive_type(KernelGlobals *kg, const ShaderData *sd)
{
-#ifdef __HAIR__
- if (sd->type & PRIMITIVE_ALL_CURVE) {
- return ATTR_PRIM_CURVE;
- }
- else
-#endif
- if (subd_triangle_patch(kg, sd) != ~0) {
+ if ((sd->type & PRIMITIVE_ALL_TRIANGLE) && subd_triangle_patch(kg, sd) != ~0) {
return ATTR_PRIM_SUBD;
}
else {
- return ATTR_PRIM_TRIANGLE;
+ return ATTR_PRIM_GEOMETRY;
}
}
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h
index e0aacb434eb..6ff0c7f2044 100644
--- a/intern/cycles/kernel/geom/geom_curve.h
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -23,33 +23,6 @@ CCL_NAMESPACE_BEGIN
#ifdef __HAIR__
-/* Interpolation of curve geometry */
-
-ccl_device_inline float3 curvetangent(float t, float3 p0, float3 p1, float3 p2, float3 p3)
-{
- float fc = 0.71f;
- float data[4];
- float t2 = t * t;
- data[0] = -3.0f * fc * t2 + 4.0f * fc * t - fc;
- data[1] = 3.0f * (2.0f - fc) * t2 + 2.0f * (fc - 3.0f) * t;
- data[2] = 3.0f * (fc - 2.0f) * t2 + 2.0f * (3.0f - 2.0f * fc) * t + fc;
- data[3] = 3.0f * fc * t2 - 2.0f * fc * t;
- return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3;
-}
-
-ccl_device_inline float3 curvepoint(float t, float3 p0, float3 p1, float3 p2, float3 p3)
-{
- float data[4];
- float fc = 0.71f;
- float t2 = t * t;
- float t3 = t2 * t;
- data[0] = -fc * t3 + 2.0f * fc * t2 - fc * t;
- data[1] = (2.0f - fc) * t3 + (fc - 3.0f) * t2 + 1.0f;
- data[2] = (fc - 2.0f) * t3 + (3.0f - 2.0f * fc) * t2 + fc * t;
- data[3] = fc * t3 - fc * t2;
- return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3;
-}
-
/* Reading attributes on various curve elements */
ccl_device float curve_attribute_float(
@@ -83,6 +56,16 @@ ccl_device float curve_attribute_float(
return (1.0f - sd->u) * f0 + sd->u * f1;
}
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = 0.0f;
+ if (dy)
+ *dy = 0.0f;
+# endif
+
+ return kernel_tex_fetch(__attributes_float, desc.offset);
+ }
else {
# ifdef __RAY_DIFFERENTIALS__
if (dx)
@@ -133,6 +116,16 @@ ccl_device float2 curve_attribute_float2(KernelGlobals *kg,
return (1.0f - sd->u) * f0 + sd->u * f1;
}
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = make_float2(0.0f, 0.0f);
+ if (dy)
+ *dy = make_float2(0.0f, 0.0f);
+# endif
+
+ return kernel_tex_fetch(__attributes_float2, desc.offset);
+ }
else {
# ifdef __RAY_DIFFERENTIALS__
if (dx)
@@ -183,6 +176,16 @@ ccl_device float3 curve_attribute_float3(KernelGlobals *kg,
return (1.0f - sd->u) * f0 + sd->u * f1;
}
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = make_float3(0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float3(0.0f, 0.0f, 0.0f);
+# endif
+
+ return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset));
+ }
else {
# ifdef __RAY_DIFFERENTIALS__
if (dx)
@@ -195,6 +198,66 @@ ccl_device float3 curve_attribute_float3(KernelGlobals *kg,
}
}
+ccl_device float4 curve_attribute_float4(KernelGlobals *kg,
+ const ShaderData *sd,
+ const AttributeDescriptor desc,
+ float4 *dx,
+ float4 *dy)
+{
+ if (desc.element == ATTR_ELEMENT_CURVE) {
+ /* idea: we can't derive any useful differentials here, but for tiled
+ * mipmap image caching it would be useful to avoid reading the highest
+ * detail level always. maybe a derivative based on the hair density
+ * could be computed somehow? */
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+# endif
+
+ return kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim);
+ }
+ else if (desc.element == ATTR_ELEMENT_CURVE_KEY ||
+ desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) {
+ float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
+ int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+ int k1 = k0 + 1;
+
+ float4 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + k0);
+ float4 f1 = kernel_tex_fetch(__attributes_float3, desc.offset + k1);
+
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = sd->du.dx * (f1 - f0);
+ if (dy)
+ *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+# endif
+
+ return (1.0f - sd->u) * f0 + sd->u * f1;
+ }
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+# endif
+
+ return kernel_tex_fetch(__attributes_float3, desc.offset);
+ }
+ else {
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+# endif
+
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+}
+
/* Curve thickness */
ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd)
@@ -208,12 +271,12 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd)
float4 P_curve[2];
- if (sd->type & PRIMITIVE_CURVE) {
+ if (!(sd->type & PRIMITIVE_ALL_MOTION)) {
P_curve[0] = kernel_tex_fetch(__curve_keys, k0);
P_curve[1] = kernel_tex_fetch(__curve_keys, k1);
}
else {
- motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
+ motion_curve_keys_linear(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
}
r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w;
diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h
index 5fd277c2f99..06d2c016f5b 100644
--- a/intern/cycles/kernel/geom/geom_curve_intersect.h
+++ b/intern/cycles/kernel/geom/geom_curve_intersect.h
@@ -1,4 +1,7 @@
/*
+ * Copyright 2009-2020 Intel Corporation. Adapted from Embree with
+ * with modifications.
+ * modifications.
+ *
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@@ -14,802 +17,685 @@
CCL_NAMESPACE_BEGIN
-/* Curve primitive intersection functions. */
+/* Curve primitive intersection functions.
+ *
+ * The code here was adapted from curve_intersector_sweep.h in Embree, to get
+ * an exact match between Embree CPU ray-tracing and our GPU ray-tracing. */
+
+#define CURVE_NUM_BEZIER_SUBDIVISIONS 3
+#define CURVE_NUM_BEZIER_SUBDIVISIONS_UNSTABLE (CURVE_NUM_BEZIER_SUBDIVISIONS + 1)
+#define CURVE_NUM_BEZIER_STEPS 2
+#define CURVE_NUM_JACOBIAN_ITERATIONS 5
#ifdef __HAIR__
-# ifdef __KERNEL_SSE2__
-ccl_device_inline ssef transform_point_T3(const ssef t[3], const ssef &a)
+/* Catmull-rom curve evaluation. */
+
+ccl_device_inline float4 catmull_rom_basis_eval(const float4 curve[4], float u)
{
- return madd(shuffle<0>(a), t[0], madd(shuffle<1>(a), t[1], shuffle<2>(a) * t[2]));
+ const float t = u;
+ const float s = 1.0f - u;
+ const float n0 = -t * s * s;
+ const float n1 = 2.0f + t * t * (3.0f * t - 5.0f);
+ const float n2 = 2.0f + s * s * (3.0f * s - 5.0f);
+ const float n3 = -s * t * t;
+ return 0.5f * (curve[0] * n0 + curve[1] * n1 + curve[2] * n2 + curve[3] * n3);
}
-# endif
-/* On CPU pass P and dir by reference to aligned vector. */
-ccl_device_forceinline bool cardinal_curve_intersect(KernelGlobals *kg,
- Intersection *isect,
- const float3 ccl_ref P,
- const float3 ccl_ref dir,
- uint visibility,
- int object,
- int curveAddr,
- float time,
- int type,
- uint *lcg_state,
- float difl,
- float extmax)
+ccl_device_inline float4 catmull_rom_basis_derivative(const float4 curve[4], float u)
{
- const bool is_curve_primitive = (type & PRIMITIVE_CURVE);
+ const float t = u;
+ const float s = 1.0f - u;
+ const float n0 = -s * s + 2.0f * s * t;
+ const float n1 = 2.0f * t * (3.0f * t - 5.0f) + 3.0f * t * t;
+ const float n2 = 2.0f * s * (3.0f * t + 2.0f) - 3.0f * s * s;
+ const float n3 = -2.0f * s * t + t * t;
+ return 0.5f * (curve[0] * n0 + curve[1] * n1 + curve[2] * n2 + curve[3] * n3);
+}
- if (!is_curve_primitive && kernel_data.bvh.use_bvh_steps) {
- const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr);
- if (time < prim_time.x || time > prim_time.y) {
- return false;
- }
- }
+ccl_device_inline float4 catmull_rom_basis_derivative2(const float4 curve[4], float u)
+{
- int segment = PRIMITIVE_UNPACK_SEGMENT(type);
- float epsilon = 0.0f;
- float r_st, r_en;
+ const float t = u;
+ const float n0 = -3.0f * t + 2.0f;
+ const float n1 = 9.0f * t - 5.0f;
+ const float n2 = -9.0f * t + 4.0f;
+ const float n3 = 3.0f * t - 1.0f;
+ return (curve[0] * n0 + curve[1] * n1 + curve[2] * n2 + curve[3] * n3);
+}
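The three functions above evaluate a Catmull-Rom segment and its first and second derivatives from four (position, radius) keys packed into float4. A small standalone usage example evaluating the same position weights at the segment midpoint; the float4 type and operators below are simplified stand-ins for the kernel's math types:

#include <cstdio>

struct float4 {
  float x, y, z, w;
};

static float4 operator*(float s, const float4 &a)
{
  return {s * a.x, s * a.y, s * a.z, s * a.w};
}

static float4 operator+(const float4 &a, const float4 &b)
{
  return {a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w};
}

/* Same weights as catmull_rom_basis_eval() above. */
static float4 catmull_rom_eval(const float4 curve[4], float u)
{
  const float t = u, s = 1.0f - u;
  const float n0 = -t * s * s;
  const float n1 = 2.0f + t * t * (3.0f * t - 5.0f);
  const float n2 = 2.0f + s * s * (3.0f * s - 5.0f);
  const float n3 = -s * t * t;
  return 0.5f * (n0 * curve[0] + n1 * curve[1] + n2 * curve[2] + n3 * curve[3]);
}

int main()
{
  /* Four keys: xyz position plus radius in w. */
  const float4 curve[4] = {
      {0.0f, 0.0f, 0.0f, 0.10f},
      {1.0f, 0.0f, 0.0f, 0.10f},
      {2.0f, 1.0f, 0.0f, 0.05f},
      {3.0f, 1.0f, 0.0f, 0.05f},
  };
  const float4 p = catmull_rom_eval(curve, 0.5f);
  printf("P(0.5) = (%f, %f, %f), radius = %f\n", p.x, p.y, p.z, p.w);
  return 0;
}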
- int depth = kernel_data.curve.subdivisions;
- int flags = kernel_data.curve.curveflags;
- int prim = kernel_tex_fetch(__prim_index, curveAddr);
+/* Thick Curve */
-# ifdef __KERNEL_SSE2__
- ssef vdir = load4f(dir);
- ssef vcurve_coef[4];
- const float3 *curve_coef = (float3 *)vcurve_coef;
+ccl_device_inline float3 dnormalize(const float3 p, const float3 dp)
+{
+ const float pp = dot(p, p);
+ const float pdp = dot(p, dp);
+ return (pp * dp - pdp * p) / (pp * sqrtf(pp));
+}
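dnormalize() is the quotient-rule derivative of normalization: with pp = dot(p, p) and pdp = dot(p, dp),

  d/dt (p / |p|) = (pp * dp - pdp * p) / (pp * sqrt(pp))

where the numerator is |p|^2 times the component of dp perpendicular to p and the denominator is |p|^3, which is exactly the expression returned above.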
- {
- ssef dtmp = vdir * vdir;
- ssef d_ss = mm_sqrt(dtmp + shuffle<2>(dtmp));
- ssef rd_ss = load1f_first(1.0f) / d_ss;
-
- ssei v00vec = load4i((ssei *)&kg->__curves.data[prim]);
- int2 &v00 = (int2 &)v00vec;
-
- int k0 = v00.x + segment;
- int k1 = k0 + 1;
- int ka = max(k0 - 1, v00.x);
- int kb = min(k1 + 1, v00.x + v00.y - 1);
-
-# if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && \
- (!defined(_MSC_VER) || _MSC_VER > 1800)
- avxf P_curve_0_1, P_curve_2_3;
- if (is_curve_primitive) {
- P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x);
- P_curve_2_3 = _mm256_loadu2_m128(&kg->__curve_keys.data[kb].x, &kg->__curve_keys.data[k1].x);
- }
- else {
- int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
- motion_cardinal_curve_keys_avx(
- kg, fobject, prim, time, ka, k0, k1, kb, &P_curve_0_1, &P_curve_2_3);
- }
-# else /* __KERNEL_AVX2__ */
- ssef P_curve[4];
-
- if (is_curve_primitive) {
- P_curve[0] = load4f(&kg->__curve_keys.data[ka].x);
- P_curve[1] = load4f(&kg->__curve_keys.data[k0].x);
- P_curve[2] = load4f(&kg->__curve_keys.data[k1].x);
- P_curve[3] = load4f(&kg->__curve_keys.data[kb].x);
+ccl_device_inline float sqr_point_to_line_distance(const float3 PmQ0, const float3 Q1mQ0)
+{
+ const float3 N = cross(PmQ0, Q1mQ0);
+ const float3 D = Q1mQ0;
+ return dot(N, N) / dot(D, D);
+}
+
+ccl_device_inline bool cylinder_intersect(const float3 cylinder_start,
+ const float3 cylinder_end,
+ const float cylinder_radius,
+ const float3 ray_dir,
+ float2 *t_o,
+ float *u0_o,
+ float3 *Ng0_o,
+ float *u1_o,
+ float3 *Ng1_o)
+{
+ /* Calculate quadratic equation to solve. */
+ const float rl = 1.0f / len(cylinder_end - cylinder_start);
+ const float3 P0 = cylinder_start, dP = (cylinder_end - cylinder_start) * rl;
+ const float3 O = -P0, dO = ray_dir;
+
+ const float dOdO = dot(dO, dO);
+ const float OdO = dot(dO, O);
+ const float OO = dot(O, O);
+ const float dOz = dot(dP, dO);
+ const float Oz = dot(dP, O);
+
+ const float A = dOdO - sqr(dOz);
+ const float B = 2.0f * (OdO - dOz * Oz);
+ const float C = OO - sqr(Oz) - sqr(cylinder_radius);
+
+ /* We miss the cylinder if determinant is smaller than zero. */
+ const float D = B * B - 4.0f * A * C;
+ if (!(D >= 0.0f)) {
+ *t_o = make_float2(FLT_MAX, -FLT_MAX);
+ return false;
+ }
+
+ /* Special case for rays that are parallel to the cylinder. */
+ const float eps = 16.0f * FLT_EPSILON * max(fabsf(dOdO), fabsf(sqr(dOz)));
+ if (fabsf(A) < eps) {
+ if (C <= 0.0f) {
+ *t_o = make_float2(-FLT_MAX, FLT_MAX);
+ return true;
}
else {
- int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
- motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4 *)&P_curve);
+ *t_o = make_float2(-FLT_MAX, FLT_MAX);
+ return false;
}
-# endif /* __KERNEL_AVX2__ */
-
- ssef rd_sgn = set_sign_bit<0, 1, 1, 1>(shuffle<0>(rd_ss));
- ssef mul_zxxy = shuffle<2, 0, 0, 1>(vdir) * rd_sgn;
- ssef mul_yz = shuffle<1, 2, 1, 2>(vdir) * mul_zxxy;
- ssef mul_shuf = shuffle<0, 1, 2, 3>(mul_zxxy, mul_yz);
- ssef vdir0 = vdir & cast(ssei(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0));
-
- ssef htfm0 = shuffle<0, 2, 0, 3>(mul_shuf, vdir0);
- ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0);
- ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0);
-
-# if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && \
- (!defined(_MSC_VER) || _MSC_VER > 1800)
- const avxf vPP = _mm256_broadcast_ps(&P.m128);
- const avxf htfm00 = avxf(htfm0.m128, htfm0.m128);
- const avxf htfm11 = avxf(htfm1.m128, htfm1.m128);
- const avxf htfm22 = avxf(htfm2.m128, htfm2.m128);
-
- const avxf p01 = madd(
- shuffle<0>(P_curve_0_1 - vPP),
- htfm00,
- madd(shuffle<1>(P_curve_0_1 - vPP), htfm11, shuffle<2>(P_curve_0_1 - vPP) * htfm22));
- const avxf p23 = madd(
- shuffle<0>(P_curve_2_3 - vPP),
- htfm00,
- madd(shuffle<1>(P_curve_2_3 - vPP), htfm11, shuffle<2>(P_curve_2_3 - vPP) * htfm22));
-
- const ssef p0 = _mm256_castps256_ps128(p01);
- const ssef p1 = _mm256_extractf128_ps(p01, 1);
- const ssef p2 = _mm256_castps256_ps128(p23);
- const ssef p3 = _mm256_extractf128_ps(p23, 1);
-
- const ssef P_curve_1 = _mm256_extractf128_ps(P_curve_0_1, 1);
- r_st = ((float4 &)P_curve_1).w;
- const ssef P_curve_2 = _mm256_castps256_ps128(P_curve_2_3);
- r_en = ((float4 &)P_curve_2).w;
-# else /* __KERNEL_AVX2__ */
- ssef htfm[] = {htfm0, htfm1, htfm2};
- ssef vP = load4f(P);
- ssef p0 = transform_point_T3(htfm, P_curve[0] - vP);
- ssef p1 = transform_point_T3(htfm, P_curve[1] - vP);
- ssef p2 = transform_point_T3(htfm, P_curve[2] - vP);
- ssef p3 = transform_point_T3(htfm, P_curve[3] - vP);
-
- r_st = ((float4 &)P_curve[1]).w;
- r_en = ((float4 &)P_curve[2]).w;
-# endif /* __KERNEL_AVX2__ */
-
- float fc = 0.71f;
- ssef vfc = ssef(fc);
- ssef vfcxp3 = vfc * p3;
-
- vcurve_coef[0] = p1;
- vcurve_coef[1] = vfc * (p2 - p0);
- vcurve_coef[2] = madd(
- ssef(fc * 2.0f), p0, madd(ssef(fc - 3.0f), p1, msub(ssef(3.0f - 2.0f * fc), p2, vfcxp3)));
- vcurve_coef[3] = msub(ssef(fc - 2.0f), p2 - p1, msub(vfc, p0, vfcxp3));
}
-# else
- float3 curve_coef[4];
- /* curve Intersection check */
- /* obtain curve parameters */
+ /* Standard case for rays that are not parallel to the cylinder. */
+ const float Q = sqrtf(D);
+ const float rcp_2A = 1.0f / (2.0f * A);
+ const float t0 = (-B - Q) * rcp_2A;
+ const float t1 = (-B + Q) * rcp_2A;
+
+ /* Calculates u and Ng for near hit. */
{
- /* ray transform created - this should be created at beginning of intersection loop */
- Transform htfm;
- float d = sqrtf(dir.x * dir.x + dir.z * dir.z);
- htfm = make_transform(dir.z / d,
- 0,
- -dir.x / d,
- 0,
- -dir.x * dir.y / d,
- d,
- -dir.y * dir.z / d,
- 0,
- dir.x,
- dir.y,
- dir.z,
- 0);
-
- float4 v00 = kernel_tex_fetch(__curves, prim);
-
- int k0 = __float_as_int(v00.x) + segment;
- int k1 = k0 + 1;
-
- int ka = max(k0 - 1, __float_as_int(v00.x));
- int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1);
-
- float4 P_curve[4];
-
- if (is_curve_primitive) {
- P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
- P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
- P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
- P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
- }
- else {
- int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
- motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, P_curve);
- }
+ *u0_o = (t0 * dOz + Oz) * rl;
+ const float3 Pr = t0 * ray_dir;
+ const float3 Pl = (*u0_o) * (cylinder_end - cylinder_start) + cylinder_start;
+ *Ng0_o = Pr - Pl;
+ }
- float3 p0 = transform_point(&htfm, float4_to_float3(P_curve[0]) - P);
- float3 p1 = transform_point(&htfm, float4_to_float3(P_curve[1]) - P);
- float3 p2 = transform_point(&htfm, float4_to_float3(P_curve[2]) - P);
- float3 p3 = transform_point(&htfm, float4_to_float3(P_curve[3]) - P);
-
- float fc = 0.71f;
- curve_coef[0] = p1;
- curve_coef[1] = -fc * p0 + fc * p2;
- curve_coef[2] = 2.0f * fc * p0 + (fc - 3.0f) * p1 + (3.0f - 2.0f * fc) * p2 - fc * p3;
- curve_coef[3] = -fc * p0 + (2.0f - fc) * p1 + (fc - 2.0f) * p2 + fc * p3;
- r_st = P_curve[1].w;
- r_en = P_curve[2].w;
+ /* Calculates u and Ng for far hit. */
+ {
+ *u1_o = (t1 * dOz + Oz) * rl;
+ const float3 Pr = t1 * ray_dir;
+ const float3 Pl = (*u1_o) * (cylinder_end - cylinder_start) + cylinder_start;
+ *Ng1_o = Pr - Pl;
}
-# endif
- float r_curr = max(r_st, r_en);
-
- if ((flags & CURVE_KN_RIBBONS) || !(flags & CURVE_KN_BACKFACING))
- epsilon = 2 * r_curr;
-
- /* find bounds - this is slow for cubic curves */
- float upper, lower;
-
- float zextrem[4];
- curvebounds(&lower,
- &upper,
- &zextrem[0],
- &zextrem[1],
- &zextrem[2],
- &zextrem[3],
- curve_coef[0].z,
- curve_coef[1].z,
- curve_coef[2].z,
- curve_coef[3].z);
- if (lower - r_curr > isect->t || upper + r_curr < epsilon)
- return false;
+ *t_o = make_float2(t0, t1);
- /* minimum width extension */
- float mw_extension = min(difl * fabsf(upper), extmax);
- float r_ext = mw_extension + r_curr;
-
- float xextrem[4];
- curvebounds(&lower,
- &upper,
- &xextrem[0],
- &xextrem[1],
- &xextrem[2],
- &xextrem[3],
- curve_coef[0].x,
- curve_coef[1].x,
- curve_coef[2].x,
- curve_coef[3].x);
- if (lower > r_ext || upper < -r_ext)
- return false;
+ return true;
+}
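The quadratic above comes from requiring a point t * ray_dir along the ray (cylinder_intersect() treats the ray as starting at the coordinate origin; the caller compensates with the dt offsets) to lie at distance cylinder_radius from the axis through cylinder_start with unit direction dP. With O = -P0, projecting out the axis component and expanding gives A * t^2 + B * t + C = 0 with

  A = dot(dO, dO) - dOz^2
  B = 2 * (dot(O, dO) - dOz * Oz)
  C = dot(O, O) - Oz^2 - cylinder_radius^2

where dOz = dot(dP, dO) and Oz = dot(dP, O), matching the variables in the code. A negative discriminant D = B^2 - 4 * A * C means the ray misses the infinite cylinder, and the branch for A close to zero handles rays that run parallel to the axis.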
- float yextrem[4];
- curvebounds(&lower,
- &upper,
- &yextrem[0],
- &yextrem[1],
- &yextrem[2],
- &yextrem[3],
- curve_coef[0].y,
- curve_coef[1].y,
- curve_coef[2].y,
- curve_coef[3].y);
- if (lower > r_ext || upper < -r_ext)
- return false;
+ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, const float3 ray_dir)
+{
+ const float3 O = -P;
+ const float3 D = ray_dir;
+ const float ON = dot(O, N);
+ const float DN = dot(D, N);
+ const float min_rcp_input = 1e-18f;
+ const bool eps = fabsf(DN) < min_rcp_input;
+ const float t = -ON / DN;
+ const float lower = (eps || DN < 0.0f) ? -FLT_MAX : t;
+ const float upper = (eps || DN > 0.0f) ? FLT_MAX : t;
+ return make_float2(lower, upper);
+}
- /* setup recurrent loop */
- int level = 1 << depth;
- int tree = 0;
- float resol = 1.0f / (float)level;
- bool hit = false;
-
- /* begin loop */
- while (!(tree >> (depth))) {
- const float i_st = tree * resol;
- const float i_en = i_st + (level * resol);
-
-# ifdef __KERNEL_SSE2__
- ssef vi_st = ssef(i_st), vi_en = ssef(i_en);
- ssef vp_st = madd(madd(madd(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]),
- vi_st,
- vcurve_coef[0]);
- ssef vp_en = madd(madd(madd(vcurve_coef[3], vi_en, vcurve_coef[2]), vi_en, vcurve_coef[1]),
- vi_en,
- vcurve_coef[0]);
-
- ssef vbmin = min(vp_st, vp_en);
- ssef vbmax = max(vp_st, vp_en);
-
- float3 &bmin = (float3 &)vbmin, &bmax = (float3 &)vbmax;
- float &bminx = bmin.x, &bminy = bmin.y, &bminz = bmin.z;
- float &bmaxx = bmax.x, &bmaxy = bmax.y, &bmaxz = bmax.z;
- float3 &p_st = (float3 &)vp_st, &p_en = (float3 &)vp_en;
-# else
- float3 p_st = ((curve_coef[3] * i_st + curve_coef[2]) * i_st + curve_coef[1]) * i_st +
- curve_coef[0];
- float3 p_en = ((curve_coef[3] * i_en + curve_coef[2]) * i_en + curve_coef[1]) * i_en +
- curve_coef[0];
-
- float bminx = min(p_st.x, p_en.x);
- float bmaxx = max(p_st.x, p_en.x);
- float bminy = min(p_st.y, p_en.y);
- float bmaxy = max(p_st.y, p_en.y);
- float bminz = min(p_st.z, p_en.z);
- float bmaxz = max(p_st.z, p_en.z);
-# endif
+ccl_device bool curve_intersect_iterative(const float3 ray_dir,
+ const float dt,
+ const float4 curve[4],
+ float u,
+ float t,
+ const bool use_backfacing,
+ Intersection *isect)
+{
+ const float length_ray_dir = len(ray_dir);
+
+ /* Error of curve evaluations is proportional to largest coordinate. */
+ const float4 box_min = min(min(curve[0], curve[1]), min(curve[2], curve[3]));
+ const float4 box_max = max(min(curve[0], curve[1]), max(curve[2], curve[3]));
+ const float4 box_abs = max(fabs(box_min), fabs(box_max));
+ const float P_err = 16.0f * FLT_EPSILON *
+ max(box_abs.x, max(box_abs.y, max(box_abs.z, box_abs.w)));
+ const float radius_max = box_max.w;
+
+ for (int i = 0; i < CURVE_NUM_JACOBIAN_ITERATIONS; i++) {
+ const float3 Q = ray_dir * t;
+ const float3 dQdt = ray_dir;
+ const float Q_err = 16.0f * FLT_EPSILON * length_ray_dir * t;
+
+ const float4 P4 = catmull_rom_basis_eval(curve, u);
+ const float4 dPdu4 = catmull_rom_basis_derivative(curve, u);
+
+ const float3 P = float4_to_float3(P4);
+ const float3 dPdu = float4_to_float3(dPdu4);
+ const float radius = P4.w;
+ const float dradiusdu = dPdu4.w;
+
+ const float3 ddPdu = float4_to_float3(catmull_rom_basis_derivative2(curve, u));
+
+ const float3 R = Q - P;
+ const float len_R = len(R);
+ const float R_err = max(Q_err, P_err);
+ const float3 dRdu = -dPdu;
+ const float3 dRdt = dQdt;
+
+ const float3 T = normalize(dPdu);
+ const float3 dTdu = dnormalize(dPdu, ddPdu);
+ const float cos_err = P_err / len(dPdu);
+
+ const float f = dot(R, T);
+ const float f_err = len_R * P_err + R_err + cos_err * (1.0f + len_R);
+ const float dfdu = dot(dRdu, T) + dot(R, dTdu);
+ const float dfdt = dot(dRdt, T);
+
+ const float K = dot(R, R) - sqr(f);
+ const float dKdu = (dot(R, dRdu) - f * dfdu);
+ const float dKdt = (dot(R, dRdt) - f * dfdt);
+ const float rsqrt_K = inversesqrtf(K);
+
+ const float g = sqrtf(K) - radius;
+ const float g_err = R_err + f_err + 16.0f * FLT_EPSILON * radius_max;
+ const float dgdu = dKdu * rsqrt_K - dradiusdu;
+ const float dgdt = dKdt * rsqrt_K;
+
+ const float invdet = 1.0f / (dfdu * dgdt - dgdu * dfdt);
+ u -= (dgdt * f - dfdt * g) * invdet;
+ t -= (-dgdu * f + dfdu * g) * invdet;
+
+ if (fabsf(f) < f_err && fabsf(g) < g_err) {
+ t += dt;
+ if (!(0.0f <= t && t <= isect->t)) {
+ return false; /* Rejects NaNs */
+ }
+ if (!(u >= 0.0f && u <= 1.0f)) {
+ return false; /* Rejects NaNs */
+ }
- if (xextrem[0] >= i_st && xextrem[0] <= i_en) {
- bminx = min(bminx, xextrem[1]);
- bmaxx = max(bmaxx, xextrem[1]);
- }
- if (xextrem[2] >= i_st && xextrem[2] <= i_en) {
- bminx = min(bminx, xextrem[3]);
- bmaxx = max(bmaxx, xextrem[3]);
- }
- if (yextrem[0] >= i_st && yextrem[0] <= i_en) {
- bminy = min(bminy, yextrem[1]);
- bmaxy = max(bmaxy, yextrem[1]);
- }
- if (yextrem[2] >= i_st && yextrem[2] <= i_en) {
- bminy = min(bminy, yextrem[3]);
- bmaxy = max(bmaxy, yextrem[3]);
- }
- if (zextrem[0] >= i_st && zextrem[0] <= i_en) {
- bminz = min(bminz, zextrem[1]);
- bmaxz = max(bmaxz, zextrem[1]);
- }
- if (zextrem[2] >= i_st && zextrem[2] <= i_en) {
- bminz = min(bminz, zextrem[3]);
- bmaxz = max(bmaxz, zextrem[3]);
+ /* Backface culling. */
+ const float3 R = normalize(Q - P);
+ const float3 U = dradiusdu * R + dPdu;
+ const float3 V = cross(dPdu, R);
+ const float3 Ng = cross(V, U);
+ if (!use_backfacing && dot(ray_dir, Ng) > 0.0f) {
+ return false;
+ }
+
+ /* Record intersection. */
+ isect->t = t;
+ isect->u = u;
+ isect->v = 0.0f;
+
+ return true;
}
+ }
+ return false;
+}
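curve_intersect_iterative() is a two-dimensional Newton iteration: f measures the hit point's offset along the curve tangent and g its radial distance minus the local curve radius, and each step applies the explicitly inverted 2x2 Jacobian (the invdet line). A standalone sketch of that same update rule on a toy system, unrelated to the curve residuals themselves:

#include <cstdio>

/* Solve f(u, t) = u^2 + t - 3 = 0 and g(u, t) = u + t^2 - 5 = 0 with the same
 * explicit 2x2 Newton update used above; from this start it converges to (1, 2). */
int main()
{
  float u = 1.0f, t = 1.0f;
  for (int i = 0; i < 8; i++) {
    const float f = u * u + t - 3.0f;
    const float g = u + t * t - 5.0f;
    const float dfdu = 2.0f * u, dfdt = 1.0f;
    const float dgdu = 1.0f, dgdt = 2.0f * t;

    /* Inverse of the 2x2 Jacobian, written out exactly as in the kernel code. */
    const float invdet = 1.0f / (dfdu * dgdt - dgdu * dfdt);
    u -= (dgdt * f - dfdt * g) * invdet;
    t -= (-dgdu * f + dfdu * g) * invdet;
  }
  printf("u = %f, t = %f\n", u, t);
  return 0;
}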
- float r1 = r_st + (r_en - r_st) * i_st;
- float r2 = r_st + (r_en - r_st) * i_en;
- r_curr = max(r1, r2);
+ccl_device bool curve_intersect_recursive(const float3 ray_orig,
+ const float3 ray_dir,
+ float4 curve[4],
+ Intersection *isect)
+{
+ /* Move ray closer to make intersection stable. */
+ const float3 center = float4_to_float3(0.25f * (curve[0] + curve[1] + curve[2] + curve[3]));
+ const float dt = dot(center - ray_orig, ray_dir) / dot(ray_dir, ray_dir);
+ const float3 ref = ray_orig + ray_dir * dt;
+ const float4 ref4 = make_float4(ref.x, ref.y, ref.z, 0.0f);
+ curve[0] -= ref4;
+ curve[1] -= ref4;
+ curve[2] -= ref4;
+ curve[3] -= ref4;
+
+ const bool use_backfacing = false;
+ const float step_size = 1.0f / (float)(CURVE_NUM_BEZIER_STEPS);
+
+ int depth = 0;
+
+ /* todo: optimize stack for GPU somehow? Possibly some bitflags are enough, and
+ * u0/u1 can be derived from the depth. */
+ struct {
+ float u0, u1;
+ int i;
+ } stack[CURVE_NUM_BEZIER_SUBDIVISIONS_UNSTABLE];
+
+ bool found = false;
+
+ float u0 = 0.0f;
+ float u1 = 1.0f;
+ int i = 0;
+
+ while (1) {
+ for (; i < CURVE_NUM_BEZIER_STEPS; i++) {
+ const float step = i * step_size;
+
+ /* Subdivide curve. */
+ const float dscale = (u1 - u0) * (1.0f / 3.0f) * step_size;
+ const float vu0 = mix(u0, u1, step);
+ const float vu1 = mix(u0, u1, step + step_size);
+
+ const float4 P0 = catmull_rom_basis_eval(curve, vu0);
+ const float4 dP0du = dscale * catmull_rom_basis_derivative(curve, vu0);
+ const float4 P3 = catmull_rom_basis_eval(curve, vu1);
+ const float4 dP3du = dscale * catmull_rom_basis_derivative(curve, vu1);
+
+ const float4 P1 = P0 + dP0du;
+ const float4 P2 = P3 - dP3du;
+
+ /* Calculate bounding cylinders. */
+ const float rr1 = sqr_point_to_line_distance(float4_to_float3(dP0du),
+ float4_to_float3(P3 - P0));
+ const float rr2 = sqr_point_to_line_distance(float4_to_float3(dP3du),
+ float4_to_float3(P3 - P0));
+ const float maxr12 = sqrtf(max(rr1, rr2));
+ const float one_plus_ulp = 1.0f + 2.0f * FLT_EPSILON;
+ const float one_minus_ulp = 1.0f - 2.0f * FLT_EPSILON;
+ float r_outer = max(max(P0.w, P1.w), max(P2.w, P3.w)) + maxr12;
+ float r_inner = min(min(P0.w, P1.w), min(P2.w, P3.w)) - maxr12;
+ r_outer = one_plus_ulp * r_outer;
+ r_inner = max(0.0f, one_minus_ulp * r_inner);
+ bool valid = true;
+
+ /* Intersect with outer cylinder. */
+ float2 tc_outer;
+ float u_outer0, u_outer1;
+ float3 Ng_outer0, Ng_outer1;
+ valid = cylinder_intersect(float4_to_float3(P0),
+ float4_to_float3(P3),
+ r_outer,
+ ray_dir,
+ &tc_outer,
+ &u_outer0,
+ &Ng_outer0,
+ &u_outer1,
+ &Ng_outer1);
+ if (!valid) {
+ continue;
+ }
- mw_extension = min(difl * fabsf(bmaxz), extmax);
- float r_ext = mw_extension + r_curr;
- float coverage = 1.0f;
+ /* Intersect with cap-planes. */
+ float2 tp = make_float2(-dt, isect->t - dt);
+ tp = make_float2(max(tp.x, tc_outer.x), min(tp.y, tc_outer.y));
+ const float2 h0 = half_plane_intersect(
+ float4_to_float3(P0), float4_to_float3(dP0du), ray_dir);
+ tp = make_float2(max(tp.x, h0.x), min(tp.y, h0.y));
+ const float2 h1 = half_plane_intersect(
+ float4_to_float3(P3), -float4_to_float3(dP3du), ray_dir);
+ tp = make_float2(max(tp.x, h1.x), min(tp.y, h1.y));
+ valid = tp.x <= tp.y;
+ if (!valid) {
+ continue;
+ }
- if (bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext || bmaxx < -r_ext ||
- bminy > r_ext || bmaxy < -r_ext) {
- /* the bounding box does not overlap the square centered at O */
- tree += level;
- level = tree & -tree;
- }
- else if (level == 1) {
-
- /* the maximum recursion depth is reached.
- * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0.
- * dP* is reversed if necessary.*/
- float t = isect->t;
- float u = 0.0f;
- float gd = 0.0f;
-
- if (flags & CURVE_KN_RIBBONS) {
- float3 tg = (p_en - p_st);
-# ifdef __KERNEL_SSE__
- const float3 tg_sq = tg * tg;
- float w = tg_sq.x + tg_sq.y;
-# else
- float w = tg.x * tg.x + tg.y * tg.y;
-# endif
- if (w == 0) {
- tree++;
- level = tree & -tree;
- continue;
- }
-# ifdef __KERNEL_SSE__
- const float3 p_sttg = p_st * tg;
- w = -(p_sttg.x + p_sttg.y) / w;
+ /* Clamp and correct u parameter. */
+ u_outer0 = clamp(u_outer0, 0.0f, 1.0f);
+ u_outer1 = clamp(u_outer1, 0.0f, 1.0f);
+ u_outer0 = mix(u0, u1, (step + u_outer0) * (1.0f / (float)(CURVE_NUM_BEZIER_STEPS + 1)));
+ u_outer1 = mix(u0, u1, (step + u_outer1) * (1.0f / (float)(CURVE_NUM_BEZIER_STEPS + 1)));
+
+ /* Intersect with inner cylinder. */
+ float2 tc_inner;
+ float u_inner0, u_inner1;
+ float3 Ng_inner0, Ng_inner1;
+ const bool valid_inner = cylinder_intersect(float4_to_float3(P0),
+ float4_to_float3(P3),
+ r_inner,
+ ray_dir,
+ &tc_inner,
+ &u_inner0,
+ &Ng_inner0,
+ &u_inner1,
+ &Ng_inner1);
+
+      /* In unstable areas we subdivide deeper. */
+# if 0
+ const bool unstable0 = (!valid_inner) |
+ (fabsf(dot(normalize(ray_dir), normalize(Ng_inner0))) < 0.3f);
+ const bool unstable1 = (!valid_inner) |
+ (fabsf(dot(normalize(ray_dir), normalize(Ng_inner1))) < 0.3f);
# else
- w = -(p_st.x * tg.x + p_st.y * tg.y) / w;
+      /* On the GPU it appears to be a little faster if this is always enabled. */
+ (void)valid_inner;
+
+ const bool unstable0 = true;
+ const bool unstable1 = true;
# endif
- w = saturate(w);
-
- /* compute u on the curve segment */
- u = i_st * (1 - w) + i_en * w;
- r_curr = r_st + (r_en - r_st) * u;
- /* compare x-y distances */
- float3 p_curr = ((curve_coef[3] * u + curve_coef[2]) * u + curve_coef[1]) * u +
- curve_coef[0];
-
- float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
- if (dot(tg, dp_st) < 0)
- dp_st *= -1;
- if (dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) {
- tree++;
- level = tree & -tree;
- continue;
+
+ /* Subtract the inner interval from the current hit interval. */
+ float2 tp0 = make_float2(tp.x, min(tp.y, tc_inner.x));
+ float2 tp1 = make_float2(max(tp.x, tc_inner.y), tp.y);
+ bool valid0 = valid && (tp0.x <= tp0.y);
+ bool valid1 = valid && (tp1.x <= tp1.y);
+ if (!(valid0 || valid1)) {
+ continue;
+ }
+
+ /* Process one or two hits. */
+ bool recurse = false;
+ if (valid0) {
+ const int termDepth = unstable0 ? CURVE_NUM_BEZIER_SUBDIVISIONS_UNSTABLE :
+ CURVE_NUM_BEZIER_SUBDIVISIONS;
+ if (depth >= termDepth) {
+ found |= curve_intersect_iterative(
+ ray_dir, dt, curve, u_outer0, tp0.x, use_backfacing, isect);
}
- float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1];
- if (dot(tg, dp_en) < 0)
- dp_en *= -1;
- if (dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) {
- tree++;
- level = tree & -tree;
- continue;
+ else {
+ recurse = true;
}
+ }
- /* compute coverage */
- float r_ext = r_curr;
- coverage = 1.0f;
- if (difl != 0.0f) {
- mw_extension = min(difl * fabsf(bmaxz), extmax);
- r_ext = mw_extension + r_curr;
-# ifdef __KERNEL_SSE__
- const float3 p_curr_sq = p_curr * p_curr;
- const float3 dxxx(_mm_sqrt_ss(_mm_hadd_ps(p_curr_sq.m128, p_curr_sq.m128)));
- float d = dxxx.x;
-# else
- float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y);
-# endif
- float d0 = d - r_curr;
- float d1 = d + r_curr;
- float inv_mw_extension = 1.0f / mw_extension;
- if (d0 >= 0)
- coverage = (min(d1 * inv_mw_extension, 1.0f) - min(d0 * inv_mw_extension, 1.0f)) *
- 0.5f;
- else // inside
- coverage = (min(d1 * inv_mw_extension, 1.0f) + min(-d0 * inv_mw_extension, 1.0f)) *
- 0.5f;
+ if (valid1 && (tp1.x + dt <= isect->t)) {
+ const int termDepth = unstable1 ? CURVE_NUM_BEZIER_SUBDIVISIONS_UNSTABLE :
+ CURVE_NUM_BEZIER_SUBDIVISIONS;
+ if (depth >= termDepth) {
+ found |= curve_intersect_iterative(
+ ray_dir, dt, curve, u_outer1, tp1.y, use_backfacing, isect);
}
-
- if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon ||
- isect->t < p_curr.z) {
- tree++;
- level = tree & -tree;
- continue;
+ else {
+ recurse = true;
}
+ }
- t = p_curr.z;
+ if (recurse) {
+ stack[depth].u0 = u0;
+ stack[depth].u1 = u1;
+ stack[depth].i = i + 1;
+ depth++;
- /* stochastic fade from minimum width */
- if (difl != 0.0f && lcg_state) {
- if (coverage != 1.0f && (lcg_step_float(lcg_state) > coverage))
- return hit;
- }
+ u0 = vu0;
+ u1 = vu1;
+ i = -1;
}
- else {
- float l = len(p_en - p_st);
- /* minimum width extension */
- float or1 = r1;
- float or2 = r2;
-
- if (difl != 0.0f) {
- mw_extension = min(len(p_st - P) * difl, extmax);
- or1 = r1 < mw_extension ? mw_extension : r1;
- mw_extension = min(len(p_en - P) * difl, extmax);
- or2 = r2 < mw_extension ? mw_extension : r2;
- }
- /* --- */
- float invl = 1.0f / l;
- float3 tg = (p_en - p_st) * invl;
- gd = (or2 - or1) * invl;
- float difz = -dot(p_st, tg);
- float cyla = 1.0f - (tg.z * tg.z * (1 + gd * gd));
- float invcyla = 1.0f / cyla;
- float halfb = (-p_st.z - tg.z * (difz + gd * (difz * gd + or1)));
- float tcentre = -halfb * invcyla;
- float zcentre = difz + (tg.z * tcentre);
- float3 tdif = -p_st;
- tdif.z += tcentre;
- float tdifz = dot(tdif, tg);
- float tb = 2 * (tdif.z - tg.z * (tdifz + gd * (tdifz * gd + or1)));
- float tc = dot(tdif, tdif) - tdifz * tdifz * (1 + gd * gd) - or1 * or1 -
- 2 * or1 * tdifz * gd;
- float td = tb * tb - 4 * cyla * tc;
- if (td < 0.0f) {
- tree++;
- level = tree & -tree;
- continue;
- }
+ }
- float rootd = sqrtf(td);
- float correction = (-tb - rootd) * 0.5f * invcyla;
- t = tcentre + correction;
-
- float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
- if (dot(tg, dp_st) < 0)
- dp_st *= -1;
- float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1];
- if (dot(tg, dp_en) < 0)
- dp_en *= -1;
-
- if (flags & CURVE_KN_BACKFACING &&
- (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 ||
- isect->t < t || t <= 0.0f)) {
- correction = (-tb + rootd) * 0.5f * invcyla;
- t = tcentre + correction;
- }
+ if (depth > 0) {
+ depth--;
+ u0 = stack[depth].u0;
+ u1 = stack[depth].u1;
+ i = stack[depth].i;
+ }
+ else {
+ break;
+ }
+ }
- if (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 ||
- isect->t < t || t <= 0.0f) {
- tree++;
- level = tree & -tree;
- continue;
- }
+ return found;
+}
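
The loop above replaces recursion with an explicit stack: each entry stores the parent interval [u0, u1] and the loop index to resume at, and setting i = -1 restarts the inner loop inside the child interval. A minimal stand-alone sketch of the same pattern (plain C, not Cycles code; it brackets roots of a scalar function instead of clipping against bounding cylinders, and the constants only mirror CURVE_NUM_BEZIER_STEPS / CURVE_NUM_BEZIER_SUBDIVISIONS):

#include <math.h>
#include <stdio.h>

#define NUM_STEPS 4 /* analogous to CURVE_NUM_BEZIER_STEPS */
#define MAX_DEPTH 6 /* analogous to CURVE_NUM_BEZIER_SUBDIVISIONS */

static float f(float u)
{
  return cosf(12.0f * u) - 0.2f;
}

int main(void)
{
  struct {
    float u0, u1;
    int i;
  } stack[MAX_DEPTH];
  int depth = 0, i = 0;
  float u0 = 0.0f, u1 = 1.0f;

  while (1) {
    for (; i < NUM_STEPS; i++) {
      const float a = u0 + (u1 - u0) * (i / (float)NUM_STEPS);
      const float b = u0 + (u1 - u0) * ((i + 1) / (float)NUM_STEPS);
      if (f(a) * f(b) > 0.0f) {
        continue; /* no sign change: reject, like the outer cylinder test */
      }
      if (depth >= MAX_DEPTH) {
        printf("root near u = %f\n", 0.5f * (a + b)); /* terminal depth: report */
      }
      else {
        stack[depth].u0 = u0; /* push parent interval and resume index */
        stack[depth].u1 = u1;
        stack[depth].i = i + 1;
        depth++;
        u0 = a;
        u1 = b;
        i = -1; /* restart the inner loop inside the child interval */
      }
    }
    if (depth == 0) {
      break;
    }
    depth--; /* pop and resume the parent interval */
    u0 = stack[depth].u0;
    u1 = stack[depth].u1;
    i = stack[depth].i;
  }
  return 0;
}
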
- float w = (zcentre + (tg.z * correction)) * invl;
- w = saturate(w);
- /* compute u on the curve segment */
- u = i_st * (1 - w) + i_en * w;
+/* Ribbons */
- /* stochastic fade from minimum width */
- if (difl != 0.0f && lcg_state) {
- r_curr = r1 + (r2 - r1) * w;
- r_ext = or1 + (or2 - or1) * w;
- coverage = r_curr / r_ext;
+ccl_device_inline bool cylinder_culling_test(const float2 p1, const float2 p2, const float r)
+{
+ /* Performs culling against a cylinder. */
+ const float2 dp = p2 - p1;
+ const float num = dp.x * p1.y - dp.y * p1.x;
+ const float den2 = dot(p2 - p1, p2 - p1);
+ return num * num <= r * r * den2;
+}
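
In the test above, num is the 2D cross product dp x p1, so |num| / sqrt(den2) is the distance from the origin (which is the ray in ray space) to the supporting line of the segment; squaring both sides avoids the division. A small stand-alone check (plain C, illustrative only):

#include <assert.h>

static int culling_test_2d(float p1x, float p1y, float p2x, float p2y, float r)
{
  const float dpx = p2x - p1x, dpy = p2y - p1y;
  const float num = dpx * p1y - dpy * p1x; /* 2D cross product dp x p1 */
  const float den2 = dpx * dpx + dpy * dpy;
  return num * num <= r * r * den2; /* equivalent to |num| / |dp| <= r */
}

int main(void)
{
  /* The segment x = 1 lies at distance 1 from the origin, so the test
   * should pass exactly when the radius reaches 1. */
  assert(!culling_test_2d(1.0f, 0.0f, 1.0f, 2.0f, 0.5f)); /* too thin: culled */
  assert(culling_test_2d(1.0f, 0.0f, 1.0f, 2.0f, 1.5f));  /* wide enough: kept */
  return 0;
}
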
- if (coverage != 1.0f && (lcg_step_float(lcg_state) > coverage))
- return hit;
- }
- }
- /* we found a new intersection */
+/*! Intersects a ray with a quad with backface culling
+ * enabled. The quad v0,v1,v2,v3 is split into two triangles
+ * v0,v1,v3 and v2,v3,v1. The edge v1,v2 decides which of the two
+ * triangles gets intersected. */
+ccl_device_inline bool ribbon_intersect_quad(const float ray_tfar,
+ const float3 quad_v0,
+ const float3 quad_v1,
+ const float3 quad_v2,
+ const float3 quad_v3,
+ float *u_o,
+ float *v_o,
+ float *t_o)
+{
+  /* Vertices are passed in ray space, so the ray origin is zero and the direction is +Z. */
+ const float3 O = make_float3(0.0f, 0.0f, 0.0f);
+ const float3 D = make_float3(0.0f, 0.0f, 1.0f);
+ const float3 va = quad_v0 - O;
+ const float3 vb = quad_v1 - O;
+ const float3 vc = quad_v2 - O;
+ const float3 vd = quad_v3 - O;
+
+ const float3 edb = vb - vd;
+ const float WW = dot(cross(vd, edb), D);
+ const float3 v0 = (WW <= 0.0f) ? va : vc;
+ const float3 v1 = (WW <= 0.0f) ? vb : vd;
+ const float3 v2 = (WW <= 0.0f) ? vd : vb;
+
+  /* Calculate triangle edges. */
+ const float3 e0 = v2 - v0;
+ const float3 e1 = v0 - v1;
+
+  /* Perform edge tests. */
+ const float U = dot(cross(v0, e0), D);
+ const float V = dot(cross(v1, e1), D);
+ if (!(max(U, V) <= 0.0f)) {
+ return false;
+ }
-# ifdef __VISIBILITY_FLAG__
- /* visibility flag test. we do it here under the assumption
- * that most triangles are culled by node flags */
- if (kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)
-# endif
- {
- /* record intersection */
- isect->t = t;
- isect->u = u;
- isect->v = gd;
- isect->prim = curveAddr;
- isect->object = object;
- isect->type = type;
- hit = true;
- }
+  /* Calculate geometry normal and denominator. */
+ const float3 Ng = cross(e1, e0);
+ const float den = dot(Ng, D);
+ const float rcpDen = 1.0f / den;
+
+  /* Perform depth test. */
+ const float t = rcpDen * dot(v0, Ng);
+ if (!(0.0f <= t && t <= ray_tfar)) {
+ return false;
+ }
+
+  /* Avoid division by 0. */
+ if (!(den != 0.0f)) {
+ return false;
+ }
+
+  /* Update hit information. */
+ *t_o = t;
+ *u_o = U * rcpDen;
+ *v_o = V * rcpDen;
+ *u_o = (WW <= 0.0f) ? *u_o : 1.0f - *u_o;
+ *v_o = (WW <= 0.0f) ? *v_o : 1.0f - *v_o;
+ return true;
+}
- tree++;
- level = tree & -tree;
+ccl_device_inline void ribbon_ray_space(const float3 ray_dir, float3 ray_space[3])
+{
+ const float3 dx0 = make_float3(0, ray_dir.z, -ray_dir.y);
+ const float3 dx1 = make_float3(-ray_dir.z, 0, ray_dir.x);
+ ray_space[0] = normalize(dot(dx0, dx0) > dot(dx1, dx1) ? dx0 : dx1);
+ ray_space[1] = normalize(cross(ray_dir, ray_space[0]));
+ ray_space[2] = ray_dir;
+}
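
ribbon_ray_space() picks whichever of the two candidate perpendiculars to the ray direction is longer, which avoids a degenerate first axis when the ray is nearly parallel to a coordinate plane. A small stand-alone check of the construction (plain C with a local vector type, not the Cycles float3):

#include <assert.h>
#include <math.h>

typedef struct {
  float x, y, z;
} vec3;

static vec3 v3(float x, float y, float z)
{
  vec3 v = {x, y, z};
  return v;
}
static float dot3(vec3 a, vec3 b)
{
  return a.x * b.x + a.y * b.y + a.z * b.z;
}
static vec3 cross3(vec3 a, vec3 b)
{
  return v3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);
}
static vec3 norm3(vec3 a)
{
  const float l = sqrtf(dot3(a, a));
  return v3(a.x / l, a.y / l, a.z / l);
}

int main(void)
{
  const vec3 d = norm3(v3(0.3f, -0.7f, 0.65f));
  /* Two candidates perpendicular to d; keep the longer one for stability. */
  const vec3 dx0 = v3(0.0f, d.z, -d.y);
  const vec3 dx1 = v3(-d.z, 0.0f, d.x);
  const vec3 s0 = norm3(dot3(dx0, dx0) > dot3(dx1, dx1) ? dx0 : dx1);
  const vec3 s1 = norm3(cross3(d, s0));
  /* The resulting frame is orthogonal, with d as its third axis. */
  assert(fabsf(dot3(s0, d)) < 1e-5f);
  assert(fabsf(dot3(s1, d)) < 1e-5f);
  assert(fabsf(dot3(s0, s1)) < 1e-5f);
  return 0;
}
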
+
+ccl_device_inline float4 ribbon_to_ray_space(const float3 ray_space[3],
+ const float3 ray_org,
+ const float4 P4)
+{
+ float3 P = float4_to_float3(P4) - ray_org;
+ return make_float4(dot(ray_space[0], P), dot(ray_space[1], P), dot(ray_space[2], P), P4.w);
+}
+
+ccl_device_inline bool ribbon_intersect(const float3 ray_org,
+ const float3 ray_dir,
+ const float ray_tfar,
+ const int N,
+ float4 curve[4],
+ Intersection *isect)
+{
+ /* Transform control points into ray space. */
+ float3 ray_space[3];
+ ribbon_ray_space(ray_dir, ray_space);
+
+ curve[0] = ribbon_to_ray_space(ray_space, ray_org, curve[0]);
+ curve[1] = ribbon_to_ray_space(ray_space, ray_org, curve[1]);
+ curve[2] = ribbon_to_ray_space(ray_space, ray_org, curve[2]);
+ curve[3] = ribbon_to_ray_space(ray_space, ray_org, curve[3]);
+
+ const float4 mx = max(max(fabs(curve[0]), fabs(curve[1])), max(fabs(curve[2]), fabs(curve[3])));
+ const float eps = 4.0f * FLT_EPSILON * max(max(mx.x, mx.y), max(mx.z, mx.w));
+ const float step_size = 1.0f / (float)N;
+
+ /* Evaluate first point and radius scaled normal direction. */
+ float4 p0 = catmull_rom_basis_eval(curve, 0.0f);
+ float3 dp0dt = float4_to_float3(catmull_rom_basis_derivative(curve, 0.0f));
+ if (max3(fabs(dp0dt)) < eps) {
+ const float4 p1 = catmull_rom_basis_eval(curve, step_size);
+ dp0dt = float4_to_float3(p1 - p0);
+ }
+ float3 wn0 = normalize(make_float3(dp0dt.y, -dp0dt.x, 0.0f)) * p0.w;
+
+ /* Evaluate the bezier curve. */
+ for (int i = 0; i < N; i++) {
+ const float u = i * step_size;
+ const float4 p1 = catmull_rom_basis_eval(curve, u + step_size);
+ bool valid = cylinder_culling_test(
+ make_float2(p0.x, p0.y), make_float2(p1.x, p1.y), max(p0.w, p1.w));
+ if (!valid) {
+ continue;
}
- else {
- /* split the curve into two curves and process */
- level = level >> 1;
+
+ /* Evaluate next point. */
+ float3 dp1dt = float4_to_float3(catmull_rom_basis_derivative(curve, u + step_size));
+ dp1dt = (max3(fabs(dp1dt)) < eps) ? float4_to_float3(p1 - p0) : dp1dt;
+ const float3 wn1 = normalize(make_float3(dp1dt.y, -dp1dt.x, 0.0f)) * p1.w;
+
+ /* Construct quad coordinates. */
+ const float3 lp0 = float4_to_float3(p0) + wn0;
+ const float3 lp1 = float4_to_float3(p1) + wn1;
+ const float3 up0 = float4_to_float3(p0) - wn0;
+ const float3 up1 = float4_to_float3(p1) - wn1;
+
+ /* Intersect quad. */
+ float vu, vv, vt;
+ bool valid0 = ribbon_intersect_quad(isect->t, lp0, lp1, up1, up0, &vu, &vv, &vt);
+
+ if (valid0) {
+      /* Ignore self-intersections. */
+ const float avoidance_factor = 2.0f;
+ if (avoidance_factor != 0.0f) {
+ float r = mix(p0.w, p1.w, vu);
+ valid0 = vt > avoidance_factor * r;
+ }
+
+ if (valid0) {
+ vv = 2.0f * vv - 1.0f;
+
+ /* Record intersection. */
+ isect->t = vt;
+ isect->u = u + vu * step_size;
+ isect->v = vv;
+ return true;
+ }
}
- }
- return hit;
+ p0 = p1;
+ wn0 = wn1;
+ }
+ return false;
}
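
When a quad reports a hit, the local coordinates are remapped: vu in [0, 1] along the quad becomes a global curve parameter u + vu * step_size, and vv in [0, 1] across the ribbon becomes a signed coordinate 2 * vv - 1 in [-1, 1]. A tiny sketch with illustrative values (plain C, not Cycles code):

#include <stdio.h>

int main(void)
{
  const int N = 4;                   /* number of sweep quads along the curve */
  const float step_size = 1.0f / (float)N;
  const int i = 2;                   /* index of the quad that was hit */
  const float vu = 0.25f, vv = 0.1f; /* hit coordinates inside that quad */

  const float curve_u = i * step_size + vu * step_size; /* 0.5625 along the curve */
  const float ribbon_v = 2.0f * vv - 1.0f;              /* -0.8 across the ribbon */
  printf("u = %g, v = %g\n", curve_u, ribbon_v);
  return 0;
}
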
ccl_device_forceinline bool curve_intersect(KernelGlobals *kg,
Intersection *isect,
- float3 P,
- float3 direction,
+ const float3 P,
+ const float3 dir,
uint visibility,
int object,
int curveAddr,
float time,
- int type,
- uint *lcg_state,
- float difl,
- float extmax)
+ int type)
{
- /* define few macros to minimize code duplication for SSE */
-# ifndef __KERNEL_SSE2__
-# define len3_squared(x) len_squared(x)
-# define len3(x) len(x)
-# define dot3(x, y) dot(x, y)
-# endif
-
- const bool is_curve_primitive = (type & PRIMITIVE_CURVE);
+ const bool is_motion = (type & PRIMITIVE_ALL_MOTION);
- if (!is_curve_primitive && kernel_data.bvh.use_bvh_steps) {
+# ifndef __KERNEL_OPTIX__ /* See OptiX motion flag OPTIX_MOTION_FLAG_[START|END]_VANISH */
+ if (is_motion && kernel_data.bvh.use_bvh_steps) {
const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr);
if (time < prim_time.x || time > prim_time.y) {
return false;
}
}
+# endif
int segment = PRIMITIVE_UNPACK_SEGMENT(type);
- /* curve Intersection check */
- int flags = kernel_data.curve.curveflags;
-
int prim = kernel_tex_fetch(__prim_index, curveAddr);
+
float4 v00 = kernel_tex_fetch(__curves, prim);
- int cnum = __float_as_int(v00.x);
- int k0 = cnum + segment;
+ int k0 = __float_as_int(v00.x) + segment;
int k1 = k0 + 1;
-# ifndef __KERNEL_SSE2__
- float4 P_curve[2];
-
- if (is_curve_primitive) {
- P_curve[0] = kernel_tex_fetch(__curve_keys, k0);
- P_curve[1] = kernel_tex_fetch(__curve_keys, k1);
- }
- else {
- int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
- motion_curve_keys(kg, fobject, prim, time, k0, k1, P_curve);
- }
-
- float or1 = P_curve[0].w;
- float or2 = P_curve[1].w;
- float3 p1 = float4_to_float3(P_curve[0]);
- float3 p2 = float4_to_float3(P_curve[1]);
-
- /* minimum width extension */
- float r1 = or1;
- float r2 = or2;
- float3 dif = P - p1;
- float3 dif_second = P - p2;
- if (difl != 0.0f) {
- float pixelsize = min(len3(dif) * difl, extmax);
- r1 = or1 < pixelsize ? pixelsize : or1;
- pixelsize = min(len3(dif_second) * difl, extmax);
- r2 = or2 < pixelsize ? pixelsize : or2;
- }
- /* --- */
-
- float3 p21_diff = p2 - p1;
- float3 sphere_dif1 = (dif + dif_second) * 0.5f;
- float3 dir = direction;
- float sphere_b_tmp = dot3(dir, sphere_dif1);
- float3 sphere_dif2 = sphere_dif1 - sphere_b_tmp * dir;
-# else
- ssef P_curve[2];
+ int ka = max(k0 - 1, __float_as_int(v00.x));
+ int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1);
- if (is_curve_primitive) {
- P_curve[0] = load4f(&kg->__curve_keys.data[k0].x);
- P_curve[1] = load4f(&kg->__curve_keys.data[k1].x);
+ float4 curve[4];
+ if (!is_motion) {
+ curve[0] = kernel_tex_fetch(__curve_keys, ka);
+ curve[1] = kernel_tex_fetch(__curve_keys, k0);
+ curve[2] = kernel_tex_fetch(__curve_keys, k1);
+ curve[3] = kernel_tex_fetch(__curve_keys, kb);
}
else {
int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
- motion_curve_keys(kg, fobject, prim, time, k0, k1, (float4 *)&P_curve);
+ motion_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, curve);
}
- const ssef or12 = shuffle<3, 3, 3, 3>(P_curve[0], P_curve[1]);
-
- ssef r12 = or12;
- const ssef vP = load4f(P);
- const ssef dif = vP - P_curve[0];
- const ssef dif_second = vP - P_curve[1];
- if (difl != 0.0f) {
- const ssef len1_sq = len3_squared_splat(dif);
- const ssef len2_sq = len3_squared_splat(dif_second);
- const ssef len12 = mm_sqrt(shuffle<0, 0, 0, 0>(len1_sq, len2_sq));
- const ssef pixelsize12 = min(len12 * difl, ssef(extmax));
- r12 = max(or12, pixelsize12);
- }
- float or1 = extract<0>(or12), or2 = extract<0>(shuffle<2>(or12));
- float r1 = extract<0>(r12), r2 = extract<0>(shuffle<2>(r12));
-
- const ssef p21_diff = P_curve[1] - P_curve[0];
- const ssef sphere_dif1 = (dif + dif_second) * 0.5f;
- const ssef dir = load4f(direction);
- const ssef sphere_b_tmp = dot3_splat(dir, sphere_dif1);
- const ssef sphere_dif2 = nmadd(sphere_b_tmp, dir, sphere_dif1);
-# endif
-
- float mr = max(r1, r2);
- float l = len3(p21_diff);
- float invl = 1.0f / l;
- float sp_r = mr + 0.5f * l;
-
- float sphere_b = dot3(dir, sphere_dif2);
- float sdisc = sphere_b * sphere_b - len3_squared(sphere_dif2) + sp_r * sp_r;
-
- if (sdisc < 0.0f)
- return false;
-
- /* obtain parameters and test midpoint distance for suitable modes */
-# ifndef __KERNEL_SSE2__
- float3 tg = p21_diff * invl;
-# else
- const ssef tg = p21_diff * invl;
-# endif
- float gd = (r2 - r1) * invl;
-
- float dirz = dot3(dir, tg);
- float difz = dot3(dif, tg);
-
- float a = 1.0f - (dirz * dirz * (1 + gd * gd));
-
- float halfb = dot3(dir, dif) - dirz * (difz + gd * (difz * gd + r1));
-
- float tcentre = -halfb / a;
- float zcentre = difz + (dirz * tcentre);
-
- if ((tcentre > isect->t) && !(flags & CURVE_KN_ACCURATE))
- return false;
- if ((zcentre < 0 || zcentre > l) && !(flags & CURVE_KN_ACCURATE) &&
- !(flags & CURVE_KN_INTERSECTCORRECTION))
+# ifdef __VISIBILITY_FLAG__
+ if (!(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)) {
return false;
-
- /* test minimum separation */
-# ifndef __KERNEL_SSE2__
- float3 cprod = cross(tg, dir);
- float cprod2sq = len3_squared(cross(tg, dif));
-# else
- const ssef cprod = cross(tg, dir);
- float cprod2sq = len3_squared(cross_zxy(tg, dif));
+ }
# endif
- float cprodsq = len3_squared(cprod);
- float distscaled = dot3(cprod, dif);
- if (cprodsq == 0)
- distscaled = cprod2sq;
- else
- distscaled = (distscaled * distscaled) / cprodsq;
-
- if (distscaled > mr * mr)
- return false;
-
- /* calculate true intersection */
-# ifndef __KERNEL_SSE2__
- float3 tdif = dif + tcentre * dir;
-# else
- const ssef tdif = madd(ssef(tcentre), dir, dif);
-# endif
- float tdifz = dot3(tdif, tg);
- float tdifma = tdifz * gd + r1;
- float tb = 2 * (dot3(dir, tdif) - dirz * (tdifz + gd * tdifma));
- float tc = dot3(tdif, tdif) - tdifz * tdifz - tdifma * tdifma;
- float td = tb * tb - 4 * a * tc;
+ if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) {
+ /* todo: adaptive number of subdivisions could help performance here. */
+ const int subdivisions = kernel_data.bvh.curve_subdivisions;
+ if (ribbon_intersect(P, dir, isect->t, subdivisions, curve, isect)) {
+ isect->prim = curveAddr;
+ isect->object = object;
+ isect->type = type;
+ return true;
+ }
- if (td < 0.0f)
return false;
-
- float rootd = 0.0f;
- float correction = 0.0f;
- if (flags & CURVE_KN_ACCURATE) {
- rootd = sqrtf(td);
- correction = ((-tb - rootd) / (2 * a));
}
-
- float t = tcentre + correction;
-
- if (t < isect->t) {
-
- if (flags & CURVE_KN_INTERSECTCORRECTION) {
- rootd = sqrtf(td);
- correction = ((-tb - rootd) / (2 * a));
- t = tcentre + correction;
- }
-
- float z = zcentre + (dirz * correction);
- // bool backface = false;
-
- if (flags & CURVE_KN_BACKFACING && (t < 0.0f || z < 0 || z > l)) {
- // backface = true;
- correction = ((-tb + rootd) / (2 * a));
- t = tcentre + correction;
- z = zcentre + (dirz * correction);
- }
-
- /* stochastic fade from minimum width */
- float adjradius = or1 + z * (or2 - or1) * invl;
- adjradius = adjradius / (r1 + z * gd);
- if (lcg_state && adjradius != 1.0f) {
- if (lcg_step_float(lcg_state) > adjradius)
- return false;
+ else {
+ if (curve_intersect_recursive(P, dir, curve, isect)) {
+ isect->prim = curveAddr;
+ isect->object = object;
+ isect->type = type;
+ return true;
}
- /* --- */
-
- if (t > 0.0f && t < isect->t && z >= 0 && z <= l) {
-
- if (flags & CURVE_KN_ENCLOSEFILTER) {
- float enc_ratio = 1.01f;
- if ((difz > -r1 * enc_ratio) && (dot3(dif_second, tg) < r2 * enc_ratio)) {
- float a2 = 1.0f - (dirz * dirz * (1 + gd * gd * enc_ratio * enc_ratio));
- float c2 = dot3(dif, dif) - difz * difz * (1 + gd * gd * enc_ratio * enc_ratio) -
- r1 * r1 * enc_ratio * enc_ratio - 2 * r1 * difz * gd * enc_ratio;
- if (a2 * c2 < 0.0f)
- return false;
- }
- }
-# ifdef __VISIBILITY_FLAG__
- /* visibility flag test. we do it here under the assumption
- * that most triangles are culled by node flags */
- if (kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)
-# endif
- {
- /* record intersection */
- isect->t = t;
- isect->u = z * invl;
- isect->v = gd;
- isect->prim = curveAddr;
- isect->object = object;
- isect->type = type;
-
- return true;
- }
- }
+ return false;
}
-
- return false;
-
-# ifndef __KERNEL_SSE2__
-# undef len3_squared
-# undef len3
-# undef dot3
-# endif
}
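
The control points ka, k0, k1, kb above form the four-key Catmull-Rom window around a segment, with the neighbours clamped at the first and last key of the curve (assuming, as the fetches above suggest, that __curves stores the first key index in v00.x and the key count in v00.y). A small sketch of the indexing (plain C, illustrative values):

#include <stdio.h>

static int imax(int a, int b)
{
  return a > b ? a : b;
}
static int imin(int a, int b)
{
  return a < b ? a : b;
}

int main(void)
{
  const int first_key = 100, num_keys = 5; /* keys 100..104, segments 0..3 */
  for (int segment = 0; segment < num_keys - 1; segment++) {
    const int k0 = first_key + segment;
    const int k1 = k0 + 1;
    const int ka = imax(k0 - 1, first_key);                /* clamp at curve start */
    const int kb = imin(k1 + 1, first_key + num_keys - 1); /* clamp at curve end */
    printf("segment %d -> keys %d %d %d %d\n", segment, ka, k0, k1, kb);
  }
  return 0;
}
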
-ccl_device_inline float3 curve_refine(KernelGlobals *kg,
- ShaderData *sd,
- const Intersection *isect,
- const Ray *ray)
+ccl_device_inline void curve_shader_setup(KernelGlobals *kg,
+ ShaderData *sd,
+ const Intersection *isect,
+ const Ray *ray)
{
- int flag = kernel_data.curve.curveflags;
float t = isect->t;
float3 P = ray->P;
float3 D = ray->D;
@@ -832,118 +718,63 @@ ccl_device_inline float3 curve_refine(KernelGlobals *kg,
int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
int k1 = k0 + 1;
- float3 tg;
+ int ka = max(k0 - 1, __float_as_int(v00.x));
+ int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1);
- if (flag & CURVE_KN_INTERPOLATE) {
- int ka = max(k0 - 1, __float_as_int(v00.x));
- int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1);
+ float4 P_curve[4];
- float4 P_curve[4];
+ if (!(sd->type & PRIMITIVE_ALL_MOTION)) {
+ P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
+ P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
+ P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
+ P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
+ }
+ else {
+ motion_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve);
+ }
- if (sd->type & PRIMITIVE_CURVE) {
- P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
- P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
- P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
- P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
- }
- else {
- motion_cardinal_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve);
- }
+ sd->u = isect->u;
- float3 p[4];
- p[0] = float4_to_float3(P_curve[0]);
- p[1] = float4_to_float3(P_curve[1]);
- p[2] = float4_to_float3(P_curve[2]);
- p[3] = float4_to_float3(P_curve[3]);
+ P = P + D * t;
- P = P + D * t;
+ const float4 dPdu4 = catmull_rom_basis_derivative(P_curve, isect->u);
+ const float3 dPdu = float4_to_float3(dPdu4);
-# ifdef __UV__
- sd->u = isect->u;
- sd->v = 0.0f;
-# endif
+ if (sd->type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) {
+ /* Rounded smooth normals for ribbons, to approximate thick curve shape. */
+ const float3 tangent = normalize(dPdu);
+ const float3 bitangent = normalize(cross(tangent, -D));
+ const float sine = isect->v;
+ const float cosine = safe_sqrtf(1.0f - sine * sine);
- tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3]));
+ sd->N = normalize(sine * bitangent - cosine * normalize(cross(tangent, bitangent)));
+ sd->Ng = -D;
+ sd->v = isect->v;
- if (kernel_data.curve.curveflags & CURVE_KN_RIBBONS) {
- sd->Ng = normalize(-(D - tg * (dot(tg, D))));
- }
- else {
-# ifdef __EMBREE__
- if (kernel_data.bvh.scene) {
- sd->Ng = normalize(isect->Ng);
- }
- else
+# if 0
+ /* This approximates the position and geometric normal of a thick curve too,
+     * but causes too many issues with incorrect self-intersections. */
+ const float dPdu_radius = dPdu4.w;
+ sd->Ng = sd->N;
+ P += sd->N * dPdu_radius;
# endif
- {
- /* direction from inside to surface of curve */
- float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]);
- sd->Ng = normalize(P - p_curr);
-
- /* adjustment for changing radius */
- float gd = isect->v;
-
- if (gd != 0.0f) {
- sd->Ng = sd->Ng - gd * tg;
- sd->Ng = normalize(sd->Ng);
- }
- }
- }
-
- /* todo: sometimes the normal is still so that this is detected as
- * backfacing even if cull backfaces is enabled */
-
- sd->N = sd->Ng;
}
else {
- float4 P_curve[2];
-
- if (sd->type & PRIMITIVE_CURVE) {
- P_curve[0] = kernel_tex_fetch(__curve_keys, k0);
- P_curve[1] = kernel_tex_fetch(__curve_keys, k1);
- }
- else {
- motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
- }
-
- float l = 1.0f;
- tg = normalize_len(float4_to_float3(P_curve[1] - P_curve[0]), &l);
-
- P = P + D * t;
-
- float3 dif = P - float4_to_float3(P_curve[0]);
-
-# ifdef __UV__
- sd->u = dot(dif, tg) / l;
+    /* For thick curves, compute the normal using the direction from inside the curve.
+     * This could be optimized by recording the normal in the intersection,
+     * but for OptiX that would exceed the size of the payload. */
+ const float3 P_inside = float4_to_float3(catmull_rom_basis_eval(P_curve, isect->u));
+ const float3 Ng = normalize(P - P_inside);
+
+ sd->N = Ng;
+ sd->Ng = Ng;
sd->v = 0.0f;
-# endif
-
- if (flag & CURVE_KN_TRUETANGENTGNORMAL) {
- sd->Ng = -(D - tg * dot(tg, D));
- sd->Ng = normalize(sd->Ng);
- }
- else {
- float gd = isect->v;
-
- /* direction from inside to surface of curve */
- float denom = fmaxf(P_curve[0].w + sd->u * l * gd, 1e-8f);
- sd->Ng = (dif - tg * sd->u * l) / denom;
-
- /* adjustment for changing radius */
- if (gd != 0.0f) {
- sd->Ng = sd->Ng - gd * tg;
- }
-
- sd->Ng = normalize(sd->Ng);
- }
-
- sd->N = sd->Ng;
}
# ifdef __DPDU__
/* dPdu/dPdv */
- sd->dPdu = tg;
- sd->dPdv = cross(tg, sd->Ng);
+ sd->dPdu = dPdu;
+ sd->dPdv = cross(dPdu, sd->Ng);
# endif
if (isect->object != OBJECT_NONE) {
@@ -956,7 +787,10 @@ ccl_device_inline float3 curve_refine(KernelGlobals *kg,
P = transform_point(&tfm, P);
}
- return P;
+ sd->P = P;
+
+ float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
+ sd->shader = __float_as_int(curvedata.z);
}
#endif
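
Both the intersection and shading code above rely on catmull_rom_basis_eval() and catmull_rom_basis_derivative(), which are defined elsewhere; they are assumed to be the standard uniform Catmull-Rom basis with tension 0.5, which interpolates the two middle control points. A scalar sketch of that assumed basis (plain C, not the Cycles implementation):

#include <stdio.h>

static float catmull_rom(float p0, float p1, float p2, float p3, float t)
{
  return 0.5f * ((2.0f * p1) + (-p0 + p2) * t +
                 (2.0f * p0 - 5.0f * p1 + 4.0f * p2 - p3) * t * t +
                 (-p0 + 3.0f * p1 - 3.0f * p2 + p3) * t * t * t);
}

static float catmull_rom_derivative(float p0, float p1, float p2, float p3, float t)
{
  return 0.5f * ((-p0 + p2) +
                 2.0f * (2.0f * p0 - 5.0f * p1 + 4.0f * p2 - p3) * t +
                 3.0f * (-p0 + 3.0f * p1 - 3.0f * p2 + p3) * t * t);
}

int main(void)
{
  /* The spline passes through p1 at t = 0 and p2 at t = 1. */
  printf("%g %g\n",
         catmull_rom(0.0f, 1.0f, 2.0f, 4.0f, 0.0f),  /* 1 */
         catmull_rom(0.0f, 1.0f, 2.0f, 4.0f, 1.0f)); /* 2 */
  printf("%g\n", catmull_rom_derivative(0.0f, 1.0f, 2.0f, 4.0f, 0.5f));
  return 0;
}
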
diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h
index 7380c506bf4..0f66f4af755 100644
--- a/intern/cycles/kernel/geom/geom_motion_curve.h
+++ b/intern/cycles/kernel/geom/geom_motion_curve.h
@@ -36,7 +36,7 @@ ccl_device_inline int find_attribute_curve_motion(KernelGlobals *kg,
   * zero iterations and rendering is really slow with motion curves. Until other
   * areas are sped up it's probably not so crucial to optimize this out.
*/
- uint attr_offset = object_attribute_map_offset(kg, object) + ATTR_PRIM_CURVE;
+ uint attr_offset = object_attribute_map_offset(kg, object) + ATTR_PRIM_GEOMETRY;
uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
while (attr_map.x != id) {
@@ -50,14 +50,14 @@ ccl_device_inline int find_attribute_curve_motion(KernelGlobals *kg,
return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
}
-ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg,
- int offset,
- int numkeys,
- int numsteps,
- int step,
- int k0,
- int k1,
- float4 keys[2])
+ccl_device_inline void motion_curve_keys_for_step_linear(KernelGlobals *kg,
+ int offset,
+ int numkeys,
+ int numsteps,
+ int step,
+ int k0,
+ int k1,
+ float4 keys[2])
{
if (step == numsteps) {
/* center step: regular key location */
@@ -77,7 +77,7 @@ ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg,
}
/* return 2 curve key locations */
-ccl_device_inline void motion_curve_keys(
+ccl_device_inline void motion_curve_keys_linear(
KernelGlobals *kg, int object, int prim, float time, int k0, int k1, float4 keys[2])
{
/* get motion info */
@@ -97,24 +97,24 @@ ccl_device_inline void motion_curve_keys(
/* fetch key coordinates */
float4 next_keys[2];
- motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, keys);
- motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step + 1, k0, k1, next_keys);
+ motion_curve_keys_for_step_linear(kg, offset, numkeys, numsteps, step, k0, k1, keys);
+ motion_curve_keys_for_step_linear(kg, offset, numkeys, numsteps, step + 1, k0, k1, next_keys);
/* interpolate between steps */
keys[0] = (1.0f - t) * keys[0] + t * next_keys[0];
keys[1] = (1.0f - t) * keys[1] + t * next_keys[1];
}
-ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg,
- int offset,
- int numkeys,
- int numsteps,
- int step,
- int k0,
- int k1,
- int k2,
- int k3,
- float4 keys[4])
+ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg,
+ int offset,
+ int numkeys,
+ int numsteps,
+ int step,
+ int k0,
+ int k1,
+ int k2,
+ int k3,
+ float4 keys[4])
{
if (step == numsteps) {
/* center step: regular key location */
@@ -138,15 +138,15 @@ ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg,
}
 /* return 4 curve key locations */
-ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg,
- int object,
- int prim,
- float time,
- int k0,
- int k1,
- int k2,
- int k3,
- float4 keys[4])
+ccl_device_inline void motion_curve_keys(KernelGlobals *kg,
+ int object,
+ int prim,
+ float time,
+ int k0,
+ int k1,
+ int k2,
+ int k3,
+ float4 keys[4])
{
/* get motion info */
int numsteps, numkeys;
@@ -165,9 +165,8 @@ ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg,
/* fetch key coordinates */
float4 next_keys[4];
- motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys);
- motion_cardinal_curve_keys_for_step(
- kg, offset, numkeys, numsteps, step + 1, k0, k1, k2, k3, next_keys);
+ motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys);
+ motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step + 1, k0, k1, k2, k3, next_keys);
/* interpolate between steps */
keys[0] = (1.0f - t) * keys[0] + t * next_keys[0];
@@ -176,53 +175,6 @@ ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg,
keys[3] = (1.0f - t) * keys[3] + t * next_keys[3];
}
-# if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
-/* Similar to above, but returns keys as pair of two AVX registers with each
- * holding two float4.
- */
-ccl_device_inline void motion_cardinal_curve_keys_avx(KernelGlobals *kg,
- int object,
- int prim,
- float time,
- int k0,
- int k1,
- int k2,
- int k3,
- avxf *out_keys_0_1,
- avxf *out_keys_2_3)
-{
- /* Get motion info. */
- int numsteps, numkeys;
- object_motion_info(kg, object, &numsteps, NULL, &numkeys);
-
- /* Figure out which steps we need to fetch and their interpolation factor. */
- int maxstep = numsteps * 2;
- int step = min((int)(time * maxstep), maxstep - 1);
- float t = time * maxstep - step;
-
- /* Find attribute. */
- AttributeElement elem;
- int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
- kernel_assert(offset != ATTR_STD_NOT_FOUND);
-
- /* Fetch key coordinates. */
- float4 next_keys[4];
- float4 keys[4];
- motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys);
- motion_cardinal_curve_keys_for_step(
- kg, offset, numkeys, numsteps, step + 1, k0, k1, k2, k3, next_keys);
-
- const avxf keys_0_1 = avxf(keys[0].m128, keys[1].m128);
- const avxf keys_2_3 = avxf(keys[2].m128, keys[3].m128);
- const avxf next_keys_0_1 = avxf(next_keys[0].m128, next_keys[1].m128);
- const avxf next_keys_2_3 = avxf(next_keys[2].m128, next_keys[3].m128);
-
- /* Interpolate between steps. */
- *out_keys_0_1 = (1.0f - t) * keys_0_1 + t * next_keys_0_1;
- *out_keys_2_3 = (1.0f - t) * keys_2_3 + t * next_keys_2_3;
-}
-# endif
-
#endif
CCL_NAMESPACE_END
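
The step selection above maps time in [0, 1] onto 2 * numsteps sub-intervals, clamps to the last one, and blends the two surrounding key sets with keys = (1 - t) * keys + t * next_keys. A small sketch of that mapping (plain C, illustrative values only):

#include <stdio.h>

int main(void)
{
  const int numsteps = 2;           /* extra motion steps stored per object */
  const int maxstep = numsteps * 2; /* number of sub-intervals over the frame */
  for (int n = 0; n <= 4; n++) {
    const float time = n / 4.0f;
    int step = (int)(time * maxstep);
    if (step > maxstep - 1) {
      step = maxstep - 1; /* time == 1 falls into the last interval */
    }
    const float t = time * maxstep - step;
    /* Per control point: key = (1 - t) * key[step] + t * key[step + 1]. */
    printf("time %.2f -> steps %d/%d, blend t = %.2f\n", time, step, step + 1, t);
  }
  return 0;
}
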
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
index 49d4829af38..859d919f0bb 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
@@ -103,17 +103,21 @@ ccl_device_inline
const Ray *ray,
float3 verts[3])
{
+# ifdef __KERNEL_OPTIX__
+ /* isect->t is always in world space with OptiX. */
+ return motion_triangle_refine(kg, sd, isect, ray, verts);
+# else
float3 P = ray->P;
float3 D = ray->D;
float t = isect->t;
-# ifdef __INTERSECTION_REFINE__
+# ifdef __INTERSECTION_REFINE__
if (isect->object != OBJECT_NONE) {
-# ifdef __OBJECT_MOTION__
+# ifdef __OBJECT_MOTION__
Transform tfm = sd->ob_itfm;
-# else
+# else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
-# endif
+# endif
P = transform_point(&tfm, P);
D = transform_direction(&tfm, D);
@@ -135,19 +139,20 @@ ccl_device_inline
P = P + D * rt;
if (isect->object != OBJECT_NONE) {
-# ifdef __OBJECT_MOTION__
+# ifdef __OBJECT_MOTION__
Transform tfm = sd->ob_tfm;
-# else
+# else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
-# endif
+# endif
P = transform_point(&tfm, P);
}
return P;
-# else /* __INTERSECTION_REFINE__ */
+# else /* __INTERSECTION_REFINE__ */
return P + D * t;
-# endif /* __INTERSECTION_REFINE__ */
+# endif /* __INTERSECTION_REFINE__ */
+# endif
}
#endif /* __BVH_LOCAL__ */
diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h
index 2792fd64c61..614e2e3b92b 100644
--- a/intern/cycles/kernel/geom/geom_object.h
+++ b/intern/cycles/kernel/geom/geom_object.h
@@ -81,13 +81,7 @@ ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals *kg,
const uint num_steps = kernel_tex_fetch(__objects, object).numsteps * 2 + 1;
Transform tfm;
-# ifdef __EMBREE__
- if (kernel_data.bvh.scene) {
- transform_motion_array_interpolate_straight(&tfm, motion, num_steps, time);
- }
- else
-# endif
- transform_motion_array_interpolate(&tfm, motion, num_steps, time);
+ transform_motion_array_interpolate(&tfm, motion, num_steps, time);
return tfm;
}
@@ -227,6 +221,17 @@ ccl_device_inline float object_surface_area(KernelGlobals *kg, int object)
return kernel_tex_fetch(__objects, object).surface_area;
}
+/* Color of the object */
+
+ccl_device_inline float3 object_color(KernelGlobals *kg, int object)
+{
+ if (object == OBJECT_NONE)
+ return make_float3(0.0f, 0.0f, 0.0f);
+
+ const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object);
+ return make_float3(kobject->color[0], kobject->color[1], kobject->color[2]);
+}
+
/* Pass ID number of object */
ccl_device_inline float object_pass_id(KernelGlobals *kg, int object)
@@ -315,6 +320,26 @@ ccl_device_inline uint object_patch_map_offset(KernelGlobals *kg, int object)
return kernel_tex_fetch(__objects, object).patch_map_offset;
}
+/* Volume step size */
+
+ccl_device_inline float object_volume_density(KernelGlobals *kg, int object)
+{
+ if (object == OBJECT_NONE) {
+ return 1.0f;
+ }
+
+ return kernel_tex_fetch(__objects, object).surface_area;
+}
+
+ccl_device_inline float object_volume_step_size(KernelGlobals *kg, int object)
+{
+ if (object == OBJECT_NONE) {
+ return kernel_data.background.volume_step_size;
+ }
+
+ return kernel_tex_fetch(__object_volume_step, object);
+}
+
/* Pass ID for shader */
ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd)
@@ -386,24 +411,10 @@ ccl_device float3 particle_angular_velocity(KernelGlobals *kg, int particle)
ccl_device_inline float3 bvh_clamp_direction(float3 dir)
{
- /* clamp absolute values by exp2f(-80.0f) to avoid division by zero when calculating inverse direction */
-#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
- const ssef oopes(8.271806E-25f, 8.271806E-25f, 8.271806E-25f, 0.0f);
- const ssef mask = _mm_cmpgt_ps(fabs(dir), oopes);
- const ssef signdir = signmsk(dir.m128) | oopes;
-# ifndef __KERNEL_AVX__
- ssef res = mask & ssef(dir);
- res = _mm_or_ps(res, _mm_andnot_ps(mask, signdir));
-# else
- ssef res = _mm_blendv_ps(signdir, dir, mask);
-# endif
- return float3(res);
-#else /* __KERNEL_SSE__ && __KERNEL_SSE2__ */
const float ooeps = 8.271806E-25f;
return make_float3((fabsf(dir.x) > ooeps) ? dir.x : copysignf(ooeps, dir.x),
(fabsf(dir.y) > ooeps) ? dir.y : copysignf(ooeps, dir.y),
(fabsf(dir.z) > ooeps) ? dir.z : copysignf(ooeps, dir.z));
-#endif /* __KERNEL_SSE__ && __KERNEL_SSE2__ */
}
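
Snapping tiny direction components away from zero (while keeping their sign) guarantees that the inverse direction used by the BVH slab tests stays finite. A small sketch of the clamp (plain C; the threshold 8.271806e-25 is exp2f(-80), as in the code above):

#include <math.h>
#include <stdio.h>

static float clamp_component(float d)
{
  const float ooeps = 8.271806E-25f;
  return (fabsf(d) > ooeps) ? d : copysignf(ooeps, d);
}

int main(void)
{
  const float d[3] = {0.0f, -1e-30f, 0.5f}; /* a nearly axis-aligned direction */
  for (int i = 0; i < 3; i++) {
    const float c = clamp_component(d[i]);
    printf("dir %g -> clamped %g, inverse %g\n", d[i], c, 1.0f / c);
  }
  return 0;
}
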
ccl_device_inline float3 bvh_inverse_direction(float3 dir)
@@ -431,38 +442,6 @@ ccl_device_inline float bvh_instance_push(
return t;
}
-#ifdef __QBVH__
-/* Same as above, but optimized for QBVH scene intersection,
- * which needs to modify two max distances.
- *
- * TODO(sergey): Investigate if passing NULL instead of t1 gets optimized
- * so we can avoid having this duplication.
- */
-ccl_device_inline void qbvh_instance_push(KernelGlobals *kg,
- int object,
- const Ray *ray,
- float3 *P,
- float3 *dir,
- float3 *idir,
- float *t,
- float *t1)
-{
- Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
-
- *P = transform_point(&tfm, ray->P);
-
- float len;
- *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len));
- *idir = bvh_inverse_direction(*dir);
-
- if (*t != FLT_MAX)
- *t *= len;
-
- if (*t1 != -FLT_MAX)
- *t1 *= len;
-}
-#endif
-
 /* Transform ray to exit static object in BVH */
ccl_device_inline float bvh_instance_pop(
@@ -525,39 +504,6 @@ ccl_device_inline float bvh_instance_motion_push(KernelGlobals *kg,
return t;
}
-# ifdef __QBVH__
-/* Same as above, but optimized for QBVH scene intersection,
- * which needs to modify two max distances.
- *
- * TODO(sergey): Investigate if passing NULL instead of t1 gets optimized
- * so we can avoid having this duplication.
- */
-ccl_device_inline void qbvh_instance_motion_push(KernelGlobals *kg,
- int object,
- const Ray *ray,
- float3 *P,
- float3 *dir,
- float3 *idir,
- float *t,
- float *t1,
- Transform *itfm)
-{
- object_fetch_transform_motion_test(kg, object, ray->time, itfm);
-
- *P = transform_point(itfm, ray->P);
-
- float len;
- *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len));
- *idir = bvh_inverse_direction(*dir);
-
- if (*t != FLT_MAX)
- *t *= len;
-
- if (*t1 != -FLT_MAX)
- *t1 *= len;
-}
-# endif
-
 /* Transform ray to exit motion blurred object in BVH */
ccl_device_inline float bvh_instance_motion_pop(KernelGlobals *kg,
diff --git a/intern/cycles/kernel/geom/geom_patch.h b/intern/cycles/kernel/geom/geom_patch.h
index df19199f68e..8b4b91b96c8 100644
--- a/intern/cycles/kernel/geom/geom_patch.h
+++ b/intern/cycles/kernel/geom/geom_patch.h
@@ -380,15 +380,15 @@ ccl_device float3 patch_eval_float3(KernelGlobals *kg,
return val;
}
-ccl_device float3 patch_eval_uchar4(KernelGlobals *kg,
+ccl_device float4 patch_eval_uchar4(KernelGlobals *kg,
const ShaderData *sd,
int offset,
int patch,
float u,
float v,
int channel,
- float3 *du,
- float3 *dv)
+ float4 *du,
+ float4 *dv)
{
int indices[PATCH_MAX_CONTROL_VERTS];
float weights[PATCH_MAX_CONTROL_VERTS];
@@ -398,14 +398,14 @@ ccl_device float3 patch_eval_uchar4(KernelGlobals *kg,
int num_control = patch_eval_control_verts(
kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv);
- float3 val = make_float3(0.0f, 0.0f, 0.0f);
+ float4 val = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
if (du)
- *du = make_float3(0.0f, 0.0f, 0.0f);
+ *du = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
if (dv)
- *dv = make_float3(0.0f, 0.0f, 0.0f);
+ *dv = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
for (int i = 0; i < num_control; i++) {
- float3 v = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, offset + indices[i]));
+ float4 v = color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, offset + indices[i]));
val += v * weights[i];
if (du)
diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h
index 7f2b52a24c4..997abf438d0 100644
--- a/intern/cycles/kernel/geom/geom_primitive.h
+++ b/intern/cycles/kernel/geom/geom_primitive.h
@@ -162,6 +162,32 @@ ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg,
}
}
+ccl_device_inline float4 primitive_attribute_float4(KernelGlobals *kg,
+ const ShaderData *sd,
+ const AttributeDescriptor desc,
+ float4 *dx,
+ float4 *dy)
+{
+ if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
+ if (subd_triangle_patch(kg, sd) == ~0)
+ return triangle_attribute_float4(kg, sd, desc, dx, dy);
+ else
+ return subd_triangle_attribute_float4(kg, sd, desc, dx, dy);
+ }
+#ifdef __HAIR__
+ else if (sd->type & PRIMITIVE_ALL_CURVE) {
+ return curve_attribute_float4(kg, sd, desc, dx, dy);
+ }
+#endif
+ else {
+ if (dx)
+ *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+}
+
ccl_device_inline float2 primitive_surface_attribute_float2(KernelGlobals *kg,
const ShaderData *sd,
const AttributeDescriptor desc,
diff --git a/intern/cycles/kernel/geom/geom_subd_triangle.h b/intern/cycles/kernel/geom/geom_subd_triangle.h
index 8d5b3c12833..3eef9857ae3 100644
--- a/intern/cycles/kernel/geom/geom_subd_triangle.h
+++ b/intern/cycles/kernel/geom/geom_subd_triangle.h
@@ -217,6 +217,14 @@ ccl_device_noinline float subd_triangle_attribute_float(
return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
}
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+ if (dx)
+ *dx = 0.0f;
+ if (dy)
+ *dy = 0.0f;
+
+ return kernel_tex_fetch(__attributes_float, desc.offset);
+ }
else {
if (dx)
*dx = 0.0f;
@@ -352,6 +360,14 @@ ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals *kg,
return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
}
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+ if (dx)
+ *dx = make_float2(0.0f, 0.0f);
+ if (dy)
+ *dy = make_float2(0.0f, 0.0f);
+
+ return kernel_tex_fetch(__attributes_float2, desc.offset);
+ }
else {
if (dx)
*dx = make_float2(0.0f, 0.0f);
@@ -382,13 +398,7 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg,
float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
float3 a, dads, dadt;
-
- if (desc.element == ATTR_ELEMENT_CORNER_BYTE) {
- a = patch_eval_uchar4(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
- }
- else {
- a = patch_eval_float3(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
- }
+ a = patch_eval_float3(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
# ifdef __RAY_DIFFERENTIALS__
if (dx || dy) {
@@ -460,7 +470,7 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg,
return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
}
- else if (desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) {
+ else if (desc.element == ATTR_ELEMENT_CORNER) {
float2 uv[3];
subd_triangle_patch_uv(kg, sd, uv);
@@ -469,18 +479,10 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg,
float3 f0, f1, f2, f3;
- if (desc.element == ATTR_ELEMENT_CORNER) {
- f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset));
- f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset));
- f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset));
- f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset));
- }
- else {
- f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[0] + desc.offset));
- f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[1] + desc.offset));
- f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[2] + desc.offset));
- f3 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[3] + desc.offset));
- }
+ f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset));
+ f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset));
+ f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset));
+ f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset));
if (subd_triangle_patch_num_corners(kg, patch) != 4) {
f1 = (f1 + f0) * 0.5f;
@@ -500,6 +502,14 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg,
return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
}
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+ if (dx)
+ *dx = make_float3(0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float3(0.0f, 0.0f, 0.0f);
+
+ return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset));
+ }
else {
if (dx)
*dx = make_float3(0.0f, 0.0f, 0.0f);
@@ -510,4 +520,110 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg,
}
}
+ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals *kg,
+ const ShaderData *sd,
+ const AttributeDescriptor desc,
+ float4 *dx,
+ float4 *dy)
+{
+ int patch = subd_triangle_patch(kg, sd);
+
+#ifdef __PATCH_EVAL__
+ if (desc.flags & ATTR_SUBDIVIDED) {
+ float2 uv[3];
+ subd_triangle_patch_uv(kg, sd, uv);
+
+ float2 dpdu = uv[0] - uv[2];
+ float2 dpdv = uv[1] - uv[2];
+
+ /* p is [s, t] */
+ float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
+
+ float4 dads, dadt;
+
+ float4 a = patch_eval_uchar4(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
+
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx || dy) {
+ float dsdu = dpdu.x;
+ float dtdu = dpdu.y;
+ float dsdv = dpdv.x;
+ float dtdv = dpdv.y;
+
+ if (dx) {
+ float dudx = sd->du.dx;
+ float dvdx = sd->dv.dx;
+
+ float dsdx = dsdu * dudx + dsdv * dvdx;
+ float dtdx = dtdu * dudx + dtdv * dvdx;
+
+ *dx = dads * dsdx + dadt * dtdx;
+ }
+ if (dy) {
+ float dudy = sd->du.dy;
+ float dvdy = sd->dv.dy;
+
+ float dsdy = dsdu * dudy + dsdv * dvdy;
+ float dtdy = dtdu * dudy + dtdv * dvdy;
+
+ *dy = dads * dsdy + dadt * dtdy;
+ }
+ }
+# endif
+ return a;
+ }
+ else
+#endif /* __PATCH_EVAL__ */
+ if (desc.element == ATTR_ELEMENT_CORNER_BYTE) {
+ float2 uv[3];
+ subd_triangle_patch_uv(kg, sd, uv);
+
+ int corners[4];
+ subd_triangle_patch_corners(kg, patch, corners);
+
+ float4 f0 = color_uchar4_to_float4(
+ kernel_tex_fetch(__attributes_uchar4, corners[0] + desc.offset));
+ float4 f1 = color_uchar4_to_float4(
+ kernel_tex_fetch(__attributes_uchar4, corners[1] + desc.offset));
+ float4 f2 = color_uchar4_to_float4(
+ kernel_tex_fetch(__attributes_uchar4, corners[2] + desc.offset));
+ float4 f3 = color_uchar4_to_float4(
+ kernel_tex_fetch(__attributes_uchar4, corners[3] + desc.offset));
+
+ if (subd_triangle_patch_num_corners(kg, patch) != 4) {
+ f1 = (f1 + f0) * 0.5f;
+ f3 = (f3 + f0) * 0.5f;
+ }
+
+ float4 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
+ float4 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
+ float4 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
+
+#ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
+ if (dy)
+ *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
+#endif
+
+ return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
+ }
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+ if (dx)
+ *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+ return color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, desc.offset));
+ }
+ else {
+ if (dx)
+ *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+}
+
CCL_NAMESPACE_END
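
The corner path above blends the four patch corner values bilinearly at each of the triangle's patch coordinates, i.e. mix(mix(f0, f1, x), mix(f3, f2, x), y), before the barycentric blend across the triangle. A scalar sketch of that corner interpolation (plain C, illustrative values):

#include <stdio.h>

static float mixf(float a, float b, float t)
{
  return a + (b - a) * t;
}

static float patch_corner_bilinear(const float f[4], float x, float y)
{
  return mixf(mixf(f[0], f[1], x), mixf(f[3], f[2], x), y);
}

int main(void)
{
  const float f[4] = {1.0f, 2.0f, 4.0f, 3.0f}; /* corner values f0, f1, f2, f3 */
  printf("%g %g %g %g\n",
         patch_corner_bilinear(f, 0.0f, 0.0f),  /* 1: corner f0 */
         patch_corner_bilinear(f, 1.0f, 0.0f),  /* 2: corner f1 */
         patch_corner_bilinear(f, 1.0f, 1.0f),  /* 4: corner f2 */
         patch_corner_bilinear(f, 0.5f, 0.5f)); /* 2.5: patch centre */
  return 0;
}
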
diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h
index 9938c0ba2c3..0278f3ade8e 100644
--- a/intern/cycles/kernel/geom/geom_triangle.h
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@ -153,6 +153,14 @@ ccl_device float triangle_attribute_float(
return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
}
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+ if (dx)
+ *dx = 0.0f;
+ if (dy)
+ *dy = 0.0f;
+
+ return kernel_tex_fetch(__attributes_float, desc.offset);
+ }
else {
if (dx)
*dx = 0.0f;
@@ -212,6 +220,14 @@ ccl_device float2 triangle_attribute_float2(KernelGlobals *kg,
return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
}
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+ if (dx)
+ *dx = make_float2(0.0f, 0.0f);
+ if (dy)
+ *dy = make_float2(0.0f, 0.0f);
+
+ return kernel_tex_fetch(__attributes_float2, desc.offset);
+ }
else {
if (dx)
*dx = make_float2(0.0f, 0.0f);
@@ -255,20 +271,13 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg,
return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
}
- else if (desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) {
+ else if (desc.element == ATTR_ELEMENT_CORNER) {
int tri = desc.offset + sd->prim * 3;
float3 f0, f1, f2;
- if (desc.element == ATTR_ELEMENT_CORNER) {
- f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0));
- f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1));
- f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2));
- }
- else {
- f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 0));
- f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 1));
- f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 2));
- }
+ f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0));
+ f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1));
+ f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2));
#ifdef __RAY_DIFFERENTIALS__
if (dx)
@@ -279,6 +288,14 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg,
return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
}
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+ if (dx)
+ *dx = make_float3(0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float3(0.0f, 0.0f, 0.0f);
+
+ return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset));
+ }
else {
if (dx)
*dx = make_float3(0.0f, 0.0f, 0.0f);
@@ -289,4 +306,53 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg,
}
}
+ccl_device float4 triangle_attribute_float4(KernelGlobals *kg,
+ const ShaderData *sd,
+ const AttributeDescriptor desc,
+ float4 *dx,
+ float4 *dy)
+{
+ if (desc.element == ATTR_ELEMENT_CORNER_BYTE || desc.element == ATTR_ELEMENT_VERTEX) {
+ float4 f0, f1, f2;
+
+ if (desc.element == ATTR_ELEMENT_CORNER_BYTE) {
+ int tri = desc.offset + sd->prim * 3;
+ f0 = color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, tri + 0));
+ f1 = color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, tri + 1));
+ f2 = color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, tri + 2));
+ }
+ else {
+ uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
+ f0 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x);
+ f1 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y);
+ f2 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z);
+ }
+
+#ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2;
+ if (dy)
+ *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2;
+#endif
+
+ return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
+ }
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+ if (dx)
+ *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+ return color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, desc.offset));
+ }
+ else {
+ if (dx)
+ *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+}
+
CCL_NAMESPACE_END
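
The interpolation above is the usual barycentric blend val = u * f0 + v * f1 + (1 - u - v) * f2, and the __RAY_DIFFERENTIALS__ branch is its chain-rule derivative with respect to screen x and y, since only u and v depend on the pixel position. A tiny worked example (plain C, illustrative values):

#include <stdio.h>

int main(void)
{
  const float f0 = 2.0f, f1 = 4.0f, f2 = 1.0f; /* per-corner attribute values */
  const float u = 0.25f, v = 0.5f;             /* barycentrics at the hit */
  const float dudx = 0.01f, dvdx = -0.02f;     /* barycentric screen derivatives */

  const float val = u * f0 + v * f1 + (1.0f - u - v) * f2;
  /* d(val)/dx = du/dx * f0 + dv/dx * f1 - (du/dx + dv/dx) * f2 */
  const float dvaldx = dudx * f0 + dvdx * f1 - (dudx + dvdx) * f2;
  printf("val = %g, dval/dx = %g\n", val, dvaldx); /* 2.75 and -0.05 */
  return 0;
}
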
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index bcad03102d2..b0cce274b94 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -71,424 +71,6 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
return false;
}
-#ifdef __KERNEL_AVX2__
-# define cross256(A, B, C, D) _mm256_fmsub_ps(A, B, _mm256_mul_ps(C, D))
-ccl_device_inline int ray_triangle_intersect8(KernelGlobals *kg,
- float3 ray_P,
- float3 ray_dir,
- Intersection **isect,
- uint visibility,
- int object,
- __m256 *triA,
- __m256 *triB,
- __m256 *triC,
- int prim_addr,
- int prim_num,
- uint *num_hits,
- uint max_hits,
- int *num_hits_in_instance,
- float isect_t)
-{
-
- const unsigned char prim_num_mask = (1 << prim_num) - 1;
-
- const __m256i zero256 = _mm256_setzero_si256();
-
- const __m256 Px256 = _mm256_set1_ps(ray_P.x);
- const __m256 Py256 = _mm256_set1_ps(ray_P.y);
- const __m256 Pz256 = _mm256_set1_ps(ray_P.z);
-
- const __m256 dirx256 = _mm256_set1_ps(ray_dir.x);
- const __m256 diry256 = _mm256_set1_ps(ray_dir.y);
- const __m256 dirz256 = _mm256_set1_ps(ray_dir.z);
-
- /* Calculate vertices relative to ray origin. */
- __m256 v0_x_256 = _mm256_sub_ps(triC[0], Px256);
- __m256 v0_y_256 = _mm256_sub_ps(triC[1], Py256);
- __m256 v0_z_256 = _mm256_sub_ps(triC[2], Pz256);
-
- __m256 v1_x_256 = _mm256_sub_ps(triA[0], Px256);
- __m256 v1_y_256 = _mm256_sub_ps(triA[1], Py256);
- __m256 v1_z_256 = _mm256_sub_ps(triA[2], Pz256);
-
- __m256 v2_x_256 = _mm256_sub_ps(triB[0], Px256);
- __m256 v2_y_256 = _mm256_sub_ps(triB[1], Py256);
- __m256 v2_z_256 = _mm256_sub_ps(triB[2], Pz256);
-
- __m256 v0_v1_x_256 = _mm256_add_ps(v0_x_256, v1_x_256);
- __m256 v0_v1_y_256 = _mm256_add_ps(v0_y_256, v1_y_256);
- __m256 v0_v1_z_256 = _mm256_add_ps(v0_z_256, v1_z_256);
-
- __m256 v0_v2_x_256 = _mm256_add_ps(v0_x_256, v2_x_256);
- __m256 v0_v2_y_256 = _mm256_add_ps(v0_y_256, v2_y_256);
- __m256 v0_v2_z_256 = _mm256_add_ps(v0_z_256, v2_z_256);
-
- __m256 v1_v2_x_256 = _mm256_add_ps(v1_x_256, v2_x_256);
- __m256 v1_v2_y_256 = _mm256_add_ps(v1_y_256, v2_y_256);
- __m256 v1_v2_z_256 = _mm256_add_ps(v1_z_256, v2_z_256);
-
- /* Calculate triangle edges. */
- __m256 e0_x_256 = _mm256_sub_ps(v2_x_256, v0_x_256);
- __m256 e0_y_256 = _mm256_sub_ps(v2_y_256, v0_y_256);
- __m256 e0_z_256 = _mm256_sub_ps(v2_z_256, v0_z_256);
-
- __m256 e1_x_256 = _mm256_sub_ps(v0_x_256, v1_x_256);
- __m256 e1_y_256 = _mm256_sub_ps(v0_y_256, v1_y_256);
- __m256 e1_z_256 = _mm256_sub_ps(v0_z_256, v1_z_256);
-
- __m256 e2_x_256 = _mm256_sub_ps(v1_x_256, v2_x_256);
- __m256 e2_y_256 = _mm256_sub_ps(v1_y_256, v2_y_256);
- __m256 e2_z_256 = _mm256_sub_ps(v1_z_256, v2_z_256);
-
- /* Perform edge tests. */
- /* cross (AyBz - AzBy, AzBx -AxBz, AxBy - AyBx) */
- __m256 U_x_256 = cross256(v0_v2_y_256, e0_z_256, v0_v2_z_256, e0_y_256);
- __m256 U_y_256 = cross256(v0_v2_z_256, e0_x_256, v0_v2_x_256, e0_z_256);
- __m256 U_z_256 = cross256(v0_v2_x_256, e0_y_256, v0_v2_y_256, e0_x_256);
- /* vertical dot */
- __m256 U_256 = _mm256_mul_ps(U_x_256, dirx256);
- U_256 = _mm256_fmadd_ps(U_y_256, diry256, U_256);
- U_256 = _mm256_fmadd_ps(U_z_256, dirz256, U_256);
-
- __m256 V_x_256 = cross256(v0_v1_y_256, e1_z_256, v0_v1_z_256, e1_y_256);
- __m256 V_y_256 = cross256(v0_v1_z_256, e1_x_256, v0_v1_x_256, e1_z_256);
- __m256 V_z_256 = cross256(v0_v1_x_256, e1_y_256, v0_v1_y_256, e1_x_256);
- /* vertical dot */
- __m256 V_256 = _mm256_mul_ps(V_x_256, dirx256);
- V_256 = _mm256_fmadd_ps(V_y_256, diry256, V_256);
- V_256 = _mm256_fmadd_ps(V_z_256, dirz256, V_256);
-
- __m256 W_x_256 = cross256(v1_v2_y_256, e2_z_256, v1_v2_z_256, e2_y_256);
- __m256 W_y_256 = cross256(v1_v2_z_256, e2_x_256, v1_v2_x_256, e2_z_256);
- __m256 W_z_256 = cross256(v1_v2_x_256, e2_y_256, v1_v2_y_256, e2_x_256);
- /* vertical dot */
- __m256 W_256 = _mm256_mul_ps(W_x_256, dirx256);
- W_256 = _mm256_fmadd_ps(W_y_256, diry256, W_256);
- W_256 = _mm256_fmadd_ps(W_z_256, dirz256, W_256);
-
- __m256i U_256_1 = _mm256_srli_epi32(_mm256_castps_si256(U_256), 31);
- __m256i V_256_1 = _mm256_srli_epi32(_mm256_castps_si256(V_256), 31);
- __m256i W_256_1 = _mm256_srli_epi32(_mm256_castps_si256(W_256), 31);
- __m256i UVW_256_1 = _mm256_add_epi32(_mm256_add_epi32(U_256_1, V_256_1), W_256_1);
-
- const __m256i one256 = _mm256_set1_epi32(1);
- const __m256i two256 = _mm256_set1_epi32(2);
-
- __m256i mask_minmaxUVW_256 = _mm256_or_si256(_mm256_cmpeq_epi32(one256, UVW_256_1),
- _mm256_cmpeq_epi32(two256, UVW_256_1));
-
- unsigned char mask_minmaxUVW_pos = _mm256_movemask_ps(_mm256_castsi256_ps(mask_minmaxUVW_256));
- if ((mask_minmaxUVW_pos & prim_num_mask) == prim_num_mask) { //all bits set
- return false;
- }
-
- /* Calculate geometry normal and denominator. */
- __m256 Ng1_x_256 = cross256(e1_y_256, e0_z_256, e1_z_256, e0_y_256);
- __m256 Ng1_y_256 = cross256(e1_z_256, e0_x_256, e1_x_256, e0_z_256);
- __m256 Ng1_z_256 = cross256(e1_x_256, e0_y_256, e1_y_256, e0_x_256);
-
- Ng1_x_256 = _mm256_add_ps(Ng1_x_256, Ng1_x_256);
- Ng1_y_256 = _mm256_add_ps(Ng1_y_256, Ng1_y_256);
- Ng1_z_256 = _mm256_add_ps(Ng1_z_256, Ng1_z_256);
-
- /* vertical dot */
- __m256 den_256 = _mm256_mul_ps(Ng1_x_256, dirx256);
- den_256 = _mm256_fmadd_ps(Ng1_y_256, diry256, den_256);
- den_256 = _mm256_fmadd_ps(Ng1_z_256, dirz256, den_256);
-
- /* Perform depth test. */
- __m256 T_256 = _mm256_mul_ps(Ng1_x_256, v0_x_256);
- T_256 = _mm256_fmadd_ps(Ng1_y_256, v0_y_256, T_256);
- T_256 = _mm256_fmadd_ps(Ng1_z_256, v0_z_256, T_256);
-
- const __m256i c0x80000000 = _mm256_set1_epi32(0x80000000);
- __m256i sign_den_256 = _mm256_and_si256(_mm256_castps_si256(den_256), c0x80000000);
-
- __m256 sign_T_256 = _mm256_castsi256_ps(
- _mm256_xor_si256(_mm256_castps_si256(T_256), sign_den_256));
-
- unsigned char mask_sign_T = _mm256_movemask_ps(sign_T_256);
- if (((mask_minmaxUVW_pos | mask_sign_T) & prim_num_mask) == prim_num_mask) {
- return false;
- }
-
- __m256 xor_signmask_256 = _mm256_castsi256_ps(
- _mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256));
-
- ccl_align(32) float den8[8], U8[8], V8[8], T8[8], sign_T8[8], xor_signmask8[8];
- ccl_align(32) unsigned int mask_minmaxUVW8[8];
-
- if (visibility == PATH_RAY_SHADOW_OPAQUE) {
- __m256i mask_final_256 = _mm256_cmpeq_epi32(mask_minmaxUVW_256, zero256);
- __m256i maskden256 = _mm256_cmpeq_epi32(_mm256_castps_si256(den_256), zero256);
- __m256i mask0 = _mm256_cmpgt_epi32(zero256, _mm256_castps_si256(sign_T_256));
- __m256 rayt_256 = _mm256_set1_ps((*isect)->t);
- __m256i mask1 = _mm256_cmpgt_epi32(
- _mm256_castps_si256(sign_T_256),
- _mm256_castps_si256(_mm256_mul_ps(
- _mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)),
- rayt_256)));
- mask0 = _mm256_or_si256(mask1, mask0);
- mask_final_256 = _mm256_andnot_si256(mask0, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask)
- mask_final_256 = _mm256_andnot_si256(
- maskden256, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask) & (~maskden)
- unsigned char mask_final = _mm256_movemask_ps(_mm256_castsi256_ps(mask_final_256));
- if ((mask_final & prim_num_mask) == 0) {
- return false;
- }
- const int i = __bsf(mask_final);
- __m256 inv_den_256 = _mm256_rcp_ps(den_256);
- U_256 = _mm256_mul_ps(U_256, inv_den_256);
- V_256 = _mm256_mul_ps(V_256, inv_den_256);
- T_256 = _mm256_mul_ps(T_256, inv_den_256);
- _mm256_store_ps(U8, U_256);
- _mm256_store_ps(V8, V_256);
- _mm256_store_ps(T8, T_256);
- /* NOTE: Here we assume visibility for all triangles in the node is
- * the same. */
- (*isect)->u = U8[i];
- (*isect)->v = V8[i];
- (*isect)->t = T8[i];
- (*isect)->prim = (prim_addr + i);
- (*isect)->object = object;
- (*isect)->type = PRIMITIVE_TRIANGLE;
- return true;
- }
- else {
- _mm256_store_ps(den8, den_256);
- _mm256_store_ps(U8, U_256);
- _mm256_store_ps(V8, V_256);
- _mm256_store_ps(T8, T_256);
-
- _mm256_store_ps(sign_T8, sign_T_256);
- _mm256_store_ps(xor_signmask8, xor_signmask_256);
- _mm256_store_si256((__m256i *)mask_minmaxUVW8, mask_minmaxUVW_256);
-
- int ret = false;
-
- if (visibility == PATH_RAY_SHADOW) {
- for (int i = 0; i < prim_num; i++) {
- if (mask_minmaxUVW8[i]) {
- continue;
- }
-# ifdef __VISIBILITY_FLAG__
- if ((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) {
- continue;
- }
-# endif
- if ((sign_T8[i] < 0.0f) || (sign_T8[i] > (*isect)->t * xor_signmask8[i])) {
- continue;
- }
- if (!den8[i]) {
- continue;
- }
- const float inv_den = 1.0f / den8[i];
- (*isect)->u = U8[i] * inv_den;
- (*isect)->v = V8[i] * inv_den;
- (*isect)->t = T8[i] * inv_den;
- (*isect)->prim = (prim_addr + i);
- (*isect)->object = object;
- (*isect)->type = PRIMITIVE_TRIANGLE;
- const int prim = kernel_tex_fetch(__prim_index, (*isect)->prim);
- int shader = 0;
-# ifdef __HAIR__
- if (kernel_tex_fetch(__prim_type, (*isect)->prim) & PRIMITIVE_ALL_TRIANGLE)
-# endif
- {
- shader = kernel_tex_fetch(__tri_shader, prim);
- }
-# ifdef __HAIR__
- else {
- float4 str = kernel_tex_fetch(__curves, prim);
- shader = __float_as_int(str.z);
- }
-# endif
- const int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
- /* If no transparent shadows, all light is blocked. */
- if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
- return 2;
- }
- /* If maximum number of hits reached, block all light. */
- else if (num_hits == NULL || *num_hits == max_hits) {
- return 2;
- }
- /* Move on to next entry in intersections array. */
- ret = true;
- (*isect)++;
- (*num_hits)++;
- (*num_hits_in_instance)++;
- (*isect)->t = isect_t;
- }
- }
- else {
- for (int i = 0; i < prim_num; i++) {
- if (mask_minmaxUVW8[i]) {
- continue;
- }
-# ifdef __VISIBILITY_FLAG__
- if ((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) {
- continue;
- }
-# endif
- if ((sign_T8[i] < 0.0f) || (sign_T8[i] > (*isect)->t * xor_signmask8[i])) {
- continue;
- }
- if (!den8[i]) {
- continue;
- }
- const float inv_den = 1.0f / den8[i];
- (*isect)->u = U8[i] * inv_den;
- (*isect)->v = V8[i] * inv_den;
- (*isect)->t = T8[i] * inv_den;
- (*isect)->prim = (prim_addr + i);
- (*isect)->object = object;
- (*isect)->type = PRIMITIVE_TRIANGLE;
- ret = true;
- }
- }
- return ret;
- }
-}
-
-ccl_device_inline int triangle_intersect8(KernelGlobals *kg,
- Intersection **isect,
- float3 P,
- float3 dir,
- uint visibility,
- int object,
- int prim_addr,
- int prim_num,
- uint *num_hits,
- uint max_hits,
- int *num_hits_in_instance,
- float isect_t)
-{
- __m128 tri_a[8], tri_b[8], tri_c[8];
- __m256 tritmp[12], tri[12];
- __m256 triA[3], triB[3], triC[3];
-
- int i, r;
-
- uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
- for (i = 0; i < prim_num; i++) {
- tri_a[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++];
- tri_b[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++];
- tri_c[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++];
- }
- //create 9 or 12 placeholders
- tri[0] = _mm256_castps128_ps256(tri_a[0]); //_mm256_zextps128_ps256
- tri[1] = _mm256_castps128_ps256(tri_b[0]); //_mm256_zextps128_ps256
- tri[2] = _mm256_castps128_ps256(tri_c[0]); //_mm256_zextps128_ps256
-
- tri[3] = _mm256_castps128_ps256(tri_a[1]); //_mm256_zextps128_ps256
- tri[4] = _mm256_castps128_ps256(tri_b[1]); //_mm256_zextps128_ps256
- tri[5] = _mm256_castps128_ps256(tri_c[1]); //_mm256_zextps128_ps256
-
- tri[6] = _mm256_castps128_ps256(tri_a[2]); //_mm256_zextps128_ps256
- tri[7] = _mm256_castps128_ps256(tri_b[2]); //_mm256_zextps128_ps256
- tri[8] = _mm256_castps128_ps256(tri_c[2]); //_mm256_zextps128_ps256
-
- if (prim_num > 3) {
- tri[9] = _mm256_castps128_ps256(tri_a[3]); //_mm256_zextps128_ps256
- tri[10] = _mm256_castps128_ps256(tri_b[3]); //_mm256_zextps128_ps256
- tri[11] = _mm256_castps128_ps256(tri_c[3]); //_mm256_zextps128_ps256
- }
-
- for (i = 4, r = 0; i < prim_num; i++, r += 3) {
- tri[r] = _mm256_insertf128_ps(tri[r], tri_a[i], 1);
- tri[r + 1] = _mm256_insertf128_ps(tri[r + 1], tri_b[i], 1);
- tri[r + 2] = _mm256_insertf128_ps(tri[r + 2], tri_c[i], 1);
- }
-
- //------------------------------------------------
- //0! Xa0 Ya0 Za0 1 Xa4 Ya4 Za4 1
- //1! Xb0 Yb0 Zb0 1 Xb4 Yb4 Zb4 1
- //2! Xc0 Yc0 Zc0 1 Xc4 Yc4 Zc4 1
-
- //3! Xa1 Ya1 Za1 1 Xa5 Ya5 Za5 1
- //4! Xb1 Yb1 Zb1 1 Xb5 Yb5 Zb5 1
- //5! Xc1 Yc1 Zc1 1 Xc5 Yc5 Zc5 1
-
- //6! Xa2 Ya2 Za2 1 Xa6 Ya6 Za6 1
- //7! Xb2 Yb2 Zb2 1 Xb6 Yb6 Zb6 1
- //8! Xc2 Yc2 Zc2 1 Xc6 Yc6 Zc6 1
-
- //9! Xa3 Ya3 Za3 1 Xa7 Ya7 Za7 1
- //10! Xb3 Yb3 Zb3 1 Xb7 Yb7 Zb7 1
- //11! Xc3 Yc3 Zc3 1 Xc7 Yc7 Zc7 1
-
- //"transpose"
- tritmp[0] = _mm256_unpacklo_ps(tri[0], tri[3]); //0! Xa0 Xa1 Ya0 Ya1 Xa4 Xa5 Ya4 Ya5
- tritmp[1] = _mm256_unpackhi_ps(tri[0], tri[3]); //1! Za0 Za1 1 1 Za4 Za5 1 1
-
- tritmp[2] = _mm256_unpacklo_ps(tri[6], tri[9]); //2! Xa2 Xa3 Ya2 Ya3 Xa6 Xa7 Ya6 Ya7
- tritmp[3] = _mm256_unpackhi_ps(tri[6], tri[9]); //3! Za2 Za3 1 1 Za6 Za7 1 1
-
- tritmp[4] = _mm256_unpacklo_ps(tri[1], tri[4]); //4! Xb0 Xb1 Yb0 Yb1 Xb4 Xb5 Yb4 Yb5
- tritmp[5] = _mm256_unpackhi_ps(tri[1], tri[4]); //5! Zb0 Zb1 1 1 Zb4 Zb5 1 1
-
- tritmp[6] = _mm256_unpacklo_ps(tri[7], tri[10]); //6! Xb2 Xb3 Yb2 Yb3 Xb6 Xb7 Yb6 Yb7
- tritmp[7] = _mm256_unpackhi_ps(tri[7], tri[10]); //7! Zb2 Zb3 1 1 Zb6 Zb7 1 1
-
- tritmp[8] = _mm256_unpacklo_ps(tri[2], tri[5]); //8! Xc0 Xc1 Yc0 Yc1 Xc4 Xc5 Yc4 Yc5
- tritmp[9] = _mm256_unpackhi_ps(tri[2], tri[5]); //9! Zc0 Zc1 1 1 Zc4 Zc5 1 1
-
- tritmp[10] = _mm256_unpacklo_ps(tri[8], tri[11]); //10! Xc2 Xc3 Yc2 Yc3 Xc6 Xc7 Yc6 Yc7
- tritmp[11] = _mm256_unpackhi_ps(tri[8], tri[11]); //11! Zc2 Zc3 1 1 Zc6 Zc7 1 1
-
- /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
- triA[0] = _mm256_castpd_ps(
- _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[0]),
- _mm256_castps_pd(tritmp[2]))); // Xa0 Xa1 Xa2 Xa3 Xa4 Xa5 Xa6 Xa7
- triA[1] = _mm256_castpd_ps(
- _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[0]),
- _mm256_castps_pd(tritmp[2]))); // Ya0 Ya1 Ya2 Ya3 Ya4 Ya5 Ya6 Ya7
- triA[2] = _mm256_castpd_ps(
- _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[1]),
- _mm256_castps_pd(tritmp[3]))); // Za0 Za1 Za2 Za3 Za4 Za5 Za6 Za7
-
- triB[0] = _mm256_castpd_ps(
- _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[4]),
- _mm256_castps_pd(tritmp[6]))); // Xb0 Xb1 Xb2 Xb3 Xb4 Xb5 Xb5 Xb7
- triB[1] = _mm256_castpd_ps(
- _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[4]),
- _mm256_castps_pd(tritmp[6]))); // Yb0 Yb1 Yb2 Yb3 Yb4 Yb5 Yb5 Yb7
- triB[2] = _mm256_castpd_ps(
- _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[5]),
- _mm256_castps_pd(tritmp[7]))); // Zb0 Zb1 Zb2 Zb3 Zb4 Zb5 Zb5 Zb7
-
- triC[0] = _mm256_castpd_ps(
- _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[8]),
- _mm256_castps_pd(tritmp[10]))); //Xc0 Xc1 Xc2 Xc3 Xc4 Xc5 Xc6 Xc7
- triC[1] = _mm256_castpd_ps(
- _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[8]),
- _mm256_castps_pd(tritmp[10]))); //Yc0 Yc1 Yc2 Yc3 Yc4 Yc5 Yc6 Yc7
- triC[2] = _mm256_castpd_ps(
- _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[9]),
- _mm256_castps_pd(tritmp[11]))); //Zc0 Zc1 Zc2 Zc3 Zc4 Zc5 Zc6 Zc7
-
- /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
-
- int result = ray_triangle_intersect8(kg,
- P,
- dir,
- isect,
- visibility,
- object,
- triA,
- triB,
- triC,
- prim_addr,
- prim_num,
- num_hits,
- max_hits,
- num_hits_in_instance,
- isect_t);
- return result;
-}
-
-#endif /* __KERNEL_AVX2__ */
-
/* Special ray intersection routines for subsurface scattering. In that case we
 * only want to intersect with primitives in the same object, and in case of
* multiple hits we pick a single random primitive as the intersection point.
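
The block deleted above vectorized Cycles' signed-volume (Plucker-style) triangle test across eight triangles per AVX2 iteration: U, V and W are edge functions (signed tetrahedron volumes formed by the ray direction and each translated edge) that must agree in sign for a hit, and the depth test compares T against den * t with the sign of den factored out so no division happens on the reject path. A minimal scalar sketch of the same test, with a local Vec3 standing in for the kernel's float3 (the function names here are illustrative, not the kernel API):

#include <math.h>

struct Vec3 {
  float x, y, z;
};
static Vec3 sub(Vec3 a, Vec3 b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; }
static Vec3 add(Vec3 a, Vec3 b) { return {a.x + b.x, a.y + b.y, a.z + b.z}; }
static float dot(Vec3 a, Vec3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
static Vec3 cross(Vec3 a, Vec3 b) {
  return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x};
}

/* Scalar version of the 8-wide test above: returns true on a hit in (0, tmax]
 * and outputs barycentric u, v and the hit distance t. */
static bool ray_triangle_hit(Vec3 P, Vec3 dir, float tmax,
                             Vec3 a, Vec3 b, Vec3 c,
                             float *u, float *v, float *t)
{
  /* Translate vertices so the ray origin sits at the coordinate origin. */
  const Vec3 v0 = sub(a, P), v1 = sub(b, P), v2 = sub(c, P);
  /* Triangle edges. */
  const Vec3 e0 = sub(v2, v0), e1 = sub(v0, v1), e2 = sub(v1, v2);

  /* Edge tests: U, V and W must not have mixed signs. */
  const float U = dot(cross(add(v2, v0), e0), dir);
  const float V = dot(cross(add(v0, v1), e1), dir);
  const float W = dot(cross(add(v1, v2), e2), dir);
  const float minUVW = fminf(U, fminf(V, W));
  const float maxUVW = fmaxf(U, fmaxf(V, W));
  if (minUVW < 0.0f && maxUVW > 0.0f) {
    return false;
  }

  /* Geometric normal, doubled so that U + V + W == dot(Ng, dir) == den. */
  Vec3 Ng = cross(e1, e0);
  Ng = add(Ng, Ng);
  const float den = dot(Ng, dir);
  if (den == 0.0f) {
    return false; /* Ray parallel to the triangle plane. */
  }

  /* Depth test against (0, tmax] without dividing: strip the sign of den from
   * T and compare against |den| * tmax, which is what the sign_T/xor trick
   * in the vectorized code computes. */
  const float T = dot(Ng, v0);
  const float sign_T = (den < 0.0f) ? -T : T;
  if (sign_T < 0.0f || sign_T > fabsf(den) * tmax) {
    return false;
  }

  const float inv_den = 1.0f / den;
  *u = U * inv_den;
  *v = V * inv_den;
  *t = T * inv_den;
  return true;
}

The deleted AVX2 code performed exactly these steps in structure-of-arrays form, with the movemask/__bsf sequence selecting the first surviving lane for the shadow-opaque early-out.
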
@@ -681,16 +263,20 @@ ccl_device_inline float3 triangle_refine_local(KernelGlobals *kg,
const Intersection *isect,
const Ray *ray)
{
+#ifdef __KERNEL_OPTIX__
+ /* isect->t is always in world space with OptiX. */
+ return triangle_refine(kg, sd, isect, ray);
+#else
float3 P = ray->P;
float3 D = ray->D;
float t = isect->t;
if (isect->object != OBJECT_NONE) {
-#ifdef __OBJECT_MOTION__
+# ifdef __OBJECT_MOTION__
Transform tfm = sd->ob_itfm;
-#else
+# else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
-#endif
+# endif
P = transform_point(&tfm, P);
D = transform_direction(&tfm, D);
@@ -699,7 +285,7 @@ ccl_device_inline float3 triangle_refine_local(KernelGlobals *kg,
P = P + D * t;
-#ifdef __INTERSECTION_REFINE__
+# ifdef __INTERSECTION_REFINE__
const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0),
tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1),
@@ -719,19 +305,20 @@ ccl_device_inline float3 triangle_refine_local(KernelGlobals *kg,
float rt = dot(edge2, qvec) / det;
P = P + D * rt;
}
-#endif /* __INTERSECTION_REFINE__ */
+# endif /* __INTERSECTION_REFINE__ */
if (isect->object != OBJECT_NONE) {
-#ifdef __OBJECT_MOTION__
+# ifdef __OBJECT_MOTION__
Transform tfm = sd->ob_tfm;
-#else
+# else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
-#endif
+# endif
P = transform_point(&tfm, P);
}
return P;
+#endif
}
CCL_NAMESPACE_END
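
In the hunk above, triangle_refine_local moves the ray into object space when needed and then, under __INTERSECTION_REFINE__, re-solves the hit distance from the approximate hit point so that floating-point error accumulated along long rays is largely cancelled. A hedged sketch of just that refinement step, reusing the Vec3/sub/add/dot/cross helpers from the previous sketch (refine_hit_point is an illustrative name, not a kernel function):

/* O is the ray origin already advanced to P + D * t, as in the code above. */
static Vec3 refine_hit_point(Vec3 O, Vec3 D, Vec3 a, Vec3 b, Vec3 c)
{
  const Vec3 edge1 = sub(a, c);
  const Vec3 edge2 = sub(b, c);
  const Vec3 tvec = sub(O, c);
  const Vec3 pvec = cross(D, edge2);
  const float det = dot(edge1, pvec);
  if (det != 0.0f) {
    /* Residual distance along D from O to the exact triangle plane
     * (Moller-Trumbore t term); it is tiny when O is already close. */
    const Vec3 qvec = cross(tvec, edge1);
    const float rt = dot(edge2, qvec) / det;
    O = add(O, Vec3{D.x * rt, D.y * rt, D.z * rt});
  }
  return O;
}

The OptiX branch added at the top of the function skips this entirely and forwards to triangle_refine, because isect->t is already a world-space distance there.
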
diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h
index 96cf35a40dc..f43a7841b46 100644
--- a/intern/cycles/kernel/geom/geom_volume.h
+++ b/intern/cycles/kernel/geom/geom_volume.h
@@ -51,10 +51,14 @@ ccl_device float volume_attribute_float(KernelGlobals *kg,
const ShaderData *sd,
const AttributeDescriptor desc)
{
- float3 P = volume_normalized_position(kg, sd, sd->P);
+ /* todo: optimize this so we don't have to transform both here and in
+ * kernel_tex_image_interp_3d when possible. Also could optimize for the
+ * common case where transform is translation/scale only. */
+ float3 P = sd->P;
+ object_inverse_position_transform(kg, sd, &P);
InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC) ? INTERPOLATION_CUBIC :
INTERPOLATION_NONE;
- float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp);
+ float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P, interp);
return average(float4_to_float3(r));
}
@@ -62,10 +66,11 @@ ccl_device float3 volume_attribute_float3(KernelGlobals *kg,
const ShaderData *sd,
const AttributeDescriptor desc)
{
- float3 P = volume_normalized_position(kg, sd, sd->P);
+ float3 P = sd->P;
+ object_inverse_position_transform(kg, sd, &P);
InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC) ? INTERPOLATION_CUBIC :
INTERPOLATION_NONE;
- float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp);
+ float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P, interp);
if (r.w > 1e-6f && r.w != 1.0f) {
/* For RGBA colors, unpremultiply after interpolation. */
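
The volume lookup now transforms the shading point by the object's inverse transform and hands the position to the 3D texture fetch as a float3. The unpremultiply step kept at the end exists because RGBA grids are interpolated with premultiplied alpha, which keeps fully transparent voxels from bleeding color into their neighbors; the straight color is recovered only after interpolation. A tiny standalone version of that step (Rgba stands in for the kernel's float4):

struct Rgba {
  float r, g, b, a;
};

/* Undo premultiplied alpha after interpolation, mirroring the r.w checks above:
 * skip near-zero alpha (division would blow up) and alpha == 1 (a no-op). */
static Rgba unpremultiply(Rgba c)
{
  if (c.a > 1e-6f && c.a != 1.0f) {
    const float inv = 1.0f / c.a;
    c.r *= inv;
    c.g *= inv;
    c.b *= inv;
  }
  return c;
}
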
diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h
index dfdd8843f29..b907c6a2bac 100644
--- a/intern/cycles/kernel/kernel.h
+++ b/intern/cycles/kernel/kernel.h
@@ -19,8 +19,8 @@
/* CPU Kernel Interface */
-#include "util/util_types.h"
#include "kernel/kernel_types.h"
+#include "util/util_types.h"
CCL_NAMESPACE_BEGIN
@@ -38,7 +38,7 @@ void *kernel_osl_memory(KernelGlobals *kg);
bool kernel_osl_use(KernelGlobals *kg);
void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size);
-void kernel_tex_copy(KernelGlobals *kg, const char *name, void *mem, size_t size);
+void kernel_global_memory_copy(KernelGlobals *kg, const char *name, void *mem, size_t size);
#define KERNEL_ARCH cpu
#include "kernel/kernels/cpu/kernel_cpu.h"
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index b9d723222a1..79ea03f4f6f 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -36,21 +36,18 @@ ccl_device_inline void bsdf_eval_init(BsdfEval *eval,
eval->glossy = make_float3(0.0f, 0.0f, 0.0f);
eval->transmission = make_float3(0.0f, 0.0f, 0.0f);
eval->transparent = make_float3(0.0f, 0.0f, 0.0f);
- eval->subsurface = make_float3(0.0f, 0.0f, 0.0f);
- eval->scatter = make_float3(0.0f, 0.0f, 0.0f);
+ eval->volume = make_float3(0.0f, 0.0f, 0.0f);
if (type == CLOSURE_BSDF_TRANSPARENT_ID)
eval->transparent = value;
- else if (CLOSURE_IS_BSDF_DIFFUSE(type))
+ else if (CLOSURE_IS_BSDF_DIFFUSE(type) || CLOSURE_IS_BSDF_BSSRDF(type))
eval->diffuse = value;
else if (CLOSURE_IS_BSDF_GLOSSY(type))
eval->glossy = value;
else if (CLOSURE_IS_BSDF_TRANSMISSION(type))
eval->transmission = value;
- else if (CLOSURE_IS_BSDF_BSSRDF(type))
- eval->subsurface = value;
else if (CLOSURE_IS_PHASE(type))
- eval->scatter = value;
+ eval->volume = value;
}
else
#endif
@@ -73,16 +70,14 @@ ccl_device_inline void bsdf_eval_accum(BsdfEval *eval,
value *= mis_weight;
#ifdef __PASSES__
if (eval->use_light_pass) {
- if (CLOSURE_IS_BSDF_DIFFUSE(type))
+ if (CLOSURE_IS_BSDF_DIFFUSE(type) || CLOSURE_IS_BSDF_BSSRDF(type))
eval->diffuse += value;
else if (CLOSURE_IS_BSDF_GLOSSY(type))
eval->glossy += value;
else if (CLOSURE_IS_BSDF_TRANSMISSION(type))
eval->transmission += value;
- else if (CLOSURE_IS_BSDF_BSSRDF(type))
- eval->subsurface += value;
else if (CLOSURE_IS_PHASE(type))
- eval->scatter += value;
+ eval->volume += value;
/* skipping transparent, this function is used for eval(), will be zero then */
}
@@ -98,7 +93,7 @@ ccl_device_inline bool bsdf_eval_is_zero(BsdfEval *eval)
#ifdef __PASSES__
if (eval->use_light_pass) {
return is_zero(eval->diffuse) && is_zero(eval->glossy) && is_zero(eval->transmission) &&
- is_zero(eval->transparent) && is_zero(eval->subsurface) && is_zero(eval->scatter);
+ is_zero(eval->transparent) && is_zero(eval->volume);
}
else
#endif
@@ -114,8 +109,7 @@ ccl_device_inline void bsdf_eval_mis(BsdfEval *eval, float value)
eval->diffuse *= value;
eval->glossy *= value;
eval->transmission *= value;
- eval->subsurface *= value;
- eval->scatter *= value;
+ eval->volume *= value;
/* skipping transparent, this function is used for eval(), will be zero then */
}
@@ -144,8 +138,7 @@ ccl_device_inline void bsdf_eval_mul3(BsdfEval *eval, float3 value)
eval->diffuse *= value;
eval->glossy *= value;
eval->transmission *= value;
- eval->subsurface *= value;
- eval->scatter *= value;
+ eval->volume *= value;
/* skipping transparent, this function is used for eval(), will be zero then */
}
@@ -160,7 +153,7 @@ ccl_device_inline float3 bsdf_eval_sum(const BsdfEval *eval)
{
#ifdef __PASSES__
if (eval->use_light_pass) {
- return eval->diffuse + eval->glossy + eval->transmission + eval->subsurface + eval->scatter;
+ return eval->diffuse + eval->glossy + eval->transmission + eval->volume;
}
else
#endif
@@ -174,32 +167,29 @@ ccl_device_inline float3 bsdf_eval_sum(const BsdfEval *eval)
* visible as the first non-transparent hit, while indirectly visible are the
* bounces after that. */
-ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass)
+ccl_device_inline void path_radiance_init(KernelGlobals *kg, PathRadiance *L)
{
/* clear all */
#ifdef __PASSES__
- L->use_light_pass = use_light_pass;
+ L->use_light_pass = kernel_data.film.use_light_pass;
- if (use_light_pass) {
+ if (kernel_data.film.use_light_pass) {
L->indirect = make_float3(0.0f, 0.0f, 0.0f);
L->direct_emission = make_float3(0.0f, 0.0f, 0.0f);
L->color_diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->color_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->color_transmission = make_float3(0.0f, 0.0f, 0.0f);
- L->color_subsurface = make_float3(0.0f, 0.0f, 0.0f);
L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f);
- L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f);
- L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f);
+ L->direct_volume = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f);
- L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f);
- L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f);
+ L->indirect_volume = make_float3(0.0f, 0.0f, 0.0f);
L->transparent = 0.0f;
L->emission = make_float3(0.0f, 0.0f, 0.0f);
@@ -211,8 +201,7 @@ ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass)
L->state.diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->state.glossy = make_float3(0.0f, 0.0f, 0.0f);
L->state.transmission = make_float3(0.0f, 0.0f, 0.0f);
- L->state.subsurface = make_float3(0.0f, 0.0f, 0.0f);
- L->state.scatter = make_float3(0.0f, 0.0f, 0.0f);
+ L->state.volume = make_float3(0.0f, 0.0f, 0.0f);
L->state.direct = make_float3(0.0f, 0.0f, 0.0f);
}
else
@@ -264,11 +253,9 @@ ccl_device_inline void path_radiance_bsdf_bounce(KernelGlobals *kg,
L_state->diffuse = bsdf_eval->diffuse * value;
L_state->glossy = bsdf_eval->glossy * value;
L_state->transmission = bsdf_eval->transmission * value;
- L_state->subsurface = bsdf_eval->subsurface * value;
- L_state->scatter = bsdf_eval->scatter * value;
+ L_state->volume = bsdf_eval->volume * value;
- *throughput = L_state->diffuse + L_state->glossy + L_state->transmission +
- L_state->subsurface + L_state->scatter;
+ *throughput = L_state->diffuse + L_state->glossy + L_state->transmission + L_state->volume;
L_state->direct = *throughput;
}
@@ -285,7 +272,37 @@ ccl_device_inline void path_radiance_bsdf_bounce(KernelGlobals *kg,
}
}
-ccl_device_inline void path_radiance_accum_emission(PathRadiance *L,
+#ifdef __CLAMP_SAMPLE__
+ccl_device_forceinline void path_radiance_clamp(KernelGlobals *kg, float3 *L, int bounce)
+{
+ float limit = (bounce > 0) ? kernel_data.integrator.sample_clamp_indirect :
+ kernel_data.integrator.sample_clamp_direct;
+ float sum = reduce_add(fabs(*L));
+ if (sum > limit) {
+ *L *= limit / sum;
+ }
+}
+
+ccl_device_forceinline void path_radiance_clamp_throughput(KernelGlobals *kg,
+ float3 *L,
+ float3 *throughput,
+ int bounce)
+{
+ float limit = (bounce > 0) ? kernel_data.integrator.sample_clamp_indirect :
+ kernel_data.integrator.sample_clamp_direct;
+
+ float sum = reduce_add(fabs(*L));
+ if (sum > limit) {
+ float clamp_factor = limit / sum;
+ *L *= clamp_factor;
+ *throughput *= clamp_factor;
+ }
+}
+
+#endif
+
+ccl_device_inline void path_radiance_accum_emission(KernelGlobals *kg,
+ PathRadiance *L,
ccl_addr_space PathState *state,
float3 throughput,
float3 value)
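
The helpers added above implement the firefly clamp at accumulation time: the sum of absolute RGB components is compared against the direct or indirect limit (picked from the bounce number), and the contribution is scaled down uniformly rather than discarded, which preserves its hue. A standalone check of that behavior (Col and the numeric values are illustrative, not kernel data):

#include <assert.h>
#include <math.h>

struct Col {
  float x, y, z;
};

static Col clamp_contribution(Col L, float limit)
{
  const float sum = fabsf(L.x) + fabsf(L.y) + fabsf(L.z);
  if (sum > limit) {
    const float scale = limit / sum; /* uniform scale keeps the color's hue */
    L.x *= scale;
    L.y *= scale;
    L.z *= scale;
  }
  return L;
}

int main()
{
  /* A sample 10x over the limit is scaled down to the limit, not dropped. */
  const Col c = clamp_contribution({80.0f, 15.0f, 5.0f}, 10.0f);
  assert(fabsf(c.x + c.y + c.z - 10.0f) < 1e-4f);
  return 0;
}
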
@@ -296,33 +313,41 @@ ccl_device_inline void path_radiance_accum_emission(PathRadiance *L,
}
#endif
+ float3 contribution = throughput * value;
+#ifdef __CLAMP_SAMPLE__
+ path_radiance_clamp(kg, &contribution, state->bounce - 1);
+#endif
+
#ifdef __PASSES__
if (L->use_light_pass) {
if (state->bounce == 0)
- L->emission += throughput * value;
+ L->emission += contribution;
else if (state->bounce == 1)
- L->direct_emission += throughput * value;
+ L->direct_emission += contribution;
else
- L->indirect += throughput * value;
+ L->indirect += contribution;
}
else
#endif
{
- L->emission += throughput * value;
+ L->emission += contribution;
}
}
-ccl_device_inline void path_radiance_accum_ao(PathRadiance *L,
+ccl_device_inline void path_radiance_accum_ao(KernelGlobals *kg,
+ PathRadiance *L,
ccl_addr_space PathState *state,
float3 throughput,
float3 alpha,
float3 bsdf,
float3 ao)
{
+#ifdef __PASSES__
/* Store AO pass. */
if (L->use_light_pass && state->bounce == 0) {
L->ao += alpha * throughput * ao;
}
+#endif
#ifdef __SHADOW_TRICKS__
/* For shadow catcher, accumulate ratio. */
@@ -337,21 +362,23 @@ ccl_device_inline void path_radiance_accum_ao(PathRadiance *L,
}
#endif
+ float3 contribution = throughput * bsdf * ao;
+
#ifdef __PASSES__
if (L->use_light_pass) {
if (state->bounce == 0) {
/* Directly visible lighting. */
- L->direct_diffuse += throughput * bsdf * ao;
+ L->direct_diffuse += contribution;
}
else {
/* Indirectly visible lighting after BSDF bounce. */
- L->indirect += throughput * bsdf * ao;
+ L->indirect += contribution;
}
}
else
#endif
{
- L->emission += throughput * bsdf * ao;
+ L->emission += contribution;
}
}
@@ -372,7 +399,8 @@ ccl_device_inline void path_radiance_accum_total_ao(PathRadiance *L,
#endif
}
-ccl_device_inline void path_radiance_accum_light(PathRadiance *L,
+ccl_device_inline void path_radiance_accum_light(KernelGlobals *kg,
+ PathRadiance *L,
ccl_addr_space PathState *state,
float3 throughput,
BsdfEval *bsdf_eval,
@@ -392,15 +420,23 @@ ccl_device_inline void path_radiance_accum_light(PathRadiance *L,
}
#endif
+ float3 shaded_throughput = throughput * shadow;
+
#ifdef __PASSES__
if (L->use_light_pass) {
+ /* Compute the clamping based on the total contribution.
+ * The resulting scale is then applied to all individual components. */
+ float3 full_contribution = shaded_throughput * bsdf_eval_sum(bsdf_eval);
+# ifdef __CLAMP_SAMPLE__
+ path_radiance_clamp_throughput(kg, &full_contribution, &shaded_throughput, state->bounce);
+# endif
+
if (state->bounce == 0) {
/* directly visible lighting */
- L->direct_diffuse += throughput * bsdf_eval->diffuse * shadow;
- L->direct_glossy += throughput * bsdf_eval->glossy * shadow;
- L->direct_transmission += throughput * bsdf_eval->transmission * shadow;
- L->direct_subsurface += throughput * bsdf_eval->subsurface * shadow;
- L->direct_scatter += throughput * bsdf_eval->scatter * shadow;
+ L->direct_diffuse += shaded_throughput * bsdf_eval->diffuse;
+ L->direct_glossy += shaded_throughput * bsdf_eval->glossy;
+ L->direct_transmission += shaded_throughput * bsdf_eval->transmission;
+ L->direct_volume += shaded_throughput * bsdf_eval->volume;
if (is_lamp) {
L->shadow.x += shadow.x * shadow_fac;
@@ -410,13 +446,15 @@ ccl_device_inline void path_radiance_accum_light(PathRadiance *L,
}
else {
/* indirectly visible lighting after BSDF bounce */
- L->indirect += throughput * bsdf_eval_sum(bsdf_eval) * shadow;
+ L->indirect += full_contribution;
}
}
else
#endif
{
- L->emission += throughput * bsdf_eval->diffuse * shadow;
+ float3 contribution = shaded_throughput * bsdf_eval->diffuse;
+ path_radiance_clamp(kg, &contribution, state->bounce);
+ L->emission += contribution;
}
}
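
In the light-pass branch above, one physical light sample is split across the diffuse, glossy, transmission and volume buffers, so the clamp factor is computed once from the full contribution and folded into the shared shaded_throughput; every component is then scaled identically and the per-pass split still sums to the clamped total. A quick standalone check of that invariant (plain floats stand in for float3 and the BsdfEval channels):

#include <assert.h>
#include <math.h>

int main()
{
  /* Per-closure eval values and the shared throughput * shadow factor. */
  const float diffuse = 3.0f, glossy = 5.0f, transmission = 2.0f, volume = 0.0f;
  float shaded_throughput = 4.0f;
  const float limit = 10.0f;

  /* Clamp the total contribution and fold the factor into the throughput. */
  float full = shaded_throughput * (diffuse + glossy + transmission + volume);
  if (full > limit) {
    const float factor = limit / full;
    full *= factor;
    shaded_throughput *= factor;
  }

  /* Each pass scaled by the same throughput still sums to the clamped total. */
  const float split_sum = shaded_throughput * diffuse + shaded_throughput * glossy +
                          shaded_throughput * transmission + shaded_throughput * volume;
  assert(fabsf(split_sum - full) < 1e-4f);
  return 0;
}
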
@@ -437,7 +475,8 @@ ccl_device_inline void path_radiance_accum_total_light(PathRadiance *L,
#endif
}
-ccl_device_inline void path_radiance_accum_background(PathRadiance *L,
+ccl_device_inline void path_radiance_accum_background(KernelGlobals *kg,
+ PathRadiance *L,
ccl_addr_space PathState *state,
float3 throughput,
float3 value)
@@ -454,23 +493,29 @@ ccl_device_inline void path_radiance_accum_background(PathRadiance *L,
}
#endif
+ float3 contribution = throughput * value;
+#ifdef __CLAMP_SAMPLE__
+ path_radiance_clamp(kg, &contribution, state->bounce - 1);
+#endif
+
#ifdef __PASSES__
if (L->use_light_pass) {
if (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND)
- L->background += throughput * value;
+ L->background += contribution;
else if (state->bounce == 1)
- L->direct_emission += throughput * value;
+ L->direct_emission += contribution;
else
- L->indirect += throughput * value;
+ L->indirect += contribution;
}
else
#endif
{
- L->emission += throughput * value;
+ L->emission += contribution;
}
#ifdef __DENOISING_FEATURES__
- L->denoising_albedo += state->denoising_feature_weight * value;
+ L->denoising_albedo += state->denoising_feature_weight * state->denoising_feature_throughput *
+ value;
#endif /* __DENOISING_FEATURES__ */
}
@@ -503,15 +548,13 @@ ccl_device_inline void path_radiance_sum_indirect(PathRadiance *L)
L->direct_diffuse += L->state.diffuse * L->direct_emission;
L->direct_glossy += L->state.glossy * L->direct_emission;
L->direct_transmission += L->state.transmission * L->direct_emission;
- L->direct_subsurface += L->state.subsurface * L->direct_emission;
- L->direct_scatter += L->state.scatter * L->direct_emission;
+ L->direct_volume += L->state.volume * L->direct_emission;
L->indirect = safe_divide_color(L->indirect, L->state.direct);
L->indirect_diffuse += L->state.diffuse * L->indirect;
L->indirect_glossy += L->state.glossy * L->indirect;
L->indirect_transmission += L->state.transmission * L->indirect;
- L->indirect_subsurface += L->state.subsurface * L->indirect;
- L->indirect_scatter += L->state.scatter * L->indirect;
+ L->indirect_volume += L->state.volume * L->indirect;
}
#endif
}
@@ -523,8 +566,7 @@ ccl_device_inline void path_radiance_reset_indirect(PathRadiance *L)
L->state.diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->state.glossy = make_float3(0.0f, 0.0f, 0.0f);
L->state.transmission = make_float3(0.0f, 0.0f, 0.0f);
- L->state.subsurface = make_float3(0.0f, 0.0f, 0.0f);
- L->state.scatter = make_float3(0.0f, 0.0f, 0.0f);
+ L->state.volume = make_float3(0.0f, 0.0f, 0.0f);
L->direct_emission = make_float3(0.0f, 0.0f, 0.0f);
L->indirect = make_float3(0.0f, 0.0f, 0.0f);
@@ -585,15 +627,13 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg,
/* Light Passes are used */
#ifdef __PASSES__
float3 L_direct, L_indirect;
- float clamp_direct = kernel_data.integrator.sample_clamp_direct;
- float clamp_indirect = kernel_data.integrator.sample_clamp_indirect;
if (L->use_light_pass) {
path_radiance_sum_indirect(L);
- L_direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission +
- L->direct_subsurface + L->direct_scatter + L->emission;
+ L_direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission + L->direct_volume +
+ L->emission;
L_indirect = L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission +
- L->indirect_subsurface + L->indirect_scatter;
+ L->indirect_volume;
if (!kernel_data.background.transparent)
L_direct += L->background;
@@ -609,55 +649,15 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg,
L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f);
- L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f);
- L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f);
+ L->direct_volume = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f);
- L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f);
- L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f);
+ L->indirect_volume = make_float3(0.0f, 0.0f, 0.0f);
L->emission = make_float3(0.0f, 0.0f, 0.0f);
}
-
- /* Clamp direct and indirect samples */
-# ifdef __CLAMP_SAMPLE__
- else if (sum > clamp_direct || sum > clamp_indirect) {
- float scale;
-
- /* Direct */
- float sum_direct = fabsf(L_direct.x) + fabsf(L_direct.y) + fabsf(L_direct.z);
- if (sum_direct > clamp_direct) {
- scale = clamp_direct / sum_direct;
- L_direct *= scale;
-
- L->direct_diffuse *= scale;
- L->direct_glossy *= scale;
- L->direct_transmission *= scale;
- L->direct_subsurface *= scale;
- L->direct_scatter *= scale;
- L->emission *= scale;
- L->background *= scale;
- }
-
- /* Indirect */
- float sum_indirect = fabsf(L_indirect.x) + fabsf(L_indirect.y) + fabsf(L_indirect.z);
- if (sum_indirect > clamp_indirect) {
- scale = clamp_indirect / sum_indirect;
- L_indirect *= scale;
-
- L->indirect_diffuse *= scale;
- L->indirect_glossy *= scale;
- L->indirect_transmission *= scale;
- L->indirect_subsurface *= scale;
- L->indirect_scatter *= scale;
- }
-
- /* Sum again, after clamping */
- L_sum = L_direct + L_indirect;
- }
-# endif
}
/* No Light Passes */
@@ -696,7 +696,7 @@ ccl_device_inline void path_radiance_split_denoising(KernelGlobals *kg,
kernel_assert(L->use_light_pass);
*clean = L->emission + L->background;
- *noisy = L->direct_scatter + L->indirect_scatter;
+ *noisy = L->direct_volume + L->indirect_volume;
# define ADD_COMPONENT(flag, component) \
if (kernel_data.film.denoising_flags & flag) \
@@ -710,8 +710,6 @@ ccl_device_inline void path_radiance_split_denoising(KernelGlobals *kg,
ADD_COMPONENT(DENOISING_CLEAN_GLOSSY_IND, L->indirect_glossy);
ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_DIR, L->direct_transmission);
ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_IND, L->indirect_transmission);
- ADD_COMPONENT(DENOISING_CLEAN_SUBSURFACE_DIR, L->direct_subsurface);
- ADD_COMPONENT(DENOISING_CLEAN_SUBSURFACE_IND, L->indirect_subsurface);
# undef ADD_COMPONENT
#else
*noisy = L->emission;
@@ -748,14 +746,12 @@ ccl_device_inline void path_radiance_accum_sample(PathRadiance *L, PathRadiance
safe_float3_add(L->direct_diffuse, L_sample->direct_diffuse);
safe_float3_add(L->direct_glossy, L_sample->direct_glossy);
safe_float3_add(L->direct_transmission, L_sample->direct_transmission);
- safe_float3_add(L->direct_subsurface, L_sample->direct_subsurface);
- safe_float3_add(L->direct_scatter, L_sample->direct_scatter);
+ safe_float3_add(L->direct_volume, L_sample->direct_volume);
safe_float3_add(L->indirect_diffuse, L_sample->indirect_diffuse);
safe_float3_add(L->indirect_glossy, L_sample->indirect_glossy);
safe_float3_add(L->indirect_transmission, L_sample->indirect_transmission);
- safe_float3_add(L->indirect_subsurface, L_sample->indirect_subsurface);
- safe_float3_add(L->indirect_scatter, L_sample->indirect_scatter);
+ safe_float3_add(L->indirect_volume, L_sample->indirect_volume);
safe_float3_add(L->background, L_sample->background);
safe_float3_add(L->ao, L_sample->ao);
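
Throughout this file the per-closure channels of BsdfEval and PathRadiance are consolidated: BSSRDF (subsurface) contributions now accumulate into the diffuse channel, and the separate phase-function "scatter" channel becomes a single volume channel. A compact sketch of that channel mapping, with an illustrative enum in place of Cycles' CLOSURE_IS_* macros:

struct Eval {
  float diffuse, glossy, transmission, volume, transparent;
};

enum ClosureClass { CL_DIFFUSE, CL_BSSRDF, CL_GLOSSY, CL_TRANSMISSION, CL_PHASE, CL_TRANSPARENT };

/* Accumulate a contribution into the channel used for the light passes:
 * subsurface folds into diffuse, volume phase functions into volume. */
static void eval_accum(Eval *eval, ClosureClass type, float value)
{
  switch (type) {
    case CL_DIFFUSE:
    case CL_BSSRDF:
      eval->diffuse += value;
      break;
    case CL_GLOSSY:
      eval->glossy += value;
      break;
    case CL_TRANSMISSION:
      eval->transmission += value;
      break;
    case CL_PHASE:
      eval->volume += value;
      break;
    case CL_TRANSPARENT:
      eval->transparent += value;
      break;
  }
}
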
diff --git a/intern/cycles/kernel/kernel_adaptive_sampling.h b/intern/cycles/kernel/kernel_adaptive_sampling.h
new file mode 100644
index 00000000000..98b7bf7e7dc
--- /dev/null
+++ b/intern/cycles/kernel/kernel_adaptive_sampling.h
@@ -0,0 +1,239 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __KERNEL_ADAPTIVE_SAMPLING_H__
+#define __KERNEL_ADAPTIVE_SAMPLING_H__
+
+CCL_NAMESPACE_BEGIN
+
+/* Determines whether to continue sampling a given pixel or if it has sufficiently converged. */
+
+ccl_device void kernel_do_adaptive_stopping(KernelGlobals *kg,
+ ccl_global float *buffer,
+ int sample)
+{
+ /* TODO Stefan: Is this better in linear, sRGB or something else? */
+ float4 I = *((ccl_global float4 *)buffer);
+ float4 A = *(ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer);
+ /* The per pixel error as seen in section 2.1 of
+ * "A hierarchical automatic stopping condition for Monte Carlo global illumination"
+ * A small epsilon is added to the divisor to prevent division by zero. */
+ float error = (fabsf(I.x - A.x) + fabsf(I.y - A.y) + fabsf(I.z - A.z)) /
+ (sample * 0.0001f + sqrtf(I.x + I.y + I.z));
+ if (error < kernel_data.integrator.adaptive_threshold * (float)sample) {
+ /* Set the fourth component to non-zero value to indicate that this pixel has converged. */
+ buffer[kernel_data.film.pass_adaptive_aux_buffer + 3] += 1.0f;
+ }
+}
+
+/* Adjust the values of an adaptively sampled pixel. */
+
+ccl_device void kernel_adaptive_post_adjust(KernelGlobals *kg,
+ ccl_global float *buffer,
+ float sample_multiplier)
+{
+ *(ccl_global float4 *)(buffer) *= sample_multiplier;
+
+  /* Scale the aux pass too; this is necessary for progressive rendering to work properly. */
+ kernel_assert(kernel_data.film.pass_adaptive_aux_buffer);
+ *(ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer) *= sample_multiplier;
+
+#ifdef __PASSES__
+ int flag = kernel_data.film.pass_flag;
+
+ if (flag & PASSMASK(NORMAL))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_normal) *= sample_multiplier;
+
+ if (flag & PASSMASK(UV))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_uv) *= sample_multiplier;
+
+ if (flag & PASSMASK(MOTION)) {
+ *(ccl_global float4 *)(buffer + kernel_data.film.pass_motion) *= sample_multiplier;
+ *(ccl_global float *)(buffer + kernel_data.film.pass_motion_weight) *= sample_multiplier;
+ }
+
+ if (kernel_data.film.use_light_pass) {
+ int light_flag = kernel_data.film.light_pass_flag;
+
+ if (light_flag & PASSMASK(MIST))
+ *(ccl_global float *)(buffer + kernel_data.film.pass_mist) *= sample_multiplier;
+
+ /* Shadow pass omitted on purpose. It has its own scale parameter. */
+
+ if (light_flag & PASSMASK(DIFFUSE_INDIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(GLOSSY_INDIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(TRANSMISSION_INDIRECT))
+ *(ccl_global float3 *)(buffer +
+ kernel_data.film.pass_transmission_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(VOLUME_INDIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_volume_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(DIFFUSE_DIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_direct) *= sample_multiplier;
+ if (light_flag & PASSMASK(GLOSSY_DIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_direct) *= sample_multiplier;
+ if (light_flag & PASSMASK(TRANSMISSION_DIRECT))
+ *(ccl_global float3 *)(buffer +
+ kernel_data.film.pass_transmission_direct) *= sample_multiplier;
+ if (light_flag & PASSMASK(VOLUME_DIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_volume_direct) *= sample_multiplier;
+
+ if (light_flag & PASSMASK(EMISSION))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_emission) *= sample_multiplier;
+ if (light_flag & PASSMASK(BACKGROUND))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_background) *= sample_multiplier;
+ if (light_flag & PASSMASK(AO))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_ao) *= sample_multiplier;
+
+ if (light_flag & PASSMASK(DIFFUSE_COLOR))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_color) *= sample_multiplier;
+ if (light_flag & PASSMASK(GLOSSY_COLOR))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_color) *= sample_multiplier;
+ if (light_flag & PASSMASK(TRANSMISSION_COLOR))
+ *(ccl_global float3 *)(buffer +
+ kernel_data.film.pass_transmission_color) *= sample_multiplier;
+ }
+#endif
+
+#ifdef __DENOISING_FEATURES__
+
+# define scale_float3_variance(buffer, offset, scale) \
+ *(buffer + offset) *= scale; \
+ *(buffer + offset + 1) *= scale; \
+ *(buffer + offset + 2) *= scale; \
+ *(buffer + offset + 3) *= scale * scale; \
+ *(buffer + offset + 4) *= scale * scale; \
+ *(buffer + offset + 5) *= scale * scale;
+
+# define scale_shadow_variance(buffer, offset, scale) \
+ *(buffer + offset) *= scale; \
+ *(buffer + offset + 1) *= scale; \
+ *(buffer + offset + 2) *= scale * scale;
+
+ if (kernel_data.film.pass_denoising_data) {
+ scale_shadow_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_SHADOW_A, sample_multiplier);
+ scale_shadow_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_SHADOW_B, sample_multiplier);
+ if (kernel_data.film.pass_denoising_clean) {
+ scale_float3_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, sample_multiplier);
+ *(buffer + kernel_data.film.pass_denoising_clean) *= sample_multiplier;
+ *(buffer + kernel_data.film.pass_denoising_clean + 1) *= sample_multiplier;
+ *(buffer + kernel_data.film.pass_denoising_clean + 2) *= sample_multiplier;
+ }
+ else {
+ scale_float3_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, sample_multiplier);
+ }
+ scale_float3_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL, sample_multiplier);
+ scale_float3_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO, sample_multiplier);
+ *(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH) *= sample_multiplier;
+ *(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH +
+ 1) *= sample_multiplier * sample_multiplier;
+ }
+#endif /* __DENOISING_FEATURES__ */
+
+ /* Cryptomatte. */
+ if (kernel_data.film.cryptomatte_passes) {
+ int num_slots = 0;
+ num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) ? 1 : 0;
+ num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) ? 1 : 0;
+ num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) ? 1 : 0;
+ num_slots = num_slots * 2 * kernel_data.film.cryptomatte_depth;
+ ccl_global float2 *id_buffer = (ccl_global float2 *)(buffer +
+ kernel_data.film.pass_cryptomatte);
+ for (int slot = 0; slot < num_slots; slot++) {
+ id_buffer[slot].y *= sample_multiplier;
+ }
+ }
+
+ /* AOVs. */
+ for (int i = 0; i < kernel_data.film.pass_aov_value_num; i++) {
+ *(buffer + kernel_data.film.pass_aov_value + i) *= sample_multiplier;
+ }
+ for (int i = 0; i < kernel_data.film.pass_aov_color_num; i++) {
+ *((ccl_global float4 *)(buffer + kernel_data.film.pass_aov_color) + i) *= sample_multiplier;
+ }
+}
+
+/* This is a simple box filter in two passes.
+ * When a pixel demands more adaptive samples, let its neighboring pixels draw more samples too. */
+
+ccl_device bool kernel_do_adaptive_filter_x(KernelGlobals *kg, int y, ccl_global WorkTile *tile)
+{
+ bool any = false;
+ bool prev = false;
+ for (int x = tile->x; x < tile->x + tile->w; ++x) {
+ int index = tile->offset + x + y * tile->stride;
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if ((*aux).w == 0.0f) {
+ any = true;
+ if (x > tile->x && !prev) {
+ index = index - 1;
+ buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ aux = (ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer);
+ (*aux).w = 0.0f;
+ }
+ prev = true;
+ }
+ else {
+ if (prev) {
+ (*aux).w = 0.0f;
+ }
+ prev = false;
+ }
+ }
+ return any;
+}
+
+ccl_device bool kernel_do_adaptive_filter_y(KernelGlobals *kg, int x, ccl_global WorkTile *tile)
+{
+ bool prev = false;
+ bool any = false;
+ for (int y = tile->y; y < tile->y + tile->h; ++y) {
+ int index = tile->offset + x + y * tile->stride;
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if ((*aux).w == 0.0f) {
+ any = true;
+ if (y > tile->y && !prev) {
+ index = index - tile->stride;
+ buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ aux = (ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer);
+ (*aux).w = 0.0f;
+ }
+ prev = true;
+ }
+ else {
+ if (prev) {
+ (*aux).w = 0.0f;
+ }
+ prev = false;
+ }
+ }
+ return any;
+}
+
+CCL_NAMESPACE_END
+
+#endif /* __KERNEL_ADAPTIVE_SAMPLING_H__ */
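
The stopping test in the new header compares the fully accumulated color I against an auxiliary buffer A that holds a subset of the samples, normalizes the difference by a brightness term, and flags the pixel as converged once the error drops below threshold * sample. A standalone sketch of just that test (pixel_has_converged is an illustrative name; the real kernel reads I and A from the film passes shown above):

#include <math.h>

/* Per-pixel convergence test. I and A are accumulated (not averaged) RGB sums;
 * the small epsilon term keeps the divisor away from zero for black pixels. */
static bool pixel_has_converged(const float I[3], const float A[3],
                                int sample, float threshold)
{
  const float error = (fabsf(I[0] - A[0]) + fabsf(I[1] - A[1]) + fabsf(I[2] - A[2])) /
                      (sample * 0.0001f + sqrtf(I[0] + I[1] + I[2]));
  /* Both buffers accumulate, hence the comparison against threshold * sample. */
  return error < threshold * (float)sample;
}

The two filter passes that follow in the header then clear the convergence flag of pixels adjacent to an unconverged one, so additional samples are spent on small neighborhoods rather than on isolated pixels.
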
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 64840c00f16..4bae9e5e1b4 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -18,19 +18,33 @@ CCL_NAMESPACE_BEGIN
#ifdef __BAKING__
-ccl_device_inline void compute_light_pass(
+ccl_device_noinline void compute_light_pass(
KernelGlobals *kg, ShaderData *sd, PathRadiance *L, uint rng_hash, int pass_filter, int sample)
{
kernel_assert(kernel_data.film.use_light_pass);
- PathRadiance L_sample;
- PathState state;
- Ray ray;
float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
- /* emission and indirect shader data memory used by various functions */
- ShaderData emission_sd, indirect_sd;
+ /* Emission and indirect shader data memory used by various functions. */
+ ShaderDataTinyStorage emission_sd_storage;
+ ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+ ShaderData indirect_sd;
+
+ /* Init radiance. */
+ path_radiance_init(kg, L);
+
+ /* Init path state. */
+ PathState state;
+ path_state_init(kg, emission_sd, &state, rng_hash, sample, NULL);
+
+ /* Evaluate surface shader. */
+ shader_eval_surface(kg, sd, &state, NULL, state.flag);
+ /* TODO, disable more closures we don't need besides transparent */
+ shader_bsdf_disable_transparency(kg, sd);
+
+ /* Init ray. */
+ Ray ray;
ray.P = sd->P + sd->Ng;
ray.D = -sd->Ng;
ray.t = FLT_MAX;
@@ -38,18 +52,6 @@ ccl_device_inline void compute_light_pass(
ray.time = 0.5f;
# endif
- /* init radiance */
- path_radiance_init(&L_sample, kernel_data.film.use_light_pass);
-
- /* init path state */
- path_state_init(kg, &emission_sd, &state, rng_hash, sample, NULL);
-
- /* evaluate surface shader */
- shader_eval_surface(kg, sd, &state, state.flag);
-
- /* TODO, disable more closures we don't need besides transparent */
- shader_bsdf_disable_transparency(kg, sd);
-
# ifdef __BRANCHED_PATH__
if (!kernel_data.integrator.branched) {
/* regular path tracer */
@@ -57,8 +59,7 @@ ccl_device_inline void compute_light_pass(
/* sample ambient occlusion */
if (pass_filter & BAKE_FILTER_AO) {
- kernel_path_ao(
- kg, sd, &emission_sd, &L_sample, &state, throughput, shader_bsdf_alpha(kg, sd));
+ kernel_path_ao(kg, sd, emission_sd, L, &state, throughput, shader_bsdf_alpha(kg, sd));
}
/* sample emission */
@@ -66,26 +67,27 @@ ccl_device_inline void compute_light_pass(
bool is_volume_boundary = (state.volume_bounce > 0) || (state.volume_bounds_bounce > 0);
float3 emission = indirect_primitive_emission(
kg, sd, 0.0f, sd->P_pick, sd->N_pick, state.flag, state.ray_pdf, is_volume_boundary);
- path_radiance_accum_emission(&L_sample, &state, throughput, emission);
+ path_radiance_accum_emission(kg, L, &state, throughput, emission);
}
bool is_sss_sample = false;
# ifdef __SUBSURFACE__
/* sample subsurface scattering */
- if ((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
- /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
+ if ((pass_filter & BAKE_FILTER_DIFFUSE) && (sd->flag & SD_BSSRDF)) {
+ /* When mixing BSSRDF and BSDF closures we should skip BSDF lighting
+ * if scattering was successful. */
SubsurfaceIndirectRays ss_indirect;
kernel_path_subsurface_init_indirect(&ss_indirect);
if (kernel_path_subsurface_scatter(
- kg, sd, &emission_sd, &L_sample, &state, &ray, &throughput, &ss_indirect)) {
+ kg, sd, emission_sd, L, &state, &ray, &throughput, &ss_indirect)) {
while (ss_indirect.num_rays) {
- kernel_path_subsurface_setup_indirect(
- kg, &ss_indirect, &state, &ray, &L_sample, &throughput);
+ kernel_path_subsurface_setup_indirect(kg, &ss_indirect, &state, &ray, L, &throughput);
+ kernel_path_indirect(kg, &indirect_sd, emission_sd, &ray, throughput, &state, L);
indirect_sd.P_pick = sd->P_pick;
indirect_sd.N_pick = sd->N_pick;
kernel_path_indirect(
- kg, &indirect_sd, &emission_sd, &ray, throughput, &state, &L_sample);
+ kg, &indirect_sd, emission_sd, &ray, throughput, &state, L);
}
is_sss_sample = true;
}
@@ -94,20 +96,20 @@ ccl_device_inline void compute_light_pass(
/* sample light and BSDF */
if (!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) {
- kernel_path_surface_connect_light(kg, sd, &emission_sd, throughput, &state, &L_sample);
+ kernel_path_surface_connect_light(kg, sd, emission_sd, throughput, &state, L);
- if (kernel_path_surface_bounce(kg, sd, &throughput, &state, &L_sample.state, &ray)) {
+ if (kernel_path_surface_bounce(kg, sd, &throughput, &state, &L->state, &ray)) {
# ifdef __LAMP_MIS__
state.ray_t = 0.0f;
# endif
/* compute indirect light */
indirect_sd.P_pick = sd->P_pick;
indirect_sd.N_pick = sd->N_pick;
- kernel_path_indirect(kg, &indirect_sd, &emission_sd, &ray, throughput, &state, &L_sample);
+ kernel_path_indirect(kg, &indirect_sd, emission_sd, &ray, throughput, &state, L);
/* sum and reset indirect light pass variables for the next samples */
- path_radiance_sum_indirect(&L_sample);
- path_radiance_reset_indirect(&L_sample);
+ path_radiance_sum_indirect(L);
+ path_radiance_reset_indirect(L);
}
}
# ifdef __BRANCHED_PATH__
@@ -117,7 +119,7 @@ ccl_device_inline void compute_light_pass(
/* sample ambient occlusion */
if (pass_filter & BAKE_FILTER_AO) {
- kernel_branched_path_ao(kg, sd, &emission_sd, &L_sample, &state, throughput);
+ kernel_branched_path_ao(kg, sd, emission_sd, L, &state, throughput);
}
/* sample emission */
@@ -125,15 +127,16 @@ ccl_device_inline void compute_light_pass(
bool is_volume_boundary = (state.volume_bounce > 0) || (state.volume_bounds_bounce > 0);
float3 emission = indirect_primitive_emission(
kg, sd, 0.0f, sd->P_pick, sd->N_pick, state.flag, state.ray_pdf, is_volume_boundary);
- path_radiance_accum_emission(&L_sample, &state, throughput, emission);
+ path_radiance_accum_emission(kg, L, &state, throughput, emission);
}
# ifdef __SUBSURFACE__
/* sample subsurface scattering */
- if ((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
- /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
+ if ((pass_filter & BAKE_FILTER_DIFFUSE) && (sd->flag & SD_BSSRDF)) {
+ /* When mixing BSSRDF and BSDF closures we should skip BSDF lighting
+ * if scattering was successful. */
kernel_branched_path_subsurface_scatter(
- kg, sd, &indirect_sd, &emission_sd, &L_sample, &state, &ray, throughput);
+ kg, sd, &indirect_sd, emission_sd, L, &state, &ray, throughput);
}
# endif
@@ -144,19 +147,16 @@ ccl_device_inline void compute_light_pass(
if (kernel_data.integrator.use_direct_light) {
int all = kernel_data.integrator.sample_all_lights_direct;
kernel_branched_path_surface_connect_light(
- kg, sd, &emission_sd, &state, throughput, 1.0f, &L_sample, all);
+ kg, sd, emission_sd, &state, throughput, 1.0f, L, all);
}
# endif
/* indirect light */
kernel_branched_path_surface_indirect_light(
- kg, sd, &indirect_sd, &emission_sd, throughput, 1.0f, &state, &L_sample);
+ kg, sd, &indirect_sd, emission_sd, throughput, 1.0f, &state, L);
}
}
# endif
-
- /* accumulate into master L */
- path_radiance_accum_sample(L, &L_sample);
}
/* this helps with AA but it's not the real solution as it does not AA the geometry
@@ -184,10 +184,6 @@ ccl_device_inline float3 kernel_bake_shader_bsdf(KernelGlobals *kg,
return shader_bsdf_glossy(kg, sd);
case SHADER_EVAL_TRANSMISSION:
return shader_bsdf_transmission(kg, sd);
-# ifdef __SUBSURFACE__
- case SHADER_EVAL_SUBSURFACE:
- return shader_bsdf_subsurface(kg, sd);
-# endif
default:
kernel_assert(!"Unknown bake type passed to BSDF evaluate");
return make_float3(0.0f, 0.0f, 0.0f);
@@ -215,12 +211,12 @@ ccl_device float3 kernel_bake_evaluate_direct_indirect(KernelGlobals *kg,
}
else {
/* surface color of the pass only */
- shader_eval_surface(kg, sd, state, 0);
+ shader_eval_surface(kg, sd, state, NULL, 0);
return kernel_bake_shader_bsdf(kg, sd, type);
}
}
else {
- shader_eval_surface(kg, sd, state, 0);
+ shader_eval_surface(kg, sd, state, NULL, 0);
color = kernel_bake_shader_bsdf(kg, sd, type);
}
@@ -235,41 +231,28 @@ ccl_device float3 kernel_bake_evaluate_direct_indirect(KernelGlobals *kg,
return out;
}
-ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
- ccl_global uint4 *input,
- ccl_global float4 *output,
- ShaderEvalType type,
- int pass_filter,
- int i,
- int offset,
- int sample)
+ccl_device void kernel_bake_evaluate(
+ KernelGlobals *kg, ccl_global float *buffer, int sample, int x, int y, int offset, int stride)
{
- ShaderData sd;
- PathState state = {0};
- uint4 in = input[i * 2];
- uint4 diff = input[i * 2 + 1];
-
- float3 out = make_float3(0.0f, 0.0f, 0.0f);
+ /* Setup render buffers. */
+ const int index = offset + x + y * stride;
+ const int pass_stride = kernel_data.film.pass_stride;
+ buffer += index * pass_stride;
- int object = in.x;
- int prim = in.y;
+ ccl_global float *primitive = buffer + kernel_data.film.pass_bake_primitive;
+ ccl_global float *differential = buffer + kernel_data.film.pass_bake_differential;
+ ccl_global float *output = buffer + kernel_data.film.pass_combined;
+ int prim = __float_as_uint(primitive[1]);
if (prim == -1)
return;
- float u = __uint_as_float(in.z);
- float v = __uint_as_float(in.w);
-
- float dudx = __uint_as_float(diff.x);
- float dudy = __uint_as_float(diff.y);
- float dvdx = __uint_as_float(diff.z);
- float dvdy = __uint_as_float(diff.w);
+ prim += kernel_data.bake.tri_offset;
+ /* Random number generator. */
+ uint rng_hash = hash_uint2(x, y) ^ kernel_data.integrator.seed;
int num_samples = kernel_data.integrator.aa_samples;
- /* random number generator */
- uint rng_hash = cmj_hash(offset + i, kernel_data.integrator.seed);
-
float filter_x, filter_y;
if (sample == 0) {
filter_x = filter_y = 0.5f;
@@ -278,23 +261,29 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
path_rng_2D(kg, rng_hash, sample, num_samples, PRNG_FILTER_U, &filter_x, &filter_y);
}
- /* subpixel u/v offset */
+ /* Barycentric UV with subpixel offset. */
+ float u = primitive[2];
+ float v = primitive[3];
+
+ float dudx = differential[0];
+ float dudy = differential[1];
+ float dvdx = differential[2];
+ float dvdy = differential[3];
+
if (sample > 0) {
u = bake_clamp_mirror_repeat(u + dudx * (filter_x - 0.5f) + dudy * (filter_y - 0.5f), 1.0f);
v = bake_clamp_mirror_repeat(v + dvdx * (filter_x - 0.5f) + dvdy * (filter_y - 0.5f),
1.0f - u);
}
- /* triangle */
+ /* Shader data setup. */
+ int object = kernel_data.bake.object_index;
int shader;
float3 P, Ng;
triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
- /* light passes */
- PathRadiance L;
- path_radiance_init(&L, kernel_data.film.use_light_pass);
-
+ ShaderData sd;
shader_setup_from_sample(
kg,
&sd,
@@ -312,7 +301,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
LAMP_NONE);
sd.I = sd.N;
- /* update differentials */
+ /* Setup differentials. */
sd.dP.dx = sd.dPdu * dudx + sd.dPdv * dvdx;
sd.dP.dy = sd.dPdu * dudy + sd.dPdv * dvdy;
sd.du.dx = dudx;
@@ -320,17 +309,24 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
sd.dv.dx = dvdx;
sd.dv.dy = dvdy;
- /* set RNG state for shaders that use sampling */
+ /* Set RNG state for shaders that use sampling. */
+ PathState state = {0};
state.rng_hash = rng_hash;
state.rng_offset = 0;
state.sample = sample;
state.num_samples = num_samples;
state.min_ray_pdf = FLT_MAX;
- /* light passes if we need more than color */
- if (pass_filter & ~BAKE_FILTER_COLOR)
+ /* Light passes if we need more than color. */
+ PathRadiance L;
+ int pass_filter = kernel_data.bake.pass_filter;
+
+ if (kernel_data.bake.pass_filter & ~BAKE_FILTER_COLOR)
compute_light_pass(kg, &sd, &L, rng_hash, pass_filter, sample);
+ float3 out = make_float3(0.0f, 0.0f, 0.0f);
+
+ ShaderEvalType type = (ShaderEvalType)kernel_data.bake.type;
switch (type) {
/* data passes */
case SHADER_EVAL_NORMAL:
@@ -338,7 +334,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
case SHADER_EVAL_EMISSION: {
if (type != SHADER_EVAL_NORMAL || (sd.flag & SD_HAS_BUMP)) {
int path_flag = (type == SHADER_EVAL_EMISSION) ? PATH_RAY_EMISSION : 0;
- shader_eval_surface(kg, &sd, &state, path_flag);
+ shader_eval_surface(kg, &sd, &state, NULL, path_flag);
}
if (type == SHADER_EVAL_NORMAL) {
@@ -391,11 +387,6 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
if ((pass_filter & BAKE_FILTER_TRANSMISSION_INDIRECT) == BAKE_FILTER_TRANSMISSION_INDIRECT)
out += L.indirect_transmission;
- if ((pass_filter & BAKE_FILTER_SUBSURFACE_DIRECT) == BAKE_FILTER_SUBSURFACE_DIRECT)
- out += L.direct_subsurface;
- if ((pass_filter & BAKE_FILTER_SUBSURFACE_INDIRECT) == BAKE_FILTER_SUBSURFACE_INDIRECT)
- out += L.indirect_subsurface;
-
if ((pass_filter & BAKE_FILTER_EMISSION) != 0)
out += L.emission;
@@ -420,13 +411,6 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
kg, &sd, &state, L.direct_transmission, L.indirect_transmission, type, pass_filter);
break;
}
- case SHADER_EVAL_SUBSURFACE: {
-# ifdef __SUBSURFACE__
- out = kernel_bake_evaluate_direct_indirect(
- kg, &sd, &state, L.direct_subsurface, L.indirect_subsurface, type, pass_filter);
-# endif
- break;
- }
# endif
/* extra */
@@ -451,7 +435,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
/* evaluate */
int path_flag = 0; /* we can't know which type of BSDF this is for */
- shader_eval_surface(kg, &sd, &state, path_flag | PATH_RAY_EMISSION);
+ shader_eval_surface(kg, &sd, &state, NULL, path_flag | PATH_RAY_EMISSION);
out = shader_background_eval(&sd);
break;
}
@@ -463,10 +447,8 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
}
/* write output */
- const float output_fac = 1.0f / num_samples;
- const float4 scaled_result = make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
-
- output[i] = (sample == 0) ? scaled_result : output[i] + scaled_result;
+ const float4 result = make_float4(out.x, out.y, out.z, 1.0f);
+ kernel_write_pass_float4(output, result);
}
#endif /* __BAKING__ */
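
With the rewrite above, baking no longer keeps a separately averaged output array: each sample writes its unweighted float4 result into the combined pass of the render buffer, and the average over samples is taken when the buffer is resolved. A small sketch of that accumulate-then-resolve split (write_pass and resolve_pass are illustrative stand-ins for the kernel's buffer I/O):

/* Accumulate one sample's result into a 4-float pass of the render buffer. */
static void write_pass(float *pass, const float value[4])
{
  pass[0] += value[0];
  pass[1] += value[1];
  pass[2] += value[2];
  pass[3] += value[3];
}

/* Resolve for display or export: divide the accumulated sum by the sample count. */
static void resolve_pass(const float *pass, int num_samples, float out[4])
{
  const float inv = 1.0f / (float)num_samples;
  out[0] = pass[0] * inv;
  out[1] = pass[1] * inv;
  out[2] = pass[2] * inv;
  out[3] = pass[3] * inv;
}
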
@@ -530,7 +512,7 @@ ccl_device void kernel_background_evaluate(KernelGlobals *kg,
/* evaluate */
int path_flag = 0; /* we can't know which type of BSDF this is for */
- shader_eval_surface(kg, &sd, &state, path_flag | PATH_RAY_EMISSION);
+ shader_eval_surface(kg, &sd, &state, NULL, path_flag | PATH_RAY_EMISSION);
float3 color = shader_background_eval(&sd);
/* write output */
diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h
index 1085930c33a..efe46d5b0dd 100644
--- a/intern/cycles/kernel/kernel_camera.h
+++ b/intern/cycles/kernel/kernel_camera.h
@@ -128,7 +128,7 @@ ccl_device void camera_sample_perspective(KernelGlobals *kg,
#ifdef __RAY_DIFFERENTIALS__
/* Ray differentials, computed from scratch using the raster coordinates
* because we don't want to be affected by depth of field. We compute
- * ray origin and direction for the center and two neighbouring pixels
+ * ray origin and direction for the center and two neighboring pixels
* and simply take their differences. */
float3 Pnostereo = transform_point(&cameratoworld, make_float3(0.0f, 0.0f, 0.0f));
@@ -237,7 +237,9 @@ ccl_device void camera_sample_orthographic(KernelGlobals *kg,
/* Panorama Camera */
ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
+#ifdef __CAMERA_MOTION__
const ccl_global DecomposedTransform *cam_motion,
+#endif
float raster_x,
float raster_y,
float lens_u,
@@ -303,7 +305,7 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
#ifdef __RAY_DIFFERENTIALS__
/* Ray differentials, computed from scratch using the raster coordinates
* because we don't want to be affected by depth of field. We compute
- * ray origin and direction for the center and two neighbouring pixels
+ * ray origin and direction for the center and two neighboring pixels
* and simply take their differences. */
float3 Pcenter = Pcamera;
float3 Dcenter = panorama_to_direction(cam, Pcenter.x, Pcenter.y);
@@ -377,9 +379,9 @@ ccl_device_inline void camera_sample(KernelGlobals *kg,
const int shutter_table_offset = kernel_data.cam.shutter_table_offset;
ray->time = lookup_table_read(kg, time, shutter_table_offset, SHUTTER_TABLE_SIZE);
/* TODO(sergey): Currently single rolling shutter effect type only
- * where scanlines are acquired from top to bottom and whole scanline
+ * where scan-lines are acquired from top to bottom and whole scan-line
* is acquired at once (no delay in acquisition happens between pixels
- * of single scanline).
+ * of single scan-line).
*
* Might want to support more models in the future.
*/
@@ -413,8 +415,12 @@ ccl_device_inline void camera_sample(KernelGlobals *kg,
camera_sample_orthographic(kg, raster_x, raster_y, lens_u, lens_v, ray);
}
else {
+#ifdef __CAMERA_MOTION__
const ccl_global DecomposedTransform *cam_motion = kernel_tex_array(__camera_motion);
camera_sample_panorama(&kernel_data.cam, cam_motion, raster_x, raster_y, lens_u, lens_v, ray);
+#else
+ camera_sample_panorama(&kernel_data.cam, raster_x, raster_y, lens_u, lens_v, ray);
+#endif
}
}
@@ -435,8 +441,22 @@ ccl_device_inline float camera_distance(KernelGlobals *kg, float3 P)
float3 camD = make_float3(cameratoworld.x.z, cameratoworld.y.z, cameratoworld.z.z);
return fabsf(dot((P - camP), camD));
}
- else
+ else {
+ return len(P - camP);
+ }
+}
+
+ccl_device_inline float camera_z_depth(KernelGlobals *kg, float3 P)
+{
+ if (kernel_data.cam.type != CAMERA_PANORAMA) {
+ Transform worldtocamera = kernel_data.cam.worldtocamera;
+ return transform_point(&worldtocamera, P).z;
+ }
+ else {
+ Transform cameratoworld = kernel_data.cam.cameratoworld;
+ float3 camP = make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w);
return len(P - camP);
+ }
}
ccl_device_inline float3 camera_direction_from_point(KernelGlobals *kg, float3 P)
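Editor's note: the new camera_z_depth above returns planar camera-space Z for projective cameras and falls back to radial distance for panoramic ones. A standalone C++ sketch of that logic follows; the Transform layout (three rows of a 3x4 affine matrix) mirrors Cycles but the names are simplified stand-ins.

#include <cmath>

struct float3 { float x, y, z; };
struct float4 { float x, y, z, w; };
struct Transform { float4 x, y, z; };  /* three rows of a 3x4 affine matrix */

static inline float3 transform_point(const Transform *t, const float3 p)
{
  return {t->x.x * p.x + t->x.y * p.y + t->x.z * p.z + t->x.w,
          t->y.x * p.x + t->y.y * p.y + t->y.z * p.z + t->y.w,
          t->z.x * p.x + t->z.y * p.y + t->z.z * p.z + t->z.w};
}

/* Perspective/orthographic: planar Z depth in camera space.
 * Panorama: no meaningful view plane, so use the distance to the camera origin. */
static inline float camera_z_depth_sketch(const Transform &worldtocamera,
                                          const float3 cam_origin,
                                          const float3 P,
                                          bool is_panorama)
{
  if (!is_panorama) {
    return transform_point(&worldtocamera, P).z;
  }
  const float dx = P.x - cam_origin.x, dy = P.y - cam_origin.y, dz = P.z - cam_origin.z;
  return std::sqrt(dx * dx + dy * dy + dz * dz);
}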
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index e8fedca4489..88f6a264a5a 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -35,11 +35,11 @@
# define __NODES_FEATURES__ NODE_FEATURE_ALL
#endif
+#include "util/util_half.h"
#include "util/util_math.h"
#include "util/util_simd.h"
-#include "util/util_half.h"
-#include "util/util_types.h"
#include "util/util_texture.h"
+#include "util/util_types.h"
#define ccl_addr_space
@@ -79,7 +79,7 @@ template<typename T> struct texture {
}
#if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
/* Reads 256 bytes but indexes in blocks of 128 bytes to maintain
- * compatibility with existing indicies and data structures.
+ * compatibility with existing indices and data structures.
*/
ccl_always_inline avxf fetch_avxf(const int index)
{
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 469b81d120b..4094e173da9 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -37,8 +37,11 @@ typedef unsigned long long uint64_t;
typedef unsigned short half;
typedef unsigned long long CUtexObject;
-#define FLT_MIN 1.175494350822287507969e-38f
-#define FLT_MAX 340282346638528859811704183484516925440.0f
+#ifdef CYCLES_CUBIN_CC
+# define FLT_MIN 1.175494350822287507969e-38f
+# define FLT_MAX 340282346638528859811704183484516925440.0f
+# define FLT_EPSILON 1.192092896e-07F
+#endif
__device__ half __float2half(const float f)
{
@@ -58,6 +61,7 @@ __device__ half __float2half(const float f)
# define ccl_device_forceinline __device__ __forceinline__
#endif
#define ccl_device_noinline __device__ __noinline__
+#define ccl_device_noinline_cpu ccl_device
#define ccl_global
#define ccl_static_constant __constant__
#define ccl_constant const
@@ -67,11 +71,13 @@ __device__ half __float2half(const float f)
#define ccl_may_alias
#define ccl_addr_space
#define ccl_restrict __restrict__
+#define ccl_loop_no_unroll
/* TODO(sergey): In theory we might use references with CUDA, however
* performance impact yet to be investigated.
*/
#define ccl_ref
#define ccl_align(n) __align__(n)
+#define ccl_optional_struct_init
#define ATTR_FALLTHROUGH
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index e040ea88d7c..ba7ab43a47a 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -35,6 +35,7 @@
#define ccl_device_inline ccl_device
#define ccl_device_forceinline ccl_device
#define ccl_device_noinline ccl_device ccl_noinline
+#define ccl_device_noinline_cpu ccl_device
#define ccl_may_alias
#define ccl_static_constant static __constant
#define ccl_constant __constant
@@ -45,6 +46,13 @@
#define ccl_restrict restrict
#define ccl_ref
#define ccl_align(n) __attribute__((aligned(n)))
+#define ccl_optional_struct_init
+
+#if __OPENCL_VERSION__ >= 200
+# define ccl_loop_no_unroll __attribute__((opencl_unroll_hint(1)))
+#else
+# define ccl_loop_no_unroll
+#endif
#ifdef __SPLIT_KERNEL__
# define ccl_addr_space __global
@@ -124,6 +132,8 @@
#define fminf(x, y) fmin(((float)(x)), ((float)(y)))
#define fmodf(x, y) fmod((float)(x), (float)(y))
#define sinhf(x) sinh(((float)(x)))
+#define coshf(x) cosh(((float)(x)))
+#define tanhf(x) tanh(((float)(x)))
/* Use native functions with possibly lower precision for performance,
* no issues found so far. */
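Editor's note: the OpenCL compat hunk above adds a ccl_loop_no_unroll macro that expands to an unroll hint on OpenCL 2.0 and to nothing elsewhere. The sketch below illustrates how such a macro is typically placed directly before a loop; the compiler branches shown are illustrative assumptions, not the exact Cycles set.

/* Illustrative stand-in for a ccl_loop_no_unroll-style macro. */
#if defined(__OPENCL_VERSION__) && __OPENCL_VERSION__ >= 200
#  define loop_no_unroll __attribute__((opencl_unroll_hint(1)))
#elif defined(__clang__)
#  define loop_no_unroll _Pragma("clang loop unroll(disable)")
#else
#  define loop_no_unroll
#endif

float sum_weights_sketch(const float *w, int n)
{
  float sum = 0.0f;
  loop_no_unroll
  for (int i = 0; i < n; i++) {
    sum += w[i];
  }
  return sum;
}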
diff --git a/intern/cycles/kernel/kernel_compat_optix.h b/intern/cycles/kernel/kernel_compat_optix.h
new file mode 100644
index 00000000000..970f5cf864c
--- /dev/null
+++ b/intern/cycles/kernel/kernel_compat_optix.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2019, NVIDIA Corporation.
+ * Copyright 2019, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __KERNEL_COMPAT_OPTIX_H__
+#define __KERNEL_COMPAT_OPTIX_H__
+
+#define OPTIX_DONT_INCLUDE_CUDA
+#include <optix.h>
+
+#define __KERNEL_GPU__
+#define __KERNEL_CUDA__ // OptiX kernels are implicitly CUDA kernels too
+#define __KERNEL_OPTIX__
+#define CCL_NAMESPACE_BEGIN
+#define CCL_NAMESPACE_END
+
+#ifndef ATTR_FALLTHROUGH
+# define ATTR_FALLTHROUGH
+#endif
+
+typedef unsigned int uint32_t;
+typedef unsigned long long uint64_t;
+typedef unsigned short half;
+typedef unsigned long long CUtexObject;
+#ifdef CYCLES_CUBIN_CC
+# define FLT_MIN 1.175494350822287507969e-38f
+# define FLT_MAX 340282346638528859811704183484516925440.0f
+# define FLT_EPSILON 1.192092896e-07F
+#endif
+
+__device__ half __float2half(const float f)
+{
+ half val;
+ asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
+ return val;
+}
+
+/* Selective nodes compilation. */
+#ifndef __NODES_MAX_GROUP__
+# define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX
+#endif
+#ifndef __NODES_FEATURES__
+# define __NODES_FEATURES__ NODE_FEATURE_ALL
+#endif
+
+#define ccl_device \
+ __device__ __forceinline__ // Function calls are bad for OptiX performance, so inline everything
+#define ccl_device_inline ccl_device
+#define ccl_device_forceinline ccl_device
+#define ccl_device_noinline __device__ __noinline__
+#define ccl_device_noinline_cpu ccl_device
+#define ccl_global
+#define ccl_static_constant __constant__
+#define ccl_constant const
+#define ccl_local
+#define ccl_local_param
+#define ccl_private
+#define ccl_may_alias
+#define ccl_addr_space
+#define ccl_loop_no_unroll
+#define ccl_restrict __restrict__
+#define ccl_ref
+#define ccl_align(n) __align__(n)
+
+// Zero initialize structs to help the compiler figure out scoping
+#define ccl_optional_struct_init = {}
+
+#define kernel_data __params.data // See kernel_globals.h
+#define kernel_tex_array(t) __params.t
+#define kernel_tex_fetch(t, index) __params.t[(index)]
+
+#define kernel_assert(cond)
+
+/* Types */
+
+#include "util/util_half.h"
+#include "util/util_types.h"
+
+#endif /* __KERNEL_COMPAT_OPTIX_H__ */
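Editor's note: the OptiX compat header defines ccl_optional_struct_init as "= {}" so locals are zero-initialized, which the comment says helps the compiler with scoping, while other devices leave it empty. A small standalone C++ sketch of that macro in use follows; SKETCH_OPTIX and LightSampleSketch are stand-ins, not the real Cycles symbols.

#ifdef SKETCH_OPTIX
#  define ccl_optional_struct_init = {}
#else
#  define ccl_optional_struct_init
#endif

struct LightSampleSketch {
  float pdf;
  float t;
  int prim;
};

void sample_lights_sketch(int num_lights)
{
  for (int i = 0; i < num_lights; i++) {
    /* Expands to "LightSampleSketch ls = {};" when SKETCH_OPTIX is defined,
     * and to a plain uninitialized declaration otherwise. */
    LightSampleSketch ls ccl_optional_struct_init;
    (void)ls;
  }
}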
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index 76fcc9d9e51..dc51d01bab7 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -17,17 +17,17 @@
CCL_NAMESPACE_BEGIN
/* Direction Emission */
-ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
- ShaderData *emission_sd,
- LightSample *ls,
- ccl_addr_space PathState *state,
- float3 I,
- differential3 dI,
- float t,
- float time)
+ccl_device_noinline_cpu float3 direct_emissive_eval(KernelGlobals *kg,
+ ShaderData *emission_sd,
+ LightSample *ls,
+ ccl_addr_space PathState *state,
+ float3 I,
+ differential3 dI,
+ float t,
+ float time)
{
/* setup shading at emitter */
- float3 eval;
+ float3 eval = make_float3(0.0f, 0.0f, 0.0f);
if (shader_constant_emission_eval(kg, ls->shader, &eval)) {
if ((ls->prim != PRIM_NONE) && dot(ls->Ng, I) < 0.0f) {
@@ -73,7 +73,7 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
/* No proper path flag, we're evaluating this for all closures. that's
* weak but we'd have to do multiple evaluations otherwise. */
path_state_modify_bounce(state, true);
- shader_eval_surface(kg, emission_sd, state, PATH_RAY_EMISSION);
+ shader_eval_surface(kg, emission_sd, state, NULL, PATH_RAY_EMISSION);
path_state_modify_bounce(state, false);
/* Evaluate closures. */
@@ -90,18 +90,23 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
eval *= ls->eval_fac;
+ if (ls->lamp != LAMP_NONE) {
+ const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, ls->lamp);
+ eval *= make_float3(klight->strength[0], klight->strength[1], klight->strength[2]);
+ }
+
return eval;
}
-ccl_device_noinline bool direct_emission(KernelGlobals *kg,
- ShaderData *sd,
- ShaderData *emission_sd,
- LightSample *ls,
- ccl_addr_space PathState *state,
- Ray *ray,
- BsdfEval *eval,
- bool *is_lamp,
- float rand_terminate)
+ccl_device_noinline_cpu bool direct_emission(KernelGlobals *kg,
+ ShaderData *sd,
+ ShaderData *emission_sd,
+ LightSample *ls,
+ ccl_addr_space PathState *state,
+ Ray *ray,
+ BsdfEval *eval,
+ bool *is_lamp,
+ float rand_terminate)
{
if (ls->pdf == 0.0f)
return false;
@@ -140,16 +145,14 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg,
#ifdef __PASSES__
/* use visibility flag to skip lights */
if (ls->shader & SHADER_EXCLUDE_ANY) {
- if (ls->shader & SHADER_EXCLUDE_DIFFUSE) {
+ if (ls->shader & SHADER_EXCLUDE_DIFFUSE)
eval->diffuse = make_float3(0.0f, 0.0f, 0.0f);
- eval->subsurface = make_float3(0.0f, 0.0f, 0.0f);
- }
if (ls->shader & SHADER_EXCLUDE_GLOSSY)
eval->glossy = make_float3(0.0f, 0.0f, 0.0f);
if (ls->shader & SHADER_EXCLUDE_TRANSMIT)
eval->transmission = make_float3(0.0f, 0.0f, 0.0f);
if (ls->shader & SHADER_EXCLUDE_SCATTER)
- eval->scatter = make_float3(0.0f, 0.0f, 0.0f);
+ eval->volume = make_float3(0.0f, 0.0f, 0.0f);
}
#endif
@@ -203,7 +206,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg,
/* Indirect Primitive Emission */
-ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg,
+ccl_device_noinline_cpu float3 indirect_primitive_emission(KernelGlobals *kg,
ShaderData *sd,
float t,
float3 P,
@@ -236,18 +239,16 @@ ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg,
/* Indirect Lamp Emission */
-ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
- ShaderData *emission_sd,
- ccl_addr_space PathState *state,
+ccl_device_noinline_cpu void indirect_lamp_emission(KernelGlobals *kg,
+ ShaderData *emission_sd,
+ ccl_addr_space PathState *state,
float3 N,
- Ray *ray,
- float3 *emission)
+ PathRadiance *L,
+ Ray *ray,
+ float3 throughput)
{
- bool hit_lamp = false;
-
- *emission = make_float3(0.0f, 0.0f, 0.0f);
for (int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) {
- LightSample ls;
+ LightSample ls ccl_optional_struct_init;
if (!lamp_light_eval(kg, lamp, ray->P, ray->D, ray->t, &ls))
continue;
@@ -265,7 +266,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
}
#endif
- float3 L = direct_emissive_eval(
+ float3 lamp_L = direct_emissive_eval(
kg, emission_sd, &ls, state, -ray->D, ray->dD, ls.t, ray->time);
bool has_volume = false;
@@ -276,7 +277,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
volume_ray.t = ls.t;
float3 volume_tp = make_float3(1.0f, 1.0f, 1.0f);
kernel_volume_shadow(kg, emission_sd, state, &volume_ray, &volume_tp);
- L *= volume_tp;
+ lamp_L *= volume_tp;
}
has_volume = ((emission_sd->flag & SD_HAS_VOLUME) != 0);
@@ -289,23 +290,21 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
/* multiply with light picking probability to pdf */
ls.pdf *= light_distribution_pdf(kg, ray->P, N, ~ls.lamp, -1, has_volume);
float mis_weight = power_heuristic(state->ray_pdf, ls.pdf);
- L *= mis_weight;
+ lamp_L *= mis_weight;
}
- *emission += L;
- hit_lamp = true;
+ path_radiance_accum_emission(kg, L, state, throughput, lamp_L);
}
-
- return hit_lamp;
}
/* Indirect Background */
-ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
- ShaderData *emission_sd,
+ccl_device_noinline_cpu float3 indirect_background(KernelGlobals *kg,
+ ShaderData *emission_sd,
float3 N,
- ccl_addr_space PathState *state,
- ccl_addr_space Ray *ray)
+ ccl_addr_space PathState *state,
+ ccl_global float *buffer,
+ ccl_addr_space Ray *ray)
{
#ifdef __BACKGROUND__
int shader = kernel_data.background.surface_shader;
@@ -328,7 +327,7 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
has_volume = ((emission_sd->flag & SD_HAS_VOLUME) != 0);
# endif
/* Evaluate background shader. */
- float3 L;
+ float3 L = make_float3(0.0f, 0.0f, 0.0f);
if (!shader_constant_emission_eval(kg, shader, &L)) {
# ifdef __SPLIT_KERNEL__
Ray priv_ray = *ray;
@@ -338,7 +337,7 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
# endif
path_state_modify_bounce(state, true);
- shader_eval_surface(kg, emission_sd, state, state->flag | PATH_RAY_EMISSION);
+ shader_eval_surface(kg, emission_sd, state, buffer, state->flag | PATH_RAY_EMISSION);
path_state_modify_bounce(state, false);
L = shader_background_eval(emission_sd);
@@ -362,7 +361,7 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
/* check if background light exists or if we should skip pdf */
int res_x = kernel_data.integrator.pdf_background_res_x;
- if (!(state->flag & PATH_RAY_MIS_SKIP) && res_x) {
+ if (!(state->flag & PATH_RAY_MIS_SKIP) && kernel_data.background.use_mis) {
/* multiple importance sampling, get background light pdf for ray
* direction, and compute weight with respect to BSDF pdf */
float pdf = background_light_pdf(kg, P_pick, ray->D);
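Editor's note: the indirect lamp emission hunk above keeps weighting BSDF-sampled lamp hits with power_heuristic(state->ray_pdf, ls.pdf) before accumulating into the path radiance. A standalone C++ sketch of that power heuristic (beta = 2) follows; names are stand-ins and the pdf values are made up for illustration.

#include <cstdio>

static inline float power_heuristic_sketch(float a, float b)
{
  const float a2 = a * a;
  return a2 / (a2 + b * b);
}

int main()
{
  const float bsdf_pdf = 0.8f;  /* pdf of having generated this ray from the BSDF */
  const float light_pdf = 0.2f; /* pdf of sampling the same direction from the lamp */
  float lamp_contribution = 1.0f;
  /* Down-weight the BSDF-sampled hit so combining both strategies stays unbiased. */
  lamp_contribution *= power_heuristic_sketch(bsdf_pdf, light_pdf);
  printf("weighted contribution: %f\n", lamp_contribution);
  return 0;
}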
diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h
index d20f1adf663..8344f4b4f47 100644
--- a/intern/cycles/kernel/kernel_film.h
+++ b/intern/cycles/kernel/kernel_film.h
@@ -16,18 +16,60 @@
CCL_NAMESPACE_BEGIN
-ccl_device float4 film_map(KernelGlobals *kg, float4 irradiance, float scale)
+ccl_device float4 film_get_pass_result(KernelGlobals *kg,
+ ccl_global float *buffer,
+ float sample_scale,
+ int index,
+ bool use_display_sample_scale)
{
- float exposure = kernel_data.film.exposure;
- float4 result = irradiance * scale;
+ float4 pass_result;
+
+ int display_pass_stride = kernel_data.film.display_pass_stride;
+ int display_pass_components = kernel_data.film.display_pass_components;
+
+ if (display_pass_components == 4) {
+ float4 in = *(ccl_global float4 *)(buffer + display_pass_stride +
+ index * kernel_data.film.pass_stride);
+ float alpha = use_display_sample_scale ?
+ (kernel_data.film.use_display_pass_alpha ? in.w : 1.0f / sample_scale) :
+ 1.0f;
+
+ pass_result = make_float4(in.x, in.y, in.z, alpha);
+
+ int display_divide_pass_stride = kernel_data.film.display_divide_pass_stride;
+ if (display_divide_pass_stride != -1) {
+ ccl_global float4 *divide_in = (ccl_global float4 *)(buffer + display_divide_pass_stride +
+ index * kernel_data.film.pass_stride);
+ float3 divided = safe_divide_even_color(float4_to_float3(pass_result),
+ float4_to_float3(*divide_in));
+ pass_result = make_float4(divided.x, divided.y, divided.z, pass_result.w);
+ }
+
+ if (kernel_data.film.use_display_exposure) {
+ float exposure = kernel_data.film.exposure;
+ pass_result *= make_float4(exposure, exposure, exposure, alpha);
+ }
+ }
+ else if (display_pass_components == 1) {
+ ccl_global float *in = (ccl_global float *)(buffer + display_pass_stride +
+ index * kernel_data.film.pass_stride);
+ pass_result = make_float4(*in, *in, *in, 1.0f / sample_scale);
+ }
+
+ return pass_result;
+}
+
+ccl_device float4 film_map(KernelGlobals *kg, float4 rgba_in, float scale)
+{
+ float4 result;
/* conversion to srgb */
- result.x = color_linear_to_srgb(result.x * exposure);
- result.y = color_linear_to_srgb(result.y * exposure);
- result.z = color_linear_to_srgb(result.z * exposure);
+ result.x = color_linear_to_srgb(rgba_in.x * scale);
+ result.y = color_linear_to_srgb(rgba_in.y * scale);
+ result.z = color_linear_to_srgb(rgba_in.z * scale);
/* clamp since alpha might be > 1.0 due to russian roulette */
- result.w = saturate(result.w);
+ result.w = saturate(rgba_in.w * scale);
return result;
}
@@ -57,15 +99,15 @@ ccl_device void kernel_film_convert_to_byte(KernelGlobals *kg,
/* buffer offset */
int index = offset + x + y * stride;
- rgba += index;
- buffer += index * kernel_data.film.pass_stride;
+ bool use_display_sample_scale = (kernel_data.film.display_divide_pass_stride == -1);
+ float4 rgba_in = film_get_pass_result(kg, buffer, sample_scale, index, use_display_sample_scale);
/* map colors */
- float4 irradiance = *((ccl_global float4 *)buffer);
- float4 float_result = film_map(kg, irradiance, sample_scale);
- uchar4 byte_result = film_float_to_byte(float_result);
+ float4 float_result = film_map(kg, rgba_in, use_display_sample_scale ? sample_scale : 1.0f);
+ uchar4 uchar_result = film_float_to_byte(float_result);
- *rgba = byte_result;
+ rgba += index;
+ *rgba = uchar_result;
}
ccl_device void kernel_film_convert_to_half_float(KernelGlobals *kg,
@@ -80,20 +122,11 @@ ccl_device void kernel_film_convert_to_half_float(KernelGlobals *kg,
/* buffer offset */
int index = offset + x + y * stride;
- ccl_global float4 *in = (ccl_global float4 *)(buffer + index * kernel_data.film.pass_stride);
- ccl_global half *out = (ccl_global half *)rgba + index * 4;
-
- float exposure = kernel_data.film.exposure;
-
- float4 rgba_in = *in;
+ bool use_display_sample_scale = (kernel_data.film.display_divide_pass_stride == -1);
+ float4 rgba_in = film_get_pass_result(kg, buffer, sample_scale, index, use_display_sample_scale);
- if (exposure != 1.0f) {
- rgba_in.x *= exposure;
- rgba_in.y *= exposure;
- rgba_in.z *= exposure;
- }
-
- float4_store_half(out, rgba_in, sample_scale);
+ ccl_global half *out = (ccl_global half *)rgba + index * 4;
+ float4_store_half(out, rgba_in, use_display_sample_scale ? sample_scale : 1.0f);
}
CCL_NAMESPACE_END
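Editor's note: film_get_pass_result above reads the selected display pass, optionally divides it by a second pass, and only applies the sample scale and exposure where appropriate. A condensed standalone C++ sketch of that flow follows; the buffer layout, field names, and the unconditional exposure step are simplifying assumptions, not the exact Cycles behavior.

struct float4 { float x, y, z, w; };

static inline float4 get_display_value_sketch(const float *pixel,     /* one pixel's passes */
                                              int pass_offset,        /* selected pass */
                                              int divide_pass_offset, /* -1 if unused */
                                              float sample_scale,     /* 1 / num_samples */
                                              float exposure)
{
  float4 v = {pixel[pass_offset + 0], pixel[pass_offset + 1], pixel[pass_offset + 2], 1.0f};
  if (divide_pass_offset != -1) {
    /* e.g. dividing a color pass by an albedo-like pass; guard against zero. */
    const float *d = pixel + divide_pass_offset;
    v.x = (d[0] != 0.0f) ? v.x / d[0] : v.x;
    v.y = (d[1] != 0.0f) ? v.y / d[1] : v.y;
    v.z = (d[2] != 0.0f) ? v.z / d[2] : v.z;
  }
  else {
    /* Only un-divided passes still need averaging over samples. */
    v.x *= sample_scale;
    v.y *= sample_scale;
    v.z *= sample_scale;
  }
  v.x *= exposure;
  v.y *= exposure;
  v.z *= exposure;
  return v;
}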
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index 9dbf3b7ea2e..c186e8560eb 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -22,8 +22,8 @@
#include "kernel/kernel_profiling.h"
#ifdef __KERNEL_CPU__
-# include "util/util_vector.h"
# include "util/util_map.h"
+# include "util/util_vector.h"
#endif
#ifdef __KERNEL_OPENCL__
@@ -90,12 +90,43 @@ typedef struct KernelGlobals {
#endif /* __KERNEL_CPU__ */
+#ifdef __KERNEL_OPTIX__
+
+typedef struct ShaderParams {
+ uint4 *input;
+ float4 *output;
+ int type;
+ int filter;
+ int sx;
+ int offset;
+ int sample;
+} ShaderParams;
+
+typedef struct KernelParams {
+ WorkTile tile;
+ KernelData data;
+ ShaderParams shader;
+# define KERNEL_TEX(type, name) const type *name;
+# include "kernel/kernel_textures.h"
+} KernelParams;
+
+typedef struct KernelGlobals {
+# ifdef __VOLUME__
+ VolumeState volume_state;
+# endif
+ Intersection hits_stack[64];
+} KernelGlobals;
+
+extern "C" __constant__ KernelParams __params;
+
+#else /* __KERNEL_OPTIX__ */
+
/* For CUDA, constant memory textures must be globals, so we can't put them
* into a struct. As a result we don't actually use this struct and use actual
* globals and simply pass along a NULL pointer everywhere, which we hope gets
* optimized out. */
-#ifdef __KERNEL_CUDA__
+# ifdef __KERNEL_CUDA__
__constant__ KernelData __data;
typedef struct KernelGlobals {
@@ -103,10 +134,12 @@ typedef struct KernelGlobals {
Intersection hits_stack[64];
} KernelGlobals;
-# define KERNEL_TEX(type, name) const __constant__ __device__ type *name;
-# include "kernel/kernel_textures.h"
+# define KERNEL_TEX(type, name) const __constant__ __device__ type *name;
+# include "kernel/kernel_textures.h"
+
+# endif /* __KERNEL_CUDA__ */
-#endif /* __KERNEL_CUDA__ */
+#endif /* __KERNEL_OPTIX__ */
/* OpenCL */
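Editor's note: the new OptiX KernelParams above uses the kernel_textures.h X-macro list to declare one pointer member per texture. The standalone C++ sketch below shows that expansion pattern with two illustrative entries; the entry names and the KernelParamsSketch type are stand-ins, not the real texture list.

struct float4 { float x, y, z, w; };
typedef unsigned int uint;

struct KernelParamsSketch {
  /* Each KERNEL_TEX(type, name) line expands to: const type *name; */
#define KERNEL_TEX(type, name) const type *name;
  KERNEL_TEX(float4, __lights)
  KERNEL_TEX(uint, __sample_pattern_lut)
#undef KERNEL_TEX
};

/* Device code then reads e.g. params.__lights[index], which is roughly what
 * kernel_tex_fetch(t, index) resolves to on OptiX via the __params constant. */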
diff --git a/intern/cycles/kernel/kernel_id_passes.h b/intern/cycles/kernel/kernel_id_passes.h
index c1f4e39e5e7..1ca42e933d1 100644
--- a/intern/cycles/kernel/kernel_id_passes.h
+++ b/intern/cycles/kernel/kernel_id_passes.h
@@ -1,18 +1,18 @@
/*
-* Copyright 2018 Blender Foundation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Copyright 2018 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
CCL_NAMESPACE_BEGIN
@@ -32,7 +32,7 @@ ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer,
/* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
if (id_buffer[slot].x == ID_NONE) {
/* Use an atomic to claim this slot.
- * If a different thread got here first, try again from this slot on. */
+ * If a different thread got here first, try again from this slot on. */
float old_id = atomic_compare_and_swap_float(buffer + slot * 2, ID_NONE, id);
if (old_id != ID_NONE && old_id != id) {
continue;
@@ -54,7 +54,7 @@ ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer,
break;
}
/* If there already is a slot for that ID, add the weight.
- * If no slot was found, add it to the last. */
+ * If no slot was found, add it to the last. */
else if (id_buffer[slot].x == id || slot == num_slots - 1) {
id_buffer[slot].y += weight;
break;
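Editor's note: kernel_write_id_slots above scans per-pixel ID slots, claims an empty one with an atomic compare-and-swap, and accumulates weight into the matching slot. A standalone C++ sketch of that claiming scheme follows, using std::atomic in place of the GPU atomics; names, the ID_NONE value, and the non-atomic weight add are simplifications.

#include <atomic>

static constexpr float ID_NONE_SKETCH = 0.0f;

struct IdSlot {
  std::atomic<float> id{ID_NONE_SKETCH};
  float weight = 0.0f;
};

void write_id_weight_sketch(IdSlot *slots, int num_slots, float id, float weight)
{
  for (int slot = 0; slot < num_slots; slot++) {
    if (slots[slot].id.load() == ID_NONE_SKETCH) {
      /* Claim the empty slot; if another thread won the race with a different
       * ID, keep scanning from this slot on. */
      float expected = ID_NONE_SKETCH;
      if (!slots[slot].id.compare_exchange_strong(expected, id) && expected != id) {
        continue;
      }
    }
    /* Matching slot found, or last slot used as the catch-all. */
    if (slots[slot].id.load() == id || slot == num_slots - 1) {
      slots[slot].weight += weight; /* the kernel does this atomically */
      break;
    }
  }
}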
diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h
index f7270a14940..b9c48b86a5d 100644
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -38,43 +38,13 @@ ccl_device_inline int cmj_fast_mod_pow2(int a, int b)
ccl_device_inline int cmj_fast_div_pow2(int a, int b)
{
kernel_assert(b > 1);
-#if defined(__KERNEL_SSE2__)
-# ifdef _MSC_VER
- unsigned long ctz;
- _BitScanForward(&ctz, b);
- return a >> ctz;
-# else
- return a >> __builtin_ctz(b);
-# endif
-#elif defined(__KERNEL_CUDA__)
- return a >> (__ffs(b) - 1);
-#else
- return a / b;
-#endif
+ return a >> count_trailing_zeros(b);
}
ccl_device_inline uint cmj_w_mask(uint w)
{
kernel_assert(w > 1);
-#if defined(__KERNEL_SSE2__)
-# ifdef _MSC_VER
- unsigned long leading_zero;
- _BitScanReverse(&leading_zero, w);
- return ((1 << (1 + leading_zero)) - 1);
-# else
- return ((1 << (32 - __builtin_clz(w))) - 1);
-# endif
-#elif defined(__KERNEL_CUDA__)
- return ((1 << (32 - __clz(w))) - 1);
-#else
- w |= w >> 1;
- w |= w >> 2;
- w |= w >> 4;
- w |= w >> 8;
- w |= w >> 16;
-
- return w;
-#endif
+ return ((1 << (32 - count_leading_zeros(w))) - 1);
}
ccl_device_inline uint cmj_permute(uint i, uint l, uint p)
@@ -225,4 +195,37 @@ ccl_device void cmj_sample_2D(int s, int N, int p, float *fx, float *fy)
}
#endif
+ccl_device float pmj_sample_1D(KernelGlobals *kg, int sample, int rng_hash, int dimension)
+{
+ /* Fallback to random */
+ if (sample >= NUM_PMJ_SAMPLES) {
+ const int p = rng_hash + dimension;
+ return cmj_randfloat(sample, p);
+ }
+ else {
+ const uint mask = cmj_hash_simple(dimension, rng_hash) & 0x007fffff;
+ const int index = ((dimension % NUM_PMJ_PATTERNS) * NUM_PMJ_SAMPLES + sample) * 2;
+ return __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ mask) - 1.0f;
+ }
+}
+
+ccl_device float2 pmj_sample_2D(KernelGlobals *kg, int sample, int rng_hash, int dimension)
+{
+ if (sample >= NUM_PMJ_SAMPLES) {
+ const int p = rng_hash + dimension;
+ const float fx = cmj_randfloat(sample, p);
+ const float fy = cmj_randfloat(sample, p + 1);
+ return make_float2(fx, fy);
+ }
+ else {
+ const int index = ((dimension % NUM_PMJ_PATTERNS) * NUM_PMJ_SAMPLES + sample) * 2;
+ const uint maskx = cmj_hash_simple(dimension, rng_hash) & 0x007fffff;
+ const uint masky = cmj_hash_simple(dimension + 1, rng_hash) & 0x007fffff;
+ const float fx = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ maskx) - 1.0f;
+ const float fy = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index + 1) ^ masky) -
+ 1.0f;
+ return make_float2(fx, fy);
+ }
+}
+
CCL_NAMESPACE_END
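Editor's note: pmj_sample_1D above scrambles a table entry with "__uint_as_float(bits ^ mask) - 1.0f", where the mask touches only the low 23 mantissa bits. The standalone C++ sketch below shows that bit trick under the assumption (made for this sketch) that the table stores each sample offset into the [1, 2) float range; helper names are stand-ins.

#include <cstdint>
#include <cstring>
#include <cstdio>

static inline float uint_as_float_sketch(uint32_t u)
{
  float f;
  std::memcpy(&f, &u, sizeof(f));
  return f;
}

static inline uint32_t float_as_uint_sketch(float f)
{
  uint32_t u;
  std::memcpy(&u, &f, sizeof(u));
  return u;
}

int main()
{
  const float sample = 0.3125f;                                /* precomputed sample in [0, 1) */
  const uint32_t stored = float_as_uint_sketch(sample + 1.0f); /* as the table would hold it */
  const uint32_t mask = 0x0051c2a7u & 0x007fffffu;             /* hash limited to mantissa bits */
  /* XOR only perturbs the mantissa, so the result stays in [1, 2); subtract 1. */
  const float scrambled = uint_as_float_sketch(stored ^ mask) - 1.0f;
  printf("scrambled sample in [0,1): %f\n", scrambled);
  return 0;
}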
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index 48cc284694a..5c47cc4ce23 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -14,6 +14,8 @@
* limitations under the License.
*/
+#include "kernel_light_background.h"
+
CCL_NAMESPACE_BEGIN
/* Light Sample result */
@@ -86,515 +88,6 @@ ccl_device void kernel_update_light_picking(ShaderData *sd, ccl_addr_space PathS
#endif
}
-/* Area light sampling */
-
-/* Uses the following paper:
- *
- * Carlos Urena et al.
- * An Area-Preserving Parametrization for Spherical Rectangles.
- *
- * https://www.solidangle.com/research/egsr2013_spherical_rectangle.pdf
- *
- * Note: light_p is modified when sample_coord is true.
- */
-ccl_device_inline float rect_light_sample(float3 P,
- float3 *light_p,
- float3 axisu,
- float3 axisv,
- float randu,
- float randv,
- bool sample_coord)
-{
- /* In our name system we're using P for the center,
- * which is o in the paper.
- */
-
- float3 corner = *light_p - axisu * 0.5f - axisv * 0.5f;
- float axisu_len, axisv_len;
- /* Compute local reference system R. */
- float3 x = normalize_len(axisu, &axisu_len);
- float3 y = normalize_len(axisv, &axisv_len);
- float3 z = cross(x, y);
- /* Compute rectangle coords in local reference system. */
- float3 dir = corner - P;
- float z0 = dot(dir, z);
- /* Flip 'z' to make it point against Q. */
- if (z0 > 0.0f) {
- z *= -1.0f;
- z0 *= -1.0f;
- }
- float x0 = dot(dir, x);
- float y0 = dot(dir, y);
- float x1 = x0 + axisu_len;
- float y1 = y0 + axisv_len;
- /* Compute internal angles (gamma_i). */
- float4 diff = make_float4(x0, y1, x1, y0) - make_float4(x1, y0, x0, y1);
- float4 nz = make_float4(y0, x1, y1, x0) * diff;
- nz = nz / sqrt(z0 * z0 * diff * diff + nz * nz);
- float g0 = safe_acosf(-nz.x * nz.y);
- float g1 = safe_acosf(-nz.y * nz.z);
- float g2 = safe_acosf(-nz.z * nz.w);
- float g3 = safe_acosf(-nz.w * nz.x);
- /* Compute predefined constants. */
- float b0 = nz.x;
- float b1 = nz.z;
- float b0sq = b0 * b0;
- float k = M_2PI_F - g2 - g3;
- /* Compute solid angle from internal angles. */
- float S = g0 + g1 - k;
-
- if (sample_coord) {
- /* Compute cu. */
- float au = randu * S + k;
- float fu = (cosf(au) * b0 - b1) / sinf(au);
- float cu = 1.0f / sqrtf(fu * fu + b0sq) * (fu > 0.0f ? 1.0f : -1.0f);
- cu = clamp(cu, -1.0f, 1.0f);
- /* Compute xu. */
- float xu = -(cu * z0) / max(sqrtf(1.0f - cu * cu), 1e-7f);
- xu = clamp(xu, x0, x1);
- /* Compute yv. */
- float z0sq = z0 * z0;
- float y0sq = y0 * y0;
- float y1sq = y1 * y1;
- float d = sqrtf(xu * xu + z0sq);
- float h0 = y0 / sqrtf(d * d + y0sq);
- float h1 = y1 / sqrtf(d * d + y1sq);
- float hv = h0 + randv * (h1 - h0), hv2 = hv * hv;
- float yv = (hv2 < 1.0f - 1e-6f) ? (hv * d) / sqrtf(1.0f - hv2) : y1;
-
- /* Transform (xu, yv, z0) to world coords. */
- *light_p = P + xu * x + yv * y + z0 * z;
- }
-
- /* return pdf */
- if (S != 0.0f)
- return 1.0f / S;
- else
- return 0.0f;
-}
-
-ccl_device_inline float3 ellipse_sample(float3 ru, float3 rv, float randu, float randv)
-{
- to_unit_disk(&randu, &randv);
- return ru * randu + rv * randv;
-}
-
-ccl_device float3 disk_light_sample(float3 v, float randu, float randv)
-{
- float3 ru, rv;
-
- make_orthonormals(v, &ru, &rv);
-
- return ellipse_sample(ru, rv, randu, randv);
-}
-
-ccl_device float3 distant_light_sample(float3 D, float radius, float randu, float randv)
-{
- return normalize(D + disk_light_sample(D, randu, randv) * radius);
-}
-
-ccl_device float3
-sphere_light_sample(float3 P, float3 center, float radius, float randu, float randv)
-{
- return disk_light_sample(normalize(P - center), randu, randv) * radius;
-}
-
-ccl_device float spot_light_attenuation(float3 dir,
- float spot_angle,
- float spot_smooth,
- LightSample *ls)
-{
- float3 I = ls->Ng;
-
- float attenuation = dot(dir, I);
-
- if (attenuation <= spot_angle) {
- attenuation = 0.0f;
- }
- else {
- float t = attenuation - spot_angle;
-
- if (t < spot_smooth && spot_smooth != 0.0f)
- attenuation *= smoothstepf(t / spot_smooth);
- }
-
- return attenuation;
-}
-
-ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3 I, float t)
-{
- float cos_pi = dot(Ng, I);
-
- if (cos_pi <= 0.0f)
- return 0.0f;
-
- return t * t / cos_pi;
-}
-
-/* Background Light */
-
-#ifdef __BACKGROUND_MIS__
-
-/* TODO(sergey): In theory it should be all fine to use noinline for all
- * devices, but we're so close to the release so better not screw things
- * up for CPU at least.
- */
-# ifdef __KERNEL_GPU__
-ccl_device_noinline
-# else
-ccl_device
-# endif
- float3
- background_map_sample(KernelGlobals *kg, float randu, float randv, float *pdf)
-{
- /* for the following, the CDF values are actually a pair of floats, with the
- * function value as X and the actual CDF as Y. The last entry's function
- * value is the CDF total. */
- int res_x = kernel_data.integrator.pdf_background_res_x;
- int res_y = kernel_data.integrator.pdf_background_res_y;
- int cdf_width = res_x + 1;
-
- /* this is basically std::lower_bound as used by pbrt */
- int first = 0;
- int count = res_y;
-
- while (count > 0) {
- int step = count >> 1;
- int middle = first + step;
-
- if (kernel_tex_fetch(__light_background_marginal_cdf, middle).y < randv) {
- first = middle + 1;
- count -= step + 1;
- }
- else
- count = step;
- }
-
- int index_v = max(0, first - 1);
- kernel_assert(index_v >= 0 && index_v < res_y);
-
- float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v);
- float2 cdf_next_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v + 1);
- float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y);
-
- /* importance-sampled V direction */
- float dv = inverse_lerp(cdf_v.y, cdf_next_v.y, randv);
- float v = (index_v + dv) / res_y;
-
- /* this is basically std::lower_bound as used by pbrt */
- first = 0;
- count = res_x;
- while (count > 0) {
- int step = count >> 1;
- int middle = first + step;
-
- if (kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + middle).y <
- randu) {
- first = middle + 1;
- count -= step + 1;
- }
- else
- count = step;
- }
-
- int index_u = max(0, first - 1);
- kernel_assert(index_u >= 0 && index_u < res_x);
-
- float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf,
- index_v * cdf_width + index_u);
- float2 cdf_next_u = kernel_tex_fetch(__light_background_conditional_cdf,
- index_v * cdf_width + index_u + 1);
- float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf,
- index_v * cdf_width + res_x);
-
- /* importance-sampled U direction */
- float du = inverse_lerp(cdf_u.y, cdf_next_u.y, randu);
- float u = (index_u + du) / res_x;
-
- /* compute pdf */
- float denom = cdf_last_u.x * cdf_last_v.x;
- float sin_theta = sinf(M_PI_F * v);
-
- if (sin_theta == 0.0f || denom == 0.0f)
- *pdf = 0.0f;
- else
- *pdf = (cdf_u.x * cdf_v.x) / (M_2PI_F * M_PI_F * sin_theta * denom);
-
- /* compute direction */
- return equirectangular_to_direction(u, v);
-}
-
-/* TODO(sergey): Same as above, after the release we should consider using
- * 'noinline' for all devices.
- */
-# ifdef __KERNEL_GPU__
-ccl_device_noinline
-# else
-ccl_device
-# endif
- float
- background_map_pdf(KernelGlobals *kg, float3 direction)
-{
- float2 uv = direction_to_equirectangular(direction);
- int res_x = kernel_data.integrator.pdf_background_res_x;
- int res_y = kernel_data.integrator.pdf_background_res_y;
- int cdf_width = res_x + 1;
-
- float sin_theta = sinf(uv.y * M_PI_F);
-
- if (sin_theta == 0.0f)
- return 0.0f;
-
- int index_u = clamp(float_to_int(uv.x * res_x), 0, res_x - 1);
- int index_v = clamp(float_to_int(uv.y * res_y), 0, res_y - 1);
-
- /* pdfs in V direction */
- float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf,
- index_v * cdf_width + res_x);
- float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y);
-
- float denom = cdf_last_u.x * cdf_last_v.x;
-
- if (denom == 0.0f)
- return 0.0f;
-
- /* pdfs in U direction */
- float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf,
- index_v * cdf_width + index_u);
- float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v);
-
- return (cdf_u.x * cdf_v.x) / (M_2PI_F * M_PI_F * sin_theta * denom);
-}
-
-ccl_device_inline bool background_portal_data_fetch_and_check_side(
- KernelGlobals *kg, float3 P, int index, float3 *lightpos, float3 *dir)
-{
- int portal = kernel_data.integrator.portal_offset + index;
- const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
-
- *lightpos = make_float3(klight->co[0], klight->co[1], klight->co[2]);
- *dir = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]);
-
- /* Check whether portal is on the right side. */
- if (dot(*dir, P - *lightpos) > 1e-4f)
- return true;
-
- return false;
-}
-
-ccl_device_inline float background_portal_pdf(
- KernelGlobals *kg, float3 P, float3 direction, int ignore_portal, bool *is_possible)
-{
- float portal_pdf = 0.0f;
-
- int num_possible = 0;
- for (int p = 0; p < kernel_data.integrator.num_portals; p++) {
- if (p == ignore_portal)
- continue;
-
- float3 lightpos, dir;
- if (!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
- continue;
-
- /* There's a portal that could be sampled from this position. */
- if (is_possible) {
- *is_possible = true;
- }
- num_possible++;
-
- int portal = kernel_data.integrator.portal_offset + p;
- const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
- float3 axisu = make_float3(
- klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
- float3 axisv = make_float3(
- klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
- bool is_round = (klight->area.invarea < 0.0f);
-
- if (!ray_quad_intersect(P,
- direction,
- 1e-4f,
- FLT_MAX,
- lightpos,
- axisu,
- axisv,
- dir,
- NULL,
- NULL,
- NULL,
- NULL,
- is_round))
- continue;
-
- if (is_round) {
- float t;
- float3 D = normalize_len(lightpos - P, &t);
- portal_pdf += fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
- }
- else {
- portal_pdf += rect_light_sample(P, &lightpos, axisu, axisv, 0.0f, 0.0f, false);
- }
- }
-
- if (ignore_portal >= 0) {
- /* We have skipped a portal that could be sampled as well. */
- num_possible++;
- }
-
- return (num_possible > 0) ? portal_pdf / num_possible : 0.0f;
-}
-
-ccl_device int background_num_possible_portals(KernelGlobals *kg, float3 P)
-{
- int num_possible_portals = 0;
- for (int p = 0; p < kernel_data.integrator.num_portals; p++) {
- float3 lightpos, dir;
- if (background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
- num_possible_portals++;
- }
- return num_possible_portals;
-}
-
-ccl_device float3 background_portal_sample(KernelGlobals *kg,
- float3 P,
- float randu,
- float randv,
- int num_possible,
- int *sampled_portal,
- float *pdf)
-{
- /* Pick a portal, then re-normalize randv. */
- randv *= num_possible;
- int portal = (int)randv;
- randv -= portal;
-
- /* TODO(sergey): Some smarter way of finding portal to sample
- * is welcome.
- */
- for (int p = 0; p < kernel_data.integrator.num_portals; p++) {
- /* Search for the sampled portal. */
- float3 lightpos, dir;
- if (!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
- continue;
-
- if (portal == 0) {
- /* p is the portal to be sampled. */
- int portal = kernel_data.integrator.portal_offset + p;
- const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
- float3 axisu = make_float3(
- klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
- float3 axisv = make_float3(
- klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
- bool is_round = (klight->area.invarea < 0.0f);
-
- float3 D;
- if (is_round) {
- lightpos += ellipse_sample(axisu * 0.5f, axisv * 0.5f, randu, randv);
- float t;
- D = normalize_len(lightpos - P, &t);
- *pdf = fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
- }
- else {
- *pdf = rect_light_sample(P, &lightpos, axisu, axisv, randu, randv, true);
- D = normalize(lightpos - P);
- }
-
- *pdf /= num_possible;
- *sampled_portal = p;
- return D;
- }
-
- portal--;
- }
-
- return make_float3(0.0f, 0.0f, 0.0f);
-}
-
-ccl_device_inline float3
-background_light_sample(KernelGlobals *kg, float3 P, float randu, float randv, float *pdf)
-{
- /* Probability of sampling portals instead of the map. */
- float portal_sampling_pdf = kernel_data.integrator.portal_pdf;
-
- /* Check if there are portals in the scene which we can sample. */
- if (portal_sampling_pdf > 0.0f) {
- int num_portals = background_num_possible_portals(kg, P);
- if (num_portals > 0) {
- if (portal_sampling_pdf == 1.0f || randu < portal_sampling_pdf) {
- if (portal_sampling_pdf < 1.0f) {
- randu /= portal_sampling_pdf;
- }
- int portal;
- float3 D = background_portal_sample(kg, P, randu, randv, num_portals, &portal, pdf);
- if (num_portals > 1) {
- /* Ignore the chosen portal, its pdf is already included. */
- *pdf += background_portal_pdf(kg, P, D, portal, NULL);
- }
- /* We could also have sampled the map, so combine with MIS. */
- if (portal_sampling_pdf < 1.0f) {
- float cdf_pdf = background_map_pdf(kg, D);
- *pdf = (portal_sampling_pdf * (*pdf) + (1.0f - portal_sampling_pdf) * cdf_pdf);
- }
- return D;
- }
- else {
- /* Sample map, but with nonzero portal_sampling_pdf for MIS. */
- randu = (randu - portal_sampling_pdf) / (1.0f - portal_sampling_pdf);
- }
- }
- else {
- /* We can't sample a portal.
- * Check if we can sample the map instead.
- */
- if (portal_sampling_pdf == 1.0f) {
- /* Use uniform as a fallback if we can't sample the map. */
- *pdf = 1.0f / M_4PI_F;
- return sample_uniform_sphere(randu, randv);
- }
- else {
- portal_sampling_pdf = 0.0f;
- }
- }
- }
-
- float3 D = background_map_sample(kg, randu, randv, pdf);
- /* Use MIS if portals could be sampled as well. */
- if (portal_sampling_pdf > 0.0f) {
- float portal_pdf = background_portal_pdf(kg, P, D, -1, NULL);
- *pdf = (portal_sampling_pdf * portal_pdf + (1.0f - portal_sampling_pdf) * (*pdf));
- }
- return D;
-}
-
-ccl_device float background_light_pdf(KernelGlobals *kg, float3 P, float3 direction)
-{
- /* Probability of sampling portals instead of the map. */
- float portal_sampling_pdf = kernel_data.integrator.portal_pdf;
-
- float portal_pdf = 0.0f, map_pdf = 0.0f;
- if (portal_sampling_pdf > 0.0f) {
- /* Evaluate PDF of sampling this direction by portal sampling. */
- bool is_possible = false;
- portal_pdf = background_portal_pdf(kg, P, direction, -1, &is_possible) * portal_sampling_pdf;
- if (!is_possible) {
- /* Portal sampling is not possible here because all portals point to the wrong side.
- * If map sampling is possible, it would be used instead, otherwise fallback sampling is used. */
- if (portal_sampling_pdf == 1.0f) {
- return 1.0f / M_4PI_F;
- }
- else {
- /* Force map sampling. */
- portal_sampling_pdf = 0.0f;
- }
- }
- }
- if (portal_sampling_pdf < 1.0f) {
- /* Evaluate PDF of sampling this direction by map sampling. */
- map_pdf = background_map_pdf(kg, direction) * (1.0f - portal_sampling_pdf);
- }
- return portal_pdf + map_pdf;
-}
-#endif
-
/* Regular Light */
/* returns the PDF of sampling a point on this lamp */
@@ -663,7 +156,7 @@ ccl_device_inline bool lamp_light_sample(
/* spot light attenuation */
float3 dir = make_float3(klight->spot.dir[0], klight->spot.dir[1], klight->spot.dir[2]);
ls->eval_fac *= spot_light_attenuation(
- dir, klight->spot.spot_angle, klight->spot.spot_smooth, ls);
+ dir, klight->spot.spot_angle, klight->spot.spot_smooth, ls->Ng);
if (ls->eval_fac == 0.0f) {
return false;
}
@@ -799,7 +292,7 @@ ccl_device bool lamp_light_eval(
/* spot light attenuation */
float3 dir = make_float3(klight->spot.dir[0], klight->spot.dir[1], klight->spot.dir[2]);
ls->eval_fac *= spot_light_attenuation(
- dir, klight->spot.spot_angle, klight->spot.spot_smooth, ls);
+ dir, klight->spot.spot_angle, klight->spot.spot_smooth, ls->Ng);
if (ls->eval_fac == 0.0f)
return false;
@@ -870,20 +363,18 @@ ccl_device_inline bool triangle_world_space_vertices(
triangle_vertices(kg, prim, V);
}
-#ifdef __INSTANCING__
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-# ifdef __OBJECT_MOTION__
+#ifdef __OBJECT_MOTION__
float object_time = (time >= 0.0f) ? time : 0.5f;
Transform tfm = object_fetch_transform_motion_test(kg, object, object_time, NULL);
-# else
+#else
Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
-# endif
+#endif
V[0] = transform_point(&tfm, V[0]);
V[1] = transform_point(&tfm, V[1]);
V[2] = transform_point(&tfm, V[2]);
has_motion = true;
}
-#endif
return has_motion;
}
@@ -1074,11 +565,19 @@ ccl_device_forceinline void triangle_light_sample(KernelGlobals *kg,
}
}
else {
- /* compute random point in triangle */
- randu = sqrtf(randu);
+ /* compute random point in triangle. From Eric Heitz's "A Low-Distortion Map Between Triangle
+ * and Square" */
+ float u = randu;
+ float v = randv;
+ if (v > u) {
+ u *= 0.5f;
+ v -= u;
+ }
+ else {
+ v *= 0.5f;
+ u -= v;
+ }
- const float u = 1.0f - randu;
- const float v = randv * randu;
const float t = 1.0f - u - v;
ls->P = u * V[0] + v * V[1] + t * V[2];
/* compute incoming direction, distance and pdf */
@@ -1142,7 +641,7 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu)
int len = kernel_data.integrator.num_distribution + 1;
float r = *randu;
- while (len > 0) {
+ do {
int half_len = len >> 1;
int middle = first + half_len;
@@ -1153,7 +652,7 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu)
first = middle + 1;
len = len - half_len - 1;
}
- }
+ } while (len > 0);
/* Clamping should not be needed but float rounding errors seem to
* make this fail on rare occasions. */
@@ -1170,7 +669,7 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu)
/* Generic Light */
-ccl_device bool light_select_reached_max_bounces(KernelGlobals *kg, int index, int bounce)
+ccl_device_inline bool light_select_reached_max_bounces(KernelGlobals *kg, int index, int bounce)
{
return (bounce > kernel_tex_fetch(__lights, index).max_bounces);
}
@@ -1840,7 +1339,7 @@ ccl_device_noinline bool light_sample(KernelGlobals *kg,
return (ls->pdf > 0.0f);
}
-ccl_device int light_select_num_samples(KernelGlobals *kg, int index)
+ccl_device_inline int light_select_num_samples(KernelGlobals *kg, int index)
{
return kernel_tex_fetch(__lights, index).samples;
}
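Editor's note: triangle_light_sample above replaces the sqrt-based warp with Eric Heitz's low-distortion map between the unit square and a triangle. A standalone C++ sketch of that mapping applied to three vertices follows; vertex values and names are illustrative.

#include <cstdio>

struct float3 { float x, y, z; };

static inline float3 sample_triangle_sketch(float randu, float randv,
                                            const float3 &V0, const float3 &V1, const float3 &V2)
{
  /* Low-distortion square-to-triangle map (Heitz). */
  float u = randu, v = randv;
  if (v > u) {
    u *= 0.5f;
    v -= u;
  }
  else {
    v *= 0.5f;
    u -= v;
  }
  const float t = 1.0f - u - v;
  return {u * V0.x + v * V1.x + t * V2.x,
          u * V0.y + v * V1.y + t * V2.y,
          u * V0.z + v * V1.z + t * V2.z};
}

int main()
{
  const float3 V0 = {0, 0, 0}, V1 = {1, 0, 0}, V2 = {0, 1, 0};
  const float3 P = sample_triangle_sketch(0.7f, 0.3f, V0, V1, V2);
  printf("%f %f %f\n", P.x, P.y, P.z);
  return 0;
}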
diff --git a/intern/cycles/kernel/kernel_light_background.h b/intern/cycles/kernel/kernel_light_background.h
new file mode 100644
index 00000000000..30e336f0f80
--- /dev/null
+++ b/intern/cycles/kernel/kernel_light_background.h
@@ -0,0 +1,448 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_light_common.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Background Light */
+
+#ifdef __BACKGROUND_MIS__
+
+ccl_device float3 background_map_sample(KernelGlobals *kg, float randu, float randv, float *pdf)
+{
+ /* for the following, the CDF values are actually a pair of floats, with the
+ * function value as X and the actual CDF as Y. The last entry's function
+ * value is the CDF total. */
+ int res_x = kernel_data.background.map_res_x;
+ int res_y = kernel_data.background.map_res_y;
+ int cdf_width = res_x + 1;
+
+ /* this is basically std::lower_bound as used by pbrt */
+ int first = 0;
+ int count = res_y;
+
+ while (count > 0) {
+ int step = count >> 1;
+ int middle = first + step;
+
+ if (kernel_tex_fetch(__light_background_marginal_cdf, middle).y < randv) {
+ first = middle + 1;
+ count -= step + 1;
+ }
+ else
+ count = step;
+ }
+
+ int index_v = max(0, first - 1);
+ kernel_assert(index_v >= 0 && index_v < res_y);
+
+ float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v);
+ float2 cdf_next_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v + 1);
+ float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y);
+
+ /* importance-sampled V direction */
+ float dv = inverse_lerp(cdf_v.y, cdf_next_v.y, randv);
+ float v = (index_v + dv) / res_y;
+
+ /* this is basically std::lower_bound as used by pbrt */
+ first = 0;
+ count = res_x;
+ while (count > 0) {
+ int step = count >> 1;
+ int middle = first + step;
+
+ if (kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + middle).y <
+ randu) {
+ first = middle + 1;
+ count -= step + 1;
+ }
+ else
+ count = step;
+ }
+
+ int index_u = max(0, first - 1);
+ kernel_assert(index_u >= 0 && index_u < res_x);
+
+ float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf,
+ index_v * cdf_width + index_u);
+ float2 cdf_next_u = kernel_tex_fetch(__light_background_conditional_cdf,
+ index_v * cdf_width + index_u + 1);
+ float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf,
+ index_v * cdf_width + res_x);
+
+ /* importance-sampled U direction */
+ float du = inverse_lerp(cdf_u.y, cdf_next_u.y, randu);
+ float u = (index_u + du) / res_x;
+
+ /* compute pdf */
+ float sin_theta = sinf(M_PI_F * v);
+ float denom = (M_2PI_F * M_PI_F * sin_theta) * cdf_last_u.x * cdf_last_v.x;
+
+ if (sin_theta == 0.0f || denom == 0.0f)
+ *pdf = 0.0f;
+ else
+ *pdf = (cdf_u.x * cdf_v.x) / denom;
+
+ /* compute direction */
+ return equirectangular_to_direction(u, v);
+}
+
+/* TODO(sergey): Same as above, after the release we should consider using
+ * 'noinline' for all devices.
+ */
+ccl_device float background_map_pdf(KernelGlobals *kg, float3 direction)
+{
+ float2 uv = direction_to_equirectangular(direction);
+ int res_x = kernel_data.background.map_res_x;
+ int res_y = kernel_data.background.map_res_y;
+ int cdf_width = res_x + 1;
+
+ float sin_theta = sinf(uv.y * M_PI_F);
+
+ if (sin_theta == 0.0f)
+ return 0.0f;
+
+ int index_u = clamp(float_to_int(uv.x * res_x), 0, res_x - 1);
+ int index_v = clamp(float_to_int(uv.y * res_y), 0, res_y - 1);
+
+ /* pdfs in V direction */
+ float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf,
+ index_v * cdf_width + res_x);
+ float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y);
+
+ float denom = (M_2PI_F * M_PI_F * sin_theta) * cdf_last_u.x * cdf_last_v.x;
+
+ if (denom == 0.0f)
+ return 0.0f;
+
+ /* pdfs in U direction */
+ float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf,
+ index_v * cdf_width + index_u);
+ float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v);
+
+ return (cdf_u.x * cdf_v.x) / denom;
+}
+
+ccl_device_inline bool background_portal_data_fetch_and_check_side(
+ KernelGlobals *kg, float3 P, int index, float3 *lightpos, float3 *dir)
+{
+ int portal = kernel_data.background.portal_offset + index;
+ const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
+
+ *lightpos = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+ *dir = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]);
+
+ /* Check whether portal is on the right side. */
+ if (dot(*dir, P - *lightpos) > 1e-4f)
+ return true;
+
+ return false;
+}
+
+ccl_device_inline float background_portal_pdf(
+ KernelGlobals *kg, float3 P, float3 direction, int ignore_portal, bool *is_possible)
+{
+ float portal_pdf = 0.0f;
+
+ int num_possible = 0;
+ for (int p = 0; p < kernel_data.background.num_portals; p++) {
+ if (p == ignore_portal)
+ continue;
+
+ float3 lightpos, dir;
+ if (!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
+ continue;
+
+ /* There's a portal that could be sampled from this position. */
+ if (is_possible) {
+ *is_possible = true;
+ }
+ num_possible++;
+
+ int portal = kernel_data.background.portal_offset + p;
+ const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
+ float3 axisu = make_float3(
+ klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
+ float3 axisv = make_float3(
+ klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
+ bool is_round = (klight->area.invarea < 0.0f);
+
+ if (!ray_quad_intersect(P,
+ direction,
+ 1e-4f,
+ FLT_MAX,
+ lightpos,
+ axisu,
+ axisv,
+ dir,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ is_round))
+ continue;
+
+ if (is_round) {
+ float t;
+ float3 D = normalize_len(lightpos - P, &t);
+ portal_pdf += fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
+ }
+ else {
+ portal_pdf += rect_light_sample(P, &lightpos, axisu, axisv, 0.0f, 0.0f, false);
+ }
+ }
+
+ if (ignore_portal >= 0) {
+ /* We have skipped a portal that could be sampled as well. */
+ num_possible++;
+ }
+
+ return (num_possible > 0) ? portal_pdf / num_possible : 0.0f;
+}
+
+ccl_device int background_num_possible_portals(KernelGlobals *kg, float3 P)
+{
+ int num_possible_portals = 0;
+ for (int p = 0; p < kernel_data.background.num_portals; p++) {
+ float3 lightpos, dir;
+ if (background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
+ num_possible_portals++;
+ }
+ return num_possible_portals;
+}
+
+ccl_device float3 background_portal_sample(KernelGlobals *kg,
+ float3 P,
+ float randu,
+ float randv,
+ int num_possible,
+ int *sampled_portal,
+ float *pdf)
+{
+ /* Pick a portal, then re-normalize randv. */
+ randv *= num_possible;
+ int portal = (int)randv;
+ randv -= portal;
+
+ /* TODO(sergey): Some smarter way of finding portal to sample
+ * is welcome.
+ */
+ for (int p = 0; p < kernel_data.background.num_portals; p++) {
+ /* Search for the sampled portal. */
+ float3 lightpos, dir;
+ if (!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
+ continue;
+
+ if (portal == 0) {
+ /* p is the portal to be sampled. */
+ int portal = kernel_data.background.portal_offset + p;
+ const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
+ float3 axisu = make_float3(
+ klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
+ float3 axisv = make_float3(
+ klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
+ bool is_round = (klight->area.invarea < 0.0f);
+
+ float3 D;
+ if (is_round) {
+ lightpos += ellipse_sample(axisu * 0.5f, axisv * 0.5f, randu, randv);
+ float t;
+ D = normalize_len(lightpos - P, &t);
+ *pdf = fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
+ }
+ else {
+ *pdf = rect_light_sample(P, &lightpos, axisu, axisv, randu, randv, true);
+ D = normalize(lightpos - P);
+ }
+
+ *pdf /= num_possible;
+ *sampled_portal = p;
+ return D;
+ }
+
+ portal--;
+ }
+
+ return make_float3(0.0f, 0.0f, 0.0f);
+}
+
+ccl_device_inline float3 background_sun_sample(KernelGlobals *kg,
+ float randu,
+ float randv,
+ float *pdf)
+{
+ float3 D;
+ const float3 N = float4_to_float3(kernel_data.background.sun);
+ const float angle = kernel_data.background.sun.w;
+ sample_uniform_cone(N, angle, randu, randv, &D, pdf);
+ return D;
+}
+
+ccl_device_inline float background_sun_pdf(KernelGlobals *kg, float3 D)
+{
+ const float3 N = float4_to_float3(kernel_data.background.sun);
+ const float angle = kernel_data.background.sun.w;
+ return pdf_uniform_cone(N, D, angle);
+}
+
+ccl_device_inline float3
+background_light_sample(KernelGlobals *kg, float3 P, float randu, float randv, float *pdf)
+{
+ float portal_method_pdf = kernel_data.background.portal_weight;
+ float sun_method_pdf = kernel_data.background.sun_weight;
+ float map_method_pdf = kernel_data.background.map_weight;
+
+ int num_portals = 0;
+ if (portal_method_pdf > 0.0f) {
+ /* Check if there are portals in the scene which we can sample. */
+ num_portals = background_num_possible_portals(kg, P);
+ if (num_portals == 0) {
+ portal_method_pdf = 0.0f;
+ }
+ }
+
+ float pdf_fac = (portal_method_pdf + sun_method_pdf + map_method_pdf);
+ if (pdf_fac == 0.0f) {
+ /* Use uniform as a fallback if we can't use any strategy. */
+ *pdf = 1.0f / M_4PI_F;
+ return sample_uniform_sphere(randu, randv);
+ }
+
+ pdf_fac = 1.0f / pdf_fac;
+ portal_method_pdf *= pdf_fac;
+ sun_method_pdf *= pdf_fac;
+ map_method_pdf *= pdf_fac;
+
+ /* We have 100% in total and split it between the three categories.
+ * Therefore, we pick portals if randu is between 0 and portal_method_pdf,
+ * sun if randu is between portal_method_pdf and (portal_method_pdf + sun_method_pdf)
+ * and map if randu is between (portal_method_pdf + sun_method_pdf) and 1. */
+ float sun_method_cdf = portal_method_pdf + sun_method_pdf;
+
+ int method = 0;
+ float3 D;
+ if (randu < portal_method_pdf) {
+ method = 0;
+ /* Rescale randu. */
+ if (portal_method_pdf != 1.0f) {
+ randu /= portal_method_pdf;
+ }
+
+ /* Sample a portal. */
+ int portal;
+ D = background_portal_sample(kg, P, randu, randv, num_portals, &portal, pdf);
+ if (num_portals > 1) {
+ /* Ignore the chosen portal, its pdf is already included. */
+ *pdf += background_portal_pdf(kg, P, D, portal, NULL);
+ }
+
+ /* Skip MIS if this is the only method. */
+ if (portal_method_pdf == 1.0f) {
+ return D;
+ }
+ *pdf *= portal_method_pdf;
+ }
+ else if (randu < sun_method_cdf) {
+ method = 1;
+ /* Rescale randu. */
+ if (sun_method_pdf != 1.0f) {
+ randu = (randu - portal_method_pdf) / sun_method_pdf;
+ }
+
+ D = background_sun_sample(kg, randu, randv, pdf);
+
+ /* Skip MIS if this is the only method. */
+ if (sun_method_pdf == 1.0f) {
+ return D;
+ }
+ *pdf *= sun_method_pdf;
+ }
+ else {
+ method = 2;
+ /* Rescale randu. */
+ if (map_method_pdf != 1.0f) {
+ randu = (randu - sun_method_cdf) / map_method_pdf;
+ }
+
+ D = background_map_sample(kg, randu, randv, pdf);
+
+ /* Skip MIS if this is the only method. */
+ if (map_method_pdf == 1.0f) {
+ return D;
+ }
+ *pdf *= map_method_pdf;
+ }
+
+ /* MIS weighting. */
+ if (method != 0 && portal_method_pdf != 0.0f) {
+ *pdf += portal_method_pdf * background_portal_pdf(kg, P, D, -1, NULL);
+ }
+ if (method != 1 && sun_method_pdf != 0.0f) {
+ *pdf += sun_method_pdf * background_sun_pdf(kg, D);
+ }
+ if (method != 2 && map_method_pdf != 0.0f) {
+ *pdf += map_method_pdf * background_map_pdf(kg, D);
+ }
+ return D;
+}
+
+ccl_device float background_light_pdf(KernelGlobals *kg, float3 P, float3 direction)
+{
+ float portal_method_pdf = kernel_data.background.portal_weight;
+ float sun_method_pdf = kernel_data.background.sun_weight;
+ float map_method_pdf = kernel_data.background.map_weight;
+
+ float portal_pdf = 0.0f;
+ /* Portals are a special case here since we need to compute their pdf in order
+ * to find out if we can sample them. */
+ if (portal_method_pdf > 0.0f) {
+ /* Evaluate PDF of sampling this direction by portal sampling. */
+ bool is_possible = false;
+ portal_pdf = background_portal_pdf(kg, P, direction, -1, &is_possible);
+ if (!is_possible) {
+ /* Portal sampling is not possible here because all portals point to the wrong side.
+ * If other methods can be used instead, do so, otherwise uniform sampling is used as a
+ * fallback. */
+ portal_method_pdf = 0.0f;
+ }
+ }
+
+ float pdf_fac = (portal_method_pdf + sun_method_pdf + map_method_pdf);
+ if (pdf_fac == 0.0f) {
+ /* Use uniform as a fallback if we can't use any strategy. */
+ return kernel_data.integrator.pdf_lights / M_4PI_F;
+ }
+
+ pdf_fac = 1.0f / pdf_fac;
+ portal_method_pdf *= pdf_fac;
+ sun_method_pdf *= pdf_fac;
+ map_method_pdf *= pdf_fac;
+
+ float pdf = portal_pdf * portal_method_pdf;
+ if (sun_method_pdf != 0.0f) {
+ pdf += background_sun_pdf(kg, direction) * sun_method_pdf;
+ }
+ if (map_method_pdf != 0.0f) {
+ pdf += background_map_pdf(kg, direction) * map_method_pdf;
+ }
+
+ return pdf * kernel_data.integrator.pdf_lights;
+}
+
+#endif
+
+CCL_NAMESPACE_END
\ No newline at end of file
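
background_light_sample() above is a one-sample MIS combination: it picks one of the portal/sun/map strategies with probability proportional to its weight, rescales randu so it can be reused inside the chosen strategy, then multiplies the returned pdf by the selection probability and adds the pdfs of the other strategies, each weighted by its own selection probability. The following standalone host-side sketch shows the same pattern reduced to two hypothetical 1D strategies over z = cos(theta), a "sun" cone and a uniform "map" stand-in; all names are illustrative, not kernel API.

#include <cstdio>

static float sun_pdf(float z, float cos_angle)
{
  return (z > cos_angle) ? 1.0f / (1.0f - cos_angle) : 0.0f;
}

static float map_pdf(float /*z*/)
{
  return 0.5f; /* uniform over z in [-1, 1] */
}

static float sample_background_z(
    float randu, float sun_weight, float map_weight, float cos_angle, float *pdf)
{
  const float sun_prob = sun_weight / (sun_weight + map_weight);
  float z;
  if (randu < sun_prob) {
    randu /= sun_prob;                          /* rescale into [0, 1) */
    z = cos_angle + randu * (1.0f - cos_angle); /* sample inside the cone */
  }
  else {
    randu = (randu - sun_prob) / (1.0f - sun_prob);
    z = -1.0f + 2.0f * randu; /* sample the whole sphere */
  }
  /* One-sample MIS: sum both densities weighted by their selection probabilities. */
  *pdf = sun_prob * sun_pdf(z, cos_angle) + (1.0f - sun_prob) * map_pdf(z);
  return z;
}

int main()
{
  float pdf = 0.0f;
  const float z = sample_background_z(0.3f, 2.0f, 1.0f, 0.9f, &pdf);
  std::printf("z = %.3f  pdf = %.3f\n", z, pdf);
  return 0;
}

The kernel additionally skips the MIS sum when only one strategy is enabled, which this sketch omits for brevity.
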
diff --git a/intern/cycles/kernel/kernel_light_common.h b/intern/cycles/kernel/kernel_light_common.h
new file mode 100644
index 00000000000..39503a4b479
--- /dev/null
+++ b/intern/cycles/kernel/kernel_light_common.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Area light sampling */
+
+/* Uses the following paper:
+ *
+ * Carlos Urena et al.
+ * An Area-Preserving Parametrization for Spherical Rectangles.
+ *
+ * https://www.solidangle.com/research/egsr2013_spherical_rectangle.pdf
+ *
+ * Note: light_p is modified when sample_coord is true.
+ */
+ccl_device_inline float rect_light_sample(float3 P,
+ float3 *light_p,
+ float3 axisu,
+ float3 axisv,
+ float randu,
+ float randv,
+ bool sample_coord)
+{
+ /* In our naming convention we use P for the center,
+ * which is o in the paper.
+ */
+
+ float3 corner = *light_p - axisu * 0.5f - axisv * 0.5f;
+ float axisu_len, axisv_len;
+ /* Compute local reference system R. */
+ float3 x = normalize_len(axisu, &axisu_len);
+ float3 y = normalize_len(axisv, &axisv_len);
+ float3 z = cross(x, y);
+ /* Compute rectangle coords in local reference system. */
+ float3 dir = corner - P;
+ float z0 = dot(dir, z);
+ /* Flip 'z' to make it point against Q. */
+ if (z0 > 0.0f) {
+ z *= -1.0f;
+ z0 *= -1.0f;
+ }
+ float x0 = dot(dir, x);
+ float y0 = dot(dir, y);
+ float x1 = x0 + axisu_len;
+ float y1 = y0 + axisv_len;
+ /* Compute internal angles (gamma_i). */
+ float4 diff = make_float4(x0, y1, x1, y0) - make_float4(x1, y0, x0, y1);
+ float4 nz = make_float4(y0, x1, y1, x0) * diff;
+ nz = nz / sqrt(z0 * z0 * diff * diff + nz * nz);
+ float g0 = safe_acosf(-nz.x * nz.y);
+ float g1 = safe_acosf(-nz.y * nz.z);
+ float g2 = safe_acosf(-nz.z * nz.w);
+ float g3 = safe_acosf(-nz.w * nz.x);
+ /* Compute predefined constants. */
+ float b0 = nz.x;
+ float b1 = nz.z;
+ float b0sq = b0 * b0;
+ float k = M_2PI_F - g2 - g3;
+ /* Compute solid angle from internal angles. */
+ float S = g0 + g1 - k;
+
+ if (sample_coord) {
+ /* Compute cu. */
+ float au = randu * S + k;
+ float fu = (cosf(au) * b0 - b1) / sinf(au);
+ float cu = 1.0f / sqrtf(fu * fu + b0sq) * (fu > 0.0f ? 1.0f : -1.0f);
+ cu = clamp(cu, -1.0f, 1.0f);
+ /* Compute xu. */
+ float xu = -(cu * z0) / max(sqrtf(1.0f - cu * cu), 1e-7f);
+ xu = clamp(xu, x0, x1);
+ /* Compute yv. */
+ float z0sq = z0 * z0;
+ float y0sq = y0 * y0;
+ float y1sq = y1 * y1;
+ float d = sqrtf(xu * xu + z0sq);
+ float h0 = y0 / sqrtf(d * d + y0sq);
+ float h1 = y1 / sqrtf(d * d + y1sq);
+ float hv = h0 + randv * (h1 - h0), hv2 = hv * hv;
+ float yv = (hv2 < 1.0f - 1e-6f) ? (hv * d) / sqrtf(1.0f - hv2) : y1;
+
+ /* Transform (xu, yv, z0) to world coords. */
+ *light_p = P + xu * x + yv * y + z0 * z;
+ }
+
+ /* return pdf */
+ if (S != 0.0f)
+ return 1.0f / S;
+ else
+ return 0.0f;
+}
+
+ccl_device_inline float3 ellipse_sample(float3 ru, float3 rv, float randu, float randv)
+{
+ to_unit_disk(&randu, &randv);
+ return ru * randu + rv * randv;
+}
+
+ccl_device float3 disk_light_sample(float3 v, float randu, float randv)
+{
+ float3 ru, rv;
+
+ make_orthonormals(v, &ru, &rv);
+
+ return ellipse_sample(ru, rv, randu, randv);
+}
+
+ccl_device float3 distant_light_sample(float3 D, float radius, float randu, float randv)
+{
+ return normalize(D + disk_light_sample(D, randu, randv) * radius);
+}
+
+ccl_device float3
+sphere_light_sample(float3 P, float3 center, float radius, float randu, float randv)
+{
+ return disk_light_sample(normalize(P - center), randu, randv) * radius;
+}
+
+ccl_device float spot_light_attenuation(float3 dir, float spot_angle, float spot_smooth, float3 N)
+{
+ float attenuation = dot(dir, N);
+
+ if (attenuation <= spot_angle) {
+ attenuation = 0.0f;
+ }
+ else {
+ float t = attenuation - spot_angle;
+
+ if (t < spot_smooth && spot_smooth != 0.0f)
+ attenuation *= smoothstepf(t / spot_smooth);
+ }
+
+ return attenuation;
+}
+
+ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3 I, float t)
+{
+ float cos_pi = dot(Ng, I);
+
+ if (cos_pi <= 0.0f)
+ return 0.0f;
+
+ return t * t / cos_pi;
+}
+
+CCL_NAMESPACE_END
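
rect_light_sample() above returns the reciprocal of the rectangle's solid angle S, i.e. a pdf with respect to solid angle, where S = g0 + g1 + g2 + g3 - 2*pi (the k term folds g2 and g3 into the expression). The short host-side check below reproduces that computation for an axis-aligned rectangle and compares it against a known closed form; it is a verification sketch, not kernel code.

#include <cmath>
#include <cstdio>

static float rect_solid_angle(float x0, float x1, float y0, float y1, float z0)
{
  const float pi = 3.14159265358979323846f;
  /* Edge differences and (normalized) nz values, matching 'diff' and 'nz'
   * in rect_light_sample(). */
  const float d[4] = {x0 - x1, y1 - y0, x1 - x0, y0 - y1};
  const float n[4] = {y0 * d[0], x1 * d[1], y1 * d[2], x0 * d[3]};
  float nz[4];
  for (int i = 0; i < 4; i++) {
    nz[i] = n[i] / std::sqrt(z0 * z0 * d[i] * d[i] + n[i] * n[i]);
  }
  /* Internal angles gamma_i, then Girard's formula for the solid angle. */
  const float g0 = std::acos(-nz[0] * nz[1]);
  const float g1 = std::acos(-nz[1] * nz[2]);
  const float g2 = std::acos(-nz[2] * nz[3]);
  const float g3 = std::acos(-nz[3] * nz[0]);
  return g0 + g1 + g2 + g3 - 2.0f * pi;
}

int main()
{
  /* 2x2 rectangle one unit below the viewer (z0 < 0, as after the flip in the
   * kernel code); the closed form 4*atan(1/sqrt(3)) = 2*pi/3 is the reference. */
  const float S = rect_solid_angle(-1.0f, 1.0f, -1.0f, 1.0f, -1.0f);
  std::printf("solid angle = %.5f sr, pdf = %.5f\n", S, 1.0f / S);
  return 0;
}
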
diff --git a/intern/cycles/kernel/kernel_montecarlo.h b/intern/cycles/kernel/kernel_montecarlo.h
index a933be970c2..0edcc1a5a14 100644
--- a/intern/cycles/kernel/kernel_montecarlo.h
+++ b/intern/cycles/kernel/kernel_montecarlo.h
@@ -85,8 +85,9 @@ ccl_device_inline void sample_uniform_hemisphere(
ccl_device_inline void sample_uniform_cone(
const float3 N, float angle, float randu, float randv, float3 *omega_in, float *pdf)
{
- float z = cosf(angle * randu);
- float r = sqrtf(max(0.0f, 1.0f - z * z));
+ float zMin = cosf(angle);
+ float z = zMin - zMin * randu + randu;
+ float r = safe_sqrtf(1.0f - sqr(z));
float phi = M_2PI_F * randv;
float x = r * cosf(phi);
float y = r * sinf(phi);
@@ -94,7 +95,17 @@ ccl_device_inline void sample_uniform_cone(
float3 T, B;
make_orthonormals(N, &T, &B);
*omega_in = x * T + y * B + z * N;
- *pdf = 0.5f * M_1_PI_F / (1.0f - cosf(angle));
+ *pdf = M_1_2PI_F / (1.0f - zMin);
+}
+
+ccl_device_inline float pdf_uniform_cone(const float3 N, float3 D, float angle)
+{
+ float zMin = cosf(angle);
+ float z = dot(N, D);
+ if (z > zMin) {
+ return M_1_2PI_F / (1.0f - zMin);
+ }
+ return 0.0f;
}
/* sample uniform point on the surface of a sphere */
@@ -199,21 +210,27 @@ ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N)
float NdotNg = dot(N, Ng);
float3 X = normalize(N - NdotNg * Ng);
+ /* Keep math expressions. */
+ /* clang-format off */
/* Calculate N.z and N.x in the local coordinate system.
*
* The goal of this computation is to find a N' that is rotated towards Ng just enough
* to lift R' above the threshold (here called t), therefore dot(R', Ng) = t.
*
- * According to the standard reflection equation, this means that we want dot(2*dot(N', I)*N' - I, Ng) = t.
+ * According to the standard reflection equation,
+ * this means that we want dot(2*dot(N', I)*N' - I, Ng) = t.
*
- * Since the Z axis of our local coordinate system is Ng, dot(x, Ng) is just x.z, so we get 2*dot(N', I)*N'.z - I.z = t.
+ * Since the Z axis of our local coordinate system is Ng, dot(x, Ng) is just x.z, so we get
+ * 2*dot(N', I)*N'.z - I.z = t.
*
- * The rotation is simple to express in the coordinate system we formed - since N lies in the X-Z-plane, we know that
- * N' will also lie in the X-Z-plane, so N'.y = 0 and therefore dot(N', I) = N'.x*I.x + N'.z*I.z .
+ * The rotation is simple to express in the coordinate system we formed -
+ * since N lies in the X-Z-plane, we know that N' will also lie in the X-Z-plane,
+ * so N'.y = 0 and therefore dot(N', I) = N'.x*I.x + N'.z*I.z .
*
* Furthermore, we want N' to be normalized, so N'.x = sqrt(1 - N'.z^2).
*
- * With these simplifications, we get the final equation 2*(sqrt(1 - N'.z^2)*I.x + N'.z*I.z)*N'.z - I.z = t.
+ * With these simplifications,
+ * we get the final equation 2*(sqrt(1 - N'.z^2)*I.x + N'.z*I.z)*N'.z - I.z = t.
*
* The only unknown here is N'.z, so we can solve for that.
*
@@ -227,8 +244,11 @@ ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N)
* c = I.z*t + a
* N'.z = +-sqrt(0.5*(+-b + c)/a)
*
- * Two solutions can immediately be discarded because they're negative so N' would lie in the lower hemisphere.
+ * Two solutions can immediately be discarded because they're negative so N' would lie in the
+ * lower hemisphere.
*/
+ /* clang-format on */
+
float Ix = dot(I, X), Iz = dot(I, Ng);
float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
float a = Ix2 + Iz2;
@@ -237,8 +257,9 @@ ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N)
float c = Iz * threshold + a;
/* Evaluate both solutions.
- * In many cases one can be immediately discarded (if N'.z would be imaginary or larger than one), so check for that first.
- * If no option is viable (might happen in extreme cases like N being in the wrong hemisphere), give up and return Ng. */
+ * In many cases one can be immediately discarded (if N'.z would be imaginary or larger than
+ * one), so check for that first. If no option is viable (might happen in extreme cases like N
+ * being in the wrong hemisphere), give up and return Ng. */
float fac = 0.5f / a;
float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c);
bool valid1 = (N1_z2 > 1e-5f) && (N1_z2 <= (1.0f + 1e-5f));
@@ -256,8 +277,9 @@ ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N)
valid1 = (R1 >= 1e-5f);
valid2 = (R2 >= 1e-5f);
if (valid1 && valid2) {
- /* If both solutions are valid, return the one with the shallower reflection since it will be closer to the input
- * (if the original reflection wasn't shallow, we would not be in this part of the function). */
+ /* If both solutions are valid, return the one with the shallower reflection since it will be
+ * closer to the input (if the original reflection wasn't shallow, we would not be in this
+ * part of the function). */
N_new = (R1 < R2) ? N1 : N2;
}
else {
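
The reworked sample_uniform_cone() in the hunk above draws z = cos(theta) uniformly in [cos(angle), 1] via z = zMin + randu * (1 - zMin), so the density is the constant M_1_2PI_F / (1 - zMin) inside the cone, and the new pdf_uniform_cone() evaluates that same density for an arbitrary direction so background_sun_pdf() can use it for MIS. A quick standalone Monte Carlo check that this density integrates to one over the sphere (host code; the helper name is my own, not kernel API):

#include <cmath>
#include <cstdio>
#include <random>

static const float kPi = 3.14159265358979323846f;

/* Same density as pdf_uniform_cone() with N = +Z, so z = dot(N, D). */
static float pdf_uniform_cone_z(float z, float angle)
{
  const float z_min = std::cos(angle);
  return (z > z_min) ? 1.0f / (2.0f * kPi * (1.0f - z_min)) : 0.0f;
}

int main()
{
  std::mt19937 rng(42);
  std::uniform_real_distribution<float> u01(0.0f, 1.0f);
  const float angle = 0.25f; /* ~14.3 degree half-angle */
  const int n = 1000000;
  double sum = 0.0;
  for (int i = 0; i < n; i++) {
    /* For uniform sphere directions, z = cos(theta) is uniform in [-1, 1]. */
    const float z = 1.0f - 2.0f * u01(rng);
    sum += pdf_uniform_cone_z(z, angle);
  }
  /* Monte Carlo integral of the pdf over the sphere; should print ~1.0. */
  std::printf("integral ~ %.3f\n", 4.0 * kPi * sum / n);
  return 0;
}
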
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index 462ec037ee7..753cf4561b2 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -14,84 +14,11 @@
* limitations under the License.
*/
-#if defined(__SPLIT_KERNEL__) || defined(__KERNEL_CUDA__)
-# define __ATOMIC_PASS_WRITE__
-#endif
-
#include "kernel/kernel_id_passes.h"
CCL_NAMESPACE_BEGIN
-ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, float value)
-{
- ccl_global float *buf = buffer;
-#ifdef __ATOMIC_PASS_WRITE__
- atomic_add_and_fetch_float(buf, value);
-#else
- *buf += value;
-#endif
-}
-
-ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, float3 value)
-{
-#ifdef __ATOMIC_PASS_WRITE__
- ccl_global float *buf_x = buffer + 0;
- ccl_global float *buf_y = buffer + 1;
- ccl_global float *buf_z = buffer + 2;
-
- atomic_add_and_fetch_float(buf_x, value.x);
- atomic_add_and_fetch_float(buf_y, value.y);
- atomic_add_and_fetch_float(buf_z, value.z);
-#else
- ccl_global float3 *buf = (ccl_global float3 *)buffer;
- *buf += value;
-#endif
-}
-
-ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, float4 value)
-{
-#ifdef __ATOMIC_PASS_WRITE__
- ccl_global float *buf_x = buffer + 0;
- ccl_global float *buf_y = buffer + 1;
- ccl_global float *buf_z = buffer + 2;
- ccl_global float *buf_w = buffer + 3;
-
- atomic_add_and_fetch_float(buf_x, value.x);
- atomic_add_and_fetch_float(buf_y, value.y);
- atomic_add_and_fetch_float(buf_z, value.z);
- atomic_add_and_fetch_float(buf_w, value.w);
-#else
- ccl_global float4 *buf = (ccl_global float4 *)buffer;
- *buf += value;
-#endif
-}
-
#ifdef __DENOISING_FEATURES__
-ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer, float value)
-{
- kernel_write_pass_float(buffer, value);
-
- /* The online one-pass variance update that's used for the megakernel can't easily be implemented
- * with atomics, so for the split kernel the E[x^2] - 1/N * (E[x])^2 fallback is used. */
- kernel_write_pass_float(buffer + 1, value * value);
-}
-
-# ifdef __ATOMIC_PASS_WRITE__
-# define kernel_write_pass_float3_unaligned kernel_write_pass_float3
-# else
-ccl_device_inline void kernel_write_pass_float3_unaligned(ccl_global float *buffer, float3 value)
-{
- buffer[0] += value.x;
- buffer[1] += value.y;
- buffer[2] += value.z;
-}
-# endif
-
-ccl_device_inline void kernel_write_pass_float3_variance(ccl_global float *buffer, float3 value)
-{
- kernel_write_pass_float3_unaligned(buffer, value);
- kernel_write_pass_float3_unaligned(buffer + 3, value * value);
-}
ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg,
ccl_global float *buffer,
@@ -102,7 +29,9 @@ ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg,
if (kernel_data.film.pass_denoising_data == 0)
return;
- buffer += (sample & 1) ? DENOISING_PASS_SHADOW_B : DENOISING_PASS_SHADOW_A;
+ buffer += sample_is_even(kernel_data.integrator.sampling_pattern, sample) ?
+ DENOISING_PASS_SHADOW_B :
+ DENOISING_PASS_SHADOW_A;
path_total = ensure_finite(path_total);
path_total_shaded = ensure_finite(path_total_shaded);
@@ -113,14 +42,12 @@ ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg,
float value = path_total_shaded / max(path_total, 1e-7f);
kernel_write_pass_float(buffer + 2, value * value);
}
-#endif /* __DENOISING_FEATURES__ */
ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg,
ShaderData *sd,
ccl_addr_space PathState *state,
PathRadiance *L)
{
-#ifdef __DENOISING_FEATURES__
if (state->denoising_feature_weight == 0.0f) {
return;
}
@@ -133,7 +60,8 @@ ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg,
}
float3 normal = make_float3(0.0f, 0.0f, 0.0f);
- float3 albedo = make_float3(0.0f, 0.0f, 0.0f);
+ float3 diffuse_albedo = make_float3(0.0f, 0.0f, 0.0f);
+ float3 specular_albedo = make_float3(0.0f, 0.0f, 0.0f);
float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f;
for (int i = 0; i < sd->num_closure; i++) {
@@ -145,10 +73,31 @@ ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg,
/* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */
normal += sc->N * sc->sample_weight;
sum_weight += sc->sample_weight;
+
+ float3 closure_albedo = sc->weight;
+ /* Closures that include a Fresnel term typically have weights close to 1 even though their
+ * actual contribution is significantly lower.
+ * To account for this, we scale their weight by the average fresnel factor (the same is also
+ * done for the sample weight in the BSDF setup, so we don't need to scale that here). */
+ if (CLOSURE_IS_BSDF_MICROFACET_FRESNEL(sc->type)) {
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
+ closure_albedo *= bsdf->extra->fresnel_color;
+ }
+ else if (sc->type == CLOSURE_BSDF_PRINCIPLED_SHEEN_ID) {
+ PrincipledSheenBsdf *bsdf = (PrincipledSheenBsdf *)sc;
+ closure_albedo *= bsdf->avg_value;
+ }
+ else if (sc->type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID) {
+ closure_albedo *= bsdf_principled_hair_albedo(sc);
+ }
+
if (bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) {
- albedo += sc->weight;
+ diffuse_albedo += closure_albedo;
sum_nonspecular_weight += sc->sample_weight;
}
+ else {
+ specular_albedo += closure_albedo;
+ }
}
/* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. */
@@ -156,18 +105,22 @@ ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg,
if (sum_weight != 0.0f) {
normal /= sum_weight;
}
+
+ /* Transform normal into camera space. */
+ const Transform worldtocamera = kernel_data.cam.worldtocamera;
+ normal = transform_direction(&worldtocamera, normal);
+
L->denoising_normal += ensure_finite3(state->denoising_feature_weight * normal);
- L->denoising_albedo += ensure_finite3(state->denoising_feature_weight * albedo);
+ L->denoising_albedo += ensure_finite3(state->denoising_feature_weight *
+ state->denoising_feature_throughput * diffuse_albedo);
state->denoising_feature_weight = 0.0f;
}
-#else
- (void)kg;
- (void)sd;
- (void)state;
- (void)L;
-#endif /* __DENOISING_FEATURES__ */
+ else {
+ state->denoising_feature_throughput *= specular_albedo;
+ }
}
+#endif /* __DENOISING_FEATURES__ */
#ifdef __KERNEL_DEBUG__
ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg,
@@ -241,7 +194,7 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg,
average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) {
if (state->sample == 0) {
if (flag & PASSMASK(DEPTH)) {
- float depth = camera_distance(kg, sd->P);
+ float depth = camera_z_depth(kg, sd->P);
kernel_write_pass_float(buffer + kernel_data.film.pass_depth, depth);
}
if (flag & PASSMASK(OBJECT_ID)) {
@@ -301,8 +254,6 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg,
L->color_glossy += shader_bsdf_glossy(kg, sd) * throughput;
if (light_flag & PASSMASK_COMPONENT(TRANSMISSION))
L->color_transmission += shader_bsdf_transmission(kg, sd) * throughput;
- if (light_flag & PASSMASK_COMPONENT(SUBSURFACE))
- L->color_subsurface += shader_bsdf_subsurface(kg, sd) * throughput;
if (light_flag & PASSMASK(MIST)) {
/* bring depth into 0..1 range */
@@ -348,11 +299,8 @@ ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg,
if (light_flag & PASSMASK(TRANSMISSION_INDIRECT))
kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_indirect,
L->indirect_transmission);
- if (light_flag & PASSMASK(SUBSURFACE_INDIRECT))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_indirect,
- L->indirect_subsurface);
if (light_flag & PASSMASK(VOLUME_INDIRECT))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_indirect, L->indirect_scatter);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_indirect, L->indirect_volume);
if (light_flag & PASSMASK(DIFFUSE_DIRECT))
kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_direct, L->direct_diffuse);
if (light_flag & PASSMASK(GLOSSY_DIRECT))
@@ -360,11 +308,8 @@ ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg,
if (light_flag & PASSMASK(TRANSMISSION_DIRECT))
kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_direct,
L->direct_transmission);
- if (light_flag & PASSMASK(SUBSURFACE_DIRECT))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_direct,
- L->direct_subsurface);
if (light_flag & PASSMASK(VOLUME_DIRECT))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_direct, L->direct_scatter);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_direct, L->direct_volume);
if (light_flag & PASSMASK(EMISSION))
kernel_write_pass_float3(buffer + kernel_data.film.pass_emission, L->emission);
@@ -380,8 +325,6 @@ ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg,
if (light_flag & PASSMASK(TRANSMISSION_COLOR))
kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color,
L->color_transmission);
- if (light_flag & PASSMASK(SUBSURFACE_COLOR))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_color, L->color_subsurface);
if (light_flag & PASSMASK(SHADOW)) {
float4 shadow = L->shadow;
shadow.w = kernel_data.film.pass_shadow_scale;
@@ -403,7 +346,9 @@ ccl_device_inline void kernel_write_result(KernelGlobals *kg,
float alpha;
float3 L_sum = path_radiance_clamp_and_sum(kg, L, &alpha);
- kernel_write_pass_float4(buffer, make_float4(L_sum.x, L_sum.y, L_sum.z, alpha));
+ if (kernel_data.film.pass_flag & PASSMASK(COMBINED)) {
+ kernel_write_pass_float4(buffer, make_float4(L_sum.x, L_sum.y, L_sum.z, alpha));
+ }
kernel_write_light_passes(kg, buffer, L);
@@ -446,6 +391,45 @@ ccl_device_inline void kernel_write_result(KernelGlobals *kg,
#ifdef __KERNEL_DEBUG__
kernel_write_debug_passes(kg, buffer, L);
#endif
+
+ /* Adaptive Sampling. Fill the additional buffer with the odd samples and calculate our stopping
+ * criteria. This is the heuristic from "A hierarchical automatic stopping condition for Monte
+ * Carlo global illumination" except that here it is applied per pixel and not in hierarchical
+ * tiles. */
+ if (kernel_data.film.pass_adaptive_aux_buffer &&
+ kernel_data.integrator.adaptive_threshold > 0.0f) {
+ if (sample_is_even(kernel_data.integrator.sampling_pattern, sample)) {
+ kernel_write_pass_float4(buffer + kernel_data.film.pass_adaptive_aux_buffer,
+ make_float4(L_sum.x * 2.0f, L_sum.y * 2.0f, L_sum.z * 2.0f, 0.0f));
+ }
+#ifdef __KERNEL_CPU__
+ if ((sample > kernel_data.integrator.adaptive_min_samples) &&
+ kernel_data.integrator.adaptive_stop_per_sample) {
+ const int step = kernel_data.integrator.adaptive_step;
+
+ if ((sample & (step - 1)) == (step - 1)) {
+ kernel_do_adaptive_stopping(kg, buffer, sample);
+ }
+ }
+#endif
+ }
+
+ /* Write the sample count as negative numbers initially to mark the samples as in progress.
+ * Once the tile has finished rendering, the sign gets flipped and all the pixel values
+ * are scaled as if they were taken at a uniform sample count. */
+ if (kernel_data.film.pass_sample_count) {
+ /* Make sure it's a negative number. In progressive refine mode, this bit gets flipped between
+ * passes. */
+#ifdef __ATOMIC_PASS_WRITE__
+ atomic_fetch_and_or_uint32((ccl_global uint *)(buffer + kernel_data.film.pass_sample_count),
+ 0x80000000);
+#else
+ if (buffer[kernel_data.film.pass_sample_count] > 0) {
+ buffer[kernel_data.film.pass_sample_count] *= -1.0f;
+ }
+#endif
+ kernel_write_pass_float(buffer + kernel_data.film.pass_sample_count, -1.0f);
+ }
}
CCL_NAMESPACE_END
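
The adaptive-sampling hunk above writes 2 * L_sum into the auxiliary pass for half of the samples (those selected by sample_is_even()), so after N samples the aux pass and the combined pass hold two estimates of the same pixel and their difference measures the remaining noise; the actual stopping test lives in kernel_do_adaptive_stopping(), which is not part of this hunk. A standalone illustration of that half-buffer idea, with fake radiance values and an arbitrary threshold standing in for the real criterion:

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <random>

int main()
{
  std::mt19937 rng(7);
  std::normal_distribution<float> sample_value(0.5f, 0.2f); /* fake radiance samples */
  float combined = 0.0f, aux = 0.0f;
  const int num_samples = 256;
  for (int s = 0; s < num_samples; s++) {
    const float L = std::max(0.0f, sample_value(rng));
    combined += L;     /* what the combined pass accumulates */
    if ((s & 1) == 0)  /* stand-in for sample_is_even() */
      aux += 2.0f * L; /* the doubled half-buffer write from the hunk above */
  }
  const float mean_all = combined / num_samples;
  const float mean_half = aux / num_samples;
  const float error = std::fabs(mean_all - mean_half) / std::max(mean_all, 1e-4f);
  /* 0.1f is an arbitrary stand-in threshold, not the kernel's criterion. */
  std::printf("full = %.4f  half = %.4f  relative error = %.4f -> %s\n",
              mean_all, mean_half, error, error < 0.1f ? "stop" : "keep sampling");
  return 0;
}
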
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index b5219a0d04a..eec3556c3a1 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -18,6 +18,7 @@
# include "kernel/osl/osl_shader.h"
#endif
+// clang-format off
#include "kernel/kernel_random.h"
#include "kernel/kernel_projection.h"
#include "kernel/kernel_montecarlo.h"
@@ -27,9 +28,11 @@
#include "kernel/geom/geom.h"
#include "kernel/bvh/bvh.h"
+#include "kernel/kernel_write_passes.h"
#include "kernel/kernel_accumulate.h"
#include "kernel/kernel_shader.h"
#include "kernel/kernel_light.h"
+#include "kernel/kernel_adaptive_sampling.h"
#include "kernel/kernel_passes.h"
#if defined(__VOLUME__) || defined(__SUBSURFACE__)
@@ -47,6 +50,7 @@
#include "kernel/kernel_path_surface.h"
#include "kernel/kernel_path_volume.h"
#include "kernel/kernel_path_subsurface.h"
+// clang-format on
CCL_NAMESPACE_BEGIN
@@ -65,25 +69,7 @@ ccl_device_forceinline bool kernel_path_scene_intersect(KernelGlobals *kg,
ray->t = kernel_data.background.ao_distance;
}
-#ifdef __HAIR__
- float difl = 0.0f, extmax = 0.0f;
- uint lcg_state = 0;
-
- if (kernel_data.bvh.have_curves) {
- if ((kernel_data.cam.resolution == 1) && (state->flag & PATH_RAY_CAMERA)) {
- float3 pixdiff = ray->dD.dx + ray->dD.dy;
- /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
- difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
- }
-
- extmax = kernel_data.curve.maximum_width;
- lcg_state = lcg_state_init_addrspace(state, 0x51633e2d);
- }
-
- bool hit = scene_intersect(kg, *ray, visibility, isect, &lcg_state, difl, extmax);
-#else
- bool hit = scene_intersect(kg, *ray, visibility, isect, NULL, 0.0f, 0.0f);
-#endif /* __HAIR__ */
+ bool hit = scene_intersect(kg, ray, visibility, isect);
#ifdef __KERNEL_DEBUG__
if (state->flag & PATH_RAY_CAMERA) {
@@ -110,7 +96,7 @@ ccl_device_forceinline void kernel_path_lamp_emission(KernelGlobals *kg,
#ifdef __LAMP_MIS__
if (kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
/* ray starting from previous non-transparent bounce */
- Ray light_ray;
+ Ray light_ray ccl_optional_struct_init;
float3 N_pick;
if (state->ray_t == 0.0f) {
light_ray.P = emission_sd->P_pick;
@@ -141,6 +127,7 @@ ccl_device_forceinline void kernel_path_background(KernelGlobals *kg,
ccl_addr_space Ray *ray,
float3 throughput,
ShaderData *sd,
+ ccl_global float *buffer,
PathRadiance *L)
{
/* eval background shader if nothing hit */
@@ -161,8 +148,8 @@ ccl_device_forceinline void kernel_path_background(KernelGlobals *kg,
#ifdef __BACKGROUND__
/* sample background shader */
- float3 L_background = indirect_background(kg, sd, sd->N_pick, state, ray);
- path_radiance_accum_background(L, state, throughput, L_background);
+ float3 L_background = indirect_background(kg, sd, sd->N_pick, state, buffer, ray);
+ path_radiance_accum_background(kg, L, state, throughput, L_background);
#endif /* __BACKGROUND__ */
}
@@ -194,19 +181,19 @@ ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(KernelGlobals *k
Ray volume_ray = *ray;
volume_ray.t = (hit) ? isect->t : FLT_MAX;
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+ float step_size = volume_stack_step_size(kg, state->volume_stack);
# ifdef __VOLUME_DECOUPLED__
int sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
bool direct = (state->flag & PATH_RAY_CAMERA) != 0;
- bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, direct, sampling_method);
+ bool decoupled = kernel_volume_use_decoupled(kg, step_size, direct, sampling_method);
if (decoupled) {
/* cache steps along volume for repeated sampling */
VolumeSegment volume_segment;
shader_setup_from_volume(kg, sd, &volume_ray);
- kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, heterogeneous);
+ kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, step_size);
kernel_update_light_picking(sd, state);
@@ -214,7 +201,7 @@ ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(KernelGlobals *k
/* emission */
if (volume_segment.closure_flag & SD_EMISSION)
- path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission);
+ path_radiance_accum_emission(kg, L, state, *throughput, volume_segment.accum_emission);
/* scattering */
VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
@@ -254,7 +241,7 @@ ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(KernelGlobals *k
{
/* integrate along volume segment with distance sampling */
VolumeIntegrateResult result = kernel_volume_integrate(
- kg, state, sd, &volume_ray, L, throughput, heterogeneous);
+ kg, state, sd, &volume_ray, L, throughput, step_size);
kernel_update_light_picking(sd, state);
@@ -297,7 +284,7 @@ ccl_device_forceinline bool kernel_path_shader_apply(KernelGlobals *kg,
float3 bg = make_float3(0.0f, 0.0f, 0.0f);
if (!kernel_data.background.transparent) {
- bg = indirect_background(kg, emission_sd, sd->N_pick, state, ray);
+ bg = indirect_background(kg, emission_sd, sd->N_pick, state, NULL, ray);
}
path_radiance_accum_shadowcatcher(L, throughput, bg);
}
@@ -312,19 +299,11 @@ ccl_device_forceinline bool kernel_path_shader_apply(KernelGlobals *kg,
#ifdef __HOLDOUT__
if (((sd->flag & SD_HOLDOUT) || (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
(state->flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
+ const float3 holdout_weight = shader_holdout_apply(kg, sd);
if (kernel_data.background.transparent) {
- float3 holdout_weight;
- if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
- holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
- }
- else {
- holdout_weight = shader_holdout_eval(kg, sd);
- }
- /* any throughput is ok, should all be identical here */
L->transparent += average(holdout_weight * throughput);
}
-
- if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
+ if (isequal_float3(holdout_weight, make_float3(1.0f, 1.0f, 1.0f))) {
return false;
}
}
@@ -364,7 +343,7 @@ ccl_device_forceinline bool kernel_path_shader_apply(KernelGlobals *kg,
float3 emission = indirect_primitive_emission(
kg, sd, ray_length, P_pick, N_pick, state->flag, state->ray_pdf, has_volume);
- path_radiance_accum_emission(L, state, throughput, emission);
+ path_radiance_accum_emission(kg, L, state, throughput, emission);
}
#endif /* __EMISSION__ */
@@ -375,13 +354,19 @@ ccl_device_forceinline bool kernel_path_shader_apply(KernelGlobals *kg,
return true;
}
-ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
- ShaderData *sd,
- ShaderData *emission_sd,
- PathRadiance *L,
- ccl_addr_space PathState *state,
- float3 throughput,
- float3 ao_alpha)
+#ifdef __KERNEL_OPTIX__
+ccl_device_inline /* inline trace calls */
+#else
+ccl_device_noinline
+#endif
+ void
+ kernel_path_ao(KernelGlobals *kg,
+ ShaderData *sd,
+ ShaderData *emission_sd,
+ PathRadiance *L,
+ ccl_addr_space PathState *state,
+ float3 throughput,
+ float3 ao_alpha)
{
PROFILING_INIT(kg, PROFILING_AO);
@@ -410,7 +395,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
light_ray.dD = differential3_zero();
if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) {
- path_radiance_accum_ao(L, state, throughput, ao_alpha, ao_bsdf, ao_shadow);
+ path_radiance_accum_ao(kg, L, state, throughput, ao_alpha, ao_bsdf, ao_shadow);
}
else {
path_radiance_accum_total_ao(L, state, throughput, ao_bsdf);
@@ -463,7 +448,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
/* Shade background. */
if (!hit) {
- kernel_path_background(kg, state, ray, throughput, sd, L);
+ kernel_path_background(kg, state, ray, throughput, sd, NULL, L);
break;
}
else if (path_state_ao_bounce(kg, state)) {
@@ -480,7 +465,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
# endif
/* Evaluate shader. */
- shader_eval_surface(kg, sd, state, state->flag);
+ shader_eval_surface(kg, sd, state, NULL, state->flag);
shader_prepare_closures(sd, state);
/* Apply shadow catcher, holdout, emission. */
@@ -490,8 +475,8 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
}
/* path termination. this is a strange place to put the termination, it's
- * mainly due to the mixed in MIS that we use. gives too many unneeded
- * shader evaluations, only need emission if we are going to terminate */
+ * mainly due to the mixed in MIS that we use. gives too many unneeded
+ * shader evaluations, only need emission if we are going to terminate */
float probability = path_state_continuation_probability(kg, state, throughput);
if (probability == 0.0f) {
@@ -508,7 +493,9 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
kernel_update_light_picking(sd, state);
+# ifdef __DENOISING_FEATURES__
kernel_update_denoising_features(kg, sd, state, L);
+# endif
# ifdef __AO__
/* ambient occlusion */
@@ -519,7 +506,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
# ifdef __SUBSURFACE__
/* bssrdf scatter to a different location on the same object, replacing
- * the closures with a diffuse BSDF */
+ * the closures with a diffuse BSDF */
if (sd->flag & SD_BSSRDF) {
if (kernel_path_subsurface_scatter(
kg, sd, emission_sd, L, state, ray, &throughput, &ss_indirect)) {
@@ -529,12 +516,10 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
# endif /* __SUBSURFACE__ */
# if defined(__EMISSION__)
- if (kernel_data.integrator.use_direct_light) {
- int all = (kernel_data.integrator.sample_all_lights_indirect) ||
- (state->flag & PATH_RAY_SHADOW_CATCHER);
- kernel_branched_path_surface_connect_light(
- kg, sd, emission_sd, state, throughput, 1.0f, L, all);
- }
+ int all = (kernel_data.integrator.sample_all_lights_indirect) ||
+ (state->flag & PATH_RAY_SHADOW_CATCHER);
+ kernel_branched_path_surface_connect_light(
+ kg, sd, emission_sd, state, throughput, 1.0f, L, all);
# endif /* defined(__EMISSION__) */
# ifdef __VOLUME__
@@ -605,7 +590,7 @@ ccl_device_forceinline void kernel_path_integrate(KernelGlobals *kg,
/* Shade background. */
if (!hit) {
- kernel_path_background(kg, state, ray, throughput, &sd, L);
+ kernel_path_background(kg, state, ray, throughput, &sd, buffer, L);
break;
}
else if (path_state_ao_bounce(kg, state)) {
@@ -622,7 +607,7 @@ ccl_device_forceinline void kernel_path_integrate(KernelGlobals *kg,
# endif
/* Evaluate shader. */
- shader_eval_surface(kg, &sd, state, state->flag);
+ shader_eval_surface(kg, &sd, state, buffer, state->flag);
shader_prepare_closures(&sd, state);
/* Apply shadow catcher, holdout, emission. */
@@ -632,8 +617,8 @@ ccl_device_forceinline void kernel_path_integrate(KernelGlobals *kg,
}
/* path termination. this is a strange place to put the termination, it's
- * mainly due to the mixed in MIS that we use. gives too many unneeded
- * shader evaluations, only need emission if we are going to terminate */
+ * mainly due to the mixed in MIS that we use. gives too many unneeded
+ * shader evaluations, only need emission if we are going to terminate */
float probability = path_state_continuation_probability(kg, state, throughput);
if (probability == 0.0f) {
@@ -649,7 +634,9 @@ ccl_device_forceinline void kernel_path_integrate(KernelGlobals *kg,
kernel_update_light_picking(&sd, state);
+# ifdef __DENOISING_FEATURES__
kernel_update_denoising_features(kg, &sd, state, L);
+# endif
# ifdef __AO__
/* ambient occlusion */
@@ -660,7 +647,7 @@ ccl_device_forceinline void kernel_path_integrate(KernelGlobals *kg,
# ifdef __SUBSURFACE__
/* bssrdf scatter to a different location on the same object, replacing
- * the closures with a diffuse BSDF */
+ * the closures with a diffuse BSDF */
if (sd.flag & SD_BSSRDF) {
if (kernel_path_subsurface_scatter(
kg, &sd, emission_sd, L, state, ray, &throughput, &ss_indirect)) {
@@ -669,8 +656,10 @@ ccl_device_forceinline void kernel_path_integrate(KernelGlobals *kg,
}
# endif /* __SUBSURFACE__ */
+# ifdef __EMISSION__
/* direct lighting */
kernel_path_surface_connect_light(kg, &sd, emission_sd, throughput, state, L);
+# endif /* __EMISSION__ */
# ifdef __VOLUME__
}
@@ -706,21 +695,31 @@ ccl_device void kernel_path_trace(
buffer += index * pass_stride;
+ if (kernel_data.film.pass_adaptive_aux_buffer) {
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if ((*aux).w > 0.0f) {
+ return;
+ }
+ }
+
/* Initialize random numbers and sample ray. */
uint rng_hash;
Ray ray;
kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray);
+# ifndef __KERNEL_OPTIX__
if (ray.t == 0.0f) {
return;
}
+# endif
/* Initialize state. */
float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
PathRadiance L;
- path_radiance_init(&L, kernel_data.film.use_light_pass);
+ path_radiance_init(kg, &L);
ShaderDataTinyStorage emission_sd_storage;
ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
@@ -728,6 +727,13 @@ ccl_device void kernel_path_trace(
PathState state;
path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
+# ifdef __KERNEL_OPTIX__
+ /* Force struct into local memory to avoid costly spilling on trace calls. */
+ if (pass_stride < 0) /* This is never executed and just prevents the compiler from doing SROA. */
+ for (int i = 0; i < sizeof(L); ++i)
+ reinterpret_cast<unsigned char *>(&L)[-pass_stride + i] = 0;
+# endif
+
/* Integrate. */
kernel_path_integrate(kg, &state, throughput, &ray, &L, buffer, emission_sd);
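
kernel_path_trace() now begins by reading the w channel of the pixel's adaptive-sampling aux pass and returns immediately when it is positive, presumably set by the stopping test (kernel_do_adaptive_stopping() in this patch), so converged pixels stop consuming samples. A plain host-side sketch of that skip; the struct layout and names are illustrative only:

#include <cstdio>
#include <vector>

struct AuxPixel {
  float x, y, z, w; /* w > 0 marks the pixel as converged */
};

static void path_trace_sample(std::vector<AuxPixel> &aux, int index)
{
  if (aux[index].w > 0.0f) {
    return; /* the early return added at the top of kernel_path_trace() */
  }
  /* ... set up RNG and camera ray, integrate, accumulate into the buffer ... */
  std::printf("pixel %d: traced one more sample\n", index);
}

int main()
{
  std::vector<AuxPixel> aux(4, AuxPixel{0.0f, 0.0f, 0.0f, 0.0f});
  aux[2].w = 1.0f; /* pretend pixel 2 already passed the stopping test */
  for (int s = 0; s < 2; s++) {
    for (int i = 0; i < 4; i++) {
      path_trace_sample(aux, i);
    }
  }
  return 0;
}
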
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index 7ec535ee215..64da8c0b83a 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -55,7 +55,7 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) {
path_radiance_accum_ao(
- L, state, throughput * num_samples_inv, ao_alpha, ao_bsdf, ao_shadow);
+ kg, L, state, throughput * num_samples_inv, ao_alpha, ao_bsdf, ao_shadow);
}
else {
path_radiance_accum_total_ao(L, state, throughput * num_samples_inv, ao_bsdf);
@@ -91,7 +91,7 @@ ccl_device_forceinline void kernel_branched_path_volume(KernelGlobals *kg,
Ray volume_ray = *ray;
volume_ray.t = (hit) ? isect->t : FLT_MAX;
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+ float step_size = volume_stack_step_size(kg, state->volume_stack);
# ifdef __VOLUME_DECOUPLED__
/* decoupled ray marching only supported on CPU */
@@ -100,7 +100,7 @@ ccl_device_forceinline void kernel_branched_path_volume(KernelGlobals *kg,
VolumeSegment volume_segment;
shader_setup_from_volume(kg, sd, &volume_ray);
- kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, heterogeneous);
+ kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, step_size);
kernel_update_light_picking(sd, state);
@@ -150,7 +150,7 @@ ccl_device_forceinline void kernel_branched_path_volume(KernelGlobals *kg,
/* emission and transmittance */
if (volume_segment.closure_flag & SD_EMISSION)
- path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission);
+ path_radiance_accum_emission(kg, L, state, *throughput, volume_segment.accum_emission);
*throughput *= volume_segment.accum_transmittance;
/* free cached steps */
@@ -175,7 +175,7 @@ ccl_device_forceinline void kernel_branched_path_volume(KernelGlobals *kg,
path_state_branch(&ps, j, num_samples);
VolumeIntegrateResult result = kernel_volume_integrate(
- kg, &ps, sd, &volume_ray, L, &tp, heterogeneous);
+ kg, &ps, sd, &volume_ray, L, &tp, step_size);
kernel_update_light_picking(sd, &ps);
@@ -206,14 +206,14 @@ ccl_device_forceinline void kernel_branched_path_volume(KernelGlobals *kg,
# endif /* __VOLUME__ */
/* bounce off surface and integrate indirect light */
-ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
- ShaderData *sd,
- ShaderData *indirect_sd,
- ShaderData *emission_sd,
- float3 throughput,
- float num_samples_adjust,
- PathState *state,
- PathRadiance *L)
+ccl_device_noinline_cpu void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
+ ShaderData *sd,
+ ShaderData *indirect_sd,
+ ShaderData *emission_sd,
+ float3 throughput,
+ float num_samples_adjust,
+ PathState *state,
+ PathRadiance *L)
{
float sum_sample_weight = 0.0f;
# ifdef __DENOISING_FEATURES__
@@ -385,7 +385,7 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
/* initialize */
float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
- path_radiance_init(L, kernel_data.film.use_light_pass);
+ path_radiance_init(kg, L);
/* shader data memory used for both volumes and surfaces, saves stack space */
ShaderData sd;
@@ -415,7 +415,7 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
/* Shade background. */
if (!hit) {
- kernel_path_background(kg, &state, &ray, throughput, &sd, L);
+ kernel_path_background(kg, &state, &ray, throughput, &sd, buffer, L);
break;
}
@@ -428,7 +428,7 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
if (!(sd.flag & SD_HAS_ONLY_VOLUME)) {
# endif
- shader_eval_surface(kg, &sd, &state, state.flag);
+ shader_eval_surface(kg, &sd, &state, buffer, state.flag);
shader_merge_closures(&sd);
/* Apply shadow catcher, holdout, emission. */
@@ -440,8 +440,8 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
/* transparency termination */
if (state.flag & PATH_RAY_TRANSPARENT) {
/* path termination. this is a strange place to put the termination, it's
- * mainly due to the mixed in MIS that we use. gives too many unneeded
- * shader evaluations, only need emission if we are going to terminate */
+ * mainly due to the mixed in MIS that we use. gives too many unneeded
+ * shader evaluations, only need emission if we are going to terminate */
float probability = path_state_continuation_probability(kg, &state, throughput);
if (probability == 0.0f) {
@@ -459,7 +459,9 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
kernel_update_light_picking(&sd, &state);
+# ifdef __DENOISING_FEATURES__
kernel_update_denoising_features(kg, &sd, &state, L);
+# endif
# ifdef __AO__
/* ambient occlusion */
@@ -535,6 +537,14 @@ ccl_device void kernel_branched_path_trace(
buffer += index * pass_stride;
+ if (kernel_data.film.pass_adaptive_aux_buffer) {
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if ((*aux).w > 0.0f) {
+ return;
+ }
+ }
+
/* initialize random numbers and ray */
uint rng_hash;
Ray ray;
diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h
index cdca0b1f9bf..c389c815ae2 100644
--- a/intern/cycles/kernel/kernel_path_state.h
+++ b/intern/cycles/kernel/kernel_path_state.h
@@ -41,9 +41,11 @@ ccl_device_inline void path_state_init(KernelGlobals *kg,
if (kernel_data.film.pass_denoising_data) {
state->flag |= PATH_RAY_STORE_SHADOW_INFO;
state->denoising_feature_weight = 1.0f;
+ state->denoising_feature_throughput = make_float3(1.0f, 1.0f, 1.0f);
}
else {
state->denoising_feature_weight = 0.0f;
+ state->denoising_feature_throughput = make_float3(0.0f, 0.0f, 0.0f);
}
#endif /* __DENOISING_FEATURES__ */
@@ -209,8 +211,8 @@ ccl_device_inline float path_state_continuation_probability(KernelGlobals *kg,
return 0.0f;
}
else if (state->flag & PATH_RAY_TRANSPARENT) {
- /* Do at least one bounce without RR. */
- if (state->transparent_bounce <= 1) {
+ /* Do at least specified number of bounces without RR. */
+ if (state->transparent_bounce <= kernel_data.integrator.transparent_min_bounce) {
return 1.0f;
}
#ifdef __SHADOW_TRICKS__
@@ -221,8 +223,8 @@ ccl_device_inline float path_state_continuation_probability(KernelGlobals *kg,
#endif
}
else {
- /* Do at least one bounce without RR. */
- if (state->bounce <= 1) {
+ /* Do at least specified number of bounces without RR. */
+ if (state->bounce <= kernel_data.integrator.min_bounce) {
return 1.0f;
}
#ifdef __SHADOW_TRICKS__
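
The change above replaces the hardcoded "at least one bounce before Russian roulette" with the user-controlled min_bounce / transparent_min_bounce values, so paths below that depth always continue with probability 1 and only deeper bounces are subject to termination. A minimal standalone roulette loop showing the pattern; the clamped probability and the 0.6 attenuation are placeholders, not the kernel's actual continuation heuristic:

#include <algorithm>
#include <cstdio>
#include <random>

int main()
{
  std::mt19937 rng(3);
  std::uniform_real_distribution<float> u01(0.0f, 1.0f);
  const int min_bounce = 3; /* stand-in for kernel_data.integrator.min_bounce */
  float throughput = 1.0f;
  for (int bounce = 0;; bounce++) {
    const float probability = (bounce <= min_bounce) ?
                                  1.0f :
                                  std::min(1.0f, std::max(0.05f, throughput));
    if (u01(rng) >= probability) {
      std::printf("terminated at bounce %d\n", bounce);
      break;
    }
    throughput /= probability; /* reweight so the estimator stays unbiased */
    throughput *= 0.6f;        /* fake BSDF attenuation for this sketch */
  }
  return 0;
}
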
diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h
index 3ad737acd85..ca27b68a627 100644
--- a/intern/cycles/kernel/kernel_path_surface.h
+++ b/intern/cycles/kernel/kernel_path_surface.h
@@ -224,8 +224,9 @@ ccl_device void accum_light_tree_contribution(KernelGlobals *kg,
#if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__) || defined(__SHADOW_TRICKS__) || \
defined(__BAKING__)
-/* branched path tracing: connect path directly to position on one or more lights and add it to L */
-ccl_device_noinline void kernel_branched_path_surface_connect_light(
+/* branched path tracing: connect path directly to position on one or more lights and add it to L
+ */
+ccl_device_noinline_cpu void kernel_branched_path_surface_connect_light(
KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
@@ -438,9 +439,9 @@ ccl_device bool kernel_branched_path_surface_bounce(KernelGlobals *kg,
{
/* sample BSDF */
float bsdf_pdf;
- BsdfEval bsdf_eval;
- float3 bsdf_omega_in;
- differential3 bsdf_domega_in;
+ BsdfEval bsdf_eval ccl_optional_struct_init;
+ float3 bsdf_omega_in ccl_optional_struct_init;
+ differential3 bsdf_domega_in ccl_optional_struct_init;
float bsdf_u, bsdf_v;
path_branched_rng_2D(
kg, state->rng_hash, state, sample, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
@@ -570,9 +571,9 @@ ccl_device bool kernel_path_surface_bounce(KernelGlobals *kg,
if (sd->flag & SD_BSDF) {
/* sample BSDF */
float bsdf_pdf;
- BsdfEval bsdf_eval;
- float3 bsdf_omega_in;
- differential3 bsdf_domega_in;
+ BsdfEval bsdf_eval ccl_optional_struct_init;
+ float3 bsdf_omega_in ccl_optional_struct_init;
+ differential3 bsdf_domega_in ccl_optional_struct_init;
float bsdf_u, bsdf_v;
path_state_rng_2D(kg, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
int label;
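
Several locals above (BsdfEval, float3, differential3) gain a ccl_optional_struct_init qualifier. Its definition is not part of this diff; a plausible reading, stated here only as an assumption, is a macro that expands to an empty-brace initializer on back ends where uninitialized locals are costly or trigger warnings, and to nothing elsewhere. A self-contained demo of that kind of macro (FORCE_STRUCT_INIT and BsdfEvalStub are made up for illustration):

#include <cstdio>

#ifdef FORCE_STRUCT_INIT
#  define ccl_optional_struct_init = {}
#else
#  define ccl_optional_struct_init
#endif

struct BsdfEvalStub {
  float diffuse, glossy;
};

int main()
{
  BsdfEvalStub bsdf_eval ccl_optional_struct_init; /* same usage pattern as in the diff */
  bsdf_eval.diffuse = 0.0f;
  bsdf_eval.glossy = 0.0f;
  std::printf("%f %f\n", bsdf_eval.diffuse, bsdf_eval.glossy);
  return 0;
}
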
diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h
index 1d7c289d383..f2a36d172a8 100644
--- a/intern/cycles/kernel/kernel_path_volume.h
+++ b/intern/cycles/kernel/kernel_path_volume.h
@@ -26,55 +26,54 @@ ccl_device_inline void kernel_path_volume_connect_light(KernelGlobals *kg,
PathRadiance *L)
{
# ifdef __EMISSION__
- if (!kernel_data.integrator.use_direct_light)
- return;
-
/* sample illumination from lights to find path contribution */
- float light_u, light_v;
- path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
-
- Ray light_ray;
- BsdfEval L_light;
- LightSample ls;
- bool is_lamp;
+ Ray light_ray ccl_optional_struct_init;
+ BsdfEval L_light ccl_optional_struct_init;
+ bool is_lamp = false;
+ bool has_emission = false;
+ light_ray.t = 0.0f;
+# ifdef __OBJECT_MOTION__
/* connect to light from given point where shader has been evaluated */
light_ray.time = sd->time;
- if (light_sample(
- kg, light_u, light_v, sd->time, sd->P_pick, sd->N_pick, state->bounce, &ls, true)) {
- float terminate = path_state_rng_light_termination(kg, state);
- if (direct_emission(
- kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
- /* trace shadow ray */
- float3 shadow;
+# endif
- if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
- }
+ if (kernel_data.integrator.use_direct_light) {
+ float light_u, light_v;
+ path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
+
+ LightSample ls ccl_optional_struct_init;
+ if (light_sample(kg, -1, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+ float terminate = path_state_rng_light_termination(kg, state);
+ has_emission = direct_emission(
+ kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate);
}
}
+
+ /* trace shadow ray */
+ float3 shadow;
+
+ const bool blocked = shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow);
+
+ if (has_emission && !blocked) {
+ /* accumulate */
+ path_radiance_accum_light(kg, L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
+ }
# endif /* __EMISSION__ */
}
-# ifdef __KERNEL_GPU__
-ccl_device_noinline
-# else
-ccl_device
-# endif
- bool
- kernel_path_volume_bounce(KernelGlobals *kg,
- ShaderData *sd,
- ccl_addr_space float3 *throughput,
- ccl_addr_space PathState *state,
- PathRadianceState *L_state,
- ccl_addr_space Ray *ray)
+ccl_device_noinline_cpu bool kernel_path_volume_bounce(KernelGlobals *kg,
+ ShaderData *sd,
+ ccl_addr_space float3 *throughput,
+ ccl_addr_space PathState *state,
+ PathRadianceState *L_state,
+ ccl_addr_space Ray *ray)
{
/* sample phase function */
float phase_pdf;
- BsdfEval phase_eval;
- float3 phase_omega_in;
- differential3 phase_domega_in;
+ BsdfEval phase_eval ccl_optional_struct_init;
+ float3 phase_omega_in ccl_optional_struct_init;
+ differential3 phase_domega_in ccl_optional_struct_init;
float phase_u, phase_v;
path_state_rng_2D(kg, state, PRNG_BSDF_U, &phase_u, &phase_v);
int label;
@@ -128,7 +127,7 @@ ccl_device
return true;
}
-# ifndef __SPLIT_KERNEL__
+# if !defined(__SPLIT_KERNEL__) && (defined(__BRANCHED_PATH__) || defined(__VOLUME_DECOUPLED__))
ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
@@ -140,94 +139,71 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg,
const VolumeSegment *segment)
{
# ifdef __EMISSION__
- if (!kernel_data.integrator.use_direct_light)
- return;
-
- Ray light_ray;
- BsdfEval L_light;
- bool is_lamp;
+ BsdfEval L_light ccl_optional_struct_init;
- light_ray.time = sd->time;
- if (sample_all_lights && !kernel_data.integrator.use_light_tree) {
- /* lamp sampling */
- for (int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
- if (UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce)))
- continue;
-
- int num_samples = light_select_num_samples(kg, i);
- float num_samples_inv = 1.0f / num_samples;
- uint lamp_rng_hash = cmj_hash(state->rng_hash, i);
-
- for (int j = 0; j < num_samples; j++) {
- /* sample random position on given light */
- float light_u, light_v;
- path_branched_rng_2D(
- kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
-
- LightSample ls;
- lamp_light_sample(kg, i, light_u, light_v, ray->P, &ls);
-
- float3 tp = throughput;
-
- /* sample position on volume segment */
- float rphase = path_branched_rng_1D(
- kg, state->rng_hash, state, j, num_samples, PRNG_PHASE_CHANNEL);
- float rscatter = path_branched_rng_1D(
- kg, state->rng_hash, state, j, num_samples, PRNG_SCATTER_DISTANCE);
-
- VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
- state,
- ray,
- sd,
- &tp,
- rphase,
- rscatter,
- segment,
- (ls.t != FLT_MAX) ? &ls.P :
- NULL,
- false);
+ int num_lights = 1;
+ if (sample_all_lights) {
+ num_lights = kernel_data.integrator.num_all_lights;
+ if (kernel_data.integrator.pdf_triangles != 0.0f) {
+ num_lights += 1;
+ }
+ }
- /* todo: split up light_sample so we don't have to call it again with new position */
- if (result == VOLUME_PATH_SCATTERED &&
- lamp_light_sample(kg, i, light_u, light_v, sd->P_pick, &ls)) {
-
- float terminate = path_branched_rng_light_termination(
- kg, state->rng_hash, state, j, num_samples);
- if (direct_emission(
- kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
- /* trace shadow ray */
- float3 shadow;
-
- if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(
- L, state, tp * num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
- }
- }
+ for (int i = 0; i < num_lights; ++i) {
+ /* sample one light at random */
+ int num_samples = 1;
+ int num_all_lights = 1;
+ uint lamp_rng_hash = state->rng_hash;
+ bool double_pdf = false;
+ bool is_mesh_light = false;
+ bool is_lamp = false;
+
+ if (sample_all_lights) {
+ /* lamp sampling */
+ is_lamp = i < kernel_data.integrator.num_all_lights;
+ if (is_lamp) {
+ if (UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce))) {
+ continue;
}
+ num_samples = light_select_num_samples(kg, i);
+ num_all_lights = kernel_data.integrator.num_all_lights;
+ lamp_rng_hash = cmj_hash(state->rng_hash, i);
+ double_pdf = kernel_data.integrator.pdf_triangles != 0.0f;
+ }
+ /* mesh light sampling */
+ else {
+ num_samples = kernel_data.integrator.mesh_light_samples;
+ double_pdf = kernel_data.integrator.num_all_lights != 0;
+ is_mesh_light = true;
}
}
- /* mesh light sampling */
- if (kernel_data.integrator.pdf_triangles != 0.0f) {
- int num_samples = kernel_data.integrator.mesh_light_samples;
- float num_samples_inv = 1.0f / num_samples;
+ float num_samples_inv = 1.0f / (num_samples * num_all_lights);
+
+ for (int j = 0; j < num_samples; j++) {
+ Ray light_ray ccl_optional_struct_init;
+ light_ray.t = 0.0f; /* reset ray */
+# ifdef __OBJECT_MOTION__
+ light_ray.time = sd->time;
+# endif
+ bool has_emission = false;
- for (int j = 0; j < num_samples; j++) {
- /* sample random position on random triangle */
+ float3 tp = throughput;
+
+ if (kernel_data.integrator.use_direct_light) {
+ /* sample random position on random light/triangle */
float light_u, light_v;
path_branched_rng_2D(
- kg, state->rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+ kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
/* only sample triangle lights */
- if (kernel_data.integrator.num_all_lights)
+ if (is_mesh_light && double_pdf) {
light_u = 0.5f * light_u;
+ }
- LightSample ls;
- light_sample(
- kg, light_u, light_v, sd->time, sd->P_pick, sd->N_pick, state->bounce, &ls, true);
-
- float3 tp = throughput;
+ LightSample ls ccl_optional_struct_init;
+ const int lamp = is_lamp ? i : -1;
+ light_sample(kg, lamp, light_u, light_v, sd->time, ray->P, state->bounce, &ls);
/* sample position on volume segment */
float rphase = path_branched_rng_1D(
@@ -260,63 +236,24 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg,
if (kernel_data.integrator.num_all_lights)
ls.pdf *= 2.0f;
- float terminate = path_branched_rng_light_termination(
- kg, state->rng_hash, state, j, num_samples);
- if (direct_emission(
- kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
- /* trace shadow ray */
- float3 shadow;
-
- if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(
- L, state, tp * num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
- }
+ /* sample random light */
+ float terminate = path_branched_rng_light_termination(
+ kg, state->rng_hash, state, j, num_samples);
+ has_emission = direct_emission(
+ kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate);
}
}
}
- }
- }
- else {
- /* sample random position on random light */
- float light_u, light_v;
- path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
- LightSample ls;
- light_sample(kg, light_u, light_v, sd->time, sd->P_pick, sd->N_pick, state->bounce, &ls, true);
-
- float3 tp = throughput;
-
- /* sample position on volume segment */
- float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
- float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
-
- VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
- state,
- ray,
- sd,
- &tp,
- rphase,
- rscatter,
- segment,
- (ls.t != FLT_MAX) ? &ls.P :
- NULL,
- false);
-
- /* todo: split up light_sample so we don't have to call it again with new position */
- if (result == VOLUME_PATH_SCATTERED &&
- light_sample(kg, light_u, light_v, sd->time, sd->P, sd->N, state->bounce, &ls, true)) {
- /* sample random light */
- float terminate = path_state_rng_light_termination(kg, state);
- if (direct_emission(
- kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
- /* trace shadow ray */
- float3 shadow;
-
- if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, state, tp, &L_light, shadow, 1.0f, is_lamp);
- }
+ /* trace shadow ray */
+ float3 shadow;
+
+ const bool blocked = shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow);
+
+ if (has_emission && !blocked) {
+ /* accumulate */
+ path_radiance_accum_light(
+ kg, L, state, tp * num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
}
}
}
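
The restructured volume connect functions above prepare the light sample first (has_emission, and a zero-length ray when there is nothing to connect) and then issue a single unconditional shadow_blocked() call, accumulating only when has_emission && !blocked. The patch does not state the motivation, but keeping the trace call at one program point typically reduces divergence on GPU back ends. A control-flow sketch of that pattern with illustrative names:

#include <cstdio>

struct Ray {
  float t = 0.0f;
};

static bool trace_shadow(const Ray &ray) /* stand-in for shadow_blocked() */
{
  if (ray.t == 0.0f)
    return true; /* zero-length ray: nothing to connect, treat as blocked */
  return false;  /* pretend the light is visible */
}

static void connect_light(bool use_direct_light, float light_distance, float *radiance)
{
  Ray light_ray; /* t == 0 means "no connection prepared" */
  bool has_emission = false;
  if (use_direct_light) {
    light_ray.t = light_distance; /* stand-in for direct_emission() setup */
    has_emission = light_distance > 0.0f;
  }
  const bool blocked = trace_shadow(light_ray); /* single trace call site */
  if (has_emission && !blocked)
    *radiance += 1.0f; /* stand-in for path_radiance_accum_light() */
}

int main()
{
  float radiance = 0.0f;
  connect_light(true, 3.0f, &radiance);
  connect_light(false, 0.0f, &radiance);
  std::printf("radiance = %.1f\n", radiance);
  return 0;
}
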
diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h
index 91a39fc1465..451d2a0cedf 100644
--- a/intern/cycles/kernel/kernel_queues.h
+++ b/intern/cycles/kernel/kernel_queues.h
@@ -73,7 +73,7 @@ ccl_device void enqueue_ray_index_local(
int queue_number, /* Queue in which to enqueue ray index. */
char enqueue_flag, /* True for threads whose ray index has to be enqueued. */
int queuesize, /* queue size. */
- ccl_local_param unsigned int *local_queue_atomics, /* To to local queue atomics. */
+ ccl_local_param unsigned int *local_queue_atomics, /* To do local queue atomics. */
ccl_global int *Queue_data, /* Queues. */
ccl_global int *Queue_index) /* To do global queue atomics. */
{
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index 6779c1f7160..a9b17854f25 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -41,10 +41,9 @@ ccl_device uint sobol_dimension(KernelGlobals *kg, int index, int dimension)
{
uint result = 0;
uint i = index + SOBOL_SKIP;
- for (uint j = 0; i; i >>= 1, j++) {
- if (i & 1) {
- result ^= kernel_tex_fetch(__sobol_directions, 32 * dimension + j);
- }
+ for (int j = 0, x; (x = find_first_set(i)); i >>= x) {
+ j += x;
+ result ^= kernel_tex_fetch(__sample_pattern_lut, 32 * dimension + j - 1);
}
return result;
}
@@ -57,7 +56,9 @@ ccl_device_forceinline float path_rng_1D(
#ifdef __DEBUG_CORRELATION__
return (float)drand48();
#endif
-
+ if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) {
+ return pmj_sample_1D(kg, sample, rng_hash, dimension);
+ }
#ifdef __CMJ__
# ifdef __SOBOL__
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
@@ -100,7 +101,12 @@ ccl_device_forceinline void path_rng_2D(KernelGlobals *kg,
*fy = (float)drand48();
return;
#endif
-
+ if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) {
+ const float2 f = pmj_sample_2D(kg, sample, rng_hash, dimension);
+ *fx = f.x;
+ *fy = f.y;
+ return;
+ }
#ifdef __CMJ__
# ifdef __SOBOL__
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
@@ -130,7 +136,7 @@ ccl_device_inline void path_rng_init(KernelGlobals *kg,
float *fy)
{
/* load state */
- *rng_hash = hash_int_2d(x, y);
+ *rng_hash = hash_uint2(x, y);
*rng_hash ^= kernel_data.integrator.seed;
#ifdef __DEBUG_CORRELATION__
@@ -244,7 +250,7 @@ ccl_device_inline void path_branched_rng_2D(KernelGlobals *kg,
fy);
}
-/* Utitility functions to get light termination value,
+/* Utility functions to get light termination value,
* since it might not be needed in many cases.
*/
ccl_device_inline float path_state_rng_light_termination(KernelGlobals *kg,
@@ -285,4 +291,31 @@ ccl_device float lcg_step_float_addrspace(ccl_addr_space uint *rng)
return (float)*rng * (1.0f / (float)0xFFFFFFFF);
}
+ccl_device_inline bool sample_is_even(int pattern, int sample)
+{
+ if (pattern == SAMPLING_PATTERN_PMJ) {
+ /* See Section 10.2.1, "Progressive Multi-Jittered Sample Sequences", Christensen et al.
+ * We can use this to divide the sample sequence into two classes for easier variance
+ * estimation. */
+#if defined(__GNUC__) && !defined(__KERNEL_GPU__)
+ return __builtin_popcount(sample & 0xaaaaaaaa) & 1;
+#elif defined(__NVCC__)
+ return __popc(sample & 0xaaaaaaaa) & 1;
+#elif defined(__KERNEL_OPENCL__)
+ return popcount(sample & 0xaaaaaaaa) & 1;
+#else
+ /* TODO(Stefan): popcnt intrinsic for Windows with fallback for older CPUs. */
+ int i = sample & 0xaaaaaaaa;
+ i = i - ((i >> 1) & 0x55555555);
+ i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
+ i = (((i + (i >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
+ return i & 1;
+#endif
+ }
+ else {
+ /* TODO(Stefan): Are there reliable ways of dividing CMJ and Sobol into two classes? */
+ return sample & 0x1;
+ }
+}
+
CCL_NAMESPACE_END
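
sample_is_even() above classifies a PMJ02 sample by the parity of its odd-position index bits (mask 0xaaaaaaaa). A minimal standalone sketch of that classification, using the GCC/Clang __builtin_popcount() path; the helper name is illustrative.

/* Standalone sketch: class of the first 8 PMJ sample indices by parity of
 * the odd-position bits (mask 0xaaaaaaaa), mirroring sample_is_even(). */
#include <stdio.h>

static int is_even_class(int sample)
{
  return __builtin_popcount((unsigned)sample & 0xaaaaaaaau) & 1;
}

int main(void)
{
  for (int s = 0; s < 8; s++)
    printf("sample %d -> class %d\n", s, is_even_class(s));
  /* Prints 0,0,1,1,0,0,1,1: consecutive pairs alternate between the classes. */
  return 0;
}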
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index 351b623addb..e461e1642b6 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -23,10 +23,12 @@
* Release.
*/
+// clang-format off
#include "kernel/closure/alloc.h"
#include "kernel/closure/bsdf_util.h"
#include "kernel/closure/bsdf.h"
#include "kernel/closure/emissive.h"
+// clang-format on
#include "kernel/svm/svm.h"
@@ -48,17 +50,21 @@ ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd
}
#endif
-ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
- ShaderData *sd,
- const Intersection *isect,
- const Ray *ray)
+#ifdef __KERNEL_OPTIX__
+ccl_device_inline
+#else
+ccl_device_noinline
+#endif
+ void
+ shader_setup_from_ray(KernelGlobals *kg,
+ ShaderData *sd,
+ const Intersection *isect,
+ const Ray *ray)
{
PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
-#ifdef __INSTANCING__
sd->object = (isect->object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, isect->prim) :
isect->object;
-#endif
sd->lamp = LAMP_NONE;
sd->type = isect->type;
@@ -74,18 +80,13 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
sd->ray_length = isect->t;
-#ifdef __UV__
sd->u = isect->u;
sd->v = isect->v;
-#endif
#ifdef __HAIR__
if (sd->type & PRIMITIVE_ALL_CURVE) {
/* curve */
- float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
-
- sd->shader = __float_as_int(curvedata.z);
- sd->P = curve_refine(kg, sd, isect, ray);
+ curve_shader_setup(kg, sd, isect, ray);
}
else
#endif
@@ -117,17 +118,15 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
-#ifdef __INSTANCING__
if (isect->object != OBJECT_NONE) {
/* instance transform */
object_normal_transform_auto(kg, sd, &sd->N);
object_normal_transform_auto(kg, sd, &sd->Ng);
-# ifdef __DPDU__
+#ifdef __DPDU__
object_dir_transform_auto(kg, sd, &sd->dPdu);
object_dir_transform_auto(kg, sd, &sd->dPdv);
-# endif
- }
#endif
+ }
/* backfacing test */
bool backfacing = (dot(sd->Ng, sd->I) < 0.0f);
@@ -177,10 +176,8 @@ ccl_device_inline
sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
sd->type = isect->type;
-# ifdef __UV__
sd->u = isect->u;
sd->v = isect->v;
-# endif
/* fetch triangle data */
if (sd->type == PRIMITIVE_TRIANGLE) {
@@ -207,17 +204,15 @@ ccl_device_inline
sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
-# ifdef __INSTANCING__
if (isect->object != OBJECT_NONE) {
/* instance transform */
object_normal_transform_auto(kg, sd, &sd->N);
object_normal_transform_auto(kg, sd, &sd->Ng);
-# ifdef __DPDU__
+# ifdef __DPDU__
object_dir_transform_auto(kg, sd, &sd->dPdu);
object_dir_transform_auto(kg, sd, &sd->dPdv);
-# endif
- }
# endif
+ }
/* backfacing test */
if (backfacing) {
@@ -276,17 +271,13 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
else
sd->type = PRIMITIVE_NONE;
- /* primitive */
-#ifdef __INSTANCING__
+ /* primitive */
sd->object = object;
-#endif
sd->lamp = LAMP_NONE;
/* currently no access to bvh prim index for strand sd->prim*/
sd->prim = prim;
-#ifdef __UV__
sd->u = u;
sd->v = v;
-#endif
sd->time = time;
sd->ray_length = t;
@@ -322,23 +313,19 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
if (sd->shader & SHADER_SMOOTH_NORMAL) {
sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v);
-#ifdef __INSTANCING__
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
object_normal_transform_auto(kg, sd, &sd->N);
}
-#endif
}
/* dPdu/dPdv */
#ifdef __DPDU__
triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
-# ifdef __INSTANCING__
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
object_dir_transform_auto(kg, sd, &sd->dPdu);
object_dir_transform_auto(kg, sd, &sd->dPdv);
}
-# endif
#endif
}
else {
@@ -424,15 +411,11 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg,
sd->time = ray->time;
sd->ray_length = 0.0f;
-#ifdef __INSTANCING__
sd->object = OBJECT_NONE;
-#endif
sd->lamp = LAMP_NONE;
sd->prim = PRIM_NONE;
-#ifdef __UV__
sd->u = 0.0f;
sd->v = 0.0f;
-#endif
#ifdef __DPDU__
/* dPdu/dPdv */
@@ -473,17 +456,13 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *s
sd->time = ray->time;
sd->ray_length = 0.0f; /* todo: can we set this to some useful value? */
-# ifdef __INSTANCING__
sd->object = OBJECT_NONE; /* todo: fill this for texture coordinates */
-# endif
sd->lamp = LAMP_NONE;
sd->prim = PRIM_NONE;
sd->type = PRIMITIVE_NONE;
-# ifdef __UV__
sd->u = 0.0f;
sd->v = 0.0f;
-# endif
# ifdef __DPDU__
/* dPdu/dPdv */
@@ -686,8 +665,7 @@ ccl_device_inline const ShaderClosure *shader_bsdf_pick(ShaderData *sd, float *r
if (r < next_sum) {
sampled = i;
- /* Rescale to reuse for direction sample, to better
- * preserve stratifaction. */
+ /* Rescale to reuse for direction sample, to better preserve stratification. */
*randu = (r - partial_sum) / sc->sample_weight;
break;
}
@@ -743,8 +721,7 @@ ccl_device_inline const ShaderClosure *shader_bssrdf_pick(ShaderData *sd,
*throughput *= (sum_bsdf + sum_bssrdf) / sum_bssrdf;
sampled = i;
- /* Rescale to reuse for direction sample, to better
- * preserve stratifaction. */
+ /* Rescale to reuse for direction sample, to better preserve stratification. */
*randu = (r - partial_sum) / sc->sample_weight;
break;
}
@@ -780,7 +757,7 @@ ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg,
kernel_assert(CLOSURE_IS_BSDF(sc->type));
int label;
- float3 eval;
+ float3 eval = make_float3(0.0f, 0.0f, 0.0f);
*pdf = 0.0f;
label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
@@ -810,7 +787,7 @@ ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg,
PROFILING_INIT(kg, PROFILING_CLOSURE_SAMPLE);
int label;
- float3 eval;
+ float3 eval = make_float3(0.0f, 0.0f, 0.0f);
*pdf = 0.0f;
label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
@@ -897,7 +874,8 @@ ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd)
for (int i = 0; i < sd->num_closure; i++) {
ShaderClosure *sc = &sd->closure[i];
- if (CLOSURE_IS_BSDF_DIFFUSE(sc->type))
+ if (CLOSURE_IS_BSDF_DIFFUSE(sc->type) || CLOSURE_IS_BSSRDF(sc->type) ||
+ CLOSURE_IS_BSDF_BSSRDF(sc->type))
eval += sc->weight;
}
@@ -932,20 +910,6 @@ ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd)
return eval;
}
-ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd)
-{
- float3 eval = make_float3(0.0f, 0.0f, 0.0f);
-
- for (int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
-
- if (CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type))
- eval += sc->weight;
- }
-
- return eval;
-}
-
ccl_device float3 shader_bsdf_average_normal(KernelGlobals *kg, ShaderData *sd)
{
float3 N = make_float3(0.0f, 0.0f, 0.0f);
@@ -1053,15 +1017,36 @@ ccl_device float3 shader_emissive_eval(ShaderData *sd)
/* Holdout */
-ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
+ccl_device float3 shader_holdout_apply(KernelGlobals *kg, ShaderData *sd)
{
float3 weight = make_float3(0.0f, 0.0f, 0.0f);
- for (int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
+ /* For objects marked as holdout, preserve transparency and remove all other
+ * closures, replacing them with a holdout weight. */
+ if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
+ if ((sd->flag & SD_TRANSPARENT) && !(sd->flag & SD_HAS_ONLY_VOLUME)) {
+ weight = make_float3(1.0f, 1.0f, 1.0f) - sd->closure_transparent_extinction;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
+ if (!CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
+ sc->type = NBUILTIN_CLOSURES;
+ }
+ }
- if (CLOSURE_IS_HOLDOUT(sc->type))
- weight += sc->weight;
+ sd->flag &= ~(SD_CLOSURE_FLAGS - (SD_TRANSPARENT | SD_BSDF));
+ }
+ else {
+ weight = make_float3(1.0f, 1.0f, 1.0f);
+ }
+ }
+ else {
+ for (int i = 0; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
+ if (CLOSURE_IS_HOLDOUT(sc->type)) {
+ weight += sc->weight;
+ }
+ }
}
return weight;
@@ -1072,6 +1057,7 @@ ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
ccl_device void shader_eval_surface(KernelGlobals *kg,
ShaderData *sd,
ccl_addr_space PathState *state,
+ ccl_global float *buffer,
int path_flag)
{
PROFILING_INIT(kg, PROFILING_SHADER_EVAL);
@@ -1092,7 +1078,7 @@ ccl_device void shader_eval_surface(KernelGlobals *kg,
#ifdef __OSL__
if (kg->osl) {
- if (sd->object == OBJECT_NONE) {
+ if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) {
OSLShader::eval_background(kg, sd, state, path_flag);
}
else {
@@ -1103,7 +1089,7 @@ ccl_device void shader_eval_surface(KernelGlobals *kg,
#endif
{
#ifdef __SVM__
- svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
+ svm_eval_nodes(kg, sd, state, buffer, SHADER_TYPE_SURFACE, path_flag);
#else
if (sd->object == OBJECT_NONE) {
sd->closure_emission_background = make_float3(0.8f, 0.8f, 0.8f);
@@ -1223,7 +1209,7 @@ ccl_device int shader_volume_phase_sample(KernelGlobals *kg,
* depending on color channels, even if this is perhaps not a common case */
const ShaderClosure *sc = &sd->closure[sampled];
int label;
- float3 eval;
+ float3 eval = make_float3(0.0f, 0.0f, 0.0f);
*pdf = 0.0f;
label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
@@ -1248,7 +1234,7 @@ ccl_device int shader_phase_sample_closure(KernelGlobals *kg,
PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_SAMPLE);
int label;
- float3 eval;
+ float3 eval = make_float3(0.0f, 0.0f, 0.0f);
*pdf = 0.0f;
label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
@@ -1315,7 +1301,7 @@ ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
else
# endif
{
- svm_eval_nodes(kg, sd, state, SHADER_TYPE_VOLUME, path_flag);
+ svm_eval_nodes(kg, sd, state, NULL, SHADER_TYPE_VOLUME, path_flag);
}
# endif
@@ -1344,7 +1330,7 @@ ccl_device void shader_eval_displacement(KernelGlobals *kg,
else
# endif
{
- svm_eval_nodes(kg, sd, state, SHADER_TYPE_DISPLACEMENT, 0);
+ svm_eval_nodes(kg, sd, state, NULL, SHADER_TYPE_DISPLACEMENT, 0);
}
#endif
}
@@ -1358,7 +1344,7 @@ ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect
int shader = 0;
# ifdef __HAIR__
- if (kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
+ if (isect->type & PRIMITIVE_ALL_TRIANGLE) {
# endif
shader = kernel_tex_fetch(__tri_shader, prim);
# ifdef __HAIR__
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h
index 6af1369feab..07043e6a769 100644
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -17,13 +17,6 @@
CCL_NAMESPACE_BEGIN
#ifdef __VOLUME__
-typedef struct VolumeState {
-# ifdef __SPLIT_KERNEL__
-# else
- PathState ps;
-# endif
-} VolumeState;
-
/* Get PathState ready for use for volume stack evaluation. */
# ifdef __SPLIT_KERNEL__
ccl_addr_space
@@ -55,16 +48,15 @@ ccl_addr_space
/* Attenuate throughput accordingly to the given intersection event.
* Returns true if the throughput is zero and traversal can be aborted.
*/
-ccl_device_forceinline bool shadow_handle_transparent_isect(
- KernelGlobals *kg,
- ShaderData *shadow_sd,
- ccl_addr_space PathState *state,
+ccl_device_forceinline bool shadow_handle_transparent_isect(KernelGlobals *kg,
+ ShaderData *shadow_sd,
+ ccl_addr_space PathState *state,
#ifdef __VOLUME__
- ccl_addr_space struct PathState *volume_state,
+ ccl_addr_space PathState *volume_state,
#endif
- Intersection *isect,
- Ray *ray,
- float3 *throughput)
+ Intersection *isect,
+ Ray *ray,
+ float3 *throughput)
{
#ifdef __VOLUME__
/* Attenuation between last surface and next surface. */
@@ -79,7 +71,7 @@ ccl_device_forceinline bool shadow_handle_transparent_isect(
/* Attenuation from transparent surface. */
if (!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
path_state_modify_bounce(state, true);
- shader_eval_surface(kg, shadow_sd, state, PATH_RAY_SHADOW);
+ shader_eval_surface(kg, shadow_sd, state, NULL, PATH_RAY_SHADOW);
path_state_modify_bounce(state, false);
*throughput *= shader_bsdf_transparency(kg, shadow_sd);
}
@@ -103,8 +95,7 @@ ccl_device bool shadow_blocked_opaque(KernelGlobals *kg,
Intersection *isect,
float3 *shadow)
{
- const bool blocked = scene_intersect(
- kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f);
+ const bool blocked = scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect);
#ifdef __VOLUME__
if (!blocked && state->volume_stack[0].shader != SHADER_NONE) {
/* Apply attenuation from current volume shader. */
@@ -164,7 +155,11 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
uint num_hits;
const bool blocked = scene_intersect_shadow_all(kg, ray, hits, visibility, max_hits, &num_hits);
# ifdef __VOLUME__
+# ifdef __KERNEL_OPTIX__
+ VolumeState &volume_state = kg->volume_state;
+# else
VolumeState volume_state;
+# endif
# endif
/* If no opaque surface found but we did find transparent hits,
* shade them.
@@ -303,7 +298,11 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(KernelGlobals *kg,
float3 *shadow)
{
# ifdef __VOLUME__
+# ifdef __KERNEL_OPTIX__
+ VolumeState &volume_state = kg->volume_state;
+# else
VolumeState volume_state;
+# endif
# endif
if (blocked && is_transparent_isect) {
float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
@@ -319,8 +318,7 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(KernelGlobals *kg,
if (bounce >= kernel_data.integrator.transparent_max_bounce) {
return true;
}
- if (!scene_intersect(
- kg, *ray, visibility & PATH_RAY_SHADOW_TRANSPARENT, isect, NULL, 0.0f, 0.0f)) {
+ if (!scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_TRANSPARENT, isect)) {
break;
}
if (!shader_transparent_shadow(kg, isect)) {
@@ -376,8 +374,7 @@ ccl_device bool shadow_blocked_transparent_stepped(KernelGlobals *kg,
Intersection *isect,
float3 *shadow)
{
- bool blocked = scene_intersect(
- kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f);
+ bool blocked = scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect);
bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, isect) : false;
return shadow_blocked_transparent_stepped_loop(
kg, sd, shadow_sd, state, visibility, ray, isect, blocked, is_transparent_isect, shadow);
@@ -390,32 +387,38 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
ShaderData *sd,
ShaderData *shadow_sd,
ccl_addr_space PathState *state,
- Ray *ray_input,
+ Ray *ray,
float3 *shadow)
{
- Ray *ray = ray_input;
- Intersection isect;
- /* Some common early checks. */
*shadow = make_float3(1.0f, 1.0f, 1.0f);
+#if !defined(__KERNEL_OPTIX__)
+ /* Some common early checks.
+ * Avoid conditional trace call in OptiX though, since those hurt performance there.
+ */
if (ray->t == 0.0f) {
return false;
}
+#endif
#ifdef __SHADOW_TRICKS__
const uint visibility = (state->flag & PATH_RAY_SHADOW_CATCHER) ? PATH_RAY_SHADOW_NON_CATCHER :
PATH_RAY_SHADOW;
#else
const uint visibility = PATH_RAY_SHADOW;
#endif
- /* Do actual shadow shading. */
- /* First of all, we check if integrator requires transparent shadows.
+ /* Do actual shadow shading.
+ * First of all, we check if integrator requires transparent shadows.
* if not, we use simplest and fastest ever way to calculate occlusion.
+ * Do not do this in OptiX to avoid the additional trace call.
*/
-#ifdef __TRANSPARENT_SHADOWS__
+#if !defined(__KERNEL_OPTIX__) || !defined(__TRANSPARENT_SHADOWS__)
+ Intersection isect;
+# ifdef __TRANSPARENT_SHADOWS__
if (!kernel_data.integrator.transparent_shadows)
-#endif
+# endif
{
return shadow_blocked_opaque(kg, shadow_sd, state, visibility, ray, &isect, shadow);
}
+#endif
#ifdef __TRANSPARENT_SHADOWS__
# ifdef __SHADOW_RECORD_ALL__
/* For the transparent shadows we try to use record-all logic on the
@@ -428,16 +431,20 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
if (state->transparent_bounce >= transparent_max_bounce) {
return true;
}
- const uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;
-# ifdef __KERNEL_GPU__
- /* On GPU we do trickey with tracing opaque ray first, this avoids speed
+ uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;
+# if defined(__KERNEL_OPTIX__)
+ /* Always use record-all behavior in OptiX, but ensure there are no out of bounds
+ * accesses to the hit stack.
+ */
+ max_hits = min(max_hits, SHADOW_STACK_MAX_HITS - 1);
+# elif defined(__KERNEL_GPU__)
+ /* On GPU we use the trick of tracing an opaque ray first; this avoids speed
* regressions in some files.
*
* TODO(sergey): Check why using record-all behavior causes slowdown in such
* cases. Could that be caused by a higher spill pressure?
*/
- const bool blocked = scene_intersect(
- kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f);
+ const bool blocked = scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_OPAQUE, &isect);
const bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, &isect) : false;
if (!blocked || !is_transparent_isect || max_hits + 1 >= SHADOW_STACK_MAX_HITS) {
return shadow_blocked_transparent_stepped_loop(
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index 7510e50a962..ed8572467ea 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -138,7 +138,7 @@ ccl_device void subsurface_color_bump_blur(
if (bump || texture_blur > 0.0f) {
/* average color and normal at incoming point */
- shader_eval_surface(kg, sd, state, state->flag);
+ shader_eval_surface(kg, sd, state, NULL, state->flag);
float3 in_color = shader_bssrdf_sum(sd, (bump) ? N : NULL, NULL);
/* we simply divide out the average color and multiply with the average
@@ -222,7 +222,7 @@ ccl_device_inline int subsurface_scatter_disk(KernelGlobals *kg,
/* intersect with the same object. if multiple intersections are found it
* will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
- scene_intersect_local(kg, *ray, ss_isect, sd->object, lcg_state, BSSRDF_MAX_HITS);
+ scene_intersect_local(kg, ray, ss_isect, sd->object, lcg_state, BSSRDF_MAX_HITS);
int num_eval_hits = min(ss_isect->num_hits, BSSRDF_MAX_HITS);
for (int hit = 0; hit < num_eval_hits; hit++) {
@@ -353,13 +353,19 @@ ccl_device void subsurface_random_walk_coefficients(const ShaderClosure *sc,
*weight = safe_divide_color(bssrdf->weight, A);
}
-ccl_device_noinline bool subsurface_random_walk(KernelGlobals *kg,
- LocalIntersection *ss_isect,
- ShaderData *sd,
- ccl_addr_space PathState *state,
- const ShaderClosure *sc,
- const float bssrdf_u,
- const float bssrdf_v)
+#ifdef __KERNEL_OPTIX__
+ccl_device_inline /* inline trace calls */
+#else
+ccl_device_noinline
+#endif
+ bool
+ subsurface_random_walk(KernelGlobals *kg,
+ LocalIntersection *ss_isect,
+ ShaderData *sd,
+ ccl_addr_space PathState *state,
+ const ShaderClosure *sc,
+ const float bssrdf_u,
+ const float bssrdf_v)
{
/* Sample diffuse surface scatter into the object. */
float3 D;
@@ -418,16 +424,21 @@ ccl_device_noinline bool subsurface_random_walk(KernelGlobals *kg,
float t = -logf(1.0f - rdist) / sample_sigma_t;
ray->t = t;
- scene_intersect_local(kg, *ray, ss_isect, sd->object, NULL, 1);
+ scene_intersect_local(kg, ray, ss_isect, sd->object, NULL, 1);
hit = (ss_isect->num_hits > 0);
if (hit) {
+#ifdef __KERNEL_OPTIX__
+ /* t is always in world space with OptiX. */
+ t = ss_isect->hits[0].t;
+#else
/* Compute world space distance to surface hit. */
float3 D = ray->D;
object_inverse_dir_transform(kg, sd, &D);
D = normalize(D) * ss_isect->hits[0].t;
object_dir_transform(kg, sd, &D);
t = len(D);
+#endif
}
/* Advance to new scatter location. */
diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h
index b6c1701211d..293335e0e08 100644
--- a/intern/cycles/kernel/kernel_textures.h
+++ b/intern/cycles/kernel/kernel_textures.h
@@ -35,6 +35,7 @@ KERNEL_TEX(KernelObject, __objects)
KERNEL_TEX(Transform, __object_motion_pass)
KERNEL_TEX(DecomposedTransform, __object_motion)
KERNEL_TEX(uint, __object_flag)
+KERNEL_TEX(float, __object_volume_step)
/* cameras */
KERNEL_TEX(DecomposedTransform, __camera_motion)
@@ -85,7 +86,7 @@ KERNEL_TEX(KernelShader, __shaders)
KERNEL_TEX(float, __lookup_table)
/* sobol */
-KERNEL_TEX(uint, __sobol_directions)
+KERNEL_TEX(uint, __sample_pattern_lut)
/* image textures */
KERNEL_TEX(TextureInfo, __texture_info)
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 390f239f68f..c6a7524c643 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -84,9 +84,7 @@ CCL_NAMESPACE_BEGIN
/* Kernel features */
#define __SOBOL__
-#define __INSTANCING__
#define __DPDU__
-#define __UV__
#define __BACKGROUND__
#define __CAUSTICS_TRICKS__
#define __VISIBILITY_FLAG__
@@ -106,8 +104,6 @@ CCL_NAMESPACE_BEGIN
#ifndef __KERNEL_AO_PREVIEW__
# define __SVM__
# define __EMISSION__
-# define __TEXTURES__
-# define __EXTRA_NODES__
# define __HOLDOUT__
# define __MULTI_CLOSURE__
# define __TRANSPARENT_SHADOWS__
@@ -115,7 +111,6 @@ CCL_NAMESPACE_BEGIN
# define __LAMP_MIS__
# define __CAMERA_MOTION__
# define __OBJECT_MOTION__
-# define __HAIR__
# define __BAKING__
# define __PRINCIPLED__
# define __SUBSURFACE__
@@ -128,9 +123,6 @@ CCL_NAMESPACE_BEGIN
/* Device specific features */
#ifdef __KERNEL_CPU__
-# ifdef __KERNEL_SSE2__
-# define __QBVH__
-# endif
# ifdef WITH_OSL
# define __OSL__
# endif
@@ -144,6 +136,13 @@ CCL_NAMESPACE_BEGIN
# endif
#endif /* __KERNEL_CUDA__ */
+#ifdef __KERNEL_OPTIX__
+# undef __BAKING__
+# undef __BRANCHED_PATH__
+/* TODO(pmours): Cannot use optixTrace in non-inlined functions */
+# undef __SHADER_RAYTRACE__
+#endif /* __KERNEL_OPTIX__ */
+
#ifdef __KERNEL_OPENCL__
#endif /* __KERNEL_OPENCL__ */
@@ -214,8 +213,9 @@ typedef enum ShaderEvalType {
SHADER_EVAL_DIFFUSE_COLOR,
SHADER_EVAL_GLOSSY_COLOR,
SHADER_EVAL_TRANSMISSION_COLOR,
- SHADER_EVAL_SUBSURFACE_COLOR,
SHADER_EVAL_EMISSION,
+ SHADER_EVAL_AOV_COLOR,
+ SHADER_EVAL_AOV_VALUE,
/* light passes */
SHADER_EVAL_AO,
@@ -224,7 +224,6 @@ typedef enum ShaderEvalType {
SHADER_EVAL_DIFFUSE,
SHADER_EVAL_GLOSSY,
SHADER_EVAL_TRANSMISSION,
- SHADER_EVAL_SUBSURFACE,
/* extra */
SHADER_EVAL_ENVIRONMENT,
@@ -261,6 +260,7 @@ enum PathTraceDimension {
enum SamplingPattern {
SAMPLING_PATTERN_SOBOL = 0,
SAMPLING_PATTERN_CMJ = 1,
+ SAMPLING_PATTERN_PMJ = 2,
SAMPLING_NUM_PATTERNS,
};
@@ -268,6 +268,7 @@ enum SamplingPattern {
/* these flags values correspond to raytypes in osl.cpp, so keep them in sync! */
enum PathRayFlag {
+ /* Ray visibility. */
PATH_RAY_CAMERA = (1 << 0),
PATH_RAY_REFLECT = (1 << 1),
PATH_RAY_TRANSMIT = (1 << 2),
@@ -276,6 +277,7 @@ enum PathRayFlag {
PATH_RAY_SINGULAR = (1 << 5),
PATH_RAY_TRANSPARENT = (1 << 6),
+ /* Shadow ray visibility. */
PATH_RAY_SHADOW_OPAQUE_NON_CATCHER = (1 << 7),
PATH_RAY_SHADOW_OPAQUE_CATCHER = (1 << 8),
PATH_RAY_SHADOW_OPAQUE = (PATH_RAY_SHADOW_OPAQUE_NON_CATCHER | PATH_RAY_SHADOW_OPAQUE_CATCHER),
@@ -287,8 +289,11 @@ enum PathRayFlag {
PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER),
PATH_RAY_SHADOW = (PATH_RAY_SHADOW_OPAQUE | PATH_RAY_SHADOW_TRANSPARENT),
- PATH_RAY_CURVE = (1 << 11), /* visibility flag to define curve segments */
- PATH_RAY_VOLUME_SCATTER = (1 << 12), /* volume scattering */
+ /* Unused, free to reuse. */
+ PATH_RAY_UNUSED = (1 << 11),
+
+ /* Ray visibility for volume scattering. */
+ PATH_RAY_VOLUME_SCATTER = (1 << 12),
/* Special flag to tag unaligned BVH nodes. */
PATH_RAY_NODE_UNALIGNED = (1 << 13),
@@ -314,7 +319,7 @@ enum PathRayFlag {
/* Ray is to be terminated, but continue with transparent bounces and
* emission as long as we encounter them. This is required to make the
* MIS between direct and indirect light rays match, as shadow rays go
- * through transparent surfaces to reach emisison too. */
+ * through transparent surfaces to reach emission too. */
PATH_RAY_TERMINATE_AFTER_TRANSPARENT = (1 << 21),
/* Ray is to be terminated. */
PATH_RAY_TERMINATE = (PATH_RAY_TERMINATE_IMMEDIATE | PATH_RAY_TERMINATE_AFTER_TRANSPARENT),
@@ -365,6 +370,10 @@ typedef enum PassType {
#endif
PASS_RENDER_TIME,
PASS_CRYPTOMATTE,
+ PASS_AOV_COLOR,
+ PASS_AOV_VALUE,
+ PASS_ADAPTIVE_AUX_BUFFER,
+ PASS_SAMPLE_COUNT,
PASS_CATEGORY_MAIN_END = 31,
PASS_MIST = 32,
@@ -382,13 +391,14 @@ typedef enum PassType {
PASS_TRANSMISSION_DIRECT,
PASS_TRANSMISSION_INDIRECT,
PASS_TRANSMISSION_COLOR,
- PASS_SUBSURFACE_DIRECT,
- PASS_SUBSURFACE_INDIRECT,
- PASS_SUBSURFACE_COLOR,
- PASS_VOLUME_DIRECT,
+ PASS_VOLUME_DIRECT = 50,
PASS_VOLUME_INDIRECT,
/* No Scatter color since it's tricky to define what it would even mean. */
PASS_CATEGORY_LIGHT_END = 63,
+
+ PASS_BAKE_PRIMITIVE,
+ PASS_BAKE_DIFFERENTIAL,
+ PASS_CATEGORY_BAKE_END = 95
} PassType;
#define PASS_ANY (~0)
@@ -435,23 +445,20 @@ typedef enum eBakePassFilter {
BAKE_FILTER_DIFFUSE = (1 << 3),
BAKE_FILTER_GLOSSY = (1 << 4),
BAKE_FILTER_TRANSMISSION = (1 << 5),
- BAKE_FILTER_SUBSURFACE = (1 << 6),
- BAKE_FILTER_EMISSION = (1 << 7),
- BAKE_FILTER_AO = (1 << 8),
+ BAKE_FILTER_EMISSION = (1 << 6),
+ BAKE_FILTER_AO = (1 << 7),
} eBakePassFilter;
typedef enum BakePassFilterCombos {
BAKE_FILTER_COMBINED = (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT | BAKE_FILTER_DIFFUSE |
- BAKE_FILTER_GLOSSY | BAKE_FILTER_TRANSMISSION | BAKE_FILTER_SUBSURFACE |
- BAKE_FILTER_EMISSION | BAKE_FILTER_AO),
+ BAKE_FILTER_GLOSSY | BAKE_FILTER_TRANSMISSION | BAKE_FILTER_EMISSION |
+ BAKE_FILTER_AO),
BAKE_FILTER_DIFFUSE_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_DIFFUSE),
BAKE_FILTER_GLOSSY_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_GLOSSY),
BAKE_FILTER_TRANSMISSION_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_TRANSMISSION),
- BAKE_FILTER_SUBSURFACE_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_SUBSURFACE),
BAKE_FILTER_DIFFUSE_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_DIFFUSE),
BAKE_FILTER_GLOSSY_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_GLOSSY),
BAKE_FILTER_TRANSMISSION_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_TRANSMISSION),
- BAKE_FILTER_SUBSURFACE_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_SUBSURFACE),
} BakePassFilterCombos;
typedef enum DenoiseFlag {
@@ -461,9 +468,7 @@ typedef enum DenoiseFlag {
DENOISING_CLEAN_GLOSSY_IND = (1 << 3),
DENOISING_CLEAN_TRANSMISSION_DIR = (1 << 4),
DENOISING_CLEAN_TRANSMISSION_IND = (1 << 5),
- DENOISING_CLEAN_SUBSURFACE_DIR = (1 << 6),
- DENOISING_CLEAN_SUBSURFACE_IND = (1 << 7),
- DENOISING_CLEAN_ALL_PASSES = (1 << 8) - 1,
+ DENOISING_CLEAN_ALL_PASSES = (1 << 6) - 1,
} DenoiseFlag;
#ifdef __KERNEL_DEBUG__
@@ -483,8 +488,7 @@ typedef ccl_addr_space struct PathRadianceState {
float3 diffuse;
float3 glossy;
float3 transmission;
- float3 subsurface;
- float3 scatter;
+ float3 volume;
float3 direct;
#endif
@@ -507,19 +511,16 @@ typedef ccl_addr_space struct PathRadiance {
float3 color_diffuse;
float3 color_glossy;
float3 color_transmission;
- float3 color_subsurface;
float3 direct_diffuse;
float3 direct_glossy;
float3 direct_transmission;
- float3 direct_subsurface;
- float3 direct_scatter;
+ float3 direct_volume;
float3 indirect_diffuse;
float3 indirect_glossy;
float3 indirect_transmission;
- float3 indirect_subsurface;
- float3 indirect_scatter;
+ float3 indirect_volume;
float4 shadow;
float mist;
@@ -573,8 +574,7 @@ typedef struct BsdfEval {
float3 glossy;
float3 transmission;
float3 transparent;
- float3 subsurface;
- float3 scatter;
+ float3 volume;
#endif
#ifdef __SHADOW_TRICKS__
float3 sum_no_mis;
@@ -660,9 +660,8 @@ typedef struct Ray {
* is fixed.
*/
#ifndef __KERNEL_OPENCL_AMD__
- float3 P; /* origin */
- float3 D; /* direction */
-
+ float3 P; /* origin */
+ float3 D; /* direction */
float t; /* length of the ray */
float time; /* time (for motion blur) */
#else
@@ -702,32 +701,42 @@ typedef enum PrimitiveType {
PRIMITIVE_NONE = 0,
PRIMITIVE_TRIANGLE = (1 << 0),
PRIMITIVE_MOTION_TRIANGLE = (1 << 1),
- PRIMITIVE_CURVE = (1 << 2),
- PRIMITIVE_MOTION_CURVE = (1 << 3),
+ PRIMITIVE_CURVE_THICK = (1 << 2),
+ PRIMITIVE_MOTION_CURVE_THICK = (1 << 3),
+ PRIMITIVE_CURVE_RIBBON = (1 << 4),
+ PRIMITIVE_MOTION_CURVE_RIBBON = (1 << 5),
/* Lamp primitive is not included below on purpose,
* since it is no real traceable primitive.
*/
- PRIMITIVE_LAMP = (1 << 4),
+ PRIMITIVE_LAMP = (1 << 6),
PRIMITIVE_ALL_TRIANGLE = (PRIMITIVE_TRIANGLE | PRIMITIVE_MOTION_TRIANGLE),
- PRIMITIVE_ALL_CURVE = (PRIMITIVE_CURVE | PRIMITIVE_MOTION_CURVE),
- PRIMITIVE_ALL_MOTION = (PRIMITIVE_MOTION_TRIANGLE | PRIMITIVE_MOTION_CURVE),
+ PRIMITIVE_ALL_CURVE = (PRIMITIVE_CURVE_THICK | PRIMITIVE_MOTION_CURVE_THICK |
+ PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON),
+ PRIMITIVE_ALL_MOTION = (PRIMITIVE_MOTION_TRIANGLE | PRIMITIVE_MOTION_CURVE_THICK |
+ PRIMITIVE_MOTION_CURVE_RIBBON),
PRIMITIVE_ALL = (PRIMITIVE_ALL_TRIANGLE | PRIMITIVE_ALL_CURVE),
/* Total number of different traceable primitives.
* NOTE: This is an actual value, not a bitflag.
*/
- PRIMITIVE_NUM_TOTAL = 4,
+ PRIMITIVE_NUM_TOTAL = 6,
} PrimitiveType;
#define PRIMITIVE_PACK_SEGMENT(type, segment) ((segment << PRIMITIVE_NUM_TOTAL) | (type))
#define PRIMITIVE_UNPACK_SEGMENT(type) (type >> PRIMITIVE_NUM_TOTAL)
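
With PRIMITIVE_NUM_TOTAL raised from 4 to 6, packed segment indices now shift by 6 bits. A small self-contained sketch of the pack/unpack round trip under that assumption, with plain integer stand-ins for the enum values.

/* Standalone sketch of the pack/unpack round trip with the new 6-bit type field. */
#include <assert.h>

#define NUM_TOTAL 6
#define PACK_SEGMENT(type, segment) (((segment) << NUM_TOTAL) | (type))
#define UNPACK_SEGMENT(packed) ((packed) >> NUM_TOTAL)

int main(void)
{
  const int type = 1 << 4; /* stand-in for PRIMITIVE_CURVE_RIBBON */
  const int segment = 7;   /* curve key segment index */
  const int packed = PACK_SEGMENT(type, segment);
  assert(UNPACK_SEGMENT(packed) == segment);
  assert((packed & ((1 << NUM_TOTAL) - 1)) == type);
  return 0;
}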
+typedef enum CurveShapeType {
+ CURVE_RIBBON = 0,
+ CURVE_THICK = 1,
+
+ CURVE_NUM_SHAPE_TYPES,
+} CurveShapeType;
+
/* Attributes */
typedef enum AttributePrimitive {
- ATTR_PRIM_TRIANGLE = 0,
- ATTR_PRIM_CURVE,
+ ATTR_PRIM_GEOMETRY = 0,
ATTR_PRIM_SUBD,
ATTR_PRIM_TYPES
@@ -755,6 +764,7 @@ typedef enum AttributeStandard {
ATTR_STD_UV,
ATTR_STD_UV_TANGENT,
ATTR_STD_UV_TANGENT_SIGN,
+ ATTR_STD_VERTEX_COLOR,
ATTR_STD_GENERATED,
ATTR_STD_GENERATED_TRANSFORM,
ATTR_STD_POSITION_UNDEFORMED,
@@ -773,6 +783,7 @@ typedef enum AttributeStandard {
ATTR_STD_VOLUME_TEMPERATURE,
ATTR_STD_VOLUME_VELOCITY,
ATTR_STD_POINTINESS,
+ ATTR_STD_RANDOM_PER_ISLAND,
ATTR_STD_NUM,
ATTR_STD_NOT_FOUND = ~0
@@ -814,8 +825,9 @@ typedef struct AttributeDescriptor {
* ShaderClosure has a fixed size, and any extra space must be allocated
* with closure_alloc_extra().
*
- * We pad the struct to 80 bytes and ensure it is aligned to 16 bytes, which
- * we assume to be the maximum required alignment for any struct. */
+ * We pad the struct to align to 16 bytes. All shader closures are assumed
+ * to fit in this struct size. CPU sizes are a bit larger because float3 is
+ * padded to be 16 bytes, while it's only 12 bytes on the GPU. */
#define SHADER_CLOSURE_BASE \
float3 weight; \
@@ -827,7 +839,10 @@ typedef ccl_addr_space struct ccl_align(16) ShaderClosure
{
SHADER_CLOSURE_BASE;
- float data[10]; /* pad to 80 bytes */
+#ifdef __KERNEL_CPU__
+ float pad[2];
+#endif
+ float data[10];
}
ShaderClosure;
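
The padding comment above hinges on float3 being 16 bytes on the CPU but only 12 bytes on the GPU. A tiny illustration of that size difference with stand-in structs; these are not the actual Cycles float3 definitions.

/* Standalone illustration of the float3 size difference the comment refers to:
 * a SIMD-friendly CPU float3 carries a fourth padding float, a packed
 * GPU-style float3 does not. */
#include <assert.h>

typedef struct { float x, y, z, w_pad; } float3_cpu; /* 16 bytes, SSE friendly */
typedef struct { float x, y, z; } float3_gpu;        /* 12 bytes, packed */

int main(void)
{
  assert(sizeof(float3_cpu) == 16);
  assert(sizeof(float3_gpu) == 12);
  return 0;
}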
@@ -890,13 +905,13 @@ enum ShaderDataFlag {
SD_HAS_DISPLACEMENT = (1 << 26),
/* Has constant emission (value stored in __shaders) */
SD_HAS_CONSTANT_EMISSION = (1 << 27),
- /* Needs to access attributes */
- SD_NEED_ATTRIBUTES = (1 << 28),
+ /* Needs to access attributes for volume rendering */
+ SD_NEED_VOLUME_ATTRIBUTES = (1 << 28),
SD_SHADER_FLAGS = (SD_USE_MIS | SD_HAS_TRANSPARENT_SHADOW | SD_HAS_VOLUME | SD_HAS_ONLY_VOLUME |
SD_HETEROGENEOUS_VOLUME | SD_HAS_BSSRDF_BUMP | SD_VOLUME_EQUIANGULAR |
SD_VOLUME_MIS | SD_VOLUME_CUBIC | SD_HAS_BUMP | SD_HAS_DISPLACEMENT |
- SD_HAS_CONSTANT_EMISSION | SD_NEED_ATTRIBUTES)
+ SD_HAS_CONSTANT_EMISSION | SD_NEED_VOLUME_ATTRIBUTES)
};
/* Object flags. */
@@ -926,7 +941,8 @@ enum ShaderDataObjectFlag {
SD_OBJECT_HAS_VOLUME_ATTRIBUTES)
};
-typedef ccl_addr_space struct ShaderData {
+typedef ccl_addr_space struct ccl_align(16) ShaderData
+{
/* position */
float3 P;
/* smooth normal for shading */
@@ -1015,11 +1031,16 @@ typedef ccl_addr_space struct ShaderData {
/* At the end so we can adjust size in ShaderDataTinyStorage. */
struct ShaderClosure closure[MAX_CLOSURE];
-} ShaderData;
+}
+ShaderData;
-typedef ccl_addr_space struct ShaderDataTinyStorage {
+/* ShaderDataTinyStorage needs the same alignment as ShaderData, or else
+ * the pointer cast in AS_SHADER_DATA invokes undefined behavior. */
+typedef ccl_addr_space struct ccl_align(16) ShaderDataTinyStorage
+{
char pad[sizeof(ShaderData) - sizeof(ShaderClosure) * MAX_CLOSURE];
-} ShaderDataTinyStorage;
+}
+ShaderDataTinyStorage;
#define AS_SHADER_DATA(shader_data_tiny_storage) ((ShaderData *)shader_data_tiny_storage)
/* Path State */
@@ -1051,6 +1072,7 @@ typedef struct PathState {
#ifdef __DENOISING_FEATURES__
float denoising_feature_weight;
+ float3 denoising_feature_throughput;
#endif /* __DENOISING_FEATURES__ */
/* multiple importance sampling */
@@ -1069,6 +1091,15 @@ typedef struct PathState {
#endif
} PathState;
+#ifdef __VOLUME__
+typedef struct VolumeState {
+# ifdef __SPLIT_KERNEL__
+# else
+ PathState ps;
+# endif
+} VolumeState;
+#endif
+
/* Struct to gather multiple nearby intersections. */
typedef struct LocalIntersection {
Ray ray;
@@ -1159,7 +1190,7 @@ typedef struct KernelCamera {
ProjectionTransform worldtondc;
Transform worldtocamera;
- /* Stores changes in the projeciton matrix. Use for camera zoom motion
+ /* Stores changes in the projection matrix. Use for camera zoom motion
* blur and motion pass output for perspective camera. */
ProjectionTransform perspective_pre;
ProjectionTransform perspective_post;
@@ -1181,6 +1212,7 @@ static_assert_align(KernelCamera, 16);
typedef struct KernelFilm {
float exposure;
int pass_flag;
+
int light_pass_flag;
int pass_stride;
int use_light_pass;
@@ -1198,18 +1230,15 @@ typedef struct KernelFilm {
int pass_diffuse_color;
int pass_glossy_color;
int pass_transmission_color;
- int pass_subsurface_color;
int pass_diffuse_indirect;
int pass_glossy_indirect;
int pass_transmission_indirect;
- int pass_subsurface_indirect;
int pass_volume_indirect;
int pass_diffuse_direct;
int pass_glossy_direct;
int pass_transmission_direct;
- int pass_subsurface_direct;
int pass_volume_direct;
int pass_emission;
@@ -1224,6 +1253,9 @@ typedef struct KernelFilm {
int cryptomatte_depth;
int pass_cryptomatte;
+ int pass_adaptive_aux_buffer;
+ int pass_sample_count;
+
int pass_mist;
float mist_start;
float mist_inv_depth;
@@ -1233,6 +1265,12 @@ typedef struct KernelFilm {
int pass_denoising_clean;
int denoising_flags;
+ int pass_aov_color;
+ int pass_aov_value;
+ int pass_aov_color_num;
+ int pass_aov_value_num;
+ int pad1, pad2, pad3;
+
/* XYZ to rendering color space transform. float4 instead of float3 to
* ensure consistent padding/alignment across devices. */
float4 xyz_to_r;
@@ -1240,12 +1278,25 @@ typedef struct KernelFilm {
float4 xyz_to_b;
float4 rgb_to_y;
+ int pass_bake_primitive;
+ int pass_bake_differential;
+ int pad;
+
#ifdef __KERNEL_DEBUG__
int pass_bvh_traversed_nodes;
int pass_bvh_traversed_instances;
int pass_bvh_intersections;
int pass_ray_bounces;
#endif
+
+ /* viewport rendering options */
+ int display_pass_stride;
+ int display_pass_components;
+ int display_divide_pass_stride;
+ int use_display_exposure;
+ int use_display_pass_alpha;
+
+ int pad4, pad5, pad6;
} KernelFilm;
static_assert_align(KernelFilm, 16);
@@ -1253,6 +1304,7 @@ typedef struct KernelBackground {
/* only shader index */
int surface_shader;
int volume_shader;
+ float volume_step_size;
int transparent;
float transparent_roughness_squared_threshold;
@@ -1260,7 +1312,24 @@ typedef struct KernelBackground {
float ao_factor;
float ao_distance;
float ao_bounces_factor;
- float ao_pad;
+
+ /* portal sampling */
+ float portal_weight;
+ int num_portals;
+ int portal_offset;
+
+ /* sun sampling */
+ float sun_weight;
+ /* xyz store direction, w the angle. float4 instead of float3 is used
+ * to ensure consistent padding/alignment across devices. */
+ float4 sun;
+
+ /* map sampling */
+ float map_weight;
+ int map_res_x;
+ int map_res_y;
+
+ int use_mis;
} KernelBackground;
static_assert_align(KernelBackground, 16);
@@ -1279,18 +1348,12 @@ typedef struct KernelIntegrator {
float pdf_triangles;
float pdf_lights;
float pdf_inv_totarea;
- int pdf_background_res_x;
- int pdf_background_res_y;
float light_inv_rr_threshold;
int distant_lights_offset;
int background_light_index;
- /* light portals */
- float portal_pdf;
- int num_portals;
- int portal_offset;
-
/* bounces */
+ int min_bounce;
int max_bounce;
int max_diffuse_bounce;
@@ -1301,6 +1364,7 @@ typedef struct KernelIntegrator {
int ao_bounces;
/* transparent */
+ int transparent_min_bounce;
int transparent_max_bounce;
int transparent_shadows;
@@ -1334,11 +1398,15 @@ typedef struct KernelIntegrator {
/* sampler */
int sampling_pattern;
int aa_samples;
+ int adaptive_min_samples;
+ int adaptive_step;
+ int adaptive_stop_per_sample;
+ float adaptive_threshold;
/* volume render */
int use_volumes;
int volume_max_steps;
- float volume_step_size;
+ float volume_step_rate;
int volume_samples;
int start_sample;
@@ -1353,11 +1421,12 @@ typedef enum KernelBVHLayout {
BVH_LAYOUT_NONE = 0,
BVH_LAYOUT_BVH2 = (1 << 0),
- BVH_LAYOUT_BVH4 = (1 << 1),
- BVH_LAYOUT_BVH8 = (1 << 2),
- BVH_LAYOUT_EMBREE = (1 << 3),
- BVH_LAYOUT_DEFAULT = BVH_LAYOUT_BVH8,
- BVH_LAYOUT_ALL = (unsigned int)(-1),
+ BVH_LAYOUT_EMBREE = (1 << 1),
+ BVH_LAYOUT_OPTIX = (1 << 2),
+
+ /* Default BVH layout to use for CPU. */
+ BVH_LAYOUT_AUTO = BVH_LAYOUT_EMBREE,
+ BVH_LAYOUT_ALL = (unsigned int)(~0u),
} KernelBVHLayout;
typedef struct KernelBVH {
@@ -1365,56 +1434,48 @@ typedef struct KernelBVH {
int root;
int have_motion;
int have_curves;
- int have_instancing;
int bvh_layout;
int use_bvh_steps;
+ int curve_subdivisions;
- /* Embree */
-#ifdef __EMBREE__
+ /* Custom BVH */
+#ifdef __KERNEL_OPTIX__
+ OptixTraversableHandle scene;
+#else
+# ifdef __EMBREE__
RTCScene scene;
-# ifndef __KERNEL_64_BIT__
- int pad1;
+# ifndef __KERNEL_64_BIT__
+ int pad2;
+# endif
+# else
+ int scene, pad2;
# endif
-#else
- int pad1, pad2;
#endif
} KernelBVH;
static_assert_align(KernelBVH, 16);
-typedef enum CurveFlag {
- /* runtime flags */
- CURVE_KN_BACKFACING = 1, /* backside of cylinder? */
- CURVE_KN_ENCLOSEFILTER = 2, /* don't consider strands surrounding start point? */
- CURVE_KN_INTERPOLATE = 4, /* render as a curve? */
- CURVE_KN_ACCURATE = 8, /* use accurate intersections test? */
- CURVE_KN_INTERSECTCORRECTION = 16, /* correct for width after determing closest midpoint? */
- CURVE_KN_TRUETANGENTGNORMAL = 32, /* use tangent normal for geometry? */
- CURVE_KN_RIBBONS = 64, /* use flat curve ribbons */
-} CurveFlag;
-
-typedef struct KernelCurves {
- int curveflags;
- int subdivisions;
-
- float minimum_width;
- float maximum_width;
-} KernelCurves;
-static_assert_align(KernelCurves, 16);
-
typedef struct KernelTables {
int beckmann_offset;
int pad1, pad2, pad3;
} KernelTables;
static_assert_align(KernelTables, 16);
+typedef struct KernelBake {
+ int object_index;
+ int tri_offset;
+ int type;
+ int pass_filter;
+} KernelBake;
+static_assert_align(KernelBake, 16);
+
typedef struct KernelData {
KernelCamera cam;
KernelFilm film;
KernelBackground background;
KernelIntegrator integrator;
KernelBVH bvh;
- KernelCurves curve;
KernelTables tables;
+ KernelBake bake;
} KernelData;
static_assert_align(KernelData, 16);
@@ -1427,6 +1488,7 @@ typedef struct KernelObject {
float surface_area;
float pass_id;
float random_number;
+ float color[3];
int particle_index;
float dupli_generated[3];
@@ -1439,11 +1501,12 @@ typedef struct KernelObject {
uint patch_map_offset;
uint attribute_map_offset;
uint motion_offset;
- uint pad1;
float cryptomatte_object;
float cryptomatte_asset;
- float pad2, pad3;
+
+ float shadow_terminator_offset;
+ float pad1, pad2, pad3;
} KernelObject;
static_assert_align(KernelObject, 16);
@@ -1481,6 +1544,8 @@ typedef struct KernelLight {
int samples;
float max_bounces;
float random;
+ float strength[3];
+ float pad1;
Transform tfm;
Transform itfm;
union {
@@ -1542,7 +1607,7 @@ static_assert_align(KernelShader, 16);
* Queue 1 - Active rays
* Queue 2 - Background queue
* Queue 3 - Shadow ray cast kernel - AO
- * Queeu 4 - Shadow ray cast kernel - direct lighting
+ * Queue 4 - Shadow ray cast kernel - direct lighting
*/
/* Queue names */
@@ -1654,12 +1719,16 @@ typedef struct WorkTile {
uint start_sample;
uint num_samples;
- uint offset;
+ int offset;
uint stride;
ccl_global float *buffer;
} WorkTile;
+/* Precomputed sample table sizes for PMJ02 sampler. */
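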
+#define NUM_PMJ_SAMPLES 64 * 64
+#define NUM_PMJ_PATTERNS 48
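
A back-of-the-envelope check of the table size implied by these constants, assuming the lookup table stores one 2D sample (two floats) per entry; the host-side layout is an assumption, since this patch does not show it.

/* Rough size of the PMJ sample table, assuming two floats per entry. */
#include <stdio.h>

#define NUM_PMJ_SAMPLES (64 * 64)
#define NUM_PMJ_PATTERNS 48

int main(void)
{
  const unsigned long entries = (unsigned long)NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS;
  const unsigned long bytes = entries * 2UL * sizeof(float);
  printf("%lu samples, %.1f MiB\n", entries, bytes / (1024.0 * 1024.0));
  /* 196608 samples, 1.5 MiB */
  return 0;
}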
+
CCL_NAMESPACE_END
#endif /* __KERNEL_TYPES_H__ */
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index 044fca7fff6..b2468b196da 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -48,7 +48,8 @@ ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg,
shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW);
if (sd->flag & SD_EXTINCTION) {
- *extinction = sd->closure_transparent_extinction;
+ const float density = object_volume_density(kg, sd->object);
+ *extinction = sd->closure_transparent_extinction * density;
return true;
}
else {
@@ -84,6 +85,11 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals *kg,
}
}
+ const float density = object_volume_density(kg, sd->object);
+ coeff->sigma_s *= density;
+ coeff->sigma_t *= density;
+ coeff->emission *= density;
+
return true;
}
@@ -101,29 +107,39 @@ ccl_device float kernel_volume_channel_get(float3 value, int channel)
#ifdef __VOLUME__
-ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space VolumeStack *stack)
+ccl_device float volume_stack_step_size(KernelGlobals *kg, ccl_addr_space VolumeStack *stack)
{
+ float step_size = FLT_MAX;
+
for (int i = 0; stack[i].shader != SHADER_NONE; i++) {
int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
+ bool heterogeneous = false;
+
if (shader_flag & SD_HETEROGENEOUS_VOLUME) {
- return true;
+ heterogeneous = true;
}
- else if (shader_flag & SD_NEED_ATTRIBUTES) {
+ else if (shader_flag & SD_NEED_VOLUME_ATTRIBUTES) {
/* We want to render world or objects without any volume grids
- * as homogenous, but can only verify this at runtime since other
- * heterogenous volume objects may be using the same shader. */
+ * as homogeneous, but can only verify this at run-time since other
+ * heterogeneous volume objects may be using the same shader. */
int object = stack[i].object;
if (object != OBJECT_NONE) {
int object_flag = kernel_tex_fetch(__object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME_ATTRIBUTES) {
- return true;
+ heterogeneous = true;
}
}
}
+
+ if (heterogeneous) {
+ float object_step_size = object_volume_step_size(kg, stack[i].object);
+ object_step_size *= kernel_data.integrator.volume_step_rate;
+ step_size = fminf(object_step_size, step_size);
+ }
}
- return false;
+ return step_size;
}
ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stack)
@@ -158,12 +174,13 @@ ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stac
ccl_device_inline void kernel_volume_step_init(KernelGlobals *kg,
ccl_addr_space PathState *state,
+ const float object_step_size,
float t,
float *step_size,
float *step_offset)
{
const int max_steps = kernel_data.integrator.volume_max_steps;
- float step = min(kernel_data.integrator.volume_step_size, t);
+ float step = min(object_step_size, t);
/* compute exact steps in advance for malloc */
if (t > max_steps * step) {
@@ -187,7 +204,7 @@ ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg,
ShaderData *sd,
float3 *throughput)
{
- float3 sigma_t;
+ float3 sigma_t = make_float3(0.0f, 0.0f, 0.0f);
if (volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t))
*throughput *= volume_color_transmittance(sigma_t, ray->t);
@@ -199,7 +216,8 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg,
ccl_addr_space PathState *state,
Ray *ray,
ShaderData *sd,
- float3 *throughput)
+ float3 *throughput,
+ const float object_step_size)
{
float3 tp = *throughput;
const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
@@ -207,7 +225,7 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg,
/* prepare for stepping */
int max_steps = kernel_data.integrator.volume_max_steps;
float step_offset, step_size;
- kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
+ kernel_volume_step_init(kg, state, object_step_size, ray->t, &step_size, &step_offset);
/* compute extinction at the start */
float t = 0.0f;
@@ -225,7 +243,7 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg,
}
float3 new_P = ray->P + ray->D * (t + step_offset);
- float3 sigma_t;
+ float3 sigma_t = make_float3(0.0f, 0.0f, 0.0f);
/* compute attenuation over segment */
if (volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
@@ -264,8 +282,9 @@ ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg,
{
shader_setup_from_volume(kg, shadow_sd, ray);
- if (volume_stack_is_heterogeneous(kg, state->volume_stack))
- kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput);
+ float step_size = volume_stack_step_size(kg, state->volume_stack);
+ if (step_size != FLT_MAX)
+ kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput, step_size);
else
kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
}
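
The homogeneous branch above attenuates shadow throughput in closed form. A minimal sketch of that attenuation, assuming volume_color_transmittance() is the usual per-channel Beer-Lambert term exp(-sigma_t * t).

/* Minimal sketch of homogeneous shadow attenuation, assuming per-channel
 * Beer-Lambert transmittance: exp(-sigma_t * t). */
#include <math.h>
#include <stdio.h>

typedef struct { float x, y, z; } vec3;

static vec3 transmittance(vec3 sigma_t, float t)
{
  vec3 T = {expf(-sigma_t.x * t), expf(-sigma_t.y * t), expf(-sigma_t.z * t)};
  return T;
}

int main(void)
{
  vec3 sigma_t = {0.5f, 1.0f, 2.0f};     /* extinction per color channel */
  vec3 T = transmittance(sigma_t, 2.0f); /* ray length 2 */
  printf("shadow throughput: %.3f %.3f %.3f\n", T.x, T.y, T.z);
  /* ~0.368 0.135 0.018: denser channels are attenuated more strongly */
  return 0;
}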
@@ -428,7 +447,7 @@ kernel_volume_integrate_homogeneous(KernelGlobals *kg,
ccl_addr_space float3 *throughput,
bool probalistic_scatter)
{
- VolumeShaderCoefficients coeff;
+ VolumeShaderCoefficients coeff ccl_optional_struct_init;
if (!volume_shader_sample(kg, sd, state, ray->P, &coeff))
return VOLUME_PATH_MISSED;
@@ -504,7 +523,7 @@ kernel_volume_integrate_homogeneous(KernelGlobals *kg,
float3 transmittance = volume_color_transmittance(coeff.sigma_t, ray->t);
float3 emission = kernel_volume_emission_integrate(
&coeff, closure_flag, transmittance, ray->t);
- path_radiance_accum_emission(L, state, *throughput, emission);
+ path_radiance_accum_emission(kg, L, state, *throughput, emission);
}
/* modify throughput */
@@ -533,7 +552,8 @@ kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
Ray *ray,
ShaderData *sd,
PathRadiance *L,
- ccl_addr_space float3 *throughput)
+ ccl_addr_space float3 *throughput,
+ const float object_step_size)
{
float3 tp = *throughput;
const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
@@ -541,7 +561,7 @@ kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
/* prepare for stepping */
int max_steps = kernel_data.integrator.volume_max_steps;
float step_offset, step_size;
- kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
+ kernel_volume_step_init(kg, state, object_step_size, ray->t, &step_size, &step_offset);
/* compute coefficients at the start */
float t = 0.0f;
@@ -559,13 +579,13 @@ kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
float dt = new_t - t;
/* use random position inside this segment to sample shader,
- * for last shorter step we remap it to fit within the segment. */
+ * for last shorter step we remap it to fit within the segment. */
if (new_t == ray->t) {
step_offset *= (new_t - t) / step_size;
}
float3 new_P = ray->P + ray->D * (t + step_offset);
- VolumeShaderCoefficients coeff;
+ VolumeShaderCoefficients coeff ccl_optional_struct_init;
/* compute segment */
if (volume_shader_sample(kg, sd, state, new_P, &coeff)) {
@@ -621,6 +641,7 @@ kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
new_tp = tp * transmittance;
}
else {
+ transmittance = make_float3(0.0f, 0.0f, 0.0f);
new_tp = tp;
}
@@ -628,7 +649,7 @@ kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
if (L && (closure_flag & SD_EMISSION)) {
float3 emission = kernel_volume_emission_integrate(
&coeff, closure_flag, transmittance, dt);
- path_radiance_accum_emission(L, state, tp, emission);
+ path_radiance_accum_emission(kg, L, state, tp, emission);
}
/* modify throughput */
@@ -671,19 +692,20 @@ kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
* ray, with the assumption that there are no surfaces blocking light
* between the endpoints. distance sampling is used to decide if we will
* scatter or not. */
-ccl_device_noinline VolumeIntegrateResult
+ccl_device_noinline_cpu VolumeIntegrateResult
kernel_volume_integrate(KernelGlobals *kg,
ccl_addr_space PathState *state,
ShaderData *sd,
Ray *ray,
PathRadiance *L,
ccl_addr_space float3 *throughput,
- bool heterogeneous)
+ float step_size)
{
shader_setup_from_volume(kg, sd, ray);
- if (heterogeneous)
- return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput);
+ if (step_size != FLT_MAX)
+ return kernel_volume_integrate_heterogeneous_distance(
+ kg, state, ray, sd, L, throughput, step_size);
else
return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, true);
}
@@ -734,7 +756,7 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg,
Ray *ray,
ShaderData *sd,
VolumeSegment *segment,
- bool heterogeneous)
+ const float object_step_size)
{
const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
@@ -742,9 +764,9 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg,
int max_steps;
float step_size, step_offset;
- if (heterogeneous) {
+ if (object_step_size != FLT_MAX) {
max_steps = kernel_data.integrator.volume_max_steps;
- kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
+ kernel_volume_step_init(kg, state, object_step_size, ray->t, &step_size, &step_offset);
# ifdef __KERNEL_CPU__
/* NOTE: For the branched path tracing it's possible to have direct
@@ -794,13 +816,13 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg,
float dt = new_t - t;
/* use random position inside this segment to sample shader,
- * for last shorter step we remap it to fit within the segment. */
+ * for last shorter step we remap it to fit within the segment. */
if (new_t == ray->t) {
step_offset *= (new_t - t) / step_size;
}
float3 new_P = ray->P + ray->D * (t + step_offset);
- VolumeShaderCoefficients coeff;
+ VolumeShaderCoefficients coeff ccl_optional_struct_init;
/* compute segment */
if (volume_shader_sample(kg, sd, state, new_P, &coeff)) {
@@ -1277,7 +1299,7 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
*/
if (stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) {
stack[0].shader = kernel_data.background.volume_shader;
- stack[0].object = PRIM_NONE;
+ stack[0].object = OBJECT_NONE;
stack[1].shader = SHADER_NONE;
}
else {
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index 799561a7466..d1602744f1d 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -23,17 +23,41 @@ CCL_NAMESPACE_BEGIN
* Utility functions for work stealing
*/
+/* Map global work index to tile, pixel X/Y and sample. */
+ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile,
+ uint global_work_index,
+ ccl_private uint *x,
+ ccl_private uint *y,
+ ccl_private uint *sample)
+{
+#ifdef __KERNEL_CUDA__
+ /* Keeping threads for the same pixel together improves performance on CUDA. */
+ uint sample_offset = global_work_index % tile->num_samples;
+ uint pixel_offset = global_work_index / tile->num_samples;
+#else /* __KERNEL_CUDA__ */
+ uint tile_pixels = tile->w * tile->h;
+ uint sample_offset = global_work_index / tile_pixels;
+ uint pixel_offset = global_work_index - sample_offset * tile_pixels;
+#endif /* __KERNEL_CUDA__ */
+ uint y_offset = pixel_offset / tile->w;
+ uint x_offset = pixel_offset - y_offset * tile->w;
+
+ *x = tile->x + x_offset;
+ *y = tile->y + y_offset;
+ *sample = tile->start_sample + sample_offset;
+}
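
A worked example of the non-CUDA decomposition in get_work_pixel(), with made-up tile values, showing how one flat work index splits into pixel coordinates and a sample number.

/* Standalone walk-through of the non-CUDA index decomposition above. */
#include <assert.h>

int main(void)
{
  const unsigned tile_x = 64, tile_y = 128, tile_w = 32, tile_h = 16;
  const unsigned start_sample = 8, global_work_index = 1000;

  const unsigned tile_pixels = tile_w * tile_h;                               /* 512 */
  const unsigned sample_offset = global_work_index / tile_pixels;             /* 1 */
  const unsigned pixel_offset = global_work_index - sample_offset * tile_pixels; /* 488 */
  const unsigned y_offset = pixel_offset / tile_w;                            /* 15 */
  const unsigned x_offset = pixel_offset - y_offset * tile_w;                 /* 8 */

  assert(tile_x + x_offset == 72);
  assert(tile_y + y_offset == 143);
  assert(start_sample + sample_offset == 9);
  return 0;
}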
+
#ifdef __KERNEL_OPENCL__
# pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#endif
#ifdef __SPLIT_KERNEL__
/* Returns true if there is work */
-ccl_device bool get_next_work(KernelGlobals *kg,
- ccl_global uint *work_pools,
- uint total_work_size,
- uint ray_index,
- ccl_private uint *global_work_index)
+ccl_device bool get_next_work_item(KernelGlobals *kg,
+ ccl_global uint *work_pools,
+ uint total_work_size,
+ uint ray_index,
+ ccl_private uint *global_work_index)
{
/* With a small amount of work there may be more threads than work due to
* rounding up of global size, stop such threads immediately. */
@@ -56,31 +80,37 @@ ccl_device bool get_next_work(KernelGlobals *kg,
/* Test if all work for this pool is done. */
return (*global_work_index < total_work_size);
}
-#endif
-/* Map global work index to tile, pixel X/Y and sample. */
-ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile,
- uint global_work_index,
- ccl_private uint *x,
- ccl_private uint *y,
- ccl_private uint *sample)
+ccl_device bool get_next_work(KernelGlobals *kg,
+ ccl_global uint *work_pools,
+ uint total_work_size,
+ uint ray_index,
+ ccl_private uint *global_work_index)
{
-#ifdef __KERNEL_CUDA__
- /* Keeping threads for the same pixel together improves performance on CUDA. */
- uint sample_offset = global_work_index % tile->num_samples;
- uint pixel_offset = global_work_index / tile->num_samples;
-#else /* __KERNEL_CUDA__ */
- uint tile_pixels = tile->w * tile->h;
- uint sample_offset = global_work_index / tile_pixels;
- uint pixel_offset = global_work_index - sample_offset * tile_pixels;
-#endif /* __KERNEL_CUDA__ */
- uint y_offset = pixel_offset / tile->w;
- uint x_offset = pixel_offset - y_offset * tile->w;
-
- *x = tile->x + x_offset;
- *y = tile->y + y_offset;
- *sample = tile->start_sample + sample_offset;
+ bool got_work = false;
+ if (kernel_data.film.pass_adaptive_aux_buffer) {
+ do {
+ got_work = get_next_work_item(kg, work_pools, total_work_size, ray_index, global_work_index);
+ if (got_work) {
+ ccl_global WorkTile *tile = &kernel_split_params.tile;
+ uint x, y, sample;
+ get_work_pixel(tile, *global_work_index, &x, &y, &sample);
+ uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride;
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if ((*aux).w == 0.0f) {
+ break;
+ }
+ }
+ } while (got_work);
+ }
+ else {
+ got_work = get_next_work_item(kg, work_pools, total_work_size, ray_index, global_work_index);
+ }
+ return got_work;
}
+#endif
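
A small worked example of the buffer-offset arithmetic used in the adaptive-sampling check above, with made-up stride and pass values; the only thing the check reads is the w component of the aux pass at that offset.

/* Made-up numbers, same indexing as the adaptive-sampling check above. */
#include <assert.h>

int main(void)
{
  const unsigned tile_offset = 0, stride = 1920, pass_stride = 36;
  const unsigned pass_adaptive_aux_buffer = 28; /* float offset within a pixel */
  const unsigned x = 100, y = 50;

  const unsigned buffer_offset = (tile_offset + x + y * stride) * pass_stride;
  const unsigned aux_offset = buffer_offset + pass_adaptive_aux_buffer;

  assert(buffer_offset == 96100 * 36);
  assert(aux_offset == 3459600 + 28);
  return 0;
}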
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_write_passes.h b/intern/cycles/kernel/kernel_write_passes.h
new file mode 100644
index 00000000000..410218d91d4
--- /dev/null
+++ b/intern/cycles/kernel/kernel_write_passes.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined(__SPLIT_KERNEL__) || defined(__KERNEL_CUDA__)
+# define __ATOMIC_PASS_WRITE__
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, float value)
+{
+ ccl_global float *buf = buffer;
+#ifdef __ATOMIC_PASS_WRITE__
+ atomic_add_and_fetch_float(buf, value);
+#else
+ *buf += value;
+#endif
+}
+
+ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, float3 value)
+{
+#ifdef __ATOMIC_PASS_WRITE__
+ ccl_global float *buf_x = buffer + 0;
+ ccl_global float *buf_y = buffer + 1;
+ ccl_global float *buf_z = buffer + 2;
+
+ atomic_add_and_fetch_float(buf_x, value.x);
+ atomic_add_and_fetch_float(buf_y, value.y);
+ atomic_add_and_fetch_float(buf_z, value.z);
+#else
+ ccl_global float3 *buf = (ccl_global float3 *)buffer;
+ *buf += value;
+#endif
+}
+
+ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, float4 value)
+{
+#ifdef __ATOMIC_PASS_WRITE__
+ ccl_global float *buf_x = buffer + 0;
+ ccl_global float *buf_y = buffer + 1;
+ ccl_global float *buf_z = buffer + 2;
+ ccl_global float *buf_w = buffer + 3;
+
+ atomic_add_and_fetch_float(buf_x, value.x);
+ atomic_add_and_fetch_float(buf_y, value.y);
+ atomic_add_and_fetch_float(buf_z, value.z);
+ atomic_add_and_fetch_float(buf_w, value.w);
+#else
+ ccl_global float4 *buf = (ccl_global float4 *)buffer;
+ *buf += value;
+#endif
+}
+
+#ifdef __DENOISING_FEATURES__
+ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer, float value)
+{
+ kernel_write_pass_float(buffer, value);
+
+ /* The online one-pass variance update that's used for the megakernel can't easily be implemented
+ * with atomics, so for the split kernel the E[x^2] - 1/N * (E[x])^2 fallback is used. */
+ kernel_write_pass_float(buffer + 1, value * value);
+}
+
+# ifdef __ATOMIC_PASS_WRITE__
+# define kernel_write_pass_float3_unaligned kernel_write_pass_float3
+# else
+ccl_device_inline void kernel_write_pass_float3_unaligned(ccl_global float *buffer, float3 value)
+{
+ buffer[0] += value.x;
+ buffer[1] += value.y;
+ buffer[2] += value.z;
+}
+# endif
+
+ccl_device_inline void kernel_write_pass_float3_variance(ccl_global float *buffer, float3 value)
+{
+ kernel_write_pass_float3_unaligned(buffer, value);
+ kernel_write_pass_float3_unaligned(buffer + 3, value * value);
+}
+#endif /* __DENOISING_FEATURES__ */
+
+CCL_NAMESPACE_END
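
The *_variance helpers above only accumulate the value and its square into adjacent pass slots; the E[x^2] - 1/N * (E[x])^2 fallback mentioned in the comment is applied later, when the accumulated sums are turned into a variance estimate. A minimal sketch of that reconstruction, with hypothetical names (sum, sum_sq, N for the two accumulated pass values and the sample count):

/* Illustration only: turning the two accumulated pass values into a variance
 * estimate. 'sum' = sum of x_i, 'sum_sq' = sum of x_i^2, N = sample count. */
static float pass_variance_from_sums(float sum, float sum_sq, int N)
{
  /* Sum of squared deviations: sum(x_i^2) - (sum(x_i))^2 / N. Dividing by N
   * (or N - 1) afterwards would give the usual biased/unbiased variance. */
  return sum_sq - (sum * sum) / (float)N;
}
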
diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp
index f2146302a27..8040bfb7b33 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp
@@ -64,15 +64,17 @@ CCL_NAMESPACE_BEGIN
/* Memory Copy */
-void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size)
+void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t)
{
- if (strcmp(name, "__data") == 0)
- memcpy(&kg->__data, host, size);
- else
+ if (strcmp(name, "__data") == 0) {
+ kg->__data = *(KernelData *)host;
+ }
+ else {
assert(0);
+ }
}
-void kernel_tex_copy(KernelGlobals *kg, const char *name, void *mem, size_t size)
+void kernel_global_memory_copy(KernelGlobals *kg, const char *name, void *mem, size_t size)
{
if (0) {
}
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index f5d981fb71a..ea3103f12c3 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -46,6 +46,9 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
int offset,
int sample);
+void KERNEL_FUNCTION_FULL_NAME(bake)(
+ KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride);
+
/* Split kernels */
void KERNEL_FUNCTION_FULL_NAME(data_init)(KernelGlobals *kg,
@@ -89,5 +92,9 @@ DECLARE_SPLIT_KERNEL_FUNCTION(enqueue_inactive)
DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)
#undef KERNEL_ARCH
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
index 4289e2bbb85..f87501db258 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@@ -19,6 +19,10 @@
CCL_NAMESPACE_BEGIN
+/* Make template functions private so symbols don't conflict between kernels with different
+ * instruction sets. */
+namespace {
+
template<typename T> struct TextureInterpolator {
#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
{ \
@@ -470,7 +474,7 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl
{
const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
- switch (kernel_tex_type(id)) {
+ switch (info.data_type) {
case IMAGE_DATA_TYPE_HALF:
return TextureInterpolator<half>::interp(info, x, y);
case IMAGE_DATA_TYPE_BYTE:
@@ -494,28 +498,34 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl
}
}
-ccl_device float4 kernel_tex_image_interp_3d(
- KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
+ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg,
+ int id,
+ float3 P,
+ InterpolationType interp)
{
const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
- switch (kernel_tex_type(id)) {
+ if (info.use_transform_3d) {
+ P = transform_point(&info.transform_3d, P);
+ }
+
+ switch (info.data_type) {
case IMAGE_DATA_TYPE_HALF:
- return TextureInterpolator<half>::interp_3d(info, x, y, z, interp);
+ return TextureInterpolator<half>::interp_3d(info, P.x, P.y, P.z, interp);
case IMAGE_DATA_TYPE_BYTE:
- return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp);
+ return TextureInterpolator<uchar>::interp_3d(info, P.x, P.y, P.z, interp);
case IMAGE_DATA_TYPE_USHORT:
- return TextureInterpolator<uint16_t>::interp_3d(info, x, y, z, interp);
+ return TextureInterpolator<uint16_t>::interp_3d(info, P.x, P.y, P.z, interp);
case IMAGE_DATA_TYPE_FLOAT:
- return TextureInterpolator<float>::interp_3d(info, x, y, z, interp);
+ return TextureInterpolator<float>::interp_3d(info, P.x, P.y, P.z, interp);
case IMAGE_DATA_TYPE_HALF4:
- return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp);
+ return TextureInterpolator<half4>::interp_3d(info, P.x, P.y, P.z, interp);
case IMAGE_DATA_TYPE_BYTE4:
- return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp);
+ return TextureInterpolator<uchar4>::interp_3d(info, P.x, P.y, P.z, interp);
case IMAGE_DATA_TYPE_USHORT4:
- return TextureInterpolator<ushort4>::interp_3d(info, x, y, z, interp);
+ return TextureInterpolator<ushort4>::interp_3d(info, P.x, P.y, P.z, interp);
case IMAGE_DATA_TYPE_FLOAT4:
- return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp);
+ return TextureInterpolator<float4>::interp_3d(info, P.x, P.y, P.z, interp);
default:
assert(0);
return make_float4(
@@ -523,6 +533,8 @@ ccl_device float4 kernel_tex_image_interp_3d(
}
}
+} /* Namespace. */
+
CCL_NAMESPACE_END
#endif // __KERNEL_CPU_IMAGE_H__
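
The unnamed namespace introduced above gives the TextureInterpolator templates internal linkage, so each per-instruction-set kernel library keeps its own private instantiations and they cannot collide at link time. A minimal sketch of the mechanism, with hypothetical names:

/* Illustration only: names inside an unnamed namespace have internal linkage,
 * so two translation units compiled with different flags each get their own
 * copy and the linker never sees conflicting symbols. */
namespace {
template<typename T> T clamp01(T v)
{
  return v < T(0) ? T(0) : (v > T(1) ? T(1) : v);
}
} /* namespace */

float use_clamp01(float v)
{
  return clamp01(v); /* resolves to this translation unit's private copy */
}
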
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 9ca3f46b5b6..5aa3fb14318 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -20,6 +20,7 @@
* simply includes this file without worry of copying actual implementation over.
*/
+// clang-format off
#include "kernel/kernel_compat_cpu.h"
#ifndef KERNEL_STUB
@@ -58,6 +59,10 @@
# include "kernel/split/kernel_next_iteration_setup.h"
# include "kernel/split/kernel_indirect_subsurface.h"
# include "kernel/split/kernel_buffer_update.h"
+# include "kernel/split/kernel_adaptive_stopping.h"
+# include "kernel/split/kernel_adaptive_filter_x.h"
+# include "kernel/split/kernel_adaptive_filter_y.h"
+# include "kernel/split/kernel_adaptive_adjust_samples.h"
# endif /* __SPLIT_KERNEL__ */
#else
# define STUB_ASSERT(arch, name) \
@@ -67,6 +72,7 @@
# include "kernel/split/kernel_data_init.h"
# endif /* __SPLIT_KERNEL__ */
#endif /* KERNEL_STUB */
+// clang-format on
CCL_NAMESPACE_BEGIN
@@ -126,6 +132,18 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
# endif /* KERNEL_STUB */
}
+/* Bake */
+
+void KERNEL_FUNCTION_FULL_NAME(bake)(
+ KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride)
+{
+# ifdef KERNEL_STUB
+ STUB_ASSERT(KERNEL_ARCH, bake);
+# else
+ kernel_bake_evaluate(kg, buffer, sample, x, y, offset, stride);
+# endif /* KERNEL_STUB */
+}
+
/* Shader Evaluate */
void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
@@ -140,12 +158,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
# ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, shader);
# else
- if (type >= SHADER_EVAL_BAKE) {
-# ifdef __BAKING__
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, i, offset, sample);
-# endif
- }
- else if (type == SHADER_EVAL_DISPLACE) {
+ if (type == SHADER_EVAL_DISPLACE) {
kernel_displace_evaluate(kg, input, output, i);
}
else {
@@ -204,6 +217,10 @@ DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)
#endif /* __SPLIT_KERNEL__ */
#undef KERNEL_STUB
diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu
index 5b552b01413..6c9642d1f03 100644
--- a/intern/cycles/kernel/kernels/cuda/filter.cu
+++ b/intern/cycles/kernel/kernels/cuda/filter.cu
@@ -28,6 +28,74 @@
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_filter_copy_input(float *buffer,
+ CCL_FILTER_TILE_INFO,
+ int4 prefilter_rect,
+ int buffer_pass_stride)
+{
+ int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x;
+ int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y;
+ if(x < prefilter_rect.z && y < prefilter_rect.w) {
+ int xtile = (x < tile_info->x[1]) ? 0 : ((x < tile_info->x[2]) ? 1 : 2);
+ int ytile = (y < tile_info->y[1]) ? 0 : ((y < tile_info->y[2]) ? 1 : 2);
+ int itile = ytile * 3 + xtile;
+ float *const in = ((float *)ccl_get_tile_buffer(itile)) +
+ (tile_info->offsets[itile] + y * tile_info->strides[itile] + x) * buffer_pass_stride;
+ buffer += ((y - prefilter_rect.y) * (prefilter_rect.z - prefilter_rect.x) + (x - prefilter_rect.x)) * buffer_pass_stride;
+ for (int i = 0; i < buffer_pass_stride; ++i)
+ buffer[i] = in[i];
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_filter_convert_to_rgb(float *rgb, float *buf, int sw, int sh, int stride, int pass_stride, int3 pass_offset, int num_inputs, int num_samples)
+{
+ int x = blockDim.x*blockIdx.x + threadIdx.x;
+ int y = blockDim.y*blockIdx.y + threadIdx.y;
+ if(x < sw && y < sh) {
+ if (num_inputs > 0) {
+ float *in = buf + x * pass_stride + (y * stride + pass_offset.x) / sizeof(float);
+ float *out = rgb + (x + y * sw) * 3;
+ out[0] = clamp(in[0] / num_samples, 0.0f, 10000.0f);
+ out[1] = clamp(in[1] / num_samples, 0.0f, 10000.0f);
+ out[2] = clamp(in[2] / num_samples, 0.0f, 10000.0f);
+ }
+ if (num_inputs > 1) {
+ float *in = buf + x * pass_stride + (y * stride + pass_offset.y) / sizeof(float);
+ float *out = rgb + (x + y * sw) * 3 + (sw * sh) * 3;
+ out[0] = in[0] / num_samples;
+ out[1] = in[1] / num_samples;
+ out[2] = in[2] / num_samples;
+ }
+ if (num_inputs > 2) {
+ float *in = buf + x * pass_stride + (y * stride + pass_offset.z) / sizeof(float);
+ float *out = rgb + (x + y * sw) * 3 + (sw * sh * 2) * 3;
+ out[0] = in[0] / num_samples;
+ out[1] = in[1] / num_samples;
+ out[2] = in[2] / num_samples;
+ }
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_filter_convert_from_rgb(float *rgb, float *buf, int ix, int iy, int iw, int ih, int sx, int sy, int sw, int sh, int offset, int stride, int pass_stride, int num_samples)
+{
+ int x = blockDim.x*blockIdx.x + threadIdx.x;
+ int y = blockDim.y*blockIdx.y + threadIdx.y;
+ if(x < sw && y < sh) {
+ float *in = rgb + ((ix + x) + (iy + y) * iw) * 3;
+ float *out = buf + (offset + (sx + x) + (sy + y) * stride) * pass_stride;
+ out[0] = in[0] * num_samples;
+ out[1] = in[1] * num_samples;
+ out[2] = in[2] * num_samples;
+ }
+}
+
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_filter_divide_shadow(int sample,
CCL_FILTER_TILE_INFO,
float *unfilteredA,
@@ -97,14 +165,14 @@ kernel_cuda_filter_write_feature(int sample,
int x = blockDim.x*blockIdx.x + threadIdx.x;
int y = blockDim.y*blockIdx.y + threadIdx.y;
if(x < filter_area.z && y < filter_area.w) {
- kernel_filter_write_feature(sample,
- x + filter_area.x,
- y + filter_area.y,
- buffer_params,
- from,
- buffer,
- out_offset,
- prefilter_rect);
+ kernel_filter_write_feature(sample,
+ x + filter_area.x,
+ y + filter_area.y,
+ buffer_params,
+ from,
+ buffer,
+ out_offset,
+ prefilter_rect);
}
}
diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu
index af311027f78..d4f41132a11 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel.cu
@@ -33,6 +33,7 @@
#include "kernel/kernel_path_branched.h"
#include "kernel/kernel_bake.h"
#include "kernel/kernel_work_stealing.h"
+#include "kernel/kernel_adaptive_sampling.h"
/* kernels */
extern "C" __global__ void
@@ -83,6 +84,75 @@ kernel_cuda_branched_path_trace(WorkTile *tile, uint total_work_size)
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_stopping(WorkTile *tile, int sample, uint total_work_size)
+{
+ int work_index = ccl_global_id(0);
+ bool thread_is_active = work_index < total_work_size;
+ KernelGlobals kg;
+ if(thread_is_active && kernel_data.film.pass_adaptive_aux_buffer) {
+ uint x = tile->x + work_index % tile->w;
+ uint y = tile->y + work_index / tile->w;
+ int index = tile->offset + x + y * tile->stride;
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ kernel_do_adaptive_stopping(&kg, buffer, sample);
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_filter_x(WorkTile *tile, int sample, uint)
+{
+ KernelGlobals kg;
+ if(kernel_data.film.pass_adaptive_aux_buffer && sample > kernel_data.integrator.adaptive_min_samples) {
+ if(ccl_global_id(0) < tile->h) {
+ int y = tile->y + ccl_global_id(0);
+ kernel_do_adaptive_filter_x(&kg, y, tile);
+ }
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_filter_y(WorkTile *tile, int sample, uint)
+{
+ KernelGlobals kg;
+ if(kernel_data.film.pass_adaptive_aux_buffer && sample > kernel_data.integrator.adaptive_min_samples) {
+ if(ccl_global_id(0) < tile->w) {
+ int x = tile->x + ccl_global_id(0);
+ kernel_do_adaptive_filter_y(&kg, x, tile);
+ }
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_scale_samples(WorkTile *tile, int start_sample, int sample, uint total_work_size)
+{
+ if(kernel_data.film.pass_adaptive_aux_buffer) {
+ int work_index = ccl_global_id(0);
+ bool thread_is_active = work_index < total_work_size;
+ KernelGlobals kg;
+ if(thread_is_active) {
+ uint x = tile->x + work_index % tile->w;
+ uint y = tile->y + work_index / tile->w;
+ int index = tile->offset + x + y * tile->stride;
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ if(buffer[kernel_data.film.pass_sample_count] < 0.0f) {
+ buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count];
+ float sample_multiplier = sample / max((float)start_sample + 1.0f, buffer[kernel_data.film.pass_sample_count]);
+ if(sample_multiplier != 1.0f) {
+ kernel_adaptive_post_adjust(&kg, buffer, sample_multiplier);
+ }
+ }
+ else {
+ kernel_adaptive_post_adjust(&kg, buffer, sample / (sample - 1.0f));
+ }
+ }
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
@@ -144,13 +214,16 @@ kernel_cuda_background(uint4 *input,
#ifdef __BAKING__
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_bake(uint4 *input, float4 *output, int type, int filter, int sx, int sw, int offset, int sample)
+kernel_cuda_bake(WorkTile *tile, uint total_work_size)
{
- int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
+ int work_index = ccl_global_id(0);
+
+ if(work_index < total_work_size) {
+ uint x, y, sample;
+ get_work_pixel(tile, work_index, &x, &y, &sample);
- if(x < sx + sw) {
KernelGlobals kg;
- kernel_bake_evaluate(&kg, input, output, (ShaderEvalType)type, filter, x, offset, sample);
+ kernel_bake_evaluate(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
}
}
#endif
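
The adaptive scale-samples kernel above exists so that the single uniform divide by the final sample count still produces a correct average for pixels that adaptive sampling stopped early: their pass sums are multiplied by final_sample / stopped_at before that divide. A minimal sketch of the idea, with hypothetical names rather than the kernel's actual buffer layout:

/* Illustration only: 'accum' is a pixel's pass sum, 'stopped_at' the sample
 * count at which adaptive sampling stopped it, 'final_sample' the tile-wide
 * sample count used for the uniform divide. */
static float displayed_average(float accum, int stopped_at, int final_sample)
{
  /* Scale the sum up as if the pixel had kept sampling to final_sample... */
  float scaled = accum * ((float)final_sample / (float)stopped_at);
  /* ...so the uniform divide recovers the mean of the samples actually taken. */
  return scaled / (float)final_sample; /* equals accum / stopped_at */
}
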
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_config.h b/intern/cycles/kernel/kernels/cuda/kernel_config.h
index d9f349837a8..3ec00762e72 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_config.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_config.h
@@ -61,7 +61,8 @@
/* tunable parameters */
# define CUDA_THREADS_BLOCK_WIDTH 16
-/* CUDA 9.0 seems to cause slowdowns on high-end Pascal cards unless we increase the number of registers */
+/* CUDA 9.0 seems to cause slowdowns on high-end Pascal cards unless we increase the number of
+ * registers */
# if __CUDACC_VER_MAJOR__ >= 9 && __CUDA_ARCH__ >= 600
# define CUDA_KERNEL_MAX_REGISTERS 64
# else
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
index 7c68f08ea10..1d425d132a1 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
@@ -124,7 +124,7 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl
CUtexObject tex = (CUtexObject)info.data;
/* float4, byte4, ushort4 and half4 */
- const int texture_type = kernel_tex_type(id);
+ const int texture_type = info.data_type;
if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) {
if (info.interpolation == INTERPOLATION_CUBIC) {
@@ -149,14 +149,25 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl
}
}
-ccl_device float4 kernel_tex_image_interp_3d(
- KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
+ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg,
+ int id,
+ float3 P,
+ InterpolationType interp)
{
const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+
+ if (info.use_transform_3d) {
+ P = transform_point(&info.transform_3d, P);
+ }
+
+ const float x = P.x;
+ const float y = P.y;
+ const float z = P.z;
+
CUtexObject tex = (CUtexObject)info.data;
uint interpolation = (interp == INTERPOLATION_NONE) ? info.interpolation : interp;
- const int texture_type = kernel_tex_type(id);
+ const int texture_type = info.data_type;
if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) {
if (interpolation == INTERPOLATION_CUBIC) {
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index 43b3d0aa0e6..95ad7599cf1 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -43,6 +43,10 @@
#include "kernel/split/kernel_next_iteration_setup.h"
#include "kernel/split/kernel_indirect_subsurface.h"
#include "kernel/split/kernel_buffer_update.h"
+#include "kernel/split/kernel_adaptive_stopping.h"
+#include "kernel/split/kernel_adaptive_filter_x.h"
+#include "kernel/split/kernel_adaptive_filter_y.h"
+#include "kernel/split/kernel_adaptive_adjust_samples.h"
#include "kernel/kernel_film.h"
@@ -121,6 +125,10 @@ DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl
new file mode 100644
index 00000000000..ebdb99d4730
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_adjust_samples.h"
+
+#define KERNEL_NAME adaptive_adjust_samples
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl
new file mode 100644
index 00000000000..76d82d4184e
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_filter_x.h"
+
+#define KERNEL_NAME adaptive_filter_x
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl
new file mode 100644
index 00000000000..1e6d15ba0f2
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_filter_y.h"
+
+#define KERNEL_NAME adaptive_filter_y
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl
new file mode 100644
index 00000000000..51de0059667
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_stopping.h"
+
+#define KERNEL_NAME adaptive_stopping
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
index b6390679331..9ab374d1fba 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
+++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
@@ -47,7 +47,7 @@ ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg,
int id,
int offset)
{
- const int texture_type = kernel_tex_type(id);
+ const int texture_type = info->data_type;
/* Float4 */
if (texture_type == IMAGE_DATA_TYPE_FLOAT4) {
@@ -77,7 +77,7 @@ ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg,
return make_float4(f, f, f, 1.0f);
}
/* Byte */
-#ifdef cl_khr_fp16
+#ifdef __KERNEL_CL_KHR_FP16__
/* half and half4 are optional in OpenCL */
else if (texture_type == IMAGE_DATA_TYPE_HALF) {
float f = tex_fetch(half, info, offset);
@@ -202,11 +202,19 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl
}
}
-ccl_device float4
-kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, int interp)
+ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float3 P, int interp)
{
const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
+ if (info->use_transform_3d) {
+ Transform tfm = info->transform_3d;
+ P = transform_point(&tfm, P);
+ }
+
+ const float x = P.x;
+ const float y = P.y;
+ const float z = P.z;
+
if (info->extension == EXTENSION_CLIP) {
if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl
index 6041f13b52b..c3b7b09460a 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl
@@ -28,3 +28,7 @@
#include "kernel/kernels/opencl/kernel_next_iteration_setup.cl"
#include "kernel/kernels/opencl/kernel_indirect_subsurface.cl"
#include "kernel/kernels/opencl/kernel_buffer_update.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_stopping.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_filter_x.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_filter_y.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl"
diff --git a/intern/cycles/kernel/kernels/optix/kernel_optix.cu b/intern/cycles/kernel/kernels/optix/kernel_optix.cu
new file mode 100644
index 00000000000..3b166e59dfd
--- /dev/null
+++ b/intern/cycles/kernel/kernels/optix/kernel_optix.cu
@@ -0,0 +1,329 @@
+/*
+ * Copyright 2019, NVIDIA Corporation.
+ * Copyright 2019, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// clang-format off
+#include "kernel/kernel_compat_optix.h"
+#include "util/util_atomic.h"
+#include "kernel/kernel_types.h"
+#include "kernel/kernel_globals.h"
+#include "../cuda/kernel_cuda_image.h" // Texture lookup uses normal CUDA intrinsics
+
+#include "kernel/kernel_path.h"
+#include "kernel/kernel_bake.h"
+// clang-format on
+
+template<typename T> ccl_device_forceinline T *get_payload_ptr_0()
+{
+ return (T *)(((uint64_t)optixGetPayload_1() << 32) | optixGetPayload_0());
+}
+template<typename T> ccl_device_forceinline T *get_payload_ptr_2()
+{
+ return (T *)(((uint64_t)optixGetPayload_3() << 32) | optixGetPayload_2());
+}
+
+template<bool always = false> ccl_device_forceinline uint get_object_id()
+{
+#ifdef __OBJECT_MOTION__
+ // Always get the instance ID from the TLAS
+ // There might be a motion transform node between TLAS and BLAS which does not have one
+ uint object = optixGetInstanceIdFromHandle(optixGetTransformListHandle(0));
+#else
+ uint object = optixGetInstanceId();
+#endif
+ // Choose between always returning object ID or only for instances
+ if (always)
+ // Can just remove the high bit since instance always contains object ID
+ return object & 0x7FFFFF;
+ // Set to OBJECT_NONE if this is not an instanced object
+ else if (object & 0x800000)
+ object = OBJECT_NONE;
+ return object;
+}
+
+extern "C" __global__ void __raygen__kernel_optix_path_trace()
+{
+ KernelGlobals kg; // Allocate stack storage for common data
+
+ const uint3 launch_index = optixGetLaunchIndex();
+ // Keep threads for same pixel together to improve occupancy of warps
+ uint pixel_offset = launch_index.x / __params.tile.num_samples;
+ uint sample_offset = launch_index.x % __params.tile.num_samples;
+
+ kernel_path_trace(&kg,
+ __params.tile.buffer,
+ __params.tile.start_sample + sample_offset,
+ __params.tile.x + pixel_offset,
+ __params.tile.y + launch_index.y,
+ __params.tile.offset,
+ __params.tile.stride);
+}
+
+#ifdef __BAKING__
+extern "C" __global__ void __raygen__kernel_optix_bake()
+{
+ KernelGlobals kg;
+ const ShaderParams &p = __params.shader;
+ kernel_bake_evaluate(&kg,
+ p.input,
+ p.output,
+ (ShaderEvalType)p.type,
+ p.filter,
+ p.sx + optixGetLaunchIndex().x,
+ p.offset,
+ p.sample);
+}
+#endif
+
+extern "C" __global__ void __raygen__kernel_optix_displace()
+{
+ KernelGlobals kg;
+ const ShaderParams &p = __params.shader;
+ kernel_displace_evaluate(&kg, p.input, p.output, p.sx + optixGetLaunchIndex().x);
+}
+
+extern "C" __global__ void __raygen__kernel_optix_background()
+{
+ KernelGlobals kg;
+ const ShaderParams &p = __params.shader;
+ kernel_background_evaluate(&kg, p.input, p.output, p.sx + optixGetLaunchIndex().x);
+}
+
+extern "C" __global__ void __miss__kernel_optix_miss()
+{
+ // 'kernel_path_lamp_emission' checks the intersection distance, so we need to set it even on a miss
+ optixSetPayload_0(__float_as_uint(optixGetRayTmax()));
+ optixSetPayload_5(PRIMITIVE_NONE);
+}
+
+extern "C" __global__ void __anyhit__kernel_optix_local_hit()
+{
+#ifdef __BVH_LOCAL__
+ const uint object = get_object_id<true>();
+ if (object != optixGetPayload_4() /* local_object */) {
+ // Only intersect with matching object
+ return optixIgnoreIntersection();
+ }
+
+ int hit = 0;
+ uint *const lcg_state = get_payload_ptr_0<uint>();
+ LocalIntersection *const local_isect = get_payload_ptr_2<LocalIntersection>();
+
+ if (lcg_state) {
+ const uint max_hits = optixGetPayload_5();
+ for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
+ if (optixGetRayTmax() == local_isect->hits[i].t) {
+ return optixIgnoreIntersection();
+ }
+ }
+
+ hit = local_isect->num_hits++;
+
+ if (local_isect->num_hits > max_hits) {
+ hit = lcg_step_uint(lcg_state) % local_isect->num_hits;
+ if (hit >= max_hits) {
+ return optixIgnoreIntersection();
+ }
+ }
+ }
+ else {
+ if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) {
+ // Record closest intersection only
+ // Do not terminate ray here, since there is no guarantee about distance ordering in any-hit
+ return optixIgnoreIntersection();
+ }
+
+ local_isect->num_hits = 1;
+ }
+
+ Intersection *isect = &local_isect->hits[hit];
+ isect->t = optixGetRayTmax();
+ isect->prim = optixGetPrimitiveIndex();
+ isect->object = get_object_id();
+ isect->type = kernel_tex_fetch(__prim_type, isect->prim);
+
+ const float2 barycentrics = optixGetTriangleBarycentrics();
+ isect->u = 1.0f - barycentrics.y - barycentrics.x;
+ isect->v = barycentrics.x;
+
+ // Record geometric normal
+ const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
+ const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0));
+ const float3 tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1));
+ const float3 tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2));
+ local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
+
+ // Continue tracing (without this the trace call would return after the first hit)
+ optixIgnoreIntersection();
+#endif
+}
+
+extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
+{
+#ifdef __SHADOW_RECORD_ALL__
+ const uint prim = optixGetPrimitiveIndex();
+# ifdef __VISIBILITY_FLAG__
+ const uint visibility = optixGetPayload_4();
+ if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) {
+ return optixIgnoreIntersection();
+ }
+# endif
+
+ // Offset into array with num_hits
+ Intersection *const isect = get_payload_ptr_0<Intersection>() + optixGetPayload_2();
+ isect->t = optixGetRayTmax();
+ isect->prim = prim;
+ isect->object = get_object_id();
+ isect->type = kernel_tex_fetch(__prim_type, prim);
+
+ if (optixIsTriangleHit()) {
+ const float2 barycentrics = optixGetTriangleBarycentrics();
+ isect->u = 1.0f - barycentrics.y - barycentrics.x;
+ isect->v = barycentrics.x;
+ }
+# ifdef __HAIR__
+ else {
+ const float u = __uint_as_float(optixGetAttribute_0());
+ isect->u = u;
+ isect->v = __uint_as_float(optixGetAttribute_1());
+
+ // Filter out curve endcaps
+ if (u == 0.0f || u == 1.0f) {
+ return optixIgnoreIntersection();
+ }
+ }
+# endif
+
+# ifdef __TRANSPARENT_SHADOWS__
+ // Detect if this surface has a shader with transparent shadows
+ if (!shader_transparent_shadow(NULL, isect) || optixGetPayload_2() >= optixGetPayload_3()) {
+# endif
+ // This is an opaque hit or the hit limit has been reached, abort traversal
+ optixSetPayload_5(true);
+ return optixTerminateRay();
+# ifdef __TRANSPARENT_SHADOWS__
+ }
+
+ optixSetPayload_2(optixGetPayload_2() + 1); // num_hits++
+
+ // Continue tracing
+ optixIgnoreIntersection();
+# endif
+#endif
+}
+
+extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
+{
+ uint visibility = optixGetPayload_4();
+#ifdef __VISIBILITY_FLAG__
+ const uint prim = optixGetPrimitiveIndex();
+ if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) {
+ return optixIgnoreIntersection();
+ }
+#endif
+
+#ifdef __HAIR__
+ if (!optixIsTriangleHit()) {
+ // Filter out curve endcaps
+ const float u = __uint_as_float(optixGetAttribute_0());
+ if (u == 0.0f || u == 1.0f) {
+ return optixIgnoreIntersection();
+ }
+ }
+#endif
+
+ // Shadow ray early termination
+ if (visibility & PATH_RAY_SHADOW_OPAQUE) {
+ return optixTerminateRay();
+ }
+}
+
+extern "C" __global__ void __closesthit__kernel_optix_hit()
+{
+ optixSetPayload_0(__float_as_uint(optixGetRayTmax())); // Intersection distance
+ optixSetPayload_3(optixGetPrimitiveIndex());
+ optixSetPayload_4(get_object_id());
+ // Can be PRIMITIVE_TRIANGLE and PRIMITIVE_MOTION_TRIANGLE or curve type and segment index
+ optixSetPayload_5(kernel_tex_fetch(__prim_type, optixGetPrimitiveIndex()));
+
+ if (optixIsTriangleHit()) {
+ const float2 barycentrics = optixGetTriangleBarycentrics();
+ optixSetPayload_1(__float_as_uint(1.0f - barycentrics.y - barycentrics.x));
+ optixSetPayload_2(__float_as_uint(barycentrics.x));
+ }
+ else {
+ optixSetPayload_1(optixGetAttribute_0()); // Same as 'optixGetCurveParameter()'
+ optixSetPayload_2(optixGetAttribute_1());
+ }
+}
+
+#ifdef __HAIR__
+ccl_device_inline void optix_intersection_curve(const uint prim, const uint type)
+{
+ const uint object = get_object_id<true>();
+ const uint visibility = optixGetPayload_4();
+
+ float3 P = optixGetObjectRayOrigin();
+ float3 dir = optixGetObjectRayDirection();
+
+ // The direction is not normalized by default, but the curve intersection routine expects that
+ float len;
+ dir = normalize_len(dir, &len);
+
+# ifdef __OBJECT_MOTION__
+ const float time = optixGetRayTime();
+# else
+ const float time = 0.0f;
+# endif
+
+ Intersection isect;
+ isect.t = optixGetRayTmax();
+ // Transform maximum distance into object space
+ if (isect.t != FLT_MAX)
+ isect.t *= len;
+
+ if (curve_intersect(NULL, &isect, P, dir, visibility, object, prim, time, type)) {
+ optixReportIntersection(isect.t / len,
+ type & PRIMITIVE_ALL,
+ __float_as_int(isect.u), // Attribute_0
+ __float_as_int(isect.v)); // Attribute_1
+ }
+}
+
+extern "C" __global__ void __intersection__curve_ribbon()
+{
+ const uint prim = optixGetPrimitiveIndex();
+ const uint type = kernel_tex_fetch(__prim_type, prim);
+
+ if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) {
+ optix_intersection_curve(prim, type);
+ }
+}
+
+extern "C" __global__ void __intersection__curve_all()
+{
+ const uint prim = optixGetPrimitiveIndex();
+ const uint type = kernel_tex_fetch(__prim_type, prim);
+ optix_intersection_curve(prim, type);
+}
+#endif
+
+#ifdef __KERNEL_DEBUG__
+extern "C" __global__ void __exception__kernel_optix_exception()
+{
+ printf("Unhandled exception occurred: code %d!\n", optixGetExceptionCode());
+}
+#endif
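
The get_payload_ptr_0()/get_payload_ptr_2() helpers above rebuild 64-bit pointers from pairs of 32-bit OptiX payload registers. A minimal sketch of the packing and unpacking in plain C++, with hypothetical names; the real code passes the two halves through the trace call's payload slots:

/* Illustration only: splitting a 64-bit pointer into two 32-bit payload
 * values and reassembling it, as the payload helpers above do. */
#include <cstdint>

static void pack_ptr(void *ptr, uint32_t *lo, uint32_t *hi)
{
  uint64_t bits = (uint64_t)(uintptr_t)ptr;
  *lo = (uint32_t)(bits & 0xffffffffu); /* e.g. payload 0 (or 2) */
  *hi = (uint32_t)(bits >> 32);         /* e.g. payload 1 (or 3) */
}

static void *unpack_ptr(uint32_t lo, uint32_t hi)
{
  return (void *)(uintptr_t)(((uint64_t)hi << 32) | lo);
}
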
diff --git a/intern/cycles/kernel/osl/CMakeLists.txt b/intern/cycles/kernel/osl/CMakeLists.txt
index 28d9ca854db..d7ab778181e 100644
--- a/intern/cycles/kernel/osl/CMakeLists.txt
+++ b/intern/cycles/kernel/osl/CMakeLists.txt
@@ -27,10 +27,24 @@ set(HEADER_SRC
set(LIB
cycles_render
+
+ ${OSL_LIBRARIES}
+ ${OPENIMAGEIO_LIBRARIES}
+ ${LLVM_LIBRARY}
)
+# OSL and LLVM are built without RTTI
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${RTTI_DISABLE_FLAGS}")
+if(APPLE)
+ # Disable allocation warning on macOS prior to 10.14: OSLRenderServices
+ # contains a member which is 64 bytes aligned (the cache inside of OIIO's
+ # unordered_map_concurrent). This is not something the SDK supports, but
+ # since we take care of allocations ourselves it is OK to ignore the
+ # diagnostic message.
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -faligned-allocation")
+endif()
+
include_directories(${INC})
include_directories(SYSTEM ${INC_SYS})
diff --git a/intern/cycles/kernel/osl/background.cpp b/intern/cycles/kernel/osl/background.cpp
index b395227845d..3f9de5ab33d 100644
--- a/intern/cycles/kernel/osl/background.cpp
+++ b/intern/cycles/kernel/osl/background.cpp
@@ -36,9 +36,11 @@
#include "kernel/osl/osl_closures.h"
+// clang-format off
#include "kernel/kernel_compat_cpu.h"
#include "kernel/closure/alloc.h"
#include "kernel/closure/emissive.h"
+// clang-format on
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
index c5edc7c9be3..76a2e41abfa 100644
--- a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
+++ b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
@@ -37,10 +37,12 @@
#include "kernel/kernel_compat_cpu.h"
#include "kernel/osl/osl_closures.h"
+// clang-format off
#include "kernel/kernel_types.h"
#include "kernel/kernel_montecarlo.h"
#include "kernel/closure/alloc.h"
#include "kernel/closure/bsdf_diffuse_ramp.h"
+// clang-format on
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
index 4b7e59ff932..b78dc8a3a67 100644
--- a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
+++ b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
@@ -37,9 +37,11 @@
#include "kernel/kernel_compat_cpu.h"
#include "kernel/osl/osl_closures.h"
+// clang-format off
#include "kernel/kernel_types.h"
#include "kernel/closure/alloc.h"
#include "kernel/closure/bsdf_phong_ramp.h"
+// clang-format on
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/osl/emissive.cpp b/intern/cycles/kernel/osl/emissive.cpp
index c29ddb13e2e..d656723bac2 100644
--- a/intern/cycles/kernel/osl/emissive.cpp
+++ b/intern/cycles/kernel/osl/emissive.cpp
@@ -36,10 +36,12 @@
#include "kernel/osl/osl_closures.h"
+// clang-format off
#include "kernel/kernel_compat_cpu.h"
#include "kernel/kernel_types.h"
#include "kernel/closure/alloc.h"
#include "kernel/closure/emissive.h"
+// clang-format on
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp
index dd52c33071c..c5ca8616fbd 100644
--- a/intern/cycles/kernel/osl/osl_bssrdf.cpp
+++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp
@@ -35,6 +35,7 @@
#include "kernel/kernel_compat_cpu.h"
#include "kernel/osl/osl_closures.h"
+// clang-format off
#include "kernel/kernel_types.h"
#include "kernel/kernel_montecarlo.h"
@@ -43,6 +44,7 @@
#include "kernel/closure/bsdf_diffuse.h"
#include "kernel/closure/bsdf_principled_diffuse.h"
#include "kernel/closure/bssrdf.h"
+// clang-format on
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp
index aa7e2727577..7ee467a46dd 100644
--- a/intern/cycles/kernel/osl/osl_closures.cpp
+++ b/intern/cycles/kernel/osl/osl_closures.cpp
@@ -39,6 +39,7 @@
#include "util/util_math.h"
#include "util/util_param.h"
+// clang-format off
#include "kernel/kernel_types.h"
#include "kernel/kernel_compat_cpu.h"
#include "kernel/split/kernel_split_data_types.h"
@@ -63,6 +64,7 @@
#include "kernel/closure/bsdf_principled_diffuse.h"
#include "kernel/closure/bsdf_principled_sheen.h"
#include "kernel/closure/volume.h"
+// clang-format on
CCL_NAMESPACE_BEGIN
@@ -98,14 +100,14 @@ CLOSURE_FLOAT3_PARAM(DiffuseClosure, params.N),
BSDF_CLOSURE_CLASS_END(AshikhminVelvet, ashikhmin_velvet)
BSDF_CLOSURE_CLASS_BEGIN(AshikhminShirley,
- ashikhmin_shirley_aniso,
+ ashikhmin_shirley,
MicrofacetBsdf,
LABEL_GLOSSY | LABEL_REFLECT)
CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.N),
CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.T),
CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_x),
CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_y),
- BSDF_CLOSURE_CLASS_END(AshikhminShirley, ashikhmin_shirley_aniso)
+ BSDF_CLOSURE_CLASS_END(AshikhminShirley, ashikhmin_shirley)
BSDF_CLOSURE_CLASS_BEGIN(DiffuseToon, diffuse_toon, ToonBsdf, LABEL_DIFFUSE)
CLOSURE_FLOAT3_PARAM(DiffuseToonClosure, params.N),
@@ -119,42 +121,42 @@ CLOSURE_FLOAT3_PARAM(DiffuseClosure, params.N),
CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.smooth),
BSDF_CLOSURE_CLASS_END(GlossyToon, glossy_toon)
+ BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXIsotropic,
+ microfacet_ggx_isotropic,
+ MicrofacetBsdf,
+ LABEL_GLOSSY | LABEL_REFLECT)
+ CLOSURE_FLOAT3_PARAM(MicrofacetGGXIsotropicClosure, params.N),
+ CLOSURE_FLOAT_PARAM(MicrofacetGGXIsotropicClosure, params.alpha_x),
+ BSDF_CLOSURE_CLASS_END(MicrofacetGGXIsotropic, microfacet_ggx_isotropic)
+
BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGX,
microfacet_ggx,
MicrofacetBsdf,
LABEL_GLOSSY | LABEL_REFLECT)
CLOSURE_FLOAT3_PARAM(MicrofacetGGXClosure, params.N),
+ CLOSURE_FLOAT3_PARAM(MicrofacetGGXClosure, params.T),
CLOSURE_FLOAT_PARAM(MicrofacetGGXClosure, params.alpha_x),
+ CLOSURE_FLOAT_PARAM(MicrofacetGGXClosure, params.alpha_y),
BSDF_CLOSURE_CLASS_END(MicrofacetGGX, microfacet_ggx)
- BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXAniso,
- microfacet_ggx_aniso,
+ BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannIsotropic,
+ microfacet_beckmann_isotropic,
MicrofacetBsdf,
LABEL_GLOSSY | LABEL_REFLECT)
- CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, params.N),
- CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, params.T),
- CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, params.alpha_y),
- BSDF_CLOSURE_CLASS_END(MicrofacetGGXAniso, microfacet_ggx_aniso)
+ CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannIsotropicClosure, params.N),
+ CLOSURE_FLOAT_PARAM(MicrofacetBeckmannIsotropicClosure, params.alpha_x),
+ BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannIsotropic, microfacet_beckmann_isotropic)
BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmann,
microfacet_beckmann,
MicrofacetBsdf,
LABEL_GLOSSY | LABEL_REFLECT)
CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannClosure, params.N),
+ CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannClosure, params.T),
CLOSURE_FLOAT_PARAM(MicrofacetBeckmannClosure, params.alpha_x),
+ CLOSURE_FLOAT_PARAM(MicrofacetBeckmannClosure, params.alpha_y),
BSDF_CLOSURE_CLASS_END(MicrofacetBeckmann, microfacet_beckmann)
- BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannAniso,
- microfacet_beckmann_aniso,
- MicrofacetBsdf,
- LABEL_GLOSSY | LABEL_REFLECT)
- CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, params.N),
- CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, params.T),
- CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, params.alpha_y),
- BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannAniso, microfacet_beckmann_aniso)
-
BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXRefraction,
microfacet_ggx_refraction,
MicrofacetBsdf,
@@ -197,15 +199,32 @@ CLOSURE_FLOAT3_PARAM(DiffuseClosure, params.N),
CLOSURE_FLOAT_PARAM(PrincipledDiffuseClosure, params.roughness),
BSDF_CLOSURE_CLASS_END(PrincipledDiffuse, principled_diffuse)
- BSDF_CLOSURE_CLASS_BEGIN(PrincipledSheen,
- principled_sheen,
- PrincipledSheenBsdf,
- LABEL_DIFFUSE)
- CLOSURE_FLOAT3_PARAM(PrincipledSheenClosure, params.N),
- BSDF_CLOSURE_CLASS_END(PrincipledSheen, principled_sheen)
+ class PrincipledSheenClosure : public CBSDFClosure {
+ public:
+ PrincipledSheenBsdf params;
- /* PRINCIPLED HAIR BSDF */
- class PrincipledHairClosure : public CBSDFClosure {
+ void setup(ShaderData *sd, int path_flag, float3 weight)
+ {
+ if (!skip(sd, path_flag, LABEL_DIFFUSE)) {
+ PrincipledSheenBsdf *bsdf = (PrincipledSheenBsdf *)bsdf_alloc_osl(
+ sd, sizeof(PrincipledSheenBsdf), weight, &params);
+ sd->flag |= (bsdf) ? bsdf_principled_sheen_setup(sd, bsdf) : 0;
+ }
+ }
+};
+
+static ClosureParam *bsdf_principled_sheen_params()
+{
+ static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(PrincipledSheenClosure, params.N),
+ CLOSURE_STRING_KEYPARAM(PrincipledSheenClosure, label, "label"),
+ CLOSURE_FINISH_PARAM(PrincipledSheenClosure)};
+ return params;
+}
+
+CCLOSURE_PREPARE_STATIC(closure_bsdf_principled_sheen_prepare, PrincipledSheenClosure)
+
+/* PRINCIPLED HAIR BSDF */
+class PrincipledHairClosure : public CBSDFClosure {
public:
PrincipledHairBSDF params;
@@ -343,13 +362,16 @@ void OSLShader::register_closures(OSLShadingSystem *ss_)
id++,
closure_bsdf_transparent_params(),
closure_bsdf_transparent_prepare);
+
register_closure(
- ss, "microfacet_ggx", id++, bsdf_microfacet_ggx_params(), bsdf_microfacet_ggx_prepare);
+ ss, "microfacet", id++, closure_bsdf_microfacet_params(), closure_bsdf_microfacet_prepare);
register_closure(ss,
- "microfacet_ggx_aniso",
+ "microfacet_ggx",
id++,
- bsdf_microfacet_ggx_aniso_params(),
- bsdf_microfacet_ggx_aniso_prepare);
+ bsdf_microfacet_ggx_isotropic_params(),
+ bsdf_microfacet_ggx_isotropic_prepare);
+ register_closure(
+ ss, "microfacet_ggx_aniso", id++, bsdf_microfacet_ggx_params(), bsdf_microfacet_ggx_prepare);
register_closure(ss,
"microfacet_ggx_refraction",
id++,
@@ -398,13 +420,13 @@ void OSLShader::register_closures(OSLShadingSystem *ss_)
register_closure(ss,
"microfacet_beckmann",
id++,
- bsdf_microfacet_beckmann_params(),
- bsdf_microfacet_beckmann_prepare);
+ bsdf_microfacet_beckmann_isotropic_params(),
+ bsdf_microfacet_beckmann_isotropic_prepare);
register_closure(ss,
"microfacet_beckmann_aniso",
id++,
- bsdf_microfacet_beckmann_aniso_params(),
- bsdf_microfacet_beckmann_aniso_prepare);
+ bsdf_microfacet_beckmann_params(),
+ bsdf_microfacet_beckmann_prepare);
register_closure(ss,
"microfacet_beckmann_refraction",
id++,
@@ -413,8 +435,8 @@ void OSLShader::register_closures(OSLShadingSystem *ss_)
register_closure(ss,
"ashikhmin_shirley",
id++,
- bsdf_ashikhmin_shirley_aniso_params(),
- bsdf_ashikhmin_shirley_aniso_prepare);
+ bsdf_ashikhmin_shirley_params(),
+ bsdf_ashikhmin_shirley_prepare);
register_closure(
ss, "ashikhmin_velvet", id++, bsdf_ashikhmin_velvet_params(), bsdf_ashikhmin_velvet_prepare);
register_closure(
@@ -425,8 +447,11 @@ void OSLShader::register_closures(OSLShadingSystem *ss_)
id++,
bsdf_principled_diffuse_params(),
bsdf_principled_diffuse_prepare);
- register_closure(
- ss, "principled_sheen", id++, bsdf_principled_sheen_params(), bsdf_principled_sheen_prepare);
+ register_closure(ss,
+ "principled_sheen",
+ id++,
+ bsdf_principled_sheen_params(),
+ closure_bsdf_principled_sheen_prepare);
register_closure(ss,
"principled_clearcoat",
id++,
@@ -486,6 +511,82 @@ bool CBSDFClosure::skip(const ShaderData *sd, int path_flag, int scattering)
return false;
}
+/* Standard Microfacet Closure */
+
+class MicrofacetClosure : public CBSDFClosure {
+ public:
+ MicrofacetBsdf params;
+ ustring distribution;
+ int refract;
+
+ void setup(ShaderData *sd, int path_flag, float3 weight)
+ {
+ static ustring u_ggx("ggx");
+ static ustring u_default("default");
+
+ const int label = (refract) ? LABEL_TRANSMIT : LABEL_REFLECT;
+ if (skip(sd, path_flag, LABEL_GLOSSY | label)) {
+ return;
+ }
+
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl(
+ sd, sizeof(MicrofacetBsdf), weight, &params);
+
+ if (!bsdf) {
+ return;
+ }
+
+ /* GGX */
+ if (distribution == u_ggx || distribution == u_default) {
+ if (!refract) {
+ if (params.alpha_x == params.alpha_y) {
+ /* Isotropic */
+ sd->flag |= bsdf_microfacet_ggx_isotropic_setup(bsdf);
+ }
+ else {
+ /* Anisotropic */
+ sd->flag |= bsdf_microfacet_ggx_setup(bsdf);
+ }
+ }
+ else {
+ sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf);
+ }
+ }
+ /* Beckmann */
+ else {
+ if (!refract) {
+ if (params.alpha_x == params.alpha_y) {
+ /* Isotropic */
+ sd->flag |= bsdf_microfacet_beckmann_isotropic_setup(bsdf);
+ }
+ else {
+ /* Anisotropic */
+ sd->flag |= bsdf_microfacet_beckmann_setup(bsdf);
+ }
+ }
+ else {
+ sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf);
+ }
+ }
+ }
+};
+
+ClosureParam *closure_bsdf_microfacet_params()
+{
+ static ClosureParam params[] = {CLOSURE_STRING_PARAM(MicrofacetClosure, distribution),
+ CLOSURE_FLOAT3_PARAM(MicrofacetClosure, params.N),
+ CLOSURE_FLOAT3_PARAM(MicrofacetClosure, params.T),
+ CLOSURE_FLOAT_PARAM(MicrofacetClosure, params.alpha_x),
+ CLOSURE_FLOAT_PARAM(MicrofacetClosure, params.alpha_y),
+ CLOSURE_FLOAT_PARAM(MicrofacetClosure, params.ior),
+ CLOSURE_INT_PARAM(MicrofacetClosure, refract),
+ CLOSURE_STRING_KEYPARAM(MicrofacetClosure, label, "label"),
+ CLOSURE_FINISH_PARAM(MicrofacetClosure)};
+
+ return params;
+}
+CCLOSURE_PREPARE(closure_bsdf_microfacet_prepare, MicrofacetClosure)
+
/* GGX closures with Fresnel */
class MicrofacetFresnelClosure : public CBSDFClosure {
@@ -497,8 +598,8 @@ class MicrofacetFresnelClosure : public CBSDFClosure {
MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
{
/* Technically, the MultiGGX Glass closure may also transmit. However,
- * since this is set statically and only used for caustic flags, this
- * is probably as good as it gets. */
+ * since this is set statically and only used for caustic flags, this
+ * is probably as good as it gets. */
if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
return NULL;
}
@@ -560,7 +661,7 @@ class MicrofacetGGXAnisoFresnelClosure : public MicrofacetFresnelClosure {
return;
}
- sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd);
+ sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd);
}
};
@@ -654,7 +755,7 @@ class MicrofacetMultiGGXAnisoClosure : public MicrofacetMultiClosure {
}
bsdf->ior = 0.0f;
- sd->flag |= bsdf_microfacet_multi_ggx_aniso_setup(bsdf);
+ sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf);
}
};
@@ -715,8 +816,8 @@ class MicrofacetMultiFresnelClosure : public CBSDFClosure {
MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
{
/* Technically, the MultiGGX closure may also transmit. However,
- * since this is set statically and only used for caustic flags, this
- * is probably as good as it gets. */
+ * since this is set statically and only used for caustic flags, this
+ * is probably as good as it gets. */
if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
return NULL;
}
@@ -779,7 +880,7 @@ class MicrofacetMultiGGXAnisoFresnelClosure : public MicrofacetMultiFresnelClosu
return;
}
- sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd);
+ sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd);
}
};
diff --git a/intern/cycles/kernel/osl/osl_closures.h b/intern/cycles/kernel/osl/osl_closures.h
index d3db6b71f5c..e4058e3a746 100644
--- a/intern/cycles/kernel/osl/osl_closures.h
+++ b/intern/cycles/kernel/osl/osl_closures.h
@@ -33,12 +33,12 @@
#ifndef __OSL_CLOSURES_H__
#define __OSL_CLOSURES_H__
-#include "util/util_types.h"
#include "kernel/kernel_types.h"
+#include "util/util_types.h"
+#include <OSL/genclosure.h>
#include <OSL/oslclosure.h>
#include <OSL/oslexec.h>
-#include <OSL/genclosure.h>
CCL_NAMESPACE_BEGIN
@@ -51,6 +51,7 @@ OSL::ClosureParam *closure_bsdf_transparent_params();
OSL::ClosureParam *closure_bssrdf_params();
OSL::ClosureParam *closure_absorption_params();
OSL::ClosureParam *closure_henyey_greenstein_params();
+OSL::ClosureParam *closure_bsdf_microfacet_params();
OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_params();
OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_params();
OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_params();
@@ -70,6 +71,7 @@ void closure_bsdf_transparent_prepare(OSL::RendererServices *, int id, void *dat
void closure_bssrdf_prepare(OSL::RendererServices *, int id, void *data);
void closure_absorption_prepare(OSL::RendererServices *, int id, void *data);
void closure_henyey_greenstein_prepare(OSL::RendererServices *, int id, void *data);
+void closure_bsdf_microfacet_prepare(OSL::RendererServices *, int id, void *data);
void closure_bsdf_microfacet_multi_ggx_prepare(OSL::RendererServices *, int id, void *data);
void closure_bsdf_microfacet_multi_ggx_glass_prepare(OSL::RendererServices *, int id, void *data);
void closure_bsdf_microfacet_multi_ggx_aniso_prepare(OSL::RendererServices *, int id, void *data);
diff --git a/intern/cycles/kernel/osl/osl_globals.h b/intern/cycles/kernel/osl/osl_globals.h
index 641c9967586..c06c9abd4c1 100644
--- a/intern/cycles/kernel/osl/osl_globals.h
+++ b/intern/cycles/kernel/osl/osl_globals.h
@@ -21,9 +21,13 @@
# include <OSL/oslexec.h>
+# include <OpenImageIO/refcnt.h>
+# include <OpenImageIO/unordered_map_concurrent.h>
+
# include "util/util_map.h"
# include "util/util_param.h"
# include "util/util_thread.h"
+# include "util/util_unique_ptr.h"
# include "util/util_vector.h"
# ifndef WIN32
@@ -33,6 +37,13 @@ using std::isfinite;
CCL_NAMESPACE_BEGIN
class OSLRenderServices;
+class ColorSpaceProcessor;
+
+/* OSL Globals
+ *
+ * Data needed by the OSL render services that is global to a rendering session.
+ * This includes all OSL shaders, the name-to-attribute mapping and texture handles.
+ */
struct OSLGlobals {
OSLGlobals()
diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp
index eb9f672fd8a..5292b5f8055 100644
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -25,6 +25,7 @@
#include <string.h>
+#include "render/colorspace.h"
#include "render/mesh.h"
#include "render/object.h"
#include "render/scene.h"
@@ -38,11 +39,13 @@
#include "util/util_logging.h"
#include "util/util_string.h"
+// clang-format off
#include "kernel/kernel_compat_cpu.h"
#include "kernel/split/kernel_split_data_types.h"
#include "kernel/kernel_globals.h"
#include "kernel/kernel_color.h"
#include "kernel/kernel_random.h"
+#include "kernel/kernel_write_passes.h"
#include "kernel/kernel_projection.h"
#include "kernel/kernel_differential.h"
#include "kernel/kernel_montecarlo.h"
@@ -54,10 +57,7 @@
#include "kernel/kernel_projection.h"
#include "kernel/kernel_accumulate.h"
#include "kernel/kernel_shader.h"
-
-#ifdef WITH_PTEX
-# include <Ptexture.h>
-#endif
+// clang-format on
CCL_NAMESPACE_BEGIN
@@ -84,6 +84,7 @@ ustring OSLRenderServices::u_screen("screen");
ustring OSLRenderServices::u_raster("raster");
ustring OSLRenderServices::u_ndc("NDC");
ustring OSLRenderServices::u_object_location("object:location");
+ustring OSLRenderServices::u_object_color("object:color");
ustring OSLRenderServices::u_object_index("object:index");
ustring OSLRenderServices::u_geom_dupli_generated("geom:dupli_generated");
ustring OSLRenderServices::u_geom_dupli_uv("geom:dupli_uv");
@@ -124,34 +125,17 @@ ustring OSLRenderServices::u_I("I");
ustring OSLRenderServices::u_u("u");
ustring OSLRenderServices::u_v("v");
ustring OSLRenderServices::u_empty;
-ustring OSLRenderServices::u_at_bevel("@bevel");
-ustring OSLRenderServices::u_at_ao("@ao");
-OSLRenderServices::OSLRenderServices()
+OSLRenderServices::OSLRenderServices(OSL::TextureSystem *texture_system)
+ : texture_system(texture_system)
{
- kernel_globals = NULL;
- osl_ts = NULL;
-
-#ifdef WITH_PTEX
- size_t maxmem = 16384 * 1024;
- ptex_cache = PtexCache::create(0, maxmem);
-#endif
}
OSLRenderServices::~OSLRenderServices()
{
- if (osl_ts) {
- VLOG(2) << "OSL texture system stats:\n" << osl_ts->getstats();
+ if (texture_system) {
+ VLOG(2) << "OSL texture system stats:\n" << texture_system->getstats();
}
-#ifdef WITH_PTEX
- ptex_cache->release();
-#endif
-}
-
-void OSLRenderServices::thread_init(KernelGlobals *kernel_globals_, OSL::TextureSystem *osl_ts_)
-{
- kernel_globals = kernel_globals_;
- osl_ts = osl_ts_;
}
bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg,
@@ -233,7 +217,8 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg,
ustring from,
float time)
{
- KernelGlobals *kg = kernel_globals;
+ ShaderData *sd = (ShaderData *)(sg->renderstate);
+ KernelGlobals *kg = sd->osl_globals;
if (from == u_ndc) {
copy_matrix(result, kernel_data.cam.ndctoworld);
@@ -264,7 +249,8 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg,
ustring to,
float time)
{
- KernelGlobals *kg = kernel_globals;
+ ShaderData *sd = (ShaderData *)(sg->renderstate);
+ KernelGlobals *kg = sd->osl_globals;
if (to == u_ndc) {
copy_matrix(result, kernel_data.cam.worldtondc);
@@ -354,7 +340,8 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg,
bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from)
{
- KernelGlobals *kg = kernel_globals;
+ ShaderData *sd = (ShaderData *)(sg->renderstate);
+ KernelGlobals *kg = sd->osl_globals;
if (from == u_ndc) {
copy_matrix(result, kernel_data.cam.ndctoworld);
@@ -380,7 +367,8 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg,
OSL::Matrix44 &result,
ustring to)
{
- KernelGlobals *kg = kernel_globals;
+ ShaderData *sd = (ShaderData *)(sg->renderstate);
+ KernelGlobals *kg = sd->osl_globals;
if (to == u_ndc) {
copy_matrix(result, kernel_data.cam.worldtondc);
@@ -498,6 +486,65 @@ static bool set_attribute_float3(float3 f, TypeDesc type, bool derivatives, void
return set_attribute_float3(fv, type, derivatives, val);
}
+/* Attributes with the TypeRGBA type descriptor should be retrieved and stored
+ * in a float array of size 4 (e.g. node_vertex_color.osl), which has the
+ * type descriptor TypeFloatArray4. If the storage is not a TypeFloatArray4,
+ * we either store the first three components in a vector, store the average of
+ * the components in a float, or fail the retrieval and do nothing. We allow
+ * this for the correct operation of the Attribute node.
+ */
+
+static bool set_attribute_float4(float4 f[3], TypeDesc type, bool derivatives, void *val)
+{
+ float *fval = (float *)val;
+ if (type == TypeFloatArray4) {
+ fval[0] = f[0].x;
+ fval[1] = f[0].y;
+ fval[2] = f[0].z;
+ fval[3] = f[0].w;
+
+ if (derivatives) {
+ fval[4] = f[1].x;
+ fval[5] = f[1].y;
+ fval[6] = f[1].z;
+ fval[7] = f[1].w;
+
+ fval[8] = f[2].x;
+ fval[9] = f[2].y;
+ fval[10] = f[2].z;
+ fval[11] = f[2].w;
+ }
+ return true;
+ }
+ else if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
+ type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) {
+ fval[0] = f[0].x;
+ fval[1] = f[0].y;
+ fval[2] = f[0].z;
+
+ if (derivatives) {
+ fval[3] = f[1].x;
+ fval[4] = f[1].y;
+ fval[5] = f[1].z;
+
+ fval[6] = f[2].x;
+ fval[7] = f[2].y;
+ fval[8] = f[2].z;
+ }
+ return true;
+ }
+ else if (type == TypeDesc::TypeFloat) {
+ fval[0] = average(float4_to_float3(f[0]));
+
+ if (derivatives) {
+ fval[1] = average(float4_to_float3(f[1]));
+ fval[2] = average(float4_to_float3(f[2]));
+ }
+ return true;
+ }
+ return false;
+}
+
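For illustration, a minimal standalone C++ sketch of the TypeFloatArray4 packing described in the comment above; the float4 struct and the helper name are stand-ins for this example, not the Cycles types. The value fills the first four floats, followed by the two derivative sets when derivatives are requested.

    #include <cstdio>

    struct float4 {
      float x, y, z, w;
    };

    /* Pack a value and its two derivatives into the 12-float layout expected
     * for a TypeFloatArray4 output with derivatives: value, d/ds, d/dt. */
    static void pack_float4_with_derivatives(const float4 f[3], bool derivatives, float *out)
    {
      const int n = derivatives ? 3 : 1;
      for (int i = 0; i < n; i++) {
        out[i * 4 + 0] = f[i].x;
        out[i * 4 + 1] = f[i].y;
        out[i * 4 + 2] = f[i].z;
        out[i * 4 + 3] = f[i].w;
      }
    }

    int main()
    {
      const float4 f[3] = {{0.2f, 0.4f, 0.6f, 1.0f}, {0.01f, 0.0f, 0.0f, 0.0f}, {0.0f, 0.02f, 0.0f, 0.0f}};
      float out[12] = {0};
      pack_float4_with_derivatives(f, true, out);
      printf("value = (%g, %g, %g, %g), d/ds = (%g, %g, %g, %g)\n",
             out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7]);
      return 0;
    }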
static bool set_attribute_float(float f[3], TypeDesc type, bool derivatives, void *val)
{
if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
@@ -633,7 +680,7 @@ static bool get_primitive_attribute(KernelGlobals *kg,
return set_attribute_float3(fval, type, derivatives, val);
}
else if (attr.type == TypeFloat2) {
- float2 fval[2];
+ float2 fval[3];
fval[0] = primitive_attribute_float2(
kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
return set_attribute_float2(fval, type, derivatives, val);
@@ -644,6 +691,12 @@ static bool get_primitive_attribute(KernelGlobals *kg,
kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
return set_attribute_float(fval, type, derivatives, val);
}
+ else if (attr.type == TypeRGBA) {
+ float4 fval[3];
+ fval[0] = primitive_attribute_float4(
+ kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
+ return set_attribute_float4(fval, type, derivatives, val);
+ }
else {
return false;
}
@@ -684,6 +737,10 @@ bool OSLRenderServices::get_object_standard_attribute(
float3 f = object_location(kg, sd);
return set_attribute_float3(f, type, derivatives, val);
}
+ else if (name == u_object_color) {
+ float3 f = object_color(kg, sd->object);
+ return set_attribute_float3(f, type, derivatives, val);
+ }
else if (name == u_object_index) {
float f = object_pass_id(kg, sd->object);
return set_attribute_float(f, type, derivatives, val);
@@ -713,7 +770,7 @@ bool OSLRenderServices::get_object_standard_attribute(
}
else if (name == u_particle_random) {
int particle_id = object_particle_id(kg, sd->object);
- float f = hash_int_01(particle_index(kg, particle_id));
+ float f = hash_uint2_to_float(particle_index(kg, particle_id), 0);
return set_attribute_float(f, type, derivatives, val);
}
@@ -733,7 +790,7 @@ bool OSLRenderServices::get_object_standard_attribute(
return set_attribute_float3(f, type, derivatives, val);
}
#if 0 /* unsupported */
- else if(name == u_particle_rotation) {
+ else if (name == u_particle_rotation) {
int particle_id = object_particle_id(kg, sd->object);
float4 f = particle_rotation(kg, particle_id);
return set_attribute_float4(f, type, derivatives, val);
@@ -954,21 +1011,52 @@ bool OSLRenderServices::get_userdata(
return false; /* disabled by lockgeom */
}
+#if OSL_LIBRARY_VERSION_CODE >= 11100
+TextureSystem::TextureHandle *OSLRenderServices::get_texture_handle(ustring filename,
+ OSL::ShadingContext *)
+#else
+
TextureSystem::TextureHandle *OSLRenderServices::get_texture_handle(ustring filename)
+#endif
{
- if (filename.length() && filename[0] == '@') {
- /* Dummy, we don't use texture handles for builtin textures but need
- * to tell the OSL runtime optimizer that this is a valid texture. */
+ OSLTextureHandleMap::iterator it = textures.find(filename);
+
+ /* For non-OIIO textures, just return a pointer to our own OSLTextureHandle. */
+ if (it != textures.end()) {
+ if (it->second->type != OSLTextureHandle::OIIO) {
+ return (TextureSystem::TextureHandle *)it->second.get();
+ }
+ }
+
+ /* Get handle from OpenImageIO. */
+ OSL::TextureSystem *ts = texture_system;
+ TextureSystem::TextureHandle *handle = ts->get_texture_handle(filename);
+ if (handle == NULL) {
return NULL;
}
- else {
- return texturesys()->get_texture_handle(filename);
+
+ /* Insert new OSLTextureHandle if needed. */
+ if (it == textures.end()) {
+ textures.insert(filename, new OSLTextureHandle(OSLTextureHandle::OIIO));
+ it = textures.find(filename);
}
+
+ /* Assign OIIO texture handle and return. */
+ it->second->oiio_handle = handle;
+ return (TextureSystem::TextureHandle *)it->second.get();
}
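A simplified standalone sketch of the find-or-insert caching pattern used by get_texture_handle() above, written with std::unordered_map and a mutex instead of OIIO's unordered_map_concurrent and intrusive_ptr; all names here are illustrative, not the Cycles or OIIO API.

    #include <memory>
    #include <mutex>
    #include <string>
    #include <unordered_map>

    enum class HandleType { OIIO, SVM, IES, BEVEL, AO };

    struct Handle {
      HandleType type = HandleType::OIIO;
      void *oiio_handle = nullptr; /* Stand-in for a TextureSystem handle pointer. */
    };

    class HandleCache {
     public:
      /* Return the cached handle for a filename, creating an OIIO-typed entry if missing. */
      Handle *get(const std::string &filename)
      {
        std::lock_guard<std::mutex> lock(mutex_);
        auto it = handles_.find(filename);
        if (it == handles_.end()) {
          it = handles_.emplace(filename, std::make_unique<Handle>()).first;
        }
        return it->second.get();
      }

     private:
      std::mutex mutex_;
      std::unordered_map<std::string, std::unique_ptr<Handle>> handles_;
    };

    int main()
    {
      HandleCache cache;
      Handle *a = cache.get("texture.png");
      Handle *b = cache.get("texture.png");
      return (a == b) ? 0 : 1; /* The same filename yields the same cached handle. */
    }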
bool OSLRenderServices::good(TextureSystem::TextureHandle *texture_handle)
{
- return texturesys()->good(texture_handle);
+ OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle;
+
+ if (handle->oiio_handle) {
+ OSL::TextureSystem *ts = texture_system;
+ return ts->good(handle->oiio_handle);
+ }
+ else {
+ return true;
+ }
}
bool OSLRenderServices::texture(ustring filename,
@@ -988,69 +1076,28 @@ bool OSLRenderServices::texture(ustring filename,
float *dresultdt,
ustring *errormessage)
{
- OSL::TextureSystem *ts = osl_ts;
+ OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle;
+ OSLTextureHandle::Type texture_type = (handle) ? handle->type : OSLTextureHandle::OIIO;
ShaderData *sd = (ShaderData *)(sg->renderstate);
- KernelGlobals *kg = sd->osl_globals;
-
- if (texture_thread_info == NULL) {
- OSLThreadData *tdata = kg->osl_tdata;
- texture_thread_info = tdata->oiio_thread_info;
- }
-
-#ifdef WITH_PTEX
- /* todo: this is just a quick hack, only works with particular files and options */
- if (string_endswith(filename.string(), ".ptx")) {
- float2 uv;
- int faceid;
-
- if (!primitive_ptex(kg, sd, &uv, &faceid))
- return false;
-
- float u = uv.x;
- float v = uv.y;
- float dudx = 0.0f;
- float dvdx = 0.0f;
- float dudy = 0.0f;
- float dvdy = 0.0f;
-
- Ptex::String error;
- PtexPtr<PtexTexture> r(ptex_cache->get(filename.c_str(), error));
-
- if (!r) {
- //std::cerr << error.c_str() << std::endl;
- return false;
- }
-
- bool mipmaplerp = false;
- float sharpness = 1.0f;
- PtexFilter::Options opts(PtexFilter::f_bicubic, mipmaplerp, sharpness);
- PtexPtr<PtexFilter> f(PtexFilter::getFilter(r, opts));
-
- f->eval(result, options.firstchannel, nchannels, faceid, u, v, dudx, dvdx, dudy, dvdy);
-
- for (int c = r->numChannels(); c < nchannels; c++)
- result[c] = result[0];
-
- return true;
- }
-#endif
+ KernelGlobals *kernel_globals = sd->osl_globals;
bool status = false;
- if (filename.length() && filename[0] == '@') {
- if (filename == u_at_bevel) {
+ switch (texture_type) {
+ case OSLTextureHandle::BEVEL: {
/* Bevel shader hack. */
if (nchannels >= 3) {
PathState *state = sd->osl_path_state;
int num_samples = (int)s;
float radius = t;
- float3 N = svm_bevel(kg, sd, state, radius, num_samples);
+ float3 N = svm_bevel(kernel_globals, sd, state, radius, num_samples);
result[0] = N.x;
result[1] = N.y;
result[2] = N.z;
status = true;
}
+ break;
}
- else if (filename == u_at_ao) {
+ case OSLTextureHandle::AO: {
/* AO shader hack. */
PathState *state = sd->osl_path_state;
int num_samples = (int)s;
@@ -1066,19 +1113,13 @@ bool OSLRenderServices::texture(ustring filename,
if ((int)options.tblur) {
flags |= NODE_AO_GLOBAL_RADIUS;
}
- result[0] = svm_ao(kg, sd, N, state, radius, num_samples, flags);
- status = true;
- }
- else if (filename[1] == 'l') {
- /* IES light. */
- int slot = atoi(filename.c_str() + 2);
- result[0] = kernel_ies_interp(kg, slot, s, t);
+ result[0] = svm_ao(kernel_globals, sd, N, state, radius, num_samples, flags);
status = true;
+ break;
}
- else {
+ case OSLTextureHandle::SVM: {
/* Packed texture. */
- int slot = atoi(filename.c_str() + 2);
- float4 rgba = kernel_tex_image_interp(kg, slot, s, 1.0f - t);
+ float4 rgba = kernel_tex_image_interp(kernel_globals, handle->svm_slot, s, 1.0f - t);
result[0] = rgba[0];
if (nchannels > 1)
@@ -1088,37 +1129,62 @@ bool OSLRenderServices::texture(ustring filename,
if (nchannels > 3)
result[3] = rgba[3];
status = true;
+ break;
}
- }
- else {
- if (texture_handle != NULL) {
- status = ts->texture(texture_handle,
- texture_thread_info,
- options,
- s,
- t,
- dsdx,
- dtdx,
- dsdy,
- dtdy,
- nchannels,
- result,
- dresultds,
- dresultdt);
+ case OSLTextureHandle::IES: {
+ /* IES light. */
+ result[0] = kernel_ies_interp(kernel_globals, handle->svm_slot, s, t);
+ status = true;
+ break;
}
- else {
- status = ts->texture(filename,
- options,
- s,
- t,
- dsdx,
- dtdx,
- dsdy,
- dtdy,
- nchannels,
- result,
- dresultds,
- dresultdt);
+ case OSLTextureHandle::OIIO: {
+ /* OpenImageIO texture cache. */
+ OSL::TextureSystem *ts = texture_system;
+
+ if (handle && handle->oiio_handle) {
+ if (texture_thread_info == NULL) {
+ OSLThreadData *tdata = kernel_globals->osl_tdata;
+ texture_thread_info = tdata->oiio_thread_info;
+ }
+
+ status = ts->texture(handle->oiio_handle,
+ texture_thread_info,
+ options,
+ s,
+ t,
+ dsdx,
+ dtdx,
+ dsdy,
+ dtdy,
+ nchannels,
+ result,
+ dresultds,
+ dresultdt);
+ }
+ else {
+ status = ts->texture(filename,
+ options,
+ s,
+ t,
+ dsdx,
+ dtdx,
+ dsdy,
+ dtdy,
+ nchannels,
+ result,
+ dresultds,
+ dresultdt);
+ }
+
+ if (!status) {
+ /* This might be slow, but prevents error messages from leaking and
+ * other nasty stuff from happening. */
+ ts->geterror();
+ }
+ else if (handle && handle->processor) {
+ ColorSpaceManager::to_scene_linear(handle->processor, result, nchannels);
+ }
+ break;
}
}
@@ -1131,11 +1197,6 @@ bool OSLRenderServices::texture(ustring filename,
if (nchannels == 4)
result[3] = 1.0f;
}
- /* This might be slow, but prevents error messages leak and
- * other nasty stuff happening.
- */
- string err = ts->geterror();
- (void)err;
}
return status;
@@ -1157,56 +1218,83 @@ bool OSLRenderServices::texture3d(ustring filename,
float *dresultdr,
ustring *errormessage)
{
- OSL::TextureSystem *ts = osl_ts;
- ShaderData *sd = (ShaderData *)(sg->renderstate);
- KernelGlobals *kg = sd->osl_globals;
+ OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle;
+ OSLTextureHandle::Type texture_type = (handle) ? handle->type : OSLTextureHandle::OIIO;
+ bool status = false;
- if (texture_thread_info == NULL) {
- OSLThreadData *tdata = kg->osl_tdata;
- texture_thread_info = tdata->oiio_thread_info;
- }
+ switch (texture_type) {
+ case OSLTextureHandle::SVM: {
+ /* Packed texture. */
+ ShaderData *sd = (ShaderData *)(sg->renderstate);
+ KernelGlobals *kernel_globals = sd->osl_globals;
+ int slot = handle->svm_slot;
+ float3 P_float3 = make_float3(P.x, P.y, P.z);
+ float4 rgba = kernel_tex_image_interp_3d(kernel_globals, slot, P_float3, INTERPOLATION_NONE);
- bool status;
- if (filename.length() && filename[0] == '@') {
- int slot = atoi(filename.c_str() + 1);
- float4 rgba = kernel_tex_image_interp_3d(kg, slot, P.x, P.y, P.z, INTERPOLATION_NONE);
+ result[0] = rgba[0];
+ if (nchannels > 1)
+ result[1] = rgba[1];
+ if (nchannels > 2)
+ result[2] = rgba[2];
+ if (nchannels > 3)
+ result[3] = rgba[3];
+ status = true;
+ break;
+ }
+ case OSLTextureHandle::OIIO: {
+ /* OpenImageIO texture cache. */
+ OSL::TextureSystem *ts = texture_system;
+
+ if (handle && handle->oiio_handle) {
+ if (texture_thread_info == NULL) {
+ ShaderData *sd = (ShaderData *)(sg->renderstate);
+ KernelGlobals *kernel_globals = sd->osl_globals;
+ OSLThreadData *tdata = kernel_globals->osl_tdata;
+ texture_thread_info = tdata->oiio_thread_info;
+ }
- result[0] = rgba[0];
- if (nchannels > 1)
- result[1] = rgba[1];
- if (nchannels > 2)
- result[2] = rgba[2];
- if (nchannels > 3)
- result[3] = rgba[3];
- status = true;
- }
- else {
- if (texture_handle != NULL) {
- status = ts->texture3d(texture_handle,
- texture_thread_info,
- options,
- P,
- dPdx,
- dPdy,
- dPdz,
- nchannels,
- result,
- dresultds,
- dresultdt,
- dresultdr);
+ status = ts->texture3d(handle->oiio_handle,
+ texture_thread_info,
+ options,
+ P,
+ dPdx,
+ dPdy,
+ dPdz,
+ nchannels,
+ result,
+ dresultds,
+ dresultdt,
+ dresultdr);
+ }
+ else {
+ status = ts->texture3d(filename,
+ options,
+ P,
+ dPdx,
+ dPdy,
+ dPdz,
+ nchannels,
+ result,
+ dresultds,
+ dresultdt,
+ dresultdr);
+ }
+
+ if (!status) {
+ /* This might be slow, but prevents error messages from leaking and
+ * other nasty stuff from happening. */
+ ts->geterror();
+ }
+ else if (handle && handle->processor) {
+ ColorSpaceManager::to_scene_linear(handle->processor, result, nchannels);
+ }
+ break;
}
- else {
- status = ts->texture3d(filename,
- options,
- P,
- dPdx,
- dPdy,
- dPdz,
- nchannels,
- result,
- dresultds,
- dresultdt,
- dresultdr);
+ case OSLTextureHandle::IES:
+ case OSLTextureHandle::AO:
+ case OSLTextureHandle::BEVEL: {
+ status = false;
+ break;
}
}
@@ -1219,18 +1307,13 @@ bool OSLRenderServices::texture3d(ustring filename,
if (nchannels == 4)
result[3] = 1.0f;
}
- /* This might be slow, but prevents error messages leak and
- * other nasty stuff happening.
- */
- string err = ts->geterror();
- (void)err;
}
return status;
}
bool OSLRenderServices::environment(ustring filename,
- TextureHandle *th,
+ TextureHandle *texture_handle,
TexturePerthread *thread_info,
TextureOpt &options,
OSL::ShaderGlobals *sg,
@@ -1243,21 +1326,33 @@ bool OSLRenderServices::environment(ustring filename,
float *dresultdt,
ustring *errormessage)
{
- OSL::TextureSystem *ts = osl_ts;
+ OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle;
+ OSL::TextureSystem *ts = texture_system;
+ bool status = false;
- if (thread_info == NULL) {
- ShaderData *sd = (ShaderData *)(sg->renderstate);
- KernelGlobals *kg = sd->osl_globals;
- OSLThreadData *tdata = kg->osl_tdata;
- thread_info = tdata->oiio_thread_info;
- }
+ if (handle && handle->oiio_handle) {
+ if (thread_info == NULL) {
+ ShaderData *sd = (ShaderData *)(sg->renderstate);
+ KernelGlobals *kernel_globals = sd->osl_globals;
+ OSLThreadData *tdata = kernel_globals->osl_tdata;
+ thread_info = tdata->oiio_thread_info;
+ }
- if (th == NULL) {
- th = ts->get_texture_handle(filename, thread_info);
+ status = ts->environment(handle->oiio_handle,
+ thread_info,
+ options,
+ R,
+ dRdx,
+ dRdy,
+ nchannels,
+ result,
+ dresultds,
+ dresultdt);
+ }
+ else {
+ status = ts->environment(
+ filename, options, R, dRdx, dRdy, nchannels, result, dresultds, dresultdt);
}
-
- bool status = ts->environment(
- th, thread_info, options, R, dRdx, dRdy, nchannels, result, dresultds, dresultdt);
if (!status) {
if (nchannels == 3 || nchannels == 4) {
@@ -1269,26 +1364,43 @@ bool OSLRenderServices::environment(ustring filename,
result[3] = 1.0f;
}
}
+ else if (handle && handle->processor) {
+ ColorSpaceManager::to_scene_linear(handle->processor, result, nchannels);
+ }
return status;
}
+#if OSL_LIBRARY_VERSION_CODE >= 11100
+bool OSLRenderServices::get_texture_info(ustring filename,
+ TextureHandle *texture_handle,
+ TexturePerthread *,
+ OSL::ShadingContext *,
+ int subimage,
+ ustring dataname,
+ TypeDesc datatype,
+ void *data,
+ ustring *)
+#else
bool OSLRenderServices::get_texture_info(OSL::ShaderGlobals *sg,
ustring filename,
- TextureHandle *th,
+ TextureHandle *texture_handle,
int subimage,
ustring dataname,
TypeDesc datatype,
void *data)
+#endif
{
- OSL::TextureSystem *ts = osl_ts;
- if (filename.length() && filename[0] == '@') {
- /* Special builtin textures. */
+ OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle;
+
+ /* No texture info for non-OIIO texture types. */
+ if (handle && handle->type != OSLTextureHandle::OIIO) {
return false;
}
- else {
- return ts->get_texture_info(filename, subimage, dataname, datatype, data);
- }
+
+ /* Get texture info from OpenImageIO. */
+ OSL::TextureSystem *ts = texture_system;
+ return ts->get_texture_info(filename, subimage, dataname, datatype, data);
}
int OSLRenderServices::pointcloud_search(OSL::ShaderGlobals *sg,
@@ -1371,9 +1483,16 @@ bool OSLRenderServices::trace(TraceOpt &options,
tracedata->init = true;
tracedata->sd.osl_globals = sd->osl_globals;
+ KernelGlobals *kg = sd->osl_globals;
+
+ /* Can't raytrace from shaders like displacement before the BVH exists. */
+ if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) {
+ return false;
+ }
+
/* Raytrace, leaving out shadow opaque to avoid early exit. */
uint visibility = PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE;
- return scene_intersect(sd->osl_globals, ray, visibility, &tracedata->isect, NULL, 0.0f, 0.0f);
+ return scene_intersect(kg, &ray, visibility, &tracedata->isect);
}
bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg,
diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h
index 2fad5833fc9..894d6e471ba 100644
--- a/intern/cycles/kernel/osl/osl_services.h
+++ b/intern/cycles/kernel/osl/osl_services.h
@@ -25,8 +25,8 @@
* attributes.
*/
-#include <OSL/oslexec.h>
#include <OSL/oslclosure.h>
+#include <OSL/oslexec.h>
#ifdef WITH_PTEX
class PtexCache;
@@ -40,13 +40,46 @@ class Shader;
struct ShaderData;
struct float3;
struct KernelGlobals;
+
+/* OSL Texture Handle
+ *
+ * OSL texture lookups are string based. If those strings are known at compile
+ * time, the OSL compiler can cache a texture handle to use instead of a string.
+ *
+ * By default it uses TextureSystem::TextureHandle. But since we want to support
+ * different kinds of textures and color space conversions, this is our own handle
+ * with additional data.
+ *
+ * These are stored in a concurrent hash map, because OSL can compile multiple
+ * shaders in parallel. */
+
+struct OSLTextureHandle : public OIIO::RefCnt {
+ enum Type { OIIO, SVM, IES, BEVEL, AO };
+
+ OSLTextureHandle(Type type = OIIO, int svm_slot = -1)
+ : type(type), svm_slot(svm_slot), oiio_handle(NULL), processor(NULL)
+ {
+ }
+
+ Type type;
+ int svm_slot;
+ OSL::TextureSystem::TextureHandle *oiio_handle;
+ ColorSpaceProcessor *processor;
+};
+
+typedef OIIO::intrusive_ptr<OSLTextureHandle> OSLTextureHandleRef;
+typedef OIIO::unordered_map_concurrent<ustring, OSLTextureHandleRef, ustringHash>
+ OSLTextureHandleMap;
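To make the comment above concrete, here is a hedged standalone C++ sketch of how a type-tagged handle lets a single texture entry point dispatch to different backends; the enum values mirror OSLTextureHandle::Type, everything else is invented for the example.

    #include <cstdio>

    enum class TextureType { OIIO, SVM, IES, BEVEL, AO };

    struct TaggedHandle {
      TextureType type;
      int svm_slot; /* Only meaningful for SVM and IES lookups. */
    };

    /* Dispatch a lookup based on the handle's tag; a null handle falls back to OIIO. */
    static const char *lookup_backend(const TaggedHandle *handle)
    {
      const TextureType type = handle ? handle->type : TextureType::OIIO;
      switch (type) {
        case TextureType::SVM:
          return "packed SVM image slot";
        case TextureType::IES:
          return "IES light profile slot";
        case TextureType::BEVEL:
          return "bevel shader hack";
        case TextureType::AO:
          return "ambient occlusion shader hack";
        case TextureType::OIIO:
        default:
          return "OpenImageIO texture cache";
      }
    }

    int main()
    {
      TaggedHandle svm = {TextureType::SVM, 7};
      printf("%s\n", lookup_backend(&svm));    /* packed SVM image slot */
      printf("%s\n", lookup_backend(nullptr)); /* OpenImageIO texture cache */
      return 0;
    }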
+
+/* OSL Render Services
+ *
+ * Interface for OSL to access attributes, textures and other scene data. */
+
class OSLRenderServices : public OSL::RendererServices {
public:
- OSLRenderServices();
+ OSLRenderServices(OSL::TextureSystem *texture_system);
~OSLRenderServices();
- void thread_init(KernelGlobals *kernel_globals, OSL::TextureSystem *ts);
-
bool get_matrix(OSL::ShaderGlobals *sg,
OSL::Matrix44 &result,
OSL::TransformationPtr xform,
@@ -140,7 +173,12 @@ class OSLRenderServices : public OSL::RendererServices {
void *val,
bool derivatives) override;
+#if OSL_LIBRARY_VERSION_CODE >= 11100
+ TextureSystem::TextureHandle *get_texture_handle(ustring filename,
+ OSL::ShadingContext *context) override;
+#else
TextureSystem::TextureHandle *get_texture_handle(ustring filename) override;
+#endif
bool good(TextureSystem::TextureHandle *texture_handle) override;
@@ -191,6 +229,17 @@ class OSLRenderServices : public OSL::RendererServices {
float *dresultdt,
ustring *errormessage) override;
+#if OSL_LIBRARY_VERSION_CODE >= 11100
+ bool get_texture_info(ustring filename,
+ TextureHandle *texture_handle,
+ TexturePerthread *texture_thread_info,
+ OSL::ShadingContext *shading_context,
+ int subimage,
+ ustring dataname,
+ TypeDesc datatype,
+ void *data,
+ ustring *errormessage) override;
+#else
bool get_texture_info(OSL::ShaderGlobals *sg,
ustring filename,
TextureHandle *texture_handle,
@@ -198,6 +247,7 @@ class OSLRenderServices : public OSL::RendererServices {
ustring dataname,
TypeDesc datatype,
void *data) override;
+#endif
static bool get_background_attribute(
KernelGlobals *kg, ShaderData *sd, ustring name, TypeDesc type, bool derivatives, void *val);
@@ -212,6 +262,7 @@ class OSLRenderServices : public OSL::RendererServices {
static ustring u_raster;
static ustring u_ndc;
static ustring u_object_location;
+ static ustring u_object_color;
static ustring u_object_index;
static ustring u_geom_dupli_generated;
static ustring u_geom_dupli_uv;
@@ -255,12 +306,12 @@ class OSLRenderServices : public OSL::RendererServices {
static ustring u_at_bevel;
static ustring u_at_ao;
- private:
- KernelGlobals *kernel_globals;
- OSL::TextureSystem *osl_ts;
-#ifdef WITH_PTEX
- PtexCache *ptex_cache;
-#endif
+ /* The texture system and texture handle map are part of the services instead of
+ * the globals, so they can be shared between different render sessions. This saves memory,
+ * and is required because texture handles are cached as part of the shared
+ * shading system. */
+ OSL::TextureSystem *texture_system;
+ OSLTextureHandleMap textures;
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp
index 3d9c579c9ff..2318813949e 100644
--- a/intern/cycles/kernel/osl/osl_shader.cpp
+++ b/intern/cycles/kernel/osl/osl_shader.cpp
@@ -16,6 +16,7 @@
#include <OSL/oslexec.h>
+// clang-format off
#include "kernel/kernel_compat_cpu.h"
#include "kernel/kernel_montecarlo.h"
#include "kernel/kernel_types.h"
@@ -28,6 +29,7 @@
#include "kernel/osl/osl_globals.h"
#include "kernel/osl/osl_services.h"
#include "kernel/osl/osl_shader.h"
+// clang-format on
#include "util/util_foreach.h"
@@ -49,7 +51,6 @@ void OSLShader::thread_init(KernelGlobals *kg,
/* per thread kernel data init*/
kg->osl = osl_globals;
- kg->osl->services->thread_init(kernel_globals, osl_globals->ts);
OSL::ShadingSystem *ss = kg->osl->ss;
OSLThreadData *tdata = new OSLThreadData();
@@ -383,10 +384,6 @@ int OSLShader::find_attribute(KernelGlobals *kg,
{
/* for OSL, a hash map is used to lookup the attribute by name. */
int object = sd->object * ATTR_PRIM_TYPES;
-#ifdef __HAIR__
- if (sd->type & PRIMITIVE_ALL_CURVE)
- object += ATTR_PRIM_CURVE;
-#endif
OSLGlobals::AttributeMap &attr_map = kg->osl->attribute_map[object];
ustring stdname(std::string("geom:") +
diff --git a/intern/cycles/kernel/shaders/CMakeLists.txt b/intern/cycles/kernel/shaders/CMakeLists.txt
index b42b9b2fe64..9dcedc9ba19 100644
--- a/intern/cycles/kernel/shaders/CMakeLists.txt
+++ b/intern/cycles/kernel/shaders/CMakeLists.txt
@@ -13,6 +13,7 @@ set(SRC_OSL
node_bump.osl
node_camera.osl
node_checker_texture.osl
+ node_clamp.osl
node_combine_rgb.osl
node_combine_hsv.osl
node_combine_xyz.osl
@@ -46,6 +47,7 @@ set(SRC_OSL
node_light_falloff.osl
node_light_path.osl
node_magic_texture.osl
+ node_map_range.osl
node_mapping.osl
node_math.osl
node_mix.osl
@@ -76,13 +78,16 @@ set(SRC_OSL
node_value.osl
node_vector_curves.osl
node_vector_math.osl
+ node_vector_rotate.osl
node_vector_transform.osl
node_velvet_bsdf.osl
+ node_vertex_color.osl
node_voronoi_texture.osl
node_voxel_texture.osl
node_wavelength.osl
node_blackbody.osl
node_wave_texture.osl
+ node_white_noise_texture.osl
node_wireframe.osl
node_hair_bsdf.osl
node_principled_hair_bsdf.osl
@@ -91,13 +96,19 @@ set(SRC_OSL
node_rgb_to_bw.osl
)
+# The headers that OSL ships differ per release, so we cannot
+# hardcode this list.
+file(GLOB SRC_OSL_HEADER_DIST ${OSL_SHADER_DIR}/*.h)
+
set(SRC_OSL_HEADERS
node_color.h
node_fresnel.h
+ node_hash.h
+ node_math.h
+ node_noise.h
node_ramp_util.h
- node_texture.h
- stdosl.h
- oslutil.h
+ stdcycles.h
+ ${SRC_OSL_HEADER_DIST}
)
set(SRC_OSO
@@ -112,7 +123,7 @@ foreach(_file ${SRC_OSL})
string(REPLACE ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} _OSO_FILE ${_OSO_FILE})
add_custom_command(
OUTPUT ${_OSO_FILE}
- COMMAND ${OSL_COMPILER} -q -O2 -I"${CMAKE_CURRENT_SOURCE_DIR}" -o ${_OSO_FILE} ${_OSL_FILE}
+ COMMAND ${OSL_COMPILER} -q -O2 -I"${CMAKE_CURRENT_SOURCE_DIR}" -I"${OSL_SHADER_DIR}" -o ${_OSO_FILE} ${_OSL_FILE}
DEPENDS ${_OSL_FILE} ${SRC_OSL_HEADERS} ${OSL_COMPILER})
list(APPEND SRC_OSO
${_OSO_FILE}
diff --git a/intern/cycles/kernel/shaders/node_absorption_volume.osl b/intern/cycles/kernel/shaders/node_absorption_volume.osl
index e99bd254666..37ccc4c969f 100644
--- a/intern/cycles/kernel/shaders/node_absorption_volume.osl
+++ b/intern/cycles/kernel/shaders/node_absorption_volume.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_absorption_volume(color Color = color(0.8, 0.8, 0.8),
float Density = 1.0,
diff --git a/intern/cycles/kernel/shaders/node_add_closure.osl b/intern/cycles/kernel/shaders/node_add_closure.osl
index 077e2735e61..27ecc9ef0c2 100644
--- a/intern/cycles/kernel/shaders/node_add_closure.osl
+++ b/intern/cycles/kernel/shaders/node_add_closure.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_add_closure(closure color Closure1 = 0,
closure color Closure2 = 0,
diff --git a/intern/cycles/kernel/shaders/node_ambient_occlusion.osl b/intern/cycles/kernel/shaders/node_ambient_occlusion.osl
index 7bf28719e78..22d245d0698 100644
--- a/intern/cycles/kernel/shaders/node_ambient_occlusion.osl
+++ b/intern/cycles/kernel/shaders/node_ambient_occlusion.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_ambient_occlusion(color ColorIn = color(1.0, 1.0, 1.0),
int samples = 16,
diff --git a/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl b/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl
index 165c09eb8e0..739cd375ab2 100644
--- a/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl
@@ -13,8 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_anisotropic_bsdf(color Color = 0.0,
string distribution = "GGX",
diff --git a/intern/cycles/kernel/shaders/node_attribute.osl b/intern/cycles/kernel/shaders/node_attribute.osl
index 336543cc130..abec8ebfbf0 100644
--- a/intern/cycles/kernel/shaders/node_attribute.osl
+++ b/intern/cycles/kernel/shaders/node_attribute.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_attribute(string bump_offset = "center",
string name = "",
diff --git a/intern/cycles/kernel/shaders/node_background.osl b/intern/cycles/kernel/shaders/node_background.osl
index 6ded0d2c65c..3f45db751b3 100644
--- a/intern/cycles/kernel/shaders/node_background.osl
+++ b/intern/cycles/kernel/shaders/node_background.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_background(color Color = 0.8,
float Strength = 1.0,
diff --git a/intern/cycles/kernel/shaders/node_bevel.osl b/intern/cycles/kernel/shaders/node_bevel.osl
index 189c20c52e7..e87ddab716d 100644
--- a/intern/cycles/kernel/shaders/node_bevel.osl
+++ b/intern/cycles/kernel/shaders/node_bevel.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_bevel(int samples = 4,
float Radius = 0.05,
diff --git a/intern/cycles/kernel/shaders/node_blackbody.osl b/intern/cycles/kernel/shaders/node_blackbody.osl
index 8a24bf1e28b..741efae755d 100644
--- a/intern/cycles/kernel/shaders/node_blackbody.osl
+++ b/intern/cycles/kernel/shaders/node_blackbody.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_blackbody(float Temperature = 1200.0, output color Color = 0.0)
{
diff --git a/intern/cycles/kernel/shaders/node_brick_texture.osl b/intern/cycles/kernel/shaders/node_brick_texture.osl
index 0abc3574c48..075a324c730 100644
--- a/intern/cycles/kernel/shaders/node_brick_texture.osl
+++ b/intern/cycles/kernel/shaders/node_brick_texture.osl
@@ -14,8 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
-#include "node_texture.h"
+#include "stdcycles.h"
/* Brick */
diff --git a/intern/cycles/kernel/shaders/node_brightness.osl b/intern/cycles/kernel/shaders/node_brightness.osl
index 2defbc4b1db..019edfb79a3 100644
--- a/intern/cycles/kernel/shaders/node_brightness.osl
+++ b/intern/cycles/kernel/shaders/node_brightness.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_brightness(color ColorIn = 0.8,
float Bright = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_bump.osl b/intern/cycles/kernel/shaders/node_bump.osl
index 3697bb37fd9..811182f40b5 100644
--- a/intern/cycles/kernel/shaders/node_bump.osl
+++ b/intern/cycles/kernel/shaders/node_bump.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
/* "Bump Mapping Unparameterized Surfaces on the GPU"
* Morten S. Mikkelsen, 2010 */
diff --git a/intern/cycles/kernel/shaders/node_camera.osl b/intern/cycles/kernel/shaders/node_camera.osl
index 833e9e775fe..45ca50c6e1e 100644
--- a/intern/cycles/kernel/shaders/node_camera.osl
+++ b/intern/cycles/kernel/shaders/node_camera.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_camera(output vector ViewVector = vector(0.0, 0.0, 0.0),
output float ViewZDepth = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_checker_texture.osl b/intern/cycles/kernel/shaders/node_checker_texture.osl
index e068f7952ed..d6a30dbdb40 100644
--- a/intern/cycles/kernel/shaders/node_checker_texture.osl
+++ b/intern/cycles/kernel/shaders/node_checker_texture.osl
@@ -14,8 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
-#include "node_texture.h"
+#include "stdcycles.h"
/* Checker */
diff --git a/intern/cycles/kernel/shaders/node_clamp.osl b/intern/cycles/kernel/shaders/node_clamp.osl
new file mode 100644
index 00000000000..ce9392a0d98
--- /dev/null
+++ b/intern/cycles/kernel/shaders/node_clamp.osl
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "stdcycles.h"
+
+shader node_clamp(string type = "minmax",
+ float Value = 1.0,
+ float Min = 0.0,
+ float Max = 1.0,
+ output float Result = 0.0)
+{
+ Result = (type == "range" && (Min > Max)) ? clamp(Value, Max, Min) : clamp(Value, Min, Max);
+}
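A quick standalone C++ equivalent of the "range" branch above, which accepts the bounds in either order; purely illustrative.

    #include <algorithm>
    #include <cstdio>

    /* Clamp to the interval spanned by a and b, even when they are passed in reverse order. */
    static float clamp_range(float value, float a, float b)
    {
      return (a > b) ? std::min(std::max(value, b), a) : std::min(std::max(value, a), b);
    }

    int main()
    {
      printf("%g\n", clamp_range(0.75f, 1.0f, 0.0f)); /* 0.75: bounds given reversed. */
      printf("%g\n", clamp_range(1.25f, 0.0f, 1.0f)); /* 1.0 */
      return 0;
    }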
diff --git a/intern/cycles/kernel/shaders/node_combine_hsv.osl b/intern/cycles/kernel/shaders/node_combine_hsv.osl
index 1658cf3d774..05e502b5bc1 100644
--- a/intern/cycles/kernel/shaders/node_combine_hsv.osl
+++ b/intern/cycles/kernel/shaders/node_combine_hsv.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_combine_hsv(float H = 0.0, float S = 0.0, float V = 0.0, output color Color = 0.8)
{
diff --git a/intern/cycles/kernel/shaders/node_combine_rgb.osl b/intern/cycles/kernel/shaders/node_combine_rgb.osl
index aaa95e9c5af..036f371eb5c 100644
--- a/intern/cycles/kernel/shaders/node_combine_rgb.osl
+++ b/intern/cycles/kernel/shaders/node_combine_rgb.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_combine_rgb(float R = 0.0, float G = 0.0, float B = 0.0, output color Image = 0.8)
{
diff --git a/intern/cycles/kernel/shaders/node_combine_xyz.osl b/intern/cycles/kernel/shaders/node_combine_xyz.osl
index 4ab49168704..4ebd86b605c 100644
--- a/intern/cycles/kernel/shaders/node_combine_xyz.osl
+++ b/intern/cycles/kernel/shaders/node_combine_xyz.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_combine_xyz(float X = 0.0, float Y = 0.0, float Z = 0.0, output vector Vector = 0.8)
{
diff --git a/intern/cycles/kernel/shaders/node_convert_from_color.osl b/intern/cycles/kernel/shaders/node_convert_from_color.osl
index 7ea9a1e4fb3..c3f0e118844 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_color.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_color.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_convert_from_color(color value_color = 0.0,
output string value_string = "",
diff --git a/intern/cycles/kernel/shaders/node_convert_from_float.osl b/intern/cycles/kernel/shaders/node_convert_from_float.osl
index 13b5dea0838..61a15a1c2b0 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_float.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_float.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_convert_from_float(float value_float = 0.0,
output string value_string = "",
diff --git a/intern/cycles/kernel/shaders/node_convert_from_int.osl b/intern/cycles/kernel/shaders/node_convert_from_int.osl
index a59e025d822..2e6a99b2765 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_int.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_int.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_convert_from_int(int value_int = 0,
output string value_string = "",
diff --git a/intern/cycles/kernel/shaders/node_convert_from_normal.osl b/intern/cycles/kernel/shaders/node_convert_from_normal.osl
index 7bdd94d1941..64201d63190 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_normal.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_normal.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_convert_from_normal(normal value_normal = normal(0.0, 0.0, 0.0),
output string value_string = "",
diff --git a/intern/cycles/kernel/shaders/node_convert_from_point.osl b/intern/cycles/kernel/shaders/node_convert_from_point.osl
index 79c1719e7a7..11d64f76d6f 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_point.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_point.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_convert_from_point(point value_point = point(0.0, 0.0, 0.0),
output string value_string = "",
diff --git a/intern/cycles/kernel/shaders/node_convert_from_string.osl b/intern/cycles/kernel/shaders/node_convert_from_string.osl
index 48d894a6b3e..b496c4e6d05 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_string.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_string.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_convert_from_string(string value_string = "",
output color value_color = color(0.0, 0.0, 0.0),
diff --git a/intern/cycles/kernel/shaders/node_convert_from_vector.osl b/intern/cycles/kernel/shaders/node_convert_from_vector.osl
index 92ab2313bcb..820faabd32b 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_vector.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_vector.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_convert_from_vector(vector value_vector = vector(0.0, 0.0, 0.0),
output string value_string = "",
diff --git a/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl b/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl
index bd5554b838a..f5886f534eb 100644
--- a/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_diffuse_bsdf(color Color = 0.8,
float Roughness = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_displacement.osl b/intern/cycles/kernel/shaders/node_displacement.osl
index a1f3b7b7737..44a4828d511 100644
--- a/intern/cycles/kernel/shaders/node_displacement.osl
+++ b/intern/cycles/kernel/shaders/node_displacement.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_displacement(string space = "object",
float Height = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_emission.osl b/intern/cycles/kernel/shaders/node_emission.osl
index 57973f57ac6..f289a9711d9 100644
--- a/intern/cycles/kernel/shaders/node_emission.osl
+++ b/intern/cycles/kernel/shaders/node_emission.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_emission(color Color = 0.8, float Strength = 1.0, output closure color Emission = 0)
{
diff --git a/intern/cycles/kernel/shaders/node_environment_texture.osl b/intern/cycles/kernel/shaders/node_environment_texture.osl
index eb32dad392f..d04743eb368 100644
--- a/intern/cycles/kernel/shaders/node_environment_texture.osl
+++ b/intern/cycles/kernel/shaders/node_environment_texture.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_color.h"
+#include "stdcycles.h"
vector environment_texture_direction_to_equirectangular(vector dir)
{
@@ -47,9 +47,10 @@ shader node_environment_texture(
string filename = "",
string projection = "equirectangular",
string interpolation = "linear",
- string color_space = "sRGB",
+ int compress_as_srgb = 0,
+ int ignore_alpha = 0,
+ int unassociate_alpha = 0,
int is_float = 1,
- int use_alpha = 1,
output color Color = 0.0,
output float Alpha = 1.0)
{
@@ -69,13 +70,16 @@ shader node_environment_texture(
Color = (color)texture(
filename, p[0], 1.0 - p[1], "wrap", "periodic", "interp", interpolation, "alpha", Alpha);
- if (use_alpha) {
+ if (ignore_alpha) {
+ Alpha = 1.0;
+ }
+ else if (unassociate_alpha) {
Color = color_unpremultiply(Color, Alpha);
if (!is_float)
Color = min(Color, 1.0);
}
- if (color_space == "sRGB")
+ if (compress_as_srgb)
Color = color_srgb_to_scene_linear(Color);
}
diff --git a/intern/cycles/kernel/shaders/node_fresnel.osl b/intern/cycles/kernel/shaders/node_fresnel.osl
index 89250db40f3..cff084c344d 100644
--- a/intern/cycles/kernel/shaders/node_fresnel.osl
+++ b/intern/cycles/kernel/shaders/node_fresnel.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_fresnel.h"
+#include "stdcycles.h"
shader node_fresnel(float IOR = 1.45, normal Normal = N, output float Fac = 0.0)
{
diff --git a/intern/cycles/kernel/shaders/node_gamma.osl b/intern/cycles/kernel/shaders/node_gamma.osl
index 9b9c17dc8af..0816df64fe8 100644
--- a/intern/cycles/kernel/shaders/node_gamma.osl
+++ b/intern/cycles/kernel/shaders/node_gamma.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_gamma(color ColorIn = 0.8, float Gamma = 1.0, output color ColorOut = 0.0)
{
diff --git a/intern/cycles/kernel/shaders/node_geometry.osl b/intern/cycles/kernel/shaders/node_geometry.osl
index b5c1c6611c1..55cda71db1b 100644
--- a/intern/cycles/kernel/shaders/node_geometry.osl
+++ b/intern/cycles/kernel/shaders/node_geometry.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_geometry(normal NormalIn = N,
string bump_offset = "center",
@@ -26,7 +26,8 @@ shader node_geometry(normal NormalIn = N,
output vector Incoming = vector(0.0, 0.0, 0.0),
output point Parametric = point(0.0, 0.0, 0.0),
output float Backfacing = 0.0,
- output float Pointiness = 0.0)
+ output float Pointiness = 0.0,
+ output float RandomPerIsland = 0.0)
{
Position = P;
Normal = NormalIn;
@@ -65,4 +66,6 @@ shader node_geometry(normal NormalIn = N,
else if (bump_offset == "dy") {
Pointiness += Dy(Pointiness);
}
+
+ getattribute("geom:random_per_island", RandomPerIsland);
}
diff --git a/intern/cycles/kernel/shaders/node_glass_bsdf.osl b/intern/cycles/kernel/shaders/node_glass_bsdf.osl
index c0b8a002536..0042d573f8d 100644
--- a/intern/cycles/kernel/shaders/node_glass_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_glass_bsdf.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_fresnel.h"
+#include "stdcycles.h"
shader node_glass_bsdf(color Color = 0.8,
string distribution = "sharp",
diff --git a/intern/cycles/kernel/shaders/node_glossy_bsdf.osl b/intern/cycles/kernel/shaders/node_glossy_bsdf.osl
index 2d40ee8d3f6..c73604d3650 100644
--- a/intern/cycles/kernel/shaders/node_glossy_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_glossy_bsdf.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_fresnel.h"
+#include "stdcycles.h"
shader node_glossy_bsdf(color Color = 0.8,
string distribution = "GGX",
diff --git a/intern/cycles/kernel/shaders/node_gradient_texture.osl b/intern/cycles/kernel/shaders/node_gradient_texture.osl
index 52bf466673d..e9acebc0572 100644
--- a/intern/cycles/kernel/shaders/node_gradient_texture.osl
+++ b/intern/cycles/kernel/shaders/node_gradient_texture.osl
@@ -14,8 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
-#include "node_texture.h"
+#include "stdcycles.h"
/* Gradient */
@@ -57,7 +56,7 @@ float gradient(point p, string type)
result = r;
}
- return result;
+ return clamp(result, 0.0, 1.0);
}
shader node_gradient_texture(
diff --git a/intern/cycles/kernel/shaders/node_hair_bsdf.osl b/intern/cycles/kernel/shaders/node_hair_bsdf.osl
index bc912087666..3e0ac7af2e0 100644
--- a/intern/cycles/kernel/shaders/node_hair_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_hair_bsdf.osl
@@ -16,7 +16,7 @@
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_hair_bsdf(color Color = 0.8,
string component = "reflection",
diff --git a/intern/cycles/kernel/shaders/node_hair_info.osl b/intern/cycles/kernel/shaders/node_hair_info.osl
index 991a27c4103..ee08ea57e68 100644
--- a/intern/cycles/kernel/shaders/node_hair_info.osl
+++ b/intern/cycles/kernel/shaders/node_hair_info.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_hair_info(output float IsStrand = 0.0,
output float Intercept = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_hash.h b/intern/cycles/kernel/shaders/node_hash.h
new file mode 100644
index 00000000000..b42e42ff910
--- /dev/null
+++ b/intern/cycles/kernel/shaders/node_hash.h
@@ -0,0 +1,81 @@
+#include "stdcycles.h"
+#include "vector2.h"
+#include "vector4.h"
+
+#define vector3 point
+
+/* **** Hash a float or vector[234] into a float [0, 1] **** */
+
+float hash_float_to_float(float k)
+{
+ return hashnoise(k);
+}
+
+float hash_vector2_to_float(vector2 k)
+{
+ return hashnoise(k.x, k.y);
+}
+
+float hash_vector3_to_float(vector3 k)
+{
+ return hashnoise(k);
+}
+
+float hash_vector4_to_float(vector4 k)
+{
+ return hashnoise(vector3(k.x, k.y, k.z), k.w);
+}
+
+/* **** Hash a vector[234] into a vector[234] [0, 1] **** */
+
+vector2 hash_vector2_to_vector2(vector2 k)
+{
+ return vector2(hash_vector2_to_float(k), hash_vector3_to_float(vector3(k.x, k.y, 1.0)));
+}
+
+vector3 hash_vector3_to_vector3(vector3 k)
+{
+ return vector3(hash_vector3_to_float(k),
+ hash_vector4_to_float(vector4(k[0], k[1], k[2], 1.0)),
+ hash_vector4_to_float(vector4(k[0], k[1], k[2], 2.0)));
+}
+
+vector4 hash_vector4_to_vector4(vector4 k)
+{
+ return vector4(hash_vector4_to_float(k),
+ hash_vector4_to_float(vector4(k.w, k.x, k.y, k.z)),
+ hash_vector4_to_float(vector4(k.z, k.w, k.x, k.y)),
+ hash_vector4_to_float(vector4(k.y, k.z, k.w, k.x)));
+}
+
+/* **** Hash a float or a vec[234] into a color [0, 1] **** */
+
+color hash_float_to_color(float k)
+{
+ return color(hash_float_to_float(k),
+ hash_vector2_to_float(vector2(k, 1.0)),
+ hash_vector2_to_float(vector2(k, 2.0)));
+}
+
+color hash_vector2_to_color(vector2 k)
+{
+ return color(hash_vector2_to_float(k),
+ hash_vector3_to_float(vector3(k.x, k.y, 1.0)),
+ hash_vector3_to_float(vector3(k.x, k.y, 2.0)));
+}
+
+color hash_vector3_to_color(vector3 k)
+{
+ return color(hash_vector3_to_float(k),
+ hash_vector4_to_float(vector4(k[0], k[1], k[2], 1.0)),
+ hash_vector4_to_float(vector4(k[0], k[1], k[2], 2.0)));
+}
+
+color hash_vector4_to_color(vector4 k)
+{
+ return color(hash_vector4_to_float(k),
+ hash_vector4_to_float(vector4(k.z, k.x, k.w, k.y)),
+ hash_vector4_to_float(vector4(k.w, k.z, k.y, k.x)));
+}
+
+#undef vector3
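The header above derives every vector and color hash from a scalar hash by appending small constant offsets as extra coordinates. The standalone C++ sketch below shows the same pattern with a stand-in bit-mixing hash mapped to [0, 1); it does not reproduce OSL's hashnoise().

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    /* Stand-in scalar hash: mix the bits of two floats into a value in [0, 1). */
    static float hash2(float a, float b)
    {
      uint32_t x, y;
      std::memcpy(&x, &a, sizeof(x));
      std::memcpy(&y, &b, sizeof(y));
      uint32_t h = x * 73856093u ^ y * 19349663u;
      h ^= h >> 16;
      h *= 0x7feb352du;
      h ^= h >> 15;
      return float(h & 0x00ffffffu) / float(0x01000000u);
    }

    static float hash3(float a, float b, float c)
    {
      return hash2(hash2(a, b), c);
    }

    struct float3 {
      float x, y, z;
    };

    /* Hash a 2D input into three channels by varying a constant extra coordinate,
     * mirroring the hash_vector2_to_color() pattern above. */
    static float3 hash_vec2_to_color(float x, float y)
    {
      return {hash2(x, y), hash3(x, y, 1.0f), hash3(x, y, 2.0f)};
    }

    int main()
    {
      float3 c = hash_vec2_to_color(0.3f, 0.7f);
      printf("%f %f %f\n", c.x, c.y, c.z);
      return 0;
    }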
diff --git a/intern/cycles/kernel/shaders/node_holdout.osl b/intern/cycles/kernel/shaders/node_holdout.osl
index b51bc0543a5..92e41c92f72 100644
--- a/intern/cycles/kernel/shaders/node_holdout.osl
+++ b/intern/cycles/kernel/shaders/node_holdout.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_holdout(output closure color Holdout = holdout())
{
diff --git a/intern/cycles/kernel/shaders/node_hsv.osl b/intern/cycles/kernel/shaders/node_hsv.osl
index 30c56a20a92..4417057b10f 100644
--- a/intern/cycles/kernel/shaders/node_hsv.osl
+++ b/intern/cycles/kernel/shaders/node_hsv.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_color.h"
+#include "stdcycles.h"
shader node_hsv(float Hue = 0.5,
float Saturation = 1.0,
diff --git a/intern/cycles/kernel/shaders/node_ies_light.osl b/intern/cycles/kernel/shaders/node_ies_light.osl
index ea8c44e09de..76348b4d758 100644
--- a/intern/cycles/kernel/shaders/node_ies_light.osl
+++ b/intern/cycles/kernel/shaders/node_ies_light.osl
@@ -14,14 +14,13 @@
* limitations under the License.
*/
-#include "stdosl.h"
-#include "node_texture.h"
+#include "stdcycles.h"
/* IES Light */
shader node_ies_light(int use_mapping = 0,
matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
- int slot = 0,
+ string filename = "",
float Strength = 1.0,
point Vector = I,
output float Fac = 0.0)
@@ -32,10 +31,10 @@ shader node_ies_light(int use_mapping = 0,
p = transform(mapping, p);
}
- p = normalize(p);
+ p = normalize((vector)p);
float v_angle = acos(-p[2]);
float h_angle = atan2(p[0], p[1]) + M_PI;
- Fac = Strength * texture(format("@l%d", slot), h_angle, v_angle);
+ Fac = Strength * texture(filename, h_angle, v_angle);
}
diff --git a/intern/cycles/kernel/shaders/node_image_texture.osl b/intern/cycles/kernel/shaders/node_image_texture.osl
index df5eda39985..22d34a1082c 100644
--- a/intern/cycles/kernel/shaders/node_image_texture.osl
+++ b/intern/cycles/kernel/shaders/node_image_texture.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_color.h"
+#include "stdcycles.h"
point texco_remap_square(point co)
{
@@ -56,26 +56,40 @@ point map_to_sphere(vector dir)
}
color image_texture_lookup(string filename,
- string color_space,
float u,
float v,
output float Alpha,
- int use_alpha,
+ int compress_as_srgb,
+ int ignore_alpha,
+ int unassociate_alpha,
int is_float,
+ int is_tiled,
string interpolation,
string extension)
{
+ /* Flip the y coordinate, but preserve UDIM tiles. */
+ float flip_v;
+ if (is_tiled) {
+ float v_i = (int)v;
+ flip_v = v_i + (1.0 - (v - v_i));
+ }
+ else {
+ flip_v = 1.0 - v;
+ }
color rgb = (color)texture(
- filename, u, 1.0 - v, "wrap", extension, "interp", interpolation, "alpha", Alpha);
+ filename, u, flip_v, "wrap", extension, "interp", interpolation, "alpha", Alpha);
- if (use_alpha) {
+ if (ignore_alpha) {
+ Alpha = 1.0;
+ }
+ else if (unassociate_alpha) {
rgb = color_unpremultiply(rgb, Alpha);
if (!is_float)
rgb = min(rgb, 1.0);
}
- if (color_space == "sRGB") {
+ if (compress_as_srgb) {
rgb = color_srgb_to_scene_linear(rgb);
}
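The UDIM-aware flip above inverts only the fractional part of v, so the lookup stays inside its original tile (for example, v = 2.3 in tile row 2 becomes 2.7). A small standalone C++ check of that arithmetic, for illustration only:

    #include <cstdio>

    /* Flip v inside its UDIM tile when tiled; otherwise flip the whole coordinate. */
    static float flip_v(float v, bool is_tiled)
    {
      if (is_tiled) {
        const float v_i = float(int(v));
        return v_i + (1.0f - (v - v_i));
      }
      return 1.0f - v;
    }

    int main()
    {
      printf("%g\n", flip_v(2.3f, true));  /* 2.7 */
      printf("%g\n", flip_v(0.3f, false)); /* 0.7 */
      return 0;
    }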
@@ -86,13 +100,15 @@ shader node_image_texture(int use_mapping = 0,
matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
point Vector = P,
string filename = "",
- string color_space = "sRGB",
string projection = "flat",
string interpolation = "smartcubic",
string extension = "periodic",
float projection_blend = 0.0,
+ int compress_as_srgb = 0,
+ int ignore_alpha = 0,
+ int unassociate_alpha = 0,
+ int is_tiled = 0,
int is_float = 1,
- int use_alpha = 1,
output color Color = 0.0,
output float Alpha = 1.0)
{
@@ -102,8 +118,17 @@ shader node_image_texture(int use_mapping = 0,
p = transform(mapping, p);
if (projection == "flat") {
- Color = image_texture_lookup(
- filename, color_space, p[0], p[1], Alpha, use_alpha, is_float, interpolation, extension);
+ Color = image_texture_lookup(filename,
+ p[0],
+ p[1],
+ Alpha,
+ compress_as_srgb,
+ ignore_alpha,
+ unassociate_alpha,
+ is_float,
+ is_tiled,
+ interpolation,
+ extension);
}
else if (projection == "box") {
/* object space normal */
@@ -173,36 +198,42 @@ shader node_image_texture(int use_mapping = 0,
if (weight[0] > 0.0) {
Color += weight[0] * image_texture_lookup(filename,
- color_space,
p[1],
p[2],
tmp_alpha,
- use_alpha,
+ compress_as_srgb,
+ ignore_alpha,
+ unassociate_alpha,
is_float,
+ 0,
interpolation,
extension);
Alpha += weight[0] * tmp_alpha;
}
if (weight[1] > 0.0) {
Color += weight[1] * image_texture_lookup(filename,
- color_space,
p[0],
p[2],
tmp_alpha,
- use_alpha,
+ compress_as_srgb,
+ ignore_alpha,
+ unassociate_alpha,
is_float,
+ 0,
interpolation,
extension);
Alpha += weight[1] * tmp_alpha;
}
if (weight[2] > 0.0) {
Color += weight[2] * image_texture_lookup(filename,
- color_space,
p[1],
p[0],
tmp_alpha,
- use_alpha,
+ compress_as_srgb,
+ ignore_alpha,
+ unassociate_alpha,
is_float,
+ 0,
interpolation,
extension);
Alpha += weight[2] * tmp_alpha;
@@ -211,24 +242,28 @@ shader node_image_texture(int use_mapping = 0,
else if (projection == "sphere") {
point projected = map_to_sphere(texco_remap_square(p));
Color = image_texture_lookup(filename,
- color_space,
projected[0],
projected[1],
Alpha,
- use_alpha,
+ compress_as_srgb,
+ ignore_alpha,
+ unassociate_alpha,
is_float,
+ 0,
interpolation,
extension);
}
else if (projection == "tube") {
point projected = map_to_tube(texco_remap_square(p));
Color = image_texture_lookup(filename,
- color_space,
projected[0],
projected[1],
Alpha,
- use_alpha,
+ compress_as_srgb,
+ ignore_alpha,
+ unassociate_alpha,
is_float,
+ 0,
interpolation,
extension);
}
diff --git a/intern/cycles/kernel/shaders/node_invert.osl b/intern/cycles/kernel/shaders/node_invert.osl
index c7d41e4e129..23c16935ca1 100644
--- a/intern/cycles/kernel/shaders/node_invert.osl
+++ b/intern/cycles/kernel/shaders/node_invert.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_invert(float Fac = 1.0, color ColorIn = 0.8, output color ColorOut = 0.8)
{
diff --git a/intern/cycles/kernel/shaders/node_layer_weight.osl b/intern/cycles/kernel/shaders/node_layer_weight.osl
index 7c46f28b41b..1662be2cad1 100644
--- a/intern/cycles/kernel/shaders/node_layer_weight.osl
+++ b/intern/cycles/kernel/shaders/node_layer_weight.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_fresnel.h"
+#include "stdcycles.h"
shader node_layer_weight(float Blend = 0.5,
normal Normal = N,
diff --git a/intern/cycles/kernel/shaders/node_light_falloff.osl b/intern/cycles/kernel/shaders/node_light_falloff.osl
index d0d7dd9c5aa..3f3c9444a5a 100644
--- a/intern/cycles/kernel/shaders/node_light_falloff.osl
+++ b/intern/cycles/kernel/shaders/node_light_falloff.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_light_falloff(float Strength = 0.0,
float Smooth = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_light_path.osl b/intern/cycles/kernel/shaders/node_light_path.osl
index c4a3624a67f..4ff06915771 100644
--- a/intern/cycles/kernel/shaders/node_light_path.osl
+++ b/intern/cycles/kernel/shaders/node_light_path.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_light_path(output float IsCameraRay = 0.0,
output float IsShadowRay = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_magic_texture.osl b/intern/cycles/kernel/shaders/node_magic_texture.osl
index aa700e575ef..476c6895f05 100644
--- a/intern/cycles/kernel/shaders/node_magic_texture.osl
+++ b/intern/cycles/kernel/shaders/node_magic_texture.osl
@@ -14,8 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
-#include "node_texture.h"
+#include "stdcycles.h"
/* Magic */
diff --git a/intern/cycles/kernel/shaders/node_map_range.osl b/intern/cycles/kernel/shaders/node_map_range.osl
new file mode 100644
index 00000000000..1c49027e6dd
--- /dev/null
+++ b/intern/cycles/kernel/shaders/node_map_range.osl
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "stdcycles.h"
+
+float safe_divide(float a, float b)
+{
+ return (b != 0.0) ? a / b : 0.0;
+}
+
+float smootherstep(float edge0, float edge1, float x)
+{
+ float t = clamp(safe_divide((x - edge0), (edge1 - edge0)), 0.0, 1.0);
+ return t * t * t * (t * (t * 6.0 - 15.0) + 10.0);
+}
+
+shader node_map_range(string type = "linear",
+ float Value = 1.0,
+ float FromMin = 0.0,
+ float FromMax = 1.0,
+ float ToMin = 0.0,
+ float ToMax = 1.0,
+ float Steps = 4.0,
+ output float Result = 0.0)
+{
+ if (FromMax != FromMin) {
+ float Factor = Value;
+ if (type == "stepped") {
+ Factor = (Value - FromMin) / (FromMax - FromMin);
+ Factor = (Steps > 0) ? floor(Factor * (Steps + 1.0)) / Steps : 0.0;
+ }
+ else if (type == "smoothstep") {
+ Factor = (FromMin > FromMax) ? 1.0 - smoothstep(FromMax, FromMin, Value) :
+ smoothstep(FromMin, FromMax, Value);
+ }
+ else if (type == "smootherstep") {
+ Factor = (FromMin > FromMax) ? 1.0 - smootherstep(FromMax, FromMin, Value) :
+ smootherstep(FromMin, FromMax, Value);
+ }
+ else {
+ Factor = (Value - FromMin) / (FromMax - FromMin);
+ }
+ Result = ToMin + Factor * (ToMax - ToMin);
+ }
+}
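
For reference, a minimal standalone C sketch of the linear and smootherstep variants of the map-range logic added above, using the same quintic 6t^5 - 15t^4 + 10t^3 interpolant; function names here are illustrative, not taken from the Cycles sources.

#include <math.h>
#include <stdio.h>

static float clampf(float x, float lo, float hi)
{
  return x < lo ? lo : (x > hi ? hi : x);
}

/* Quintic interpolant used by the smootherstep variant. */
static float smootherstep(float edge0, float edge1, float x)
{
  float d = edge1 - edge0;
  float t = (d != 0.0f) ? clampf((x - edge0) / d, 0.0f, 1.0f) : 0.0f;
  return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f);
}

/* Remap value from [from_min, from_max] to [to_min, to_max]. */
static float map_range(float value, float from_min, float from_max,
                       float to_min, float to_max, int smoother)
{
  if (from_max == from_min)
    return 0.0f;
  float factor = smoother ? smootherstep(from_min, from_max, value) :
                            (value - from_min) / (from_max - from_min);
  return to_min + factor * (to_max - to_min);
}

int main(void)
{
  printf("%.3f\n", map_range(0.25f, 0.0f, 1.0f, 0.0f, 10.0f, 0)); /* 2.500 */
  printf("%.3f\n", map_range(0.25f, 0.0f, 1.0f, 0.0f, 10.0f, 1)); /* ~1.035 */
  return 0;
}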
diff --git a/intern/cycles/kernel/shaders/node_mapping.osl b/intern/cycles/kernel/shaders/node_mapping.osl
index f5cc2d1c5dd..8d204999630 100644
--- a/intern/cycles/kernel/shaders/node_mapping.osl
+++ b/intern/cycles/kernel/shaders/node_mapping.osl
@@ -14,19 +14,60 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
-shader node_mapping(matrix Matrix = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
- point mapping_min = point(0.0, 0.0, 0.0),
- point mapping_max = point(0.0, 0.0, 0.0),
- int use_minmax = 0,
- point VectorIn = point(0.0, 0.0, 0.0),
- output point VectorOut = point(0.0, 0.0, 0.0))
+point safe_divide(point a, point b)
+{
+ return point((b[0] != 0.0) ? a[0] / b[0] : 0.0,
+ (b[1] != 0.0) ? a[1] / b[1] : 0.0,
+ (b[2] != 0.0) ? a[2] / b[2] : 0.0);
+}
+
+matrix euler_to_mat(point euler)
{
- point p = transform(Matrix, VectorIn);
+ float cx = cos(euler[0]);
+ float cy = cos(euler[1]);
+ float cz = cos(euler[2]);
+ float sx = sin(euler[0]);
+ float sy = sin(euler[1]);
+ float sz = sin(euler[2]);
+
+ matrix mat = matrix(1.0);
+ mat[0][0] = cy * cz;
+ mat[0][1] = cy * sz;
+ mat[0][2] = -sy;
- if (use_minmax)
- p = min(max(mapping_min, p), mapping_max);
+ mat[1][0] = sy * sx * cz - cx * sz;
+ mat[1][1] = sy * sx * sz + cx * cz;
+ mat[1][2] = cy * sx;
- VectorOut = p;
+ mat[2][0] = sy * cx * cz + sx * sz;
+ mat[2][1] = sy * cx * sz - sx * cz;
+ mat[2][2] = cy * cx;
+ return mat;
+}
+
+shader node_mapping(string type = "point",
+ point VectorIn = point(0.0, 0.0, 0.0),
+ point Location = point(0.0, 0.0, 0.0),
+ point Rotation = point(0.0, 0.0, 0.0),
+ point Scale = point(1.0, 1.0, 1.0),
+ output point VectorOut = point(0.0, 0.0, 0.0))
+{
+ if (type == "point") {
+ VectorOut = transform(euler_to_mat(Rotation), (VectorIn * Scale)) + Location;
+ }
+ else if (type == "texture") {
+ VectorOut = safe_divide(transform(transpose(euler_to_mat(Rotation)), (VectorIn - Location)),
+ Scale);
+ }
+ else if (type == "vector") {
+ VectorOut = transform(euler_to_mat(Rotation), (VectorIn * Scale));
+ }
+ else if (type == "normal") {
+ VectorOut = normalize((vector)transform(euler_to_mat(Rotation), safe_divide(VectorIn, Scale)));
+ }
+ else {
+ warning("%s", "Unknown Mapping vector type!");
+ }
}
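
The mapping node now builds an XYZ Euler rotation matrix directly in the shader. Below is a small C sketch of the same euler_to_mat construction applied in the "point" mode order (scale, then rotate, then translate), assuming a row-vector transform convention as used by the shader's transform() call; everything here is illustrative.

#include <math.h>
#include <stdio.h>

typedef struct { float v[3]; } Vec3;
typedef struct { float m[3][3]; } Mat3;

/* Rotation matrix for XYZ Euler angles, matching the shader's euler_to_mat(). */
static Mat3 euler_to_mat(Vec3 e)
{
  float cx = cosf(e.v[0]), cy = cosf(e.v[1]), cz = cosf(e.v[2]);
  float sx = sinf(e.v[0]), sy = sinf(e.v[1]), sz = sinf(e.v[2]);
  Mat3 r;
  r.m[0][0] = cy * cz;                r.m[0][1] = cy * sz;                r.m[0][2] = -sy;
  r.m[1][0] = sy * sx * cz - cx * sz; r.m[1][1] = sy * sx * sz + cx * cz; r.m[1][2] = cy * sx;
  r.m[2][0] = sy * cx * cz + sx * sz; r.m[2][1] = sy * cx * sz - sx * cz; r.m[2][2] = cy * cx;
  return r;
}

/* Row-vector transform (p' = p * M), the convention assumed for transform() here. */
static Vec3 transform_point(Mat3 m, Vec3 p)
{
  Vec3 o;
  for (int j = 0; j < 3; j++)
    o.v[j] = p.v[0] * m.m[0][j] + p.v[1] * m.m[1][j] + p.v[2] * m.m[2][j];
  return o;
}

int main(void)
{
  Vec3 p = {{1.0f, 0.0f, 0.0f}};
  Vec3 rot = {{0.0f, 0.0f, 1.5707964f}}; /* 90 degrees around Z */
  Vec3 q = transform_point(euler_to_mat(rot), p);
  printf("%.3f %.3f %.3f\n", q.v[0], q.v[1], q.v[2]); /* ~0 1 0 */
  return 0;
}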
diff --git a/intern/cycles/kernel/shaders/node_math.h b/intern/cycles/kernel/shaders/node_math.h
new file mode 100644
index 00000000000..4b1a6c5bc16
--- /dev/null
+++ b/intern/cycles/kernel/shaders/node_math.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+float safe_divide(float a, float b)
+{
+ return (b != 0.0) ? a / b : 0.0;
+}
+
+vector safe_divide(vector a, vector b)
+{
+ return vector((b[0] != 0.0) ? a[0] / b[0] : 0.0,
+ (b[1] != 0.0) ? a[1] / b[1] : 0.0,
+ (b[2] != 0.0) ? a[2] / b[2] : 0.0);
+}
+
+float safe_modulo(float a, float b)
+{
+ return (b != 0.0) ? fmod(a, b) : 0.0;
+}
+
+float fract(float a)
+{
+ return a - floor(a);
+}
+
+/* See: https://www.iquilezles.org/www/articles/smin/smin.htm. */
+float smoothmin(float a, float b, float c)
+{
+ if (c != 0.0) {
+ float h = max(c - abs(a - b), 0.0) / c;
+ return min(a, b) - h * h * h * c * (1.0 / 6.0);
+ }
+ else {
+ return min(a, b);
+ }
+}
+
+float pingpong(float a, float b)
+{
+ return (b != 0.0) ? abs(fract((a - b) / (b * 2.0)) * b * 2.0 - b) : 0.0;
+}
+
+float safe_sqrt(float a)
+{
+ return (a > 0.0) ? sqrt(a) : 0.0;
+}
+
+float safe_log(float a, float b)
+{
+ return (a > 0.0 && b > 0.0) ? log(a) / log(b) : 0.0;
+}
+
+vector project(vector v, vector v_proj)
+{
+ float lenSquared = dot(v_proj, v_proj);
+ return (lenSquared != 0.0) ? (dot(v, v_proj) / lenSquared) * v_proj : vector(0.0);
+}
+
+vector snap(vector a, vector b)
+{
+ return floor(safe_divide(a, b)) * b;
+}
+
+/* Adapted from godotengine math_funcs.h. */
+float wrap(float value, float max, float min)
+{
+ float range = max - min;
+ return (range != 0.0) ? value - (range * floor((value - min) / range)) : min;
+}
+
+point wrap(point value, point max, point min)
+{
+ return point(wrap(value[0], max[0], min[0]),
+ wrap(value[1], max[1], min[1]),
+ wrap(value[2], max[2], min[2]));
+}
+
+matrix euler_to_mat(point euler)
+{
+ float cx = cos(euler[0]);
+ float cy = cos(euler[1]);
+ float cz = cos(euler[2]);
+ float sx = sin(euler[0]);
+ float sy = sin(euler[1]);
+ float sz = sin(euler[2]);
+ matrix mat = matrix(1.0);
+ mat[0][0] = cy * cz;
+ mat[0][1] = cy * sz;
+ mat[0][2] = -sy;
+ mat[1][0] = sy * sx * cz - cx * sz;
+ mat[1][1] = sy * sx * sz + cx * cz;
+ mat[1][2] = cy * sx;
+ mat[2][0] = sy * cx * cz + sx * sz;
+ mat[2][1] = sy * cx * sz - sx * cz;
+ mat[2][2] = cy * cx;
+ return mat;
+}
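
A few of the helpers above (wrap, pingpong, smoothmin) condensed into a runnable C sketch with sample values; the formulas follow the header, but the names and driver code are only an illustration.

#include <math.h>
#include <stdio.h>

/* Wrap value into [min, max), as in the header adapted from Godot. */
static float wrapf(float value, float max, float min)
{
  float range = max - min;
  return (range != 0.0f) ? value - range * floorf((value - min) / range) : min;
}

/* Triangle-wave ping-pong of a over period b. */
static float pingpong(float a, float b)
{
  if (b == 0.0f)
    return 0.0f;
  float f = (a - b) / (2.0f * b);
  return fabsf((f - floorf(f)) * 2.0f * b - b);
}

/* Polynomial smooth minimum (Inigo Quilez); c controls the blend width. */
static float smoothmin(float a, float b, float c)
{
  if (c == 0.0f)
    return fminf(a, b);
  float h = fmaxf(c - fabsf(a - b), 0.0f) / c;
  return fminf(a, b) - h * h * h * c * (1.0f / 6.0f);
}

int main(void)
{
  printf("wrap(5.5, 2, 0)        = %.2f\n", wrapf(5.5f, 2.0f, 0.0f)); /* 1.50 */
  printf("pingpong(2.5, 1)       = %.2f\n", pingpong(2.5f, 1.0f));    /* 0.50 */
  printf("smoothmin(1, 1.1, 0.5) = %.3f\n", smoothmin(1.0f, 1.1f, 0.5f)); /* ~0.957 */
  return 0;
}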
diff --git a/intern/cycles/kernel/shaders/node_math.osl b/intern/cycles/kernel/shaders/node_math.osl
index 8830339e05f..dbaa7ccb60e 100644
--- a/intern/cycles/kernel/shaders/node_math.osl
+++ b/intern/cycles/kernel/shaders/node_math.osl
@@ -14,60 +14,16 @@
* limitations under the License.
*/
-#include "stdosl.h"
-
-float safe_divide(float a, float b)
-{
- float result;
-
- if (b == 0.0)
- result = 0.0;
- else
- result = a / b;
-
- return result;
-}
-
-float safe_modulo(float a, float b)
-{
- float result;
-
- if (b == 0.0)
- result = 0.0;
- else
- result = fmod(a, b);
-
- return result;
-}
-
-float safe_sqrt(float a)
-{
- float result;
-
- if (a > 0.0)
- result = sqrt(a);
- else
- result = 0.0;
-
- return result;
-}
-
-float safe_log(float a, float b)
-{
- if (a < 0.0 || b < 0.0)
- return 0.0;
-
- return log(a) / log(b);
-}
+#include "node_math.h"
+#include "stdcycles.h"
+/* OSL asin, acos, and pow functions are safe by default. */
shader node_math(string type = "add",
- int use_clamp = 0,
- float Value1 = 0.0,
- float Value2 = 0.0,
+ float Value1 = 0.5,
+ float Value2 = 0.5,
+ float Value3 = 0.5,
output float Value = 0.0)
{
- /* OSL asin, acos, pow check for values that could give rise to nan */
-
if (type == "add")
Value = Value1 + Value2;
else if (type == "subtract")
@@ -76,47 +32,78 @@ shader node_math(string type = "add",
Value = Value1 * Value2;
else if (type == "divide")
Value = safe_divide(Value1, Value2);
- else if (type == "sine")
- Value = sin(Value1);
- else if (type == "cosine")
- Value = cos(Value1);
- else if (type == "tangent")
- Value = tan(Value1);
- else if (type == "arcsine")
- Value = asin(Value1);
- else if (type == "arccosine")
- Value = acos(Value1);
- else if (type == "arctangent")
- Value = atan(Value1);
else if (type == "power")
Value = pow(Value1, Value2);
else if (type == "logarithm")
Value = safe_log(Value1, Value2);
+ else if (type == "sqrt")
+ Value = safe_sqrt(Value1);
+ else if (type == "inversesqrt")
+ Value = inversesqrt(Value1);
+ else if (type == "absolute")
+ Value = fabs(Value1);
+ else if (type == "radians")
+ Value = radians(Value1);
+ else if (type == "degrees")
+ Value = degrees(Value1);
else if (type == "minimum")
Value = min(Value1, Value2);
else if (type == "maximum")
Value = max(Value1, Value2);
- else if (type == "round")
- Value = floor(Value1 + 0.5);
else if (type == "less_than")
Value = Value1 < Value2;
else if (type == "greater_than")
Value = Value1 > Value2;
- else if (type == "modulo")
- Value = safe_modulo(Value1, Value2);
- else if (type == "absolute")
- Value = fabs(Value1);
- else if (type == "arctan2")
- Value = atan2(Value1, Value2);
+ else if (type == "round")
+ Value = floor(Value1 + 0.5);
else if (type == "floor")
Value = floor(Value1);
else if (type == "ceil")
Value = ceil(Value1);
- else if (type == "fract")
+ else if (type == "fraction")
Value = Value1 - floor(Value1);
- else if (type == "sqrt")
- Value = safe_sqrt(Value1);
-
- if (use_clamp)
- Value = clamp(Value, 0.0, 1.0);
+ else if (type == "modulo")
+ Value = safe_modulo(Value1, Value2);
+ else if (type == "trunc")
+ Value = trunc(Value1);
+ else if (type == "snap")
+ Value = floor(safe_divide(Value1, Value2)) * Value2;
+ else if (type == "wrap")
+ Value = wrap(Value1, Value2, Value3);
+ else if (type == "pingpong")
+ Value = pingpong(Value1, Value2);
+ else if (type == "sine")
+ Value = sin(Value1);
+ else if (type == "cosine")
+ Value = cos(Value1);
+ else if (type == "tangent")
+ Value = tan(Value1);
+ else if (type == "sinh")
+ Value = sinh(Value1);
+ else if (type == "cosh")
+ Value = cosh(Value1);
+ else if (type == "tanh")
+ Value = tanh(Value1);
+ else if (type == "arcsine")
+ Value = asin(Value1);
+ else if (type == "arccosine")
+ Value = acos(Value1);
+ else if (type == "arctangent")
+ Value = atan(Value1);
+ else if (type == "arctan2")
+ Value = atan2(Value1, Value2);
+ else if (type == "sign")
+ Value = sign(Value1);
+ else if (type == "exponent")
+ Value = exp(Value1);
+ else if (type == "compare")
+ Value = ((Value1 == Value2) || (abs(Value1 - Value2) <= max(Value3, 1e-5))) ? 1.0 : 0.0;
+ else if (type == "multiply_add")
+ Value = Value1 * Value2 + Value3;
+ else if (type == "smoothmin")
+ Value = smoothmin(Value1, Value2, Value3);
+ else if (type == "smoothmax")
+ Value = -(smoothmin(-Value1, -Value2, Value3));
+ else
+ warning("%s", "Unknown math operator!");
}
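
Among the new operators above, "compare" is the only one with a tolerance; it returns 1.0 when the inputs match within an epsilon that is never allowed to drop below 1e-5. A tiny C sketch of that rule (illustrative only):

#include <math.h>
#include <stdio.h>

/* "compare": 1.0 when the inputs are equal within an epsilon, else 0.0.
 * v3 is the user epsilon, clamped below by 1e-5 as in the shader. */
static float math_compare(float v1, float v2, float v3)
{
  return (v1 == v2 || fabsf(v1 - v2) <= fmaxf(v3, 1e-5f)) ? 1.0f : 0.0f;
}

int main(void)
{
  printf("%.0f\n", math_compare(0.30f, 0.31f, 0.02f)); /* 1 */
  printf("%.0f\n", math_compare(0.30f, 0.35f, 0.02f)); /* 0 */
  return 0;
}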
diff --git a/intern/cycles/kernel/shaders/node_mix.osl b/intern/cycles/kernel/shaders/node_mix.osl
index 8caea6803ed..a13b4bb7b96 100644
--- a/intern/cycles/kernel/shaders/node_mix.osl
+++ b/intern/cycles/kernel/shaders/node_mix.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_color.h"
+#include "stdcycles.h"
color node_mix_blend(float t, color col1, color col2)
{
@@ -91,12 +91,12 @@ color node_mix_diff(float t, color col1, color col2)
color node_mix_dark(float t, color col1, color col2)
{
- return min(col1, col2) * t + col1 * (1.0 - t);
+ return mix(col1, min(col1, col2), t);
}
color node_mix_light(float t, color col1, color col2)
{
- return max(col1, col2 * t);
+ return mix(col1, max(col1, col2), t);
}
color node_mix_dodge(float t, color col1, color col2)
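
The darken and lighten blend modes above are rewritten as a straight linear blend between col1 and the per-channel min/max of the two colors, instead of the older ad-hoc formulas. A tiny single-channel C comparison of the old and new lighten behavior (illustrative only):

#include <math.h>
#include <stdio.h>

static float mixf(float a, float b, float t) { return a + t * (b - a); }

/* Old lighten: max(col1, col2 * t). New lighten: mix(col1, max(col1, col2), t). */
int main(void)
{
  float col1 = 0.2f, col2 = 0.8f, t = 0.5f;
  printf("old: %.2f  new: %.2f\n",
         fmaxf(col1, col2 * t),                 /* 0.40 */
         mixf(col1, fmaxf(col1, col2), t));     /* 0.50: exact linear blend in t */
  return 0;
}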
diff --git a/intern/cycles/kernel/shaders/node_mix_closure.osl b/intern/cycles/kernel/shaders/node_mix_closure.osl
index 517c59c8786..94fc2171c44 100644
--- a/intern/cycles/kernel/shaders/node_mix_closure.osl
+++ b/intern/cycles/kernel/shaders/node_mix_closure.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_mix_closure(float Fac = 0.5,
closure color Closure1 = 0,
diff --git a/intern/cycles/kernel/shaders/node_musgrave_texture.osl b/intern/cycles/kernel/shaders/node_musgrave_texture.osl
index a7877c43d46..d03b84c1ab4 100644
--- a/intern/cycles/kernel/shaders/node_musgrave_texture.osl
+++ b/intern/cycles/kernel/shaders/node_musgrave_texture.osl
@@ -14,10 +14,344 @@
* limitations under the License.
*/
-#include "stdosl.h"
-#include "node_texture.h"
+#include "node_noise.h"
+#include "stdcycles.h"
+#include "vector2.h"
+#include "vector4.h"
-/* Musgrave fBm
+#define vector3 point
+
+/* 1D Musgrave fBm
+ *
+ * H: fractal increment parameter
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ *
+ * from "Texturing and Modelling: A procedural approach"
+ */
+
+float noise_musgrave_fBm_1d(float co, float H, float lacunarity, float octaves)
+{
+ float p = co;
+ float value = 0.0;
+ float pwr = 1.0;
+ float pwHL = pow(lacunarity, -H);
+
+ for (int i = 0; i < (int)octaves; i++) {
+ value += safe_snoise(p) * pwr;
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floor(octaves);
+ if (rmd != 0.0) {
+ value += rmd * safe_snoise(p) * pwr;
+ }
+
+ return value;
+}
+
+/* 1D Musgrave Multifractal
+ *
+ * H: highest fractal dimension
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ */
+
+float noise_musgrave_multi_fractal_1d(float co, float H, float lacunarity, float octaves)
+{
+ float p = co;
+ float value = 1.0;
+ float pwr = 1.0;
+ float pwHL = pow(lacunarity, -H);
+
+ for (int i = 0; i < (int)octaves; i++) {
+ value *= (pwr * safe_snoise(p) + 1.0);
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floor(octaves);
+ if (rmd != 0.0) {
+ value *= (rmd * pwr * safe_snoise(p) + 1.0); /* correct? */
+ }
+
+ return value;
+}
+
+/* 1D Musgrave Heterogeneous Terrain
+ *
+ * H: fractal dimension of the roughest area
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ * offset: raises the terrain from `sea level'
+ */
+
+float noise_musgrave_hetero_terrain_1d(
+ float co, float H, float lacunarity, float octaves, float offset)
+{
+ float p = co;
+ float pwHL = pow(lacunarity, -H);
+ float pwr = pwHL;
+
+ /* first unscaled octave of function; later octaves are scaled */
+ float value = offset + safe_snoise(p);
+ p *= lacunarity;
+
+ for (int i = 1; i < (int)octaves; i++) {
+ float increment = (safe_snoise(p) + offset) * pwr * value;
+ value += increment;
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floor(octaves);
+ if (rmd != 0.0) {
+ float increment = (safe_snoise(p) + offset) * pwr * value;
+ value += rmd * increment;
+ }
+
+ return value;
+}
+
+/* 1D Hybrid Additive/Multiplicative Multifractal Terrain
+ *
+ * H: fractal dimension of the roughest area
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ * offset: raises the terrain from `sea level'
+ */
+
+float noise_musgrave_hybrid_multi_fractal_1d(
+ float co, float H, float lacunarity, float octaves, float offset, float gain)
+{
+ float p = co;
+ float pwHL = pow(lacunarity, -H);
+ float pwr = pwHL;
+
+ float value = safe_snoise(p) + offset;
+ float weight = gain * value;
+ p *= lacunarity;
+
+ for (int i = 1; (weight > 0.001) && (i < (int)octaves); i++) {
+ if (weight > 1.0) {
+ weight = 1.0;
+ }
+
+ float signal = (safe_snoise(p) + offset) * pwr;
+ pwr *= pwHL;
+ value += weight * signal;
+ weight *= gain * signal;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floor(octaves);
+ if (rmd != 0.0) {
+ value += rmd * ((safe_snoise(p) + offset) * pwr);
+ }
+
+ return value;
+}
+
+/* 1D Ridged Multifractal Terrain
+ *
+ * H: fractal dimension of the roughest area
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ * offset: raises the terrain from `sea level'
+ */
+
+float noise_musgrave_ridged_multi_fractal_1d(
+ float co, float H, float lacunarity, float octaves, float offset, float gain)
+{
+ float p = co;
+ float pwHL = pow(lacunarity, -H);
+ float pwr = pwHL;
+
+ float signal = offset - fabs(safe_snoise(p));
+ signal *= signal;
+ float value = signal;
+ float weight = 1.0;
+
+ for (int i = 1; i < (int)octaves; i++) {
+ p *= lacunarity;
+ weight = clamp(signal * gain, 0.0, 1.0);
+ signal = offset - fabs(safe_snoise(p));
+ signal *= signal;
+ signal *= weight;
+ value += signal * pwr;
+ pwr *= pwHL;
+ }
+
+ return value;
+}
+
+/* 2D Musgrave fBm
+ *
+ * H: fractal increment parameter
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ *
+ * from "Texturing and Modelling: A procedural approach"
+ */
+
+float noise_musgrave_fBm_2d(vector2 co, float H, float lacunarity, float octaves)
+{
+ vector2 p = co;
+ float value = 0.0;
+ float pwr = 1.0;
+ float pwHL = pow(lacunarity, -H);
+
+ for (int i = 0; i < (int)octaves; i++) {
+ value += safe_snoise(p) * pwr;
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floor(octaves);
+ if (rmd != 0.0) {
+ value += rmd * safe_snoise(p) * pwr;
+ }
+
+ return value;
+}
+
+/* 2D Musgrave Multifractal
+ *
+ * H: highest fractal dimension
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ */
+
+float noise_musgrave_multi_fractal_2d(vector2 co, float H, float lacunarity, float octaves)
+{
+ vector2 p = co;
+ float value = 1.0;
+ float pwr = 1.0;
+ float pwHL = pow(lacunarity, -H);
+
+ for (int i = 0; i < (int)octaves; i++) {
+ value *= (pwr * safe_snoise(p) + 1.0);
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floor(octaves);
+ if (rmd != 0.0) {
+ value *= (rmd * pwr * safe_snoise(p) + 1.0); /* correct? */
+ }
+
+ return value;
+}
+
+/* 2D Musgrave Heterogeneous Terrain
+ *
+ * H: fractal dimension of the roughest area
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ * offset: raises the terrain from `sea level'
+ */
+
+float noise_musgrave_hetero_terrain_2d(
+ vector2 co, float H, float lacunarity, float octaves, float offset)
+{
+ vector2 p = co;
+ float pwHL = pow(lacunarity, -H);
+ float pwr = pwHL;
+
+ /* first unscaled octave of function; later octaves are scaled */
+ float value = offset + safe_snoise(p);
+ p *= lacunarity;
+
+ for (int i = 1; i < (int)octaves; i++) {
+ float increment = (safe_snoise(p) + offset) * pwr * value;
+ value += increment;
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floor(octaves);
+ if (rmd != 0.0) {
+ float increment = (safe_snoise(p) + offset) * pwr * value;
+ value += rmd * increment;
+ }
+
+ return value;
+}
+
+/* 2D Hybrid Additive/Multiplicative Multifractal Terrain
+ *
+ * H: fractal dimension of the roughest area
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ * offset: raises the terrain from `sea level'
+ */
+
+float noise_musgrave_hybrid_multi_fractal_2d(
+ vector2 co, float H, float lacunarity, float octaves, float offset, float gain)
+{
+ vector2 p = co;
+ float pwHL = pow(lacunarity, -H);
+ float pwr = pwHL;
+
+ float value = safe_snoise(p) + offset;
+ float weight = gain * value;
+ p *= lacunarity;
+
+ for (int i = 1; (weight > 0.001) && (i < (int)octaves); i++) {
+ if (weight > 1.0) {
+ weight = 1.0;
+ }
+
+ float signal = (safe_snoise(p) + offset) * pwr;
+ pwr *= pwHL;
+ value += weight * signal;
+ weight *= gain * signal;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floor(octaves);
+ if (rmd != 0.0) {
+ value += rmd * ((safe_snoise(p) + offset) * pwr);
+ }
+
+ return value;
+}
+
+/* 2D Ridged Multifractal Terrain
+ *
+ * H: fractal dimension of the roughest area
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ * offset: raises the terrain from `sea level'
+ */
+
+float noise_musgrave_ridged_multi_fractal_2d(
+ vector2 co, float H, float lacunarity, float octaves, float offset, float gain)
+{
+ vector2 p = co;
+ float pwHL = pow(lacunarity, -H);
+ float pwr = pwHL;
+
+ float signal = offset - fabs(safe_snoise(p));
+ signal *= signal;
+ float value = signal;
+ float weight = 1.0;
+
+ for (int i = 1; i < (int)octaves; i++) {
+ p *= lacunarity;
+ weight = clamp(signal * gain, 0.0, 1.0);
+ signal = offset - fabs(safe_snoise(p));
+ signal *= signal;
+ signal *= weight;
+ value += signal * pwr;
+ pwr *= pwHL;
+ }
+
+ return value;
+}
+
+/* 3D Musgrave fBm
*
* H: fractal increment parameter
* lacunarity: gap between successive frequencies
@@ -26,58 +360,56 @@
* from "Texturing and Modelling: A procedural approach"
*/
-float noise_musgrave_fBm(point ip, float H, float lacunarity, float octaves)
+float noise_musgrave_fBm_3d(vector3 co, float H, float lacunarity, float octaves)
{
- float rmd;
+ vector3 p = co;
float value = 0.0;
float pwr = 1.0;
float pwHL = pow(lacunarity, -H);
- int i;
- point p = ip;
- for (i = 0; i < (int)octaves; i++) {
- value += safe_noise(p, "signed") * pwr;
+ for (int i = 0; i < (int)octaves; i++) {
+ value += safe_snoise(p) * pwr;
pwr *= pwHL;
p *= lacunarity;
}
- rmd = octaves - floor(octaves);
- if (rmd != 0.0)
- value += rmd * safe_noise(p, "signed") * pwr;
+ float rmd = octaves - floor(octaves);
+ if (rmd != 0.0) {
+ value += rmd * safe_snoise(p) * pwr;
+ }
return value;
}
-/* Musgrave Multifractal
+/* 3D Musgrave Multifractal
*
* H: highest fractal dimension
* lacunarity: gap between successive frequencies
* octaves: number of frequencies in the fBm
*/
-float noise_musgrave_multi_fractal(point ip, float H, float lacunarity, float octaves)
+float noise_musgrave_multi_fractal_3d(vector3 co, float H, float lacunarity, float octaves)
{
- float rmd;
+ vector3 p = co;
float value = 1.0;
float pwr = 1.0;
float pwHL = pow(lacunarity, -H);
- int i;
- point p = ip;
- for (i = 0; i < (int)octaves; i++) {
- value *= (pwr * safe_noise(p, "signed") + 1.0);
+ for (int i = 0; i < (int)octaves; i++) {
+ value *= (pwr * safe_snoise(p) + 1.0);
pwr *= pwHL;
p *= lacunarity;
}
- rmd = octaves - floor(octaves);
- if (rmd != 0.0)
- value *= (rmd * pwr * safe_noise(p, "signed") + 1.0); /* correct? */
+ float rmd = octaves - floor(octaves);
+ if (rmd != 0.0) {
+ value *= (rmd * pwr * safe_snoise(p) + 1.0); /* correct? */
+ }
return value;
}
-/* Musgrave Heterogeneous Terrain
+/* 3D Musgrave Heterogeneous Terrain
*
* H: fractal dimension of the roughest area
* lacunarity: gap between successive frequencies
@@ -85,36 +417,34 @@ float noise_musgrave_multi_fractal(point ip, float H, float lacunarity, float oc
* offset: raises the terrain from `sea level'
*/
-float noise_musgrave_hetero_terrain(
- point ip, float H, float lacunarity, float octaves, float offset)
+float noise_musgrave_hetero_terrain_3d(
+ vector3 co, float H, float lacunarity, float octaves, float offset)
{
- float value, increment, rmd;
+ vector3 p = co;
float pwHL = pow(lacunarity, -H);
float pwr = pwHL;
- int i;
- point p = ip;
/* first unscaled octave of function; later octaves are scaled */
- value = offset + safe_noise(p, "signed");
+ float value = offset + safe_snoise(p);
p *= lacunarity;
- for (i = 1; i < (int)octaves; i++) {
- increment = (safe_noise(p, "signed") + offset) * pwr * value;
+ for (int i = 1; i < (int)octaves; i++) {
+ float increment = (safe_snoise(p) + offset) * pwr * value;
value += increment;
pwr *= pwHL;
p *= lacunarity;
}
- rmd = octaves - floor(octaves);
+ float rmd = octaves - floor(octaves);
if (rmd != 0.0) {
- increment = (safe_noise(p, "signed") + offset) * pwr * value;
+ float increment = (safe_snoise(p) + offset) * pwr * value;
value += rmd * increment;
}
return value;
}
-/* Hybrid Additive/Multiplicative Multifractal Terrain
+/* 3D Hybrid Additive/Multiplicative Multifractal Terrain
*
* H: fractal dimension of the roughest area
* lacunarity: gap between successive frequencies
@@ -122,38 +452,38 @@ float noise_musgrave_hetero_terrain(
* offset: raises the terrain from `sea level'
*/
-float noise_musgrave_hybrid_multi_fractal(
- point ip, float H, float lacunarity, float octaves, float offset, float gain)
+float noise_musgrave_hybrid_multi_fractal_3d(
+ vector3 co, float H, float lacunarity, float octaves, float offset, float gain)
{
- float result, signal, weight, rmd;
+ vector3 p = co;
float pwHL = pow(lacunarity, -H);
float pwr = pwHL;
- int i;
- point p = ip;
- result = safe_noise(p, "signed") + offset;
- weight = gain * result;
+ float value = safe_snoise(p) + offset;
+ float weight = gain * value;
p *= lacunarity;
- for (i = 1; (weight > 0.001) && (i < (int)octaves); i++) {
- if (weight > 1.0)
+ for (int i = 1; (weight > 0.001) && (i < (int)octaves); i++) {
+ if (weight > 1.0) {
weight = 1.0;
+ }
- signal = (safe_noise(p, "signed") + offset) * pwr;
+ float signal = (safe_snoise(p) + offset) * pwr;
pwr *= pwHL;
- result += weight * signal;
+ value += weight * signal;
weight *= gain * signal;
p *= lacunarity;
}
- rmd = octaves - floor(octaves);
- if (rmd != 0.0)
- result += rmd * ((safe_noise(p, "signed") + offset) * pwr);
+ float rmd = octaves - floor(octaves);
+ if (rmd != 0.0) {
+ value += rmd * ((safe_snoise(p) + offset) * pwr);
+ }
- return result;
+ return value;
}
-/* Ridged Multifractal Terrain
+/* 3D Ridged Multifractal Terrain
*
* H: fractal dimension of the roughest area
* lacunarity: gap between successive frequencies
@@ -161,73 +491,313 @@ float noise_musgrave_hybrid_multi_fractal(
* offset: raises the terrain from `sea level'
*/
-float noise_musgrave_ridged_multi_fractal(
- point ip, float H, float lacunarity, float octaves, float offset, float gain)
+float noise_musgrave_ridged_multi_fractal_3d(
+ vector3 co, float H, float lacunarity, float octaves, float offset, float gain)
{
- float result, signal, weight;
+ vector3 p = co;
float pwHL = pow(lacunarity, -H);
float pwr = pwHL;
- int i;
- point p = ip;
- signal = offset - fabs(safe_noise(p, "signed"));
+ float signal = offset - fabs(safe_snoise(p));
signal *= signal;
- result = signal;
- weight = 1.0;
+ float value = signal;
+ float weight = 1.0;
- for (i = 1; i < (int)octaves; i++) {
+ for (int i = 1; i < (int)octaves; i++) {
p *= lacunarity;
weight = clamp(signal * gain, 0.0, 1.0);
- signal = offset - fabs(safe_noise(p, "signed"));
+ signal = offset - fabs(safe_snoise(p));
signal *= signal;
signal *= weight;
- result += signal * pwr;
+ value += signal * pwr;
pwr *= pwHL;
}
- return result;
+ return value;
}
-/* Shader */
+/* 4D Musgrave fBm
+ *
+ * H: fractal increment parameter
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ *
+ * from "Texturing and Modelling: A procedural approach"
+ */
+
+float noise_musgrave_fBm_4d(vector4 co, float H, float lacunarity, float octaves)
+{
+ vector4 p = co;
+ float value = 0.0;
+ float pwr = 1.0;
+ float pwHL = pow(lacunarity, -H);
+
+ for (int i = 0; i < (int)octaves; i++) {
+ value += safe_snoise(p) * pwr;
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floor(octaves);
+ if (rmd != 0.0) {
+ value += rmd * safe_snoise(p) * pwr;
+ }
+
+ return value;
+}
+
+/* 4D Musgrave Multifractal
+ *
+ * H: highest fractal dimension
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ */
+
+float noise_musgrave_multi_fractal_4d(vector4 co, float H, float lacunarity, float octaves)
+{
+ vector4 p = co;
+ float value = 1.0;
+ float pwr = 1.0;
+ float pwHL = pow(lacunarity, -H);
+
+ for (int i = 0; i < (int)octaves; i++) {
+ value *= (pwr * safe_snoise(p) + 1.0);
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floor(octaves);
+ if (rmd != 0.0) {
+ value *= (rmd * pwr * safe_snoise(p) + 1.0); /* correct? */
+ }
+
+ return value;
+}
+
+/* 4D Musgrave Heterogeneous Terrain
+ *
+ * H: fractal dimension of the roughest area
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ * offset: raises the terrain from `sea level'
+ */
+
+float noise_musgrave_hetero_terrain_4d(
+ vector4 co, float H, float lacunarity, float octaves, float offset)
+{
+ vector4 p = co;
+ float pwHL = pow(lacunarity, -H);
+ float pwr = pwHL;
+
+ /* first unscaled octave of function; later octaves are scaled */
+ float value = offset + safe_snoise(p);
+ p *= lacunarity;
+
+ for (int i = 1; i < (int)octaves; i++) {
+ float increment = (safe_snoise(p) + offset) * pwr * value;
+ value += increment;
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floor(octaves);
+ if (rmd != 0.0) {
+ float increment = (safe_snoise(p) + offset) * pwr * value;
+ value += rmd * increment;
+ }
+
+ return value;
+}
+
+/* 4D Hybrid Additive/Multiplicative Multifractal Terrain
+ *
+ * H: fractal dimension of the roughest area
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ * offset: raises the terrain from `sea level'
+ */
+
+float noise_musgrave_hybrid_multi_fractal_4d(
+ vector4 co, float H, float lacunarity, float octaves, float offset, float gain)
+{
+ vector4 p = co;
+ float pwHL = pow(lacunarity, -H);
+ float pwr = pwHL;
+
+ float value = safe_snoise(p) + offset;
+ float weight = gain * value;
+ p *= lacunarity;
+
+ for (int i = 1; (weight > 0.001) && (i < (int)octaves); i++) {
+ if (weight > 1.0) {
+ weight = 1.0;
+ }
+
+ float signal = (safe_snoise(p) + offset) * pwr;
+ pwr *= pwHL;
+ value += weight * signal;
+ weight *= gain * signal;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floor(octaves);
+ if (rmd != 0.0) {
+ value += rmd * ((safe_snoise(p) + offset) * pwr);
+ }
+
+ return value;
+}
+
+/* 4D Ridged Multifractal Terrain
+ *
+ * H: fractal dimension of the roughest area
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ * offset: raises the terrain from `sea level'
+ */
+
+float noise_musgrave_ridged_multi_fractal_4d(
+ vector4 co, float H, float lacunarity, float octaves, float offset, float gain)
+{
+ vector4 p = co;
+ float pwHL = pow(lacunarity, -H);
+ float pwr = pwHL;
+
+ float signal = offset - fabs(safe_snoise(p));
+ signal *= signal;
+ float value = signal;
+ float weight = 1.0;
+
+ for (int i = 1; i < (int)octaves; i++) {
+ p *= lacunarity;
+ weight = clamp(signal * gain, 0.0, 1.0);
+ signal = offset - fabs(safe_snoise(p));
+ signal *= signal;
+ signal *= weight;
+ value += signal * pwr;
+ pwr *= pwHL;
+ }
+
+ return value;
+}
shader node_musgrave_texture(
int use_mapping = 0,
matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
string type = "fBM",
+ string dimensions = "3D",
+ point Vector = P,
+ float W = 0.0,
float Dimension = 2.0,
- float Lacunarity = 1.0,
+ float Scale = 5.0,
float Detail = 2.0,
+ float Lacunarity = 2.0,
float Offset = 0.0,
float Gain = 1.0,
- float Scale = 5.0,
- point Vector = P,
- output float Fac = 0.0,
- output color Color = 0.0)
+ output float Fac = 0.0)
{
float dimension = max(Dimension, 1e-5);
float octaves = clamp(Detail, 0.0, 16.0);
float lacunarity = max(Lacunarity, 1e-5);
- float intensity = 1.0;
- point p = Vector;
+ vector3 s = Vector;
if (use_mapping)
- p = transform(mapping, p);
-
- p = p * Scale;
-
- if (type == "multifractal")
- Fac = intensity * noise_musgrave_multi_fractal(p, dimension, lacunarity, octaves);
- else if (type == "fBM")
- Fac = intensity * noise_musgrave_fBm(p, dimension, lacunarity, octaves);
- else if (type == "hybrid_multifractal")
- Fac = intensity *
- noise_musgrave_hybrid_multi_fractal(p, dimension, lacunarity, octaves, Offset, Gain);
- else if (type == "ridged_multifractal")
- Fac = intensity *
- noise_musgrave_ridged_multi_fractal(p, dimension, lacunarity, octaves, Offset, Gain);
- else if (type == "hetero_terrain")
- Fac = intensity * noise_musgrave_hetero_terrain(p, dimension, lacunarity, octaves, Offset);
-
- Color = color(Fac, Fac, Fac);
+ s = transform(mapping, s);
+
+ if (dimensions == "1D") {
+ float p = W * Scale;
+ if (type == "multifractal") {
+ Fac = noise_musgrave_multi_fractal_1d(p, dimension, lacunarity, octaves);
+ }
+ else if (type == "fBM") {
+ Fac = noise_musgrave_fBm_1d(p, dimension, lacunarity, octaves);
+ }
+ else if (type == "hybrid_multifractal") {
+ Fac = noise_musgrave_hybrid_multi_fractal_1d(
+ p, dimension, lacunarity, octaves, Offset, Gain);
+ }
+ else if (type == "ridged_multifractal") {
+ Fac = noise_musgrave_ridged_multi_fractal_1d(
+ p, dimension, lacunarity, octaves, Offset, Gain);
+ }
+ else if (type == "hetero_terrain") {
+ Fac = noise_musgrave_hetero_terrain_1d(p, dimension, lacunarity, octaves, Offset);
+ }
+ else {
+ Fac = 0.0;
+ }
+ }
+ else if (dimensions == "2D") {
+ vector2 p = vector2(s[0], s[1]) * Scale;
+ if (type == "multifractal") {
+ Fac = noise_musgrave_multi_fractal_2d(p, dimension, lacunarity, octaves);
+ }
+ else if (type == "fBM") {
+ Fac = noise_musgrave_fBm_2d(p, dimension, lacunarity, octaves);
+ }
+ else if (type == "hybrid_multifractal") {
+ Fac = noise_musgrave_hybrid_multi_fractal_2d(
+ p, dimension, lacunarity, octaves, Offset, Gain);
+ }
+ else if (type == "ridged_multifractal") {
+ Fac = noise_musgrave_ridged_multi_fractal_2d(
+ p, dimension, lacunarity, octaves, Offset, Gain);
+ }
+ else if (type == "hetero_terrain") {
+ Fac = noise_musgrave_hetero_terrain_2d(p, dimension, lacunarity, octaves, Offset);
+ }
+ else {
+ Fac = 0.0;
+ }
+ }
+ else if (dimensions == "3D") {
+ vector3 p = s * Scale;
+ if (type == "multifractal") {
+ Fac = noise_musgrave_multi_fractal_3d(p, dimension, lacunarity, octaves);
+ }
+ else if (type == "fBM") {
+ Fac = noise_musgrave_fBm_3d(p, dimension, lacunarity, octaves);
+ }
+ else if (type == "hybrid_multifractal") {
+ Fac = noise_musgrave_hybrid_multi_fractal_3d(
+ p, dimension, lacunarity, octaves, Offset, Gain);
+ }
+ else if (type == "ridged_multifractal") {
+ Fac = noise_musgrave_ridged_multi_fractal_3d(
+ p, dimension, lacunarity, octaves, Offset, Gain);
+ }
+ else if (type == "hetero_terrain") {
+ Fac = noise_musgrave_hetero_terrain_3d(p, dimension, lacunarity, octaves, Offset);
+ }
+ else {
+ Fac = 0.0;
+ }
+ }
+ else if (dimensions == "4D") {
+ vector4 p = vector4(s[0], s[1], s[2], W) * Scale;
+ if (type == "multifractal") {
+ Fac = noise_musgrave_multi_fractal_4d(p, dimension, lacunarity, octaves);
+ }
+ else if (type == "fBM") {
+ Fac = noise_musgrave_fBm_4d(p, dimension, lacunarity, octaves);
+ }
+ else if (type == "hybrid_multifractal") {
+ Fac = noise_musgrave_hybrid_multi_fractal_4d(
+ p, dimension, lacunarity, octaves, Offset, Gain);
+ }
+ else if (type == "ridged_multifractal") {
+ Fac = noise_musgrave_ridged_multi_fractal_4d(
+ p, dimension, lacunarity, octaves, Offset, Gain);
+ }
+ else if (type == "hetero_terrain") {
+ Fac = noise_musgrave_hetero_terrain_4d(p, dimension, lacunarity, octaves, Offset);
+ }
+ else {
+ Fac = 0.0;
+ }
+ }
+ else {
+ Fac = 0.0;
+ }
}
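
All the Musgrave variants added above share the same octave loop over a signed noise function, with each octave scaled by pow(lacunarity, -H) and a fractional last octave blended in. A compact C sketch of the fBm form follows; the hash-based value noise is a toy stand-in for Cycles' safe_snoise, and everything here is illustrative.

#include <math.h>
#include <stdio.h>

/* Cheap signed 1D value noise standing in for safe_snoise(); Cycles uses Perlin noise. */
static float snoise1(float p)
{
  float i = floorf(p), f = p - i;
  float h0 = sinf(i * 127.1f) * 43758.5453f;
  float h1 = sinf((i + 1.0f) * 127.1f) * 43758.5453f;
  float a = (h0 - floorf(h0)) * 2.0f - 1.0f;
  float b = (h1 - floorf(h1)) * 2.0f - 1.0f;
  float t = f * f * (3.0f - 2.0f * f);
  return a + t * (b - a);
}

/* 1D Musgrave fBm: sum octaves of signed noise, each scaled by pow(lacunarity, -H). */
static float musgrave_fbm_1d(float co, float H, float lacunarity, float octaves)
{
  float p = co, value = 0.0f, pwr = 1.0f;
  float pwHL = powf(lacunarity, -H);
  for (int i = 0; i < (int)octaves; i++) {
    value += snoise1(p) * pwr;
    pwr *= pwHL;
    p *= lacunarity;
  }
  float rmd = octaves - floorf(octaves);
  if (rmd != 0.0f)
    value += rmd * snoise1(p) * pwr;
  return value;
}

int main(void)
{
  for (float x = 0.0f; x < 2.0f; x += 0.5f)
    printf("fbm(%.1f) = %.4f\n", x, musgrave_fbm_1d(x, 1.0f, 2.0f, 4.0f));
  return 0;
}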
diff --git a/intern/cycles/kernel/shaders/node_noise.h b/intern/cycles/kernel/shaders/node_noise.h
new file mode 100644
index 00000000000..ab4cd7792cc
--- /dev/null
+++ b/intern/cycles/kernel/shaders/node_noise.h
@@ -0,0 +1,202 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "vector2.h"
+#include "vector4.h"
+
+#define vector3 point
+
+float safe_noise(float p)
+{
+ float f = noise("noise", p);
+ if (isinf(f))
+ return 0.5;
+ return f;
+}
+
+float safe_noise(vector2 p)
+{
+ float f = noise("noise", p.x, p.y);
+ if (isinf(f))
+ return 0.5;
+ return f;
+}
+
+float safe_noise(vector3 p)
+{
+ float f = noise("noise", p);
+ if (isinf(f))
+ return 0.5;
+ return f;
+}
+
+float safe_noise(vector4 p)
+{
+ float f = noise("noise", vector3(p.x, p.y, p.z), p.w);
+ if (isinf(f))
+ return 0.5;
+ return f;
+}
+
+float safe_snoise(float p)
+{
+ float f = noise("snoise", p);
+ if (isinf(f))
+ return 0.0;
+ return f;
+}
+
+float safe_snoise(vector2 p)
+{
+ float f = noise("snoise", p.x, p.y);
+ if (isinf(f))
+ return 0.0;
+ return f;
+}
+
+float safe_snoise(vector3 p)
+{
+ float f = noise("snoise", p);
+ if (isinf(f))
+ return 0.0;
+ return f;
+}
+
+float safe_snoise(vector4 p)
+{
+ float f = noise("snoise", vector3(p.x, p.y, p.z), p.w);
+ if (isinf(f))
+ return 0.0;
+ return f;
+}
+
+/* The fractal_noise functions are all exactly the same except for the input type. */
+float fractal_noise(float p, float details, float roughness)
+{
+ float fscale = 1.0;
+ float amp = 1.0;
+ float maxamp = 0.0;
+ float sum = 0.0;
+ float octaves = clamp(details, 0.0, 16.0);
+ int n = (int)octaves;
+ for (int i = 0; i <= n; i++) {
+ float t = safe_noise(fscale * p);
+ sum += t * amp;
+ maxamp += amp;
+ amp *= clamp(roughness, 0.0, 1.0);
+ fscale *= 2.0;
+ }
+ float rmd = octaves - floor(octaves);
+ if (rmd != 0.0) {
+ float t = safe_noise(fscale * p);
+ float sum2 = sum + t * amp;
+ sum /= maxamp;
+ sum2 /= maxamp + amp;
+ return (1.0 - rmd) * sum + rmd * sum2;
+ }
+ else {
+ return sum / maxamp;
+ }
+}
+
+/* The fractal_noise functions are all exactly the same except for the input type. */
+float fractal_noise(vector2 p, float details, float roughness)
+{
+ float fscale = 1.0;
+ float amp = 1.0;
+ float maxamp = 0.0;
+ float sum = 0.0;
+ float octaves = clamp(details, 0.0, 16.0);
+ int n = (int)octaves;
+ for (int i = 0; i <= n; i++) {
+ float t = safe_noise(fscale * p);
+ sum += t * amp;
+ maxamp += amp;
+ amp *= clamp(roughness, 0.0, 1.0);
+ fscale *= 2.0;
+ }
+ float rmd = octaves - floor(octaves);
+ if (rmd != 0.0) {
+ float t = safe_noise(fscale * p);
+ float sum2 = sum + t * amp;
+ sum /= maxamp;
+ sum2 /= maxamp + amp;
+ return (1.0 - rmd) * sum + rmd * sum2;
+ }
+ else {
+ return sum / maxamp;
+ }
+}
+
+/* The fractal_noise functions are all exactly the same except for the input type. */
+float fractal_noise(vector3 p, float details, float roughness)
+{
+ float fscale = 1.0;
+ float amp = 1.0;
+ float maxamp = 0.0;
+ float sum = 0.0;
+ float octaves = clamp(details, 0.0, 16.0);
+ int n = (int)octaves;
+ for (int i = 0; i <= n; i++) {
+ float t = safe_noise(fscale * p);
+ sum += t * amp;
+ maxamp += amp;
+ amp *= clamp(roughness, 0.0, 1.0);
+ fscale *= 2.0;
+ }
+ float rmd = octaves - floor(octaves);
+ if (rmd != 0.0) {
+ float t = safe_noise(fscale * p);
+ float sum2 = sum + t * amp;
+ sum /= maxamp;
+ sum2 /= maxamp + amp;
+ return (1.0 - rmd) * sum + rmd * sum2;
+ }
+ else {
+ return sum / maxamp;
+ }
+}
+
+/* The fractal_noise functions are all exactly the same except for the input type. */
+float fractal_noise(vector4 p, float details, float roughness)
+{
+ float fscale = 1.0;
+ float amp = 1.0;
+ float maxamp = 0.0;
+ float sum = 0.0;
+ float octaves = clamp(details, 0.0, 16.0);
+ int n = (int)octaves;
+ for (int i = 0; i <= n; i++) {
+ float t = safe_noise(fscale * p);
+ sum += t * amp;
+ maxamp += amp;
+ amp *= clamp(roughness, 0.0, 1.0);
+ fscale *= 2.0;
+ }
+ float rmd = octaves - floor(octaves);
+ if (rmd != 0.0) {
+ float t = safe_noise(fscale * p);
+ float sum2 = sum + t * amp;
+ sum /= maxamp;
+ sum2 /= maxamp + amp;
+ return (1.0 - rmd) * sum + rmd * sum2;
+ }
+ else {
+ return sum / maxamp;
+ }
+}
+
+#undef vector3
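
The fractal_noise() overloads above blend the fractional last octave in, so the output varies smoothly as Detail changes. A C sketch of that octave-remainder blend follows; the noise source is passed in as a function pointer and the toy noise at the bottom exists only so the sketch runs standalone.

#include <math.h>
#include <stdio.h>

static float clampf(float x, float lo, float hi)
{
  return x < lo ? lo : (x > hi ? hi : x);
}

/* Fractal (fBm-style) sum of a caller-supplied unsigned noise function.
 * The fractional part of `details` blends between n and n+1 octaves so the
 * result changes smoothly as the Detail input varies. */
static float fractal_noise_1d(float (*noise1)(float), float p, float details, float roughness)
{
  float fscale = 1.0f, amp = 1.0f, maxamp = 0.0f, sum = 0.0f;
  float octaves = clampf(details, 0.0f, 16.0f);
  int n = (int)octaves;
  for (int i = 0; i <= n; i++) {
    sum += noise1(fscale * p) * amp;
    maxamp += amp;
    amp *= clampf(roughness, 0.0f, 1.0f);
    fscale *= 2.0f;
  }
  float rmd = octaves - floorf(octaves);
  if (rmd == 0.0f)
    return sum / maxamp;
  float sum2 = sum + noise1(fscale * p) * amp;
  return (1.0f - rmd) * (sum / maxamp) + rmd * (sum2 / (maxamp + amp));
}

/* Toy unsigned noise in [0, 1) so the sketch is self-contained. */
static float toy_noise(float p)
{
  float s = sinf(p * 12.9898f) * 43758.5453f;
  return s - floorf(s);
}

int main(void)
{
  printf("%.4f\n", fractal_noise_1d(toy_noise, 0.37f, 2.5f, 0.5f));
  return 0;
}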
diff --git a/intern/cycles/kernel/shaders/node_noise_texture.osl b/intern/cycles/kernel/shaders/node_noise_texture.osl
index 2cbd571e206..61c0216910b 100644
--- a/intern/cycles/kernel/shaders/node_noise_texture.osl
+++ b/intern/cycles/kernel/shaders/node_noise_texture.osl
@@ -14,47 +14,139 @@
* limitations under the License.
*/
-#include "stdosl.h"
-#include "node_texture.h"
+#include "node_noise.h"
+#include "stdcycles.h"
+#include "vector2.h"
+#include "vector4.h"
-/* Noise */
+#define vector3 point
-float noise(point ip, float distortion, float detail, output color Color)
+/* The following offset functions generate random offsets to be added to texture
+ * coordinates to act as a seed since the noise functions don't have seed values.
+ * A seed value is needed for generating distortion textures and color outputs.
+ * The offset's components are in the range [100, 200], not too high to cause
+ * bad precision and not too small to be noticeable. We use float seed because
+ * OSL only supports float hashes.
+ */
+
+float random_float_offset(float seed)
+{
+ return 100.0 + noise("hash", seed) * 100.0;
+}
+
+vector2 random_vector2_offset(float seed)
+{
+ return vector2(100.0 + noise("hash", seed, 0.0) * 100.0,
+ 100.0 + noise("hash", seed, 1.0) * 100.0);
+}
+
+vector3 random_vector3_offset(float seed)
{
- point r;
- point p = ip;
- int hard = 0;
+ return vector3(100.0 + noise("hash", seed, 0.0) * 100.0,
+ 100.0 + noise("hash", seed, 1.0) * 100.0,
+ 100.0 + noise("hash", seed, 2.0) * 100.0);
+}
+
+vector4 random_vector4_offset(float seed)
+{
+ return vector4(100.0 + noise("hash", seed, 0.0) * 100.0,
+ 100.0 + noise("hash", seed, 1.0) * 100.0,
+ 100.0 + noise("hash", seed, 2.0) * 100.0,
+ 100.0 + noise("hash", seed, 3.0) * 100.0);
+}
+
+float noise_texture(float co, float detail, float roughness, float distortion, output color Color)
+{
+ float p = co;
+ if (distortion != 0.0) {
+ p += safe_snoise(p + random_float_offset(0.0)) * distortion;
+ }
+ float value = fractal_noise(p, detail, roughness);
+ Color = color(value,
+ fractal_noise(p + random_float_offset(1.0), detail, roughness),
+ fractal_noise(p + random_float_offset(2.0), detail, roughness));
+ return value;
+}
+
+float noise_texture(
+ vector2 co, float detail, float roughness, float distortion, output color Color)
+{
+ vector2 p = co;
if (distortion != 0.0) {
- r[0] = safe_noise(p + point(13.5), "unsigned") * distortion;
- r[1] = safe_noise(p, "unsigned") * distortion;
- r[2] = safe_noise(p - point(13.5), "unsigned") * distortion;
+ p += vector2(safe_snoise(p + random_vector2_offset(0.0)) * distortion,
+ safe_snoise(p + random_vector2_offset(1.0)) * distortion);
+ }
- p += r;
+ float value = fractal_noise(p, detail, roughness);
+ Color = color(value,
+ fractal_noise(p + random_vector2_offset(2.0), detail, roughness),
+ fractal_noise(p + random_vector2_offset(3.0), detail, roughness));
+ return value;
+}
+
+float noise_texture(
+ vector3 co, float detail, float roughness, float distortion, output color Color)
+{
+ vector3 p = co;
+ if (distortion != 0.0) {
+ p += vector3(safe_snoise(p + random_vector3_offset(0.0)) * distortion,
+ safe_snoise(p + random_vector3_offset(1.0)) * distortion,
+ safe_snoise(p + random_vector3_offset(2.0)) * distortion);
}
- float fac = noise_turbulence(p, detail, hard);
+ float value = fractal_noise(p, detail, roughness);
+ Color = color(value,
+ fractal_noise(p + random_vector3_offset(3.0), detail, roughness),
+ fractal_noise(p + random_vector3_offset(4.0), detail, roughness));
+ return value;
+}
- Color = color(fac,
- noise_turbulence(point(p[1], p[0], p[2]), detail, hard),
- noise_turbulence(point(p[1], p[2], p[0]), detail, hard));
+float noise_texture(
+ vector4 co, float detail, float roughness, float distortion, output color Color)
+{
+ vector4 p = co;
+ if (distortion != 0.0) {
+ p += vector4(safe_snoise(p + random_vector4_offset(0.0)) * distortion,
+ safe_snoise(p + random_vector4_offset(1.0)) * distortion,
+ safe_snoise(p + random_vector4_offset(2.0)) * distortion,
+ safe_snoise(p + random_vector4_offset(3.0)) * distortion);
+ }
- return fac;
+ float value = fractal_noise(p, detail, roughness);
+ Color = color(value,
+ fractal_noise(p + random_vector4_offset(4.0), detail, roughness),
+ fractal_noise(p + random_vector4_offset(5.0), detail, roughness));
+ return value;
}
shader node_noise_texture(int use_mapping = 0,
matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
- float Distortion = 0.0,
+ string dimensions = "3D",
+ vector3 Vector = vector3(0, 0, 0),
+ float W = 0.0,
float Scale = 5.0,
float Detail = 2.0,
- point Vector = P,
+ float Roughness = 0.5,
+ float Distortion = 0.0,
output float Fac = 0.0,
output color Color = 0.0)
{
- point p = Vector;
-
+ vector3 p = Vector;
if (use_mapping)
p = transform(mapping, p);
- Fac = noise(p * Scale, Distortion, Detail, Color);
+ p *= Scale;
+ float w = W * Scale;
+
+ if (dimensions == "1D")
+ Fac = noise_texture(w, Detail, Roughness, Distortion, Color);
+ else if (dimensions == "2D")
+ Fac = noise_texture(vector2(p[0], p[1]), Detail, Roughness, Distortion, Color);
+ else if (dimensions == "3D")
+ Fac = noise_texture(p, Detail, Roughness, Distortion, Color);
+ else if (dimensions == "4D")
+ Fac = noise_texture(vector4(p[0], p[1], p[2], w), Detail, Roughness, Distortion, Color);
+ else
+ error("Unknown dimension!");
}
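
The comment block above describes seeding by adding large per-channel offsets derived from a float hash, since the noise functions themselves take no seed. A compact C sketch of that idea for the 1D case follows; the hash and noise below are toy stand-ins, not OSL's noise("hash", ...) or the fractal_noise used by the shader.

#include <math.h>
#include <stdio.h>

/* Toy float hash in [0, 1) standing in for OSL's noise("hash", seed). */
static float hash1(float seed)
{
  float s = sinf(seed * 78.233f + 1.0f) * 43758.5453f;
  return s - floorf(s);
}

/* Offset in [100, 200]: large enough to decorrelate channels, small enough
 * to keep float precision. */
static float random_float_offset(float seed)
{
  return 100.0f + hash1(seed) * 100.0f;
}

/* Toy signed noise in [-1, 1) standing in for safe_snoise(). */
static float snoise1(float p)
{
  return 2.0f * hash1(floorf(p) * 0.123f) - 1.0f;
}

/* 1D noise texture: optional domain distortion, then three decorrelated
 * channels produced by evaluating the same field at offset coordinates. */
static void noise_texture_1d(float co, float distortion, float rgb[3])
{
  float p = co;
  if (distortion != 0.0f)
    p += snoise1(p + random_float_offset(0.0f)) * distortion;
  rgb[0] = 0.5f * snoise1(p) + 0.5f;
  rgb[1] = 0.5f * snoise1(p + random_float_offset(1.0f)) + 0.5f;
  rgb[2] = 0.5f * snoise1(p + random_float_offset(2.0f)) + 0.5f;
}

int main(void)
{
  float rgb[3];
  noise_texture_1d(3.7f, 1.0f, rgb);
  printf("%.3f %.3f %.3f\n", rgb[0], rgb[1], rgb[2]);
  return 0;
}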
diff --git a/intern/cycles/kernel/shaders/node_normal.osl b/intern/cycles/kernel/shaders/node_normal.osl
index 1d20c3e7cac..a0a88445427 100644
--- a/intern/cycles/kernel/shaders/node_normal.osl
+++ b/intern/cycles/kernel/shaders/node_normal.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_normal(normal direction = normal(0.0, 0.0, 0.0),
normal NormalIn = normal(0.0, 0.0, 0.0),
diff --git a/intern/cycles/kernel/shaders/node_normal_map.osl b/intern/cycles/kernel/shaders/node_normal_map.osl
index 90b593d00bc..912960f13ab 100644
--- a/intern/cycles/kernel/shaders/node_normal_map.osl
+++ b/intern/cycles/kernel/shaders/node_normal_map.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_normal_map(normal NormalIn = N,
float Strength = 1.0,
diff --git a/intern/cycles/kernel/shaders/node_object_info.osl b/intern/cycles/kernel/shaders/node_object_info.osl
index 0904a30a53f..44513d9a1ba 100644
--- a/intern/cycles/kernel/shaders/node_object_info.osl
+++ b/intern/cycles/kernel/shaders/node_object_info.osl
@@ -14,14 +14,16 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_object_info(output point Location = point(0.0, 0.0, 0.0),
+ output color Color = color(1.0, 1.0, 1.0),
output float ObjectIndex = 0.0,
output float MaterialIndex = 0.0,
output float Random = 0.0)
{
getattribute("object:location", Location);
+ getattribute("object:color", Color);
getattribute("object:index", ObjectIndex);
getattribute("material:index", MaterialIndex);
getattribute("object:random", Random);
diff --git a/intern/cycles/kernel/shaders/node_output_displacement.osl b/intern/cycles/kernel/shaders/node_output_displacement.osl
index fa7f603980b..bd60fc2b7e1 100644
--- a/intern/cycles/kernel/shaders/node_output_displacement.osl
+++ b/intern/cycles/kernel/shaders/node_output_displacement.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
displacement node_output_displacement(vector Displacement = 0.0)
{
diff --git a/intern/cycles/kernel/shaders/node_output_surface.osl b/intern/cycles/kernel/shaders/node_output_surface.osl
index 013666145da..cd746f79c4a 100644
--- a/intern/cycles/kernel/shaders/node_output_surface.osl
+++ b/intern/cycles/kernel/shaders/node_output_surface.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
surface node_output_surface(closure color Surface = 0)
{
diff --git a/intern/cycles/kernel/shaders/node_output_volume.osl b/intern/cycles/kernel/shaders/node_output_volume.osl
index dd479e751b3..4cc14cd6699 100644
--- a/intern/cycles/kernel/shaders/node_output_volume.osl
+++ b/intern/cycles/kernel/shaders/node_output_volume.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
volume node_output_volume(closure color Volume = 0)
{
diff --git a/intern/cycles/kernel/shaders/node_particle_info.osl b/intern/cycles/kernel/shaders/node_particle_info.osl
index e286c33a1ff..2dcdf3d0f3c 100644
--- a/intern/cycles/kernel/shaders/node_particle_info.osl
+++ b/intern/cycles/kernel/shaders/node_particle_info.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_particle_info(output float Index = 0.0,
output float Random = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_principled_bsdf.osl b/intern/cycles/kernel/shaders/node_principled_bsdf.osl
index 657ced9b6e6..1711811ac65 100644
--- a/intern/cycles/kernel/shaders/node_principled_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_principled_bsdf.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_fresnel.h"
+#include "stdcycles.h"
shader node_principled_bsdf(string distribution = "Multiscatter GGX",
string subsurface_method = "burley",
diff --git a/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl b/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl
index bf986438fca..4cf17e0e703 100644
--- a/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
color log3(color a)
{
diff --git a/intern/cycles/kernel/shaders/node_principled_volume.osl b/intern/cycles/kernel/shaders/node_principled_volume.osl
index 39cf6837eb2..0cb4cdebdaa 100644
--- a/intern/cycles/kernel/shaders/node_principled_volume.osl
+++ b/intern/cycles/kernel/shaders/node_principled_volume.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_principled_volume(color Color = color(0.5, 0.5, 0.5),
float Density = 1.0,
diff --git a/intern/cycles/kernel/shaders/node_refraction_bsdf.osl b/intern/cycles/kernel/shaders/node_refraction_bsdf.osl
index 941d99dd44d..9e9b31d9a87 100644
--- a/intern/cycles/kernel/shaders/node_refraction_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_refraction_bsdf.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_refraction_bsdf(color Color = 0.8,
string distribution = "sharp",
diff --git a/intern/cycles/kernel/shaders/node_rgb_curves.osl b/intern/cycles/kernel/shaders/node_rgb_curves.osl
index e34eb027cc3..8850040d580 100644
--- a/intern/cycles/kernel/shaders/node_rgb_curves.osl
+++ b/intern/cycles/kernel/shaders/node_rgb_curves.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_ramp_util.h"
+#include "stdcycles.h"
shader node_rgb_curves(color ramp[] = {0.0},
float min_x = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_rgb_ramp.osl b/intern/cycles/kernel/shaders/node_rgb_ramp.osl
index c9f9746a4fb..2131edb2688 100644
--- a/intern/cycles/kernel/shaders/node_rgb_ramp.osl
+++ b/intern/cycles/kernel/shaders/node_rgb_ramp.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_ramp_util.h"
+#include "stdcycles.h"
shader node_rgb_ramp(color ramp_color[] = {0.0},
float ramp_alpha[] = {0.0},
diff --git a/intern/cycles/kernel/shaders/node_rgb_to_bw.osl b/intern/cycles/kernel/shaders/node_rgb_to_bw.osl
index 837d6caf5fc..f0a094d5b57 100644
--- a/intern/cycles/kernel/shaders/node_rgb_to_bw.osl
+++ b/intern/cycles/kernel/shaders/node_rgb_to_bw.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_rgb_to_bw(color Color = 0.0, output float Val = 0.0)
{
diff --git a/intern/cycles/kernel/shaders/node_scatter_volume.osl b/intern/cycles/kernel/shaders/node_scatter_volume.osl
index fce5716f372..36ad952dee6 100644
--- a/intern/cycles/kernel/shaders/node_scatter_volume.osl
+++ b/intern/cycles/kernel/shaders/node_scatter_volume.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_scatter_volume(color Color = color(0.8, 0.8, 0.8),
float Density = 1.0,
diff --git a/intern/cycles/kernel/shaders/node_separate_hsv.osl b/intern/cycles/kernel/shaders/node_separate_hsv.osl
index c77ed1f3755..2f902b72dbc 100644
--- a/intern/cycles/kernel/shaders/node_separate_hsv.osl
+++ b/intern/cycles/kernel/shaders/node_separate_hsv.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_color.h"
+#include "stdcycles.h"
shader node_separate_hsv(color Color = 0.8,
output float H = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_separate_rgb.osl b/intern/cycles/kernel/shaders/node_separate_rgb.osl
index ee64add27e2..62e4aedb879 100644
--- a/intern/cycles/kernel/shaders/node_separate_rgb.osl
+++ b/intern/cycles/kernel/shaders/node_separate_rgb.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_separate_rgb(color Image = 0.8,
output float R = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_separate_xyz.osl b/intern/cycles/kernel/shaders/node_separate_xyz.osl
index 8a563f5e920..acaf3942b6f 100644
--- a/intern/cycles/kernel/shaders/node_separate_xyz.osl
+++ b/intern/cycles/kernel/shaders/node_separate_xyz.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_separate_xyz(vector Vector = 0.8,
output float X = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_set_normal.osl b/intern/cycles/kernel/shaders/node_set_normal.osl
index 9541b829ef7..26a97e2b5d1 100644
--- a/intern/cycles/kernel/shaders/node_set_normal.osl
+++ b/intern/cycles/kernel/shaders/node_set_normal.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
surface node_set_normal(normal Direction = N, output normal Normal = N)
{
diff --git a/intern/cycles/kernel/shaders/node_sky_texture.osl b/intern/cycles/kernel/shaders/node_sky_texture.osl
index 9b29e5489c2..a12e7a9dc17 100644
--- a/intern/cycles/kernel/shaders/node_sky_texture.osl
+++ b/intern/cycles/kernel/shaders/node_sky_texture.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_color.h"
+#include "stdcycles.h"
float sky_angle_between(float thetav, float phiv, float theta, float phi)
{
@@ -44,13 +44,13 @@ float sky_perez_function(float lam[9], float theta, float gamma)
(1.0 + lam[2] * exp(lam[3] * gamma) + lam[4] * cgamma * cgamma);
}
-color sky_radiance_old(normal dir,
- float sunphi,
- float suntheta,
- color radiance,
- float config_x[9],
- float config_y[9],
- float config_z[9])
+color sky_radiance_preetham(normal dir,
+ float sunphi,
+ float suntheta,
+ color radiance,
+ float config_x[9],
+ float config_y[9],
+ float config_z[9])
{
/* convert vector to spherical coordinates */
vector spherical = sky_spherical_coordinates(dir);
@@ -88,13 +88,13 @@ float sky_radiance_internal(float config[9], float theta, float gamma)
(config[2] + config[3] * expM + config[5] * rayM + config[6] * mieM + config[7] * zenith);
}
-color sky_radiance_new(normal dir,
- float sunphi,
- float suntheta,
- color radiance,
- float config_x[9],
- float config_y[9],
- float config_z[9])
+color sky_radiance_hosek(normal dir,
+ float sunphi,
+ float suntheta,
+ color radiance,
+ float config_x[9],
+ float config_y[9],
+ float config_z[9])
{
/* convert vector to spherical coordinates */
vector spherical = sky_spherical_coordinates(dir);
@@ -116,25 +116,122 @@ color sky_radiance_new(normal dir,
return xyz_to_rgb(x, y, z) * (M_2PI / 683);
}
-shader node_sky_texture(int use_mapping = 0,
- matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
- vector Vector = P,
- string type = "hosek_wilkie",
- float theta = 0.0,
- float phi = 0.0,
- color radiance = color(0.0, 0.0, 0.0),
- float config_x[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
- float config_y[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
- float config_z[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
- output color Color = color(0.0, 0.0, 0.0))
+/* Nishita improved */
+vector geographical_to_direction(float lat, float lon)
+{
+ return vector(cos(lat) * cos(lon), cos(lat) * sin(lon), sin(lat));
+}
+
+float precise_angle(vector a, vector b)
+{
+ return 2.0 * atan2(length(a - b), length(a + b));
+}
+
+color sky_radiance_nishita(vector dir, float nishita_data[10], string filename)
+{
+ /* definitions */
+ float sun_elevation = nishita_data[6];
+ float sun_rotation = nishita_data[7];
+ float angular_diameter = nishita_data[8];
+ float sun_intensity = nishita_data[9];
+ int sun_disc = angular_diameter > 0;
+ float alpha = 1.0;
+ color xyz;
+ /* convert dir to spherical coordinates */
+ vector direction = sky_spherical_coordinates(dir);
+
+ /* render above the horizon */
+ if (dir[2] >= 0.0) {
+ /* definitions */
+ vector sun_dir = geographical_to_direction(sun_elevation, sun_rotation + M_PI_2);
+ float sun_dir_angle = precise_angle(dir, sun_dir);
+ float half_angular = angular_diameter / 2.0;
+ float dir_elevation = M_PI_2 - direction[0];
+
+ /* if ray inside sun disc render it, otherwise render sky */
+ if (sun_dir_angle < half_angular && sun_disc == 1) {
+ /* get 2 pixels data */
+ color pixel_bottom = color(nishita_data[0], nishita_data[1], nishita_data[2]);
+ color pixel_top = color(nishita_data[3], nishita_data[4], nishita_data[5]);
+ float y;
+
+ /* sun interpolation */
+ if (sun_elevation - half_angular > 0.0) {
+ if ((sun_elevation + half_angular) > 0.0) {
+ y = ((dir_elevation - sun_elevation) / angular_diameter) + 0.5;
+ xyz = mix(pixel_bottom, pixel_top, y) * sun_intensity;
+ }
+ }
+ else {
+ if (sun_elevation + half_angular > 0.0) {
+ y = dir_elevation / (sun_elevation + half_angular);
+ xyz = mix(pixel_bottom, pixel_top, y) * sun_intensity;
+ }
+ }
+ /* limb darkening, coefficient is 0.6f */
+ float angle_fraction = sun_dir_angle / half_angular;
+ float limb_darkening = (1.0 - 0.6 * (1.0 - sqrt(1.0 - angle_fraction * angle_fraction)));
+ xyz *= limb_darkening;
+ }
+ /* sky */
+ else {
+ /* sky interpolation */
+ float x = (direction[1] + M_PI + sun_rotation) / M_2PI;
+ /* more pixels toward horizon compensation */
+ float y = 1.0 - sqrt(dir_elevation / M_PI_2);
+ if (x > 1.0) {
+ x = x - 1.0;
+ }
+ xyz = (color)texture(filename, x, y, "wrap", "clamp", "interp", "linear", "alpha", alpha);
+ }
+ }
+ /* ground */
+ else {
+ if (dir[2] < -0.4) {
+ xyz = color(0, 0, 0);
+ }
+ else {
+ /* black ground fade */
+ float mul = pow(1.0 + dir[2] * 2.5, 3.0);
+ /* interpolation */
+ float x = (direction[1] + M_PI + sun_rotation) / M_2PI;
+ float y = 1.5;
+ if (x > 1.0) {
+ x = x - 1.0;
+ }
+ xyz = (color)texture(
+ filename, x, y, "wrap", "periodic", "interp", "linear", "alpha", alpha) *
+ mul;
+ }
+ }
+ /* convert to RGB */
+ return xyz_to_rgb(xyz[0], xyz[1], xyz[2]);
+}
+
+shader node_sky_texture(
+ int use_mapping = 0,
+ matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+ vector Vector = P,
+ string type = "hosek_wilkie",
+ float theta = 0.0,
+ float phi = 0.0,
+ string filename = "",
+ color radiance = color(0.0, 0.0, 0.0),
+ float config_x[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
+ float config_y[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
+ float config_z[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
+ float nishita_data[10] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
+ output color Color = color(0.0, 0.0, 0.0))
{
vector p = Vector;
if (use_mapping)
p = transform(mapping, p);
+ if (type == "nishita_improved")
+ Color = sky_radiance_nishita(p, nishita_data, filename);
if (type == "hosek_wilkie")
- Color = sky_radiance_new(p, phi, theta, radiance, config_x, config_y, config_z);
- else
- Color = sky_radiance_old(p, phi, theta, radiance, config_x, config_y, config_z);
+ Color = sky_radiance_hosek(p, phi, theta, radiance, config_x, config_y, config_z);
+ if (type == "preetham")
+ Color = sky_radiance_preetham(p, phi, theta, radiance, config_x, config_y, config_z);
}
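
The sun disc in sky_radiance_nishita() above is attenuated by a simple limb-darkening curve (per the comment, a fixed coefficient of 0.6). Below is a minimal standalone OSL sketch of just that falloff, compiled against a stock stdosl.h rather than the Cycles stdcycles.h; the shader and parameter names are illustrative only.

#include "stdosl.h"

/* Limb-darkening factor for a point on the sun disc, where AngleFraction is
 * 0.0 at the disc center and 1.0 at the rim. Illustrative sketch only. */
shader example_limb_darkening(float AngleFraction = 0.0,
                              float Coefficient = 0.6,
                              output float Factor = 1.0)
{
  float f = clamp(AngleFraction, 0.0, 1.0);
  /* 1.0 at the center, (1.0 - Coefficient) at the rim. */
  Factor = 1.0 - Coefficient * (1.0 - sqrt(1.0 - f * f));
}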
diff --git a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl
index e12199d8c3d..b1e854150ab 100644
--- a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl
+++ b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_subsurface_scattering(color Color = 0.8,
float Scale = 1.0,
diff --git a/intern/cycles/kernel/shaders/node_tangent.osl b/intern/cycles/kernel/shaders/node_tangent.osl
index 44eb9973f3d..83f19a4610b 100644
--- a/intern/cycles/kernel/shaders/node_tangent.osl
+++ b/intern/cycles/kernel/shaders/node_tangent.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_tangent(normal NormalIn = N,
string attr_name = "geom:tangent",
diff --git a/intern/cycles/kernel/shaders/node_texture.h b/intern/cycles/kernel/shaders/node_texture.h
deleted file mode 100644
index e1f3b900ee5..00000000000
--- a/intern/cycles/kernel/shaders/node_texture.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Voronoi / Worley like */
-
-color cellnoise_color(point p)
-{
- float r = cellnoise(p);
- float g = cellnoise(point(p[1], p[0], p[2]));
- float b = cellnoise(point(p[1], p[2], p[0]));
-
- return color(r, g, b);
-}
-
-void voronoi(point p, float e, float da[4], point pa[4])
-{
- /* returns distances in da and point coords in pa */
- int xx, yy, zz, xi, yi, zi;
-
- xi = (int)floor(p[0]);
- yi = (int)floor(p[1]);
- zi = (int)floor(p[2]);
-
- da[0] = 1e10;
- da[1] = 1e10;
- da[2] = 1e10;
- da[3] = 1e10;
-
- for (xx = xi - 1; xx <= xi + 1; xx++) {
- for (yy = yi - 1; yy <= yi + 1; yy++) {
- for (zz = zi - 1; zz <= zi + 1; zz++) {
- point ip = point(xx, yy, zz);
- point vp = (point)cellnoise_color(ip);
- point pd = p - (vp + ip);
- float d = dot(pd, pd);
-
- vp += point(xx, yy, zz);
-
- if (d < da[0]) {
- da[3] = da[2];
- da[2] = da[1];
- da[1] = da[0];
- da[0] = d;
-
- pa[3] = pa[2];
- pa[2] = pa[1];
- pa[1] = pa[0];
- pa[0] = vp;
- }
- else if (d < da[1]) {
- da[3] = da[2];
- da[2] = da[1];
- da[1] = d;
-
- pa[3] = pa[2];
- pa[2] = pa[1];
- pa[1] = vp;
- }
- else if (d < da[2]) {
- da[3] = da[2];
- da[2] = d;
-
- pa[3] = pa[2];
- pa[2] = vp;
- }
- else if (d < da[3]) {
- da[3] = d;
- pa[3] = vp;
- }
- }
- }
- }
-}
-
-/* Noise Bases */
-
-float safe_noise(point p, string type)
-{
- float f = 0.0;
-
- /* Perlin noise in range -1..1 */
- if (type == "signed")
- f = noise("perlin", p);
-
- /* Perlin noise in range 0..1 */
- else
- f = noise(p);
-
- /* can happen for big coordinates, things even out to 0.5 then anyway */
- if (!isfinite(f))
- return 0.5;
-
- return f;
-}
-
-/* Turbulence */
-
-float noise_turbulence(point p, float details, int hard)
-{
- float fscale = 1.0;
- float amp = 1.0;
- float sum = 0.0;
- int i, n;
-
- float octaves = clamp(details, 0.0, 16.0);
- n = (int)octaves;
-
- for (i = 0; i <= n; i++) {
- float t = safe_noise(fscale * p, "unsigned");
-
- if (hard)
- t = fabs(2.0 * t - 1.0);
-
- sum += t * amp;
- amp *= 0.5;
- fscale *= 2.0;
- }
-
- float rmd = octaves - floor(octaves);
-
- if (rmd != 0.0) {
- float t = safe_noise(fscale * p, "unsigned");
-
- if (hard)
- t = fabs(2.0 * t - 1.0);
-
- float sum2 = sum + t * amp;
-
- sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
- sum2 *= ((float)(1 << (n + 1)) / (float)((1 << (n + 2)) - 1));
-
- return (1.0 - rmd) * sum + rmd * sum2;
- }
- else {
- sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
- return sum;
- }
-}
-
-/* Utility */
-
-float nonzero(float f, float eps)
-{
- float r;
-
- if (abs(f) < eps)
- r = sign(f) * eps;
- else
- r = f;
-
- return r;
-}
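
The turbulence helper removed here is superseded by fractal_noise() from node_noise.h, which node_wave_texture.osl below now calls with a detail count, scale and roughness. The snippet that follows is only a generic fBm-style sketch of that kind of octave accumulation, under assumed semantics (detail = octave count, roughness = per-octave amplitude falloff); it is not the node_noise.h implementation, and all names are illustrative.

#include "stdosl.h"

/* Generic fBm sketch: sum progressively finer octaves of unsigned Perlin
 * noise, each scaled down by `roughness`, then normalize back to ~0..1. */
float fbm_sketch(point p, float detail, float roughness)
{
  float fscale = 1.0;
  float amp = 1.0;
  float maxamp = 0.0;
  float sum = 0.0;
  int n = (int)clamp(detail, 0.0, 16.0);

  for (int i = 0; i <= n; i++) {
    float t = noise(p * fscale); /* unsigned Perlin, roughly 0..1 */
    sum += t * amp;
    maxamp += amp;
    amp *= clamp(roughness, 0.0, 1.0);
    fscale *= 2.0;
  }
  return sum / maxamp;
}

shader example_fbm(point Vector = P,
                   float Detail = 2.0,
                   float Roughness = 0.5,
                   output float Fac = 0.0)
{
  Fac = fbm_sketch(Vector, Detail, Roughness);
}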
diff --git a/intern/cycles/kernel/shaders/node_texture_coordinate.osl b/intern/cycles/kernel/shaders/node_texture_coordinate.osl
index 13861653d04..ac05e984af2 100644
--- a/intern/cycles/kernel/shaders/node_texture_coordinate.osl
+++ b/intern/cycles/kernel/shaders/node_texture_coordinate.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_texture_coordinate(
normal NormalIn = N,
diff --git a/intern/cycles/kernel/shaders/node_toon_bsdf.osl b/intern/cycles/kernel/shaders/node_toon_bsdf.osl
index ed3a0b25c60..4a44730c70c 100644
--- a/intern/cycles/kernel/shaders/node_toon_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_toon_bsdf.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_toon_bsdf(color Color = 0.8,
string component = "diffuse",
diff --git a/intern/cycles/kernel/shaders/node_translucent_bsdf.osl b/intern/cycles/kernel/shaders/node_translucent_bsdf.osl
index 7ce1ab08c59..23a562bf34d 100644
--- a/intern/cycles/kernel/shaders/node_translucent_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_translucent_bsdf.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_translucent_bsdf(color Color = 0.8, normal Normal = N, output closure color BSDF = 0)
{
diff --git a/intern/cycles/kernel/shaders/node_transparent_bsdf.osl b/intern/cycles/kernel/shaders/node_transparent_bsdf.osl
index a735513ba89..eb737a05c41 100644
--- a/intern/cycles/kernel/shaders/node_transparent_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_transparent_bsdf.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_transparent_bsdf(color Color = 0.8, normal Normal = N, output closure color BSDF = 0)
{
diff --git a/intern/cycles/kernel/shaders/node_uv_map.osl b/intern/cycles/kernel/shaders/node_uv_map.osl
index 6f2887be63c..88d8c5ba394 100644
--- a/intern/cycles/kernel/shaders/node_uv_map.osl
+++ b/intern/cycles/kernel/shaders/node_uv_map.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_uv_map(int from_dupli = 0,
string attribute = "",
diff --git a/intern/cycles/kernel/shaders/node_value.osl b/intern/cycles/kernel/shaders/node_value.osl
index 398e2c0e392..13197b9a27a 100644
--- a/intern/cycles/kernel/shaders/node_value.osl
+++ b/intern/cycles/kernel/shaders/node_value.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_value(float value_value = 0.0,
vector vector_value = vector(0.0, 0.0, 0.0),
diff --git a/intern/cycles/kernel/shaders/node_vector_curves.osl b/intern/cycles/kernel/shaders/node_vector_curves.osl
index e8c8036b550..9d3a2b82b0a 100644
--- a/intern/cycles/kernel/shaders/node_vector_curves.osl
+++ b/intern/cycles/kernel/shaders/node_vector_curves.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_ramp_util.h"
+#include "stdcycles.h"
shader node_vector_curves(color ramp[] = {0.0},
float min_x = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_vector_displacement.osl b/intern/cycles/kernel/shaders/node_vector_displacement.osl
index e9bd336347f..7cd9c2a37f2 100644
--- a/intern/cycles/kernel/shaders/node_vector_displacement.osl
+++ b/intern/cycles/kernel/shaders/node_vector_displacement.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_vector_displacement(color Vector = color(0.0, 0.0, 0.0),
float Midlevel = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_vector_math.osl b/intern/cycles/kernel/shaders/node_vector_math.osl
index 10bb0c7283c..218851598b4 100644
--- a/intern/cycles/kernel/shaders/node_vector_math.osl
+++ b/intern/cycles/kernel/shaders/node_vector_math.osl
@@ -14,36 +14,90 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "node_math.h"
+#include "stdcycles.h"
shader node_vector_math(string type = "add",
vector Vector1 = vector(0.0, 0.0, 0.0),
vector Vector2 = vector(0.0, 0.0, 0.0),
+ vector Vector3 = vector(0.0, 0.0, 0.0),
+ float Scale = 1.0,
output float Value = 0.0,
output vector Vector = vector(0.0, 0.0, 0.0))
{
if (type == "add") {
Vector = Vector1 + Vector2;
- Value = (abs(Vector[0]) + abs(Vector[1]) + abs(Vector[2])) / 3.0;
}
else if (type == "subtract") {
Vector = Vector1 - Vector2;
- Value = (abs(Vector[0]) + abs(Vector[1]) + abs(Vector[2])) / 3.0;
}
- else if (type == "average") {
- Value = length(Vector1 + Vector2);
- Vector = normalize(Vector1 + Vector2);
+ else if (type == "multiply") {
+ Vector = Vector1 * Vector2;
+ }
+ else if (type == "divide") {
+ Vector = safe_divide(Vector1, Vector2);
+ }
+ else if (type == "cross_product") {
+ Vector = cross(Vector1, Vector2);
+ }
+ else if (type == "project") {
+ Vector = project(Vector1, Vector2);
+ }
+ else if (type == "reflect") {
+ Vector = reflect(Vector1, normalize(Vector2));
}
else if (type == "dot_product") {
Value = dot(Vector1, Vector2);
}
- else if (type == "cross_product") {
- vector c = cross(Vector1, Vector2);
- Value = length(c);
- Vector = normalize(c);
+ else if (type == "distance") {
+ Value = distance(Vector1, Vector2);
}
- else if (type == "normalize") {
+ else if (type == "length") {
Value = length(Vector1);
+ }
+ else if (type == "scale") {
+ Vector = Vector1 * Scale;
+ }
+ else if (type == "normalize") {
Vector = normalize(Vector1);
}
+ else if (type == "snap") {
+ Vector = snap(Vector1, Vector2);
+ }
+ else if (type == "floor") {
+ Vector = floor(Vector1);
+ }
+ else if (type == "ceil") {
+ Vector = ceil(Vector1);
+ }
+ else if (type == "modulo") {
+ Vector = fmod(Vector1, Vector2);
+ }
+ else if (type == "wrap") {
+ Vector = wrap(Vector1, Vector2, Vector3);
+ }
+ else if (type == "fraction") {
+ Vector = Vector1 - floor(Vector1);
+ }
+ else if (type == "absolute") {
+ Vector = abs(Vector1);
+ }
+ else if (type == "minimum") {
+ Vector = min(Vector1, Vector2);
+ }
+ else if (type == "maximum") {
+ Vector = max(Vector1, Vector2);
+ }
+ else if (type == "sine") {
+ Vector = sin(Vector1);
+ }
+ else if (type == "cosine") {
+ Vector = cos(Vector1);
+ }
+ else if (type == "tangent") {
+ Vector = tan(Vector1);
+ }
+ else {
+ warning("%s", "Unknown vector math operator!");
+ }
}
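
Several of the new vector math operations delegate to helpers from node_math.h (safe_divide, snap, wrap). The sketch below illustrates only the zero-safe division convention, matching the behaviour of the vector2/vector4 safe_divide definitions added further down in node_voronoi_texture.osl (a zero divisor component yields 0.0); the actual node_math.h helper may differ in detail, and the names here are illustrative.

#include "stdosl.h"

/* Per-component division that maps zero-divisor components to 0.0. */
vector safe_divide_sketch(vector a, vector b)
{
  return vector((b[0] != 0.0) ? a[0] / b[0] : 0.0,
                (b[1] != 0.0) ? a[1] / b[1] : 0.0,
                (b[2] != 0.0) ? a[2] / b[2] : 0.0);
}

shader example_safe_divide(vector A = vector(1.0, 2.0, 3.0),
                           vector B = vector(2.0, 0.0, 4.0),
                           output vector Out = vector(0.0, 0.0, 0.0))
{
  Out = safe_divide_sketch(A, B); /* -> (0.5, 0.0, 0.75) */
}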
diff --git a/intern/cycles/kernel/shaders/node_vector_rotate.osl b/intern/cycles/kernel/shaders/node_vector_rotate.osl
new file mode 100644
index 00000000000..2efe3470ae2
--- /dev/null
+++ b/intern/cycles/kernel/shaders/node_vector_rotate.osl
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "node_math.h"
+#include "stdcycles.h"
+
+shader node_vector_rotate(int invert = 0,
+ string type = "axis",
+ vector VectorIn = vector(0.0, 0.0, 0.0),
+ point Center = point(0.0, 0.0, 0.0),
+ point Rotation = point(0.0, 0.0, 0.0),
+ vector Axis = vector(0.0, 0.0, 1.0),
+ float Angle = 0.0,
+ output vector VectorOut = vector(0.0, 0.0, 0.0))
+{
+ if (type == "euler_xyz") {
+ matrix rmat = (invert) ? transpose(euler_to_mat(Rotation)) : euler_to_mat(Rotation);
+ VectorOut = transform(rmat, VectorIn - Center) + Center;
+ }
+ else {
+ float a = (invert) ? -Angle : Angle;
+ if (type == "x_axis") {
+ VectorOut = rotate(VectorIn - Center, a, point(0.0), vector(1.0, 0.0, 0.0)) + Center;
+ }
+ else if (type == "y_axis") {
+ VectorOut = rotate(VectorIn - Center, a, point(0.0), vector(0.0, 1.0, 0.0)) + Center;
+ }
+ else if (type == "z_axis") {
+ VectorOut = rotate(VectorIn - Center, a, point(0.0), vector(0.0, 0.0, 1.0)) + Center;
+ }
+ else { // axis
+ VectorOut = (length(Axis) != 0.0) ? rotate(VectorIn - Center, a, point(0.0), Axis) + Center :
+ VectorIn;
+ }
+ }
+}
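
Every mode of the new Vector Rotate node reduces to the same pivot pattern: translate so Center sits at the origin, rotate about an axis through the origin (either with euler_to_mat() or the built-in rotate()), then translate back. A minimal standalone sketch of that pattern using the same rotate() call, with illustrative names and a stock stdosl.h:

#include "stdosl.h"

shader example_rotate_about_pivot(vector In = vector(1.0, 0.0, 0.0),
                                  point Pivot = point(0.0, 0.0, 0.0),
                                  vector Axis = vector(0.0, 0.0, 1.0),
                                  float Angle = 1.5707963,
                                  output vector Out = vector(0.0, 0.0, 0.0))
{
  /* Fall back to the Z axis if a degenerate axis is supplied. */
  vector axis = (length(Axis) != 0.0) ? Axis : vector(0.0, 0.0, 1.0);
  /* Shift to the pivot, rotate about the axis through the origin, shift back. */
  Out = rotate(In - Pivot, Angle, point(0.0), axis) + Pivot;
}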
diff --git a/intern/cycles/kernel/shaders/node_vector_transform.osl b/intern/cycles/kernel/shaders/node_vector_transform.osl
index 22939577be0..1db799cfc9e 100644
--- a/intern/cycles/kernel/shaders/node_vector_transform.osl
+++ b/intern/cycles/kernel/shaders/node_vector_transform.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_vector_transform(string type = "vector",
string convert_from = "world",
diff --git a/intern/cycles/kernel/shaders/node_velvet_bsdf.osl b/intern/cycles/kernel/shaders/node_velvet_bsdf.osl
index 9290b845325..299acef35ee 100644
--- a/intern/cycles/kernel/shaders/node_velvet_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_velvet_bsdf.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_fresnel.h"
+#include "stdcycles.h"
shader node_velvet_bsdf(color Color = 0.8,
float Sigma = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_vertex_color.osl b/intern/cycles/kernel/shaders/node_vertex_color.osl
new file mode 100644
index 00000000000..ffaf7a2f720
--- /dev/null
+++ b/intern/cycles/kernel/shaders/node_vertex_color.osl
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "stdcycles.h"
+
+shader node_vertex_color(string bump_offset = "center",
+ string layer_name = "",
+ output color Color = 0.0,
+ output float Alpha = 0.0)
+{
+ float vertex_color[4];
+ string vertex_color_layer;
+
+ if (layer_name == "") {
+ vertex_color_layer = "geom:vertex_color";
+ }
+ else {
+ vertex_color_layer = layer_name;
+ }
+
+ if (getattribute(vertex_color_layer, vertex_color)) {
+ Color = color(vertex_color[0], vertex_color[1], vertex_color[2]);
+ Alpha = vertex_color[3];
+
+ if (bump_offset == "dx") {
+ Color += Dx(Color);
+ Alpha += Dx(Alpha);
+ }
+ else if (bump_offset == "dy") {
+ Color += Dy(Color);
+ Alpha += Dy(Alpha);
+ }
+ }
+ else {
+ warning("%s", "Invalid attribute.");
+ }
+}
diff --git a/intern/cycles/kernel/shaders/node_voronoi_texture.osl b/intern/cycles/kernel/shaders/node_voronoi_texture.osl
index 34c86d5b98d..04d61c32f8a 100644
--- a/intern/cycles/kernel/shaders/node_voronoi_texture.osl
+++ b/intern/cycles/kernel/shaders/node_voronoi_texture.osl
@@ -14,151 +14,1014 @@
* limitations under the License.
*/
-#include "stdosl.h"
-#include "node_texture.h"
-
-void voronoi_m(point p, string metric, float e, float da[4], point pa[4])
-{
- /* Compute the distance to and the position of the four closest neighbors to p.
- *
- * The neighbors are randomly placed, 1 each in a 3x3x3 grid (Worley pattern).
- * The distances and points are returned in ascending order, i.e. da[0] and pa[0] will
- * contain the distance to the closest point and its coordinates respectively.
- */
- int xx, yy, zz, xi, yi, zi;
-
- xi = (int)floor(p[0]);
- yi = (int)floor(p[1]);
- zi = (int)floor(p[2]);
-
- da[0] = 1e10;
- da[1] = 1e10;
- da[2] = 1e10;
- da[3] = 1e10;
-
- for (xx = xi - 1; xx <= xi + 1; xx++) {
- for (yy = yi - 1; yy <= yi + 1; yy++) {
- for (zz = zi - 1; zz <= zi + 1; zz++) {
- point ip = point(xx, yy, zz);
- point vp = (point)cellnoise_color(ip);
- point pd = p - (vp + ip);
-
- float d = 0.0;
- if (metric == "distance") {
- d = dot(pd, pd);
+#include "node_hash.h"
+#include "stdcycles.h"
+#include "vector2.h"
+#include "vector4.h"
+
+#define vector3 point
+
+/* **** Distance Functions **** */
+
+float distance(float a, float b)
+{
+ return abs(a - b);
+}
+
+float distance(vector2 a, vector2 b)
+{
+ return length(a - b);
+}
+
+float distance(vector4 a, vector4 b)
+{
+ return length(a - b);
+}
+
+/* **** Safe Division **** */
+
+vector2 safe_divide(vector2 a, float b)
+{
+ return vector2((b != 0.0) ? a.x / b : 0.0, (b != 0.0) ? a.y / b : 0.0);
+}
+
+vector4 safe_divide(vector4 a, float b)
+{
+ return vector4((b != 0.0) ? a.x / b : 0.0,
+ (b != 0.0) ? a.y / b : 0.0,
+ (b != 0.0) ? a.z / b : 0.0,
+ (b != 0.0) ? a.w / b : 0.0);
+}
+
+/*
+ * Smooth Voronoi:
+ *
+ * - https://wiki.blender.org/wiki/User:OmarSquircleArt/GSoC2019/Documentation/Smooth_Voronoi
+ *
+ * Distance To Edge:
+ *
+ * - https://www.shadertoy.com/view/llG3zy
+ *
+ */
+
+/* **** 1D Voronoi **** */
+
+float voronoi_distance(float a, float b, string metric, float exponent)
+{
+ return abs(a - b);
+}
+
+void voronoi_f1_1d(float w,
+ float exponent,
+ float randomness,
+ string metric,
+ output float outDistance,
+ output color outColor,
+ output float outW)
+{
+ float cellPosition = floor(w);
+ float localPosition = w - cellPosition;
+
+ float minDistance = 8.0;
+ float targetOffset, targetPosition;
+ for (int i = -1; i <= 1; i++) {
+ float cellOffset = float(i);
+ float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance(pointPosition, localPosition, metric, exponent);
+ if (distanceToPoint < minDistance) {
+ targetOffset = cellOffset;
+ minDistance = distanceToPoint;
+ targetPosition = pointPosition;
+ }
+ }
+ outDistance = minDistance;
+ outColor = hash_float_to_color(cellPosition + targetOffset);
+ outW = targetPosition + cellPosition;
+}
+
+void voronoi_smooth_f1_1d(float w,
+ float smoothness,
+ float exponent,
+ float randomness,
+ string metric,
+ output float outDistance,
+ output color outColor,
+ output float outW)
+{
+ float cellPosition = floor(w);
+ float localPosition = w - cellPosition;
+
+ float smoothDistance = 8.0;
+ float smoothPosition = 0.0;
+ color smoothColor = color(0.0);
+ for (int i = -2; i <= 2; i++) {
+ float cellOffset = float(i);
+ float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance(pointPosition, localPosition, metric, exponent);
+ float h = smoothstep(0.0, 1.0, 0.5 + 0.5 * (smoothDistance - distanceToPoint) / smoothness);
+ float correctionFactor = smoothness * h * (1.0 - h);
+ smoothDistance = mix(smoothDistance, distanceToPoint, h) - correctionFactor;
+ correctionFactor /= 1.0 + 3.0 * smoothness;
+ color cellColor = hash_float_to_color(cellPosition + cellOffset);
+ smoothColor = mix(smoothColor, cellColor, h) - correctionFactor;
+ smoothPosition = mix(smoothPosition, pointPosition, h) - correctionFactor;
+ }
+ outDistance = smoothDistance;
+ outColor = smoothColor;
+ outW = cellPosition + smoothPosition;
+}
+
+void voronoi_f2_1d(float w,
+ float exponent,
+ float randomness,
+ string metric,
+ output float outDistance,
+ output color outColor,
+ output float outW)
+{
+ float cellPosition = floor(w);
+ float localPosition = w - cellPosition;
+
+ float distanceF1 = 8.0;
+ float distanceF2 = 8.0;
+ float offsetF1 = 0.0;
+ float positionF1 = 0.0;
+ float offsetF2, positionF2;
+ for (int i = -1; i <= 1; i++) {
+ float cellOffset = float(i);
+ float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance(pointPosition, localPosition, metric, exponent);
+ if (distanceToPoint < distanceF1) {
+ distanceF2 = distanceF1;
+ distanceF1 = distanceToPoint;
+ offsetF2 = offsetF1;
+ offsetF1 = cellOffset;
+ positionF2 = positionF1;
+ positionF1 = pointPosition;
+ }
+ else if (distanceToPoint < distanceF2) {
+ distanceF2 = distanceToPoint;
+ offsetF2 = cellOffset;
+ positionF2 = pointPosition;
+ }
+ }
+ outDistance = distanceF2;
+ outColor = hash_float_to_color(cellPosition + offsetF2);
+ outW = positionF2 + cellPosition;
+}
+
+void voronoi_distance_to_edge_1d(float w, float randomness, output float outDistance)
+{
+ float cellPosition = floor(w);
+ float localPosition = w - cellPosition;
+
+ float minDistance = 8.0;
+ for (int i = -1; i <= 1; i++) {
+ float cellOffset = float(i);
+ float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = distance(pointPosition, localPosition);
+ minDistance = min(distanceToPoint, minDistance);
+ }
+ outDistance = minDistance;
+}
+
+void voronoi_n_sphere_radius_1d(float w, float randomness, output float outRadius)
+{
+ float cellPosition = floor(w);
+ float localPosition = w - cellPosition;
+
+ float closestPoint;
+ float closestPointOffset;
+ float minDistance = 8.0;
+ for (int i = -1; i <= 1; i++) {
+ float cellOffset = float(i);
+ float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = distance(pointPosition, localPosition);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ closestPoint = pointPosition;
+ closestPointOffset = cellOffset;
+ }
+ }
+
+ minDistance = 8.0;
+ float closestPointToClosestPoint;
+ for (int i = -1; i <= 1; i++) {
+ if (i == 0) {
+ continue;
+ }
+ float cellOffset = float(i) + closestPointOffset;
+ float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = distance(closestPoint, pointPosition);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ closestPointToClosestPoint = pointPosition;
+ }
+ }
+ outRadius = distance(closestPointToClosestPoint, closestPoint) / 2.0;
+}
+
+/* **** 2D Voronoi **** */
+
+float voronoi_distance(vector2 a, vector2 b, string metric, float exponent)
+{
+ if (metric == "euclidean") {
+ return distance(a, b);
+ }
+ else if (metric == "manhattan") {
+ return abs(a.x - b.x) + abs(a.y - b.y);
+ }
+ else if (metric == "chebychev") {
+ return max(abs(a.x - b.x), abs(a.y - b.y));
+ }
+ else if (metric == "minkowski") {
+ return pow(pow(abs(a.x - b.x), exponent) + pow(abs(a.y - b.y), exponent), 1.0 / exponent);
+ }
+ else {
+ return 0.0;
+ }
+}
+
+void voronoi_f1_2d(vector2 coord,
+ float exponent,
+ float randomness,
+ string metric,
+ output float outDistance,
+ output color outColor,
+ output vector2 outPosition)
+{
+ vector2 cellPosition = floor(coord);
+ vector2 localPosition = coord - cellPosition;
+
+ float minDistance = 8.0;
+ vector2 targetOffset, targetPosition;
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ vector2 cellOffset = vector2(i, j);
+ vector2 pointPosition = cellOffset +
+ hash_vector2_to_vector2(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance(pointPosition, localPosition, metric, exponent);
+ if (distanceToPoint < minDistance) {
+ targetOffset = cellOffset;
+ minDistance = distanceToPoint;
+ targetPosition = pointPosition;
+ }
+ }
+ }
+ outDistance = minDistance;
+ outColor = hash_vector2_to_color(cellPosition + targetOffset);
+ outPosition = targetPosition + cellPosition;
+}
+
+void voronoi_smooth_f1_2d(vector2 coord,
+ float smoothness,
+ float exponent,
+ float randomness,
+ string metric,
+ output float outDistance,
+ output color outColor,
+ output vector2 outPosition)
+{
+ vector2 cellPosition = floor(coord);
+ vector2 localPosition = coord - cellPosition;
+
+ float smoothDistance = 8.0;
+ color smoothColor = color(0.0);
+ vector2 smoothPosition = vector2(0.0, 0.0);
+ for (int j = -2; j <= 2; j++) {
+ for (int i = -2; i <= 2; i++) {
+ vector2 cellOffset = vector2(i, j);
+ vector2 pointPosition = cellOffset +
+ hash_vector2_to_vector2(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance(pointPosition, localPosition, metric, exponent);
+ float h = smoothstep(0.0, 1.0, 0.5 + 0.5 * (smoothDistance - distanceToPoint) / smoothness);
+ float correctionFactor = smoothness * h * (1.0 - h);
+ smoothDistance = mix(smoothDistance, distanceToPoint, h) - correctionFactor;
+ correctionFactor /= 1.0 + 3.0 * smoothness;
+ color cellColor = hash_vector2_to_color(cellPosition + cellOffset);
+ smoothColor = mix(smoothColor, cellColor, h) - correctionFactor;
+ smoothPosition = mix(smoothPosition, pointPosition, h) - correctionFactor;
+ }
+ }
+ outDistance = smoothDistance;
+ outColor = smoothColor;
+ outPosition = cellPosition + smoothPosition;
+}
+
+void voronoi_f2_2d(vector2 coord,
+ float exponent,
+ float randomness,
+ string metric,
+ output float outDistance,
+ output color outColor,
+ output vector2 outPosition)
+{
+ vector2 cellPosition = floor(coord);
+ vector2 localPosition = coord - cellPosition;
+
+ float distanceF1 = 8.0;
+ float distanceF2 = 8.0;
+ vector2 offsetF1 = vector2(0.0, 0.0);
+ vector2 positionF1 = vector2(0.0, 0.0);
+ vector2 offsetF2, positionF2;
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ vector2 cellOffset = vector2(i, j);
+ vector2 pointPosition = cellOffset +
+ hash_vector2_to_vector2(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance(pointPosition, localPosition, metric, exponent);
+ if (distanceToPoint < distanceF1) {
+ distanceF2 = distanceF1;
+ distanceF1 = distanceToPoint;
+ offsetF2 = offsetF1;
+ offsetF1 = cellOffset;
+ positionF2 = positionF1;
+ positionF1 = pointPosition;
+ }
+ else if (distanceToPoint < distanceF2) {
+ distanceF2 = distanceToPoint;
+ offsetF2 = cellOffset;
+ positionF2 = pointPosition;
+ }
+ }
+ }
+ outDistance = distanceF2;
+ outColor = hash_vector2_to_color(cellPosition + offsetF2);
+ outPosition = positionF2 + cellPosition;
+}
+
+void voronoi_distance_to_edge_2d(vector2 coord, float randomness, output float outDistance)
+{
+ vector2 cellPosition = floor(coord);
+ vector2 localPosition = coord - cellPosition;
+
+ vector2 vectorToClosest;
+ float minDistance = 8.0;
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ vector2 cellOffset = vector2(i, j);
+ vector2 vectorToPoint = cellOffset +
+ hash_vector2_to_vector2(cellPosition + cellOffset) * randomness -
+ localPosition;
+ float distanceToPoint = dot(vectorToPoint, vectorToPoint);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ vectorToClosest = vectorToPoint;
+ }
+ }
+ }
+
+ minDistance = 8.0;
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ vector2 cellOffset = vector2(i, j);
+ vector2 vectorToPoint = cellOffset +
+ hash_vector2_to_vector2(cellPosition + cellOffset) * randomness -
+ localPosition;
+ vector2 perpendicularToEdge = vectorToPoint - vectorToClosest;
+ if (dot(perpendicularToEdge, perpendicularToEdge) > 0.0001) {
+ float distanceToEdge = dot((vectorToClosest + vectorToPoint) / 2.0,
+ normalize(perpendicularToEdge));
+ minDistance = min(minDistance, distanceToEdge);
+ }
+ }
+ }
+ outDistance = minDistance;
+}
+
+void voronoi_n_sphere_radius_2d(vector2 coord, float randomness, output float outRadius)
+{
+ vector2 cellPosition = floor(coord);
+ vector2 localPosition = coord - cellPosition;
+
+ vector2 closestPoint;
+ vector2 closestPointOffset;
+ float minDistance = 8.0;
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ vector2 cellOffset = vector2(i, j);
+ vector2 pointPosition = cellOffset +
+ hash_vector2_to_vector2(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = distance(pointPosition, localPosition);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ closestPoint = pointPosition;
+ closestPointOffset = cellOffset;
+ }
+ }
+ }
+
+ minDistance = 8.0;
+ vector2 closestPointToClosestPoint;
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ if (i == 0 && j == 0) {
+ continue;
+ }
+ vector2 cellOffset = vector2(i, j) + closestPointOffset;
+ vector2 pointPosition = cellOffset +
+ hash_vector2_to_vector2(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = distance(closestPoint, pointPosition);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ closestPointToClosestPoint = pointPosition;
+ }
+ }
+ }
+ outRadius = distance(closestPointToClosestPoint, closestPoint) / 2.0;
+}
+
+/* **** 3D Voronoi **** */
+
+float voronoi_distance(vector3 a, vector3 b, string metric, float exponent)
+{
+ if (metric == "euclidean") {
+ return distance(a, b);
+ }
+ else if (metric == "manhattan") {
+ return abs(a[0] - b[0]) + abs(a[1] - b[1]) + abs(a[2] - b[2]);
+ }
+ else if (metric == "chebychev") {
+ return max(abs(a[0] - b[0]), max(abs(a[1] - b[1]), abs(a[2] - b[2])));
+ }
+ else if (metric == "minkowski") {
+ return pow(pow(abs(a[0] - b[0]), exponent) + pow(abs(a[1] - b[1]), exponent) +
+ pow(abs(a[2] - b[2]), exponent),
+ 1.0 / exponent);
+ }
+ else {
+ return 0.0;
+ }
+}
+
+void voronoi_f1_3d(vector3 coord,
+ float exponent,
+ float randomness,
+ string metric,
+ output float outDistance,
+ output color outColor,
+ output vector3 outPosition)
+{
+ vector3 cellPosition = floor(coord);
+ vector3 localPosition = coord - cellPosition;
+
+ float minDistance = 8.0;
+ vector3 targetOffset, targetPosition;
+ for (int k = -1; k <= 1; k++) {
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ vector3 cellOffset = vector3(i, j, k);
+ vector3 pointPosition = cellOffset +
+ hash_vector3_to_vector3(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance(pointPosition, localPosition, metric, exponent);
+ if (distanceToPoint < minDistance) {
+ targetOffset = cellOffset;
+ minDistance = distanceToPoint;
+ targetPosition = pointPosition;
}
- else if (metric == "manhattan") {
- d = fabs(pd[0]) + fabs(pd[1]) + fabs(pd[2]);
+ }
+ }
+ }
+ outDistance = minDistance;
+ outColor = hash_vector3_to_color(cellPosition + targetOffset);
+ outPosition = targetPosition + cellPosition;
+}
+
+void voronoi_smooth_f1_3d(vector3 coord,
+ float smoothness,
+ float exponent,
+ float randomness,
+ string metric,
+ output float outDistance,
+ output color outColor,
+ output vector3 outPosition)
+{
+ vector3 cellPosition = floor(coord);
+ vector3 localPosition = coord - cellPosition;
+
+ float smoothDistance = 8.0;
+ color smoothColor = color(0.0);
+ vector3 smoothPosition = vector3(0.0);
+ for (int k = -2; k <= 2; k++) {
+ for (int j = -2; j <= 2; j++) {
+ for (int i = -2; i <= 2; i++) {
+ vector3 cellOffset = vector3(i, j, k);
+ vector3 pointPosition = cellOffset +
+ hash_vector3_to_vector3(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance(pointPosition, localPosition, metric, exponent);
+ float h = smoothstep(
+ 0.0, 1.0, 0.5 + 0.5 * (smoothDistance - distanceToPoint) / smoothness);
+ float correctionFactor = smoothness * h * (1.0 - h);
+ smoothDistance = mix(smoothDistance, distanceToPoint, h) - correctionFactor;
+ correctionFactor /= 1.0 + 3.0 * smoothness;
+ color cellColor = hash_vector3_to_color(cellPosition + cellOffset);
+ smoothColor = mix(smoothColor, cellColor, h) - correctionFactor;
+ smoothPosition = mix(smoothPosition, pointPosition, h) - correctionFactor;
+ }
+ }
+ }
+ outDistance = smoothDistance;
+ outColor = smoothColor;
+ outPosition = cellPosition + smoothPosition;
+}
+
+void voronoi_f2_3d(vector3 coord,
+ float exponent,
+ float randomness,
+ string metric,
+ output float outDistance,
+ output color outColor,
+ output vector3 outPosition)
+{
+ vector3 cellPosition = floor(coord);
+ vector3 localPosition = coord - cellPosition;
+
+ float distanceF1 = 8.0;
+ float distanceF2 = 8.0;
+ vector3 offsetF1 = vector3(0.0);
+ vector3 positionF1 = vector3(0.0);
+ vector3 offsetF2, positionF2;
+ for (int k = -1; k <= 1; k++) {
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ vector3 cellOffset = vector3(i, j, k);
+ vector3 pointPosition = cellOffset +
+ hash_vector3_to_vector3(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance(pointPosition, localPosition, metric, exponent);
+ if (distanceToPoint < distanceF1) {
+ distanceF2 = distanceF1;
+ distanceF1 = distanceToPoint;
+ offsetF2 = offsetF1;
+ offsetF1 = cellOffset;
+ positionF2 = positionF1;
+ positionF1 = pointPosition;
}
- else if (metric == "chebychev") {
- d = max(fabs(pd[0]), max(fabs(pd[1]), fabs(pd[2])));
+ else if (distanceToPoint < distanceF2) {
+ distanceF2 = distanceToPoint;
+ offsetF2 = cellOffset;
+ positionF2 = pointPosition;
}
- else if (metric == "minkowski") {
- d = pow(pow(fabs(pd[0]), e) + pow(fabs(pd[1]), e) + pow(fabs(pd[2]), e), 1.0 / e);
+ }
+ }
+ }
+ outDistance = distanceF2;
+ outColor = hash_vector3_to_color(cellPosition + offsetF2);
+ outPosition = positionF2 + cellPosition;
+}
+
+void voronoi_distance_to_edge_3d(vector3 coord, float randomness, output float outDistance)
+{
+ vector3 cellPosition = floor(coord);
+ vector3 localPosition = coord - cellPosition;
+
+ vector3 vectorToClosest;
+ float minDistance = 8.0;
+ for (int k = -1; k <= 1; k++) {
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ vector3 cellOffset = vector3(i, j, k);
+ vector3 vectorToPoint = cellOffset +
+ hash_vector3_to_vector3(cellPosition + cellOffset) * randomness -
+ localPosition;
+ float distanceToPoint = dot(vectorToPoint, vectorToPoint);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ vectorToClosest = vectorToPoint;
}
+ }
+ }
+ }
- vp += point(xx, yy, zz);
+ minDistance = 8.0;
+ for (int k = -1; k <= 1; k++) {
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ vector3 cellOffset = vector3(i, j, k);
+ vector3 vectorToPoint = cellOffset +
+ hash_vector3_to_vector3(cellPosition + cellOffset) * randomness -
+ localPosition;
+ vector3 perpendicularToEdge = vectorToPoint - vectorToClosest;
+ if (dot(perpendicularToEdge, perpendicularToEdge) > 0.0001) {
+ float distanceToEdge = dot((vectorToClosest + vectorToPoint) / 2.0,
+ normalize((vector)perpendicularToEdge));
+ minDistance = min(minDistance, distanceToEdge);
+ }
+ }
+ }
+ }
+ outDistance = minDistance;
+}
- if (d < da[0]) {
- da[3] = da[2];
- da[2] = da[1];
- da[1] = da[0];
- da[0] = d;
+void voronoi_n_sphere_radius_3d(vector3 coord, float randomness, output float outRadius)
+{
+ vector3 cellPosition = floor(coord);
+ vector3 localPosition = coord - cellPosition;
- pa[3] = pa[2];
- pa[2] = pa[1];
- pa[1] = pa[0];
- pa[0] = vp;
+ vector3 closestPoint;
+ vector3 closestPointOffset;
+ float minDistance = 8.0;
+ for (int k = -1; k <= 1; k++) {
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ vector3 cellOffset = vector3(i, j, k);
+ vector3 pointPosition = cellOffset +
+ hash_vector3_to_vector3(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = distance(pointPosition, localPosition);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ closestPoint = pointPosition;
+ closestPointOffset = cellOffset;
}
- else if (d < da[1]) {
- da[3] = da[2];
- da[2] = da[1];
- da[1] = d;
-
- pa[3] = pa[2];
- pa[2] = pa[1];
- pa[1] = vp;
+ }
+ }
+ }
+
+ minDistance = 8.0;
+ vector3 closestPointToClosestPoint;
+ for (int k = -1; k <= 1; k++) {
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ if (i == 0 && j == 0 && k == 0) {
+ continue;
}
- else if (d < da[2]) {
- da[3] = da[2];
- da[2] = d;
+ vector3 cellOffset = vector3(i, j, k) + closestPointOffset;
+ vector3 pointPosition = cellOffset +
+ hash_vector3_to_vector3(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = distance(closestPoint, pointPosition);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ closestPointToClosestPoint = pointPosition;
+ }
+ }
+ }
+ }
+ outRadius = distance(closestPointToClosestPoint, closestPoint) / 2.0;
+}
- pa[3] = pa[2];
- pa[2] = vp;
+/* **** 4D Voronoi **** */
+
+float voronoi_distance(vector4 a, vector4 b, string metric, float exponent)
+{
+ if (metric == "euclidean") {
+ return distance(a, b);
+ }
+ else if (metric == "manhattan") {
+ return abs(a.x - b.x) + abs(a.y - b.y) + abs(a.z - b.z) + abs(a.w - b.w);
+ }
+ else if (metric == "chebychev") {
+ return max(abs(a.x - b.x), max(abs(a.y - b.y), max(abs(a.z - b.z), abs(a.w - b.w))));
+ }
+ else if (metric == "minkowski") {
+ return pow(pow(abs(a.x - b.x), exponent) + pow(abs(a.y - b.y), exponent) +
+ pow(abs(a.z - b.z), exponent) + pow(abs(a.w - b.w), exponent),
+ 1.0 / exponent);
+ }
+ else {
+ return 0.0;
+ }
+}
+
+void voronoi_f1_4d(vector4 coord,
+ float exponent,
+ float randomness,
+ string metric,
+ output float outDistance,
+ output color outColor,
+ output vector4 outPosition)
+{
+ vector4 cellPosition = floor(coord);
+ vector4 localPosition = coord - cellPosition;
+
+ float minDistance = 8.0;
+ vector4 targetOffset, targetPosition;
+ for (int u = -1; u <= 1; u++) {
+ for (int k = -1; k <= 1; k++) {
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ vector4 cellOffset = vector4(i, j, k, u);
+ vector4 pointPosition = cellOffset +
+ hash_vector4_to_vector4(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance(pointPosition, localPosition, metric, exponent);
+ if (distanceToPoint < minDistance) {
+ targetOffset = cellOffset;
+ minDistance = distanceToPoint;
+ targetPosition = pointPosition;
+ }
}
- else if (d < da[3]) {
- da[3] = d;
- pa[3] = vp;
+ }
+ }
+ }
+ outDistance = minDistance;
+ outColor = hash_vector4_to_color(cellPosition + targetOffset);
+ outPosition = targetPosition + cellPosition;
+}
+
+void voronoi_smooth_f1_4d(vector4 coord,
+ float smoothness,
+ float exponent,
+ float randomness,
+ string metric,
+ output float outDistance,
+ output color outColor,
+ output vector4 outPosition)
+{
+ vector4 cellPosition = floor(coord);
+ vector4 localPosition = coord - cellPosition;
+
+ float smoothDistance = 8.0;
+ color smoothColor = color(0.0);
+ vector4 smoothPosition = vector4(0.0, 0.0, 0.0, 0.0);
+ for (int u = -2; u <= 2; u++) {
+ for (int k = -2; k <= 2; k++) {
+ for (int j = -2; j <= 2; j++) {
+ for (int i = -2; i <= 2; i++) {
+ vector4 cellOffset = vector4(i, j, k, u);
+ vector4 pointPosition = cellOffset +
+ hash_vector4_to_vector4(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance(pointPosition, localPosition, metric, exponent);
+ float h = smoothstep(
+ 0.0, 1.0, 0.5 + 0.5 * (smoothDistance - distanceToPoint) / smoothness);
+ float correctionFactor = smoothness * h * (1.0 - h);
+ smoothDistance = mix(smoothDistance, distanceToPoint, h) - correctionFactor;
+ correctionFactor /= 1.0 + 3.0 * smoothness;
+ color cellColor = hash_vector4_to_color(cellPosition + cellOffset);
+ smoothColor = mix(smoothColor, cellColor, h) - correctionFactor;
+ smoothPosition = mix(smoothPosition, pointPosition, h) - correctionFactor;
+ }
+ }
+ }
+ }
+ outDistance = smoothDistance;
+ outColor = smoothColor;
+ outPosition = cellPosition + smoothPosition;
+}
+
+void voronoi_f2_4d(vector4 coord,
+ float exponent,
+ float randomness,
+ string metric,
+ output float outDistance,
+ output color outColor,
+ output vector4 outPosition)
+{
+ vector4 cellPosition = floor(coord);
+ vector4 localPosition = coord - cellPosition;
+
+ float distanceF1 = 8.0;
+ float distanceF2 = 8.0;
+ vector4 offsetF1 = vector4(0.0, 0.0, 0.0, 0.0);
+ vector4 positionF1 = vector4(0.0, 0.0, 0.0, 0.0);
+ vector4 offsetF2, positionF2;
+ for (int u = -1; u <= 1; u++) {
+ for (int k = -1; k <= 1; k++) {
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ vector4 cellOffset = vector4(i, j, k, u);
+ vector4 pointPosition = cellOffset +
+ hash_vector4_to_vector4(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance(pointPosition, localPosition, metric, exponent);
+ if (distanceToPoint < distanceF1) {
+ distanceF2 = distanceF1;
+ distanceF1 = distanceToPoint;
+ offsetF2 = offsetF1;
+ offsetF1 = cellOffset;
+ positionF2 = positionF1;
+ positionF1 = pointPosition;
+ }
+ else if (distanceToPoint < distanceF2) {
+ distanceF2 = distanceToPoint;
+ offsetF2 = cellOffset;
+ positionF2 = pointPosition;
+ }
+ }
+ }
+ }
+ }
+ outDistance = distanceF2;
+ outColor = hash_vector4_to_color(cellPosition + offsetF2);
+ outPosition = positionF2 + cellPosition;
+}
+
+void voronoi_distance_to_edge_4d(vector4 coord, float randomness, output float outDistance)
+{
+ vector4 cellPosition = floor(coord);
+ vector4 localPosition = coord - cellPosition;
+
+ vector4 vectorToClosest;
+ float minDistance = 8.0;
+ for (int u = -1; u <= 1; u++) {
+ for (int k = -1; k <= 1; k++) {
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ vector4 cellOffset = vector4(i, j, k, u);
+ vector4 vectorToPoint = cellOffset +
+ hash_vector4_to_vector4(cellPosition + cellOffset) * randomness -
+ localPosition;
+ float distanceToPoint = dot(vectorToPoint, vectorToPoint);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ vectorToClosest = vectorToPoint;
+ }
}
}
}
}
+
+ minDistance = 8.0;
+ for (int u = -1; u <= 1; u++) {
+ for (int k = -1; k <= 1; k++) {
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ vector4 cellOffset = vector4(i, j, k, u);
+ vector4 vectorToPoint = cellOffset +
+ hash_vector4_to_vector4(cellPosition + cellOffset) * randomness -
+ localPosition;
+ vector4 perpendicularToEdge = vectorToPoint - vectorToClosest;
+ if (dot(perpendicularToEdge, perpendicularToEdge) > 0.0001) {
+ float distanceToEdge = dot((vectorToClosest + vectorToPoint) / 2.0,
+ normalize(perpendicularToEdge));
+ minDistance = min(minDistance, distanceToEdge);
+ }
+ }
+ }
+ }
+ }
+ outDistance = minDistance;
}
-/* Voronoi */
+void voronoi_n_sphere_radius_4d(vector4 coord, float randomness, output float outRadius)
+{
+ vector4 cellPosition = floor(coord);
+ vector4 localPosition = coord - cellPosition;
+
+ vector4 closestPoint;
+ vector4 closestPointOffset;
+ float minDistance = 8.0;
+ for (int u = -1; u <= 1; u++) {
+ for (int k = -1; k <= 1; k++) {
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ vector4 cellOffset = vector4(i, j, k, u);
+ vector4 pointPosition = cellOffset +
+ hash_vector4_to_vector4(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = distance(pointPosition, localPosition);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ closestPoint = pointPosition;
+ closestPointOffset = cellOffset;
+ }
+ }
+ }
+ }
+ }
+
+ minDistance = 8.0;
+ vector4 closestPointToClosestPoint;
+ for (int u = -1; u <= 1; u++) {
+ for (int k = -1; k <= 1; k++) {
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ if (i == 0 && j == 0 && k == 0 && u == 0) {
+ continue;
+ }
+ vector4 cellOffset = vector4(i, j, k, u) + closestPointOffset;
+ vector4 pointPosition = cellOffset +
+ hash_vector4_to_vector4(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = distance(closestPoint, pointPosition);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ closestPointToClosestPoint = pointPosition;
+ }
+ }
+ }
+ }
+ }
+ outRadius = distance(closestPointToClosestPoint, closestPoint) / 2.0;
+}
shader node_voronoi_texture(
int use_mapping = 0,
matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
- string coloring = "intensity",
- string metric = "distance",
- string feature = "F1",
- float Exponent = 1.0,
+ string dimensions = "3D",
+ string feature = "f1",
+ string metric = "euclidean",
+ vector3 Vector = P,
+ float WIn = 0.0,
float Scale = 5.0,
- point Vector = P,
- output float Fac = 0.0,
- output color Color = 0.0)
+ float Smoothness = 5.0,
+ float Exponent = 1.0,
+ float Randomness = 1.0,
+ output float Distance = 0.0,
+ output color Color = 0.0,
+ output vector3 Position = P,
+ output float WOut = 0.0,
+ output float Radius = 0.0)
{
- point p = Vector;
+ float randomness = clamp(Randomness, 0.0, 1.0);
+ float smoothness = clamp(Smoothness / 2.0, 0.0, 0.5);
+ vector3 coord = Vector;
if (use_mapping)
- p = transform(mapping, p);
-
- /* compute distance and point coordinate of 4 nearest neighbours */
- float da[4];
- point pa[4];
+ coord = transform(mapping, coord);
- /* compute distance and point coordinate of 4 nearest neighbours */
- voronoi_m(p * Scale, metric, Exponent, da, pa);
+ float w = WIn * Scale;
+ coord *= Scale;
- if (coloring == "intensity") {
- /* Intensity output */
- if (feature == "F1") {
- Fac = fabs(da[0]);
+ if (dimensions == "1D") {
+ if (feature == "f1") {
+ voronoi_f1_1d(w, Exponent, randomness, metric, Distance, Color, WOut);
}
- else if (feature == "F2") {
- Fac = fabs(da[1]);
+ else if (feature == "smooth_f1") {
+ voronoi_smooth_f1_1d(w, smoothness, Exponent, randomness, metric, Distance, Color, WOut);
}
- else if (feature == "F3") {
- Fac = fabs(da[2]);
+ else if (feature == "f2") {
+ voronoi_f2_1d(w, Exponent, randomness, metric, Distance, Color, WOut);
}
- else if (feature == "F4") {
- Fac = fabs(da[3]);
+ else if (feature == "distance_to_edge") {
+ voronoi_distance_to_edge_1d(w, randomness, Distance);
}
- else if (feature == "F2F1") {
- Fac = fabs(da[1] - da[0]);
+ else if (feature == "n_sphere_radius") {
+ voronoi_n_sphere_radius_1d(w, randomness, Radius);
}
- Color = color(Fac);
+ else {
+ error("Unknown feature!");
+ }
+ WOut = (Scale != 0.0) ? WOut / Scale : 0.0;
}
- else {
- /* Color output */
- if (feature == "F1") {
- Color = pa[0];
+ else if (dimensions == "2D") {
+ vector2 coord2D = vector2(coord[0], coord[1]);
+ vector2 outPosition2D;
+ if (feature == "f1") {
+ voronoi_f1_2d(coord2D, Exponent, randomness, metric, Distance, Color, outPosition2D);
}
- else if (feature == "F2") {
- Color = pa[1];
+ else if (feature == "smooth_f1") {
+ voronoi_smooth_f1_2d(
+ coord2D, smoothness, Exponent, randomness, metric, Distance, Color, outPosition2D);
}
- else if (feature == "F3") {
- Color = pa[2];
+ else if (feature == "f2") {
+ voronoi_f2_2d(coord2D, Exponent, randomness, metric, Distance, Color, outPosition2D);
}
- else if (feature == "F4") {
- Color = pa[3];
+ else if (feature == "distance_to_edge") {
+ voronoi_distance_to_edge_2d(coord2D, randomness, Distance);
}
- else if (feature == "F2F1") {
- Color = fabs(pa[1] - pa[0]);
+ else if (feature == "n_sphere_radius") {
+ voronoi_n_sphere_radius_2d(coord2D, randomness, Radius);
}
-
- Color = cellnoise_color(Color);
- Fac = (Color[0] + Color[1] + Color[2]) * (1.0 / 3.0);
+ else {
+ error("Unknown feature!");
+ }
+ outPosition2D = safe_divide(outPosition2D, Scale);
+ Position = vector3(outPosition2D.x, outPosition2D.y, 0.0);
+ }
+ else if (dimensions == "3D") {
+ if (feature == "f1") {
+ voronoi_f1_3d(coord, Exponent, randomness, metric, Distance, Color, Position);
+ }
+ else if (feature == "smooth_f1") {
+ voronoi_smooth_f1_3d(
+ coord, smoothness, Exponent, randomness, metric, Distance, Color, Position);
+ }
+ else if (feature == "f2") {
+ voronoi_f2_3d(coord, Exponent, randomness, metric, Distance, Color, Position);
+ }
+ else if (feature == "distance_to_edge") {
+ voronoi_distance_to_edge_3d(coord, randomness, Distance);
+ }
+ else if (feature == "n_sphere_radius") {
+ voronoi_n_sphere_radius_3d(coord, randomness, Radius);
+ }
+ else {
+ error("Unknown feature!");
+ }
+ Position = (Scale != 0.0) ? Position / Scale : vector3(0.0);
+ }
+ else if (dimensions == "4D") {
+ vector4 coord4D = vector4(coord[0], coord[1], coord[2], w);
+ vector4 outPosition4D;
+ if (feature == "f1") {
+ voronoi_f1_4d(coord4D, Exponent, randomness, metric, Distance, Color, outPosition4D);
+ }
+ else if (feature == "smooth_f1") {
+ voronoi_smooth_f1_4d(
+ coord4D, smoothness, Exponent, randomness, metric, Distance, Color, outPosition4D);
+ }
+ else if (feature == "f2") {
+ voronoi_f2_4d(coord4D, Exponent, randomness, metric, Distance, Color, outPosition4D);
+ }
+ else if (feature == "distance_to_edge") {
+ voronoi_distance_to_edge_4d(coord4D, randomness, Distance);
+ }
+ else if (feature == "n_sphere_radius") {
+ voronoi_n_sphere_radius_4d(coord4D, randomness, Radius);
+ }
+ else {
+ error("Unknown feature!");
+ }
+ outPosition4D = safe_divide(outPosition4D, Scale);
+ Position = vector3(outPosition4D.x, outPosition4D.y, outPosition4D.z);
+ WOut = outPosition4D.w;
+ }
+ else {
+ error("Unknown dimension!");
}
}
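
The Smooth F1 variants above accumulate candidate distances with a polynomial smooth minimum (the "Smooth Voronoi" approach linked in the comment), and blend colors and positions with the same weight h. A standalone sketch of that two-value distance blend, mirroring the h / correctionFactor arithmetic; function, shader and parameter names are illustrative, and a stock stdosl.h is assumed:

#include "stdosl.h"

/* Polynomial smooth minimum of a and b: behaves like min(a, b) when the
 * values are far apart, and forms a smooth valley when they are close. */
float smooth_min_sketch(float a, float b, float smoothness)
{
  float h = smoothstep(0.0, 1.0, 0.5 + 0.5 * (a - b) / smoothness);
  return mix(a, b, h) - smoothness * h * (1.0 - h);
}

shader example_smooth_min(float A = 0.3,
                          float B = 0.4,
                          float Smoothness = 0.25,
                          output float Out = 0.0)
{
  /* Guard against division by zero for very small smoothness values. */
  Out = smooth_min_sketch(A, B, max(Smoothness, 1e-4));
}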
diff --git a/intern/cycles/kernel/shaders/node_voxel_texture.osl b/intern/cycles/kernel/shaders/node_voxel_texture.osl
index 0e4484561d8..14489298367 100644
--- a/intern/cycles/kernel/shaders/node_voxel_texture.osl
+++ b/intern/cycles/kernel/shaders/node_voxel_texture.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_voxel_texture(string filename = "",
string interpolation = "linear",
diff --git a/intern/cycles/kernel/shaders/node_wave_texture.osl b/intern/cycles/kernel/shaders/node_wave_texture.osl
index dfc2dbfb800..874bfb8d3af 100644
--- a/intern/cycles/kernel/shaders/node_wave_texture.osl
+++ b/intern/cycles/kernel/shaders/node_wave_texture.osl
@@ -14,45 +14,88 @@
* limitations under the License.
*/
-#include "stdosl.h"
-#include "node_texture.h"
+#include "node_noise.h"
+#include "stdcycles.h"
/* Wave */
-float wave(point p, string type, string profile, float detail, float distortion, float dscale)
+float wave(point p_input,
+ string type,
+ string bands_direction,
+ string rings_direction,
+ string profile,
+ float distortion,
+ float detail,
+ float dscale,
+ float droughness,
+ float phase)
{
+ /* Prevent precision issues on unit coordinates. */
+ point p = (p_input + 0.000001) * 0.999999;
+
float n = 0.0;
if (type == "bands") {
- n = (p[0] + p[1] + p[2]) * 10.0;
+ if (bands_direction == "x") {
+ n = p[0] * 20.0;
+ }
+ else if (bands_direction == "y") {
+ n = p[1] * 20.0;
+ }
+ else if (bands_direction == "z") {
+ n = p[2] * 20.0;
+ }
+ else { /* diagonal */
+ n = (p[0] + p[1] + p[2]) * 10.0;
+ }
}
else if (type == "rings") {
- n = length(p) * 20.0;
+ point rp = p;
+ if (rings_direction == "x") {
+ rp *= point(0.0, 1.0, 1.0);
+ }
+ else if (rings_direction == "y") {
+ rp *= point(1.0, 0.0, 1.0);
+ }
+ else if (rings_direction == "z") {
+ rp *= point(1.0, 1.0, 0.0);
+ }
+ /* else: "spherical" */
+
+ n = length(rp) * 20.0;
}
+ n += phase;
+
if (distortion != 0.0) {
- n = n + (distortion * noise_turbulence(p * dscale, detail, 0));
+ n = n + (distortion * (fractal_noise(p * dscale, detail, droughness) * 2.0 - 1.0));
}
if (profile == "sine") {
- return 0.5 + 0.5 * sin(n);
+ return 0.5 + 0.5 * sin(n - M_PI_2);
+ }
+ else if (profile == "saw") {
+ n /= M_2PI;
+ return n - floor(n);
}
- else {
- /* Saw profile */
+ else { /* profile tri */
n /= M_2PI;
- n -= (int)n;
- return (n < 0.0) ? n + 1.0 : n;
+ return abs(n - floor(n + 0.5)) * 2.0;
}
}
shader node_wave_texture(int use_mapping = 0,
matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
string type = "bands",
+ string bands_direction = "x",
+ string rings_direction = "x",
string profile = "sine",
float Scale = 5.0,
float Distortion = 0.0,
float Detail = 2.0,
float DetailScale = 1.0,
+ float DetailRoughness = 0.5,
+ float PhaseOffset = 0.0,
point Vector = P,
output float Fac = 0.0,
output color Color = 0.0)
@@ -62,6 +105,15 @@ shader node_wave_texture(int use_mapping = 0,
if (use_mapping)
p = transform(mapping, p);
- Fac = wave(p * Scale, type, profile, Detail, Distortion, DetailScale);
+ Fac = wave(p * Scale,
+ type,
+ bands_direction,
+ rings_direction,
+ profile,
+ Distortion,
+ Detail,
+ DetailScale,
+ DetailRoughness,
+ PhaseOffset);
Color = Fac;
}
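
The reworked wave() reduces each profile to a 0..1 curve over the phase value n: a sine shifted so the wave starts at 0, a sawtooth, and a triangle. A compact standalone sketch of just those three profiles, with illustrative names and a stock stdosl.h:

#include "stdosl.h"

shader example_wave_profiles(float N = 0.0,
                             output float Sine = 0.0,
                             output float Saw = 0.0,
                             output float Tri = 0.0)
{
  /* N plays the role of the phase variable n in wave() above. */
  Sine = 0.5 + 0.5 * sin(N - M_PI_2);

  float t = N / M_2PI;
  Saw = t - floor(t);
  Tri = abs(t - floor(t + 0.5)) * 2.0;
}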
diff --git a/intern/cycles/kernel/shaders/node_wavelength.osl b/intern/cycles/kernel/shaders/node_wavelength.osl
index c8c6eecb171..f484c4b4788 100644
--- a/intern/cycles/kernel/shaders/node_wavelength.osl
+++ b/intern/cycles/kernel/shaders/node_wavelength.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_wavelength(float Wavelength = 500.0, output color Color = 0.0)
{
diff --git a/intern/cycles/kernel/shaders/node_white_noise_texture.osl b/intern/cycles/kernel/shaders/node_white_noise_texture.osl
new file mode 100644
index 00000000000..94735a019d5
--- /dev/null
+++ b/intern/cycles/kernel/shaders/node_white_noise_texture.osl
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "node_hash.h"
+#include "stdcycles.h"
+#include "vector2.h"
+#include "vector4.h"
+
+#define vector3 point
+
+shader node_white_noise_texture(string dimensions = "3D",
+ point Vector = point(0.0, 0.0, 0.0),
+ float W = 0.0,
+ output float Value = 0.0,
+ output color Color = 0.0)
+{
+ if (dimensions == "1D") {
+ Value = noise("hash", W);
+ Color = hash_float_to_color(W);
+ }
+ else if (dimensions == "2D") {
+ Value = noise("hash", Vector[0], Vector[1]);
+ Color = hash_vector2_to_color(vector2(Vector[0], Vector[1]));
+ }
+ else if (dimensions == "3D") {
+ Value = noise("hash", Vector);
+ Color = hash_vector3_to_color(vector3(Vector[0], Vector[1], Vector[2]));
+ }
+ else if (dimensions == "4D") {
+ Value = noise("hash", Vector, W);
+ Color = hash_vector4_to_color(vector4(Vector[0], Vector[1], Vector[2], W));
+ }
+ else {
+ warning("%s", "Unknown dimension!");
+ }
+}
diff --git a/intern/cycles/kernel/shaders/node_wireframe.osl b/intern/cycles/kernel/shaders/node_wireframe.osl
index ea4bd3a4c87..673a451c928 100644
--- a/intern/cycles/kernel/shaders/node_wireframe.osl
+++ b/intern/cycles/kernel/shaders/node_wireframe.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "oslutil.h"
+#include "stdcycles.h"
shader node_wireframe(string bump_offset = "center",
int use_pixel_size = 0,
diff --git a/intern/cycles/kernel/shaders/oslutil.h b/intern/cycles/kernel/shaders/oslutil.h
deleted file mode 100644
index d48bfa4a665..00000000000
--- a/intern/cycles/kernel/shaders/oslutil.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Adapted from Open Shading Language with this license:
- *
- * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al.
- * All Rights Reserved.
- *
- * Modifications Copyright 2011, Blender Foundation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Sony Pictures Imageworks nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef CCL_OSLUTIL_H
-#define CCL_OSLUTIL_H
-
-// Return wireframe opacity factor [0, 1] given a geometry type in
-// ("triangles", "polygons" or "patches"), and a line_width in raster
-// or world space depending on the last (raster) boolean argument.
-//
-float wireframe(string edge_type, float line_width, int raster)
-{
- // ray differentials are so big in diffuse context that this function would always return "wire"
- if (raytype("path:diffuse"))
- return 0.0;
-
- int np = 0;
- point p[64];
- float pixelWidth = 1;
-
- if (edge_type == "triangles") {
- np = 3;
- if (!getattribute("geom:trianglevertices", p))
- return 0.0;
- }
- else if (edge_type == "polygons" || edge_type == "patches") {
- getattribute("geom:numpolyvertices", np);
- if (np < 3 || !getattribute("geom:polyvertices", p))
- return 0.0;
- }
-
- if (raster) {
- // Project the derivatives of P to the viewing plane defined
- // by I so we have a measure of how big is a pixel at this point
- float pixelWidthX = length(Dx(P) - dot(Dx(P), I) * I);
- float pixelWidthY = length(Dy(P) - dot(Dy(P), I) * I);
- // Take the average of both axis' length
- pixelWidth = (pixelWidthX + pixelWidthY) / 2;
- }
-
- // Use half the width as the neighbor face will render the
- // other half. And take the square for fast comparison
- pixelWidth *= 0.5 * line_width;
- pixelWidth *= pixelWidth;
- for (int i = 0; i < np; i++) {
- int i2 = i ? i - 1 : np - 1;
- vector dir = P - p[i];
- vector edge = p[i] - p[i2];
- vector crs = cross(edge, dir);
- // At this point dot(crs, crs) / dot(edge, edge) is
- // the square of area / length(edge) == square of the
- // distance to the edge.
- if (dot(crs, crs) < (dot(edge, edge) * pixelWidth))
- return 1;
- }
- return 0;
-}
-
-float wireframe(string edge_type, float line_width)
-{
- return wireframe(edge_type, line_width, 1);
-}
-float wireframe(string edge_type)
-{
- return wireframe(edge_type, 1.0, 1);
-}
-float wireframe()
-{
- return wireframe("polygons", 1.0, 1);
-}
-
-#endif /* CCL_OSLUTIL_H */
diff --git a/intern/cycles/kernel/shaders/stdcycles.h b/intern/cycles/kernel/shaders/stdcycles.h
new file mode 100644
index 00000000000..dd604da68ce
--- /dev/null
+++ b/intern/cycles/kernel/shaders/stdcycles.h
@@ -0,0 +1,150 @@
+/////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// * Neither the name of Sony Pictures Imageworks nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/////////////////////////////////////////////////////////////////////////////
+
+#ifndef CCL_STDCYCLESOSL_H
+#define CCL_STDCYCLESOSL_H
+
+#include "stdosl.h"
+
+// Declaration of built-in functions and closures; stdosl.h does not make
+// these available, so we have to redefine them.
+#define BUILTIN [[int builtin = 1]]
+#define BUILTIN_DERIV [[ int builtin = 1, int deriv = 1 ]]
+
+closure color diffuse_ramp(normal N, color colors[8]) BUILTIN;
+closure color phong_ramp(normal N, float exponent, color colors[8]) BUILTIN;
+closure color diffuse_toon(normal N, float size, float smooth) BUILTIN;
+closure color glossy_toon(normal N, float size, float smooth) BUILTIN;
+closure color microfacet_ggx(normal N, float ag) BUILTIN;
+closure color microfacet_ggx_aniso(normal N, vector T, float ax, float ay) BUILTIN;
+closure color microfacet_ggx_refraction(normal N, float ag, float eta) BUILTIN;
+closure color microfacet_multi_ggx(normal N, float ag, color C) BUILTIN;
+closure color microfacet_multi_ggx_aniso(normal N, vector T, float ax, float ay, color C) BUILTIN;
+closure color microfacet_multi_ggx_glass(normal N, float ag, float eta, color C) BUILTIN;
+closure color microfacet_ggx_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
+closure color microfacet_ggx_aniso_fresnel(
+ normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN;
+closure color
+microfacet_multi_ggx_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
+closure color microfacet_multi_ggx_aniso_fresnel(
+ normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN;
+closure color
+microfacet_multi_ggx_glass_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
+closure color microfacet_beckmann(normal N, float ab) BUILTIN;
+closure color microfacet_beckmann_aniso(normal N, vector T, float ax, float ay) BUILTIN;
+closure color microfacet_beckmann_refraction(normal N, float ab, float eta) BUILTIN;
+closure color ashikhmin_shirley(normal N, vector T, float ax, float ay) BUILTIN;
+closure color ashikhmin_velvet(normal N, float sigma) BUILTIN;
+closure color ambient_occlusion() BUILTIN;
+closure color principled_diffuse(normal N, float roughness) BUILTIN;
+closure color principled_sheen(normal N) BUILTIN;
+closure color principled_clearcoat(normal N, float clearcoat, float clearcoat_roughness) BUILTIN;
+
+// BSSRDF
+closure color bssrdf(string method, normal N, vector radius, color albedo) BUILTIN;
+
+// Hair
+closure color
+hair_reflection(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN;
+closure color
+hair_transmission(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN;
+closure color principled_hair(normal N,
+ color sigma,
+ float roughnessu,
+ float roughnessv,
+ float coat,
+ float alpha,
+ float eta) BUILTIN;
+
+// Volume
+closure color henyey_greenstein(float g) BUILTIN;
+closure color absorption() BUILTIN;
+
+normal ensure_valid_reflection(normal Ng, vector I, normal N)
+{
+ /* The implementation here mirrors the one in kernel_montecarlo.h,
+ * check there for an explanation of the algorithm. */
+
+ float sqr(float x)
+ {
+ return x * x;
+ }
+
+ vector R = 2 * dot(N, I) * N - I;
+
+ float threshold = min(0.9 * dot(Ng, I), 0.01);
+ if (dot(Ng, R) >= threshold) {
+ return N;
+ }
+
+ float NdotNg = dot(N, Ng);
+ vector X = normalize(N - NdotNg * Ng);
+
+ float Ix = dot(I, X), Iz = dot(I, Ng);
+ float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
+ float a = Ix2 + Iz2;
+
+ float b = sqrt(Ix2 * (a - sqr(threshold)));
+ float c = Iz * threshold + a;
+
+ float fac = 0.5 / a;
+ float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c);
+ int valid1 = (N1_z2 > 1e-5) && (N1_z2 <= (1.0 + 1e-5));
+ int valid2 = (N2_z2 > 1e-5) && (N2_z2 <= (1.0 + 1e-5));
+
+ float N_new_x, N_new_z;
+ if (valid1 && valid2) {
+ float N1_x = sqrt(1.0 - N1_z2), N1_z = sqrt(N1_z2);
+ float N2_x = sqrt(1.0 - N2_z2), N2_z = sqrt(N2_z2);
+
+ float R1 = 2 * (N1_x * Ix + N1_z * Iz) * N1_z - Iz;
+ float R2 = 2 * (N2_x * Ix + N2_z * Iz) * N2_z - Iz;
+
+ valid1 = (R1 >= 1e-5);
+ valid2 = (R2 >= 1e-5);
+ if (valid1 && valid2) {
+ N_new_x = (R1 < R2) ? N1_x : N2_x;
+ N_new_z = (R1 < R2) ? N1_z : N2_z;
+ }
+ else {
+ N_new_x = (R1 > R2) ? N1_x : N2_x;
+ N_new_z = (R1 > R2) ? N1_z : N2_z;
+ }
+ }
+ else if (valid1 || valid2) {
+ float Nz2 = valid1 ? N1_z2 : N2_z2;
+ N_new_x = sqrt(1.0 - Nz2);
+ N_new_z = sqrt(Nz2);
+ }
+ else {
+ return Ng;
+ }
+
+ return N_new_x * X + N_new_z * Ng;
+}
+
+#endif /* CCL_STDCYCLESOSL_H */
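
The corrective branch in ensure_valid_reflection() above only runs when reflecting I about the shading normal N would dip below the geometric normal Ng. The following standalone C++ sketch reproduces just that early-out test, using a hypothetical float3 helper type rather than the Cycles vector types; the full quadratic solve is omitted.

#include <algorithm>

struct float3 { float x, y, z; };

static float dot(const float3 &a, const float3 &b)
{
  return a.x * b.x + a.y * b.y + a.z * b.z;
}
static float3 operator*(float s, const float3 &v) { return {s * v.x, s * v.y, s * v.z}; }
static float3 operator-(const float3 &a, const float3 &b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; }

/* Keep the shading normal N unchanged if the reflection of I about N stays
 * sufficiently above the geometric normal Ng; otherwise the caller would have
 * to bend N as in ensure_valid_reflection() above. */
static bool reflection_is_valid(const float3 &Ng, const float3 &I, const float3 &N)
{
  float3 R = 2.0f * dot(N, I) * N - I;
  float threshold = std::min(0.9f * dot(Ng, I), 0.01f);
  return dot(Ng, R) >= threshold;
}

int main()
{
  float3 Ng = {0.0f, 0.0f, 1.0f};
  float3 I = {0.0f, 0.6f, 0.8f};       /* direction toward the viewer */
  float3 N_ok = {0.0f, 0.0f, 1.0f};    /* shading normal equal to Ng: valid */
  float3 N_bent = {0.0f, 0.98f, 0.2f}; /* strongly bumped normal: reflection dips below Ng */
  return (reflection_is_valid(Ng, I, N_ok) && !reflection_is_valid(Ng, I, N_bent)) ? 0 : 1;
}
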
diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h
deleted file mode 100644
index 9b9720ffff9..00000000000
--- a/intern/cycles/kernel/shaders/stdosl.h
+++ /dev/null
@@ -1,853 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. All Rights Reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-// * Neither the name of Sony Pictures Imageworks nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-/////////////////////////////////////////////////////////////////////////////
-
-#ifndef CCL_STDOSL_H
-#define CCL_STDOSL_H
-
-#ifndef M_PI
-# define M_PI 3.1415926535897932 /* pi */
-# define M_PI_2 1.5707963267948966 /* pi/2 */
-# define M_PI_4 0.7853981633974483 /* pi/4 */
-# define M_2_PI 0.6366197723675813 /* 2/pi */
-# define M_2PI 6.2831853071795865 /* 2*pi */
-# define M_4PI 12.566370614359173 /* 4*pi */
-# define M_2_SQRTPI 1.1283791670955126 /* 2/sqrt(pi) */
-# define M_E 2.7182818284590452 /* e (Euler's number) */
-# define M_LN2 0.6931471805599453 /* ln(2) */
-# define M_LN10 2.3025850929940457 /* ln(10) */
-# define M_LOG2E 1.4426950408889634 /* log_2(e) */
-# define M_LOG10E 0.4342944819032518 /* log_10(e) */
-# define M_SQRT2 1.4142135623730950 /* sqrt(2) */
-# define M_SQRT1_2 0.7071067811865475 /* 1/sqrt(2) */
-#endif
-
-// Declaration of built-in functions and closures
-#define BUILTIN [[int builtin = 1]]
-#define BUILTIN_DERIV [[ int builtin = 1, int deriv = 1 ]]
-
-#define PERCOMP1(name) \
- normal name(normal x) BUILTIN; \
- vector name(vector x) BUILTIN; \
- point name(point x) BUILTIN; \
- color name(color x) BUILTIN; \
- float name(float x) BUILTIN;
-
-#define PERCOMP2(name) \
- normal name(normal x, normal y) BUILTIN; \
- vector name(vector x, vector y) BUILTIN; \
- point name(point x, point y) BUILTIN; \
- color name(color x, color y) BUILTIN; \
- float name(float x, float y) BUILTIN;
-
-#define PERCOMP2F(name) \
- normal name(normal x, float y) BUILTIN; \
- vector name(vector x, float y) BUILTIN; \
- point name(point x, float y) BUILTIN; \
- color name(color x, float y) BUILTIN; \
- float name(float x, float y) BUILTIN;
-
-// Basic math
-normal degrees(normal x)
-{
- return x * (180.0 / M_PI);
-}
-vector degrees(vector x)
-{
- return x * (180.0 / M_PI);
-}
-point degrees(point x)
-{
- return x * (180.0 / M_PI);
-}
-color degrees(color x)
-{
- return x * (180.0 / M_PI);
-}
-float degrees(float x)
-{
- return x * (180.0 / M_PI);
-}
-normal radians(normal x)
-{
- return x * (M_PI / 180.0);
-}
-vector radians(vector x)
-{
- return x * (M_PI / 180.0);
-}
-point radians(point x)
-{
- return x * (M_PI / 180.0);
-}
-color radians(color x)
-{
- return x * (M_PI / 180.0);
-}
-float radians(float x)
-{
- return x * (M_PI / 180.0);
-}
-PERCOMP1(cos)
-PERCOMP1(sin)
-PERCOMP1(tan)
-PERCOMP1(acos)
-PERCOMP1(asin)
-PERCOMP1(atan)
-PERCOMP2(atan2)
-PERCOMP1(cosh)
-PERCOMP1(sinh)
-PERCOMP1(tanh)
-PERCOMP2F(pow)
-PERCOMP1(exp)
-PERCOMP1(exp2)
-PERCOMP1(expm1)
-PERCOMP1(log)
-point log(point a, float b)
-{
- return log(a) / log(b);
-}
-vector log(vector a, float b)
-{
- return log(a) / log(b);
-}
-color log(color a, float b)
-{
- return log(a) / log(b);
-}
-float log(float a, float b)
-{
- return log(a) / log(b);
-}
-PERCOMP1(log2)
-PERCOMP1(log10)
-PERCOMP1(logb)
-PERCOMP1(sqrt)
-PERCOMP1(inversesqrt)
-float hypot(float a, float b)
-{
- return sqrt(a * a + b * b);
-}
-float hypot(float a, float b, float c)
-{
- return sqrt(a * a + b * b + c * c);
-}
-PERCOMP1(abs)
-int abs(int x) BUILTIN;
-PERCOMP1(fabs)
-int fabs(int x) BUILTIN;
-PERCOMP1(sign)
-PERCOMP1(floor)
-PERCOMP1(ceil)
-PERCOMP1(round)
-PERCOMP1(trunc)
-PERCOMP2(fmod)
-PERCOMP2F(fmod)
-int mod(int a, int b)
-{
- return a - b * (int)floor(a / b);
-}
-point mod(point a, point b)
-{
- return a - b * floor(a / b);
-}
-vector mod(vector a, vector b)
-{
- return a - b * floor(a / b);
-}
-normal mod(normal a, normal b)
-{
- return a - b * floor(a / b);
-}
-color mod(color a, color b)
-{
- return a - b * floor(a / b);
-}
-point mod(point a, float b)
-{
- return a - b * floor(a / b);
-}
-vector mod(vector a, float b)
-{
- return a - b * floor(a / b);
-}
-normal mod(normal a, float b)
-{
- return a - b * floor(a / b);
-}
-color mod(color a, float b)
-{
- return a - b * floor(a / b);
-}
-float mod(float a, float b)
-{
- return a - b * floor(a / b);
-}
-PERCOMP2(min)
-int min(int a, int b) BUILTIN;
-PERCOMP2(max)
-int max(int a, int b) BUILTIN;
-normal clamp(normal x, normal minval, normal maxval)
-{
- return max(min(x, maxval), minval);
-}
-vector clamp(vector x, vector minval, vector maxval)
-{
- return max(min(x, maxval), minval);
-}
-point clamp(point x, point minval, point maxval)
-{
- return max(min(x, maxval), minval);
-}
-color clamp(color x, color minval, color maxval)
-{
- return max(min(x, maxval), minval);
-}
-float clamp(float x, float minval, float maxval)
-{
- return max(min(x, maxval), minval);
-}
-int clamp(int x, int minval, int maxval)
-{
- return max(min(x, maxval), minval);
-}
-#if 0
-normal mix (normal x, normal y, normal a) { return x*(1-a) + y*a; }
-normal mix (normal x, normal y, float a) { return x*(1-a) + y*a; }
-vector mix (vector x, vector y, vector a) { return x*(1-a) + y*a; }
-vector mix (vector x, vector y, float a) { return x*(1-a) + y*a; }
-point mix (point x, point y, point a) { return x*(1-a) + y*a; }
-point mix (point x, point y, float a) { return x*(1-a) + y*a; }
-color mix (color x, color y, color a) { return x*(1-a) + y*a; }
-color mix (color x, color y, float a) { return x*(1-a) + y*a; }
-float mix (float x, float y, float a) { return x*(1-a) + y*a; }
-#else
-normal mix(normal x, normal y, normal a) BUILTIN;
-normal mix(normal x, normal y, float a) BUILTIN;
-vector mix(vector x, vector y, vector a) BUILTIN;
-vector mix(vector x, vector y, float a) BUILTIN;
-point mix(point x, point y, point a) BUILTIN;
-point mix(point x, point y, float a) BUILTIN;
-color mix(color x, color y, color a) BUILTIN;
-color mix(color x, color y, float a) BUILTIN;
-float mix(float x, float y, float a) BUILTIN;
-#endif
-int isnan(float x) BUILTIN;
-int isinf(float x) BUILTIN;
-int isfinite(float x) BUILTIN;
-float erf(float x) BUILTIN;
-float erfc(float x) BUILTIN;
-
-// Vector functions
-
-vector cross(vector a, vector b) BUILTIN;
-float dot(vector a, vector b) BUILTIN;
-float length(vector v) BUILTIN;
-float distance(point a, point b) BUILTIN;
-float distance(point a, point b, point q)
-{
- vector d = b - a;
- float dd = dot(d, d);
- if (dd == 0.0)
- return distance(q, a);
- float t = dot(q - a, d) / dd;
- return distance(q, a + clamp(t, 0.0, 1.0) * d);
-}
-normal normalize(normal v) BUILTIN;
-vector normalize(vector v) BUILTIN;
-vector faceforward(vector N, vector I, vector Nref) BUILTIN;
-vector faceforward(vector N, vector I) BUILTIN;
-vector reflect(vector I, vector N)
-{
- return I - 2 * dot(N, I) * N;
-}
-vector refract(vector I, vector N, float eta)
-{
- float IdotN = dot(I, N);
- float k = 1 - eta * eta * (1 - IdotN * IdotN);
- return (k < 0) ? vector(0, 0, 0) : (eta * I - N * (eta * IdotN + sqrt(k)));
-}
-void fresnel(vector I,
- normal N,
- float eta,
- output float Kr,
- output float Kt,
- output vector R,
- output vector T)
-{
- float sqr(float x)
- {
- return x * x;
- }
- float c = dot(I, N);
- if (c < 0)
- c = -c;
- R = reflect(I, N);
- float g = 1.0 / sqr(eta) - 1.0 + c * c;
- if (g >= 0.0) {
- g = sqrt(g);
- float beta = g - c;
- float F = (c * (g + c) - 1.0) / (c * beta + 1.0);
- F = 0.5 * (1.0 + sqr(F));
- F *= sqr(beta / (g + c));
- Kr = F;
- Kt = (1.0 - Kr) * eta * eta;
- // OPT: the following recomputes some of the above values, but it
- // gives us the same result as if the shader-writer called refract()
- T = refract(I, N, eta);
- }
- else {
- // total internal reflection
- Kr = 1.0;
- Kt = 0.0;
- T = vector(0, 0, 0);
- }
-}
-
-void fresnel(vector I, normal N, float eta, output float Kr, output float Kt)
-{
- vector R, T;
- fresnel(I, N, eta, Kr, Kt, R, T);
-}
-
-normal transform(matrix Mto, normal p) BUILTIN;
-vector transform(matrix Mto, vector p) BUILTIN;
-point transform(matrix Mto, point p) BUILTIN;
-normal transform(string from, string to, normal p) BUILTIN;
-vector transform(string from, string to, vector p) BUILTIN;
-point transform(string from, string to, point p) BUILTIN;
-normal transform(string to, normal p)
-{
- return transform("common", to, p);
-}
-vector transform(string to, vector p)
-{
- return transform("common", to, p);
-}
-point transform(string to, point p)
-{
- return transform("common", to, p);
-}
-
-float transformu(string tounits, float x) BUILTIN;
-float transformu(string fromunits, string tounits, float x) BUILTIN;
-
-point rotate(point p, float angle, point a, point b)
-{
- vector axis = normalize(b - a);
- float cosang, sinang;
- /* Older OSX has major issues with sincos() function,
- * it's likely a bug in OSL or LLVM. Until we've
- * updated to new versions of these libraries we'll
- * use a workaround to prevent possible crashes on all
- * the platforms.
- *
- * Shouldn't be that bad because it's mainly used for
- * anisotropic shader where angle is usually constant.
- */
-#if 0
- sincos (angle, sinang, cosang);
-#else
- sinang = sin(angle);
- cosang = cos(angle);
-#endif
- float cosang1 = 1.0 - cosang;
- float x = axis[0], y = axis[1], z = axis[2];
- matrix M = matrix(x * x + (1.0 - x * x) * cosang,
- x * y * cosang1 + z * sinang,
- x * z * cosang1 - y * sinang,
- 0.0,
- x * y * cosang1 - z * sinang,
- y * y + (1.0 - y * y) * cosang,
- y * z * cosang1 + x * sinang,
- 0.0,
- x * z * cosang1 + y * sinang,
- y * z * cosang1 - x * sinang,
- z * z + (1.0 - z * z) * cosang,
- 0.0,
- 0.0,
- 0.0,
- 0.0,
- 1.0);
- return transform(M, p - a) + a;
-}
-
-normal ensure_valid_reflection(normal Ng, vector I, normal N)
-{
- /* The implementation here mirrors the one in kernel_montecarlo.h,
- * check there for an explanation of the algorithm. */
-
- float sqr(float x)
- {
- return x * x;
- }
-
- vector R = 2 * dot(N, I) * N - I;
-
- float threshold = min(0.9 * dot(Ng, I), 0.01);
- if (dot(Ng, R) >= threshold) {
- return N;
- }
-
- float NdotNg = dot(N, Ng);
- vector X = normalize(N - NdotNg * Ng);
-
- float Ix = dot(I, X), Iz = dot(I, Ng);
- float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
- float a = Ix2 + Iz2;
-
- float b = sqrt(Ix2 * (a - sqr(threshold)));
- float c = Iz * threshold + a;
-
- float fac = 0.5 / a;
- float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c);
- int valid1 = (N1_z2 > 1e-5) && (N1_z2 <= (1.0 + 1e-5));
- int valid2 = (N2_z2 > 1e-5) && (N2_z2 <= (1.0 + 1e-5));
-
- float N_new_x, N_new_z;
- if (valid1 && valid2) {
- float N1_x = sqrt(1.0 - N1_z2), N1_z = sqrt(N1_z2);
- float N2_x = sqrt(1.0 - N2_z2), N2_z = sqrt(N2_z2);
-
- float R1 = 2 * (N1_x * Ix + N1_z * Iz) * N1_z - Iz;
- float R2 = 2 * (N2_x * Ix + N2_z * Iz) * N2_z - Iz;
-
- valid1 = (R1 >= 1e-5);
- valid2 = (R2 >= 1e-5);
- if (valid1 && valid2) {
- N_new_x = (R1 < R2) ? N1_x : N2_x;
- N_new_z = (R1 < R2) ? N1_z : N2_z;
- }
- else {
- N_new_x = (R1 > R2) ? N1_x : N2_x;
- N_new_z = (R1 > R2) ? N1_z : N2_z;
- }
- }
- else if (valid1 || valid2) {
- float Nz2 = valid1 ? N1_z2 : N2_z2;
- N_new_x = sqrt(1.0 - Nz2);
- N_new_z = sqrt(Nz2);
- }
- else {
- return Ng;
- }
-
- return N_new_x * X + N_new_z * Ng;
-}
-
-// Color functions
-
-float luminance(color c) BUILTIN;
-color blackbody(float temperatureK) BUILTIN;
-color wavelength_color(float wavelength_nm) BUILTIN;
-
-color transformc(string to, color x)
-{
- color rgb_to_hsv(color rgb)
- { // See Foley & van Dam
- float r = rgb[0], g = rgb[1], b = rgb[2];
- float mincomp = min(r, min(g, b));
- float maxcomp = max(r, max(g, b));
- float delta = maxcomp - mincomp; // chroma
- float h, s, v;
- v = maxcomp;
- if (maxcomp > 0)
- s = delta / maxcomp;
- else
- s = 0;
- if (s <= 0)
- h = 0;
- else {
- if (r >= maxcomp)
- h = (g - b) / delta;
- else if (g >= maxcomp)
- h = 2 + (b - r) / delta;
- else
- h = 4 + (r - g) / delta;
- h /= 6;
- if (h < 0)
- h += 1;
- }
- return color(h, s, v);
- }
-
- color rgb_to_hsl(color rgb)
- { // See Foley & van Dam
- // First convert rgb to hsv, then to hsl
- float minval = min(rgb[0], min(rgb[1], rgb[2]));
- color hsv = rgb_to_hsv(rgb);
- float maxval = hsv[2]; // v == maxval
- float h = hsv[0], s, l = (minval + maxval) / 2;
- if (minval == maxval)
- s = 0; // special 'achromatic' case, hue is 0
- else if (l <= 0.5)
- s = (maxval - minval) / (maxval + minval);
- else
- s = (maxval - minval) / (2 - maxval - minval);
- return color(h, s, l);
- }
-
- color r;
- if (to == "rgb" || to == "RGB")
- r = x;
- else if (to == "hsv")
- r = rgb_to_hsv(x);
- else if (to == "hsl")
- r = rgb_to_hsl(x);
- else if (to == "YIQ")
- r = color(dot(vector(0.299, 0.587, 0.114), (vector)x),
- dot(vector(0.596, -0.275, -0.321), (vector)x),
- dot(vector(0.212, -0.523, 0.311), (vector)x));
- else if (to == "XYZ")
- r = color(dot(vector(0.412453, 0.357580, 0.180423), (vector)x),
- dot(vector(0.212671, 0.715160, 0.072169), (vector)x),
- dot(vector(0.019334, 0.119193, 0.950227), (vector)x));
- else {
- error("Unknown color space \"%s\"", to);
- r = x;
- }
- return r;
-}
-
-color transformc(string from, string to, color x)
-{
- color hsv_to_rgb(color c)
- { // Reference: Foley & van Dam
- float h = c[0], s = c[1], v = c[2];
- color r;
- if (s < 0.0001) {
- r = v;
- }
- else {
- h = 6 * (h - floor(h)); // expand to [0..6)
- int hi = (int)h;
- float f = h - hi;
- float p = v * (1 - s);
- float q = v * (1 - s * f);
- float t = v * (1 - s * (1 - f));
- if (hi == 0)
- r = color(v, t, p);
- else if (hi == 1)
- r = color(q, v, p);
- else if (hi == 2)
- r = color(p, v, t);
- else if (hi == 3)
- r = color(p, q, v);
- else if (hi == 4)
- r = color(t, p, v);
- else
- r = color(v, p, q);
- }
- return r;
- }
-
- color hsl_to_rgb(color c)
- {
- float h = c[0], s = c[1], l = c[2];
- // Easiest to convert hsl -> hsv, then hsv -> RGB (per Foley & van Dam)
- float v = (l <= 0.5) ? (l * (1 + s)) : (l * (1 - s) + s);
- color r;
- if (v <= 0) {
- r = 0;
- }
- else {
- float min = 2 * l - v;
- s = (v - min) / v;
- r = hsv_to_rgb(color(h, s, v));
- }
- return r;
- }
-
- color r;
- if (from == "rgb" || from == "RGB")
- r = x;
- else if (from == "hsv")
- r = hsv_to_rgb(x);
- else if (from == "hsl")
- r = hsl_to_rgb(x);
- else if (from == "YIQ")
- r = color(dot(vector(1, 0.9557, 0.6199), (vector)x),
- dot(vector(1, -0.2716, -0.6469), (vector)x),
- dot(vector(1, -1.1082, 1.7051), (vector)x));
- else if (from == "XYZ")
- r = color(dot(vector(3.240479, -1.537150, -0.498535), (vector)x),
- dot(vector(-0.969256, 1.875991, 0.041556), (vector)x),
- dot(vector(0.055648, -0.204043, 1.057311), (vector)x));
- else {
- error("Unknown color space \"%s\"", to);
- r = x;
- }
- return transformc(to, r);
-}
-
-// Matrix functions
-
-float determinant(matrix m) BUILTIN;
-matrix transpose(matrix m) BUILTIN;
-
-// Pattern generation
-
-color step(color edge, color x) BUILTIN;
-point step(point edge, point x) BUILTIN;
-vector step(vector edge, vector x) BUILTIN;
-normal step(normal edge, normal x) BUILTIN;
-float step(float edge, float x) BUILTIN;
-float smoothstep(float edge0, float edge1, float x) BUILTIN;
-
-float linearstep(float edge0, float edge1, float x)
-{
- float result;
- if (edge0 != edge1) {
- float xclamped = clamp(x, edge0, edge1);
- result = (xclamped - edge0) / (edge1 - edge0);
- }
- else { // special case: edges coincide
- result = step(edge0, x);
- }
- return result;
-}
-
-float smooth_linearstep(float edge0, float edge1, float x_, float eps_)
-{
- float result;
- if (edge0 != edge1) {
- float rampup(float x, float r)
- {
- return 0.5 / r * x * x;
- }
- float width_inv = 1.0 / (edge1 - edge0);
- float eps = eps_ * width_inv;
- float x = (x_ - edge0) * width_inv;
- if (x <= -eps)
- result = 0;
- else if (x >= eps && x <= 1.0 - eps)
- result = x;
- else if (x >= 1.0 + eps)
- result = 1;
- else if (x < eps)
- result = rampup(x + eps, 2.0 * eps);
- else /* if (x < 1.0+eps) */
- result = 1.0 - rampup(1.0 + eps - x, 2.0 * eps);
- }
- else {
- result = step(edge0, x_);
- }
- return result;
-}
-
-float aastep(float edge, float s, float dedge, float ds)
-{
- // Box filtered AA step
- float width = fabs(dedge) + fabs(ds);
- float halfwidth = 0.5 * width;
- float e1 = edge - halfwidth;
- return (s <= e1) ? 0.0 : ((s >= (edge + halfwidth)) ? 1.0 : (s - e1) / width);
-}
-float aastep(float edge, float s, float ds)
-{
- return aastep(edge, s, filterwidth(edge), ds);
-}
-float aastep(float edge, float s)
-{
- return aastep(edge, s, filterwidth(edge), filterwidth(s));
-}
-
-// Derivatives and area operators
-
-// Displacement functions
-
-// String functions
-int strlen(string s) BUILTIN;
-int hash(string s) BUILTIN;
-int getchar(string s, int index) BUILTIN;
-int startswith(string s, string prefix) BUILTIN;
-int endswith(string s, string suffix) BUILTIN;
-string substr(string s, int start, int len) BUILTIN;
-string substr(string s, int start)
-{
- return substr(s, start, strlen(s));
-}
-float stof(string str) BUILTIN;
-int stoi(string str) BUILTIN;
-
-// Define concat in terms of shorter concat
-string concat(string a, string b, string c)
-{
- return concat(concat(a, b), c);
-}
-string concat(string a, string b, string c, string d)
-{
- return concat(concat(a, b, c), d);
-}
-string concat(string a, string b, string c, string d, string e)
-{
- return concat(concat(a, b, c, d), e);
-}
-string concat(string a, string b, string c, string d, string e, string f)
-{
- return concat(concat(a, b, c, d, e), f);
-}
-
-// Texture
-
-// Closures
-
-closure color diffuse(normal N) BUILTIN;
-closure color oren_nayar(normal N, float sigma) BUILTIN;
-closure color diffuse_ramp(normal N, color colors[8]) BUILTIN;
-closure color phong_ramp(normal N, float exponent, color colors[8]) BUILTIN;
-closure color diffuse_toon(normal N, float size, float smooth) BUILTIN;
-closure color glossy_toon(normal N, float size, float smooth) BUILTIN;
-closure color translucent(normal N) BUILTIN;
-closure color reflection(normal N) BUILTIN;
-closure color refraction(normal N, float eta) BUILTIN;
-closure color transparent() BUILTIN;
-closure color microfacet_ggx(normal N, float ag) BUILTIN;
-closure color microfacet_ggx_aniso(normal N, vector T, float ax, float ay) BUILTIN;
-closure color microfacet_ggx_refraction(normal N, float ag, float eta) BUILTIN;
-closure color microfacet_multi_ggx(normal N, float ag, color C) BUILTIN;
-closure color microfacet_multi_ggx_aniso(normal N, vector T, float ax, float ay, color C) BUILTIN;
-closure color microfacet_multi_ggx_glass(normal N, float ag, float eta, color C) BUILTIN;
-closure color microfacet_ggx_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
-closure color microfacet_ggx_aniso_fresnel(
- normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN;
-closure color
-microfacet_multi_ggx_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
-closure color microfacet_multi_ggx_aniso_fresnel(
- normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN;
-closure color
-microfacet_multi_ggx_glass_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
-closure color microfacet_beckmann(normal N, float ab) BUILTIN;
-closure color microfacet_beckmann_aniso(normal N, vector T, float ax, float ay) BUILTIN;
-closure color microfacet_beckmann_refraction(normal N, float ab, float eta) BUILTIN;
-closure color ashikhmin_shirley(normal N, vector T, float ax, float ay) BUILTIN;
-closure color ashikhmin_velvet(normal N, float sigma) BUILTIN;
-closure color emission() BUILTIN;
-closure color background() BUILTIN;
-closure color holdout() BUILTIN;
-closure color ambient_occlusion() BUILTIN;
-closure color principled_diffuse(normal N, float roughness) BUILTIN;
-closure color principled_sheen(normal N) BUILTIN;
-closure color principled_clearcoat(normal N, float clearcoat, float clearcoat_roughness) BUILTIN;
-
-// BSSRDF
-closure color bssrdf(string method, normal N, vector radius, color albedo) BUILTIN;
-
-// Hair
-closure color
-hair_reflection(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN;
-closure color
-hair_transmission(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN;
-closure color principled_hair(normal N,
- color sigma,
- float roughnessu,
- float roughnessv,
- float coat,
- float alpha,
- float eta) BUILTIN;
-
-// Volume
-closure color henyey_greenstein(float g) BUILTIN;
-closure color absorption() BUILTIN;
-
-// OSL 1.5 Microfacet functions
-closure color microfacet(
- string distribution, normal N, vector U, float xalpha, float yalpha, float eta, int refract)
-{
- /* GGX */
- if (distribution == "ggx" || distribution == "default") {
- if (!refract) {
- if (xalpha == yalpha) {
- /* Isotropic */
- return microfacet_ggx(N, xalpha);
- }
- else {
- /* Anisotropic */
- return microfacet_ggx_aniso(N, U, xalpha, yalpha);
- }
- }
- else {
- return microfacet_ggx_refraction(N, xalpha, eta);
- }
- }
- /* Beckmann */
- else {
- if (!refract) {
- if (xalpha == yalpha) {
- /* Isotropic */
- return microfacet_beckmann(N, xalpha);
- }
- else {
- /* Anisotropic */
- return microfacet_beckmann_aniso(N, U, xalpha, yalpha);
- }
- }
- else {
- return microfacet_beckmann_refraction(N, xalpha, eta);
- }
- }
-}
-
-closure color microfacet(string distribution, normal N, float alpha, float eta, int refract)
-{
- return microfacet(distribution, N, vector(0), alpha, alpha, eta, refract);
-}
-
-// Renderer state
-int backfacing() BUILTIN;
-int raytype(string typename) BUILTIN;
-// the individual 'isFOOray' functions are deprecated
-int iscameraray()
-{
- return raytype("camera");
-}
-int isdiffuseray()
-{
- return raytype("diffuse");
-}
-int isglossyray()
-{
- return raytype("glossy");
-}
-int isshadowray()
-{
- return raytype("shadow");
-}
-int getmatrix(string fromspace, string tospace, output matrix M) BUILTIN;
-int getmatrix(string fromspace, output matrix M)
-{
- return getmatrix(fromspace, "common", M);
-}
-
-// Miscellaneous
-
-#undef BUILTIN
-#undef BUILTIN_DERIV
-#undef PERCOMP1
-#undef PERCOMP2
-#undef PERCOMP2F
-
-#endif /* CCL_STDOSL_H */
diff --git a/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h b/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h
new file mode 100644
index 00000000000..60ebf415970
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_adaptive_adjust_samples(KernelGlobals *kg)
+{
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (pixel_index < kernel_split_params.tile.w * kernel_split_params.tile.h) {
+ int x = kernel_split_params.tile.x + pixel_index % kernel_split_params.tile.w;
+ int y = kernel_split_params.tile.y + pixel_index / kernel_split_params.tile.w;
+ int buffer_offset = (kernel_split_params.tile.offset + x +
+ y * kernel_split_params.tile.stride) *
+ kernel_data.film.pass_stride;
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+ int sample = kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples;
+ if (buffer[kernel_data.film.pass_sample_count] < 0.0f) {
+ buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count];
+ float sample_multiplier = sample / max((float)kernel_split_params.tile.start_sample + 1.0f,
+ buffer[kernel_data.film.pass_sample_count]);
+ if (sample_multiplier != 1.0f) {
+ kernel_adaptive_post_adjust(kg, buffer, sample_multiplier);
+ }
+ }
+ else {
+ kernel_adaptive_post_adjust(kg, buffer, sample / (sample - 1.0f));
+ }
+ }
+}
+
+CCL_NAMESPACE_END
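
The per-pixel rescaling above boils down to a small piece of arithmetic: a negated sample count marks a pixel that converged early and is rescaled against max(start_sample + 1, its own count), while pixels that stayed active get the fixed sample / (sample - 1) factor. A standalone C++ sketch of that arithmetic, with hypothetical variable names in place of the tile and buffer plumbing:

#include <algorithm>
#include <cstdio>

/* Return the factor applied to one pixel's accumulated passes, mirroring the
 * branch structure of kernel_adaptive_adjust_samples() above. 'sample' is the
 * tile's start_sample + num_samples; a negative stored count flags a pixel
 * that stopped sampling early. */
static float adjust_multiplier(int start_sample, int sample, float stored_sample_count)
{
  if (stored_sample_count < 0.0f) {
    float actual_samples = -stored_sample_count; /* remove the convergence flag */
    return sample / std::max((float)start_sample + 1.0f, actual_samples);
  }
  /* Pixel stayed active to the end: apply the fixed catch-up factor. */
  return sample / (sample - 1.0f);
}

int main()
{
  const int start_sample = 0, sample = 128;
  std::printf("converged after 64 samples -> x%.3f\n",
              adjust_multiplier(start_sample, sample, -64.0f));
  std::printf("still active at the end    -> x%.3f\n",
              adjust_multiplier(start_sample, sample, 96.0f));
  return 0;
}
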
diff --git a/intern/cycles/kernel/split/kernel_adaptive_filter_x.h b/intern/cycles/kernel/split/kernel_adaptive_filter_x.h
new file mode 100644
index 00000000000..93f41f7ced4
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_adaptive_filter_x.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_adaptive_filter_x(KernelGlobals *kg)
+{
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (pixel_index < kernel_split_params.tile.h &&
+ kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples >=
+ kernel_data.integrator.adaptive_min_samples) {
+ int y = kernel_split_params.tile.y + pixel_index;
+ kernel_do_adaptive_filter_x(kg, y, &kernel_split_params.tile);
+ }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_adaptive_filter_y.h b/intern/cycles/kernel/split/kernel_adaptive_filter_y.h
new file mode 100644
index 00000000000..eca53d079ec
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_adaptive_filter_y.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_adaptive_filter_y(KernelGlobals *kg)
+{
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (pixel_index < kernel_split_params.tile.w &&
+ kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples >=
+ kernel_data.integrator.adaptive_min_samples) {
+ int x = kernel_split_params.tile.x + pixel_index;
+ kernel_do_adaptive_filter_y(kg, x, &kernel_split_params.tile);
+ }
+}
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_adaptive_stopping.h b/intern/cycles/kernel/split/kernel_adaptive_stopping.h
new file mode 100644
index 00000000000..c8eb1ebd705
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_adaptive_stopping.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_adaptive_stopping(KernelGlobals *kg)
+{
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (pixel_index < kernel_split_params.tile.w * kernel_split_params.tile.h &&
+ kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples >=
+ kernel_data.integrator.adaptive_min_samples) {
+ int x = kernel_split_params.tile.x + pixel_index % kernel_split_params.tile.w;
+ int y = kernel_split_params.tile.y + pixel_index / kernel_split_params.tile.w;
+ int buffer_offset = (kernel_split_params.tile.offset + x +
+ y * kernel_split_params.tile.stride) *
+ kernel_data.film.pass_stride;
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+ kernel_do_adaptive_stopping(kg,
+ buffer,
+ kernel_split_params.tile.start_sample +
+ kernel_split_params.tile.num_samples - 1);
+ }
+}
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_branched.h b/intern/cycles/kernel/split/kernel_branched.h
index e08d87ab618..bfcd21baac4 100644
--- a/intern/cycles/kernel/split/kernel_branched.h
+++ b/intern/cycles/kernel/split/kernel_branched.h
@@ -106,7 +106,7 @@ ccl_device_inline bool kernel_split_branched_indirect_start_shared(KernelGlobals
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
PathRadiance *inactive_L = &kernel_split_state.path_radiance[inactive_ray];
- path_radiance_init(inactive_L, kernel_data.film.use_light_pass);
+ path_radiance_init(kg, inactive_L);
path_radiance_copy_indirect(inactive_L, L);
ray_state[inactive_ray] = RAY_REGENERATED;
diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h
index e77743350dc..dba1768f03f 100644
--- a/intern/cycles/kernel/split/kernel_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_buffer_update.h
@@ -132,10 +132,10 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
if (ray->t != 0.0f) {
/* Initialize throughput, path radiance, Ray, PathState;
- * These rays proceed with path-iteration.
- */
+ * These rays proceed with path-iteration.
+ */
*throughput = make_float3(1.0f, 1.0f, 1.0f);
- path_radiance_init(L, kernel_data.film.use_light_pass);
+ path_radiance_init(kg, L);
path_state_init(kg,
AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
state,
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 52930843f56..2f83a10316d 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -46,10 +46,10 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
int sh,
int offset,
int stride,
- ccl_global int *Queue_index, /* Tracks the number of elements in queues */
- int queuesize, /* size (capacity) of the queue */
- ccl_global char *
- use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */
+ ccl_global int *Queue_index, /* Tracks the number of elements in queues */
+ int queuesize, /* size (capacity) of the queue */
+ ccl_global char *use_queues_flag, /* flag to decide if scene-intersect kernel should use queues
+ to fetch ray index */
ccl_global unsigned int *work_pools, /* Work pool for each work group */
unsigned int num_samples,
ccl_global float *buffer)
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
index dd3ffe3f52c..d0c91d43eed 100644
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -88,7 +88,6 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
LightSample ls;
if (light_sample(
kg, light_u, light_v, sd->time, sd->P_pick, sd->N_pick, state->bounce, &ls, false)) {
-
Ray light_ray;
light_ray.time = sd->time;
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
index bdadfb45d74..99a7973019e 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -44,7 +44,7 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K
branched_state->isect.t :
FLT_MAX;
- bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack);
+ float step_size = volume_stack_step_size(kg, branched_state->path_state.volume_stack);
for (int j = branched_state->next_sample; j < num_samples; j++) {
ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
@@ -61,7 +61,7 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K
/* integrate along volume segment with distance sampling */
VolumeIntegrateResult result = kernel_volume_integrate(
- kg, ps, sd, &volume_ray, L, tp, heterogeneous);
+ kg, ps, sd, &volume_ray, L, tp, step_size);
kernel_update_light_picking(sd, ps);
@@ -166,12 +166,12 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
if (!kernel_data.integrator.branched ||
IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
# endif /* __BRANCHED_PATH__ */
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+ float step_size = volume_stack_step_size(kg, state->volume_stack);
{
/* integrate along volume segment with distance sampling */
VolumeIntegrateResult result = kernel_volume_integrate(
- kg, state, sd, &volume_ray, L, throughput, heterogeneous);
+ kg, state, sd, &volume_ray, L, throughput, step_size);
# ifdef __VOLUME_SCATTER__
if (result == VOLUME_PATH_SCATTERED) {
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
index 6eb9e1815c4..630625fa2ac 100644
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -123,9 +123,9 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
/* Path termination. this is a strange place to put the termination, it's
- * mainly due to the mixed in MIS that we use. gives too many unneeded
- * shader evaluations, only need emission if we are going to terminate.
- */
+ * mainly due to the mixed in MIS that we use. gives too many unneeded
+ * shader evaluations, only need emission if we are going to terminate.
+ */
float probability = path_state_continuation_probability(kg, state, throughput);
if (probability == 0.0f) {
@@ -141,10 +141,12 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
}
}
+#ifdef __DENOISING_FEATURES__
if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
kernel_update_denoising_features(kg, sd, state, L);
}
+#endif
}
#ifdef __AO__
diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h
index b1c65f61e2c..6d500650cc0 100644
--- a/intern/cycles/kernel/split/kernel_indirect_background.h
+++ b/intern/cycles/kernel/split/kernel_indirect_background.h
@@ -58,8 +58,10 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg)
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
float3 throughput = kernel_split_state.throughput[ray_index];
ShaderData *sd = kernel_split_sd(sd, ray_index);
+ uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
- kernel_path_background(kg, state, ray, throughput, sd, L);
+ kernel_path_background(kg, state, ray, throughput, sd, buffer, L);
kernel_split_path_end(kg, ray_index);
}
}
diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
index 781ce869374..320f6a414bf 100644
--- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h
+++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
@@ -109,9 +109,9 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
/* If we are here, then it means that scene-intersect kernel
- * has already been executed atleast once. From the next time,
- * scene-intersect kernel may operate on queues to fetch ray index
- */
+ * has already been executed at least once. From the next time,
+ * scene-intersect kernel may operate on queues to fetch ray index
+ */
*kernel_split_params.use_queues_flag = 1;
/* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and
diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h
index 3faa3208341..82b0f583d8d 100644
--- a/intern/cycles/kernel/split/kernel_path_init.h
+++ b/intern/cycles/kernel/split/kernel_path_init.h
@@ -59,8 +59,7 @@ ccl_device void kernel_path_init(KernelGlobals *kg)
* These rays proceed with path-iteration.
*/
kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
- path_radiance_init(&kernel_split_state.path_radiance[ray_index],
- kernel_data.film.use_light_pass);
+ path_radiance_init(kg, &kernel_split_state.path_radiance[ray_index]);
path_state_init(kg,
AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
&kernel_split_state.path_state[ray_index],
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h
index 8e39c9797e5..c760a2b2049 100644
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -50,8 +50,10 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg)
ccl_global char *ray_state = kernel_split_state.ray_state;
if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
- shader_eval_surface(kg, kernel_split_sd(sd, ray_index), state, state->flag);
+ shader_eval_surface(kg, kernel_split_sd(sd, ray_index), state, buffer, state->flag);
#ifdef __BRANCHED_PATH__
if (kernel_data.integrator.branched) {
shader_merge_closures(kernel_split_sd(sd, ray_index));
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
index 82990ce9fae..5e46d300bca 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
@@ -87,7 +87,7 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
if (!shadow_blocked(kg, sd, emission_sd, state, &ray, &shadow)) {
/* accumulate */
- path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
+ path_radiance_accum_light(kg, L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
}
else {
path_radiance_accum_total_light(L, state, throughput, &L_light);
diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h
index 384bc952460..5114f2b03e5 100644
--- a/intern/cycles/kernel/split/kernel_split_common.h
+++ b/intern/cycles/kernel/split/kernel_split_common.h
@@ -17,6 +17,7 @@
#ifndef __KERNEL_SPLIT_H__
#define __KERNEL_SPLIT_H__
+// clang-format off
#include "kernel/kernel_math.h"
#include "kernel/kernel_types.h"
@@ -52,6 +53,7 @@
#ifdef __BRANCHED_PATH__
# include "kernel/split/kernel_branched.h"
#endif
+// clang-format on
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h
index 433b1221a37..decc537b39b 100644
--- a/intern/cycles/kernel/split/kernel_split_data.h
+++ b/intern/cycles/kernel/split/kernel_split_data.h
@@ -18,6 +18,7 @@
#define __KERNEL_SPLIT_DATA_H__
#include "kernel/split/kernel_split_data_types.h"
+
#include "kernel/kernel_globals.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h
index 6ff3f5bdb55..ac4a450ca2b 100644
--- a/intern/cycles/kernel/split/kernel_split_data_types.h
+++ b/intern/cycles/kernel/split/kernel_split_data_types.h
@@ -19,7 +19,8 @@
CCL_NAMESPACE_BEGIN
-/* parameters used by the split kernels, we use a single struct to avoid passing these to each kernel */
+/* Parameters used by the split kernels; we use a single struct to avoid passing these to each
+ * kernel. */
typedef struct SplitParams {
WorkTile tile;
@@ -112,7 +113,8 @@ typedef ccl_global struct SplitBranchedState {
SPLIT_DATA_BRANCHED_ENTRIES \
SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
-/* entries to be copied to inactive rays when sharing branched samples (TODO: which are actually needed?) */
+/* Entries to be copied to inactive rays when sharing branched samples
+ * (TODO: which are actually needed?) */
#define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \
SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
@@ -134,8 +136,9 @@ typedef struct SplitData {
SPLIT_DATA_ENTRIES
#undef SPLIT_DATA_ENTRY
- /* this is actually in a separate buffer from the rest of the split state data (so it can be read back from
- * the host easily) but is still used the same as the other data so we have it here in this struct as well
+ /* this is actually in a separate buffer from the rest of the split state data (so it can be read
+ * back from the host easily) but is still used the same as the other data so we have it here in
+ * this struct as well
*/
ccl_global char *ray_state;
} SplitData;
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index 4a386afa5de..abeb8fa7457 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -132,16 +132,25 @@ ccl_device_inline float4 fetch_node_float(KernelGlobals *kg, int offset)
__uint_as_float(node.w));
}
-ccl_device_inline void decode_node_uchar4(uint i, uint *x, uint *y, uint *z, uint *w)
+ccl_device_forceinline void svm_unpack_node_uchar2(uint i, uint *x, uint *y)
{
- if (x)
- *x = (i & 0xFF);
- if (y)
- *y = ((i >> 8) & 0xFF);
- if (z)
- *z = ((i >> 16) & 0xFF);
- if (w)
- *w = ((i >> 24) & 0xFF);
+ *x = (i & 0xFF);
+ *y = ((i >> 8) & 0xFF);
+}
+
+ccl_device_forceinline void svm_unpack_node_uchar3(uint i, uint *x, uint *y, uint *z)
+{
+ *x = (i & 0xFF);
+ *y = ((i >> 8) & 0xFF);
+ *z = ((i >> 16) & 0xFF);
+}
+
+ccl_device_forceinline void svm_unpack_node_uchar4(uint i, uint *x, uint *y, uint *z, uint *w)
+{
+ *x = (i & 0xFF);
+ *y = ((i >> 8) & 0xFF);
+ *z = ((i >> 16) & 0xFF);
+ *w = ((i >> 24) & 0xFF);
}
CCL_NAMESPACE_END
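
The renamed svm_unpack_node_uchar2/3/4 helpers above simply split a 32-bit SVM node word into byte-sized fields. As a round-trip illustration, here is a standalone C++ sketch with a hypothetical pack helper added as the inverse; it is not part of the kernel sources.

#include <cassert>
#include <cstdint>

/* Pack four 8-bit values into one 32-bit node word (low byte first),
 * the inverse of the svm_unpack_node_uchar4() layout shown above. */
static uint32_t pack_node_uchar4(uint32_t x, uint32_t y, uint32_t z, uint32_t w)
{
  return (x & 0xFF) | ((y & 0xFF) << 8) | ((z & 0xFF) << 16) | ((w & 0xFF) << 24);
}

static void unpack_node_uchar4(uint32_t i, uint32_t *x, uint32_t *y, uint32_t *z, uint32_t *w)
{
  *x = (i & 0xFF);
  *y = ((i >> 8) & 0xFF);
  *z = ((i >> 16) & 0xFF);
  *w = ((i >> 24) & 0xFF);
}

int main()
{
  uint32_t x, y, z, w;
  unpack_node_uchar4(pack_node_uchar4(3, 7, 31, 255), &x, &y, &z, &w);
  assert(x == 3 && y == 7 && z == 31 && w == 255);
  return 0;
}
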
@@ -149,49 +158,56 @@ CCL_NAMESPACE_END
/* Nodes */
#include "kernel/svm/svm_noise.h"
-#include "svm_texture.h"
+#include "svm_fractal_noise.h"
#include "kernel/svm/svm_color_util.h"
+#include "kernel/svm/svm_mapping_util.h"
#include "kernel/svm/svm_math_util.h"
+#include "kernel/svm/svm_aov.h"
#include "kernel/svm/svm_attribute.h"
-#include "kernel/svm/svm_gradient.h"
#include "kernel/svm/svm_blackbody.h"
+#include "kernel/svm/svm_brick.h"
+#include "kernel/svm/svm_brightness.h"
+#include "kernel/svm/svm_bump.h"
+#include "kernel/svm/svm_camera.h"
+#include "kernel/svm/svm_checker.h"
+#include "kernel/svm/svm_clamp.h"
#include "kernel/svm/svm_closure.h"
-#include "kernel/svm/svm_noisetex.h"
#include "kernel/svm/svm_convert.h"
#include "kernel/svm/svm_displace.h"
#include "kernel/svm/svm_fresnel.h"
-#include "kernel/svm/svm_wireframe.h"
-#include "kernel/svm/svm_wavelength.h"
-#include "kernel/svm/svm_camera.h"
+#include "kernel/svm/svm_gamma.h"
#include "kernel/svm/svm_geometry.h"
+#include "kernel/svm/svm_gradient.h"
#include "kernel/svm/svm_hsv.h"
#include "kernel/svm/svm_ies.h"
#include "kernel/svm/svm_image.h"
-#include "kernel/svm/svm_gamma.h"
-#include "kernel/svm/svm_brightness.h"
#include "kernel/svm/svm_invert.h"
#include "kernel/svm/svm_light_path.h"
#include "kernel/svm/svm_magic.h"
+#include "kernel/svm/svm_map_range.h"
#include "kernel/svm/svm_mapping.h"
-#include "kernel/svm/svm_normal.h"
-#include "kernel/svm/svm_wave.h"
#include "kernel/svm/svm_math.h"
#include "kernel/svm/svm_mix.h"
+#include "kernel/svm/svm_musgrave.h"
+#include "kernel/svm/svm_noisetex.h"
+#include "kernel/svm/svm_normal.h"
#include "kernel/svm/svm_ramp.h"
#include "kernel/svm/svm_sepcomb_hsv.h"
#include "kernel/svm/svm_sepcomb_vector.h"
-#include "kernel/svm/svm_musgrave.h"
#include "kernel/svm/svm_sky.h"
#include "kernel/svm/svm_tex_coord.h"
#include "kernel/svm/svm_value.h"
-#include "kernel/svm/svm_voronoi.h"
-#include "kernel/svm/svm_checker.h"
-#include "kernel/svm/svm_brick.h"
+#include "kernel/svm/svm_vector_rotate.h"
#include "kernel/svm/svm_vector_transform.h"
+#include "kernel/svm/svm_vertex_color.h"
+#include "kernel/svm/svm_voronoi.h"
#include "kernel/svm/svm_voxel.h"
-#include "kernel/svm/svm_bump.h"
+#include "kernel/svm/svm_wave.h"
+#include "kernel/svm/svm_wavelength.h"
+#include "kernel/svm/svm_white_noise.h"
+#include "kernel/svm/svm_wireframe.h"
#ifdef __SHADER_RAYTRACE__
# include "kernel/svm/svm_ao.h"
@@ -200,13 +216,11 @@ CCL_NAMESPACE_END
CCL_NAMESPACE_BEGIN
-#define NODES_GROUP(group) ((group) <= __NODES_MAX_GROUP__)
-#define NODES_FEATURE(feature) ((__NODES_FEATURES__ & (feature)) != 0)
-
/* Main Interpreter Loop */
ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
ShaderData *sd,
ccl_addr_space PathState *state,
+ ccl_global float *buffer,
ShaderType type,
int path_flag)
{
@@ -217,6 +231,8 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
uint4 node = read_node(kg, &offset);
switch (node.x) {
+ case NODE_END:
+ return;
#if NODES_GROUP(NODE_GROUP_LEVEL_0)
case NODE_SHADER_JUMP: {
if (type == SHADER_TYPE_SURFACE)
@@ -276,6 +292,9 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
case NODE_ATTR:
svm_node_attr(kg, sd, stack, node);
break;
+ case NODE_VERTEX_COLOR:
+ svm_node_vertex_color(kg, sd, stack, node.y, node.z, node.w);
+ break;
# if NODES_FEATURE(NODE_FEATURE_BUMP)
case NODE_GEOMETRY_BUMP_DX:
svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z);
@@ -293,19 +312,16 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
svm_node_vector_displacement(kg, sd, stack, node, &offset);
break;
# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */
-# ifdef __TEXTURES__
case NODE_TEX_IMAGE:
- svm_node_tex_image(kg, sd, stack, node);
+ svm_node_tex_image(kg, sd, stack, node, &offset);
break;
case NODE_TEX_IMAGE_BOX:
svm_node_tex_image_box(kg, sd, stack, node);
break;
case NODE_TEX_NOISE:
- svm_node_tex_noise(kg, sd, stack, node, &offset);
+ svm_node_tex_noise(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
-# endif /* __TEXTURES__ */
-# ifdef __EXTRA_NODES__
-# if NODES_FEATURE(NODE_FEATURE_BUMP)
+# if NODES_FEATURE(NODE_FEATURE_BUMP)
case NODE_SET_BUMP:
svm_node_set_bump(kg, sd, stack, node);
break;
@@ -315,6 +331,12 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
case NODE_ATTR_BUMP_DY:
svm_node_attr_bump_dy(kg, sd, stack, node);
break;
+ case NODE_VERTEX_COLOR_BUMP_DX:
+ svm_node_vertex_color_bump_dx(kg, sd, stack, node.y, node.z, node.w);
+ break;
+ case NODE_VERTEX_COLOR_BUMP_DY:
+ svm_node_vertex_color_bump_dy(kg, sd, stack, node.y, node.z, node.w);
+ break;
case NODE_TEX_COORD_BUMP_DX:
svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, &offset);
break;
@@ -324,20 +346,19 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
case NODE_CLOSURE_SET_NORMAL:
svm_node_set_normal(kg, sd, stack, node.y, node.z);
break;
-# if NODES_FEATURE(NODE_FEATURE_BUMP_STATE)
+# if NODES_FEATURE(NODE_FEATURE_BUMP_STATE)
case NODE_ENTER_BUMP_EVAL:
svm_node_enter_bump_eval(kg, sd, stack, node.y);
break;
case NODE_LEAVE_BUMP_EVAL:
svm_node_leave_bump_eval(kg, sd, stack, node.y);
break;
-# endif /* NODES_FEATURE(NODE_FEATURE_BUMP_STATE) */
-# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */
+# endif /* NODES_FEATURE(NODE_FEATURE_BUMP_STATE) */
+# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */
case NODE_HSV:
svm_node_hsv(kg, sd, stack, node, &offset);
break;
-# endif /* __EXTRA_NODES__ */
-#endif /* NODES_GROUP(NODE_GROUP_LEVEL_0) */
+#endif /* NODES_GROUP(NODE_GROUP_LEVEL_0) */
#if NODES_GROUP(NODE_GROUP_LEVEL_1)
case NODE_CLOSURE_HOLDOUT:
@@ -357,7 +378,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
svm_node_principled_volume(kg, sd, stack, node, type, path_flag, &offset);
break;
# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */
-# ifdef __EXTRA_NODES__
case NODE_MATH:
svm_node_math(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
@@ -382,19 +402,19 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
case NODE_PARTICLE_INFO:
svm_node_particle_info(kg, sd, stack, node.y, node.z);
break;
-# ifdef __HAIR__
-# if NODES_FEATURE(NODE_FEATURE_HAIR)
+# if defined(__HAIR__) && NODES_FEATURE(NODE_FEATURE_HAIR)
case NODE_HAIR_INFO:
svm_node_hair_info(kg, sd, stack, node.y, node.z);
break;
-# endif /* NODES_FEATURE(NODE_FEATURE_HAIR) */
-# endif /* __HAIR__ */
-# endif /* __EXTRA_NODES__ */
-#endif /* NODES_GROUP(NODE_GROUP_LEVEL_1) */
+# endif /* NODES_FEATURE(NODE_FEATURE_HAIR) */
+#endif /* NODES_GROUP(NODE_GROUP_LEVEL_1) */
#if NODES_GROUP(NODE_GROUP_LEVEL_2)
+ case NODE_TEXTURE_MAPPING:
+ svm_node_texture_mapping(kg, sd, stack, node.y, node.z, &offset);
+ break;
case NODE_MAPPING:
- svm_node_mapping(kg, sd, stack, node.y, node.z, &offset);
+ svm_node_mapping(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
case NODE_MIN_MAX:
svm_node_min_max(kg, sd, stack, node.y, node.z, &offset);
@@ -402,7 +422,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
case NODE_CAMERA:
svm_node_camera(kg, sd, stack, node.y, node.z, node.w);
break;
-# ifdef __TEXTURES__
case NODE_TEX_ENVIRONMENT:
svm_node_tex_environment(kg, sd, stack, node);
break;
@@ -413,10 +432,10 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
svm_node_tex_gradient(sd, stack, node);
break;
case NODE_TEX_VORONOI:
- svm_node_tex_voronoi(kg, sd, stack, node, &offset);
+ svm_node_tex_voronoi(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
case NODE_TEX_MUSGRAVE:
- svm_node_tex_musgrave(kg, sd, stack, node, &offset);
+ svm_node_tex_musgrave(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
case NODE_TEX_WAVE:
svm_node_tex_wave(kg, sd, stack, node, &offset);
@@ -430,8 +449,9 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
case NODE_TEX_BRICK:
svm_node_tex_brick(kg, sd, stack, node, &offset);
break;
-# endif /* __TEXTURES__ */
-# ifdef __EXTRA_NODES__
+ case NODE_TEX_WHITE_NOISE:
+ svm_node_tex_white_noise(kg, sd, stack, node.y, node.z, node.w, &offset);
+ break;
case NODE_NORMAL:
svm_node_normal(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
@@ -441,8 +461,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
case NODE_IES:
svm_node_ies(kg, sd, stack, node, &offset);
break;
-# endif /* __EXTRA_NODES__ */
-#endif /* NODES_GROUP(NODE_GROUP_LEVEL_2) */
+#endif /* NODES_GROUP(NODE_GROUP_LEVEL_2) */
#if NODES_GROUP(NODE_GROUP_LEVEL_3)
case NODE_RGB_CURVES:
@@ -455,7 +474,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
case NODE_NORMAL_MAP:
svm_node_normal_map(kg, sd, stack, node);
break;
-# ifdef __EXTRA_NODES__
case NODE_INVERT:
svm_node_invert(sd, stack, node.y, node.z, node.w);
break;
@@ -474,6 +492,9 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
case NODE_COMBINE_HSV:
svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
+ case NODE_VECTOR_ROTATE:
+ svm_node_vector_rotate(sd, stack, node.y, node.z, node.w);
+ break;
case NODE_VECTOR_TRANSFORM:
svm_node_vector_transform(kg, sd, stack, node);
break;
@@ -486,12 +507,12 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
case NODE_BLACKBODY:
svm_node_blackbody(kg, sd, stack, node.y, node.z);
break;
-# endif /* __EXTRA_NODES__ */
-# if NODES_FEATURE(NODE_FEATURE_VOLUME)
- case NODE_TEX_VOXEL:
- svm_node_tex_voxel(kg, sd, stack, node, &offset);
+ case NODE_MAP_RANGE:
+ svm_node_map_range(kg, sd, stack, node.y, node.z, node.w, &offset);
+ break;
+ case NODE_CLAMP:
+ svm_node_clamp(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
-# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */
# ifdef __SHADER_RAYTRACE__
case NODE_BEVEL:
svm_node_bevel(kg, sd, state, stack, node);
@@ -501,8 +522,25 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
break;
# endif /* __SHADER_RAYTRACE__ */
#endif /* NODES_GROUP(NODE_GROUP_LEVEL_3) */
- case NODE_END:
- return;
+
+#if NODES_GROUP(NODE_GROUP_LEVEL_4)
+# if NODES_FEATURE(NODE_FEATURE_VOLUME)
+ case NODE_TEX_VOXEL:
+ svm_node_tex_voxel(kg, sd, stack, node, &offset);
+ break;
+# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */
+ case NODE_AOV_START:
+ if (!svm_node_aov_check(state, buffer)) {
+ return;
+ }
+ break;
+ case NODE_AOV_COLOR:
+ svm_node_aov_color(kg, sd, stack, node, buffer);
+ break;
+ case NODE_AOV_VALUE:
+ svm_node_aov_value(kg, sd, stack, node, buffer);
+ break;
+#endif /* NODES_GROUP(NODE_GROUP_LEVEL_4) */
default:
kernel_assert(!"Unknown node type was passed to the SVM machine");
return;
@@ -510,9 +548,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
}
}
-#undef NODES_GROUP
-#undef NODES_FEATURE
-
CCL_NAMESPACE_END
#endif /* __SVM_H__ */
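With this change NODE_END is handled as the first case of the switch rather than at the bottom, svm_eval_nodes() gains a render-buffer pointer for the new AOV nodes, and the NODES_GROUP/NODES_FEATURE macros are no longer #undef'd at the end of the header. Stripped to its skeleton, the interpreter after the patch looks roughly like this (a sketch only; the program start offset and the argument handling are simplified assumptions):

  /* Rough shape of the SVM interpreter loop after this patch. */
  ccl_device_noinline void svm_eval_nodes_sketch(
      KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state,
      ccl_global float *buffer, ShaderType type, int path_flag)
  {
    float stack[SVM_STACK_SIZE];
    int offset = sd->shader & SHADER_MASK;  /* assumed: program start taken from the shader id */

    while (1) {
      uint4 node = read_node(kg, &offset);  /* fetch the next packed instruction */
      switch (node.x) {
        case NODE_END:
          return;                           /* shader program finished */
        /* ... NODES_GROUP()/NODES_FEATURE()-guarded cases dispatch to svm_node_*() handlers ... */
        default:
          kernel_assert(!"Unknown node type was passed to the SVM machine");
          return;
      }
    }
  }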
diff --git a/intern/cycles/kernel/svm/svm_ao.h b/intern/cycles/kernel/svm/svm_ao.h
index 06076175c40..4cb986b897a 100644
--- a/intern/cycles/kernel/svm/svm_ao.h
+++ b/intern/cycles/kernel/svm/svm_ao.h
@@ -1,21 +1,23 @@
/*
-* Copyright 2011-2018 Blender Foundation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Copyright 2011-2018 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
CCL_NAMESPACE_BEGIN
+#ifdef __SHADER_RAYTRACE__
+
ccl_device_noinline float svm_ao(KernelGlobals *kg,
ShaderData *sd,
float3 N,
@@ -64,13 +66,13 @@ ccl_device_noinline float svm_ao(KernelGlobals *kg,
ray.dD = differential3_zero();
if (flags & NODE_AO_ONLY_LOCAL) {
- if (!scene_intersect_local(kg, ray, NULL, sd->object, NULL, 0)) {
+ if (!scene_intersect_local(kg, &ray, NULL, sd->object, NULL, 0)) {
unoccluded++;
}
}
else {
Intersection isect;
- if (!scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f)) {
+ if (!scene_intersect(kg, &ray, PATH_RAY_SHADOW_OPAQUE, &isect)) {
unoccluded++;
}
}
@@ -83,10 +85,10 @@ ccl_device void svm_node_ao(
KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float *stack, uint4 node)
{
uint flags, dist_offset, normal_offset, out_ao_offset;
- decode_node_uchar4(node.y, &flags, &dist_offset, &normal_offset, &out_ao_offset);
+ svm_unpack_node_uchar4(node.y, &flags, &dist_offset, &normal_offset, &out_ao_offset);
uint color_offset, out_color_offset, samples;
- decode_node_uchar4(node.z, &color_offset, &out_color_offset, &samples, NULL);
+ svm_unpack_node_uchar3(node.z, &color_offset, &out_color_offset, &samples);
float dist = stack_load_float_default(stack, dist_offset, node.w);
float3 normal = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N;
@@ -102,4 +104,6 @@ ccl_device void svm_node_ao(
}
}
+#endif /* __SHADER_RAYTRACE__ */
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_aov.h b/intern/cycles/kernel/svm/svm_aov.h
new file mode 100644
index 00000000000..899e466d099
--- /dev/null
+++ b/intern/cycles/kernel/svm/svm_aov.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device_inline bool svm_node_aov_check(ccl_addr_space PathState *state,
+ ccl_global float *buffer)
+{
+ int path_flag = state->flag;
+
+ bool is_primary = (path_flag & PATH_RAY_CAMERA) && (!(path_flag & PATH_RAY_SINGLE_PASS_DONE));
+
+ return ((buffer != NULL) && is_primary);
+}
+
+ccl_device void svm_node_aov_color(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ccl_global float *buffer)
+{
+ float3 val = stack_load_float3(stack, node.y);
+
+ if (buffer) {
+ kernel_write_pass_float4(buffer + kernel_data.film.pass_aov_color + 4 * node.z,
+ make_float4(val.x, val.y, val.z, 1.0f));
+ }
+}
+
+ccl_device void svm_node_aov_value(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ccl_global float *buffer)
+{
+ float val = stack_load_float(stack, node.y);
+
+ if (buffer) {
+ kernel_write_pass_float(buffer + kernel_data.film.pass_aov_value + node.z, val);
+ }
+}
+CCL_NAMESPACE_END
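The AOV writes are plain offset arithmetic on the per-pixel render buffer: color AOVs are stored as float4 values strided by 4 floats, value AOVs as single floats, with node.z carrying the AOV index. With a hypothetical film layout of kernel_data.film.pass_aov_color = 32 and pass_aov_value = 48, an AOV color node with node.z = 2 accumulates its float4 at buffer + 32 + 4 * 2 = buffer + 40, and an AOV value node with node.z = 1 accumulates one float at buffer + 49.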
diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h
index a67cfe91a30..fc7a3ba3f5a 100644
--- a/intern/cycles/kernel/svm/svm_attribute.h
+++ b/intern/cycles/kernel/svm/svm_attribute.h
@@ -46,8 +46,8 @@ ccl_device AttributeDescriptor svm_node_attr_init(
ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
- NodeAttributeType type;
- uint out_offset;
+ NodeAttributeType type = NODE_ATTR_FLOAT;
+ uint out_offset = 0;
AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
/* fetch and store attribute */
@@ -69,6 +69,15 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u
stack_store_float3(stack, out_offset, make_float3(f.x, f.y, 0.0f));
}
}
+ else if (desc.type == NODE_ATTR_RGBA) {
+ float4 f = primitive_attribute_float4(kg, sd, desc, NULL, NULL);
+ if (type == NODE_ATTR_FLOAT) {
+ stack_store_float(stack, out_offset, average(float4_to_float3(f)));
+ }
+ else {
+ stack_store_float3(stack, out_offset, float4_to_float3(f));
+ }
+ }
else {
float3 f = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
if (type == NODE_ATTR_FLOAT) {
@@ -80,16 +89,10 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u
}
}
-#ifndef __KERNEL_CUDA__
-ccl_device
-#else
-ccl_device_noinline
-#endif
- void
- svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
- NodeAttributeType type;
- uint out_offset;
+ NodeAttributeType type = NODE_ATTR_FLOAT;
+ uint out_offset = 0;
AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
/* fetch and store attribute */
@@ -113,6 +116,16 @@ ccl_device_noinline
stack_store_float3(stack, out_offset, make_float3(f.x + dx.x, f.y + dx.y, 0.0f));
}
}
+ else if (desc.type == NODE_ATTR_RGBA) {
+ float4 dx;
+ float4 f = primitive_attribute_float4(kg, sd, desc, &dx, NULL);
+ if (type == NODE_ATTR_FLOAT) {
+ stack_store_float(stack, out_offset, average(float4_to_float3(f + dx)));
+ }
+ else {
+ stack_store_float3(stack, out_offset, float4_to_float3(f + dx));
+ }
+ }
else {
float3 dx;
float3 f = primitive_surface_attribute_float3(kg, sd, desc, &dx, NULL);
@@ -125,16 +138,10 @@ ccl_device_noinline
}
}
-#ifndef __KERNEL_CUDA__
-ccl_device
-#else
-ccl_device_noinline
-#endif
- void
- svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
- NodeAttributeType type;
- uint out_offset;
+ NodeAttributeType type = NODE_ATTR_FLOAT;
+ uint out_offset = 0;
AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
/* fetch and store attribute */
@@ -158,6 +165,16 @@ ccl_device_noinline
stack_store_float3(stack, out_offset, make_float3(f.x + dy.x, f.y + dy.y, 0.0f));
}
}
+ else if (desc.type == NODE_ATTR_RGBA) {
+ float4 dy;
+ float4 f = primitive_attribute_float4(kg, sd, desc, NULL, &dy);
+ if (type == NODE_ATTR_FLOAT) {
+ stack_store_float(stack, out_offset, average(float4_to_float3(f + dy)));
+ }
+ else {
+ stack_store_float3(stack, out_offset, float4_to_float3(f + dy));
+ }
+ }
else {
float3 dy;
float3 f = primitive_surface_attribute_float3(kg, sd, desc, NULL, &dy);
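The new NODE_ATTR_RGBA branches handle float4 attributes (for example vertex colors with alpha): the float output takes the average of the RGB channels and the vector output drops alpha via float4_to_float3(). A standalone sketch of that reduction, assuming average() is the arithmetic mean of the three components as in the Cycles math utilities:

  /* Sketch of the float4 -> float / float3 reduction used for RGBA attributes above. */
  static inline float3 rgba_to_float3(float4 f)
  {
    return make_float3(f.x, f.y, f.z);              /* float4_to_float3(): drop alpha */
  }

  static inline float rgba_to_float(float4 f)
  {
    float3 rgb = rgba_to_float3(f);
    return (rgb.x + rgb.y + rgb.z) * (1.0f / 3.0f); /* average(): assumed arithmetic mean */
  }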
diff --git a/intern/cycles/kernel/svm/svm_bevel.h b/intern/cycles/kernel/svm/svm_bevel.h
index fcf28e96e98..bf5957ec9e4 100644
--- a/intern/cycles/kernel/svm/svm_bevel.h
+++ b/intern/cycles/kernel/svm/svm_bevel.h
@@ -16,6 +16,8 @@
CCL_NAMESPACE_BEGIN
+#ifdef __SHADER_RAYTRACE__
+
/* Bevel shader averaging normals from nearby surfaces.
*
* Sampling strategy from: BSSRDF Importance Sampling, SIGGRAPH 2013
@@ -110,7 +112,7 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg,
/* Intersect with the same object. if multiple intersections are found it
* will use at most LOCAL_MAX_HITS hits, a random subset of all hits. */
- scene_intersect_local(kg, *ray, &isect, sd->object, &lcg_state, LOCAL_MAX_HITS);
+ scene_intersect_local(kg, ray, &isect, sd->object, &lcg_state, LOCAL_MAX_HITS);
int num_eval_hits = min(isect.num_hits, LOCAL_MAX_HITS);
@@ -120,14 +122,14 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg,
if (sd->type & PRIMITIVE_TRIANGLE) {
hit_P = triangle_refine_local(kg, sd, &isect.hits[hit], ray);
}
-#ifdef __OBJECT_MOTION__
+# ifdef __OBJECT_MOTION__
else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) {
float3 verts[3];
motion_triangle_vertices(
kg, sd->object, kernel_tex_fetch(__prim_index, isect.hits[hit].prim), sd->time, verts);
hit_P = motion_triangle_refine_local(kg, sd, &isect.hits[hit], ray, verts);
}
-#endif /* __OBJECT_MOTION__ */
+# endif /* __OBJECT_MOTION__ */
/* Get geometric normal. */
float3 hit_Ng = isect.Ng[hit];
@@ -151,11 +153,11 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg,
if (sd->type & PRIMITIVE_TRIANGLE) {
N = triangle_smooth_normal(kg, N, prim, u, v);
}
-#ifdef __OBJECT_MOTION__
+# ifdef __OBJECT_MOTION__
else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) {
N = motion_triangle_smooth_normal(kg, N, sd->object, prim, u, v, sd->time);
}
-#endif /* __OBJECT_MOTION__ */
+# endif /* __OBJECT_MOTION__ */
}
/* Transform normals to world space. */
@@ -200,7 +202,7 @@ ccl_device void svm_node_bevel(
KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float *stack, uint4 node)
{
uint num_samples, radius_offset, normal_offset, out_offset;
- decode_node_uchar4(node.y, &num_samples, &radius_offset, &normal_offset, &out_offset);
+ svm_unpack_node_uchar4(node.y, &num_samples, &radius_offset, &normal_offset, &out_offset);
float radius = stack_load_float(stack, radius_offset);
float3 bevel_N = svm_bevel(kg, sd, state, radius, num_samples);
@@ -214,4 +216,6 @@ ccl_device void svm_node_bevel(
stack_store_float3(stack, out_offset, bevel_N);
}
+#endif /* __SHADER_RAYTRACE__ */
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_brick.h b/intern/cycles/kernel/svm/svm_brick.h
index b5cbfcc72df..6984afa30a5 100644
--- a/intern/cycles/kernel/svm/svm_brick.h
+++ b/intern/cycles/kernel/svm/svm_brick.h
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
/* Brick */
-ccl_device_noinline float brick_noise(uint n) /* fast integer noise */
+ccl_device_inline float brick_noise(uint n) /* fast integer noise */
{
uint nn;
n = (n + 1013) & 0x7fffffff;
@@ -27,16 +27,16 @@ ccl_device_noinline float brick_noise(uint n) /* fast integer noise */
return 0.5f * ((float)nn / 1073741824.0f);
}
-ccl_device_noinline float2 svm_brick(float3 p,
- float mortar_size,
- float mortar_smooth,
- float bias,
- float brick_width,
- float row_height,
- float offset_amount,
- int offset_frequency,
- float squash_amount,
- int squash_frequency)
+ccl_device_noinline_cpu float2 svm_brick(float3 p,
+ float mortar_size,
+ float mortar_smooth,
+ float bias,
+ float brick_width,
+ float row_height,
+ float offset_amount,
+ int offset_frequency,
+ float squash_amount,
+ int squash_frequency)
{
int bricknum, rownum;
float offset = 0.0f;
@@ -87,13 +87,13 @@ ccl_device void svm_node_tex_brick(
/* RNA properties */
uint offset_frequency, squash_frequency;
- decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset);
- decode_node_uchar4(
+ svm_unpack_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset);
+ svm_unpack_node_uchar4(
node.z, &scale_offset, &mortar_size_offset, &bias_offset, &brick_width_offset);
- decode_node_uchar4(
+ svm_unpack_node_uchar4(
node.w, &row_height_offset, &color_offset, &fac_offset, &mortar_smooth_offset);
- decode_node_uchar4(node2.x, &offset_frequency, &squash_frequency, NULL, NULL);
+ svm_unpack_node_uchar2(node2.x, &offset_frequency, &squash_frequency);
float3 co = stack_load_float3(stack, co_offset);
diff --git a/intern/cycles/kernel/svm/svm_brightness.h b/intern/cycles/kernel/svm/svm_brightness.h
index dcd75a2fe8f..9554b5946fb 100644
--- a/intern/cycles/kernel/svm/svm_brightness.h
+++ b/intern/cycles/kernel/svm/svm_brightness.h
@@ -22,7 +22,7 @@ ccl_device void svm_node_brightness(
uint bright_offset, contrast_offset;
float3 color = stack_load_float3(stack, in_color);
- decode_node_uchar4(node, &bright_offset, &contrast_offset, NULL, NULL);
+ svm_unpack_node_uchar2(node, &bright_offset, &contrast_offset);
float brightness = stack_load_float(stack, bright_offset);
float contrast = stack_load_float(stack, contrast_offset);
diff --git a/intern/cycles/kernel/svm/svm_checker.h b/intern/cycles/kernel/svm/svm_checker.h
index 63b4d1e149b..d54cb73df91 100644
--- a/intern/cycles/kernel/svm/svm_checker.h
+++ b/intern/cycles/kernel/svm/svm_checker.h
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
/* Checker */
-ccl_device_noinline float svm_checker(float3 p)
+ccl_device float svm_checker(float3 p)
{
/* avoid precision issues on unit coordinates */
p.x = (p.x + 0.000001f) * 0.999999f;
@@ -37,8 +37,8 @@ ccl_device void svm_node_tex_checker(KernelGlobals *kg, ShaderData *sd, float *s
uint co_offset, color1_offset, color2_offset, scale_offset;
uint color_offset, fac_offset;
- decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &scale_offset);
- decode_node_uchar4(node.z, &color_offset, &fac_offset, NULL, NULL);
+ svm_unpack_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &scale_offset);
+ svm_unpack_node_uchar2(node.z, &color_offset, &fac_offset);
float3 co = stack_load_float3(stack, co_offset);
float3 color1 = stack_load_float3(stack, color1_offset);
diff --git a/intern/cycles/kernel/svm/svm_clamp.h b/intern/cycles/kernel/svm/svm_clamp.h
new file mode 100644
index 00000000000..a85fd82754e
--- /dev/null
+++ b/intern/cycles/kernel/svm/svm_clamp.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Clamp Node */
+
+ccl_device void svm_node_clamp(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint value_stack_offset,
+ uint parameters_stack_offsets,
+ uint result_stack_offset,
+ int *offset)
+{
+ uint min_stack_offset, max_stack_offset, type;
+ svm_unpack_node_uchar3(parameters_stack_offsets, &min_stack_offset, &max_stack_offset, &type);
+
+ uint4 defaults = read_node(kg, offset);
+
+ float value = stack_load_float(stack, value_stack_offset);
+ float min = stack_load_float_default(stack, min_stack_offset, defaults.x);
+ float max = stack_load_float_default(stack, max_stack_offset, defaults.y);
+
+ if (type == NODE_CLAMP_RANGE && (min > max)) {
+ stack_store_float(stack, result_stack_offset, clamp(value, max, min));
+ }
+ else {
+ stack_store_float(stack, result_stack_offset, clamp(value, min, max));
+ }
+}
+
+CCL_NAMESPACE_END
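The Range mode keeps the node well defined when Min is wired above Max: instead of evaluating the degenerate clamp(value, min, max), it clamps into the swapped interval. For example, with type == NODE_CLAMP_RANGE, min = 0.7 and max = 0.3, a value of 0.9 comes out as 0.7 and a value of 0.1 as 0.3; in the default Min/Max mode the same inputs would pin every value to 0.3, assuming clamp() applies the lower bound first in the usual min(max(value, lo), hi) form.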
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index 270fe4c8615..1ae94f1d766 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -16,23 +16,6 @@
CCL_NAMESPACE_BEGIN
-/* Hair Melanin */
-
-ccl_device_inline float3 sigma_from_concentration(float eumelanin, float pheomelanin)
-{
- return eumelanin * make_float3(0.506f, 0.841f, 1.653f) +
- pheomelanin * make_float3(0.343f, 0.733f, 1.924f);
-}
-
-ccl_device_inline float3 sigma_from_reflectance(float3 color, float azimuthal_roughness)
-{
- float x = azimuthal_roughness;
- float roughness_fac = (((((0.245f * x) + 5.574f) * x - 10.73f) * x + 2.532f) * x - 0.215f) * x +
- 5.969f;
- float3 sigma = log3(color) / roughness_fac;
- return sigma * sigma;
-}
-
/* Closure Nodes */
ccl_device void svm_node_glass_setup(
@@ -85,7 +68,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
uint type, param1_offset, param2_offset;
uint mix_weight_offset;
- decode_node_uchar4(node.y, &type, &param1_offset, &param2_offset, &mix_weight_offset);
+ svm_unpack_node_uchar4(node.y, &type, &param1_offset, &param2_offset, &mix_weight_offset);
float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) :
1.0f);
@@ -122,21 +105,21 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
uint4 data_node2 = read_node(kg, offset);
float3 T = stack_load_float3(stack, data_node.y);
- decode_node_uchar4(data_node.z,
- &specular_offset,
- &roughness_offset,
- &specular_tint_offset,
- &anisotropic_offset);
- decode_node_uchar4(data_node.w,
- &sheen_offset,
- &sheen_tint_offset,
- &clearcoat_offset,
- &clearcoat_roughness_offset);
- decode_node_uchar4(data_node2.x,
- &eta_offset,
- &transmission_offset,
- &anisotropic_rotation_offset,
- &transmission_roughness_offset);
+ svm_unpack_node_uchar4(data_node.z,
+ &specular_offset,
+ &roughness_offset,
+ &specular_tint_offset,
+ &anisotropic_offset);
+ svm_unpack_node_uchar4(data_node.w,
+ &sheen_offset,
+ &sheen_tint_offset,
+ &clearcoat_offset,
+ &clearcoat_roughness_offset);
+ svm_unpack_node_uchar4(data_node2.x,
+ &eta_offset,
+ &transmission_offset,
+ &anisotropic_rotation_offset,
+ &transmission_roughness_offset);
// get Disney principled parameters
float metallic = param1;
@@ -290,7 +273,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
bsdf->N = N;
/* setup bsdf */
- sd->flag |= bsdf_principled_sheen_setup(bsdf);
+ sd->flag |= bsdf_principled_sheen_setup(sd, bsdf);
}
}
@@ -337,9 +320,9 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
/* setup bsdf */
if (distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID ||
roughness <= 0.075f) /* use single-scatter GGX */
- sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd);
+ sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd);
else /* use multi-scatter GGX */
- sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd);
+ sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd);
}
}
# ifdef __CAUSTICS_TRICKS__
@@ -532,12 +515,34 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
float roughness = sqr(param1);
bsdf->N = N;
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->alpha_x = roughness;
- bsdf->alpha_y = roughness;
bsdf->ior = 0.0f;
bsdf->extra = NULL;
+ if (data_node.y == SVM_STACK_INVALID) {
+ bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->alpha_x = roughness;
+ bsdf->alpha_y = roughness;
+ }
+ else {
+ bsdf->T = stack_load_float3(stack, data_node.y);
+
+ /* rotate tangent */
+ float rotation = stack_load_float(stack, data_node.z);
+ if (rotation != 0.0f)
+ bsdf->T = rotate_around_axis(bsdf->T, bsdf->N, rotation * M_2PI_F);
+
+ /* compute roughness */
+ float anisotropy = clamp(param2, -0.99f, 0.99f);
+ if (anisotropy < 0.0f) {
+ bsdf->alpha_x = roughness / (1.0f + anisotropy);
+ bsdf->alpha_y = roughness * (1.0f + anisotropy);
+ }
+ else {
+ bsdf->alpha_x = roughness * (1.0f - anisotropy);
+ bsdf->alpha_y = roughness / (1.0f - anisotropy);
+ }
+ }
+
/* setup bsdf */
if (type == CLOSURE_BSDF_REFLECTION_ID)
sd->flag |= bsdf_reflection_setup(bsdf);
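The anisotropic handling that previously lived in the dedicated *_ANISO_ID closure cases (removed further down in this file) is now folded into this branch, keyed on whether data_node.y carries a valid tangent. The roughness split follows directly from the code above: with squared roughness r and anisotropy a clamped to [-0.99, 0.99], a >= 0 gives alpha_x = r * (1 - a) and alpha_y = r / (1 - a), while a < 0 mirrors the two axes. For example, r = 0.25 and a = 0.5 yield alpha_x = 0.125 and alpha_y = 0.5, a four-fold roughness ratio between the two tangent directions.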
@@ -546,10 +551,10 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
else if (type == CLOSURE_BSDF_MICROFACET_GGX_ID)
sd->flag |= bsdf_microfacet_ggx_setup(bsdf);
else if (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID) {
- kernel_assert(stack_valid(data_node.z));
+ kernel_assert(stack_valid(data_node.w));
bsdf->extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
if (bsdf->extra) {
- bsdf->extra->color = stack_load_float3(stack, data_node.z);
+ bsdf->extra->color = stack_load_float3(stack, data_node.w);
bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f);
bsdf->extra->clearcoat = 0.0f;
sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf);
@@ -692,64 +697,6 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf);
break;
}
- case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID:
- case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: {
-#ifdef __CAUSTICS_TRICKS__
- if (!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE))
- break;
-#endif
- float3 weight = sd->svm_closure_weight * mix_weight;
- MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
-
- if (bsdf) {
- bsdf->N = N;
- bsdf->extra = NULL;
- bsdf->T = stack_load_float3(stack, data_node.y);
-
- /* rotate tangent */
- float rotation = stack_load_float(stack, data_node.z);
-
- if (rotation != 0.0f)
- bsdf->T = rotate_around_axis(bsdf->T, bsdf->N, rotation * M_2PI_F);
-
- /* compute roughness */
- float roughness = sqr(param1);
- float anisotropy = clamp(param2, -0.99f, 0.99f);
-
- if (anisotropy < 0.0f) {
- bsdf->alpha_x = roughness / (1.0f + anisotropy);
- bsdf->alpha_y = roughness * (1.0f + anisotropy);
- }
- else {
- bsdf->alpha_x = roughness * (1.0f - anisotropy);
- bsdf->alpha_y = roughness / (1.0f - anisotropy);
- }
-
- bsdf->ior = 0.0f;
-
- if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID) {
- sd->flag |= bsdf_microfacet_beckmann_aniso_setup(bsdf);
- }
- else if (type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID) {
- sd->flag |= bsdf_microfacet_ggx_aniso_setup(bsdf);
- }
- else if (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID) {
- kernel_assert(stack_valid(data_node.w));
- bsdf->extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
- if (bsdf->extra) {
- bsdf->extra->color = stack_load_float3(stack, data_node.w);
- bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->extra->clearcoat = 0.0f;
- sd->flag |= bsdf_microfacet_multi_ggx_aniso_setup(bsdf);
- }
- }
- else
- sd->flag |= bsdf_ashikhmin_shirley_aniso_setup(bsdf);
- }
- break;
- }
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: {
float3 weight = sd->svm_closure_weight * mix_weight;
VelvetBsdf *bsdf = (VelvetBsdf *)bsdf_alloc(sd, sizeof(VelvetBsdf), weight);
@@ -793,19 +740,19 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
float3 weight = sd->svm_closure_weight * mix_weight;
uint offset_ofs, ior_ofs, color_ofs, parametrization;
- decode_node_uchar4(data_node.y, &offset_ofs, &ior_ofs, &color_ofs, &parametrization);
+ svm_unpack_node_uchar4(data_node.y, &offset_ofs, &ior_ofs, &color_ofs, &parametrization);
float alpha = stack_load_float_default(stack, offset_ofs, data_node.z);
float ior = stack_load_float_default(stack, ior_ofs, data_node.w);
uint coat_ofs, melanin_ofs, melanin_redness_ofs, absorption_coefficient_ofs;
- decode_node_uchar4(data_node2.x,
- &coat_ofs,
- &melanin_ofs,
- &melanin_redness_ofs,
- &absorption_coefficient_ofs);
+ svm_unpack_node_uchar4(data_node2.x,
+ &coat_ofs,
+ &melanin_ofs,
+ &melanin_redness_ofs,
+ &absorption_coefficient_ofs);
uint tint_ofs, random_ofs, random_color_ofs, random_roughness_ofs;
- decode_node_uchar4(
+ svm_unpack_node_uchar4(
data_node3.x, &tint_ofs, &random_ofs, &random_color_ofs, &random_roughness_ofs);
const AttributeDescriptor attr_descr_random = find_attribute(kg, sd, data_node4.y);
@@ -868,24 +815,26 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
/* Benedikt Bitterli's melanin ratio remapping. */
float eumelanin = melanin * (1.0f - melanin_redness);
float pheomelanin = melanin * melanin_redness;
- float3 melanin_sigma = sigma_from_concentration(eumelanin, pheomelanin);
+ float3 melanin_sigma = bsdf_principled_hair_sigma_from_concentration(eumelanin,
+ pheomelanin);
/* Optional tint. */
float3 tint = stack_load_float3(stack, tint_ofs);
- float3 tint_sigma = sigma_from_reflectance(tint, radial_roughness);
+ float3 tint_sigma = bsdf_principled_hair_sigma_from_reflectance(tint,
+ radial_roughness);
bsdf->sigma = melanin_sigma + tint_sigma;
break;
}
case NODE_PRINCIPLED_HAIR_REFLECTANCE: {
float3 color = stack_load_float3(stack, color_ofs);
- bsdf->sigma = sigma_from_reflectance(color, radial_roughness);
+ bsdf->sigma = bsdf_principled_hair_sigma_from_reflectance(color, radial_roughness);
break;
}
default: {
/* Fallback to brownish hair, same as defaults for melanin. */
kernel_assert(!"Invalid Principled Hair parametrization!");
- bsdf->sigma = sigma_from_concentration(0.0f, 0.8054375f);
+ bsdf->sigma = bsdf_principled_hair_sigma_from_concentration(0.0f, 0.8054375f);
break;
}
}
@@ -898,39 +847,29 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: {
float3 weight = sd->svm_closure_weight * mix_weight;
- if (sd->flag & SD_BACKFACING && sd->type & PRIMITIVE_ALL_CURVE) {
- /* todo: giving a fixed weight here will cause issues when
- * mixing multiple BSDFS. energy will not be conserved and
- * the throughput can blow up after multiple bounces. we
- * better figure out a way to skip backfaces from rays
- * spawned by transmission from the front */
- bsdf_transparent_setup(sd, make_float3(1.0f, 1.0f, 1.0f), path_flag);
- }
- else {
- HairBsdf *bsdf = (HairBsdf *)bsdf_alloc(sd, sizeof(HairBsdf), weight);
+ HairBsdf *bsdf = (HairBsdf *)bsdf_alloc(sd, sizeof(HairBsdf), weight);
- if (bsdf) {
- bsdf->N = N;
- bsdf->roughness1 = param1;
- bsdf->roughness2 = param2;
- bsdf->offset = -stack_load_float(stack, data_node.z);
+ if (bsdf) {
+ bsdf->N = N;
+ bsdf->roughness1 = param1;
+ bsdf->roughness2 = param2;
+ bsdf->offset = -stack_load_float(stack, data_node.z);
- if (stack_valid(data_node.y)) {
- bsdf->T = normalize(stack_load_float3(stack, data_node.y));
- }
- else if (!(sd->type & PRIMITIVE_ALL_CURVE)) {
- bsdf->T = normalize(sd->dPdv);
- bsdf->offset = 0.0f;
- }
- else
- bsdf->T = normalize(sd->dPdu);
+ if (stack_valid(data_node.y)) {
+ bsdf->T = normalize(stack_load_float3(stack, data_node.y));
+ }
+ else if (!(sd->type & PRIMITIVE_ALL_CURVE)) {
+ bsdf->T = normalize(sd->dPdv);
+ bsdf->offset = 0.0f;
+ }
+ else
+ bsdf->T = normalize(sd->dPdu);
- if (type == CLOSURE_BSDF_HAIR_REFLECTION_ID) {
- sd->flag |= bsdf_hair_reflection_setup(bsdf);
- }
- else {
- sd->flag |= bsdf_hair_transmission_setup(bsdf);
- }
+ if (type == CLOSURE_BSDF_HAIR_REFLECTION_ID) {
+ sd->flag |= bsdf_hair_reflection_setup(bsdf);
+ }
+ else {
+ sd->flag |= bsdf_hair_transmission_setup(bsdf);
}
}
@@ -982,7 +921,7 @@ ccl_device void svm_node_closure_volume(
uint type, density_offset, anisotropy_offset;
uint mix_weight_offset;
- decode_node_uchar4(node.y, &type, &density_offset, &anisotropy_offset, &mix_weight_offset);
+ svm_unpack_node_uchar4(node.y, &type, &density_offset, &anisotropy_offset, &mix_weight_offset);
float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) :
1.0f);
@@ -1040,7 +979,7 @@ ccl_device void svm_node_principled_volume(KernelGlobals *kg,
}
uint density_offset, anisotropy_offset, absorption_color_offset, mix_weight_offset;
- decode_node_uchar4(
+ svm_unpack_node_uchar4(
node.y, &density_offset, &anisotropy_offset, &absorption_color_offset, &mix_weight_offset);
float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) :
1.0f);
@@ -1099,7 +1038,7 @@ ccl_device void svm_node_principled_volume(KernelGlobals *kg,
}
uint emission_offset, emission_color_offset, blackbody_offset, temperature_offset;
- decode_node_uchar4(
+ svm_unpack_node_uchar4(
node.z, &emission_offset, &emission_color_offset, &blackbody_offset, &temperature_offset);
float emission = (stack_valid(emission_offset)) ? stack_load_float(stack, emission_offset) :
__uint_as_float(value_node.z);
@@ -1229,7 +1168,8 @@ ccl_device void svm_node_mix_closure(ShaderData *sd, float *stack, uint4 node)
/* fetch weight from blend input, previous mix closures,
* and write to stack to be used by closure nodes later */
uint weight_offset, in_weight_offset, weight1_offset, weight2_offset;
- decode_node_uchar4(node.y, &weight_offset, &in_weight_offset, &weight1_offset, &weight2_offset);
+ svm_unpack_node_uchar4(
+ node.y, &weight_offset, &in_weight_offset, &weight1_offset, &weight2_offset);
float weight = stack_load_float(stack, weight_offset);
weight = saturate(weight);
diff --git a/intern/cycles/kernel/svm/svm_color_util.h b/intern/cycles/kernel/svm/svm_color_util.h
index 12b59d2616b..1a0fa03305e 100644
--- a/intern/cycles/kernel/svm/svm_color_util.h
+++ b/intern/cycles/kernel/svm/svm_color_util.h
@@ -92,12 +92,12 @@ ccl_device float3 svm_mix_diff(float t, float3 col1, float3 col2)
ccl_device float3 svm_mix_dark(float t, float3 col1, float3 col2)
{
- return min(col1, col2) * t + col1 * (1.0f - t);
+ return interp(col1, min(col1, col2), t);
}
ccl_device float3 svm_mix_light(float t, float3 col1, float3 col2)
{
- return max(col1, col2 * t);
+ return interp(col1, max(col1, col2), t);
}
ccl_device float3 svm_mix_dodge(float t, float3 col1, float3 col2)
@@ -255,16 +255,10 @@ ccl_device float3 svm_mix_linear(float t, float3 col1, float3 col2)
ccl_device float3 svm_mix_clamp(float3 col)
{
- float3 outcol = col;
-
- outcol.x = saturate(col.x);
- outcol.y = saturate(col.y);
- outcol.z = saturate(col.z);
-
- return outcol;
+ return saturate3(col);
}
-ccl_device_noinline float3 svm_mix(NodeMix type, float fac, float3 c1, float3 c2)
+ccl_device_noinline_cpu float3 svm_mix(NodeMix type, float fac, float3 c1, float3 c2)
{
float t = saturate(fac);
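The Darken and Lighten mix modes are reformulated through interp(); assuming interp(a, b, t) is the usual linear blend a * (1 - t) + b * t, Darken now fades from col1 at factor 0 to min(col1, col2) at factor 1, and Lighten fades the same way toward max(col1, col2). The previous Lighten, max(col1, col2 * t), scaled the second color by the factor before taking the maximum, so results at intermediate factors differ after this patch.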
diff --git a/intern/cycles/kernel/svm/svm_displace.h b/intern/cycles/kernel/svm/svm_displace.h
index f16664a684c..250fac6bcb8 100644
--- a/intern/cycles/kernel/svm/svm_displace.h
+++ b/intern/cycles/kernel/svm/svm_displace.h
@@ -23,7 +23,7 @@ ccl_device void svm_node_set_bump(KernelGlobals *kg, ShaderData *sd, float *stac
#ifdef __RAY_DIFFERENTIALS__
/* get normal input */
uint normal_offset, scale_offset, invert, use_object_space;
- decode_node_uchar4(node.y, &normal_offset, &scale_offset, &invert, &use_object_space);
+ svm_unpack_node_uchar4(node.y, &normal_offset, &scale_offset, &invert, &use_object_space);
float3 normal_in = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N;
@@ -42,7 +42,7 @@ ccl_device void svm_node_set_bump(KernelGlobals *kg, ShaderData *sd, float *stac
/* get bump values */
uint c_offset, x_offset, y_offset, strength_offset;
- decode_node_uchar4(node.z, &c_offset, &x_offset, &y_offset, &strength_offset);
+ svm_unpack_node_uchar4(node.z, &c_offset, &x_offset, &y_offset, &strength_offset);
float h_c = stack_load_float(stack, c_offset);
float h_x = stack_load_float(stack, x_offset);
@@ -95,7 +95,7 @@ ccl_device void svm_node_set_displacement(KernelGlobals *kg,
ccl_device void svm_node_displacement(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
uint height_offset, midlevel_offset, scale_offset, normal_offset;
- decode_node_uchar4(node.y, &height_offset, &midlevel_offset, &scale_offset, &normal_offset);
+ svm_unpack_node_uchar4(node.y, &height_offset, &midlevel_offset, &scale_offset, &normal_offset);
float height = stack_load_float(stack, height_offset);
float midlevel = stack_load_float(stack, midlevel_offset);
@@ -126,7 +126,7 @@ ccl_device void svm_node_vector_displacement(
uint space = data_node.x;
uint vector_offset, midlevel_offset, scale_offset, displacement_offset;
- decode_node_uchar4(
+ svm_unpack_node_uchar4(
node.y, &vector_offset, &midlevel_offset, &scale_offset, &displacement_offset);
float3 vector = stack_load_float3(stack, vector_offset);
diff --git a/intern/cycles/kernel/svm/svm_fractal_noise.h b/intern/cycles/kernel/svm/svm_fractal_noise.h
new file mode 100644
index 00000000000..57fa8c690ac
--- /dev/null
+++ b/intern/cycles/kernel/svm/svm_fractal_noise.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* The fractal_noise_[1-4] functions are all exactly the same except for the input type. */
+ccl_device_noinline float fractal_noise_1d(float p, float octaves, float roughness)
+{
+ float fscale = 1.0f;
+ float amp = 1.0f;
+ float maxamp = 0.0f;
+ float sum = 0.0f;
+ octaves = clamp(octaves, 0.0f, 16.0f);
+ int n = float_to_int(octaves);
+ for (int i = 0; i <= n; i++) {
+ float t = noise_1d(fscale * p);
+ sum += t * amp;
+ maxamp += amp;
+ amp *= clamp(roughness, 0.0f, 1.0f);
+ fscale *= 2.0f;
+ }
+ float rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f) {
+ float t = noise_1d(fscale * p);
+ float sum2 = sum + t * amp;
+ sum /= maxamp;
+ sum2 /= maxamp + amp;
+ return (1.0f - rmd) * sum + rmd * sum2;
+ }
+ else {
+ return sum / maxamp;
+ }
+}
+
+/* The fractal_noise_[1-4] functions are all exactly the same except for the input type. */
+ccl_device_noinline float fractal_noise_2d(float2 p, float octaves, float roughness)
+{
+ float fscale = 1.0f;
+ float amp = 1.0f;
+ float maxamp = 0.0f;
+ float sum = 0.0f;
+ octaves = clamp(octaves, 0.0f, 16.0f);
+ int n = float_to_int(octaves);
+ for (int i = 0; i <= n; i++) {
+ float t = noise_2d(fscale * p);
+ sum += t * amp;
+ maxamp += amp;
+ amp *= clamp(roughness, 0.0f, 1.0f);
+ fscale *= 2.0f;
+ }
+ float rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f) {
+ float t = noise_2d(fscale * p);
+ float sum2 = sum + t * amp;
+ sum /= maxamp;
+ sum2 /= maxamp + amp;
+ return (1.0f - rmd) * sum + rmd * sum2;
+ }
+ else {
+ return sum / maxamp;
+ }
+}
+
+/* The fractal_noise_[1-4] functions are all exactly the same except for the input type. */
+ccl_device_noinline float fractal_noise_3d(float3 p, float octaves, float roughness)
+{
+ float fscale = 1.0f;
+ float amp = 1.0f;
+ float maxamp = 0.0f;
+ float sum = 0.0f;
+ octaves = clamp(octaves, 0.0f, 16.0f);
+ int n = float_to_int(octaves);
+ for (int i = 0; i <= n; i++) {
+ float t = noise_3d(fscale * p);
+ sum += t * amp;
+ maxamp += amp;
+ amp *= clamp(roughness, 0.0f, 1.0f);
+ fscale *= 2.0f;
+ }
+ float rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f) {
+ float t = noise_3d(fscale * p);
+ float sum2 = sum + t * amp;
+ sum /= maxamp;
+ sum2 /= maxamp + amp;
+ return (1.0f - rmd) * sum + rmd * sum2;
+ }
+ else {
+ return sum / maxamp;
+ }
+}
+
+/* The fractal_noise_[1-4] functions are all exactly the same except for the input type. */
+ccl_device_noinline float fractal_noise_4d(float4 p, float octaves, float roughness)
+{
+ float fscale = 1.0f;
+ float amp = 1.0f;
+ float maxamp = 0.0f;
+ float sum = 0.0f;
+ octaves = clamp(octaves, 0.0f, 16.0f);
+ int n = float_to_int(octaves);
+ for (int i = 0; i <= n; i++) {
+ float t = noise_4d(fscale * p);
+ sum += t * amp;
+ maxamp += amp;
+ amp *= clamp(roughness, 0.0f, 1.0f);
+ fscale *= 2.0f;
+ }
+ float rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f) {
+ float t = noise_4d(fscale * p);
+ float sum2 = sum + t * amp;
+ sum /= maxamp;
+ sum2 /= maxamp + amp;
+ return (1.0f - rmd) * sum + rmd * sum2;
+ }
+ else {
+ return sum / maxamp;
+ }
+}
+
+CCL_NAMESPACE_END
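Each fractal_noise_*() variant sums octaves of the base noise with amplitude roughness^i and frequency 2^i, normalizes by the accumulated amplitude, and uses the fractional part of octaves to blend linearly between the n-octave and (n+1)-octave results. Worked through for octaves = 2.5 and roughness = 0.5: the loop adds three octaves with amplitudes 1, 0.5 and 0.25 (maxamp = 1.75), rmd = 0.5, and the return value is 0.5 * sum / 1.75 + 0.5 * (sum + 0.125 * t) / 1.875, where t is the noise sample of the extra fourth octave at fscale = 8.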
diff --git a/intern/cycles/kernel/svm/svm_fresnel.h b/intern/cycles/kernel/svm/svm_fresnel.h
index 03119991597..96d602e35bf 100644
--- a/intern/cycles/kernel/svm/svm_fresnel.h
+++ b/intern/cycles/kernel/svm/svm_fresnel.h
@@ -22,7 +22,7 @@ ccl_device void svm_node_fresnel(
ShaderData *sd, float *stack, uint ior_offset, uint ior_value, uint node)
{
uint normal_offset, out_offset;
- decode_node_uchar4(node, &normal_offset, &out_offset, NULL, NULL);
+ svm_unpack_node_uchar2(node, &normal_offset, &out_offset);
float eta = (stack_valid(ior_offset)) ? stack_load_float(stack, ior_offset) :
__uint_as_float(ior_value);
float3 normal_in = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N;
@@ -43,7 +43,7 @@ ccl_device void svm_node_layer_weight(ShaderData *sd, float *stack, uint4 node)
uint blend_value = node.z;
uint type, normal_offset, out_offset;
- decode_node_uchar4(node.w, &type, &normal_offset, &out_offset, NULL);
+ svm_unpack_node_uchar3(node.w, &type, &normal_offset, &out_offset);
float blend = (stack_valid(blend_offset)) ? stack_load_float(stack, blend_offset) :
__uint_as_float(blend_value);
diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h
index a9104643299..77df19b2298 100644
--- a/intern/cycles/kernel/svm/svm_geometry.h
+++ b/intern/cycles/kernel/svm/svm_geometry.h
@@ -41,11 +41,9 @@ ccl_device_inline void svm_node_geometry(
case NODE_GEOM_Ng:
data = sd->Ng;
break;
-#ifdef __UV__
case NODE_GEOM_uv:
data = make_float3(sd->u, sd->v, 0.0f);
break;
-#endif
default:
data = make_float3(0.0f, 0.0f, 0.0f);
}
@@ -113,6 +111,10 @@ ccl_device void svm_node_object_info(
stack_store_float3(stack, out_offset, object_location(kg, sd));
return;
}
+ case NODE_INFO_OB_COLOR: {
+ stack_store_float3(stack, out_offset, object_color(kg, sd->object));
+ return;
+ }
case NODE_INFO_OB_INDEX:
data = object_pass_id(kg, sd->object);
break;
@@ -149,7 +151,7 @@ ccl_device void svm_node_particle_info(
}
case NODE_INFO_PAR_RANDOM: {
int particle_id = object_particle_id(kg, sd->object);
- float random = hash_int_01(particle_index(kg, particle_id));
+ float random = hash_uint2_to_float(particle_index(kg, particle_id), 0);
stack_store_float(stack, out_offset, random);
break;
}
diff --git a/intern/cycles/kernel/svm/svm_gradient.h b/intern/cycles/kernel/svm/svm_gradient.h
index c315564fbc2..08304bc47e8 100644
--- a/intern/cycles/kernel/svm/svm_gradient.h
+++ b/intern/cycles/kernel/svm/svm_gradient.h
@@ -64,7 +64,7 @@ ccl_device void svm_node_tex_gradient(ShaderData *sd, float *stack, uint4 node)
{
uint type, co_offset, color_offset, fac_offset;
- decode_node_uchar4(node.y, &type, &co_offset, &fac_offset, &color_offset);
+ svm_unpack_node_uchar4(node.y, &type, &co_offset, &fac_offset, &color_offset);
float3 co = stack_load_float3(stack, co_offset);
diff --git a/intern/cycles/kernel/svm/svm_hsv.h b/intern/cycles/kernel/svm/svm_hsv.h
index 72379fba870..1f7bd421869 100644
--- a/intern/cycles/kernel/svm/svm_hsv.h
+++ b/intern/cycles/kernel/svm/svm_hsv.h
@@ -24,8 +24,8 @@ ccl_device void svm_node_hsv(
{
uint in_color_offset, fac_offset, out_color_offset;
uint hue_offset, sat_offset, val_offset;
- decode_node_uchar4(node.y, &in_color_offset, &fac_offset, &out_color_offset, NULL);
- decode_node_uchar4(node.z, &hue_offset, &sat_offset, &val_offset, NULL);
+ svm_unpack_node_uchar3(node.y, &in_color_offset, &fac_offset, &out_color_offset);
+ svm_unpack_node_uchar3(node.z, &hue_offset, &sat_offset, &val_offset);
float fac = stack_load_float(stack, fac_offset);
float3 in_color = stack_load_float3(stack, in_color_offset);
diff --git a/intern/cycles/kernel/svm/svm_ies.h b/intern/cycles/kernel/svm/svm_ies.h
index 9434c0c5505..56c804b44d0 100644
--- a/intern/cycles/kernel/svm/svm_ies.h
+++ b/intern/cycles/kernel/svm/svm_ies.h
@@ -21,12 +21,12 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline float interpolate_ies_vertical(
KernelGlobals *kg, int ofs, int v, int v_num, float v_frac, int h)
{
- /* Since lookups are performed in spherical coordinates, clamping the coordinates at the low end of v
- * (corresponding to the north pole) would result in artifacts.
- * The proper way of dealing with this would be to lookup the corresponding value on the other side of the pole,
- * but since the horizontal coordinates might be nonuniform, this would require yet another interpolation.
- * Therefore, the assumtion is made that the light is going to be symmetrical, which means that we can just take
- * the corresponding value at the current horizontal coordinate. */
+ /* Since lookups are performed in spherical coordinates, clamping the coordinates at the low end
+ * of v (corresponding to the north pole) would result in artifacts. The proper way of dealing
+ * with this would be to lookup the corresponding value on the other side of the pole, but since
+ * the horizontal coordinates might be nonuniform, this would require yet another interpolation.
+ * Therefore, the assumption is made that the light is going to be symmetrical, which means that
+ * we can just take the corresponding value at the current horizontal coordinate. */
#define IES_LOOKUP(v) kernel_tex_fetch(__ies, ofs + h * v_num + (v))
/* If v is zero, assume symmetry and read at v=1 instead of v=-1. */
@@ -66,7 +66,8 @@ ccl_device_inline float kernel_ies_interp(KernelGlobals *kg,
/* Lookup the angles to find the table position. */
int h_i, v_i;
- /* TODO(lukas): Consider using bisection. Probably not worth it for the vast majority of IES files. */
+ /* TODO(lukas): Consider using bisection.
+ * Probably not worth it for the vast majority of IES files. */
for (h_i = 0; IES_LOOKUP_ANGLE_H(h_i + 1) < h_angle; h_i++)
;
for (v_i = 0; IES_LOOKUP_ANGLE_V(v_i + 1) < v_angle; v_i++)
@@ -83,7 +84,8 @@ ccl_device_inline float kernel_ies_interp(KernelGlobals *kg,
/* Perform cubic interpolation along the horizontal coordinate to get the intensity value.
* If h_i is zero, just wrap around since the horizontal angles always go over the full circle.
- * However, the last entry (360°) equals the first one, so we need to wrap around to the one before that. */
+ * However, the last entry (360°) equals the first one, so we need to wrap around to the one
+ * before that. */
float a = interpolate_ies_vertical(
kg, ofs, v_i, v_num, v_frac, (h_i == 0) ? h_num - 2 : h_i - 1);
float b = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, h_i);
@@ -99,8 +101,8 @@ ccl_device_inline float kernel_ies_interp(KernelGlobals *kg,
ccl_device void svm_node_ies(
KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
- uint vector_offset, strength_offset, fac_offset, dummy, slot = node.z;
- decode_node_uchar4(node.y, &strength_offset, &vector_offset, &fac_offset, &dummy);
+ uint vector_offset, strength_offset, fac_offset, slot = node.z;
+ svm_unpack_node_uchar3(node.y, &strength_offset, &vector_offset, &fac_offset);
float3 vector = stack_load_float3(stack, vector_offset);
float strength = stack_load_float_default(stack, strength_offset, node.w);
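The wrap-around in the horizontal interpolation relies on the IES convention that the last horizontal entry (360°) duplicates the first (0°). For example, with h_num = 13 columns at 30° steps, the neighbour "before" column 0 is taken at index h_num - 2 = 11 (the 330° column) rather than at index 12, which would just repeat the 0° value.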
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index ee4b8b6e50c..f57c85fc23e 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -16,23 +16,22 @@
CCL_NAMESPACE_BEGIN
-ccl_device float4
-svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb, uint use_alpha)
+ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint flags)
{
+ if (id == -1) {
+ return make_float4(
+ TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
+ }
+
float4 r = kernel_tex_image_interp(kg, id, x, y);
const float alpha = r.w;
- if (use_alpha && alpha != 1.0f && alpha != 0.0f) {
+ if ((flags & NODE_IMAGE_ALPHA_UNASSOCIATE) && alpha != 1.0f && alpha != 0.0f) {
r /= alpha;
- const int texture_type = kernel_tex_type(id);
- if (texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_BYTE) {
- r = min(r, make_float4(1.0f, 1.0f, 1.0f, 1.0f));
- }
r.w = alpha;
}
- if (srgb) {
- /* TODO(lukas): Implement proper conversion for image textures. */
+ if (flags & NODE_IMAGE_COMPRESS_AS_SRGB) {
r = color_srgb_to_linear_v4(r);
}
@@ -45,16 +44,15 @@ ccl_device_inline float3 texco_remap_square(float3 co)
return (co - make_float3(0.5f, 0.5f, 0.5f)) * 2.0f;
}
-ccl_device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+ccl_device void svm_node_tex_image(
+ KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
- uint id = node.y;
- uint co_offset, out_offset, alpha_offset, srgb;
+ uint co_offset, out_offset, alpha_offset, flags;
- decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb);
+ svm_unpack_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags);
float3 co = stack_load_float3(stack, co_offset);
float2 tex_co;
- uint use_alpha = stack_valid(alpha_offset);
if (node.w == NODE_IMAGE_PROJ_SPHERE) {
co = texco_remap_square(co);
tex_co = map_to_sphere(co);
@@ -66,7 +64,51 @@ ccl_device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *sta
else {
tex_co = make_float2(co.x, co.y);
}
- float4 f = svm_image_texture(kg, id, tex_co.x, tex_co.y, srgb, use_alpha);
+
+ /* TODO(lukas): Consider moving tile information out of the SVM node.
+ * TextureInfo seems a reasonable candidate. */
+ int id = -1;
+ int num_nodes = (int)node.y;
+ if (num_nodes > 0) {
+ /* Remember the offset of the node following the tile nodes. */
+ int next_offset = (*offset) + num_nodes;
+
+ /* Find the tile that the UV lies in. */
+ int tx = (int)tex_co.x;
+ int ty = (int)tex_co.y;
+
+ /* Check that we're within a legitimate tile. */
+ if (tx >= 0 && ty >= 0 && tx < 10) {
+ int tile = 1001 + 10 * ty + tx;
+
+ /* Find the index of the tile. */
+ for (int i = 0; i < num_nodes; i++) {
+ uint4 tile_node = read_node(kg, offset);
+ if (tile_node.x == tile) {
+ id = tile_node.y;
+ break;
+ }
+ if (tile_node.z == tile) {
+ id = tile_node.w;
+ break;
+ }
+ }
+
+ /* If we found the tile, offset the UVs to be relative to it. */
+ if (id != -1) {
+ tex_co.x -= tx;
+ tex_co.y -= ty;
+ }
+ }
+
+ /* Skip over the remaining nodes. */
+ *offset = next_offset;
+ }
+ else {
+ id = -num_nodes;
+ }
+
+ float4 f = svm_image_texture(kg, id, tex_co.x, tex_co.y, flags);
if (stack_valid(out_offset))
stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z));
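The UDIM lookup is plain tile arithmetic on the integer part of the UV. For example, a UV of (2.3, 1.7) gives tx = 2 and ty = 1, so the loop scans the following tile nodes for UDIM tile 1001 + 10 * 1 + 2 = 1013; each tile node carries up to two (tile, slot) pairs in its x/y and z/w components, and on a match the lookup continues with the tile-local coordinates (0.3, 0.7). When node.y cast to int is not positive, num_nodes <= 0 and the node falls back to the old single-image behaviour, with the negated value used directly as the image slot.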
@@ -145,27 +187,26 @@ ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float
}
/* now fetch textures */
- uint co_offset, out_offset, alpha_offset, srgb;
- decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb);
+ uint co_offset, out_offset, alpha_offset, flags;
+ svm_unpack_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags);
float3 co = stack_load_float3(stack, co_offset);
uint id = node.y;
float4 f = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- uint use_alpha = stack_valid(alpha_offset);
/* Map so that no textures are flipped, rotation is somewhat arbitrary. */
if (weight.x > 0.0f) {
float2 uv = make_float2((signed_N.x < 0.0f) ? 1.0f - co.y : co.y, co.z);
- f += weight.x * svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha);
+ f += weight.x * svm_image_texture(kg, id, uv.x, uv.y, flags);
}
if (weight.y > 0.0f) {
float2 uv = make_float2((signed_N.y > 0.0f) ? 1.0f - co.x : co.x, co.z);
- f += weight.y * svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha);
+ f += weight.y * svm_image_texture(kg, id, uv.x, uv.y, flags);
}
if (weight.z > 0.0f) {
float2 uv = make_float2((signed_N.z > 0.0f) ? 1.0f - co.y : co.y, co.x);
- f += weight.z * svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha);
+ f += weight.z * svm_image_texture(kg, id, uv.x, uv.y, flags);
}
if (stack_valid(out_offset))
@@ -180,10 +221,10 @@ ccl_device void svm_node_tex_environment(KernelGlobals *kg,
uint4 node)
{
uint id = node.y;
- uint co_offset, out_offset, alpha_offset, srgb;
+ uint co_offset, out_offset, alpha_offset, flags;
uint projection = node.w;
- decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb);
+ svm_unpack_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags);
float3 co = stack_load_float3(stack, co_offset);
float2 uv;
@@ -195,8 +236,7 @@ ccl_device void svm_node_tex_environment(KernelGlobals *kg,
else
uv = direction_to_mirrorball(co);
- uint use_alpha = stack_valid(alpha_offset);
- float4 f = svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha);
+ float4 f = svm_image_texture(kg, id, uv.x, uv.y, flags);
if (stack_valid(out_offset))
stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z));
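The tile loop added to svm_node_tex_image above reduces to mapping a UV into a UDIM tile number (1001 + 10 * ty + tx, with at most ten tiles per row) plus a tile-local UV. A minimal standalone sketch of that mapping (plain C; udim_tile_from_uv is a hypothetical helper, not the kernel function):

#include <stdio.h>

/* Map a UV coordinate to a UDIM tile number and a tile-local UV, mirroring
 * the lookup above. Returns -1 when the UV falls outside the ten-column grid. */
static int udim_tile_from_uv(float u, float v, float *local_u, float *local_v)
{
  int tx = (int)u;
  int ty = (int)v;
  if (tx < 0 || ty < 0 || tx >= 10) {
    return -1;
  }
  *local_u = u - (float)tx;
  *local_v = v - (float)ty;
  return 1001 + 10 * ty + tx;
}

int main(void)
{
  float lu, lv;
  int tile = udim_tile_from_uv(2.25f, 1.5f, &lu, &lv);
  printf("tile %d, local UV (%.2f, %.2f)\n", tile, lu, lv); /* tile 1013, (0.25, 0.50) */
  return 0;
}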
diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h
index 65a9a284a17..768c65918cd 100644
--- a/intern/cycles/kernel/svm/svm_light_path.h
+++ b/intern/cycles/kernel/svm/svm_light_path.h
@@ -84,7 +84,7 @@ ccl_device void svm_node_light_falloff(ShaderData *sd, float *stack, uint4 node)
{
uint strength_offset, out_offset, smooth_offset;
- decode_node_uchar4(node.z, &strength_offset, &smooth_offset, &out_offset, NULL);
+ svm_unpack_node_uchar3(node.z, &strength_offset, &smooth_offset, &out_offset);
float strength = stack_load_float(stack, strength_offset);
uint type = node.y;
diff --git a/intern/cycles/kernel/svm/svm_magic.h b/intern/cycles/kernel/svm/svm_magic.h
index 115d2e2fe4b..9c160e6d8cc 100644
--- a/intern/cycles/kernel/svm/svm_magic.h
+++ b/intern/cycles/kernel/svm/svm_magic.h
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
/* Magic */
-ccl_device_noinline float3 svm_magic(float3 p, int n, float distortion)
+ccl_device_noinline_cpu float3 svm_magic(float3 p, int n, float distortion)
{
float x = sinf((p.x + p.y + p.z) * 5.0f);
float y = cosf((-p.x + p.y - p.z) * 5.0f);
@@ -93,8 +93,8 @@ ccl_device void svm_node_tex_magic(
uint depth;
uint scale_offset, distortion_offset, co_offset, fac_offset, color_offset;
- decode_node_uchar4(node.y, &depth, &color_offset, &fac_offset, NULL);
- decode_node_uchar4(node.z, &co_offset, &scale_offset, &distortion_offset, NULL);
+ svm_unpack_node_uchar3(node.y, &depth, &color_offset, &fac_offset);
+ svm_unpack_node_uchar3(node.z, &co_offset, &scale_offset, &distortion_offset);
uint4 node2 = read_node(kg, offset);
float3 co = stack_load_float3(stack, co_offset);
diff --git a/intern/cycles/kernel/svm/svm_map_range.h b/intern/cycles/kernel/svm/svm_map_range.h
new file mode 100644
index 00000000000..533a631c837
--- /dev/null
+++ b/intern/cycles/kernel/svm/svm_map_range.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Map Range Node */
+
+ccl_device_inline float smootherstep(float edge0, float edge1, float x)
+{
+ x = clamp(safe_divide((x - edge0), (edge1 - edge0)), 0.0f, 1.0f);
+ return x * x * x * (x * (x * 6.0f - 15.0f) + 10.0f);
+}
+
+ccl_device void svm_node_map_range(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint value_stack_offset,
+ uint parameters_stack_offsets,
+ uint results_stack_offsets,
+ int *offset)
+{
+ uint from_min_stack_offset, from_max_stack_offset, to_min_stack_offset, to_max_stack_offset;
+ uint type_stack_offset, steps_stack_offset, result_stack_offset;
+ svm_unpack_node_uchar4(parameters_stack_offsets,
+ &from_min_stack_offset,
+ &from_max_stack_offset,
+ &to_min_stack_offset,
+ &to_max_stack_offset);
+ svm_unpack_node_uchar3(
+ results_stack_offsets, &type_stack_offset, &steps_stack_offset, &result_stack_offset);
+
+ uint4 defaults = read_node(kg, offset);
+ uint4 defaults2 = read_node(kg, offset);
+
+ float value = stack_load_float(stack, value_stack_offset);
+ float from_min = stack_load_float_default(stack, from_min_stack_offset, defaults.x);
+ float from_max = stack_load_float_default(stack, from_max_stack_offset, defaults.y);
+ float to_min = stack_load_float_default(stack, to_min_stack_offset, defaults.z);
+ float to_max = stack_load_float_default(stack, to_max_stack_offset, defaults.w);
+ float steps = stack_load_float_default(stack, steps_stack_offset, defaults2.x);
+
+ float result;
+
+ if (from_max != from_min) {
+ float factor = value;
+ switch (type_stack_offset) {
+ default:
+ case NODE_MAP_RANGE_LINEAR:
+ factor = (value - from_min) / (from_max - from_min);
+ break;
+ case NODE_MAP_RANGE_STEPPED: {
+ factor = (value - from_min) / (from_max - from_min);
+ factor = (steps > 0.0f) ? floorf(factor * (steps + 1.0f)) / steps : 0.0f;
+ break;
+ }
+ case NODE_MAP_RANGE_SMOOTHSTEP: {
+ factor = (from_min > from_max) ? 1.0f - smoothstep(from_max, from_min, factor) :
+ smoothstep(from_min, from_max, factor);
+ break;
+ }
+ case NODE_MAP_RANGE_SMOOTHERSTEP: {
+ factor = (from_min > from_max) ? 1.0f - smootherstep(from_max, from_min, factor) :
+ smootherstep(from_min, from_max, factor);
+ break;
+ }
+ }
+ result = to_min + factor * (to_max - to_min);
+ }
+ else {
+ result = 0.0f;
+ }
+ stack_store_float(stack, result_stack_offset, result);
+}
+
+CCL_NAMESPACE_END
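To make the remapping in svm_node_map_range concrete: the linear mode rescales value from [from_min, from_max] into [to_min, to_max], and the stepped mode first quantizes the factor into steps + 1 discrete levels. A standalone sketch of those two branches (plain C, hypothetical names; the kernel version above additionally guards from_max == from_min and handles the smoothstep/smootherstep variants):

#include <math.h>
#include <stdio.h>

/* Linear branch: rescale value from [from_min, from_max] into [to_min, to_max]. */
static float map_range_linear(float value, float from_min, float from_max,
                              float to_min, float to_max)
{
  float factor = (value - from_min) / (from_max - from_min);
  return to_min + factor * (to_max - to_min);
}

/* Stepped branch: quantize the factor into steps + 1 discrete levels,
 * matching the NODE_MAP_RANGE_STEPPED case above. */
static float map_range_stepped(float value, float from_min, float from_max,
                               float to_min, float to_max, float steps)
{
  float factor = (value - from_min) / (from_max - from_min);
  factor = (steps > 0.0f) ? floorf(factor * (steps + 1.0f)) / steps : 0.0f;
  return to_min + factor * (to_max - to_min);
}

int main(void)
{
  printf("%.3f\n", map_range_linear(25.0f, 0.0f, 100.0f, 0.0f, 1.0f));        /* 0.250 */
  printf("%.3f\n", map_range_stepped(25.0f, 0.0f, 100.0f, 0.0f, 1.0f, 3.0f)); /* 0.333 */
  return 0;
}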
diff --git a/intern/cycles/kernel/svm/svm_mapping.h b/intern/cycles/kernel/svm/svm_mapping.h
index 998a29912d4..6e19c859e19 100644
--- a/intern/cycles/kernel/svm/svm_mapping.h
+++ b/intern/cycles/kernel/svm/svm_mapping.h
@@ -18,7 +18,33 @@ CCL_NAMESPACE_BEGIN
/* Mapping Node */
-ccl_device void svm_node_mapping(
+ccl_device void svm_node_mapping(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint type,
+ uint inputs_stack_offsets,
+ uint result_stack_offset,
+ int *offset)
+{
+ uint vector_stack_offset, location_stack_offset, rotation_stack_offset, scale_stack_offset;
+ svm_unpack_node_uchar4(inputs_stack_offsets,
+ &vector_stack_offset,
+ &location_stack_offset,
+ &rotation_stack_offset,
+ &scale_stack_offset);
+
+ float3 vector = stack_load_float3(stack, vector_stack_offset);
+ float3 location = stack_load_float3(stack, location_stack_offset);
+ float3 rotation = stack_load_float3(stack, rotation_stack_offset);
+ float3 scale = stack_load_float3(stack, scale_stack_offset);
+
+ float3 result = svm_mapping((NodeMappingType)type, vector, location, rotation, scale);
+ stack_store_float3(stack, result_stack_offset, result);
+}
+
+/* Texture Mapping */
+
+ccl_device void svm_node_texture_mapping(
KernelGlobals *kg, ShaderData *sd, float *stack, uint vec_offset, uint out_offset, int *offset)
{
float3 v = stack_load_float3(stack, vec_offset);
diff --git a/intern/cycles/kernel/svm/svm_mapping_util.h b/intern/cycles/kernel/svm/svm_mapping_util.h
new file mode 100644
index 00000000000..ec2c84e0791
--- /dev/null
+++ b/intern/cycles/kernel/svm/svm_mapping_util.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2011-2014 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device float3
+svm_mapping(NodeMappingType type, float3 vector, float3 location, float3 rotation, float3 scale)
+{
+ Transform rotationTransform = euler_to_transform(rotation);
+ switch (type) {
+ case NODE_MAPPING_TYPE_POINT:
+ return transform_direction(&rotationTransform, (vector * scale)) + location;
+ case NODE_MAPPING_TYPE_TEXTURE:
+ return safe_divide_float3_float3(
+ transform_direction_transposed(&rotationTransform, (vector - location)), scale);
+ case NODE_MAPPING_TYPE_VECTOR:
+ return transform_direction(&rotationTransform, (vector * scale));
+ case NODE_MAPPING_TYPE_NORMAL:
+ return safe_normalize(
+ transform_direction(&rotationTransform, safe_divide_float3_float3(vector, scale)));
+ default:
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
+}
+
+CCL_NAMESPACE_END
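The point and texture modes of svm_mapping above are inverse transforms: texture undoes what point applies, so a texture appears to follow the mapping. Ignoring rotation, a minimal sketch of that round trip (plain C; vec3, map_point and map_texture are illustrative names, and the kernel uses safe division to guard zero scale):

#include <stdio.h>

typedef struct {
  float x, y, z;
} vec3;

/* Point mapping without rotation: scale, then translate. */
static vec3 map_point(vec3 v, vec3 loc, vec3 scale)
{
  vec3 r = {v.x * scale.x + loc.x, v.y * scale.y + loc.y, v.z * scale.z + loc.z};
  return r;
}

/* Texture mapping without rotation: the inverse (untranslate, then unscale). */
static vec3 map_texture(vec3 v, vec3 loc, vec3 scale)
{
  vec3 r = {(v.x - loc.x) / scale.x, (v.y - loc.y) / scale.y, (v.z - loc.z) / scale.z};
  return r;
}

int main(void)
{
  vec3 loc = {1.0f, 2.0f, 3.0f}, scale = {2.0f, 2.0f, 2.0f};
  vec3 p = {0.5f, 0.5f, 0.5f};
  vec3 q = map_texture(map_point(p, loc, scale), loc, scale);
  printf("%.2f %.2f %.2f\n", q.x, q.y, q.z); /* 0.50 0.50 0.50: round trip back to p. */
  return 0;
}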
diff --git a/intern/cycles/kernel/svm/svm_math.h b/intern/cycles/kernel/svm/svm_math.h
index 5920913825b..12a2bbdeb9b 100644
--- a/intern/cycles/kernel/svm/svm_math.h
+++ b/intern/cycles/kernel/svm/svm_math.h
@@ -16,48 +16,59 @@
CCL_NAMESPACE_BEGIN
-/* Nodes */
-
ccl_device void svm_node_math(KernelGlobals *kg,
ShaderData *sd,
float *stack,
- uint itype,
- uint f1_offset,
- uint f2_offset,
+ uint type,
+ uint inputs_stack_offsets,
+ uint result_stack_offset,
int *offset)
{
- NodeMath type = (NodeMath)itype;
- float f1 = stack_load_float(stack, f1_offset);
- float f2 = stack_load_float(stack, f2_offset);
- float f = svm_math(type, f1, f2);
+ uint a_stack_offset, b_stack_offset, c_stack_offset;
+ svm_unpack_node_uchar3(inputs_stack_offsets, &a_stack_offset, &b_stack_offset, &c_stack_offset);
- uint4 node1 = read_node(kg, offset);
+ float a = stack_load_float(stack, a_stack_offset);
+ float b = stack_load_float(stack, b_stack_offset);
+ float c = stack_load_float(stack, c_stack_offset);
+ float result = svm_math((NodeMathType)type, a, b, c);
- stack_store_float(stack, node1.y, f);
+ stack_store_float(stack, result_stack_offset, result);
}
ccl_device void svm_node_vector_math(KernelGlobals *kg,
ShaderData *sd,
float *stack,
- uint itype,
- uint v1_offset,
- uint v2_offset,
+ uint type,
+ uint inputs_stack_offsets,
+ uint outputs_stack_offsets,
int *offset)
{
- NodeVectorMath type = (NodeVectorMath)itype;
- float3 v1 = stack_load_float3(stack, v1_offset);
- float3 v2 = stack_load_float3(stack, v2_offset);
- float f;
- float3 v;
+ uint value_stack_offset, vector_stack_offset;
+ uint a_stack_offset, b_stack_offset, scale_stack_offset;
+ svm_unpack_node_uchar3(
+ inputs_stack_offsets, &a_stack_offset, &b_stack_offset, &scale_stack_offset);
+ svm_unpack_node_uchar2(outputs_stack_offsets, &value_stack_offset, &vector_stack_offset);
+
+ float3 a = stack_load_float3(stack, a_stack_offset);
+ float3 b = stack_load_float3(stack, b_stack_offset);
+ float3 c = make_float3(0.0f, 0.0f, 0.0f);
+ float scale = stack_load_float(stack, scale_stack_offset);
+
+ float value;
+ float3 vector;
- svm_vector_math(&f, &v, type, v1, v2);
+  /* Operators with a third vector input read an extra node for it. */
+ if (type == NODE_VECTOR_MATH_WRAP) {
+ uint4 extra_node = read_node(kg, offset);
+ c = stack_load_float3(stack, extra_node.x);
+ }
- uint4 node1 = read_node(kg, offset);
+ svm_vector_math(&value, &vector, (NodeVectorMathType)type, a, b, c, scale);
- if (stack_valid(node1.y))
- stack_store_float(stack, node1.y, f);
- if (stack_valid(node1.z))
- stack_store_float3(stack, node1.z, v);
+ if (stack_valid(value_stack_offset))
+ stack_store_float(stack, value_stack_offset, value);
+ if (stack_valid(vector_stack_offset))
+ stack_store_float3(stack, vector_stack_offset, vector);
}
CCL_NAMESPACE_END
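The rewritten math nodes above pack several 8-bit stack offsets into a single uint and recover them with svm_unpack_node_uchar3/4. A standalone sketch of that packing under an assumed low-byte-first layout (plain C; pack_uchar4 and unpack_uchar3 are hypothetical stand-ins for the kernel/compiler helpers):

#include <stdint.h>
#include <stdio.h>

/* Pack four 8-bit stack offsets into one 32-bit word, low byte first. */
static uint32_t pack_uchar4(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
{
  return (a & 0xFF) | ((b & 0xFF) << 8) | ((c & 0xFF) << 16) | ((d & 0xFF) << 24);
}

/* Recover the first three bytes again, in the spirit of svm_unpack_node_uchar3. */
static void unpack_uchar3(uint32_t data, uint32_t *a, uint32_t *b, uint32_t *c)
{
  *a = data & 0xFF;
  *b = (data >> 8) & 0xFF;
  *c = (data >> 16) & 0xFF;
}

int main(void)
{
  uint32_t a, b, c;
  unpack_uchar3(pack_uchar4(3, 7, 11, 0), &a, &b, &c);
  printf("%u %u %u\n", a, b, c); /* 3 7 11 */
  return 0;
}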
diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h
index e3544515f1b..d1e1fa87e53 100644
--- a/intern/cycles/kernel/svm/svm_math_util.h
+++ b/intern/cycles/kernel/svm/svm_math_util.h
@@ -16,99 +16,179 @@
CCL_NAMESPACE_BEGIN
-ccl_device float average_fac(float3 v)
+ccl_device void svm_vector_math(float *value,
+ float3 *vector,
+ NodeVectorMathType type,
+ float3 a,
+ float3 b,
+ float3 c,
+ float scale)
{
- return (fabsf(v.x) + fabsf(v.y) + fabsf(v.z)) / 3.0f;
-}
-
-ccl_device void svm_vector_math(
- float *Fac, float3 *Vector, NodeVectorMath type, float3 Vector1, float3 Vector2)
-{
- if (type == NODE_VECTOR_MATH_ADD) {
- *Vector = Vector1 + Vector2;
- *Fac = average_fac(*Vector);
- }
- else if (type == NODE_VECTOR_MATH_SUBTRACT) {
- *Vector = Vector1 - Vector2;
- *Fac = average_fac(*Vector);
- }
- else if (type == NODE_VECTOR_MATH_AVERAGE) {
- *Vector = safe_normalize_len(Vector1 + Vector2, Fac);
- }
- else if (type == NODE_VECTOR_MATH_DOT_PRODUCT) {
- *Fac = dot(Vector1, Vector2);
- *Vector = make_float3(0.0f, 0.0f, 0.0f);
- }
- else if (type == NODE_VECTOR_MATH_CROSS_PRODUCT) {
- *Vector = safe_normalize_len(cross(Vector1, Vector2), Fac);
- }
- else if (type == NODE_VECTOR_MATH_NORMALIZE) {
- *Vector = safe_normalize_len(Vector1, Fac);
- }
- else {
- *Fac = 0.0f;
- *Vector = make_float3(0.0f, 0.0f, 0.0f);
+ switch (type) {
+ case NODE_VECTOR_MATH_ADD:
+ *vector = a + b;
+ break;
+ case NODE_VECTOR_MATH_SUBTRACT:
+ *vector = a - b;
+ break;
+ case NODE_VECTOR_MATH_MULTIPLY:
+ *vector = a * b;
+ break;
+ case NODE_VECTOR_MATH_DIVIDE:
+ *vector = safe_divide_float3_float3(a, b);
+ break;
+ case NODE_VECTOR_MATH_CROSS_PRODUCT:
+ *vector = cross(a, b);
+ break;
+ case NODE_VECTOR_MATH_PROJECT:
+ *vector = project(a, b);
+ break;
+ case NODE_VECTOR_MATH_REFLECT:
+ *vector = reflect(a, b);
+ break;
+ case NODE_VECTOR_MATH_DOT_PRODUCT:
+ *value = dot(a, b);
+ break;
+ case NODE_VECTOR_MATH_DISTANCE:
+ *value = distance(a, b);
+ break;
+ case NODE_VECTOR_MATH_LENGTH:
+ *value = len(a);
+ break;
+ case NODE_VECTOR_MATH_SCALE:
+ *vector = a * scale;
+ break;
+ case NODE_VECTOR_MATH_NORMALIZE:
+ *vector = safe_normalize(a);
+ break;
+ case NODE_VECTOR_MATH_SNAP:
+ *vector = floor(safe_divide_float3_float3(a, b)) * b;
+ break;
+ case NODE_VECTOR_MATH_FLOOR:
+ *vector = floor(a);
+ break;
+ case NODE_VECTOR_MATH_CEIL:
+ *vector = ceil(a);
+ break;
+ case NODE_VECTOR_MATH_MODULO:
+ *vector = make_float3(safe_modulo(a.x, b.x), safe_modulo(a.y, b.y), safe_modulo(a.z, b.z));
+ break;
+ case NODE_VECTOR_MATH_WRAP:
+ *vector = make_float3(wrapf(a.x, b.x, c.x), wrapf(a.y, b.y, c.y), wrapf(a.z, b.z, c.z));
+ break;
+ case NODE_VECTOR_MATH_FRACTION:
+ *vector = a - floor(a);
+ break;
+ case NODE_VECTOR_MATH_ABSOLUTE:
+ *vector = fabs(a);
+ break;
+ case NODE_VECTOR_MATH_MINIMUM:
+ *vector = min(a, b);
+ break;
+ case NODE_VECTOR_MATH_MAXIMUM:
+ *vector = max(a, b);
+ break;
+ case NODE_VECTOR_MATH_SINE:
+ *vector = make_float3(sinf(a.x), sinf(a.y), sinf(a.z));
+ break;
+ case NODE_VECTOR_MATH_COSINE:
+ *vector = make_float3(cosf(a.x), cosf(a.y), cosf(a.z));
+ break;
+ case NODE_VECTOR_MATH_TANGENT:
+ *vector = make_float3(tanf(a.x), tanf(a.y), tanf(a.z));
+ break;
+ default:
+ *vector = make_float3(0.0f, 0.0f, 0.0f);
+ *value = 0.0f;
}
}
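NODE_VECTOR_MATH_PROJECT above relies on ordinary vector projection. A standalone scalar sketch under that assumption (plain C; vec3, dot3 and project3 are hypothetical names, and the kernel's project() may guard a zero-length b differently):

#include <stdio.h>

typedef struct {
  float x, y, z;
} vec3;

static float dot3(vec3 a, vec3 b)
{
  return a.x * b.x + a.y * b.y + a.z * b.z;
}

/* Projection of a onto b, falling back to zero for a zero-length b. */
static vec3 project3(vec3 a, vec3 b)
{
  float len_sq = dot3(b, b);
  float t = (len_sq != 0.0f) ? dot3(a, b) / len_sq : 0.0f;
  vec3 r = {b.x * t, b.y * t, b.z * t};
  return r;
}

int main(void)
{
  vec3 a = {1.0f, 2.0f, 0.0f};
  vec3 b = {1.0f, 0.0f, 0.0f};
  vec3 p = project3(a, b);
  printf("%.1f %.1f %.1f\n", p.x, p.y, p.z); /* 1.0 0.0 0.0 */
  return 0;
}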
-ccl_device float svm_math(NodeMath type, float Fac1, float Fac2)
+ccl_device float svm_math(NodeMathType type, float a, float b, float c)
{
- float Fac;
-
- if (type == NODE_MATH_ADD)
- Fac = Fac1 + Fac2;
- else if (type == NODE_MATH_SUBTRACT)
- Fac = Fac1 - Fac2;
- else if (type == NODE_MATH_MULTIPLY)
- Fac = Fac1 * Fac2;
- else if (type == NODE_MATH_DIVIDE)
- Fac = safe_divide(Fac1, Fac2);
- else if (type == NODE_MATH_SINE)
- Fac = sinf(Fac1);
- else if (type == NODE_MATH_COSINE)
- Fac = cosf(Fac1);
- else if (type == NODE_MATH_TANGENT)
- Fac = tanf(Fac1);
- else if (type == NODE_MATH_ARCSINE)
- Fac = safe_asinf(Fac1);
- else if (type == NODE_MATH_ARCCOSINE)
- Fac = safe_acosf(Fac1);
- else if (type == NODE_MATH_ARCTANGENT)
- Fac = atanf(Fac1);
- else if (type == NODE_MATH_POWER)
- Fac = safe_powf(Fac1, Fac2);
- else if (type == NODE_MATH_LOGARITHM)
- Fac = safe_logf(Fac1, Fac2);
- else if (type == NODE_MATH_MINIMUM)
- Fac = fminf(Fac1, Fac2);
- else if (type == NODE_MATH_MAXIMUM)
- Fac = fmaxf(Fac1, Fac2);
- else if (type == NODE_MATH_ROUND)
- Fac = floorf(Fac1 + 0.5f);
- else if (type == NODE_MATH_LESS_THAN)
- Fac = Fac1 < Fac2;
- else if (type == NODE_MATH_GREATER_THAN)
- Fac = Fac1 > Fac2;
- else if (type == NODE_MATH_MODULO)
- Fac = safe_modulo(Fac1, Fac2);
- else if (type == NODE_MATH_ABSOLUTE)
- Fac = fabsf(Fac1);
- else if (type == NODE_MATH_ARCTAN2)
- Fac = atan2f(Fac1, Fac2);
- else if (type == NODE_MATH_FLOOR)
- Fac = floorf(Fac1);
- else if (type == NODE_MATH_CEIL)
- Fac = ceilf(Fac1);
- else if (type == NODE_MATH_FRACT)
- Fac = Fac1 - floorf(Fac1);
- else if (type == NODE_MATH_SQRT)
- Fac = safe_sqrtf(Fac1);
- else if (type == NODE_MATH_CLAMP)
- Fac = saturate(Fac1);
- else
- Fac = 0.0f;
-
- return Fac;
+ switch (type) {
+ case NODE_MATH_ADD:
+ return a + b;
+ case NODE_MATH_SUBTRACT:
+ return a - b;
+ case NODE_MATH_MULTIPLY:
+ return a * b;
+ case NODE_MATH_DIVIDE:
+ return safe_divide(a, b);
+ case NODE_MATH_POWER:
+ return safe_powf(a, b);
+ case NODE_MATH_LOGARITHM:
+ return safe_logf(a, b);
+ case NODE_MATH_SQRT:
+ return safe_sqrtf(a);
+ case NODE_MATH_INV_SQRT:
+ return inversesqrtf(a);
+ case NODE_MATH_ABSOLUTE:
+ return fabsf(a);
+ case NODE_MATH_RADIANS:
+ return a * (M_PI_F / 180.0f);
+ case NODE_MATH_DEGREES:
+ return a * (180.0f / M_PI_F);
+ case NODE_MATH_MINIMUM:
+ return fminf(a, b);
+ case NODE_MATH_MAXIMUM:
+ return fmaxf(a, b);
+ case NODE_MATH_LESS_THAN:
+ return a < b;
+ case NODE_MATH_GREATER_THAN:
+ return a > b;
+ case NODE_MATH_ROUND:
+ return floorf(a + 0.5f);
+ case NODE_MATH_FLOOR:
+ return floorf(a);
+ case NODE_MATH_CEIL:
+ return ceilf(a);
+ case NODE_MATH_FRACTION:
+ return a - floorf(a);
+ case NODE_MATH_MODULO:
+ return safe_modulo(a, b);
+ case NODE_MATH_TRUNC:
+ return a >= 0.0f ? floorf(a) : ceilf(a);
+ case NODE_MATH_SNAP:
+ return floorf(safe_divide(a, b)) * b;
+ case NODE_MATH_WRAP:
+ return wrapf(a, b, c);
+ case NODE_MATH_PINGPONG:
+ return pingpongf(a, b);
+ case NODE_MATH_SINE:
+ return sinf(a);
+ case NODE_MATH_COSINE:
+ return cosf(a);
+ case NODE_MATH_TANGENT:
+ return tanf(a);
+ case NODE_MATH_SINH:
+ return sinhf(a);
+ case NODE_MATH_COSH:
+ return coshf(a);
+ case NODE_MATH_TANH:
+ return tanhf(a);
+ case NODE_MATH_ARCSINE:
+ return safe_asinf(a);
+ case NODE_MATH_ARCCOSINE:
+ return safe_acosf(a);
+ case NODE_MATH_ARCTANGENT:
+ return atanf(a);
+ case NODE_MATH_ARCTAN2:
+ return atan2f(a, b);
+ case NODE_MATH_SIGN:
+ return compatible_signf(a);
+ case NODE_MATH_EXPONENT:
+ return expf(a);
+ case NODE_MATH_COMPARE:
+ return ((a == b) || (fabsf(a - b) <= fmaxf(c, FLT_EPSILON))) ? 1.0f : 0.0f;
+ case NODE_MATH_MULTIPLY_ADD:
+ return a * b + c;
+ case NODE_MATH_SMOOTH_MIN:
+ return smoothminf(a, b, c);
+ case NODE_MATH_SMOOTH_MAX:
+ return -smoothminf(-a, -b, c);
+ default:
+ return 0.0f;
+ }
}
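One of the new operators above is defined indirectly: NODE_MATH_SMOOTH_MAX uses the identity -smoothmin(-a, -b, c). A standalone sketch using a common cubic smooth-minimum (plain C; assuming the kernel's smoothminf behaves like this is an assumption, so treat smooth_min here as illustrative):

#include <math.h>
#include <stdio.h>

/* A common cubic polynomial smooth-minimum; k controls the blend width. */
static float smooth_min(float a, float b, float k)
{
  if (k != 0.0f) {
    float h = fmaxf(k - fabsf(a - b), 0.0f) / k;
    return fminf(a, b) - h * h * h * k * (1.0f / 6.0f);
  }
  return fminf(a, b);
}

/* Smooth maximum via the identity used by NODE_MATH_SMOOTH_MAX above. */
static float smooth_max(float a, float b, float k)
{
  return -smooth_min(-a, -b, k);
}

int main(void)
{
  /* With k = 0 these reduce to plain min/max. */
  printf("%.3f %.3f\n", smooth_min(1.0f, 2.0f, 0.0f), smooth_max(1.0f, 2.0f, 0.0f));
  /* With k > 0 the transition near a == b is rounded off. */
  printf("%.3f %.3f\n", smooth_min(1.0f, 1.1f, 0.5f), smooth_max(1.0f, 1.1f, 0.5f));
  return 0;
}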
/* Calculate color in range 800..12000 using an approximation
diff --git a/intern/cycles/kernel/svm/svm_musgrave.h b/intern/cycles/kernel/svm/svm_musgrave.h
index 67fb5ca6241..571f62fe27f 100644
--- a/intern/cycles/kernel/svm/svm_musgrave.h
+++ b/intern/cycles/kernel/svm/svm_musgrave.h
@@ -16,7 +16,7 @@
CCL_NAMESPACE_BEGIN
-/* Musgrave fBm
+/* 1D Musgrave fBm
*
* H: fractal increment parameter
* lacunarity: gap between successive frequencies
@@ -25,59 +25,404 @@ CCL_NAMESPACE_BEGIN
* from "Texturing and Modelling: A procedural approach"
*/
-ccl_device_noinline float noise_musgrave_fBm(float3 p, float H, float lacunarity, float octaves)
+ccl_device_noinline_cpu float noise_musgrave_fBm_1d(float co,
+ float H,
+ float lacunarity,
+ float octaves)
{
- float rmd;
+ float p = co;
float value = 0.0f;
float pwr = 1.0f;
float pwHL = powf(lacunarity, -H);
- int i;
- for (i = 0; i < float_to_int(octaves); i++) {
- value += snoise(p) * pwr;
+ for (int i = 0; i < float_to_int(octaves); i++) {
+ value += snoise_1d(p) * pwr;
pwr *= pwHL;
p *= lacunarity;
}
- rmd = octaves - floorf(octaves);
- if (rmd != 0.0f)
- value += rmd * snoise(p) * pwr;
+ float rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f) {
+ value += rmd * snoise_1d(p) * pwr;
+ }
+
+ return value;
+}
+
+/* 1D Musgrave Multifractal
+ *
+ * H: highest fractal dimension
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ */
+
+ccl_device_noinline_cpu float noise_musgrave_multi_fractal_1d(float co,
+ float H,
+ float lacunarity,
+ float octaves)
+{
+ float p = co;
+ float value = 1.0f;
+ float pwr = 1.0f;
+ float pwHL = powf(lacunarity, -H);
+
+ for (int i = 0; i < float_to_int(octaves); i++) {
+ value *= (pwr * snoise_1d(p) + 1.0f);
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f) {
+ value *= (rmd * pwr * snoise_1d(p) + 1.0f); /* correct? */
+ }
+
+ return value;
+}
+
+/* 1D Musgrave Heterogeneous Terrain
+ *
+ * H: fractal dimension of the roughest area
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ * offset: raises the terrain from `sea level'
+ */
+
+ccl_device_noinline_cpu float noise_musgrave_hetero_terrain_1d(
+ float co, float H, float lacunarity, float octaves, float offset)
+{
+ float p = co;
+ float pwHL = powf(lacunarity, -H);
+ float pwr = pwHL;
+
+ /* first unscaled octave of function; later octaves are scaled */
+ float value = offset + snoise_1d(p);
+ p *= lacunarity;
+
+ for (int i = 1; i < float_to_int(octaves); i++) {
+ float increment = (snoise_1d(p) + offset) * pwr * value;
+ value += increment;
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f) {
+ float increment = (snoise_1d(p) + offset) * pwr * value;
+ value += rmd * increment;
+ }
+
+ return value;
+}
+
+/* 1D Hybrid Additive/Multiplicative Multifractal Terrain
+ *
+ * H: fractal dimension of the roughest area
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ * offset: raises the terrain from `sea level'
+ */
+
+ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_1d(
+ float co, float H, float lacunarity, float octaves, float offset, float gain)
+{
+ float p = co;
+ float pwHL = powf(lacunarity, -H);
+ float pwr = pwHL;
+
+ float value = snoise_1d(p) + offset;
+ float weight = gain * value;
+ p *= lacunarity;
+
+ for (int i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) {
+ if (weight > 1.0f) {
+ weight = 1.0f;
+ }
+
+ float signal = (snoise_1d(p) + offset) * pwr;
+ pwr *= pwHL;
+ value += weight * signal;
+ weight *= gain * signal;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f) {
+ value += rmd * ((snoise_1d(p) + offset) * pwr);
+ }
+
+ return value;
+}
+
+/* 1D Ridged Multifractal Terrain
+ *
+ * H: fractal dimension of the roughest area
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ * offset: raises the terrain from `sea level'
+ */
+
+ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_1d(
+ float co, float H, float lacunarity, float octaves, float offset, float gain)
+{
+ float p = co;
+ float pwHL = powf(lacunarity, -H);
+ float pwr = pwHL;
+
+ float signal = offset - fabsf(snoise_1d(p));
+ signal *= signal;
+ float value = signal;
+ float weight = 1.0f;
+
+ for (int i = 1; i < float_to_int(octaves); i++) {
+ p *= lacunarity;
+ weight = saturate(signal * gain);
+ signal = offset - fabsf(snoise_1d(p));
+ signal *= signal;
+ signal *= weight;
+ value += signal * pwr;
+ pwr *= pwHL;
+ }
+
+ return value;
+}
+
+/* 2D Musgrave fBm
+ *
+ * H: fractal increment parameter
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ *
+ * from "Texturing and Modelling: A procedural approach"
+ */
+
+ccl_device_noinline_cpu float noise_musgrave_fBm_2d(float2 co,
+ float H,
+ float lacunarity,
+ float octaves)
+{
+ float2 p = co;
+ float value = 0.0f;
+ float pwr = 1.0f;
+ float pwHL = powf(lacunarity, -H);
+
+ for (int i = 0; i < float_to_int(octaves); i++) {
+ value += snoise_2d(p) * pwr;
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f) {
+ value += rmd * snoise_2d(p) * pwr;
+ }
+
+ return value;
+}
+
+/* 2D Musgrave Multifractal
+ *
+ * H: highest fractal dimension
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ */
+
+ccl_device_noinline_cpu float noise_musgrave_multi_fractal_2d(float2 co,
+ float H,
+ float lacunarity,
+ float octaves)
+{
+ float2 p = co;
+ float value = 1.0f;
+ float pwr = 1.0f;
+ float pwHL = powf(lacunarity, -H);
+
+ for (int i = 0; i < float_to_int(octaves); i++) {
+ value *= (pwr * snoise_2d(p) + 1.0f);
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f) {
+ value *= (rmd * pwr * snoise_2d(p) + 1.0f); /* correct? */
+ }
+
+ return value;
+}
+
+/* 2D Musgrave Heterogeneous Terrain
+ *
+ * H: fractal dimension of the roughest area
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ * offset: raises the terrain from `sea level'
+ */
+
+ccl_device_noinline_cpu float noise_musgrave_hetero_terrain_2d(
+ float2 co, float H, float lacunarity, float octaves, float offset)
+{
+ float2 p = co;
+ float pwHL = powf(lacunarity, -H);
+ float pwr = pwHL;
+
+ /* first unscaled octave of function; later octaves are scaled */
+ float value = offset + snoise_2d(p);
+ p *= lacunarity;
+
+ for (int i = 1; i < float_to_int(octaves); i++) {
+ float increment = (snoise_2d(p) + offset) * pwr * value;
+ value += increment;
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f) {
+ float increment = (snoise_2d(p) + offset) * pwr * value;
+ value += rmd * increment;
+ }
+
+ return value;
+}
+
+/* 2D Hybrid Additive/Multiplicative Multifractal Terrain
+ *
+ * H: fractal dimension of the roughest area
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ * offset: raises the terrain from `sea level'
+ */
+
+ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_2d(
+ float2 co, float H, float lacunarity, float octaves, float offset, float gain)
+{
+ float2 p = co;
+ float pwHL = powf(lacunarity, -H);
+ float pwr = pwHL;
+
+ float value = snoise_2d(p) + offset;
+ float weight = gain * value;
+ p *= lacunarity;
+
+ for (int i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) {
+ if (weight > 1.0f) {
+ weight = 1.0f;
+ }
+
+ float signal = (snoise_2d(p) + offset) * pwr;
+ pwr *= pwHL;
+ value += weight * signal;
+ weight *= gain * signal;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f) {
+ value += rmd * ((snoise_2d(p) + offset) * pwr);
+ }
+
+ return value;
+}
+
+/* 2D Ridged Multifractal Terrain
+ *
+ * H: fractal dimension of the roughest area
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ * offset: raises the terrain from `sea level'
+ */
+
+ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_2d(
+ float2 co, float H, float lacunarity, float octaves, float offset, float gain)
+{
+ float2 p = co;
+ float pwHL = powf(lacunarity, -H);
+ float pwr = pwHL;
+
+ float signal = offset - fabsf(snoise_2d(p));
+ signal *= signal;
+ float value = signal;
+ float weight = 1.0f;
+
+ for (int i = 1; i < float_to_int(octaves); i++) {
+ p *= lacunarity;
+ weight = saturate(signal * gain);
+ signal = offset - fabsf(snoise_2d(p));
+ signal *= signal;
+ signal *= weight;
+ value += signal * pwr;
+ pwr *= pwHL;
+ }
+
+ return value;
+}
+
+/* 3D Musgrave fBm
+ *
+ * H: fractal increment parameter
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ *
+ * from "Texturing and Modelling: A procedural approach"
+ */
+
+ccl_device_noinline_cpu float noise_musgrave_fBm_3d(float3 co,
+ float H,
+ float lacunarity,
+ float octaves)
+{
+ float3 p = co;
+ float value = 0.0f;
+ float pwr = 1.0f;
+ float pwHL = powf(lacunarity, -H);
+
+ for (int i = 0; i < float_to_int(octaves); i++) {
+ value += snoise_3d(p) * pwr;
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f) {
+ value += rmd * snoise_3d(p) * pwr;
+ }
return value;
}
-/* Musgrave Multifractal
+/* 3D Musgrave Multifractal
*
* H: highest fractal dimension
* lacunarity: gap between successive frequencies
* octaves: number of frequencies in the fBm
*/
-ccl_device_noinline float noise_musgrave_multi_fractal(float3 p,
- float H,
- float lacunarity,
- float octaves)
+ccl_device_noinline_cpu float noise_musgrave_multi_fractal_3d(float3 co,
+ float H,
+ float lacunarity,
+ float octaves)
{
- float rmd;
+ float3 p = co;
float value = 1.0f;
float pwr = 1.0f;
float pwHL = powf(lacunarity, -H);
- int i;
- for (i = 0; i < float_to_int(octaves); i++) {
- value *= (pwr * snoise(p) + 1.0f);
+ for (int i = 0; i < float_to_int(octaves); i++) {
+ value *= (pwr * snoise_3d(p) + 1.0f);
pwr *= pwHL;
p *= lacunarity;
}
- rmd = octaves - floorf(octaves);
- if (rmd != 0.0f)
- value *= (rmd * pwr * snoise(p) + 1.0f); /* correct? */
+ float rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f) {
+ value *= (rmd * pwr * snoise_3d(p) + 1.0f); /* correct? */
+ }
return value;
}
-/* Musgrave Heterogeneous Terrain
+/* 3D Musgrave Heterogeneous Terrain
*
* H: fractal dimension of the roughest area
* lacunarity: gap between successive frequencies
@@ -85,35 +430,34 @@ ccl_device_noinline float noise_musgrave_multi_fractal(float3 p,
* offset: raises the terrain from `sea level'
*/
-ccl_device_noinline float noise_musgrave_hetero_terrain(
- float3 p, float H, float lacunarity, float octaves, float offset)
+ccl_device_noinline_cpu float noise_musgrave_hetero_terrain_3d(
+ float3 co, float H, float lacunarity, float octaves, float offset)
{
- float value, increment, rmd;
+ float3 p = co;
float pwHL = powf(lacunarity, -H);
float pwr = pwHL;
- int i;
/* first unscaled octave of function; later octaves are scaled */
- value = offset + snoise(p);
+ float value = offset + snoise_3d(p);
p *= lacunarity;
- for (i = 1; i < float_to_int(octaves); i++) {
- increment = (snoise(p) + offset) * pwr * value;
+ for (int i = 1; i < float_to_int(octaves); i++) {
+ float increment = (snoise_3d(p) + offset) * pwr * value;
value += increment;
pwr *= pwHL;
p *= lacunarity;
}
- rmd = octaves - floorf(octaves);
+ float rmd = octaves - floorf(octaves);
if (rmd != 0.0f) {
- increment = (snoise(p) + offset) * pwr * value;
+ float increment = (snoise_3d(p) + offset) * pwr * value;
value += rmd * increment;
}
return value;
}
-/* Hybrid Additive/Multiplicative Multifractal Terrain
+/* 3D Hybrid Additive/Multiplicative Multifractal Terrain
*
* H: fractal dimension of the roughest area
* lacunarity: gap between successive frequencies
@@ -121,37 +465,38 @@ ccl_device_noinline float noise_musgrave_hetero_terrain(
* offset: raises the terrain from `sea level'
*/
-ccl_device_noinline float noise_musgrave_hybrid_multi_fractal(
- float3 p, float H, float lacunarity, float octaves, float offset, float gain)
+ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_3d(
+ float3 co, float H, float lacunarity, float octaves, float offset, float gain)
{
- float result, signal, weight, rmd;
+ float3 p = co;
float pwHL = powf(lacunarity, -H);
float pwr = pwHL;
- int i;
- result = snoise(p) + offset;
- weight = gain * result;
+ float value = snoise_3d(p) + offset;
+ float weight = gain * value;
p *= lacunarity;
- for (i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) {
- if (weight > 1.0f)
+ for (int i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) {
+ if (weight > 1.0f) {
weight = 1.0f;
+ }
- signal = (snoise(p) + offset) * pwr;
+ float signal = (snoise_3d(p) + offset) * pwr;
pwr *= pwHL;
- result += weight * signal;
+ value += weight * signal;
weight *= gain * signal;
p *= lacunarity;
}
- rmd = octaves - floorf(octaves);
- if (rmd != 0.0f)
- result += rmd * ((snoise(p) + offset) * pwr);
+ float rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f) {
+ value += rmd * ((snoise_3d(p) + offset) * pwr);
+ }
- return result;
+ return value;
}
-/* Ridged Multifractal Terrain
+/* 3D Ridged Multifractal Terrain
*
* H: fractal dimension of the roughest area
* lacunarity: gap between successive frequencies
@@ -159,93 +504,346 @@ ccl_device_noinline float noise_musgrave_hybrid_multi_fractal(
* offset: raises the terrain from `sea level'
*/
-ccl_device_noinline float noise_musgrave_ridged_multi_fractal(
- float3 p, float H, float lacunarity, float octaves, float offset, float gain)
+ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_3d(
+ float3 co, float H, float lacunarity, float octaves, float offset, float gain)
{
- float result, signal, weight;
+ float3 p = co;
float pwHL = powf(lacunarity, -H);
float pwr = pwHL;
- int i;
- signal = offset - fabsf(snoise(p));
+ float signal = offset - fabsf(snoise_3d(p));
signal *= signal;
- result = signal;
- weight = 1.0f;
+ float value = signal;
+ float weight = 1.0f;
- for (i = 1; i < float_to_int(octaves); i++) {
+ for (int i = 1; i < float_to_int(octaves); i++) {
p *= lacunarity;
weight = saturate(signal * gain);
- signal = offset - fabsf(snoise(p));
+ signal = offset - fabsf(snoise_3d(p));
signal *= signal;
signal *= weight;
- result += signal * pwr;
+ value += signal * pwr;
pwr *= pwHL;
}
- return result;
+ return value;
}
-/* Shader */
+/* 4D Musgrave fBm
+ *
+ * H: fractal increment parameter
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ *
+ * from "Texturing and Modelling: A procedural approach"
+ */
-ccl_device float svm_musgrave(NodeMusgraveType type,
- float dimension,
- float lacunarity,
- float octaves,
- float offset,
- float intensity,
- float gain,
- float3 p)
+ccl_device_noinline_cpu float noise_musgrave_fBm_4d(float4 co,
+ float H,
+ float lacunarity,
+ float octaves)
{
- if (type == NODE_MUSGRAVE_MULTIFRACTAL)
- return intensity * noise_musgrave_multi_fractal(p, dimension, lacunarity, octaves);
- else if (type == NODE_MUSGRAVE_FBM)
- return intensity * noise_musgrave_fBm(p, dimension, lacunarity, octaves);
- else if (type == NODE_MUSGRAVE_HYBRID_MULTIFRACTAL)
- return intensity *
- noise_musgrave_hybrid_multi_fractal(p, dimension, lacunarity, octaves, offset, gain);
- else if (type == NODE_MUSGRAVE_RIDGED_MULTIFRACTAL)
- return intensity *
- noise_musgrave_ridged_multi_fractal(p, dimension, lacunarity, octaves, offset, gain);
- else if (type == NODE_MUSGRAVE_HETERO_TERRAIN)
- return intensity * noise_musgrave_hetero_terrain(p, dimension, lacunarity, octaves, offset);
+ float4 p = co;
+ float value = 0.0f;
+ float pwr = 1.0f;
+ float pwHL = powf(lacunarity, -H);
+
+ for (int i = 0; i < float_to_int(octaves); i++) {
+ value += snoise_4d(p) * pwr;
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f) {
+ value += rmd * snoise_4d(p) * pwr;
+ }
- return 0.0f;
+ return value;
}
-ccl_device void svm_node_tex_musgrave(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+/* 4D Musgrave Multifractal
+ *
+ * H: highest fractal dimension
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ */
+
+ccl_device_noinline_cpu float noise_musgrave_multi_fractal_4d(float4 co,
+ float H,
+ float lacunarity,
+ float octaves)
{
- uint4 node2 = read_node(kg, offset);
- uint4 node3 = read_node(kg, offset);
+ float4 p = co;
+ float value = 1.0f;
+ float pwr = 1.0f;
+ float pwHL = powf(lacunarity, -H);
- uint type, co_offset, color_offset, fac_offset;
- uint dimension_offset, lacunarity_offset, detail_offset, offset_offset;
- uint gain_offset, scale_offset;
+ for (int i = 0; i < float_to_int(octaves); i++) {
+ value *= (pwr * snoise_4d(p) + 1.0f);
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
- decode_node_uchar4(node.y, &type, &co_offset, &color_offset, &fac_offset);
- decode_node_uchar4(
- node.z, &dimension_offset, &lacunarity_offset, &detail_offset, &offset_offset);
- decode_node_uchar4(node.w, &gain_offset, &scale_offset, NULL, NULL);
+ float rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f) {
+ value *= (rmd * pwr * snoise_4d(p) + 1.0f); /* correct? */
+ }
- float3 co = stack_load_float3(stack, co_offset);
- float dimension = stack_load_float_default(stack, dimension_offset, node2.x);
- float lacunarity = stack_load_float_default(stack, lacunarity_offset, node2.y);
- float detail = stack_load_float_default(stack, detail_offset, node2.z);
- float foffset = stack_load_float_default(stack, offset_offset, node2.w);
- float gain = stack_load_float_default(stack, gain_offset, node3.x);
- float scale = stack_load_float_default(stack, scale_offset, node3.y);
+ return value;
+}
+
+/* 4D Musgrave Heterogeneous Terrain
+ *
+ * H: fractal dimension of the roughest area
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ * offset: raises the terrain from `sea level'
+ */
+
+ccl_device_noinline_cpu float noise_musgrave_hetero_terrain_4d(
+ float4 co, float H, float lacunarity, float octaves, float offset)
+{
+ float4 p = co;
+ float pwHL = powf(lacunarity, -H);
+ float pwr = pwHL;
+
+ /* first unscaled octave of function; later octaves are scaled */
+ float value = offset + snoise_4d(p);
+ p *= lacunarity;
+
+ for (int i = 1; i < float_to_int(octaves); i++) {
+ float increment = (snoise_4d(p) + offset) * pwr * value;
+ value += increment;
+ pwr *= pwHL;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f) {
+ float increment = (snoise_4d(p) + offset) * pwr * value;
+ value += rmd * increment;
+ }
+
+ return value;
+}
+
+/* 4D Hybrid Additive/Multiplicative Multifractal Terrain
+ *
+ * H: fractal dimension of the roughest area
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ * offset: raises the terrain from `sea level'
+ */
+
+ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_4d(
+ float4 co, float H, float lacunarity, float octaves, float offset, float gain)
+{
+ float4 p = co;
+ float pwHL = powf(lacunarity, -H);
+ float pwr = pwHL;
+
+ float value = snoise_4d(p) + offset;
+ float weight = gain * value;
+ p *= lacunarity;
+
+ for (int i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) {
+ if (weight > 1.0f) {
+ weight = 1.0f;
+ }
+
+ float signal = (snoise_4d(p) + offset) * pwr;
+ pwr *= pwHL;
+ value += weight * signal;
+ weight *= gain * signal;
+ p *= lacunarity;
+ }
+
+ float rmd = octaves - floorf(octaves);
+ if (rmd != 0.0f) {
+ value += rmd * ((snoise_4d(p) + offset) * pwr);
+ }
+
+ return value;
+}
+
+/* 4D Ridged Multifractal Terrain
+ *
+ * H: fractal dimension of the roughest area
+ * lacunarity: gap between successive frequencies
+ * octaves: number of frequencies in the fBm
+ * offset: raises the terrain from `sea level'
+ */
+
+ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_4d(
+ float4 co, float H, float lacunarity, float octaves, float offset, float gain)
+{
+ float4 p = co;
+ float pwHL = powf(lacunarity, -H);
+ float pwr = pwHL;
+
+ float signal = offset - fabsf(snoise_4d(p));
+ signal *= signal;
+ float value = signal;
+ float weight = 1.0f;
+
+ for (int i = 1; i < float_to_int(octaves); i++) {
+ p *= lacunarity;
+ weight = saturate(signal * gain);
+ signal = offset - fabsf(snoise_4d(p));
+ signal *= signal;
+ signal *= weight;
+ value += signal * pwr;
+ pwr *= pwHL;
+ }
+
+ return value;
+}
+
+ccl_device void svm_node_tex_musgrave(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint offsets1,
+ uint offsets2,
+ uint offsets3,
+ int *offset)
+{
+ uint type, dimensions, co_stack_offset, w_stack_offset;
+ uint scale_stack_offset, detail_stack_offset, dimension_stack_offset, lacunarity_stack_offset;
+ uint offset_stack_offset, gain_stack_offset, fac_stack_offset;
+
+ svm_unpack_node_uchar4(offsets1, &type, &dimensions, &co_stack_offset, &w_stack_offset);
+ svm_unpack_node_uchar4(offsets2,
+ &scale_stack_offset,
+ &detail_stack_offset,
+ &dimension_stack_offset,
+ &lacunarity_stack_offset);
+ svm_unpack_node_uchar3(offsets3, &offset_stack_offset, &gain_stack_offset, &fac_stack_offset);
+
+ uint4 defaults1 = read_node(kg, offset);
+ uint4 defaults2 = read_node(kg, offset);
+
+ float3 co = stack_load_float3(stack, co_stack_offset);
+ float w = stack_load_float_default(stack, w_stack_offset, defaults1.x);
+ float scale = stack_load_float_default(stack, scale_stack_offset, defaults1.y);
+ float detail = stack_load_float_default(stack, detail_stack_offset, defaults1.z);
+ float dimension = stack_load_float_default(stack, dimension_stack_offset, defaults1.w);
+ float lacunarity = stack_load_float_default(stack, lacunarity_stack_offset, defaults2.x);
+ float foffset = stack_load_float_default(stack, offset_stack_offset, defaults2.y);
+ float gain = stack_load_float_default(stack, gain_stack_offset, defaults2.z);
dimension = fmaxf(dimension, 1e-5f);
detail = clamp(detail, 0.0f, 16.0f);
lacunarity = fmaxf(lacunarity, 1e-5f);
- float f = svm_musgrave(
- (NodeMusgraveType)type, dimension, lacunarity, detail, foffset, 1.0f, gain, co * scale);
+ float fac;
+
+ switch (dimensions) {
+ case 1: {
+ float p = w * scale;
+ switch ((NodeMusgraveType)type) {
+ case NODE_MUSGRAVE_MULTIFRACTAL:
+ fac = noise_musgrave_multi_fractal_1d(p, dimension, lacunarity, detail);
+ break;
+ case NODE_MUSGRAVE_FBM:
+ fac = noise_musgrave_fBm_1d(p, dimension, lacunarity, detail);
+ break;
+ case NODE_MUSGRAVE_HYBRID_MULTIFRACTAL:
+ fac = noise_musgrave_hybrid_multi_fractal_1d(
+ p, dimension, lacunarity, detail, foffset, gain);
+ break;
+ case NODE_MUSGRAVE_RIDGED_MULTIFRACTAL:
+ fac = noise_musgrave_ridged_multi_fractal_1d(
+ p, dimension, lacunarity, detail, foffset, gain);
+ break;
+ case NODE_MUSGRAVE_HETERO_TERRAIN:
+ fac = noise_musgrave_hetero_terrain_1d(p, dimension, lacunarity, detail, foffset);
+ break;
+ default:
+ fac = 0.0f;
+ }
+ break;
+ }
+ case 2: {
+ float2 p = make_float2(co.x, co.y) * scale;
+ switch ((NodeMusgraveType)type) {
+ case NODE_MUSGRAVE_MULTIFRACTAL:
+ fac = noise_musgrave_multi_fractal_2d(p, dimension, lacunarity, detail);
+ break;
+ case NODE_MUSGRAVE_FBM:
+ fac = noise_musgrave_fBm_2d(p, dimension, lacunarity, detail);
+ break;
+ case NODE_MUSGRAVE_HYBRID_MULTIFRACTAL:
+ fac = noise_musgrave_hybrid_multi_fractal_2d(
+ p, dimension, lacunarity, detail, foffset, gain);
+ break;
+ case NODE_MUSGRAVE_RIDGED_MULTIFRACTAL:
+ fac = noise_musgrave_ridged_multi_fractal_2d(
+ p, dimension, lacunarity, detail, foffset, gain);
+ break;
+ case NODE_MUSGRAVE_HETERO_TERRAIN:
+ fac = noise_musgrave_hetero_terrain_2d(p, dimension, lacunarity, detail, foffset);
+ break;
+ default:
+ fac = 0.0f;
+ }
+ break;
+ }
+ case 3: {
+ float3 p = co * scale;
+ switch ((NodeMusgraveType)type) {
+ case NODE_MUSGRAVE_MULTIFRACTAL:
+ fac = noise_musgrave_multi_fractal_3d(p, dimension, lacunarity, detail);
+ break;
+ case NODE_MUSGRAVE_FBM:
+ fac = noise_musgrave_fBm_3d(p, dimension, lacunarity, detail);
+ break;
+ case NODE_MUSGRAVE_HYBRID_MULTIFRACTAL:
+ fac = noise_musgrave_hybrid_multi_fractal_3d(
+ p, dimension, lacunarity, detail, foffset, gain);
+ break;
+ case NODE_MUSGRAVE_RIDGED_MULTIFRACTAL:
+ fac = noise_musgrave_ridged_multi_fractal_3d(
+ p, dimension, lacunarity, detail, foffset, gain);
+ break;
+ case NODE_MUSGRAVE_HETERO_TERRAIN:
+ fac = noise_musgrave_hetero_terrain_3d(p, dimension, lacunarity, detail, foffset);
+ break;
+ default:
+ fac = 0.0f;
+ }
+ break;
+ }
+ case 4: {
+ float4 p = make_float4(co.x, co.y, co.z, w) * scale;
+ switch ((NodeMusgraveType)type) {
+ case NODE_MUSGRAVE_MULTIFRACTAL:
+ fac = noise_musgrave_multi_fractal_4d(p, dimension, lacunarity, detail);
+ break;
+ case NODE_MUSGRAVE_FBM:
+ fac = noise_musgrave_fBm_4d(p, dimension, lacunarity, detail);
+ break;
+ case NODE_MUSGRAVE_HYBRID_MULTIFRACTAL:
+ fac = noise_musgrave_hybrid_multi_fractal_4d(
+ p, dimension, lacunarity, detail, foffset, gain);
+ break;
+ case NODE_MUSGRAVE_RIDGED_MULTIFRACTAL:
+ fac = noise_musgrave_ridged_multi_fractal_4d(
+ p, dimension, lacunarity, detail, foffset, gain);
+ break;
+ case NODE_MUSGRAVE_HETERO_TERRAIN:
+ fac = noise_musgrave_hetero_terrain_4d(p, dimension, lacunarity, detail, foffset);
+ break;
+ default:
+ fac = 0.0f;
+ }
+ break;
+ }
+ default:
+ fac = 0.0f;
+ }
- if (stack_valid(fac_offset))
- stack_store_float(stack, fac_offset, f);
- if (stack_valid(color_offset))
- stack_store_float3(stack, color_offset, make_float3(f, f, f));
+ stack_store_float(stack, fac_stack_offset, fac);
}
CCL_NAMESPACE_END
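All of the Musgrave variants above share the same octave loop: accumulate noise attenuated by pwr = lacunarity^(-H) per octave, then blend in a fractional last octave. A standalone sketch of that skeleton with a stand-in noise function (plain C; fake_snoise is a hypothetical placeholder for the kernel's signed Perlin noise):

#include <math.h>
#include <stdio.h>

/* Stand-in for the kernel's signed noise; anything returning values in
 * roughly [-1, 1] illustrates the loop structure. */
static float fake_snoise(float p)
{
  return sinf(p * 12.9898f) * 0.5f;
}

/* fBm skeleton matching noise_musgrave_fBm_1d above: integer octaves plus a
 * fractional remainder octave, each scaled by lacunarity^(-H). */
static float fbm_1d(float p, float H, float lacunarity, float octaves)
{
  float value = 0.0f;
  float pwr = 1.0f;
  float pwHL = powf(lacunarity, -H);

  for (int i = 0; i < (int)octaves; i++) {
    value += fake_snoise(p) * pwr;
    pwr *= pwHL;
    p *= lacunarity;
  }

  float rmd = octaves - floorf(octaves);
  if (rmd != 0.0f) {
    value += rmd * fake_snoise(p) * pwr;
  }
  return value;
}

int main(void)
{
  printf("%.4f\n", fbm_1d(0.37f, 1.0f, 2.0f, 2.5f));
  return 0;
}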
diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h
index 322579ccfe3..7db8ffcc6e1 100644
--- a/intern/cycles/kernel/svm/svm_noise.h
+++ b/intern/cycles/kernel/svm/svm_noise.h
@@ -32,304 +32,711 @@
CCL_NAMESPACE_BEGIN
-#ifdef __KERNEL_SSE2__
-ccl_device_inline ssei quick_floor_sse(const ssef &x)
-{
- ssei b = truncatei(x);
- ssei isneg = cast((x < ssef(0.0f)).m128);
- return b + isneg; // unsaturated add 0xffffffff is the same as subtract -1
-}
-#endif
+/* **** Perlin Noise **** */
-ccl_device uint hash(uint kx, uint ky, uint kz)
-{
- // define some handy macros
-#define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
-#define final(a, b, c) \
- { \
- c ^= b; \
- c -= rot(b, 14); \
- a ^= c; \
- a -= rot(c, 11); \
- b ^= a; \
- b -= rot(a, 25); \
- c ^= b; \
- c -= rot(b, 16); \
- a ^= c; \
- a -= rot(c, 4); \
- b ^= a; \
- b -= rot(a, 14); \
- c ^= b; \
- c -= rot(b, 24); \
- }
- // now hash the data!
- uint a, b, c, len = 3;
- a = b = c = 0xdeadbeef + (len << 2) + 13;
-
- c += kz;
- b += ky;
- a += kx;
- final(a, b, c);
-
- return c;
- // macros not needed anymore
-#undef rot
-#undef final
-}
-
-#ifdef __KERNEL_SSE2__
-ccl_device_inline ssei hash_sse(const ssei &kx, const ssei &ky, const ssei &kz)
-{
-# define rot(x, k) (((x) << (k)) | (srl(x, 32 - (k))))
-# define xor_rot(a, b, c) \
- do { \
- a = a ^ b; \
- a = a - rot(b, c); \
- } while (0)
-
- uint len = 3;
- ssei magic = ssei(0xdeadbeef + (len << 2) + 13);
- ssei a = magic + kx;
- ssei b = magic + ky;
- ssei c = magic + kz;
-
- xor_rot(c, b, 14);
- xor_rot(a, c, 11);
- xor_rot(b, a, 25);
- xor_rot(c, b, 16);
- xor_rot(a, c, 4);
- xor_rot(b, a, 14);
- xor_rot(c, b, 24);
-
- return c;
-# undef rot
-# undef xor_rot
-}
-#endif
-
-#if 0 // unused
-ccl_device int imod(int a, int b)
+ccl_device float fade(float t)
{
- a %= b;
- return a < 0 ? a + b : a;
+ return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f);
}
-ccl_device uint phash(int kx, int ky, int kz, int3 p)
+ccl_device_inline float negate_if(float val, int condition)
{
- return hash(imod(kx, p.x), imod(ky, p.y), imod(kz, p.z));
+ return (condition) ? -val : val;
}
-#endif
-#ifndef __KERNEL_SSE2__
-ccl_device float floorfrac(float x, int *i)
+ccl_device float grad1(int hash, float x)
{
- *i = quick_floor_to_int(x);
- return x - *i;
+ int h = hash & 15;
+ float g = 1 + (h & 7);
+ return negate_if(g, h & 8) * x;
}
-#else
-ccl_device_inline ssef floorfrac_sse(const ssef &x, ssei *i)
+
+ccl_device_noinline_cpu float perlin_1d(float x)
{
- *i = quick_floor_sse(x);
- return x - ssef(*i);
+ int X;
+ float fx = floorfrac(x, &X);
+ float u = fade(fx);
+
+ return mix(grad1(hash_uint(X), fx), grad1(hash_uint(X + 1), fx - 1.0f), u);
}
-#endif
-#ifndef __KERNEL_SSE2__
-ccl_device float fade(float t)
+/* 2D, 3D, and 4D noise can be accelerated using SSE, so we first check if
+ * SSE is supported, that is, if __KERNEL_SSE2__ is defined. If it is not
+ * supported, we do a standard implementation, but if it is supported, we
+ * do an implementation using SSE intrinsics.
+ */
+#if !defined(__KERNEL_SSE2__)
+
+/* ** Standard Implementation ** */
+
+/* Bilinear Interpolation:
+ *
+ * v2 v3
+ * @ + + + + @ y
+ * + + ^
+ * + + |
+ * + + |
+ * @ + + + + @ @------> x
+ * v0 v1
+ *
+ */
+ccl_device float bi_mix(float v0, float v1, float v2, float v3, float x, float y)
{
- return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f);
+ float x1 = 1.0f - x;
+ return (1.0f - y) * (v0 * x1 + v1 * x) + y * (v2 * x1 + v3 * x);
}
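As a quick sanity check of bi_mix above: with corner values v0..v3 and fractional coordinates (x, y) it is ordinary bilinear interpolation. A tiny standalone sketch (plain C; bilerp is just the same formula under an illustrative name):

#include <stdio.h>

/* Same formula as bi_mix above: interpolate along x, then along y. */
static float bilerp(float v0, float v1, float v2, float v3, float x, float y)
{
  float x1 = 1.0f - x;
  return (1.0f - y) * (v0 * x1 + v1 * x) + y * (v2 * x1 + v3 * x);
}

int main(void)
{
  /* At the center of the cell the result is the average of the corners. */
  printf("%.2f\n", bilerp(0.0f, 1.0f, 2.0f, 3.0f, 0.5f, 0.5f)); /* 1.50 */
  /* At (x, y) = (1, 0) it returns v1 exactly. */
  printf("%.2f\n", bilerp(0.0f, 1.0f, 2.0f, 3.0f, 1.0f, 0.0f)); /* 1.00 */
  return 0;
}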
-#else
-ccl_device_inline ssef fade_sse(const ssef *t)
+
+/* Trilinear Interpolation:
+ *
+ * v6 v7
+ * @ + + + + + + @
+ * +\ +\
+ * + \ + \
+ * + \ + \
+ * + \ v4 + \ v5
+ * + @ + + + +++ + @ z
+ * + + + + y ^
+ * v2 @ + +++ + + + @ v3 + \ |
+ * \ + \ + \ |
+ * \ + \ + \|
+ * \ + \ + +---------> x
+ * \+ \+
+ * @ + + + + + + @
+ * v0 v1
+ */
+ccl_device float tri_mix(float v0,
+ float v1,
+ float v2,
+ float v3,
+ float v4,
+ float v5,
+ float v6,
+ float v7,
+ float x,
+ float y,
+ float z)
{
- ssef a = madd(*t, ssef(6.0f), ssef(-15.0f));
- ssef b = madd(*t, a, ssef(10.0f));
- return ((*t) * (*t)) * ((*t) * b);
+ float x1 = 1.0f - x;
+ float y1 = 1.0f - y;
+ float z1 = 1.0f - z;
+ return z1 * (y1 * (v0 * x1 + v1 * x) + y * (v2 * x1 + v3 * x)) +
+ z * (y1 * (v4 * x1 + v5 * x) + y * (v6 * x1 + v7 * x));
}
-#endif
-#ifndef __KERNEL_SSE2__
-ccl_device float nerp(float t, float a, float b)
+ccl_device float quad_mix(float v0,
+ float v1,
+ float v2,
+ float v3,
+ float v4,
+ float v5,
+ float v6,
+ float v7,
+ float v8,
+ float v9,
+ float v10,
+ float v11,
+ float v12,
+ float v13,
+ float v14,
+ float v15,
+ float x,
+ float y,
+ float z,
+ float w)
{
- return (1.0f - t) * a + t * b;
+ return mix(tri_mix(v0, v1, v2, v3, v4, v5, v6, v7, x, y, z),
+ tri_mix(v8, v9, v10, v11, v12, v13, v14, v15, x, y, z),
+ w);
}
-#else
-ccl_device_inline ssef nerp_sse(const ssef &t, const ssef &a, const ssef &b)
+
+ccl_device float grad2(int hash, float x, float y)
{
- ssef x1 = (ssef(1.0f) - t) * a;
- return madd(t, b, x1);
+ int h = hash & 7;
+ float u = h < 4 ? x : y;
+ float v = 2.0f * (h < 4 ? y : x);
+ return negate_if(u, h & 1) + negate_if(v, h & 2);
}
-#endif
-#ifndef __KERNEL_SSE2__
-ccl_device float grad(int hash, float x, float y, float z)
+ccl_device float grad3(int hash, float x, float y, float z)
{
- // use vectors pointing to the edges of the cube
int h = hash & 15;
float u = h < 8 ? x : y;
- float vt = ((h == 12) | (h == 14)) ? x : z;
+ float vt = ((h == 12) || (h == 14)) ? x : z;
float v = h < 4 ? y : vt;
- return ((h & 1) ? -u : u) + ((h & 2) ? -v : v);
+ return negate_if(u, h & 1) + negate_if(v, h & 2);
}
-#else
-ccl_device_inline ssef grad_sse(const ssei &hash, const ssef &x, const ssef &y, const ssef &z)
-{
- ssei c1 = ssei(1);
- ssei c2 = ssei(2);
- ssei h = hash & ssei(15); // h = hash & 15
+ccl_device float grad4(int hash, float x, float y, float z, float w)
+{
+ int h = hash & 31;
+ float u = h < 24 ? x : y;
+ float v = h < 16 ? y : z;
+ float s = h < 8 ? z : w;
+ return negate_if(u, h & 1) + negate_if(v, h & 2) + negate_if(s, h & 4);
+}
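The scalar grad2/grad3/grad4 above turn a lattice hash into a pseudo-random gradient and combine it with the offset from the lattice point. A standalone sketch enumerating the eight directions grad2 can produce (plain C; the formula is copied from above, the driver in main is illustrative only):

#include <stdio.h>

static float negate_if(float val, int condition)
{
  return condition ? -val : val;
}

/* Same formula as grad2 above: the low three hash bits select one of eight
 * fixed gradient directions. */
static float grad2(int hash, float x, float y)
{
  int h = hash & 7;
  float u = h < 4 ? x : y;
  float v = 2.0f * (h < 4 ? y : x);
  return negate_if(u, h & 1) + negate_if(v, h & 2);
}

int main(void)
{
  /* Evaluate the offset (1, 0) against each possible gradient. */
  for (int h = 0; h < 8; h++) {
    printf("h=%d -> %.1f\n", h, grad2(h, 1.0f, 0.0f));
  }
  return 0;
}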
- sseb case_ux = h < ssei(8); // 0xffffffff if h < 8 else 0
+ccl_device_noinline_cpu float perlin_2d(float x, float y)
+{
+ int X;
+ int Y;
- ssef u = select(case_ux, x, y); // u = h<8 ? x : y
+ float fx = floorfrac(x, &X);
+ float fy = floorfrac(y, &Y);
- sseb case_vy = h < ssei(4); // 0xffffffff if h < 4 else 0
+ float u = fade(fx);
+ float v = fade(fy);
- sseb case_h12 = h == ssei(12); // 0xffffffff if h == 12 else 0
- sseb case_h14 = h == ssei(14); // 0xffffffff if h == 14 else 0
+ float r = bi_mix(grad2(hash_uint2(X, Y), fx, fy),
+ grad2(hash_uint2(X + 1, Y), fx - 1.0f, fy),
+ grad2(hash_uint2(X, Y + 1), fx, fy - 1.0f),
+ grad2(hash_uint2(X + 1, Y + 1), fx - 1.0f, fy - 1.0f),
+ u,
+ v);
- sseb case_vx = case_h12 | case_h14; // 0xffffffff if h == 12 or h == 14 else 0
+ return r;
+}
- ssef v = select(case_vy, y, select(case_vx, x, z)); // v = h<4 ? y : h == 12 || h == 14 ? x : z
+ccl_device_noinline_cpu float perlin_3d(float x, float y, float z)
+{
+ int X;
+ int Y;
+ int Z;
- ssei case_uneg = (h & c1) << 31; // 1<<31 if h&1 else 0
- ssef case_uneg_mask = cast(case_uneg); // -0.0 if h&1 else +0.0
- ssef ru = u ^ case_uneg_mask; // -u if h&1 else u (copy float sign)
+ float fx = floorfrac(x, &X);
+ float fy = floorfrac(y, &Y);
+ float fz = floorfrac(z, &Z);
- ssei case_vneg = (h & c2) << 30; // 2<<30 if h&2 else 0
- ssef case_vneg_mask = cast(case_vneg); // -0.0 if h&2 else +0.0
- ssef rv = v ^ case_vneg_mask; // -v if h&2 else v (copy float sign)
+ float u = fade(fx);
+ float v = fade(fy);
+ float w = fade(fz);
- ssef r = ru + rv; // ((h&1) ? -u : u) + ((h&2) ? -v : v)
+ float r = tri_mix(grad3(hash_uint3(X, Y, Z), fx, fy, fz),
+ grad3(hash_uint3(X + 1, Y, Z), fx - 1.0f, fy, fz),
+ grad3(hash_uint3(X, Y + 1, Z), fx, fy - 1.0f, fz),
+ grad3(hash_uint3(X + 1, Y + 1, Z), fx - 1.0f, fy - 1.0f, fz),
+ grad3(hash_uint3(X, Y, Z + 1), fx, fy, fz - 1.0f),
+ grad3(hash_uint3(X + 1, Y, Z + 1), fx - 1.0f, fy, fz - 1.0f),
+ grad3(hash_uint3(X, Y + 1, Z + 1), fx, fy - 1.0f, fz - 1.0f),
+ grad3(hash_uint3(X + 1, Y + 1, Z + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f),
+ u,
+ v,
+ w);
return r;
}
-#endif
-#ifndef __KERNEL_SSE2__
-ccl_device float scale3(float result)
-{
- return 0.9820f * result;
-}
-#else
-ccl_device_inline ssef scale3_sse(const ssef &result)
-{
- return ssef(0.9820f) * result;
-}
-#endif
-
-#ifndef __KERNEL_SSE2__
-ccl_device_noinline float perlin(float x, float y, float z)
+ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w)
{
int X;
- float fx = floorfrac(x, &X);
int Y;
- float fy = floorfrac(y, &Y);
int Z;
+ int W;
+
+ float fx = floorfrac(x, &X);
+ float fy = floorfrac(y, &Y);
float fz = floorfrac(z, &Z);
+ float fw = floorfrac(w, &W);
float u = fade(fx);
float v = fade(fy);
- float w = fade(fz);
+ float t = fade(fz);
+ float s = fade(fw);
+
+ float r = quad_mix(
+ grad4(hash_uint4(X, Y, Z, W), fx, fy, fz, fw),
+ grad4(hash_uint4(X + 1, Y, Z, W), fx - 1.0f, fy, fz, fw),
+ grad4(hash_uint4(X, Y + 1, Z, W), fx, fy - 1.0f, fz, fw),
+ grad4(hash_uint4(X + 1, Y + 1, Z, W), fx - 1.0f, fy - 1.0f, fz, fw),
+ grad4(hash_uint4(X, Y, Z + 1, W), fx, fy, fz - 1.0f, fw),
+ grad4(hash_uint4(X + 1, Y, Z + 1, W), fx - 1.0f, fy, fz - 1.0f, fw),
+ grad4(hash_uint4(X, Y + 1, Z + 1, W), fx, fy - 1.0f, fz - 1.0f, fw),
+ grad4(hash_uint4(X + 1, Y + 1, Z + 1, W), fx - 1.0f, fy - 1.0f, fz - 1.0f, fw),
+ grad4(hash_uint4(X, Y, Z, W + 1), fx, fy, fz, fw - 1.0f),
+ grad4(hash_uint4(X + 1, Y, Z, W + 1), fx - 1.0f, fy, fz, fw - 1.0f),
+ grad4(hash_uint4(X, Y + 1, Z, W + 1), fx, fy - 1.0f, fz, fw - 1.0f),
+ grad4(hash_uint4(X + 1, Y + 1, Z, W + 1), fx - 1.0f, fy - 1.0f, fz, fw - 1.0f),
+ grad4(hash_uint4(X, Y, Z + 1, W + 1), fx, fy, fz - 1.0f, fw - 1.0f),
+ grad4(hash_uint4(X + 1, Y, Z + 1, W + 1), fx - 1.0f, fy, fz - 1.0f, fw - 1.0f),
+ grad4(hash_uint4(X, Y + 1, Z + 1, W + 1), fx, fy - 1.0f, fz - 1.0f, fw - 1.0f),
+ grad4(hash_uint4(X + 1, Y + 1, Z + 1, W + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f, fw - 1.0f),
+ u,
+ v,
+ t,
+ s);
+
+ return r;
+}
+
+#else /* SSE is supported. */
+
+/* ** SSE Implementation ** */
+
+/* SSE Bilinear Interpolation:
+ *
+ * The function takes two ssef inputs:
+ * - p : Contains the values at the points (v0, v1, v2, v3).
+ * - f : Contains the values (x, y, _, _). The third and fourth values are unused.
+ *
+ * The interpolation is done in two steps:
+ * 1. Interpolate (v0, v1) and (v2, v3) along the x axis to get g (g0, g1).
+ * (v2, v3) is generated by moving v2 and v3 to the first and second
+ * places of the ssef using the shuffle mask <2, 3, 2, 3>. The third and
+ * fourth values are unused.
+ * 2. Interpolate g0 and g1 along the y axis to get the final value.
+ * g1 is generated by populating an ssef with the second value of g.
+ * Only the first value is important in the final ssef.
+ *
+ * v1 v3 g1
+ * @ + + + + @ @ y
+ * + + (1) + (2) ^
+ * + + ---> + ---> final |
+ * + + + |
+ * @ + + + + @ @ @------> x
+ * v0 v2 g0
+ *
+ */
+ccl_device_inline ssef bi_mix(ssef p, ssef f)
+{
+ ssef g = mix(p, shuffle<2, 3, 2, 3>(p), shuffle<0>(f));
+ return mix(g, shuffle<1>(g), shuffle<1>(f));
+}
+
+ccl_device_inline ssef fade(const ssef &t)
+{
+ ssef a = madd(t, 6.0f, -15.0f);
+ ssef b = madd(t, a, 10.0f);
+ return (t * t) * (t * b);
+}
+
+/* Negate val if the nth bit of h is 1. */
+# define negate_if_nth_bit(val, h, n) ((val) ^ cast(((h) & (1 << (n))) << (31 - (n))))
+
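The negate_if_nth_bit macro above shifts the selected hash bit into the float sign-bit position and XORs it in, so the value is negated exactly when that bit is set. A scalar illustration of the same trick (plain C using memcpy type punning; the kernel macro does the equivalent per SSE lane via cast()):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Flip the sign of val when bit n of h is set, by XOR-ing that bit (moved to
 * bit position 31) into the float's bit pattern. */
static float negate_if_nth_bit_scalar(float val, uint32_t h, int n)
{
  uint32_t bits;
  memcpy(&bits, &val, sizeof(bits));
  bits ^= (h & (1u << n)) << (31 - n);
  memcpy(&val, &bits, sizeof(val));
  return val;
}

int main(void)
{
  printf("%.1f\n", negate_if_nth_bit_scalar(2.5f, 0x2u, 1)); /* bit 1 set: -2.5 */
  printf("%.1f\n", negate_if_nth_bit_scalar(2.5f, 0x0u, 1)); /* bit 1 clear: 2.5 */
  return 0;
}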
+ccl_device_inline ssef grad(const ssei &hash, const ssef &x, const ssef &y)
+{
+ ssei h = hash & 7;
+ ssef u = select(h < 4, x, y);
+ ssef v = 2.0f * select(h < 4, y, x);
+ return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1);
+}
+
+/* We use SSE to compute and interpolate 4 gradients at once:
+ *
+ * Point Offset from v0
+ * v0 (0, 0)
+ * v1 (0, 1)
+ * v2 (1, 0) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(V, V + 1))
+ * v3 (1, 1) ^
+ * | |__________| (0, 0, 1, 1) = shuffle<0, 0, 0, 0>(V, V + 1)
+ * | ^
+ * |__________________________|
+ *
+ */
+ccl_device_noinline float perlin_2d(float x, float y)
+{
+ ssei XY;
+ ssef fxy = floorfrac(ssef(x, y, 0.0f, 0.0f), &XY);
+ ssef uv = fade(fxy);
+
+ ssei XY1 = XY + 1;
+ ssei X = shuffle<0, 0, 0, 0>(XY, XY1);
+ ssei Y = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(XY, XY1));
+
+ ssei h = hash_ssei2(X, Y);
+
+ ssef fxy1 = fxy - 1.0f;
+ ssef fx = shuffle<0, 0, 0, 0>(fxy, fxy1);
+ ssef fy = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(fxy, fxy1));
+
+ ssef g = grad(h, fx, fy);
+
+ return extract<0>(bi_mix(g, uv));
+}
+
+/* SSE Trilinear Interpolation:
+ *
+ * The function takes three ssef inputs:
+ * - p : Contains the values at the points (v0, v1, v2, v3).
+ * - q : Contains the values at the points (v4, v5, v6, v7).
+ * - f : Contains the values (x, y, z, _). The fourth value is unused.
+ *
+ * The interpolation is done in three steps:
+ * 1. Interpolate p and q along the x axis to get s (s0, s1, s2, s3).
+ * 2. Interpolate (s0, s1) and (s2, s3) along the y axis to get g (g0, g1).
+ * (s2, s3) is generated by moving v2 and v3 to the first and second
+ * places of the ssef using the shuffle mask <2, 3, 2, 3>. The third and
+ * fourth values are unused.
+ * 3. Interpolate g0 and g1 along the z axis to get the final value.
+ * g1 is generated by populating an ssef with the second value of g.
+ * Only the first value is important in the final ssef.
+ *
+ * v3 v7
+ * @ + + + + + + @ s3 @
+ * +\ +\ +\
+ * + \ + \ + \
+ * + \ + \ + \ g1
+ * + \ v1 + \ v5 + \ s1 @
+ * + @ + + + +++ + @ + @ + z
+ * + + + + (1) + + (2) + (3) y ^
+ * v2 @ + +++ + + + @ v6 + ---> s2 @ + ---> + ---> final \ |
+ * \ + \ + \ + + \ |
+ * \ + \ + \ + + \|
+ * \ + \ + \ + @ +---------> x
+ * \+ \+ \+ g0
+ * @ + + + + + + @ @
+ * v0 v4 s0
+ */
+ccl_device_inline ssef tri_mix(ssef p, ssef q, ssef f)
+{
+ ssef s = mix(p, q, shuffle<0>(f));
+ ssef g = mix(s, shuffle<2, 3, 2, 3>(s), shuffle<1>(f));
+ return mix(g, shuffle<1>(g), shuffle<2>(f));
+}
+
+/* 3D and 4D noise can be accelerated using AVX, so we first check if AVX
+ * is supported, that is, if __KERNEL_AVX__ is defined. If it is not
+ * supported, we do an SSE implementation, but if it is supported,
+ * we do an implementation using AVX intrinsics.
+ */
+# if !defined(__KERNEL_AVX__)
+
+ccl_device_inline ssef grad(const ssei &hash, const ssef &x, const ssef &y, const ssef &z)
+{
+ ssei h = hash & 15;
+ ssef u = select(h < 8, x, y);
+ ssef vt = select((h == 12) | (h == 14), x, z);
+ ssef v = select(h < 4, y, vt);
+ return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1);
+}
+
+ccl_device_inline ssef
+grad(const ssei &hash, const ssef &x, const ssef &y, const ssef &z, const ssef &w)
+{
+ ssei h = hash & 31;
+ ssef u = select(h < 24, x, y);
+ ssef v = select(h < 16, y, z);
+ ssef s = select(h < 8, z, w);
+ return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1) + negate_if_nth_bit(s, h, 2);
+}
- float result;
-
- result = nerp(
- w,
- nerp(v,
- nerp(u, grad(hash(X, Y, Z), fx, fy, fz), grad(hash(X + 1, Y, Z), fx - 1.0f, fy, fz)),
- nerp(u,
- grad(hash(X, Y + 1, Z), fx, fy - 1.0f, fz),
- grad(hash(X + 1, Y + 1, Z), fx - 1.0f, fy - 1.0f, fz))),
- nerp(v,
- nerp(u,
- grad(hash(X, Y, Z + 1), fx, fy, fz - 1.0f),
- grad(hash(X + 1, Y, Z + 1), fx - 1.0f, fy, fz - 1.0f)),
- nerp(u,
- grad(hash(X, Y + 1, Z + 1), fx, fy - 1.0f, fz - 1.0f),
- grad(hash(X + 1, Y + 1, Z + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f))));
- float r = scale3(result);
-
- /* can happen for big coordinates, things even out to 0.0 then anyway */
- return (isfinite(r)) ? r : 0.0f;
-}
-#else
-ccl_device_noinline float perlin(float x, float y, float z)
-{
- ssef xyz = ssef(x, y, z, 0.0f);
+/* SSE Quadrilinear Interpolation:
+ *
+ * Quadrilinear interpolation is as simple as a linear interpolation
+ * between two trilinear interpolations.
+ *
+ */
+ccl_device_inline ssef quad_mix(ssef p, ssef q, ssef r, ssef s, ssef f)
+{
+ return mix(tri_mix(p, q, f), tri_mix(r, s, f), shuffle<3>(f));
+}
+
+/* We use SSE to compute and interpolate 4 gradients at once. Since we have 8
+ * gradients in 3D, we need to compute two sets of gradients at the points:
+ *
+ * Point Offset from v0
+ * v0 (0, 0, 0)
+ * v1 (0, 0, 1)
+ * v2 (0, 1, 0) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1))
+ * v3 (0, 1, 1) ^
+ * | |__________| (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1)
+ * | ^
+ * |__________________________|
+ *
+ * Point Offset from v0
+ * v4 (1, 0, 0)
+ * v5 (1, 0, 1)
+ * v6 (1, 1, 0)
+ * v7 (1, 1, 1)
+ *
+ */
+ccl_device_noinline float perlin_3d(float x, float y, float z)
+{
ssei XYZ;
+ ssef fxyz = floorfrac(ssef(x, y, z, 0.0f), &XYZ);
+ ssef uvw = fade(fxyz);
+
+ ssei XYZ1 = XYZ + 1;
+ ssei Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1);
+ ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1));
- ssef fxyz = floorfrac_sse(xyz, &XYZ);
+ ssei h1 = hash_ssei3(shuffle<0>(XYZ), Y, Z);
+ ssei h2 = hash_ssei3(shuffle<0>(XYZ1), Y, Z);
- ssef uvw = fade_sse(&fxyz);
- ssef u = shuffle<0>(uvw), v = shuffle<1>(uvw), w = shuffle<2>(uvw);
+ ssef fxyz1 = fxyz - 1.0f;
+ ssef fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1);
+ ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1));
- ssei XYZ_ofc = XYZ + ssei(1);
- ssei vdy = shuffle<1, 1, 1, 1>(XYZ, XYZ_ofc); // +0, +0, +1, +1
- ssei vdz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ_ofc)); // +0, +1, +0, +1
+ ssef g1 = grad(h1, shuffle<0>(fxyz), fy, fz);
+ ssef g2 = grad(h2, shuffle<0>(fxyz1), fy, fz);
- ssei h1 = hash_sse(shuffle<0>(XYZ), vdy, vdz); // hash directions 000, 001, 010, 011
- ssei h2 = hash_sse(shuffle<0>(XYZ_ofc), vdy, vdz); // hash directions 100, 101, 110, 111
+ return extract<0>(tri_mix(g1, g2, uvw));
+}
- ssef fxyz_ofc = fxyz - ssef(1.0f);
- ssef vfy = shuffle<1, 1, 1, 1>(fxyz, fxyz_ofc);
- ssef vfz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz_ofc));
+/* We use SSE to compute and interpolate 4 gradients at once. Since we have 16
+ * gradients in 4D, we need to compute four sets of gradients at the points:
+ *
+ * Point Offset from v0
+ * v0 (0, 0, 0, 0)
+ * v1 (0, 0, 1, 0)
+ * v2 (0, 1, 0, 0) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1))
+ * v3 (0, 1, 1, 0) ^
+ * | |________| (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1)
+ * | ^
+ * |_______________________|
+ *
+ * Point Offset from v0
+ * v4 (1, 0, 0, 0)
+ * v5 (1, 0, 1, 0)
+ * v6 (1, 1, 0, 0)
+ * v7 (1, 1, 1, 0)
+ *
+ * Point Offset from v0
+ * v8 (0, 0, 0, 1)
+ * v9 (0, 0, 1, 1)
+ * v10 (0, 1, 0, 1)
+ * v11 (0, 1, 1, 1)
+ *
+ * Point Offset from v0
+ * v12 (1, 0, 0, 1)
+ * v13 (1, 0, 1, 1)
+ * v14 (1, 1, 0, 1)
+ * v15 (1, 1, 1, 1)
+ *
+ */
+ccl_device_noinline float perlin_4d(float x, float y, float z, float w)
+{
+ ssei XYZW;
+ ssef fxyzw = floorfrac(ssef(x, y, z, w), &XYZW);
+ ssef uvws = fade(fxyzw);
+
+ ssei XYZW1 = XYZW + 1;
+ ssei Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1);
+ ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1));
+
+ ssei h1 = hash_ssei4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW));
+ ssei h2 = hash_ssei4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW));
- ssef g1 = grad_sse(h1, shuffle<0>(fxyz), vfy, vfz);
- ssef g2 = grad_sse(h2, shuffle<0>(fxyz_ofc), vfy, vfz);
- ssef n1 = nerp_sse(u, g1, g2);
+ ssei h3 = hash_ssei4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW1));
+ ssei h4 = hash_ssei4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW1));
- ssef n1_half = shuffle<2, 3, 2, 3>(n1); // extract 2 floats to a separate vector
- ssef n2 = nerp_sse(
- v, n1, n1_half); // process nerp([a, b, _, _], [c, d, _, _]) -> [a', b', _, _]
+ ssef fxyzw1 = fxyzw - 1.0f;
+ ssef fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1);
+ ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1));
- ssef n2_second = shuffle<1>(n2); // extract b to a separate vector
- ssef result = nerp_sse(
- w, n2, n2_second); // process nerp([a', _, _, _], [b', _, _, _]) -> [a'', _, _, _]
+ ssef g1 = grad(h1, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw));
+ ssef g2 = grad(h2, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw));
- ssef r = scale3_sse(result);
+ ssef g3 = grad(h3, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw1));
+ ssef g4 = grad(h4, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw1));
- ssef infmask = cast(ssei(0x7f800000));
- ssef rinfmask = ((r & infmask) == infmask).m128; // 0xffffffff if r is inf/-inf/nan else 0
- ssef rfinite = andnot(rinfmask, r); // 0 if r is inf/-inf/nan else r
- return extract<0>(rfinite);
+ return extract<0>(quad_mix(g1, g2, g3, g4, uvws));
}
-#endif
-/* perlin noise in range 0..1 */
-ccl_device float noise(float3 p)
+# else /* AVX is supported. */
+
+/* AVX Implementation */
+
+ccl_device_inline avxf grad(const avxi &hash, const avxf &x, const avxf &y, const avxf &z)
{
- float r = perlin(p.x, p.y, p.z);
- return 0.5f * r + 0.5f;
+ avxi h = hash & 15;
+ avxf u = select(h < 8, x, y);
+ avxf vt = select((h == 12) | (h == 14), x, z);
+ avxf v = select(h < 4, y, vt);
+ return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1);
}
-/* perlin noise in range -1..1 */
-ccl_device float snoise(float3 p)
+ccl_device_inline avxf
+grad(const avxi &hash, const avxf &x, const avxf &y, const avxf &z, const avxf &w)
{
- return perlin(p.x, p.y, p.z);
+ avxi h = hash & 31;
+ avxf u = select(h < 24, x, y);
+ avxf v = select(h < 16, y, z);
+ avxf s = select(h < 8, z, w);
+ return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1) + negate_if_nth_bit(s, h, 2);
}
-/* cell noise */
-ccl_device float cellnoise(float3 p)
+/* AVX Quadrilinear Interpolation:
+ *
+ * The interpolation is done in two steps:
+ * 1. Interpolate p and q along the w axis to get s.
+ * 2. Trilinearly interpolate (s0, s1, s2, s3) and (s4, s5, s6, s7) to get the final
+ * value. (s0, s1, s2, s3) and (s4, s5, s6, s7) are generated by extracting the
+ * low and high ssef from s.
+ *
+ */
+ccl_device_inline ssef quad_mix(avxf p, avxf q, ssef f)
{
- int3 ip = quick_floor_to_int3(p);
- return bits_to_01(hash(ip.x, ip.y, ip.z));
+ ssef fv = shuffle<3>(f);
+ avxf s = mix(p, q, avxf(fv, fv));
+ return tri_mix(low(s), high(s), f);
}
-ccl_device float3 cellnoise3(float3 p)
+/* We use AVX to compute and interpolate 8 gradients at once.
+ *
+ * Point Offset from v0
+ * v0 (0, 0, 0)
+ * v1 (0, 0, 1) The full AVX type is computed by inserting the following
+ * v2 (0, 1, 0) SSE types into both the low and high parts of the AVX.
+ * v3 (0, 1, 1)
+ * v4 (1, 0, 0)
+ * v5 (1, 0, 1) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1))
+ * v6 (1, 1, 0) ^
+ * v7 (1, 1, 1) |
+ * | |__________| (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1)
+ * | ^
+ * |__________________________|
+ *
+ */
+ccl_device_noinline float perlin_3d(float x, float y, float z)
{
- int3 ip = quick_floor_to_int3(p);
-#ifndef __KERNEL_SSE__
- float r = bits_to_01(hash(ip.x, ip.y, ip.z));
- float g = bits_to_01(hash(ip.y, ip.x, ip.z));
- float b = bits_to_01(hash(ip.y, ip.z, ip.x));
- return make_float3(r, g, b);
-#else
- ssei ip_yxz = shuffle<1, 0, 2, 3>(ssei(ip.m128));
- ssei ip_xyy = shuffle<0, 1, 1, 3>(ssei(ip.m128));
- ssei ip_zzx = shuffle<2, 2, 0, 3>(ssei(ip.m128));
- ssei bits = hash_sse(ip_xyy, ip_yxz, ip_zzx);
- return float3(uint32_to_float(bits) * ssef(1.0f / (float)0xFFFFFFFF));
+ ssei XYZ;
+ ssef fxyz = floorfrac(ssef(x, y, z, 0.0f), &XYZ);
+ ssef uvw = fade(fxyz);
+
+ ssei XYZ1 = XYZ + 1;
+ ssei X = shuffle<0>(XYZ);
+ ssei X1 = shuffle<0>(XYZ1);
+ ssei Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1);
+ ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1));
+
+ avxi h = hash_avxi3(avxi(X, X1), avxi(Y, Y), avxi(Z, Z));
+
+ ssef fxyz1 = fxyz - 1.0f;
+ ssef fx = shuffle<0>(fxyz);
+ ssef fx1 = shuffle<0>(fxyz1);
+ ssef fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1);
+ ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1));
+
+ avxf g = grad(h, avxf(fx, fx1), avxf(fy, fy), avxf(fz, fz));
+
+ return extract<0>(tri_mix(low(g), high(g), uvw));
+}
+
+/* We use AVX to compute and interpolate 8 gradients at once. Since we have 16
+ * gradients in 4D, we need to compute two sets of gradients at the points:
+ *
+ * Point Offset from v0
+ * v0 (0, 0, 0, 0)
+ * v1 (0, 0, 1, 0) The full avx type is computed by inserting the following
+ * v2 (0, 1, 0, 0) sse types into both the low and high parts of the avx.
+ * v3 (0, 1, 1, 0)
+ * v4 (1, 0, 0, 0)
+ * v5 (1, 0, 1, 0) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1))
+ * v6 (1, 1, 0, 0) ^
+ * v7 (1, 1, 1, 0) |
+ * | |________| (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1)
+ * | ^
+ * |_______________________|
+ *
+ * Point Offset from v0
+ * v8 (0, 0, 0, 1)
+ * v9 (0, 0, 1, 1)
+ * v10 (0, 1, 0, 1)
+ * v11 (0, 1, 1, 1)
+ * v12 (1, 0, 0, 1)
+ * v13 (1, 0, 1, 1)
+ * v14 (1, 1, 0, 1)
+ * v15 (1, 1, 1, 1)
+ *
+ */
+ccl_device_noinline float perlin_4d(float x, float y, float z, float w)
+{
+ ssei XYZW;
+ ssef fxyzw = floorfrac(ssef(x, y, z, w), &XYZW);
+ ssef uvws = fade(fxyzw);
+
+ ssei XYZW1 = XYZW + 1;
+ ssei X = shuffle<0>(XYZW);
+ ssei X1 = shuffle<0>(XYZW1);
+ ssei Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1);
+ ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1));
+ ssei W = shuffle<3>(XYZW);
+ ssei W1 = shuffle<3>(XYZW1);
+
+ avxi h1 = hash_avxi4(avxi(X, X1), avxi(Y, Y), avxi(Z, Z), avxi(W, W));
+ avxi h2 = hash_avxi4(avxi(X, X1), avxi(Y, Y), avxi(Z, Z), avxi(W1, W1));
+
+ ssef fxyzw1 = fxyzw - 1.0f;
+ ssef fx = shuffle<0>(fxyzw);
+ ssef fx1 = shuffle<0>(fxyzw1);
+ ssef fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1);
+ ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1));
+ ssef fw = shuffle<3>(fxyzw);
+ ssef fw1 = shuffle<3>(fxyzw1);
+
+ avxf g1 = grad(h1, avxf(fx, fx1), avxf(fy, fy), avxf(fz, fz), avxf(fw, fw));
+ avxf g2 = grad(h2, avxf(fx, fx1), avxf(fy, fy), avxf(fz, fz), avxf(fw1, fw1));
+
+ return extract<0>(quad_mix(g1, g2, uvws));
+}
+# endif
+
+# undef negate_if_nth_bit
+
#endif
+
+/* Remap the output of noise to a predictable range [-1, 1].
+ * The scale values were computed experimentally by the OSL developers.
+ */
+
+ccl_device_inline float noise_scale1(float result)
+{
+ return 0.2500f * result;
+}
+
+ccl_device_inline float noise_scale2(float result)
+{
+ return 0.6616f * result;
+}
+
+ccl_device_inline float noise_scale3(float result)
+{
+ return 0.9820f * result;
+}
+
+ccl_device_inline float noise_scale4(float result)
+{
+ return 0.8344f * result;
+}
+
+/* Safe Signed And Unsigned Noise */
+
+ccl_device_inline float snoise_1d(float p)
+{
+ return noise_scale1(ensure_finite(perlin_1d(p)));
+}
+
+ccl_device_inline float noise_1d(float p)
+{
+ return 0.5f * snoise_1d(p) + 0.5f;
+}
+
+ccl_device_inline float snoise_2d(float2 p)
+{
+ return noise_scale2(ensure_finite(perlin_2d(p.x, p.y)));
+}
+
+ccl_device_inline float noise_2d(float2 p)
+{
+ return 0.5f * snoise_2d(p) + 0.5f;
+}
+
+ccl_device_inline float snoise_3d(float3 p)
+{
+ return noise_scale3(ensure_finite(perlin_3d(p.x, p.y, p.z)));
+}
+
+ccl_device_inline float noise_3d(float3 p)
+{
+ return 0.5f * snoise_3d(p) + 0.5f;
+}
+
+ccl_device_inline float snoise_4d(float4 p)
+{
+ return noise_scale4(ensure_finite(perlin_4d(p.x, p.y, p.z, p.w)));
+}
+
+ccl_device_inline float noise_4d(float4 p)
+{
+ return 0.5f * snoise_4d(p) + 0.5f;
}
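
The snoise_*d functions return signed noise in approximately [-1, 1] once the experimental scale factors are applied, and each noise_*d counterpart remaps that to [0, 1] via 0.5 * s + 0.5. A trivial check of the remapping in plain C++ (with placeholder inputs rather than real noise values):

#include <cstdio>

// Remap a signed noise value in [-1, 1] to the unsigned range [0, 1].
static float signed_to_unsigned(float snoise) { return 0.5f * snoise + 0.5f; }

int main()
{
  printf("%f\n", signed_to_unsigned(-1.0f));  // 0.0
  printf("%f\n", signed_to_unsigned(0.0f));   // 0.5
  printf("%f\n", signed_to_unsigned(1.0f));   // 1.0
  return 0;
}
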
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_noisetex.h b/intern/cycles/kernel/svm/svm_noisetex.h
index 3324e86fcd8..920dd7d9d02 100644
--- a/intern/cycles/kernel/svm/svm_noisetex.h
+++ b/intern/cycles/kernel/svm/svm_noisetex.h
@@ -16,44 +16,201 @@
CCL_NAMESPACE_BEGIN
-/* Noise */
+/* The following offset functions generate random offsets to be added to texture
+ * coordinates to act as a seed since the noise functions don't have seed values.
+ * A seed value is needed for generating distortion textures and color outputs.
+ * The offset's components are in the range [100, 200]: not too high to cause
+ * bad precision, and not too small to be noticeable. We use a float seed because
+ * OSL only supports float hashes.
+ */
+
+ccl_device_inline float random_float_offset(float seed)
+{
+ return 100.0f + hash_float_to_float(seed) * 100.0f;
+}
+
+ccl_device_inline float2 random_float2_offset(float seed)
+{
+ return make_float2(100.0f + hash_float2_to_float(make_float2(seed, 0.0f)) * 100.0f,
+ 100.0f + hash_float2_to_float(make_float2(seed, 1.0f)) * 100.0f);
+}
-ccl_device void svm_node_tex_noise(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device_inline float3 random_float3_offset(float seed)
{
- uint co_offset, scale_offset, detail_offset, distortion_offset, fac_offset, color_offset;
+ return make_float3(100.0f + hash_float2_to_float(make_float2(seed, 0.0f)) * 100.0f,
+ 100.0f + hash_float2_to_float(make_float2(seed, 1.0f)) * 100.0f,
+ 100.0f + hash_float2_to_float(make_float2(seed, 2.0f)) * 100.0f);
+}
- decode_node_uchar4(node.y, &co_offset, &scale_offset, &detail_offset, &distortion_offset);
- decode_node_uchar4(node.z, &color_offset, &fac_offset, NULL, NULL);
+ccl_device_inline float4 random_float4_offset(float seed)
+{
+ return make_float4(100.0f + hash_float2_to_float(make_float2(seed, 0.0f)) * 100.0f,
+ 100.0f + hash_float2_to_float(make_float2(seed, 1.0f)) * 100.0f,
+ 100.0f + hash_float2_to_float(make_float2(seed, 2.0f)) * 100.0f,
+ 100.0f + hash_float2_to_float(make_float2(seed, 3.0f)) * 100.0f);
+}
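
Each offset component is 100 + hash(seed) * 100, so with the hash in [0, 1] the offsets land in the [100, 200] range mentioned in the comment above. A standalone sketch with a toy integer-mixing hash standing in for the Cycles hash_float_to_float:

#include <cstdio>
#include <cstdint>
#include <cstring>

// Hypothetical stand-in for a float hash returning values in [0, 1).
static float toy_hash(float seed)
{
  uint32_t bits;
  std::memcpy(&bits, &seed, sizeof(bits));
  bits ^= bits >> 16;
  bits *= 0x7feb352du;                       // a common integer-mixing constant
  bits ^= bits >> 15;
  return (bits & 0xffffffu) / 16777216.0f;   // 24 random bits -> [0, 1)
}

// Mirror of random_float_offset: shift the hash into [100, 200).
static float random_offset(float seed) { return 100.0f + toy_hash(seed) * 100.0f; }

int main()
{
  for (int i = 0; i < 4; i++)
    printf("offset(%d) = %f\n", i, random_offset((float)i));
  return 0;
}
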
- uint4 node2 = read_node(kg, offset);
+ccl_device void noise_texture_1d(float co,
+ float detail,
+ float roughness,
+ float distortion,
+ bool color_is_needed,
+ float *value,
+ float3 *color)
+{
+ float p = co;
+ if (distortion != 0.0f) {
+ p += snoise_1d(p + random_float_offset(0.0f)) * distortion;
+ }
+
+ *value = fractal_noise_1d(p, detail, roughness);
+ if (color_is_needed) {
+ *color = make_float3(*value,
+ fractal_noise_1d(p + random_float_offset(1.0f), detail, roughness),
+ fractal_noise_1d(p + random_float_offset(2.0f), detail, roughness));
+ }
+}
+
+ccl_device void noise_texture_2d(float2 co,
+ float detail,
+ float roughness,
+ float distortion,
+ bool color_is_needed,
+ float *value,
+ float3 *color)
+{
+ float2 p = co;
+ if (distortion != 0.0f) {
+ p += make_float2(snoise_2d(p + random_float2_offset(0.0f)) * distortion,
+ snoise_2d(p + random_float2_offset(1.0f)) * distortion);
+ }
- float scale = stack_load_float_default(stack, scale_offset, node2.x);
- float detail = stack_load_float_default(stack, detail_offset, node2.y);
- float distortion = stack_load_float_default(stack, distortion_offset, node2.z);
- float3 p = stack_load_float3(stack, co_offset) * scale;
- int hard = 0;
+ *value = fractal_noise_2d(p, detail, roughness);
+ if (color_is_needed) {
+ *color = make_float3(*value,
+ fractal_noise_2d(p + random_float2_offset(2.0f), detail, roughness),
+ fractal_noise_2d(p + random_float2_offset(3.0f), detail, roughness));
+ }
+}
+ccl_device void noise_texture_3d(float3 co,
+ float detail,
+ float roughness,
+ float distortion,
+ bool color_is_needed,
+ float *value,
+ float3 *color)
+{
+ float3 p = co;
if (distortion != 0.0f) {
- float3 r, offset = make_float3(13.5f, 13.5f, 13.5f);
+ p += make_float3(snoise_3d(p + random_float3_offset(0.0f)) * distortion,
+ snoise_3d(p + random_float3_offset(1.0f)) * distortion,
+ snoise_3d(p + random_float3_offset(2.0f)) * distortion);
+ }
- r.x = noise(p + offset) * distortion;
- r.y = noise(p) * distortion;
- r.z = noise(p - offset) * distortion;
+ *value = fractal_noise_3d(p, detail, roughness);
+ if (color_is_needed) {
+ *color = make_float3(*value,
+ fractal_noise_3d(p + random_float3_offset(3.0f), detail, roughness),
+ fractal_noise_3d(p + random_float3_offset(4.0f), detail, roughness));
+ }
+}
- p += r;
+ccl_device void noise_texture_4d(float4 co,
+ float detail,
+ float roughness,
+ float distortion,
+ bool color_is_needed,
+ float *value,
+ float3 *color)
+{
+ float4 p = co;
+ if (distortion != 0.0f) {
+ p += make_float4(snoise_4d(p + random_float4_offset(0.0f)) * distortion,
+ snoise_4d(p + random_float4_offset(1.0f)) * distortion,
+ snoise_4d(p + random_float4_offset(2.0f)) * distortion,
+ snoise_4d(p + random_float4_offset(3.0f)) * distortion);
}
- float f = noise_turbulence(p, detail, hard);
+ *value = fractal_noise_4d(p, detail, roughness);
+ if (color_is_needed) {
+ *color = make_float3(*value,
+ fractal_noise_4d(p + random_float4_offset(4.0f), detail, roughness),
+ fractal_noise_4d(p + random_float4_offset(5.0f), detail, roughness));
+ }
+}
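
All four noise_texture_*d variants above share one pattern: optionally distort the coordinate by adding signed noise sampled at seeded offsets, then evaluate fractal noise once for the value output and twice more at offset coordinates for the extra color channels. A compact plain C++ sketch of that control flow, with toy stand-ins for the noise calls (the constants 150, 125 and 175 are placeholders for the random offsets, not the real values):

#include <cstdio>
#include <cmath>

// Trivial stand-ins, not the Cycles snoise_1d / fractal_noise_1d.
static float toy_snoise(float p) { return std::sin(12.9898f * p) * 0.5f; }
static float toy_fractal(float p) { return 0.5f * toy_snoise(p) + 0.5f; }

// Same control flow as noise_texture_1d above: distort the coordinate first,
// then evaluate the value channel plus two decorrelated channels for color.
static void toy_noise_texture(float co, float distortion, bool need_color,
                              float *value, float color[3])
{
  float p = co;
  if (distortion != 0.0f)
    p += toy_snoise(p + 150.0f) * distortion;  // 150.0f stands in for random_float_offset(0.0f)

  *value = toy_fractal(p);
  if (need_color) {
    color[0] = *value;
    color[1] = toy_fractal(p + 125.0f);  // offsets decorrelate the channels
    color[2] = toy_fractal(p + 175.0f);
  }
}

int main()
{
  float value, color[3];
  toy_noise_texture(0.37f, 1.0f, true, &value, color);
  printf("value = %f, color = (%f, %f, %f)\n", value, color[0], color[1], color[2]);
  return 0;
}
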
+
+ccl_device void svm_node_tex_noise(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint dimensions,
+ uint offsets1,
+ uint offsets2,
+ int *offset)
+{
+ uint vector_stack_offset, w_stack_offset, scale_stack_offset;
+ uint detail_stack_offset, roughness_stack_offset, distortion_stack_offset;
+ uint value_stack_offset, color_stack_offset;
+
+ svm_unpack_node_uchar4(
+ offsets1, &vector_stack_offset, &w_stack_offset, &scale_stack_offset, &detail_stack_offset);
+ svm_unpack_node_uchar4(offsets2,
+ &roughness_stack_offset,
+ &distortion_stack_offset,
+ &value_stack_offset,
+ &color_stack_offset);
+
+ uint4 defaults1 = read_node(kg, offset);
+ uint4 defaults2 = read_node(kg, offset);
+
+ float3 vector = stack_load_float3(stack, vector_stack_offset);
+ float w = stack_load_float_default(stack, w_stack_offset, defaults1.x);
+ float scale = stack_load_float_default(stack, scale_stack_offset, defaults1.y);
+ float detail = stack_load_float_default(stack, detail_stack_offset, defaults1.z);
+ float roughness = stack_load_float_default(stack, roughness_stack_offset, defaults1.w);
+ float distortion = stack_load_float_default(stack, distortion_stack_offset, defaults2.x);
+
+ vector *= scale;
+ w *= scale;
+
+ float value;
+ float3 color;
+ switch (dimensions) {
+ case 1:
+ noise_texture_1d(
+ w, detail, roughness, distortion, stack_valid(color_stack_offset), &value, &color);
+ break;
+ case 2:
+ noise_texture_2d(make_float2(vector.x, vector.y),
+ detail,
+ roughness,
+ distortion,
+ stack_valid(color_stack_offset),
+ &value,
+ &color);
+ break;
+ case 3:
+ noise_texture_3d(
+ vector, detail, roughness, distortion, stack_valid(color_stack_offset), &value, &color);
+ break;
+ case 4:
+ noise_texture_4d(make_float4(vector.x, vector.y, vector.z, w),
+ detail,
+ roughness,
+ distortion,
+ stack_valid(color_stack_offset),
+ &value,
+ &color);
+ break;
+ default:
+ kernel_assert(0);
+ }
- if (stack_valid(fac_offset)) {
- stack_store_float(stack, fac_offset, f);
+ if (stack_valid(value_stack_offset)) {
+ stack_store_float(stack, value_stack_offset, value);
}
- if (stack_valid(color_offset)) {
- float3 color = make_float3(f,
- noise_turbulence(make_float3(p.y, p.x, p.z), detail, hard),
- noise_turbulence(make_float3(p.y, p.z, p.x), detail, hard));
- stack_store_float3(stack, color_offset, color);
+ if (stack_valid(color_stack_offset)) {
+ stack_store_float3(stack, color_stack_offset, color);
}
}
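
svm_node_tex_noise relies on svm_unpack_node_uchar4 and svm_unpack_node_uchar3 to recover several small stack offsets that were packed into a single uint. The helper itself is defined elsewhere in the kernel headers and is not part of the hunks shown here; the sketch below assumes the conventional one-byte-per-field packing that the uchar naming implies:

#include <cstdio>
#include <cstdint>

// Pack four small stack offsets (each < 256) into one 32-bit word, one per byte,
// and unpack them again (sketch only, not the real kernel helper).
static uint32_t pack_uchar4(uint32_t x, uint32_t y, uint32_t z, uint32_t w)
{
  return (x & 0xFF) | ((y & 0xFF) << 8) | ((z & 0xFF) << 16) | ((w & 0xFF) << 24);
}

static void unpack_uchar4(uint32_t i, uint32_t *x, uint32_t *y, uint32_t *z, uint32_t *w)
{
  *x = i & 0xFF;
  *y = (i >> 8) & 0xFF;
  *z = (i >> 16) & 0xFF;
  *w = (i >> 24) & 0xFF;
}

int main()
{
  uint32_t packed = pack_uchar4(3, 7, 12, 255);
  uint32_t a, b, c, d;
  unpack_uchar4(packed, &a, &b, &c, &d);
  printf("%u %u %u %u\n", a, b, c, d);  // 3 7 12 255
  return 0;
}
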
diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h
index 6084ee35a1f..85ccf39144b 100644
--- a/intern/cycles/kernel/svm/svm_ramp.h
+++ b/intern/cycles/kernel/svm/svm_ramp.h
@@ -59,7 +59,7 @@ ccl_device void svm_node_rgb_ramp(
uint fac_offset, color_offset, alpha_offset;
uint interpolate = node.z;
- decode_node_uchar4(node.y, &fac_offset, &color_offset, &alpha_offset, NULL);
+ svm_unpack_node_uchar3(node.y, &fac_offset, &color_offset, &alpha_offset);
uint table_size = read_node(kg, offset).x;
@@ -78,7 +78,7 @@ ccl_device void svm_node_curves(
KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
uint fac_offset, color_offset, out_offset;
- decode_node_uchar4(node.y, &fac_offset, &color_offset, &out_offset, NULL);
+ svm_unpack_node_uchar3(node.y, &fac_offset, &color_offset, &out_offset);
uint table_size = read_node(kg, offset).x;
diff --git a/intern/cycles/kernel/svm/svm_sky.h b/intern/cycles/kernel/svm/svm_sky.h
index 50fe0c8232f..f824184c1d4 100644
--- a/intern/cycles/kernel/svm/svm_sky.h
+++ b/intern/cycles/kernel/svm/svm_sky.h
@@ -37,16 +37,16 @@ ccl_device float sky_perez_function(float *lam, float theta, float gamma)
(1.0f + lam[2] * expf(lam[3] * gamma) + lam[4] * cgamma * cgamma);
}
-ccl_device float3 sky_radiance_old(KernelGlobals *kg,
- float3 dir,
- float sunphi,
- float suntheta,
- float radiance_x,
- float radiance_y,
- float radiance_z,
- float *config_x,
- float *config_y,
- float *config_z)
+ccl_device float3 sky_radiance_preetham(KernelGlobals *kg,
+ float3 dir,
+ float sunphi,
+ float suntheta,
+ float radiance_x,
+ float radiance_y,
+ float radiance_z,
+ float *config_x,
+ float *config_y,
+ float *config_z)
{
/* convert vector to spherical coordinates */
float2 spherical = direction_to_spherical(dir);
@@ -90,16 +90,16 @@ ccl_device float sky_radiance_internal(float *configuration, float theta, float
configuration[6] * mieM + configuration[7] * zenith);
}
-ccl_device float3 sky_radiance_new(KernelGlobals *kg,
- float3 dir,
- float sunphi,
- float suntheta,
- float radiance_x,
- float radiance_y,
- float radiance_z,
- float *config_x,
- float *config_y,
- float *config_z)
+ccl_device float3 sky_radiance_hosek(KernelGlobals *kg,
+ float3 dir,
+ float sunphi,
+ float suntheta,
+ float radiance_x,
+ float radiance_y,
+ float radiance_z,
+ float *config_x,
+ float *config_y,
+ float *config_z)
{
/* convert vector to spherical coordinates */
float2 spherical = direction_to_spherical(dir);
@@ -121,93 +121,209 @@ ccl_device float3 sky_radiance_new(KernelGlobals *kg,
return xyz_to_rgb(kg, make_float3(x, y, z)) * (M_2PI_F / 683);
}
+/* Nishita improved sky model */
+ccl_device float3 geographical_to_direction(float lat, float lon)
+{
+ return make_float3(cos(lat) * cos(lon), cos(lat) * sin(lon), sin(lat));
+}
+
+ccl_device float3 sky_radiance_nishita(KernelGlobals *kg,
+ float3 dir,
+ float *nishita_data,
+ uint texture_id)
+{
+ /* definitions */
+ float sun_elevation = nishita_data[6];
+ float sun_rotation = nishita_data[7];
+ float angular_diameter = nishita_data[8];
+ float sun_intensity = nishita_data[9];
+ bool sun_disc = (angular_diameter > 0.0f);
+ float3 xyz;
+ /* convert dir to spherical coordinates */
+ float2 direction = direction_to_spherical(dir);
+
+ /* render above the horizon */
+ if (dir.z >= 0.0f) {
+ /* definitions */
+ float3 sun_dir = geographical_to_direction(sun_elevation, sun_rotation + M_PI_2_F);
+ float sun_dir_angle = precise_angle(dir, sun_dir);
+ float half_angular = angular_diameter / 2.0f;
+ float dir_elevation = M_PI_2_F - direction.x;
+
+ /* if ray inside sun disc render it, otherwise render sky */
+ if (sun_disc && sun_dir_angle < half_angular) {
+ /* get 2 pixels data */
+ float3 pixel_bottom = make_float3(nishita_data[0], nishita_data[1], nishita_data[2]);
+ float3 pixel_top = make_float3(nishita_data[3], nishita_data[4], nishita_data[5]);
+ float y;
+
+ /* sun interpolation */
+ if (sun_elevation - half_angular > 0.0f) {
+ if (sun_elevation + half_angular > 0.0f) {
+ y = ((dir_elevation - sun_elevation) / angular_diameter) + 0.5f;
+ xyz = interp(pixel_bottom, pixel_top, y) * sun_intensity;
+ }
+ }
+ else {
+ if (sun_elevation + half_angular > 0.0f) {
+ y = dir_elevation / (sun_elevation + half_angular);
+ xyz = interp(pixel_bottom, pixel_top, y) * sun_intensity;
+ }
+ }
+ /* limb darkening, coefficient is 0.6f */
+ float limb_darkening = (1.0f -
+ 0.6f * (1.0f - sqrtf(1.0f - sqr(sun_dir_angle / half_angular))));
+ xyz *= limb_darkening;
+ }
+ /* sky */
+ else {
+ /* sky interpolation */
+ float x = (direction.y + M_PI_F + sun_rotation) / M_2PI_F;
+ /* more pixels toward horizon compensation */
+ float y = safe_sqrtf(dir_elevation / M_PI_2_F);
+ if (x > 1.0f) {
+ x -= 1.0f;
+ }
+ xyz = float4_to_float3(kernel_tex_image_interp(kg, texture_id, x, y));
+ }
+ }
+ /* ground */
+ else {
+ if (dir.z < -0.4f) {
+ xyz = make_float3(0.0f, 0.0f, 0.0f);
+ }
+ else {
+ /* black ground fade */
+ float fade = 1.0f + dir.z * 2.5f;
+ fade = sqr(fade) * fade;
+ /* interpolation */
+ float x = (direction.y + M_PI_F + sun_rotation) / M_2PI_F;
+ if (x > 1.0f) {
+ x -= 1.0f;
+ }
+ xyz = float4_to_float3(kernel_tex_image_interp(kg, texture_id, x, -0.5)) * fade;
+ }
+ }
+
+ /* convert to RGB */
+ return xyz_to_rgb(kg, xyz);
+}
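
The limb darkening applied to the sun disc is the common single-coefficient (linear) model 1 - u * (1 - sqrt(1 - r^2)), with u = 0.6 and r the angular distance from the disc center divided by the disc radius. A short numeric check in plain C++:

#include <cstdio>
#include <cmath>

// Linear limb darkening: brightness relative to the disc center as a function of
// r, the angular distance from the sun center divided by the disc radius.
static float limb_darkening(float r, float u)
{
  return 1.0f - u * (1.0f - std::sqrt(1.0f - r * r));
}

int main()
{
  const float u = 0.6f;  // coefficient used by sky_radiance_nishita above
  printf("center r=0.0 : %f\n", limb_darkening(0.0f, u));  // 1.00
  printf("mid    r=0.5 : %f\n", limb_darkening(0.5f, u));  // ~0.92
  printf("edge   r=1.0 : %f\n", limb_darkening(1.0f, u));  // 0.40
  return 0;
}
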
+
ccl_device void svm_node_tex_sky(
KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
- /* Define variables */
- float sunphi, suntheta, radiance_x, radiance_y, radiance_z;
- float config_x[9], config_y[9], config_z[9];
-
/* Load data */
uint dir_offset = node.y;
uint out_offset = node.z;
int sky_model = node.w;
- float4 data = read_node_float(kg, offset);
- sunphi = data.x;
- suntheta = data.y;
- radiance_x = data.z;
- radiance_y = data.w;
-
- data = read_node_float(kg, offset);
- radiance_z = data.x;
- config_x[0] = data.y;
- config_x[1] = data.z;
- config_x[2] = data.w;
-
- data = read_node_float(kg, offset);
- config_x[3] = data.x;
- config_x[4] = data.y;
- config_x[5] = data.z;
- config_x[6] = data.w;
-
- data = read_node_float(kg, offset);
- config_x[7] = data.x;
- config_x[8] = data.y;
- config_y[0] = data.z;
- config_y[1] = data.w;
-
- data = read_node_float(kg, offset);
- config_y[2] = data.x;
- config_y[3] = data.y;
- config_y[4] = data.z;
- config_y[5] = data.w;
-
- data = read_node_float(kg, offset);
- config_y[6] = data.x;
- config_y[7] = data.y;
- config_y[8] = data.z;
- config_z[0] = data.w;
-
- data = read_node_float(kg, offset);
- config_z[1] = data.x;
- config_z[2] = data.y;
- config_z[3] = data.z;
- config_z[4] = data.w;
-
- data = read_node_float(kg, offset);
- config_z[5] = data.x;
- config_z[6] = data.y;
- config_z[7] = data.z;
- config_z[8] = data.w;
-
float3 dir = stack_load_float3(stack, dir_offset);
float3 f;
- /* Compute Sky */
- if (sky_model == 0) {
- f = sky_radiance_old(kg,
- dir,
- sunphi,
- suntheta,
- radiance_x,
- radiance_y,
- radiance_z,
- config_x,
- config_y,
- config_z);
+ /* Preetham and Hosek share the same data */
+ if (sky_model == 0 || sky_model == 1) {
+ /* Define variables */
+ float sunphi, suntheta, radiance_x, radiance_y, radiance_z;
+ float config_x[9], config_y[9], config_z[9];
+
+ float4 data = read_node_float(kg, offset);
+ sunphi = data.x;
+ suntheta = data.y;
+ radiance_x = data.z;
+ radiance_y = data.w;
+
+ data = read_node_float(kg, offset);
+ radiance_z = data.x;
+ config_x[0] = data.y;
+ config_x[1] = data.z;
+ config_x[2] = data.w;
+
+ data = read_node_float(kg, offset);
+ config_x[3] = data.x;
+ config_x[4] = data.y;
+ config_x[5] = data.z;
+ config_x[6] = data.w;
+
+ data = read_node_float(kg, offset);
+ config_x[7] = data.x;
+ config_x[8] = data.y;
+ config_y[0] = data.z;
+ config_y[1] = data.w;
+
+ data = read_node_float(kg, offset);
+ config_y[2] = data.x;
+ config_y[3] = data.y;
+ config_y[4] = data.z;
+ config_y[5] = data.w;
+
+ data = read_node_float(kg, offset);
+ config_y[6] = data.x;
+ config_y[7] = data.y;
+ config_y[8] = data.z;
+ config_z[0] = data.w;
+
+ data = read_node_float(kg, offset);
+ config_z[1] = data.x;
+ config_z[2] = data.y;
+ config_z[3] = data.z;
+ config_z[4] = data.w;
+
+ data = read_node_float(kg, offset);
+ config_z[5] = data.x;
+ config_z[6] = data.y;
+ config_z[7] = data.z;
+ config_z[8] = data.w;
+
+ /* Compute Sky */
+ if (sky_model == 0) {
+ f = sky_radiance_preetham(kg,
+ dir,
+ sunphi,
+ suntheta,
+ radiance_x,
+ radiance_y,
+ radiance_z,
+ config_x,
+ config_y,
+ config_z);
+ }
+ else {
+ f = sky_radiance_hosek(kg,
+ dir,
+ sunphi,
+ suntheta,
+ radiance_x,
+ radiance_y,
+ radiance_z,
+ config_x,
+ config_y,
+ config_z);
+ }
}
+ /* Nishita */
else {
- f = sky_radiance_new(kg,
- dir,
- sunphi,
- suntheta,
- radiance_x,
- radiance_y,
- radiance_z,
- config_x,
- config_y,
- config_z);
+ /* Define variables */
+ float nishita_data[10];
+
+ float4 data = read_node_float(kg, offset);
+ nishita_data[0] = data.x;
+ nishita_data[1] = data.y;
+ nishita_data[2] = data.z;
+ nishita_data[3] = data.w;
+
+ data = read_node_float(kg, offset);
+ nishita_data[4] = data.x;
+ nishita_data[5] = data.y;
+ nishita_data[6] = data.z;
+ nishita_data[7] = data.w;
+
+ data = read_node_float(kg, offset);
+ nishita_data[8] = data.x;
+ nishita_data[9] = data.y;
+ uint texture_id = __float_as_uint(data.z);
+
+ /* Compute Sky */
+ f = sky_radiance_nishita(kg, dir, nishita_data, texture_id);
}
stack_store_float3(stack, out_offset, f);
diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h
index 1fb3e20f9e0..a876d6bc916 100644
--- a/intern/cycles/kernel/svm/svm_tex_coord.h
+++ b/intern/cycles/kernel/svm/svm_tex_coord.h
@@ -257,7 +257,7 @@ ccl_device void svm_node_tex_coord_bump_dy(
ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
uint color_offset, strength_offset, normal_offset, space;
- decode_node_uchar4(node.y, &color_offset, &strength_offset, &normal_offset, &space);
+ svm_unpack_node_uchar4(node.y, &color_offset, &strength_offset, &normal_offset, &space);
float3 color = stack_load_float3(stack, color_offset);
color = 2.0f * make_float3(color.x - 0.5f, color.y - 0.5f, color.z - 0.5f);
@@ -349,7 +349,7 @@ ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *st
ccl_device void svm_node_tangent(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
uint tangent_offset, direction_type, axis;
- decode_node_uchar4(node.y, &tangent_offset, &direction_type, &axis, NULL);
+ svm_unpack_node_uchar3(node.y, &tangent_offset, &direction_type, &axis);
float3 tangent;
float3 attribute_value;
diff --git a/intern/cycles/kernel/svm/svm_texture.h b/intern/cycles/kernel/svm/svm_texture.h
deleted file mode 100644
index 290aa85c831..00000000000
--- a/intern/cycles/kernel/svm/svm_texture.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-/* Turbulence */
-
-ccl_device_noinline float noise_turbulence(float3 p, float octaves, int hard)
-{
- float fscale = 1.0f;
- float amp = 1.0f;
- float sum = 0.0f;
- int i, n;
-
- octaves = clamp(octaves, 0.0f, 16.0f);
- n = float_to_int(octaves);
-
- for (i = 0; i <= n; i++) {
- float t = noise(fscale * p);
-
- if (hard)
- t = fabsf(2.0f * t - 1.0f);
-
- sum += t * amp;
- amp *= 0.5f;
- fscale *= 2.0f;
- }
-
- float rmd = octaves - floorf(octaves);
-
- if (rmd != 0.0f) {
- float t = noise(fscale * p);
-
- if (hard)
- t = fabsf(2.0f * t - 1.0f);
-
- float sum2 = sum + t * amp;
-
- sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
- sum2 *= ((float)(1 << (n + 1)) / (float)((1 << (n + 2)) - 1));
-
- return (1.0f - rmd) * sum + rmd * sum2;
- }
- else {
- sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
- return sum;
- }
-}
-
-CCL_NAMESPACE_END
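
The deleted noise_turbulence above summed octaves of noise with a hardcoded 0.5 amplitude falloff per octave. Its replacements, the fractal_noise_*d functions called from svm_noisetex.h above, are not part of the hunks shown here; the sketch below assumes, as the new call sites suggest, that the roughness input takes over the role of the per-octave falloff, and it omits the fractional-octave blending the deleted code performed:

#include <cstdio>
#include <cmath>

// Stand-in 1D signed noise in [-1, 1] (not the kernel's snoise_1d).
static float toy_snoise(float p) { return std::sin(12.9898f * p + std::sin(4.1414f * p)); }

// Octave-summing fractal noise, normalized to roughly [0, 1]. The per-octave
// amplitude falloff comes from "roughness"; the deleted noise_turbulence
// hardcoded it to 0.5.
static float toy_fractal_noise(float p, float octaves, float roughness)
{
  float fscale = 1.0f, amp = 1.0f, sum = 0.0f, max_amp = 0.0f;
  int n = (int)octaves;
  for (int i = 0; i <= n; i++) {
    sum += amp * (0.5f * toy_snoise(fscale * p) + 0.5f);
    max_amp += amp;
    amp *= roughness;
    fscale *= 2.0f;
  }
  return sum / max_amp;
}

int main()
{
  printf("smooth  (roughness 0.2): %f\n", toy_fractal_noise(0.73f, 4.0f, 0.2f));
  printf("rougher (roughness 0.8): %f\n", toy_fractal_noise(0.73f, 4.0f, 0.8f));
  return 0;
}
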
diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h
index d31e4f93696..f1ebb37e23e 100644
--- a/intern/cycles/kernel/svm/svm_types.h
+++ b/intern/cycles/kernel/svm/svm_types.h
@@ -42,108 +42,126 @@ CCL_NAMESPACE_BEGIN
#define NODE_GROUP_LEVEL_1 1
#define NODE_GROUP_LEVEL_2 2
#define NODE_GROUP_LEVEL_3 3
-#define NODE_GROUP_LEVEL_MAX NODE_GROUP_LEVEL_3
+#define NODE_GROUP_LEVEL_4 4
+#define NODE_GROUP_LEVEL_MAX NODE_GROUP_LEVEL_4
#define NODE_FEATURE_VOLUME (1 << 0)
#define NODE_FEATURE_HAIR (1 << 1)
#define NODE_FEATURE_BUMP (1 << 2)
#define NODE_FEATURE_BUMP_STATE (1 << 3)
+#define NODE_FEATURE_VORONOI_EXTRA (1 << 4)
/* TODO(sergey): Consider using something like ((uint)(-1)).
* Need to check carefully operand types around usage of this
* define first.
*/
#define NODE_FEATURE_ALL \
- (NODE_FEATURE_VOLUME | NODE_FEATURE_HAIR | NODE_FEATURE_BUMP | NODE_FEATURE_BUMP_STATE)
+ (NODE_FEATURE_VOLUME | NODE_FEATURE_HAIR | NODE_FEATURE_BUMP | NODE_FEATURE_BUMP_STATE | \
+ NODE_FEATURE_VORONOI_EXTRA)
+
+#define NODES_GROUP(group) ((group) <= __NODES_MAX_GROUP__)
+#define NODES_FEATURE(feature) ((__NODES_FEATURES__ & (feature)) != 0)
typedef enum ShaderNodeType {
NODE_END = 0,
+ NODE_SHADER_JUMP,
NODE_CLOSURE_BSDF,
NODE_CLOSURE_EMISSION,
NODE_CLOSURE_BACKGROUND,
NODE_CLOSURE_SET_WEIGHT,
NODE_CLOSURE_WEIGHT,
+ NODE_EMISSION_WEIGHT,
NODE_MIX_CLOSURE,
NODE_JUMP_IF_ZERO,
NODE_JUMP_IF_ONE,
- NODE_TEX_IMAGE,
- NODE_TEX_IMAGE_BOX,
- NODE_TEX_SKY,
NODE_GEOMETRY,
- NODE_GEOMETRY_DUPLI,
- NODE_LIGHT_PATH,
+ NODE_CONVERT,
+ NODE_TEX_COORD,
NODE_VALUE_F,
NODE_VALUE_V,
- NODE_MIX,
NODE_ATTR,
- NODE_CONVERT,
- NODE_FRESNEL,
- NODE_WIREFRAME,
- NODE_WAVELENGTH,
- NODE_BLACKBODY,
- NODE_EMISSION_WEIGHT,
- NODE_TEX_GRADIENT,
- NODE_TEX_VORONOI,
- NODE_TEX_MUSGRAVE,
- NODE_TEX_WAVE,
- NODE_TEX_MAGIC,
- NODE_TEX_NOISE,
- NODE_SHADER_JUMP,
- NODE_SET_DISPLACEMENT,
+ NODE_VERTEX_COLOR,
NODE_GEOMETRY_BUMP_DX,
NODE_GEOMETRY_BUMP_DY,
+ NODE_SET_DISPLACEMENT,
+ NODE_DISPLACEMENT,
+ NODE_VECTOR_DISPLACEMENT,
+ NODE_TEX_IMAGE,
+ NODE_TEX_IMAGE_BOX,
+ NODE_TEX_NOISE,
NODE_SET_BUMP,
- NODE_MATH,
- NODE_VECTOR_MATH,
- NODE_VECTOR_TRANSFORM,
- NODE_MAPPING,
- NODE_TEX_COORD,
- NODE_TEX_COORD_BUMP_DX,
- NODE_TEX_COORD_BUMP_DY,
NODE_ATTR_BUMP_DX,
NODE_ATTR_BUMP_DY,
- NODE_TEX_ENVIRONMENT,
+ NODE_VERTEX_COLOR_BUMP_DX,
+ NODE_VERTEX_COLOR_BUMP_DY,
+ NODE_TEX_COORD_BUMP_DX,
+ NODE_TEX_COORD_BUMP_DY,
+ NODE_CLOSURE_SET_NORMAL,
+ NODE_ENTER_BUMP_EVAL,
+ NODE_LEAVE_BUMP_EVAL,
+ NODE_HSV,
NODE_CLOSURE_HOLDOUT,
+ NODE_FRESNEL,
NODE_LAYER_WEIGHT,
NODE_CLOSURE_VOLUME,
- NODE_SEPARATE_VECTOR,
- NODE_COMBINE_VECTOR,
- NODE_SEPARATE_HSV,
- NODE_COMBINE_HSV,
- NODE_HSV,
- NODE_CAMERA,
- NODE_INVERT,
- NODE_NORMAL,
+ NODE_PRINCIPLED_VOLUME,
+ NODE_MATH,
+ NODE_VECTOR_MATH,
+ NODE_RGB_RAMP,
NODE_GAMMA,
- NODE_TEX_CHECKER,
NODE_BRIGHTCONTRAST,
- NODE_RGB_RAMP,
- NODE_RGB_CURVES,
- NODE_VECTOR_CURVES,
- NODE_MIN_MAX,
- NODE_LIGHT_FALLOFF,
+ NODE_LIGHT_PATH,
NODE_OBJECT_INFO,
NODE_PARTICLE_INFO,
+ NODE_HAIR_INFO,
+ NODE_TEXTURE_MAPPING,
+ NODE_MAPPING,
+ NODE_MIN_MAX,
+ NODE_CAMERA,
+ NODE_TEX_ENVIRONMENT,
+ NODE_TEX_SKY,
+ NODE_TEX_GRADIENT,
+ NODE_TEX_VORONOI,
+ NODE_TEX_MUSGRAVE,
+ NODE_TEX_WAVE,
+ NODE_TEX_MAGIC,
+ NODE_TEX_CHECKER,
NODE_TEX_BRICK,
- NODE_CLOSURE_SET_NORMAL,
- NODE_AMBIENT_OCCLUSION,
+ NODE_TEX_WHITE_NOISE,
+ NODE_NORMAL,
+ NODE_LIGHT_FALLOFF,
+ NODE_IES,
+ NODE_RGB_CURVES,
+ NODE_VECTOR_CURVES,
NODE_TANGENT,
NODE_NORMAL_MAP,
- NODE_HAIR_INFO,
- NODE_UVMAP,
- NODE_TEX_VOXEL,
- NODE_ENTER_BUMP_EVAL,
- NODE_LEAVE_BUMP_EVAL,
+ NODE_INVERT,
+ NODE_MIX,
+ NODE_SEPARATE_VECTOR,
+ NODE_COMBINE_VECTOR,
+ NODE_SEPARATE_HSV,
+ NODE_COMBINE_HSV,
+ NODE_VECTOR_ROTATE,
+ NODE_VECTOR_TRANSFORM,
+ NODE_WIREFRAME,
+ NODE_WAVELENGTH,
+ NODE_BLACKBODY,
+ NODE_MAP_RANGE,
+ NODE_CLAMP,
NODE_BEVEL,
- NODE_DISPLACEMENT,
- NODE_VECTOR_DISPLACEMENT,
- NODE_PRINCIPLED_VOLUME,
- NODE_IES,
+ NODE_AMBIENT_OCCLUSION,
+ NODE_TEX_VOXEL,
+ NODE_AOV_START,
+ NODE_AOV_COLOR,
+ NODE_AOV_VALUE,
+ /* NOTE: for best OpenCL performance, item definition in the enum must
+ * match the switch case order in svm.h. */
} ShaderNodeType;
typedef enum NodeAttributeType {
NODE_ATTR_FLOAT = 0,
NODE_ATTR_FLOAT2,
NODE_ATTR_FLOAT3,
+ NODE_ATTR_RGBA,
NODE_ATTR_MATRIX
} NodeAttributeType;
@@ -158,6 +176,7 @@ typedef enum NodeGeometry {
typedef enum NodeObjectInfo {
NODE_INFO_OB_LOCATION,
+ NODE_INFO_OB_COLOR,
NODE_INFO_OB_INDEX,
NODE_INFO_MAT_INDEX,
NODE_INFO_OB_RANDOM
@@ -242,7 +261,7 @@ typedef enum NodeMix {
NODE_MIX_CLAMP /* used for the clamp UI option */
} NodeMix;
-typedef enum NodeMath {
+typedef enum NodeMathType {
NODE_MATH_ADD,
NODE_MATH_SUBTRACT,
NODE_MATH_MULTIPLY,
@@ -265,19 +284,82 @@ typedef enum NodeMath {
NODE_MATH_ARCTAN2,
NODE_MATH_FLOOR,
NODE_MATH_CEIL,
- NODE_MATH_FRACT,
+ NODE_MATH_FRACTION,
NODE_MATH_SQRT,
- NODE_MATH_CLAMP /* used for the clamp UI option */
-} NodeMath;
-
-typedef enum NodeVectorMath {
+ NODE_MATH_INV_SQRT,
+ NODE_MATH_SIGN,
+ NODE_MATH_EXPONENT,
+ NODE_MATH_RADIANS,
+ NODE_MATH_DEGREES,
+ NODE_MATH_SINH,
+ NODE_MATH_COSH,
+ NODE_MATH_TANH,
+ NODE_MATH_TRUNC,
+ NODE_MATH_SNAP,
+ NODE_MATH_WRAP,
+ NODE_MATH_COMPARE,
+ NODE_MATH_MULTIPLY_ADD,
+ NODE_MATH_PINGPONG,
+ NODE_MATH_SMOOTH_MIN,
+ NODE_MATH_SMOOTH_MAX,
+} NodeMathType;
+
+typedef enum NodeVectorMathType {
NODE_VECTOR_MATH_ADD,
NODE_VECTOR_MATH_SUBTRACT,
- NODE_VECTOR_MATH_AVERAGE,
- NODE_VECTOR_MATH_DOT_PRODUCT,
+ NODE_VECTOR_MATH_MULTIPLY,
+ NODE_VECTOR_MATH_DIVIDE,
+
NODE_VECTOR_MATH_CROSS_PRODUCT,
- NODE_VECTOR_MATH_NORMALIZE
-} NodeVectorMath;
+ NODE_VECTOR_MATH_PROJECT,
+ NODE_VECTOR_MATH_REFLECT,
+ NODE_VECTOR_MATH_DOT_PRODUCT,
+
+ NODE_VECTOR_MATH_DISTANCE,
+ NODE_VECTOR_MATH_LENGTH,
+ NODE_VECTOR_MATH_SCALE,
+ NODE_VECTOR_MATH_NORMALIZE,
+
+ NODE_VECTOR_MATH_SNAP,
+ NODE_VECTOR_MATH_FLOOR,
+ NODE_VECTOR_MATH_CEIL,
+ NODE_VECTOR_MATH_MODULO,
+ NODE_VECTOR_MATH_FRACTION,
+ NODE_VECTOR_MATH_ABSOLUTE,
+ NODE_VECTOR_MATH_MINIMUM,
+ NODE_VECTOR_MATH_MAXIMUM,
+ NODE_VECTOR_MATH_WRAP,
+ NODE_VECTOR_MATH_SINE,
+ NODE_VECTOR_MATH_COSINE,
+ NODE_VECTOR_MATH_TANGENT,
+} NodeVectorMathType;
+
+typedef enum NodeClampType {
+ NODE_CLAMP_MINMAX,
+ NODE_CLAMP_RANGE,
+} NodeClampType;
+
+typedef enum NodeMapRangeType {
+ NODE_MAP_RANGE_LINEAR,
+ NODE_MAP_RANGE_STEPPED,
+ NODE_MAP_RANGE_SMOOTHSTEP,
+ NODE_MAP_RANGE_SMOOTHERSTEP,
+} NodeMapRangeType;
+
+typedef enum NodeMappingType {
+ NODE_MAPPING_TYPE_POINT,
+ NODE_MAPPING_TYPE_TEXTURE,
+ NODE_MAPPING_TYPE_VECTOR,
+ NODE_MAPPING_TYPE_NORMAL
+} NodeMappingType;
+
+typedef enum NodeVectorRotateType {
+ NODE_VECTOR_ROTATE_TYPE_AXIS,
+ NODE_VECTOR_ROTATE_TYPE_AXIS_X,
+ NODE_VECTOR_ROTATE_TYPE_AXIS_Y,
+ NODE_VECTOR_ROTATE_TYPE_AXIS_Z,
+ NODE_VECTOR_ROTATE_TYPE_EULER_XYZ,
+} NodeVectorRotateType;
typedef enum NodeVectorTransformType {
NODE_VECTOR_TRANSFORM_TYPE_VECTOR,
@@ -312,12 +394,27 @@ typedef enum NodeMusgraveType {
typedef enum NodeWaveType { NODE_WAVE_BANDS, NODE_WAVE_RINGS } NodeWaveType;
-typedef enum NodeWaveProfiles {
+typedef enum NodeWaveBandsDirection {
+ NODE_WAVE_BANDS_DIRECTION_X,
+ NODE_WAVE_BANDS_DIRECTION_Y,
+ NODE_WAVE_BANDS_DIRECTION_Z,
+ NODE_WAVE_BANDS_DIRECTION_DIAGONAL
+} NodeWaveBandsDirection;
+
+typedef enum NodeWaveRingsDirection {
+ NODE_WAVE_RINGS_DIRECTION_X,
+ NODE_WAVE_RINGS_DIRECTION_Y,
+ NODE_WAVE_RINGS_DIRECTION_Z,
+ NODE_WAVE_RINGS_DIRECTION_SPHERICAL
+} NodeWaveRingsDirection;
+
+typedef enum NodeWaveProfile {
NODE_WAVE_PROFILE_SIN,
NODE_WAVE_PROFILE_SAW,
+ NODE_WAVE_PROFILE_TRI,
} NodeWaveProfile;
-typedef enum NodeSkyType { NODE_SKY_OLD, NODE_SKY_NEW } NodeSkyType;
+typedef enum NodeSkyType { NODE_SKY_PREETHAM, NODE_SKY_HOSEK, NODE_SKY_NISHITA } NodeSkyType;
typedef enum NodeGradientType {
NODE_BLEND_LINEAR,
@@ -329,24 +426,19 @@ typedef enum NodeGradientType {
NODE_BLEND_SPHERICAL
} NodeGradientType;
-typedef enum NodeVoronoiColoring {
- NODE_VORONOI_INTENSITY,
- NODE_VORONOI_CELLS
-} NodeVoronoiColoring;
-
typedef enum NodeVoronoiDistanceMetric {
- NODE_VORONOI_DISTANCE,
+ NODE_VORONOI_EUCLIDEAN,
NODE_VORONOI_MANHATTAN,
NODE_VORONOI_CHEBYCHEV,
- NODE_VORONOI_MINKOWSKI
+ NODE_VORONOI_MINKOWSKI,
} NodeVoronoiDistanceMetric;
typedef enum NodeVoronoiFeature {
NODE_VORONOI_F1,
NODE_VORONOI_F2,
- NODE_VORONOI_F3,
- NODE_VORONOI_F4,
- NODE_VORONOI_F2F1
+ NODE_VORONOI_SMOOTH_F1,
+ NODE_VORONOI_DISTANCE_TO_EDGE,
+ NODE_VORONOI_N_SPHERE_RADIUS,
} NodeVoronoiFeature;
typedef enum NodeBlendWeightType {
@@ -373,11 +465,6 @@ typedef enum NodeNormalMapSpace {
NODE_NORMAL_MAP_BLENDER_WORLD,
} NodeNormalMapSpace;
-typedef enum NodeImageColorSpace {
- NODE_COLOR_SPACE_NONE = 0,
- NODE_COLOR_SPACE_COLOR = 1,
-} NodeImageColorSpace;
-
typedef enum NodeImageProjection {
NODE_IMAGE_PROJ_FLAT = 0,
NODE_IMAGE_PROJ_BOX = 1,
@@ -385,6 +472,11 @@ typedef enum NodeImageProjection {
NODE_IMAGE_PROJ_TUBE = 3,
} NodeImageProjection;
+typedef enum NodeImageFlags {
+ NODE_IMAGE_COMPRESS_AS_SRGB = 1,
+ NODE_IMAGE_ALPHA_UNASSOCIATE = 2,
+} NodeImageFlags;
+
typedef enum NodeEnvironmentProjection {
NODE_ENVIRONMENT_EQUIRECTANGULAR = 0,
NODE_ENVIRONMENT_MIRROR_BALL = 1,
@@ -436,6 +528,7 @@ typedef enum ClosureType {
CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID,
CLOSURE_BSDF_PRINCIPLED_SHEEN_ID,
CLOSURE_BSDF_DIFFUSE_TOON_ID,
+ CLOSURE_BSDF_TRANSLUCENT_ID,
/* Glossy */
CLOSURE_BSDF_REFLECTION_ID,
@@ -446,19 +539,12 @@ typedef enum ClosureType {
CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID,
CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID,
CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID,
- CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID,
- CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID,
- CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID,
- CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_FRESNEL_ID,
- CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID,
- CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID,
CLOSURE_BSDF_ASHIKHMIN_VELVET_ID,
CLOSURE_BSDF_PHONG_RAMP_ID,
CLOSURE_BSDF_GLOSSY_TOON_ID,
CLOSURE_BSDF_HAIR_REFLECTION_ID,
/* Transmission */
- CLOSURE_BSDF_TRANSLUCENT_ID,
CLOSURE_BSDF_REFRACTION_ID,
CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID,
CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID,
@@ -499,12 +585,12 @@ typedef enum ClosureType {
/* watch this, being lazy with memory usage */
#define CLOSURE_IS_BSDF(type) (type <= CLOSURE_BSDF_TRANSPARENT_ID)
#define CLOSURE_IS_BSDF_DIFFUSE(type) \
- (type >= CLOSURE_BSDF_DIFFUSE_ID && type <= CLOSURE_BSDF_DIFFUSE_TOON_ID)
+ (type >= CLOSURE_BSDF_DIFFUSE_ID && type <= CLOSURE_BSDF_TRANSLUCENT_ID)
#define CLOSURE_IS_BSDF_GLOSSY(type) \
((type >= CLOSURE_BSDF_REFLECTION_ID && type <= CLOSURE_BSDF_HAIR_REFLECTION_ID) || \
(type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID))
#define CLOSURE_IS_BSDF_TRANSMISSION(type) \
- (type >= CLOSURE_BSDF_TRANSLUCENT_ID && type <= CLOSURE_BSDF_HAIR_TRANSMISSION_ID)
+ (type >= CLOSURE_BSDF_REFRACTION_ID && type <= CLOSURE_BSDF_HAIR_TRANSMISSION_ID)
#define CLOSURE_IS_BSDF_BSSRDF(type) \
(type == CLOSURE_BSDF_BSSRDF_ID || type == CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID)
#define CLOSURE_IS_BSDF_SINGULAR(type) \
@@ -513,13 +599,17 @@ typedef enum ClosureType {
#define CLOSURE_IS_BSDF_TRANSPARENT(type) (type == CLOSURE_BSDF_TRANSPARENT_ID)
#define CLOSURE_IS_BSDF_MULTISCATTER(type) \
(type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID || \
- type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID || \
type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID)
#define CLOSURE_IS_BSDF_MICROFACET(type) \
- ((type >= CLOSURE_BSDF_MICROFACET_GGX_ID && type <= CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID) || \
+ ((type >= CLOSURE_BSDF_MICROFACET_GGX_ID && type <= CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID) || \
(type >= CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID && \
type <= CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID) || \
(type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID))
+#define CLOSURE_IS_BSDF_MICROFACET_FRESNEL(type) \
+ (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID || \
+ type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID || \
+ type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID || \
+ type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID)
#define CLOSURE_IS_BSDF_OR_BSSRDF(type) (type <= CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
#define CLOSURE_IS_BSSRDF(type) \
(type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
diff --git a/intern/cycles/kernel/svm/svm_vector_rotate.h b/intern/cycles/kernel/svm/svm_vector_rotate.h
new file mode 100644
index 00000000000..79a4ec2c40e
--- /dev/null
+++ b/intern/cycles/kernel/svm/svm_vector_rotate.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Vector Rotate */
+
+ccl_device void svm_node_vector_rotate(ShaderData *sd,
+ float *stack,
+ uint input_stack_offsets,
+ uint axis_stack_offsets,
+ uint result_stack_offset)
+{
+ uint type, vector_stack_offset, rotation_stack_offset, center_stack_offset, axis_stack_offset,
+ angle_stack_offset, invert;
+
+ svm_unpack_node_uchar4(
+ input_stack_offsets, &type, &vector_stack_offset, &rotation_stack_offset, &invert);
+ svm_unpack_node_uchar3(
+ axis_stack_offsets, &center_stack_offset, &axis_stack_offset, &angle_stack_offset);
+
+ if (stack_valid(result_stack_offset)) {
+
+ float3 vector = stack_load_float3(stack, vector_stack_offset);
+ float3 center = stack_load_float3(stack, center_stack_offset);
+ float3 result = make_float3(0.0f, 0.0f, 0.0f);
+
+ if (type == NODE_VECTOR_ROTATE_TYPE_EULER_XYZ) {
+ float3 rotation = stack_load_float3(stack, rotation_stack_offset); // Default XYZ.
+ Transform rotationTransform = euler_to_transform(rotation);
+ if (invert) {
+ result = transform_direction_transposed(&rotationTransform, vector - center) + center;
+ }
+ else {
+ result = transform_direction(&rotationTransform, vector - center) + center;
+ }
+ }
+ else {
+ float3 axis;
+ switch (type) {
+ case NODE_VECTOR_ROTATE_TYPE_AXIS_X:
+ axis = make_float3(1.0f, 0.0f, 0.0f);
+ break;
+ case NODE_VECTOR_ROTATE_TYPE_AXIS_Y:
+ axis = make_float3(0.0f, 1.0f, 0.0f);
+ break;
+ case NODE_VECTOR_ROTATE_TYPE_AXIS_Z:
+ axis = make_float3(0.0f, 0.0f, 1.0f);
+ break;
+ default:
+ axis = normalize(stack_load_float3(stack, axis_stack_offset));
+ break;
+ }
+ float angle = stack_load_float(stack, angle_stack_offset);
+ angle = invert ? -angle : angle;
+ result = (len_squared(axis) != 0.0f) ?
+ rotate_around_axis(vector - center, axis, angle) + center :
+ vector;
+ }
+
+ stack_store_float3(stack, result_stack_offset, result);
+ }
+}
+
+CCL_NAMESPACE_END
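
rotate_around_axis used above is defined elsewhere in the kernel and is not shown in this patch. A standard way to implement axis/angle rotation is Rodrigues' formula, v' = v*cos(t) + (axis x v)*sin(t) + axis*(axis . v)*(1 - cos(t)) for a unit-length axis; the standalone sketch below is a hypothetical helper, not the Cycles function:

#include <cstdio>
#include <cmath>

struct Vec3 { float x, y, z; };

static Vec3 cross(Vec3 a, Vec3 b)
{
  return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x};
}
static float dot(Vec3 a, Vec3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }

// Rodrigues' rotation of v around the unit-length axis by angle (radians).
static Vec3 rotate_around_axis_sketch(Vec3 v, Vec3 axis, float angle)
{
  float c = std::cos(angle), s = std::sin(angle);
  Vec3 axv = cross(axis, v);
  float ad = dot(axis, v) * (1.0f - c);
  return {v.x * c + axv.x * s + axis.x * ad,
          v.y * c + axv.y * s + axis.y * ad,
          v.z * c + axv.z * s + axis.z * ad};
}

int main()
{
  // Rotating +X by 90 degrees around +Z should give +Y.
  Vec3 r = rotate_around_axis_sketch({1.0f, 0.0f, 0.0f}, {0.0f, 0.0f, 1.0f}, 1.5707963f);
  printf("(%f, %f, %f)\n", r.x, r.y, r.z);
  return 0;
}
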
diff --git a/intern/cycles/kernel/svm/svm_vector_transform.h b/intern/cycles/kernel/svm/svm_vector_transform.h
index 7ec0f07f2e4..1e95492cf1b 100644
--- a/intern/cycles/kernel/svm/svm_vector_transform.h
+++ b/intern/cycles/kernel/svm/svm_vector_transform.h
@@ -26,8 +26,8 @@ ccl_device void svm_node_vector_transform(KernelGlobals *kg,
uint itype, ifrom, ito;
uint vector_in, vector_out;
- decode_node_uchar4(node.y, &itype, &ifrom, &ito, NULL);
- decode_node_uchar4(node.z, &vector_in, &vector_out, NULL, NULL);
+ svm_unpack_node_uchar3(node.y, &itype, &ifrom, &ito);
+ svm_unpack_node_uchar2(node.z, &vector_in, &vector_out);
float3 in = stack_load_float3(stack, vector_in);
diff --git a/intern/cycles/kernel/svm/svm_vertex_color.h b/intern/cycles/kernel/svm/svm_vertex_color.h
new file mode 100644
index 00000000000..3c105b1cbfa
--- /dev/null
+++ b/intern/cycles/kernel/svm/svm_vertex_color.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void svm_node_vertex_color(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint layer_id,
+ uint color_offset,
+ uint alpha_offset)
+{
+ AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id);
+ if (descriptor.offset != ATTR_STD_NOT_FOUND) {
+ float4 vertex_color = primitive_attribute_float4(kg, sd, descriptor, NULL, NULL);
+ stack_store_float3(stack, color_offset, float4_to_float3(vertex_color));
+ stack_store_float(stack, alpha_offset, vertex_color.w);
+ }
+ else {
+ stack_store_float3(stack, color_offset, make_float3(0.0f, 0.0f, 0.0f));
+ stack_store_float(stack, alpha_offset, 0.0f);
+ }
+}
+
+#ifndef __KERNEL_CUDA__
+ccl_device
+#else
+ccl_device_noinline
+#endif
+ void
+ svm_node_vertex_color_bump_dx(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint layer_id,
+ uint color_offset,
+ uint alpha_offset)
+{
+ AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id);
+ if (descriptor.offset != ATTR_STD_NOT_FOUND) {
+ float4 dx;
+ float4 vertex_color = primitive_attribute_float4(kg, sd, descriptor, &dx, NULL);
+ vertex_color += dx;
+ stack_store_float3(stack, color_offset, float4_to_float3(vertex_color));
+ stack_store_float(stack, alpha_offset, vertex_color.w);
+ }
+ else {
+ stack_store_float3(stack, color_offset, make_float3(0.0f, 0.0f, 0.0f));
+ stack_store_float(stack, alpha_offset, 0.0f);
+ }
+}
+
+#ifndef __KERNEL_CUDA__
+ccl_device
+#else
+ccl_device_noinline
+#endif
+ void
+ svm_node_vertex_color_bump_dy(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint layer_id,
+ uint color_offset,
+ uint alpha_offset)
+{
+ AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id);
+ if (descriptor.offset != ATTR_STD_NOT_FOUND) {
+ float4 dy;
+ float4 vertex_color = primitive_attribute_float4(kg, sd, descriptor, NULL, &dy);
+ vertex_color += dy;
+ stack_store_float3(stack, color_offset, float4_to_float3(vertex_color));
+ stack_store_float(stack, alpha_offset, vertex_color.w);
+ }
+ else {
+ stack_store_float3(stack, color_offset, make_float3(0.0f, 0.0f, 0.0f));
+ stack_store_float(stack, alpha_offset, 0.0f);
+ }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h
index c311aefaf38..f0fc0068fa2 100644
--- a/intern/cycles/kernel/svm/svm_voronoi.h
+++ b/intern/cycles/kernel/svm/svm_voronoi.h
@@ -16,169 +16,1124 @@
CCL_NAMESPACE_BEGIN
-/* Voronoi */
-
-ccl_device void voronoi_neighbors(
- float3 p, NodeVoronoiDistanceMetric distance, float e, float da[4], float3 pa[4])
-{
- /* Compute the distance to and the position of the closest neighbors to p.
- *
- * The neighbors are randomly placed, 1 each in a 3x3x3 grid (Worley pattern).
- * The distances and points are returned in ascending order, i.e. da[0] and pa[0] will
- * contain the distance to the closest point and its coordinates respectively.
- */
-
- da[0] = 1e10f;
- da[1] = 1e10f;
- da[2] = 1e10f;
- da[3] = 1e10f;
-
- pa[0] = make_float3(0.0f, 0.0f, 0.0f);
- pa[1] = make_float3(0.0f, 0.0f, 0.0f);
- pa[2] = make_float3(0.0f, 0.0f, 0.0f);
- pa[3] = make_float3(0.0f, 0.0f, 0.0f);
-
- int3 xyzi = quick_floor_to_int3(p);
-
- for (int xx = -1; xx <= 1; xx++) {
- for (int yy = -1; yy <= 1; yy++) {
- for (int zz = -1; zz <= 1; zz++) {
- int3 ip = xyzi + make_int3(xx, yy, zz);
- float3 fp = make_float3(ip.x, ip.y, ip.z);
- float3 vp = fp + cellnoise3(fp);
-
- float d;
- switch (distance) {
- case NODE_VORONOI_DISTANCE:
- d = len_squared(p - vp);
- break;
- case NODE_VORONOI_MANHATTAN:
- d = reduce_add(fabs(vp - p));
- break;
- case NODE_VORONOI_CHEBYCHEV:
- d = max3(fabs(vp - p));
- break;
- case NODE_VORONOI_MINKOWSKI: {
- float3 n = fabs(vp - p);
- if (e == 0.5f) {
- d = sqr(reduce_add(sqrt(n)));
- }
- else {
- d = powf(reduce_add(pow3(n, e)), 1.0f / e);
- }
- break;
+/*
+ * Smooth Voronoi:
+ *
+ * - https://wiki.blender.org/wiki/User:OmarSquircleArt/GSoC2019/Documentation/Smooth_Voronoi
+ *
+ * Distance To Edge:
+ *
+ * - https://www.shadertoy.com/view/llG3zy
+ *
+ */
+
+/* **** 1D Voronoi **** */
+
+ccl_device float voronoi_distance_1d(float a,
+ float b,
+ NodeVoronoiDistanceMetric metric,
+ float exponent)
+{
+ return fabsf(b - a);
+}
+
+ccl_device void voronoi_f1_1d(float w,
+ float exponent,
+ float randomness,
+ NodeVoronoiDistanceMetric metric,
+ float *outDistance,
+ float3 *outColor,
+ float *outW)
+{
+ float cellPosition = floorf(w);
+ float localPosition = w - cellPosition;
+
+ float minDistance = 8.0f;
+ float targetOffset = 0.0f;
+ float targetPosition = 0.0f;
+ for (int i = -1; i <= 1; i++) {
+ float cellOffset = i;
+ float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance_1d(pointPosition, localPosition, metric, exponent);
+ if (distanceToPoint < minDistance) {
+ targetOffset = cellOffset;
+ minDistance = distanceToPoint;
+ targetPosition = pointPosition;
+ }
+ }
+ *outDistance = minDistance;
+ *outColor = hash_float_to_float3(cellPosition + targetOffset);
+ *outW = targetPosition + cellPosition;
+}
+
+ccl_device void voronoi_smooth_f1_1d(float w,
+ float smoothness,
+ float exponent,
+ float randomness,
+ NodeVoronoiDistanceMetric metric,
+ float *outDistance,
+ float3 *outColor,
+ float *outW)
+{
+ float cellPosition = floorf(w);
+ float localPosition = w - cellPosition;
+
+ float smoothDistance = 8.0f;
+ float smoothPosition = 0.0f;
+ float3 smoothColor = make_float3(0.0f, 0.0f, 0.0f);
+ for (int i = -2; i <= 2; i++) {
+ float cellOffset = i;
+ float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance_1d(pointPosition, localPosition, metric, exponent);
+ float h = smoothstep(
+ 0.0f, 1.0f, 0.5f + 0.5f * (smoothDistance - distanceToPoint) / smoothness);
+ float correctionFactor = smoothness * h * (1.0f - h);
+ smoothDistance = mix(smoothDistance, distanceToPoint, h) - correctionFactor;
+ correctionFactor /= 1.0f + 3.0f * smoothness;
+ float3 cellColor = hash_float_to_float3(cellPosition + cellOffset);
+ smoothColor = mix(smoothColor, cellColor, h) - correctionFactor;
+ smoothPosition = mix(smoothPosition, pointPosition, h) - correctionFactor;
+ }
+ *outDistance = smoothDistance;
+ *outColor = smoothColor;
+ *outW = cellPosition + smoothPosition;
+}
+
+ccl_device void voronoi_f2_1d(float w,
+ float exponent,
+ float randomness,
+ NodeVoronoiDistanceMetric metric,
+ float *outDistance,
+ float3 *outColor,
+ float *outW)
+{
+ float cellPosition = floorf(w);
+ float localPosition = w - cellPosition;
+
+ float distanceF1 = 8.0f;
+ float distanceF2 = 8.0f;
+ float offsetF1 = 0.0f;
+ float positionF1 = 0.0f;
+ float offsetF2 = 0.0f;
+ float positionF2 = 0.0f;
+ for (int i = -1; i <= 1; i++) {
+ float cellOffset = i;
+ float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance_1d(pointPosition, localPosition, metric, exponent);
+ if (distanceToPoint < distanceF1) {
+ distanceF2 = distanceF1;
+ distanceF1 = distanceToPoint;
+ offsetF2 = offsetF1;
+ offsetF1 = cellOffset;
+ positionF2 = positionF1;
+ positionF1 = pointPosition;
+ }
+ else if (distanceToPoint < distanceF2) {
+ distanceF2 = distanceToPoint;
+ offsetF2 = cellOffset;
+ positionF2 = pointPosition;
+ }
+ }
+ *outDistance = distanceF2;
+ *outColor = hash_float_to_float3(cellPosition + offsetF2);
+ *outW = positionF2 + cellPosition;
+}
+
+ccl_device void voronoi_distance_to_edge_1d(float w, float randomness, float *outDistance)
+{
+ float cellPosition = floorf(w);
+ float localPosition = w - cellPosition;
+
+ float minDistance = 8.0f;
+ for (int i = -1; i <= 1; i++) {
+ float cellOffset = i;
+ float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = fabsf(pointPosition - localPosition);
+ minDistance = min(distanceToPoint, minDistance);
+ }
+ *outDistance = minDistance;
+}
+
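+/* N-Sphere Radius: find the feature point closest to the evaluation point, then the feature
+ * point closest to that one. Half the distance between them approximates the radius of the
+ * largest sphere centered on the first point that still fits inside its Voronoi cell. */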
+ccl_device void voronoi_n_sphere_radius_1d(float w, float randomness, float *outRadius)
+{
+ float cellPosition = floorf(w);
+ float localPosition = w - cellPosition;
+
+ float closestPoint = 0.0f;
+ float closestPointOffset = 0.0f;
+ float minDistance = 8.0f;
+ for (int i = -1; i <= 1; i++) {
+ float cellOffset = i;
+ float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = fabsf(pointPosition - localPosition);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ closestPoint = pointPosition;
+ closestPointOffset = cellOffset;
+ }
+ }
+
+ minDistance = 8.0f;
+ float closestPointToClosestPoint = 0.0f;
+ for (int i = -1; i <= 1; i++) {
+ if (i == 0) {
+ continue;
+ }
+ float cellOffset = i + closestPointOffset;
+ float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = fabsf(closestPoint - pointPosition);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ closestPointToClosestPoint = pointPosition;
+ }
+ }
+ *outRadius = fabsf(closestPointToClosestPoint - closestPoint) / 2.0f;
+}
+
+/* **** 2D Voronoi **** */
+
+ccl_device float voronoi_distance_2d(float2 a,
+ float2 b,
+ NodeVoronoiDistanceMetric metric,
+ float exponent)
+{
+ if (metric == NODE_VORONOI_EUCLIDEAN) {
+ return distance(a, b);
+ }
+ else if (metric == NODE_VORONOI_MANHATTAN) {
+ return fabsf(a.x - b.x) + fabsf(a.y - b.y);
+ }
+ else if (metric == NODE_VORONOI_CHEBYCHEV) {
+ return max(fabsf(a.x - b.x), fabsf(a.y - b.y));
+ }
+ else if (metric == NODE_VORONOI_MINKOWSKI) {
+ return powf(powf(fabsf(a.x - b.x), exponent) + powf(fabsf(a.y - b.y), exponent),
+ 1.0f / exponent);
+ }
+ else {
+ return 0.0f;
+ }
+}
+
+ccl_device void voronoi_f1_2d(float2 coord,
+ float exponent,
+ float randomness,
+ NodeVoronoiDistanceMetric metric,
+ float *outDistance,
+ float3 *outColor,
+ float2 *outPosition)
+{
+ float2 cellPosition = floor(coord);
+ float2 localPosition = coord - cellPosition;
+
+ float minDistance = 8.0f;
+ float2 targetOffset = make_float2(0.0f, 0.0f);
+ float2 targetPosition = make_float2(0.0f, 0.0f);
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ float2 cellOffset = make_float2(i, j);
+ float2 pointPosition = cellOffset +
+ hash_float2_to_float2(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance_2d(pointPosition, localPosition, metric, exponent);
+ if (distanceToPoint < minDistance) {
+ targetOffset = cellOffset;
+ minDistance = distanceToPoint;
+ targetPosition = pointPosition;
+ }
+ }
+ }
+ *outDistance = minDistance;
+ *outColor = hash_float2_to_float3(cellPosition + targetOffset);
+ *outPosition = targetPosition + cellPosition;
+}
+
+ccl_device void voronoi_smooth_f1_2d(float2 coord,
+ float smoothness,
+ float exponent,
+ float randomness,
+ NodeVoronoiDistanceMetric metric,
+ float *outDistance,
+ float3 *outColor,
+ float2 *outPosition)
+{
+ float2 cellPosition = floor(coord);
+ float2 localPosition = coord - cellPosition;
+
+ float smoothDistance = 8.0f;
+ float3 smoothColor = make_float3(0.0f, 0.0f, 0.0f);
+ float2 smoothPosition = make_float2(0.0f, 0.0f);
+ for (int j = -2; j <= 2; j++) {
+ for (int i = -2; i <= 2; i++) {
+ float2 cellOffset = make_float2(i, j);
+ float2 pointPosition = cellOffset +
+ hash_float2_to_float2(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance_2d(pointPosition, localPosition, metric, exponent);
+ float h = smoothstep(
+ 0.0f, 1.0f, 0.5f + 0.5f * (smoothDistance - distanceToPoint) / smoothness);
+ float correctionFactor = smoothness * h * (1.0f - h);
+ smoothDistance = mix(smoothDistance, distanceToPoint, h) - correctionFactor;
+ correctionFactor /= 1.0f + 3.0f * smoothness;
+ float3 cellColor = hash_float2_to_float3(cellPosition + cellOffset);
+ smoothColor = mix(smoothColor, cellColor, h) - correctionFactor;
+ smoothPosition = mix(smoothPosition, pointPosition, h) - correctionFactor;
+ }
+ }
+ *outDistance = smoothDistance;
+ *outColor = smoothColor;
+ *outPosition = cellPosition + smoothPosition;
+}
+
+ccl_device void voronoi_f2_2d(float2 coord,
+ float exponent,
+ float randomness,
+ NodeVoronoiDistanceMetric metric,
+ float *outDistance,
+ float3 *outColor,
+ float2 *outPosition)
+{
+ float2 cellPosition = floor(coord);
+ float2 localPosition = coord - cellPosition;
+
+ float distanceF1 = 8.0f;
+ float distanceF2 = 8.0f;
+ float2 offsetF1 = make_float2(0.0f, 0.0f);
+ float2 positionF1 = make_float2(0.0f, 0.0f);
+ float2 offsetF2 = make_float2(0.0f, 0.0f);
+ float2 positionF2 = make_float2(0.0f, 0.0f);
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ float2 cellOffset = make_float2(i, j);
+ float2 pointPosition = cellOffset +
+ hash_float2_to_float2(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance_2d(pointPosition, localPosition, metric, exponent);
+ if (distanceToPoint < distanceF1) {
+ distanceF2 = distanceF1;
+ distanceF1 = distanceToPoint;
+ offsetF2 = offsetF1;
+ offsetF1 = cellOffset;
+ positionF2 = positionF1;
+ positionF1 = pointPosition;
+ }
+ else if (distanceToPoint < distanceF2) {
+ distanceF2 = distanceToPoint;
+ offsetF2 = cellOffset;
+ positionF2 = pointPosition;
+ }
+ }
+ }
+ *outDistance = distanceF2;
+ *outColor = hash_float2_to_float3(cellPosition + offsetF2);
+ *outPosition = positionF2 + cellPosition;
+}
+
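+/* Distance To Edge (2D/3D/4D): a first pass finds the vector to the closest feature point;
+ * a second pass projects the midpoint between that point and every other candidate onto the
+ * normalized direction between them, giving the distance to the separating cell boundary
+ * (see the Shadertoy reference above). */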
+ccl_device void voronoi_distance_to_edge_2d(float2 coord, float randomness, float *outDistance)
+{
+ float2 cellPosition = floor(coord);
+ float2 localPosition = coord - cellPosition;
+
+ float2 vectorToClosest = make_float2(0.0f, 0.0f);
+ float minDistance = 8.0f;
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ float2 cellOffset = make_float2(i, j);
+ float2 vectorToPoint = cellOffset +
+ hash_float2_to_float2(cellPosition + cellOffset) * randomness -
+ localPosition;
+ float distanceToPoint = dot(vectorToPoint, vectorToPoint);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ vectorToClosest = vectorToPoint;
+ }
+ }
+ }
+
+ minDistance = 8.0f;
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ float2 cellOffset = make_float2(i, j);
+ float2 vectorToPoint = cellOffset +
+ hash_float2_to_float2(cellPosition + cellOffset) * randomness -
+ localPosition;
+ float2 perpendicularToEdge = vectorToPoint - vectorToClosest;
+ if (dot(perpendicularToEdge, perpendicularToEdge) > 0.0001f) {
+ float distanceToEdge = dot((vectorToClosest + vectorToPoint) / 2.0f,
+ normalize(perpendicularToEdge));
+ minDistance = min(minDistance, distanceToEdge);
+ }
+ }
+ }
+ *outDistance = minDistance;
+}
+
+ccl_device void voronoi_n_sphere_radius_2d(float2 coord, float randomness, float *outRadius)
+{
+ float2 cellPosition = floor(coord);
+ float2 localPosition = coord - cellPosition;
+
+ float2 closestPoint = make_float2(0.0f, 0.0f);
+ float2 closestPointOffset = make_float2(0.0f, 0.0f);
+ float minDistance = 8.0f;
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ float2 cellOffset = make_float2(i, j);
+ float2 pointPosition = cellOffset +
+ hash_float2_to_float2(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = distance(pointPosition, localPosition);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ closestPoint = pointPosition;
+ closestPointOffset = cellOffset;
+ }
+ }
+ }
+
+ minDistance = 8.0f;
+ float2 closestPointToClosestPoint = make_float2(0.0f, 0.0f);
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ if (i == 0 && j == 0) {
+ continue;
+ }
+ float2 cellOffset = make_float2(i, j) + closestPointOffset;
+ float2 pointPosition = cellOffset +
+ hash_float2_to_float2(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = distance(closestPoint, pointPosition);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ closestPointToClosestPoint = pointPosition;
+ }
+ }
+ }
+ *outRadius = distance(closestPointToClosestPoint, closestPoint) / 2.0f;
+}
+
+/* **** 3D Voronoi **** */
+
+ccl_device float voronoi_distance_3d(float3 a,
+ float3 b,
+ NodeVoronoiDistanceMetric metric,
+ float exponent)
+{
+ if (metric == NODE_VORONOI_EUCLIDEAN) {
+ return distance(a, b);
+ }
+ else if (metric == NODE_VORONOI_MANHATTAN) {
+ return fabsf(a.x - b.x) + fabsf(a.y - b.y) + fabsf(a.z - b.z);
+ }
+ else if (metric == NODE_VORONOI_CHEBYCHEV) {
+ return max(fabsf(a.x - b.x), max(fabsf(a.y - b.y), fabsf(a.z - b.z)));
+ }
+ else if (metric == NODE_VORONOI_MINKOWSKI) {
+ return powf(powf(fabsf(a.x - b.x), exponent) + powf(fabsf(a.y - b.y), exponent) +
+ powf(fabsf(a.z - b.z), exponent),
+ 1.0f / exponent);
+ }
+ else {
+ return 0.0f;
+ }
+}
+
+ccl_device void voronoi_f1_3d(float3 coord,
+ float exponent,
+ float randomness,
+ NodeVoronoiDistanceMetric metric,
+ float *outDistance,
+ float3 *outColor,
+ float3 *outPosition)
+{
+ float3 cellPosition = floor(coord);
+ float3 localPosition = coord - cellPosition;
+
+ float minDistance = 8.0f;
+ float3 targetOffset = make_float3(0.0f, 0.0f, 0.0f);
+ float3 targetPosition = make_float3(0.0f, 0.0f, 0.0f);
+ for (int k = -1; k <= 1; k++) {
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ float3 cellOffset = make_float3(i, j, k);
+ float3 pointPosition = cellOffset +
+ hash_float3_to_float3(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance_3d(
+ pointPosition, localPosition, metric, exponent);
+ if (distanceToPoint < minDistance) {
+ targetOffset = cellOffset;
+ minDistance = distanceToPoint;
+ targetPosition = pointPosition;
+ }
+ }
+ }
+ }
+ *outDistance = minDistance;
+ *outColor = hash_float3_to_float3(cellPosition + targetOffset);
+ *outPosition = targetPosition + cellPosition;
+}
+
+ccl_device void voronoi_smooth_f1_3d(float3 coord,
+ float smoothness,
+ float exponent,
+ float randomness,
+ NodeVoronoiDistanceMetric metric,
+ float *outDistance,
+ float3 *outColor,
+ float3 *outPosition)
+{
+ float3 cellPosition = floor(coord);
+ float3 localPosition = coord - cellPosition;
+
+ float smoothDistance = 8.0f;
+ float3 smoothColor = make_float3(0.0f, 0.0f, 0.0f);
+ float3 smoothPosition = make_float3(0.0f, 0.0f, 0.0f);
+ for (int k = -2; k <= 2; k++) {
+ for (int j = -2; j <= 2; j++) {
+ for (int i = -2; i <= 2; i++) {
+ float3 cellOffset = make_float3(i, j, k);
+ float3 pointPosition = cellOffset +
+ hash_float3_to_float3(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance_3d(
+ pointPosition, localPosition, metric, exponent);
+ float h = smoothstep(
+ 0.0f, 1.0f, 0.5f + 0.5f * (smoothDistance - distanceToPoint) / smoothness);
+ float correctionFactor = smoothness * h * (1.0f - h);
+ smoothDistance = mix(smoothDistance, distanceToPoint, h) - correctionFactor;
+ correctionFactor /= 1.0f + 3.0f * smoothness;
+ float3 cellColor = hash_float3_to_float3(cellPosition + cellOffset);
+ smoothColor = mix(smoothColor, cellColor, h) - correctionFactor;
+ smoothPosition = mix(smoothPosition, pointPosition, h) - correctionFactor;
+ }
+ }
+ }
+ *outDistance = smoothDistance;
+ *outColor = smoothColor;
+ *outPosition = cellPosition + smoothPosition;
+}
+
+ccl_device void voronoi_f2_3d(float3 coord,
+ float exponent,
+ float randomness,
+ NodeVoronoiDistanceMetric metric,
+ float *outDistance,
+ float3 *outColor,
+ float3 *outPosition)
+{
+ float3 cellPosition = floor(coord);
+ float3 localPosition = coord - cellPosition;
+
+ float distanceF1 = 8.0f;
+ float distanceF2 = 8.0f;
+ float3 offsetF1 = make_float3(0.0f, 0.0f, 0.0f);
+ float3 positionF1 = make_float3(0.0f, 0.0f, 0.0f);
+ float3 offsetF2 = make_float3(0.0f, 0.0f, 0.0f);
+ float3 positionF2 = make_float3(0.0f, 0.0f, 0.0f);
+ for (int k = -1; k <= 1; k++) {
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ float3 cellOffset = make_float3(i, j, k);
+ float3 pointPosition = cellOffset +
+ hash_float3_to_float3(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance_3d(
+ pointPosition, localPosition, metric, exponent);
+ if (distanceToPoint < distanceF1) {
+ distanceF2 = distanceF1;
+ distanceF1 = distanceToPoint;
+ offsetF2 = offsetF1;
+ offsetF1 = cellOffset;
+ positionF2 = positionF1;
+ positionF1 = pointPosition;
+ }
+ else if (distanceToPoint < distanceF2) {
+ distanceF2 = distanceToPoint;
+ offsetF2 = cellOffset;
+ positionF2 = pointPosition;
+ }
+ }
+ }
+ }
+ *outDistance = distanceF2;
+ *outColor = hash_float3_to_float3(cellPosition + offsetF2);
+ *outPosition = positionF2 + cellPosition;
+}
+
+ccl_device void voronoi_distance_to_edge_3d(float3 coord, float randomness, float *outDistance)
+{
+ float3 cellPosition = floor(coord);
+ float3 localPosition = coord - cellPosition;
+
+ float3 vectorToClosest = make_float3(0.0f, 0.0f, 0.0f);
+ float minDistance = 8.0f;
+ for (int k = -1; k <= 1; k++) {
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ float3 cellOffset = make_float3(i, j, k);
+ float3 vectorToPoint = cellOffset +
+ hash_float3_to_float3(cellPosition + cellOffset) * randomness -
+ localPosition;
+ float distanceToPoint = dot(vectorToPoint, vectorToPoint);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ vectorToClosest = vectorToPoint;
+ }
+ }
+ }
+ }
+
+ minDistance = 8.0f;
+ for (int k = -1; k <= 1; k++) {
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ float3 cellOffset = make_float3(i, j, k);
+ float3 vectorToPoint = cellOffset +
+ hash_float3_to_float3(cellPosition + cellOffset) * randomness -
+ localPosition;
+ float3 perpendicularToEdge = vectorToPoint - vectorToClosest;
+ if (dot(perpendicularToEdge, perpendicularToEdge) > 0.0001f) {
+ float distanceToEdge = dot((vectorToClosest + vectorToPoint) / 2.0f,
+ normalize(perpendicularToEdge));
+ minDistance = min(minDistance, distanceToEdge);
+ }
+ }
+ }
+ }
+ *outDistance = minDistance;
+}
+
+ccl_device void voronoi_n_sphere_radius_3d(float3 coord, float randomness, float *outRadius)
+{
+ float3 cellPosition = floor(coord);
+ float3 localPosition = coord - cellPosition;
+
+ float3 closestPoint = make_float3(0.0f, 0.0f, 0.0f);
+ float3 closestPointOffset = make_float3(0.0f, 0.0f, 0.0f);
+ float minDistance = 8.0f;
+ for (int k = -1; k <= 1; k++) {
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ float3 cellOffset = make_float3(i, j, k);
+ float3 pointPosition = cellOffset +
+ hash_float3_to_float3(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = distance(pointPosition, localPosition);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ closestPoint = pointPosition;
+ closestPointOffset = cellOffset;
+ }
+ }
+ }
+ }
+
+ minDistance = 8.0f;
+ float3 closestPointToClosestPoint = make_float3(0.0f, 0.0f, 0.0f);
+ for (int k = -1; k <= 1; k++) {
+ for (int j = -1; j <= 1; j++) {
+ for (int i = -1; i <= 1; i++) {
+ if (i == 0 && j == 0 && k == 0) {
+ continue;
+ }
+ float3 cellOffset = make_float3(i, j, k) + closestPointOffset;
+ float3 pointPosition = cellOffset +
+ hash_float3_to_float3(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = distance(closestPoint, pointPosition);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ closestPointToClosestPoint = pointPosition;
+ }
+ }
+ }
+ }
+ *outRadius = distance(closestPointToClosestPoint, closestPoint) / 2.0f;
+}
+
+/* **** 4D Voronoi **** */
+
+ccl_device float voronoi_distance_4d(float4 a,
+ float4 b,
+ NodeVoronoiDistanceMetric metric,
+ float exponent)
+{
+ if (metric == NODE_VORONOI_EUCLIDEAN) {
+ return distance(a, b);
+ }
+ else if (metric == NODE_VORONOI_MANHATTAN) {
+ return fabsf(a.x - b.x) + fabsf(a.y - b.y) + fabsf(a.z - b.z) + fabsf(a.w - b.w);
+ }
+ else if (metric == NODE_VORONOI_CHEBYCHEV) {
+ return max(fabsf(a.x - b.x), max(fabsf(a.y - b.y), max(fabsf(a.z - b.z), fabsf(a.w - b.w))));
+ }
+ else if (metric == NODE_VORONOI_MINKOWSKI) {
+ return powf(powf(fabsf(a.x - b.x), exponent) + powf(fabsf(a.y - b.y), exponent) +
+ powf(fabsf(a.z - b.z), exponent) + powf(fabsf(a.w - b.w), exponent),
+ 1.0f / exponent);
+ }
+ else {
+ return 0.0f;
+ }
+}
+
+ccl_device void voronoi_f1_4d(float4 coord,
+ float exponent,
+ float randomness,
+ NodeVoronoiDistanceMetric metric,
+ float *outDistance,
+ float3 *outColor,
+ float4 *outPosition)
+{
+ float4 cellPosition = floor(coord);
+ float4 localPosition = coord - cellPosition;
+
+ float minDistance = 8.0f;
+ float4 targetOffset = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ float4 targetPosition = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ for (int u = -1; u <= 1; u++) {
+ for (int k = -1; k <= 1; k++) {
+ ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
+ {
+ for (int i = -1; i <= 1; i++) {
+ float4 cellOffset = make_float4(i, j, k, u);
+ float4 pointPosition = cellOffset +
+ hash_float4_to_float4(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance_4d(
+ pointPosition, localPosition, metric, exponent);
+ if (distanceToPoint < minDistance) {
+ targetOffset = cellOffset;
+ minDistance = distanceToPoint;
+ targetPosition = pointPosition;
+ }
+ }
+ }
+ }
+ }
+ *outDistance = minDistance;
+ *outColor = hash_float4_to_float3(cellPosition + targetOffset);
+ *outPosition = targetPosition + cellPosition;
+}
+
+ccl_device void voronoi_smooth_f1_4d(float4 coord,
+ float smoothness,
+ float exponent,
+ float randomness,
+ NodeVoronoiDistanceMetric metric,
+ float *outDistance,
+ float3 *outColor,
+ float4 *outPosition)
+{
+ float4 cellPosition = floor(coord);
+ float4 localPosition = coord - cellPosition;
+
+ float smoothDistance = 8.0f;
+ float3 smoothColor = make_float3(0.0f, 0.0f, 0.0f);
+ float4 smoothPosition = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ for (int u = -2; u <= 2; u++) {
+ for (int k = -2; k <= 2; k++) {
+ ccl_loop_no_unroll for (int j = -2; j <= 2; j++)
+ {
+ for (int i = -2; i <= 2; i++) {
+ float4 cellOffset = make_float4(i, j, k, u);
+ float4 pointPosition = cellOffset +
+ hash_float4_to_float4(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance_4d(
+ pointPosition, localPosition, metric, exponent);
+ float h = smoothstep(
+ 0.0f, 1.0f, 0.5f + 0.5f * (smoothDistance - distanceToPoint) / smoothness);
+ float correctionFactor = smoothness * h * (1.0f - h);
+ smoothDistance = mix(smoothDistance, distanceToPoint, h) - correctionFactor;
+ correctionFactor /= 1.0f + 3.0f * smoothness;
+ float3 cellColor = hash_float4_to_float3(cellPosition + cellOffset);
+ smoothColor = mix(smoothColor, cellColor, h) - correctionFactor;
+ smoothPosition = mix(smoothPosition, pointPosition, h) - correctionFactor;
+ }
+ }
+ }
+ }
+ *outDistance = smoothDistance;
+ *outColor = smoothColor;
+ *outPosition = cellPosition + smoothPosition;
+}
+
+ccl_device void voronoi_f2_4d(float4 coord,
+ float exponent,
+ float randomness,
+ NodeVoronoiDistanceMetric metric,
+ float *outDistance,
+ float3 *outColor,
+ float4 *outPosition)
+{
+ float4 cellPosition = floor(coord);
+ float4 localPosition = coord - cellPosition;
+
+ float distanceF1 = 8.0f;
+ float distanceF2 = 8.0f;
+ float4 offsetF1 = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ float4 positionF1 = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ float4 offsetF2 = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ float4 positionF2 = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ for (int u = -1; u <= 1; u++) {
+ for (int k = -1; k <= 1; k++) {
+ ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
+ {
+ for (int i = -1; i <= 1; i++) {
+ float4 cellOffset = make_float4(i, j, k, u);
+ float4 pointPosition = cellOffset +
+ hash_float4_to_float4(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = voronoi_distance_4d(
+ pointPosition, localPosition, metric, exponent);
+ if (distanceToPoint < distanceF1) {
+ distanceF2 = distanceF1;
+ distanceF1 = distanceToPoint;
+ offsetF2 = offsetF1;
+ offsetF1 = cellOffset;
+ positionF2 = positionF1;
+ positionF1 = pointPosition;
+ }
+ else if (distanceToPoint < distanceF2) {
+ distanceF2 = distanceToPoint;
+ offsetF2 = cellOffset;
+ positionF2 = pointPosition;
}
}
+ }
+ }
+ }
+ *outDistance = distanceF2;
+ *outColor = hash_float4_to_float3(cellPosition + offsetF2);
+ *outPosition = positionF2 + cellPosition;
+}
+
+ccl_device void voronoi_distance_to_edge_4d(float4 coord, float randomness, float *outDistance)
+{
+ float4 cellPosition = floor(coord);
+ float4 localPosition = coord - cellPosition;
- /* To keep the shortest four distances and associated points we have to keep them in sorted order. */
- if (d < da[0]) {
- da[3] = da[2];
- da[2] = da[1];
- da[1] = da[0];
- da[0] = d;
-
- pa[3] = pa[2];
- pa[2] = pa[1];
- pa[1] = pa[0];
- pa[0] = vp;
+ float4 vectorToClosest = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ float minDistance = 8.0f;
+ for (int u = -1; u <= 1; u++) {
+ for (int k = -1; k <= 1; k++) {
+ ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
+ {
+ for (int i = -1; i <= 1; i++) {
+ float4 cellOffset = make_float4(i, j, k, u);
+ float4 vectorToPoint = cellOffset +
+ hash_float4_to_float4(cellPosition + cellOffset) * randomness -
+ localPosition;
+ float distanceToPoint = dot(vectorToPoint, vectorToPoint);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ vectorToClosest = vectorToPoint;
+ }
}
- else if (d < da[1]) {
- da[3] = da[2];
- da[2] = da[1];
- da[1] = d;
-
- pa[3] = pa[2];
- pa[2] = pa[1];
- pa[1] = vp;
+ }
+ }
+ }
+
+ minDistance = 8.0f;
+ for (int u = -1; u <= 1; u++) {
+ for (int k = -1; k <= 1; k++) {
+ ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
+ {
+ for (int i = -1; i <= 1; i++) {
+ float4 cellOffset = make_float4(i, j, k, u);
+ float4 vectorToPoint = cellOffset +
+ hash_float4_to_float4(cellPosition + cellOffset) * randomness -
+ localPosition;
+ float4 perpendicularToEdge = vectorToPoint - vectorToClosest;
+ if (dot(perpendicularToEdge, perpendicularToEdge) > 0.0001f) {
+ float distanceToEdge = dot((vectorToClosest + vectorToPoint) / 2.0f,
+ normalize(perpendicularToEdge));
+ minDistance = min(minDistance, distanceToEdge);
+ }
}
- else if (d < da[2]) {
- da[3] = da[2];
- da[2] = d;
+ }
+ }
+ }
+ *outDistance = minDistance;
+}
- pa[3] = pa[2];
- pa[2] = vp;
+ccl_device void voronoi_n_sphere_radius_4d(float4 coord, float randomness, float *outRadius)
+{
+ float4 cellPosition = floor(coord);
+ float4 localPosition = coord - cellPosition;
+
+ float4 closestPoint = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ float4 closestPointOffset = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ float minDistance = 8.0f;
+ for (int u = -1; u <= 1; u++) {
+ for (int k = -1; k <= 1; k++) {
+ ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
+ {
+ for (int i = -1; i <= 1; i++) {
+ float4 cellOffset = make_float4(i, j, k, u);
+ float4 pointPosition = cellOffset +
+ hash_float4_to_float4(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = distance(pointPosition, localPosition);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ closestPoint = pointPosition;
+ closestPointOffset = cellOffset;
+ }
}
- else if (d < da[3]) {
- da[3] = d;
- pa[3] = vp;
+ }
+ }
+ }
+
+ minDistance = 8.0f;
+ float4 closestPointToClosestPoint = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ for (int u = -1; u <= 1; u++) {
+ for (int k = -1; k <= 1; k++) {
+ ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
+ {
+ for (int i = -1; i <= 1; i++) {
+ if (i == 0 && j == 0 && k == 0 && u == 0) {
+ continue;
+ }
+ float4 cellOffset = make_float4(i, j, k, u) + closestPointOffset;
+ float4 pointPosition = cellOffset +
+ hash_float4_to_float4(cellPosition + cellOffset) * randomness;
+ float distanceToPoint = distance(closestPoint, pointPosition);
+ if (distanceToPoint < minDistance) {
+ minDistance = distanceToPoint;
+ closestPointToClosestPoint = pointPosition;
+ }
}
}
}
}
+ *outRadius = distance(closestPointToClosestPoint, closestPoint) / 2.0f;
}
-ccl_device void svm_node_tex_voronoi(
- KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_tex_voronoi(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint dimensions,
+ uint feature,
+ uint metric,
+ int *offset)
{
- uint4 node2 = read_node(kg, offset);
+ uint4 stack_offsets = read_node(kg, offset);
+ uint4 defaults = read_node(kg, offset);
+
+ uint coord_stack_offset, w_stack_offset, scale_stack_offset, smoothness_stack_offset;
+ uint exponent_stack_offset, randomness_stack_offset, distance_out_stack_offset,
+ color_out_stack_offset;
+ uint position_out_stack_offset, w_out_stack_offset, radius_out_stack_offset;
+
+ svm_unpack_node_uchar4(stack_offsets.x,
+ &coord_stack_offset,
+ &w_stack_offset,
+ &scale_stack_offset,
+ &smoothness_stack_offset);
+ svm_unpack_node_uchar4(stack_offsets.y,
+ &exponent_stack_offset,
+ &randomness_stack_offset,
+ &distance_out_stack_offset,
+ &color_out_stack_offset);
+ svm_unpack_node_uchar3(
+ stack_offsets.z, &position_out_stack_offset, &w_out_stack_offset, &radius_out_stack_offset);
- uint co_offset, coloring, distance, feature;
- uint scale_offset, e_offset, fac_offset, color_offset;
+ float3 coord = stack_load_float3(stack, coord_stack_offset);
+ float w = stack_load_float_default(stack, w_stack_offset, stack_offsets.w);
+ float scale = stack_load_float_default(stack, scale_stack_offset, defaults.x);
+ float smoothness = stack_load_float_default(stack, smoothness_stack_offset, defaults.y);
+ float exponent = stack_load_float_default(stack, exponent_stack_offset, defaults.z);
+ float randomness = stack_load_float_default(stack, randomness_stack_offset, defaults.w);
- decode_node_uchar4(node.y, &co_offset, &coloring, &distance, &feature);
- decode_node_uchar4(node.z, &scale_offset, &e_offset, &fac_offset, &color_offset);
+ NodeVoronoiFeature voronoi_feature = (NodeVoronoiFeature)feature;
+ NodeVoronoiDistanceMetric voronoi_metric = (NodeVoronoiDistanceMetric)metric;
- float3 co = stack_load_float3(stack, co_offset);
- float scale = stack_load_float_default(stack, scale_offset, node2.x);
- float exponent = stack_load_float_default(stack, e_offset, node2.y);
+ float distance_out = 0.0f, w_out = 0.0f, radius_out = 0.0f;
+ float3 color_out = make_float3(0.0f, 0.0f, 0.0f);
+ float3 position_out = make_float3(0.0f, 0.0f, 0.0f);
- float dist[4];
- float3 neighbor[4];
- voronoi_neighbors(co * scale, (NodeVoronoiDistanceMetric)distance, exponent, dist, neighbor);
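+  /* Clamp inputs to their supported ranges: randomness to [0, 1], smoothness (halved) to
+   * [0, 0.5]. */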
+ randomness = clamp(randomness, 0.0f, 1.0f);
+ smoothness = clamp(smoothness / 2.0f, 0.0f, 0.5f);
- float3 color;
- float fac;
- if (coloring == NODE_VORONOI_INTENSITY) {
- switch (feature) {
- case NODE_VORONOI_F1:
- fac = dist[0];
- break;
- case NODE_VORONOI_F2:
- fac = dist[1];
- break;
- case NODE_VORONOI_F3:
- fac = dist[2];
- break;
- case NODE_VORONOI_F4:
- fac = dist[3];
- break;
- case NODE_VORONOI_F2F1:
- fac = dist[1] - dist[0];
- break;
+ w *= scale;
+ coord *= scale;
+
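+  /* Dispatch on dimensionality. Smooth F1 for 2D/3D and the whole 4D path are compiled only
+   * when NODES_FEATURE(NODE_FEATURE_VORONOI_EXTRA) is enabled; without it those cases end up
+   * in the asserting default branch. */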
+ switch (dimensions) {
+ case 1: {
+ switch (voronoi_feature) {
+ case NODE_VORONOI_F1:
+ voronoi_f1_1d(
+ w, exponent, randomness, voronoi_metric, &distance_out, &color_out, &w_out);
+ break;
+ case NODE_VORONOI_SMOOTH_F1:
+ voronoi_smooth_f1_1d(w,
+ smoothness,
+ exponent,
+ randomness,
+ voronoi_metric,
+ &distance_out,
+ &color_out,
+ &w_out);
+ break;
+ case NODE_VORONOI_F2:
+ voronoi_f2_1d(
+ w, exponent, randomness, voronoi_metric, &distance_out, &color_out, &w_out);
+ break;
+ case NODE_VORONOI_DISTANCE_TO_EDGE:
+ voronoi_distance_to_edge_1d(w, randomness, &distance_out);
+ break;
+ case NODE_VORONOI_N_SPHERE_RADIUS:
+ voronoi_n_sphere_radius_1d(w, randomness, &radius_out);
+ break;
+ default:
+ kernel_assert(0);
+ }
+ w_out = safe_divide(w_out, scale);
+ break;
+ }
+ case 2: {
+ float2 coord_2d = make_float2(coord.x, coord.y);
+ float2 position_out_2d;
+ switch (voronoi_feature) {
+ case NODE_VORONOI_F1:
+ voronoi_f1_2d(coord_2d,
+ exponent,
+ randomness,
+ voronoi_metric,
+ &distance_out,
+ &color_out,
+ &position_out_2d);
+ break;
+#if NODES_FEATURE(NODE_FEATURE_VORONOI_EXTRA)
+ case NODE_VORONOI_SMOOTH_F1:
+ voronoi_smooth_f1_2d(coord_2d,
+ smoothness,
+ exponent,
+ randomness,
+ voronoi_metric,
+ &distance_out,
+ &color_out,
+ &position_out_2d);
+ break;
+#endif
+ case NODE_VORONOI_F2:
+ voronoi_f2_2d(coord_2d,
+ exponent,
+ randomness,
+ voronoi_metric,
+ &distance_out,
+ &color_out,
+ &position_out_2d);
+ break;
+ case NODE_VORONOI_DISTANCE_TO_EDGE:
+ voronoi_distance_to_edge_2d(coord_2d, randomness, &distance_out);
+ break;
+ case NODE_VORONOI_N_SPHERE_RADIUS:
+ voronoi_n_sphere_radius_2d(coord_2d, randomness, &radius_out);
+ break;
+ default:
+ kernel_assert(0);
+ }
+ position_out_2d = safe_divide_float2_float(position_out_2d, scale);
+ position_out = make_float3(position_out_2d.x, position_out_2d.y, 0.0f);
+ break;
+ }
+ case 3: {
+ switch (voronoi_feature) {
+ case NODE_VORONOI_F1:
+ voronoi_f1_3d(coord,
+ exponent,
+ randomness,
+ voronoi_metric,
+ &distance_out,
+ &color_out,
+ &position_out);
+ break;
+#if NODES_FEATURE(NODE_FEATURE_VORONOI_EXTRA)
+ case NODE_VORONOI_SMOOTH_F1:
+ voronoi_smooth_f1_3d(coord,
+ smoothness,
+ exponent,
+ randomness,
+ voronoi_metric,
+ &distance_out,
+ &color_out,
+ &position_out);
+ break;
+#endif
+ case NODE_VORONOI_F2:
+ voronoi_f2_3d(coord,
+ exponent,
+ randomness,
+ voronoi_metric,
+ &distance_out,
+ &color_out,
+ &position_out);
+ break;
+ case NODE_VORONOI_DISTANCE_TO_EDGE:
+ voronoi_distance_to_edge_3d(coord, randomness, &distance_out);
+ break;
+ case NODE_VORONOI_N_SPHERE_RADIUS:
+ voronoi_n_sphere_radius_3d(coord, randomness, &radius_out);
+ break;
+ default:
+ kernel_assert(0);
+ }
+ position_out = safe_divide_float3_float(position_out, scale);
+ break;
}
- color = make_float3(fac, fac, fac);
+#if NODES_FEATURE(NODE_FEATURE_VORONOI_EXTRA)
+ case 4: {
+ float4 coord_4d = make_float4(coord.x, coord.y, coord.z, w);
+ float4 position_out_4d;
+ switch (voronoi_feature) {
+ case NODE_VORONOI_F1:
+ voronoi_f1_4d(coord_4d,
+ exponent,
+ randomness,
+ voronoi_metric,
+ &distance_out,
+ &color_out,
+ &position_out_4d);
+ break;
+ case NODE_VORONOI_SMOOTH_F1:
+ voronoi_smooth_f1_4d(coord_4d,
+ smoothness,
+ exponent,
+ randomness,
+ voronoi_metric,
+ &distance_out,
+ &color_out,
+ &position_out_4d);
+ break;
+ case NODE_VORONOI_F2:
+ voronoi_f2_4d(coord_4d,
+ exponent,
+ randomness,
+ voronoi_metric,
+ &distance_out,
+ &color_out,
+ &position_out_4d);
+ break;
+ case NODE_VORONOI_DISTANCE_TO_EDGE:
+ voronoi_distance_to_edge_4d(coord_4d, randomness, &distance_out);
+ break;
+ case NODE_VORONOI_N_SPHERE_RADIUS:
+ voronoi_n_sphere_radius_4d(coord_4d, randomness, &radius_out);
+ break;
+ default:
+ kernel_assert(0);
+ }
+ position_out_4d = safe_divide_float4_float(position_out_4d, scale);
+ position_out = make_float3(position_out_4d.x, position_out_4d.y, position_out_4d.z);
+ w_out = position_out_4d.w;
+ break;
+ }
+#endif
+ default:
+ kernel_assert(0);
}
- else {
- /* NODE_VORONOI_CELLS */
- switch (feature) {
- case NODE_VORONOI_F1:
- color = neighbor[0];
- break;
- case NODE_VORONOI_F2:
- color = neighbor[1];
- break;
- case NODE_VORONOI_F3:
- color = neighbor[2];
- break;
- case NODE_VORONOI_F4:
- color = neighbor[3];
- break;
- /* Usefulness of this vector is questionable. Note F2 >= F1 but the
- * individual vector components might not be. */
- case NODE_VORONOI_F2F1:
- color = fabs(neighbor[1] - neighbor[0]);
- break;
- }
-
- color = cellnoise3(color);
- fac = average(color);
- }
-
- if (stack_valid(fac_offset))
- stack_store_float(stack, fac_offset, fac);
- if (stack_valid(color_offset))
- stack_store_float3(stack, color_offset, color);
+
+ if (stack_valid(distance_out_stack_offset))
+ stack_store_float(stack, distance_out_stack_offset, distance_out);
+ if (stack_valid(color_out_stack_offset))
+ stack_store_float3(stack, color_out_stack_offset, color_out);
+ if (stack_valid(position_out_stack_offset))
+ stack_store_float3(stack, position_out_stack_offset, position_out);
+ if (stack_valid(w_out_stack_offset))
+ stack_store_float(stack, w_out_stack_offset, w_out);
+ if (stack_valid(radius_out_stack_offset))
+ stack_store_float(stack, radius_out_stack_offset, radius_out);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h
index 26d8cc71d3b..4bc14f82382 100644
--- a/intern/cycles/kernel/svm/svm_voxel.h
+++ b/intern/cycles/kernel/svm/svm_voxel.h
@@ -23,7 +23,7 @@ ccl_device void svm_node_tex_voxel(
KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
uint co_offset, density_out_offset, color_out_offset, space;
- decode_node_uchar4(node.z, &co_offset, &density_out_offset, &color_out_offset, &space);
+ svm_unpack_node_uchar4(node.z, &co_offset, &density_out_offset, &color_out_offset, &space);
#ifdef __VOLUME__
int id = node.y;
float3 co = stack_load_float3(stack, co_offset);
@@ -39,7 +39,7 @@ ccl_device void svm_node_tex_voxel(
co = transform_point(&tfm, co);
}
- float4 r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z, INTERPOLATION_NONE);
+ float4 r = kernel_tex_image_interp_3d(kg, id, co, INTERPOLATION_NONE);
#else
float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
#endif
diff --git a/intern/cycles/kernel/svm/svm_wave.h b/intern/cycles/kernel/svm/svm_wave.h
index 003ad7dc63a..c4763475b47 100644
--- a/intern/cycles/kernel/svm/svm_wave.h
+++ b/intern/cycles/kernel/svm/svm_wave.h
@@ -18,30 +18,67 @@ CCL_NAMESPACE_BEGIN
/* Wave */
-ccl_device_noinline float svm_wave(NodeWaveType type,
- NodeWaveProfile profile,
- float3 p,
- float detail,
- float distortion,
- float dscale)
+ccl_device_noinline_cpu float svm_wave(NodeWaveType type,
+ NodeWaveBandsDirection bands_dir,
+ NodeWaveRingsDirection rings_dir,
+ NodeWaveProfile profile,
+ float3 p,
+ float distortion,
+ float detail,
+ float dscale,
+ float droughness,
+ float phase)
{
+ /* Prevent precision issues on unit coordinates. */
+ p = (p + 0.000001f) * 0.999999f;
+
float n;
- if (type == NODE_WAVE_BANDS)
- n = (p.x + p.y + p.z) * 10.0f;
- else /* NODE_WAVE_RINGS */
- n = len(p) * 20.0f;
+ if (type == NODE_WAVE_BANDS) {
+ if (bands_dir == NODE_WAVE_BANDS_DIRECTION_X) {
+ n = p.x * 20.0f;
+ }
+ else if (bands_dir == NODE_WAVE_BANDS_DIRECTION_Y) {
+ n = p.y * 20.0f;
+ }
+ else if (bands_dir == NODE_WAVE_BANDS_DIRECTION_Z) {
+ n = p.z * 20.0f;
+ }
+ else { /* NODE_WAVE_BANDS_DIRECTION_DIAGONAL */
+ n = (p.x + p.y + p.z) * 10.0f;
+ }
+ }
+ else { /* NODE_WAVE_RINGS */
+ float3 rp = p;
+ if (rings_dir == NODE_WAVE_RINGS_DIRECTION_X) {
+ rp *= make_float3(0.0f, 1.0f, 1.0f);
+ }
+ else if (rings_dir == NODE_WAVE_RINGS_DIRECTION_Y) {
+ rp *= make_float3(1.0f, 0.0f, 1.0f);
+ }
+ else if (rings_dir == NODE_WAVE_RINGS_DIRECTION_Z) {
+ rp *= make_float3(1.0f, 1.0f, 0.0f);
+ }
+ /* else: NODE_WAVE_RINGS_DIRECTION_SPHERICAL */
+
+ n = len(rp) * 20.0f;
+ }
+
+ n += phase;
if (distortion != 0.0f)
- n += distortion * noise_turbulence(p * dscale, detail, 0);
+ n += distortion * (fractal_noise_3d(p * dscale, detail, droughness) * 2.0f - 1.0f);
if (profile == NODE_WAVE_PROFILE_SIN) {
- return 0.5f + 0.5f * sinf(n);
+ return 0.5f + 0.5f * sinf(n - M_PI_2_F);
+ }
+ else if (profile == NODE_WAVE_PROFILE_SAW) {
+ n /= M_2PI_F;
+ return n - floorf(n);
}
- else { /* NODE_WAVE_PROFILE_SAW */
+ else { /* NODE_WAVE_PROFILE_TRI */
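+    /* Triangle wave: after normalizing by 2*pi, take twice the distance to the nearest
+     * integer, giving a ramp in [0, 1]. */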
n /= M_2PI_F;
- n -= (int)n;
- return (n < 0.0f) ? n + 1.0f : n;
+ return fabsf(n - floorf(n + 0.5f)) * 2.0f;
}
}
@@ -49,22 +86,40 @@ ccl_device void svm_node_tex_wave(
KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
uint4 node2 = read_node(kg, offset);
+ uint4 node3 = read_node(kg, offset);
- uint type;
- uint co_offset, scale_offset, detail_offset, dscale_offset, distortion_offset, color_offset,
- fac_offset;
+ /* RNA properties */
+ uint type_offset, bands_dir_offset, rings_dir_offset, profile_offset;
+ /* Inputs, Outputs */
+ uint co_offset, scale_offset, distortion_offset, detail_offset, dscale_offset, droughness_offset,
+ phase_offset;
+ uint color_offset, fac_offset;
- decode_node_uchar4(node.y, &type, &color_offset, &fac_offset, &dscale_offset);
- decode_node_uchar4(node.z, &co_offset, &scale_offset, &detail_offset, &distortion_offset);
+ svm_unpack_node_uchar4(
+ node.y, &type_offset, &bands_dir_offset, &rings_dir_offset, &profile_offset);
+ svm_unpack_node_uchar3(node.z, &co_offset, &scale_offset, &distortion_offset);
+ svm_unpack_node_uchar4(
+ node.w, &detail_offset, &dscale_offset, &droughness_offset, &phase_offset);
+ svm_unpack_node_uchar2(node2.x, &color_offset, &fac_offset);
float3 co = stack_load_float3(stack, co_offset);
- float scale = stack_load_float_default(stack, scale_offset, node2.x);
- float detail = stack_load_float_default(stack, detail_offset, node2.y);
+ float scale = stack_load_float_default(stack, scale_offset, node2.y);
float distortion = stack_load_float_default(stack, distortion_offset, node2.z);
- float dscale = stack_load_float_default(stack, dscale_offset, node2.w);
+ float detail = stack_load_float_default(stack, detail_offset, node2.w);
+ float dscale = stack_load_float_default(stack, dscale_offset, node3.x);
+ float droughness = stack_load_float_default(stack, droughness_offset, node3.y);
+ float phase = stack_load_float_default(stack, phase_offset, node3.z);
- float f = svm_wave(
- (NodeWaveType)type, (NodeWaveProfile)node.w, co * scale, detail, distortion, dscale);
+ float f = svm_wave((NodeWaveType)type_offset,
+ (NodeWaveBandsDirection)bands_dir_offset,
+ (NodeWaveRingsDirection)rings_dir_offset,
+ (NodeWaveProfile)profile_offset,
+ co * scale,
+ distortion,
+ detail,
+ dscale,
+ droughness,
+ phase);
if (stack_valid(fac_offset))
stack_store_float(stack, fac_offset, f);
diff --git a/intern/cycles/kernel/svm/svm_white_noise.h b/intern/cycles/kernel/svm/svm_white_noise.h
new file mode 100644
index 00000000000..b30d85acaec
--- /dev/null
+++ b/intern/cycles/kernel/svm/svm_white_noise.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void svm_node_tex_white_noise(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint dimensions,
+ uint inputs_stack_offsets,
+                                         uint outputs_stack_offsets,
+ int *offset)
+{
+ uint vector_stack_offset, w_stack_offset, value_stack_offset, color_stack_offset;
+ svm_unpack_node_uchar2(inputs_stack_offsets, &vector_stack_offset, &w_stack_offset);
+  svm_unpack_node_uchar2(outputs_stack_offsets, &value_stack_offset, &color_stack_offset);
+
+ float3 vector = stack_load_float3(stack, vector_stack_offset);
+ float w = stack_load_float(stack, w_stack_offset);
+
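+  /* Only evaluate outputs whose stack offsets are valid, i.e. sockets that are actually used. */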
+ if (stack_valid(color_stack_offset)) {
+ float3 color;
+ switch (dimensions) {
+ case 1:
+ color = hash_float_to_float3(w);
+ break;
+ case 2:
+ color = hash_float2_to_float3(make_float2(vector.x, vector.y));
+ break;
+ case 3:
+ color = hash_float3_to_float3(vector);
+ break;
+ case 4:
+ color = hash_float4_to_float3(make_float4(vector.x, vector.y, vector.z, w));
+ break;
+ default:
+ color = make_float3(1.0f, 0.0f, 1.0f);
+ kernel_assert(0);
+ break;
+ }
+ stack_store_float3(stack, color_stack_offset, color);
+ }
+
+ if (stack_valid(value_stack_offset)) {
+ float value;
+ switch (dimensions) {
+ case 1:
+ value = hash_float_to_float(w);
+ break;
+ case 2:
+ value = hash_float2_to_float(make_float2(vector.x, vector.y));
+ break;
+ case 3:
+ value = hash_float3_to_float(vector);
+ break;
+ case 4:
+ value = hash_float4_to_float(make_float4(vector.x, vector.y, vector.z, w));
+ break;
+ default:
+ value = 0.0f;
+ kernel_assert(0);
+ break;
+ }
+ stack_store_float(stack, value_stack_offset, value);
+ }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h
index 55e61d0e8c7..49158bd86d5 100644
--- a/intern/cycles/kernel/svm/svm_wireframe.h
+++ b/intern/cycles/kernel/svm/svm_wireframe.h
@@ -93,7 +93,7 @@ ccl_device void svm_node_wireframe(KernelGlobals *kg, ShaderData *sd, float *sta
uint in_size = node.y;
uint out_fac = node.z;
uint use_pixel_size, bump_offset;
- decode_node_uchar4(node.w, &use_pixel_size, &bump_offset, NULL, NULL);
+ svm_unpack_node_uchar2(node.w, &use_pixel_size, &bump_offset);
/* Input Data */
float size = stack_load_float(stack, in_size);
diff --git a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt
index 32bdb077ab7..e4b86d33fe7 100644
--- a/intern/cycles/render/CMakeLists.txt
+++ b/intern/cycles/render/CMakeLists.txt
@@ -2,6 +2,7 @@
set(INC
..
../../glew-mx
+ ../../sky/include
)
set(INC_SYS
@@ -14,13 +15,20 @@ set(SRC
bake.cpp
buffers.cpp
camera.cpp
+ colorspace.cpp
constant_fold.cpp
coverage.cpp
denoising.cpp
film.cpp
+ geometry.cpp
graph.cpp
+ hair.cpp
image.cpp
+ image_oiio.cpp
+ image_sky.cpp
+ image_vdb.cpp
integrator.cpp
+ jitter.cpp
light.cpp
light_tree.cpp
merge.cpp
@@ -49,14 +57,21 @@ set(SRC_HEADERS
background.h
buffers.h
camera.h
+ colorspace.h
constant_fold.h
coverage.h
denoising.h
film.h
+ geometry.h
graph.h
+ hair.h
image.h
+ image_oiio.h
+ image_sky.h
+ image_vdb.h
integrator.h
light.h
+ jitter.h
merge.h
mesh.h
nodes.h
@@ -76,15 +91,40 @@ set(SRC_HEADERS
set(LIB
cycles_bvh
+ cycles_device
+ cycles_subd
+ cycles_util
+ bf_intern_sky
)
if(WITH_CYCLES_OSL)
list(APPEND LIB
cycles_kernel_osl
)
+
+ SET_PROPERTY(SOURCE osl.cpp PROPERTY COMPILE_FLAGS ${RTTI_DISABLE_FLAGS})
+endif()
+
+if(WITH_OPENCOLORIO)
+ add_definitions(-DWITH_OCIO)
+ include_directories(
+ SYSTEM
+ ${OPENCOLORIO_INCLUDE_DIRS}
+ )
+ if(WIN32)
+ add_definitions(-DOpenColorIO_STATIC)
+ endif()
endif()
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${RTTI_DISABLE_FLAGS}")
+if(WITH_OPENVDB)
+ add_definitions(-DWITH_OPENVDB ${OPENVDB_DEFINITIONS})
+ list(APPEND INC_SYS
+ ${OPENVDB_INCLUDE_DIRS}
+ )
+ list(APPEND LIB
+ ${OPENVDB_LIBRARIES}
+ )
+endif()
include_directories(${INC})
include_directories(SYSTEM ${INC_SYS})
diff --git a/intern/cycles/render/attribute.cpp b/intern/cycles/render/attribute.cpp
index dad6cb4fe6d..4c26d5e8365 100644
--- a/intern/cycles/render/attribute.cpp
+++ b/intern/cycles/render/attribute.cpp
@@ -14,9 +14,10 @@
* limitations under the License.
*/
+#include "render/attribute.h"
+#include "render/hair.h"
#include "render/image.h"
#include "render/mesh.h"
-#include "render/attribute.h"
#include "util/util_foreach.h"
#include "util/util_transform.h"
@@ -25,45 +26,51 @@ CCL_NAMESPACE_BEGIN
/* Attribute */
-Attribute::~Attribute()
+Attribute::Attribute(
+ ustring name, TypeDesc type, AttributeElement element, Geometry *geom, AttributePrimitive prim)
+ : name(name), std(ATTR_STD_NONE), type(type), element(element), flags(0)
{
- /* for voxel data, we need to remove the image from the image manager */
- if (element == ATTR_ELEMENT_VOXEL) {
- VoxelAttribute *voxel_data = data_voxel();
+ /* string and matrix not supported! */
+ assert(type == TypeDesc::TypeFloat || type == TypeDesc::TypeColor ||
+ type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
+ type == TypeDesc::TypeNormal || type == TypeDesc::TypeMatrix || type == TypeFloat2 ||
+ type == TypeRGBA);
- if (voxel_data && voxel_data->slot != -1) {
- voxel_data->manager->remove_image(voxel_data->slot);
- }
+ if (element == ATTR_ELEMENT_VOXEL) {
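+    /* Voxel attributes hold a single ImageHandle constructed in place inside the buffer;
+     * it is destroyed explicitly in ~Attribute(). */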
+ buffer.resize(sizeof(ImageHandle));
+ new (buffer.data()) ImageHandle();
+ }
+ else {
+ resize(geom, prim, false);
}
}
-void Attribute::set(ustring name_, TypeDesc type_, AttributeElement element_)
+Attribute::~Attribute()
{
- name = name_;
- type = type_;
- element = element_;
- std = ATTR_STD_NONE;
- flags = 0;
-
- /* string and matrix not supported! */
- assert(type == TypeDesc::TypeFloat || type == TypeDesc::TypeColor ||
- type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
- type == TypeDesc::TypeNormal || type == TypeDesc::TypeMatrix || type == TypeFloat2);
+ /* For voxel data, we need to free the image handle. */
+ if (element == ATTR_ELEMENT_VOXEL && buffer.size()) {
+ ImageHandle &handle = data_voxel();
+ handle.~ImageHandle();
+ }
}
-void Attribute::resize(Mesh *mesh, AttributePrimitive prim, bool reserve_only)
+void Attribute::resize(Geometry *geom, AttributePrimitive prim, bool reserve_only)
{
- if (reserve_only) {
- buffer.reserve(buffer_size(mesh, prim));
- }
- else {
- buffer.resize(buffer_size(mesh, prim), 0);
+ if (element != ATTR_ELEMENT_VOXEL) {
+ if (reserve_only) {
+ buffer.reserve(buffer_size(geom, prim));
+ }
+ else {
+ buffer.resize(buffer_size(geom, prim), 0);
+ }
}
}
void Attribute::resize(size_t num_elements)
{
- buffer.resize(num_elements * data_sizeof(), 0);
+ if (element != ATTR_ELEMENT_VOXEL) {
+ buffer.resize(num_elements * data_sizeof(), 0);
+ }
}
void Attribute::add(const float &f)
@@ -121,17 +128,6 @@ void Attribute::add(const Transform &f)
buffer.push_back(data[i]);
}
-void Attribute::add(const VoxelAttribute &f)
-{
- assert(data_sizeof() == sizeof(VoxelAttribute));
-
- char *data = (char *)&f;
- size_t size = sizeof(f);
-
- for (size_t i = 0; i < size; i++)
- buffer.push_back(data[i]);
-}
-
void Attribute::add(const char *data)
{
size_t size = data_sizeof();
@@ -143,7 +139,7 @@ void Attribute::add(const char *data)
size_t Attribute::data_sizeof() const
{
if (element == ATTR_ELEMENT_VOXEL)
- return sizeof(VoxelAttribute);
+ return sizeof(ImageHandle);
else if (element == ATTR_ELEMENT_CORNER_BYTE)
return sizeof(uchar4);
else if (type == TypeDesc::TypeFloat)
@@ -156,13 +152,13 @@ size_t Attribute::data_sizeof() const
return sizeof(float3);
}
-size_t Attribute::element_size(Mesh *mesh, AttributePrimitive prim) const
+size_t Attribute::element_size(Geometry *geom, AttributePrimitive prim) const
{
if (flags & ATTR_FINAL_SIZE) {
return buffer.size() / data_sizeof();
}
- size_t size;
+ size_t size = 0;
switch (element) {
case ATTR_ELEMENT_OBJECT:
@@ -171,54 +167,74 @@ size_t Attribute::element_size(Mesh *mesh, AttributePrimitive prim) const
size = 1;
break;
case ATTR_ELEMENT_VERTEX:
- size = mesh->verts.size() + mesh->num_ngons;
- if (prim == ATTR_PRIM_SUBD) {
- size -= mesh->num_subd_verts;
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ size = mesh->verts.size() + mesh->num_ngons;
+ if (prim == ATTR_PRIM_SUBD) {
+ size -= mesh->num_subd_verts;
+ }
}
break;
case ATTR_ELEMENT_VERTEX_MOTION:
- size = (mesh->verts.size() + mesh->num_ngons) * (mesh->motion_steps - 1);
- if (prim == ATTR_PRIM_SUBD) {
- size -= mesh->num_subd_verts * (mesh->motion_steps - 1);
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ size = (mesh->verts.size() + mesh->num_ngons) * (mesh->motion_steps - 1);
+ if (prim == ATTR_PRIM_SUBD) {
+ size -= mesh->num_subd_verts * (mesh->motion_steps - 1);
+ }
}
break;
case ATTR_ELEMENT_FACE:
- if (prim == ATTR_PRIM_TRIANGLE) {
- size = mesh->num_triangles();
- }
- else {
- size = mesh->subd_faces.size() + mesh->num_ngons;
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (prim == ATTR_PRIM_GEOMETRY) {
+ size = mesh->num_triangles();
+ }
+ else {
+ size = mesh->subd_faces.size() + mesh->num_ngons;
+ }
}
break;
case ATTR_ELEMENT_CORNER:
case ATTR_ELEMENT_CORNER_BYTE:
- if (prim == ATTR_PRIM_TRIANGLE) {
- size = mesh->num_triangles() * 3;
- }
- else {
- size = mesh->subd_face_corners.size() + mesh->num_ngons;
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (prim == ATTR_PRIM_GEOMETRY) {
+ size = mesh->num_triangles() * 3;
+ }
+ else {
+ size = mesh->subd_face_corners.size() + mesh->num_ngons;
+ }
}
break;
case ATTR_ELEMENT_CURVE:
- size = mesh->num_curves();
+ if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ size = hair->num_curves();
+ }
break;
case ATTR_ELEMENT_CURVE_KEY:
- size = mesh->curve_keys.size();
+ if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ size = hair->curve_keys.size();
+ }
break;
case ATTR_ELEMENT_CURVE_KEY_MOTION:
- size = mesh->curve_keys.size() * (mesh->motion_steps - 1);
+ if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ size = hair->curve_keys.size() * (hair->motion_steps - 1);
+ }
break;
default:
- size = 0;
break;
}
return size;
}
-size_t Attribute::buffer_size(Mesh *mesh, AttributePrimitive prim) const
+size_t Attribute::buffer_size(Geometry *geom, AttributePrimitive prim) const
{
- return element_size(mesh, prim) * data_sizeof();
+ return element_size(geom, prim) * data_sizeof();
}
bool Attribute::same_storage(TypeDesc a, TypeDesc b)
@@ -251,6 +267,9 @@ void Attribute::add_with_weight(void *dst, void *src, float weight)
else if (same_storage(type, TypeDesc::TypeFloat)) {
*((float *)dst) += *((float *)src) * weight;
}
+ else if (same_storage(type, TypeFloat2)) {
+ *((float2 *)dst) += *((float2 *)src) * weight;
+ }
else if (same_storage(type, TypeDesc::TypeVector)) {
*((float4 *)dst) += *((float4 *)src) * weight;
}
@@ -276,6 +295,8 @@ const char *Attribute::standard_name(AttributeStandard std)
return "tangent";
case ATTR_STD_UV_TANGENT_SIGN:
return "tangent_sign";
+ case ATTR_STD_VERTEX_COLOR:
+ return "vertex_color";
case ATTR_STD_POSITION_UNDEFORMED:
return "undeformed";
case ATTR_STD_POSITION_UNDISPLACED:
@@ -308,6 +329,8 @@ const char *Attribute::standard_name(AttributeStandard std)
return "velocity";
case ATTR_STD_POINTINESS:
return "pointiness";
+ case ATTR_STD_RANDOM_PER_ISLAND:
+ return "random_per_island";
case ATTR_STD_NOT_FOUND:
case ATTR_STD_NONE:
case ATTR_STD_NUM:
@@ -330,13 +353,42 @@ AttributeStandard Attribute::name_standard(const char *name)
return ATTR_STD_NONE;
}
+void Attribute::get_uv_tiles(Geometry *geom,
+ AttributePrimitive prim,
+ unordered_set<int> &tiles) const
+{
+ if (type != TypeFloat2) {
+ return;
+ }
+
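+  /* Collect the UDIM tile numbers (1001 + u_tile + 10 * v_tile) referenced by this UV layer. */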
+ const int num = element_size(geom, prim);
+ const float2 *uv = data_float2();
+ for (int i = 0; i < num; i++, uv++) {
+ float u = uv->x, v = uv->y;
+ int x = (int)u, y = (int)v;
+
+ if (x < 0 || y < 0 || x >= 10) {
+ continue;
+ }
+
+ /* Be conservative in corners - precisely touching the right or upper edge of a tile
+ * should not load its right/upper neighbor as well. */
+ if (x > 0 && (u < x + 1e-6f)) {
+ x--;
+ }
+ if (y > 0 && (v < y + 1e-6f)) {
+ y--;
+ }
+
+ tiles.insert(1001 + 10 * y + x);
+ }
+}
+
/* Attribute Set */
-AttributeSet::AttributeSet()
+AttributeSet::AttributeSet(Geometry *geometry, AttributePrimitive prim)
+ : geometry(geometry), prim(prim)
{
- triangle_mesh = NULL;
- curve_mesh = NULL;
- subd_mesh = NULL;
}
AttributeSet::~AttributeSet()
@@ -356,28 +408,9 @@ Attribute *AttributeSet::add(ustring name, TypeDesc type, AttributeElement eleme
remove(name);
}
-#if __cplusplus >= 201103L
- attributes.emplace_back();
- attr = &attributes.back();
- attr->set(name, type, element);
-#else
- {
- Attribute attr_temp;
- attr_temp.set(name, type, element);
- attributes.push_back(attr_temp);
- attr = &attributes.back();
- }
-#endif
-
- /* this is weak .. */
- if (triangle_mesh)
- attr->resize(triangle_mesh, ATTR_PRIM_TRIANGLE, false);
- if (curve_mesh)
- attr->resize(curve_mesh, ATTR_PRIM_CURVE, false);
- if (subd_mesh)
- attr->resize(subd_mesh, ATTR_PRIM_SUBD, false);
-
- return attr;
+ Attribute new_attr(name, type, element, geometry, prim);
+ attributes.emplace_back(std::move(new_attr));
+ return &attributes.back();
}
Attribute *AttributeSet::find(ustring name) const
@@ -412,7 +445,7 @@ Attribute *AttributeSet::add(AttributeStandard std, ustring name)
if (name == ustring())
name = Attribute::standard_name(std);
- if (triangle_mesh || subd_mesh) {
+ if (geometry->type == Geometry::MESH) {
switch (std) {
case ATTR_STD_VERTEX_NORMAL:
attr = add(name, TypeDesc::TypeNormal, ATTR_ELEMENT_VERTEX);
@@ -429,6 +462,9 @@ Attribute *AttributeSet::add(AttributeStandard std, ustring name)
case ATTR_STD_UV_TANGENT_SIGN:
attr = add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_CORNER);
break;
+ case ATTR_STD_VERTEX_COLOR:
+ attr = add(name, TypeRGBA, ATTR_ELEMENT_CORNER_BYTE);
+ break;
case ATTR_STD_GENERATED:
case ATTR_STD_POSITION_UNDEFORMED:
case ATTR_STD_POSITION_UNDISPLACED:
@@ -464,12 +500,15 @@ Attribute *AttributeSet::add(AttributeStandard std, ustring name)
case ATTR_STD_POINTINESS:
attr = add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_VERTEX);
break;
+ case ATTR_STD_RANDOM_PER_ISLAND:
+ attr = add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_FACE);
+ break;
default:
assert(0);
break;
}
}
- else if (curve_mesh) {
+ else if (geometry->type == Geometry::HAIR) {
switch (std) {
case ATTR_STD_UV:
attr = add(name, TypeFloat2, ATTR_ELEMENT_CURVE);
@@ -492,6 +531,9 @@ Attribute *AttributeSet::add(AttributeStandard std, ustring name)
case ATTR_STD_POINTINESS:
attr = add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_VERTEX);
break;
+ case ATTR_STD_RANDOM_PER_ISLAND:
+ attr = add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_FACE);
+ break;
default:
assert(0);
break;
@@ -549,12 +591,7 @@ void AttributeSet::remove(Attribute *attribute)
void AttributeSet::resize(bool reserve_only)
{
foreach (Attribute &attr, attributes) {
- if (triangle_mesh)
- attr.resize(triangle_mesh, ATTR_PRIM_TRIANGLE, reserve_only);
- if (curve_mesh)
- attr.resize(curve_mesh, ATTR_PRIM_CURVE, reserve_only);
- if (subd_mesh)
- attr.resize(subd_mesh, ATTR_PRIM_SUBD, reserve_only);
+ attr.resize(geometry, prim, reserve_only);
}
}
@@ -584,15 +621,10 @@ AttributeRequest::AttributeRequest(ustring name_)
name = name_;
std = ATTR_STD_NONE;
- triangle_type = TypeDesc::TypeFloat;
- triangle_desc.element = ATTR_ELEMENT_NONE;
- triangle_desc.offset = 0;
- triangle_desc.type = NODE_ATTR_FLOAT;
-
- curve_type = TypeDesc::TypeFloat;
- curve_desc.element = ATTR_ELEMENT_NONE;
- curve_desc.offset = 0;
- curve_desc.type = NODE_ATTR_FLOAT;
+ type = TypeDesc::TypeFloat;
+ desc.element = ATTR_ELEMENT_NONE;
+ desc.offset = 0;
+ desc.type = NODE_ATTR_FLOAT;
subd_type = TypeDesc::TypeFloat;
subd_desc.element = ATTR_ELEMENT_NONE;
@@ -605,15 +637,10 @@ AttributeRequest::AttributeRequest(AttributeStandard std_)
name = ustring();
std = std_;
- triangle_type = TypeDesc::TypeFloat;
- triangle_desc.element = ATTR_ELEMENT_NONE;
- triangle_desc.offset = 0;
- triangle_desc.type = NODE_ATTR_FLOAT;
-
- curve_type = TypeDesc::TypeFloat;
- curve_desc.element = ATTR_ELEMENT_NONE;
- curve_desc.offset = 0;
- curve_desc.type = NODE_ATTR_FLOAT;
+ type = TypeDesc::TypeFloat;
+ desc.element = ATTR_ELEMENT_NONE;
+ desc.offset = 0;
+ desc.type = NODE_ATTR_FLOAT;
subd_type = TypeDesc::TypeFloat;
subd_desc.element = ATTR_ELEMENT_NONE;
diff --git a/intern/cycles/render/attribute.h b/intern/cycles/render/attribute.h
index ebab0fe7f88..5871fa04a31 100644
--- a/intern/cycles/render/attribute.h
+++ b/intern/cycles/render/attribute.h
@@ -17,10 +17,13 @@
#ifndef __ATTRIBUTE_H__
#define __ATTRIBUTE_H__
+#include "render/image.h"
+
#include "kernel/kernel_types.h"
#include "util/util_list.h"
#include "util/util_param.h"
+#include "util/util_set.h"
#include "util/util_types.h"
#include "util/util_vector.h"
@@ -30,17 +33,12 @@ class Attribute;
class AttributeRequest;
class AttributeRequestSet;
class AttributeSet;
-class ImageManager;
+class ImageHandle;
+class Geometry;
+class Hair;
class Mesh;
struct Transform;
-/* Attributes for voxels are images */
-
-struct VoxelAttribute {
- ImageManager *manager;
- int slot;
-};
-
/* Attribute
*
* Arbitrary data layers on meshes.
@@ -56,17 +54,23 @@ class Attribute {
AttributeElement element;
uint flags; /* enum AttributeFlag */
- Attribute()
- {
- }
+ Attribute(ustring name,
+ TypeDesc type,
+ AttributeElement element,
+ Geometry *geom,
+ AttributePrimitive prim);
+ Attribute(Attribute &&other) = default;
+ Attribute(const Attribute &other) = delete;
+ Attribute &operator=(const Attribute &other) = delete;
~Attribute();
+
void set(ustring name, TypeDesc type, AttributeElement element);
- void resize(Mesh *mesh, AttributePrimitive prim, bool reserve_only);
+ void resize(Geometry *geom, AttributePrimitive prim, bool reserve_only);
void resize(size_t num_elements);
size_t data_sizeof() const;
- size_t element_size(Mesh *mesh, AttributePrimitive prim) const;
- size_t buffer_size(Mesh *mesh, AttributePrimitive prim) const;
+ size_t element_size(Geometry *geom, AttributePrimitive prim) const;
+ size_t buffer_size(Geometry *geom, AttributePrimitive prim) const;
char *data()
{
@@ -102,10 +106,12 @@ class Attribute {
assert(data_sizeof() == sizeof(Transform));
return (Transform *)data();
}
- VoxelAttribute *data_voxel()
+
+ /* Attributes for voxels are images */
+ ImageHandle &data_voxel()
{
- assert(data_sizeof() == sizeof(VoxelAttribute));
- return (VoxelAttribute *)data();
+ assert(data_sizeof() == sizeof(ImageHandle));
+ return *(ImageHandle *)data();
}
const char *data() const
@@ -137,10 +143,10 @@ class Attribute {
assert(data_sizeof() == sizeof(Transform));
return (const Transform *)data();
}
- const VoxelAttribute *data_voxel() const
+ const ImageHandle &data_voxel() const
{
- assert(data_sizeof() == sizeof(VoxelAttribute));
- return (const VoxelAttribute *)data();
+ assert(data_sizeof() == sizeof(ImageHandle));
+ return *(const ImageHandle *)data();
}
void zero_data(void *dst);
@@ -150,13 +156,14 @@ class Attribute {
void add(const float2 &f);
void add(const float3 &f);
void add(const uchar4 &f);
- void add(const Transform &f);
- void add(const VoxelAttribute &f);
+ void add(const Transform &tfm);
void add(const char *data);
static bool same_storage(TypeDesc a, TypeDesc b);
static const char *standard_name(AttributeStandard std);
static AttributeStandard name_standard(const char *name);
+
+ void get_uv_tiles(Geometry *geom, AttributePrimitive prim, unordered_set<int> &tiles) const;
};
/* Attribute Set
@@ -165,12 +172,11 @@ class Attribute {
class AttributeSet {
public:
- Mesh *triangle_mesh;
- Mesh *curve_mesh;
- Mesh *subd_mesh;
+ Geometry *geometry;
+ AttributePrimitive prim;
list<Attribute> attributes;
- AttributeSet();
+ AttributeSet(Geometry *geometry, AttributePrimitive prim);
~AttributeSet();
Attribute *add(ustring name, TypeDesc type, AttributeElement element);
@@ -200,9 +206,9 @@ class AttributeRequest {
ustring name;
AttributeStandard std;
- /* temporary variables used by MeshManager */
- TypeDesc triangle_type, curve_type, subd_type;
- AttributeDescriptor triangle_desc, curve_desc, subd_desc;
+ /* temporary variables used by GeometryManager */
+ TypeDesc type, subd_type;
+ AttributeDescriptor desc, subd_desc;
explicit AttributeRequest(ustring name_);
explicit AttributeRequest(AttributeStandard std);
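
Aside: with VoxelAttribute gone, data_voxel() reinterprets the attribute's raw byte storage as an ImageHandle, so the handle has to be constructed and destroyed explicitly inside that buffer. A standalone sketch of the underlying pattern, using a made-up Handle type instead of the real ImageHandle:

    #include <cassert>
    #include <new>
    #include <string>
    #include <vector>

    /* Stand-in for ImageHandle. */
    struct Handle {
      std::string filename;
      int slot;
      Handle(std::string filename_, int slot_) : filename(filename_), slot(slot_) {}
    };

    int main()
    {
      /* Byte buffer playing the role of the attribute storage; a heap allocation
       * from std::vector<char> is suitably aligned for ordinary object types. */
      std::vector<char> buffer(sizeof(Handle));

      /* Construct the handle in place. */
      Handle *handle = new (buffer.data()) Handle("volume.vdb", 3);

      /* Typed access through the byte buffer, mirroring data_voxel(). */
      Handle &ref = *reinterpret_cast<Handle *>(buffer.data());
      assert(ref.slot == 3);
      (void)ref;

      /* Destroy explicitly before the bytes are reused or freed. */
      handle->~Handle();
      return 0;
    }
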
diff --git a/intern/cycles/render/background.cpp b/intern/cycles/render/background.cpp
index b32cc55903d..694bb640995 100644
--- a/intern/cycles/render/background.cpp
+++ b/intern/cycles/render/background.cpp
@@ -16,8 +16,8 @@
#include "render/background.h"
#include "device/device.h"
-#include "render/integrator.h"
#include "render/graph.h"
+#include "render/integrator.h"
#include "render/nodes.h"
#include "render/scene.h"
#include "render/shader.h"
@@ -43,6 +43,8 @@ NODE_DEFINE(Background)
SOCKET_BOOLEAN(transparent_glass, "Transparent Glass", false);
SOCKET_FLOAT(transparent_roughness_threshold, "Transparent Roughness Threshold", 0.0f);
+ SOCKET_FLOAT(volume_step_size, "Volume Step Size", 0.1f);
+
SOCKET_NODE(shader, "Shader", &Shader::node_type);
return type;
@@ -51,6 +53,7 @@ NODE_DEFINE(Background)
Background::Background() : Node(node_type)
{
need_update = true;
+ shader = NULL;
}
Background::~Background()
@@ -64,14 +67,7 @@ void Background::device_update(Device *device, DeviceScene *dscene, Scene *scene
device_free(device, dscene);
- Shader *bg_shader = shader;
-
- if (use_shader) {
- if (!bg_shader)
- bg_shader = scene->default_background;
- }
- else
- bg_shader = scene->default_empty;
+ Shader *bg_shader = get_shader(scene);
/* set shader index and transparent option */
KernelBackground *kbackground = &dscene->data.background;
@@ -98,6 +94,8 @@ void Background::device_update(Device *device, DeviceScene *dscene, Scene *scene
else
kbackground->volume_shader = SHADER_NONE;
+ kbackground->volume_step_size = volume_step_size * scene->integrator->volume_step_rate;
+
/* No background node, make world shader invisible to all rays, to skip evaluation in kernel. */
if (bg_shader->graph->nodes.size() <= 1) {
kbackground->surface_shader |= SHADER_EXCLUDE_ANY;
@@ -134,4 +132,9 @@ void Background::tag_update(Scene *scene)
need_update = true;
}
+Shader *Background::get_shader(const Scene *scene)
+{
+ return (use_shader) ? ((shader) ? shader : scene->default_background) : scene->default_empty;
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/background.h b/intern/cycles/render/background.h
index 020db7bf6aa..c2ca1f75179 100644
--- a/intern/cycles/render/background.h
+++ b/intern/cycles/render/background.h
@@ -45,6 +45,8 @@ class Background : public Node {
bool transparent_glass;
float transparent_roughness_threshold;
+ float volume_step_size;
+
bool need_update;
Background();
@@ -55,6 +57,8 @@ class Background : public Node {
bool modified(const Background &background);
void tag_update(Scene *scene);
+
+ Shader *get_shader(const Scene *scene);
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp
index 73893921500..6044182a51a 100644
--- a/intern/cycles/render/bake.cpp
+++ b/intern/cycles/render/bake.cpp
@@ -15,283 +15,140 @@
*/
#include "render/bake.h"
+#include "render/buffers.h"
+#include "render/integrator.h"
#include "render/mesh.h"
#include "render/object.h"
#include "render/shader.h"
-#include "render/integrator.h"
#include "util/util_foreach.h"
CCL_NAMESPACE_BEGIN
-BakeData::BakeData(const int object, const size_t tri_offset, const size_t num_pixels)
- : m_object(object), m_tri_offset(tri_offset), m_num_pixels(num_pixels)
-{
- m_primitive.resize(num_pixels);
- m_u.resize(num_pixels);
- m_v.resize(num_pixels);
- m_dudx.resize(num_pixels);
- m_dudy.resize(num_pixels);
- m_dvdx.resize(num_pixels);
- m_dvdy.resize(num_pixels);
-}
-
-BakeData::~BakeData()
-{
- m_primitive.clear();
- m_u.clear();
- m_v.clear();
- m_dudx.clear();
- m_dudy.clear();
- m_dvdx.clear();
- m_dvdy.clear();
-}
-
-void BakeData::set(int i, int prim, float uv[2], float dudx, float dudy, float dvdx, float dvdy)
-{
- m_primitive[i] = (prim == -1 ? -1 : m_tri_offset + prim);
- m_u[i] = uv[0];
- m_v[i] = uv[1];
- m_dudx[i] = dudx;
- m_dudy[i] = dudy;
- m_dvdx[i] = dvdx;
- m_dvdy[i] = dvdy;
-}
-
-void BakeData::set_null(int i)
-{
- m_primitive[i] = -1;
-}
-
-int BakeData::object()
-{
- return m_object;
-}
-
-size_t BakeData::size()
+static int aa_samples(Scene *scene, Object *object, ShaderEvalType type)
{
- return m_num_pixels;
-}
+ if (type == SHADER_EVAL_UV || type == SHADER_EVAL_ROUGHNESS) {
+ return 1;
+ }
+ else if (type == SHADER_EVAL_NORMAL) {
+ /* Only antialias normal if mesh has bump mapping. */
+ if (object->geometry) {
+ foreach (Shader *shader, object->geometry->used_shaders) {
+ if (shader->has_bump) {
+ return scene->integrator->aa_samples;
+ }
+ }
+ }
-bool BakeData::is_valid(int i)
-{
- return m_primitive[i] != -1;
+ return 1;
+ }
+ else {
+ return scene->integrator->aa_samples;
+ }
}
-uint4 BakeData::data(int i)
+/* Keep it synced with kernel_bake.h logic */
+static int shader_type_to_pass_filter(ShaderEvalType type, int pass_filter)
{
- return make_uint4(m_object, m_primitive[i], __float_as_int(m_u[i]), __float_as_int(m_v[i]));
-}
+ const int component_flags = pass_filter &
+ (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT | BAKE_FILTER_COLOR);
-uint4 BakeData::differentials(int i)
-{
- return make_uint4(__float_as_int(m_dudx[i]),
- __float_as_int(m_dudy[i]),
- __float_as_int(m_dvdx[i]),
- __float_as_int(m_dvdy[i]));
+ switch (type) {
+ case SHADER_EVAL_AO:
+ return BAKE_FILTER_AO;
+ case SHADER_EVAL_SHADOW:
+ return BAKE_FILTER_DIRECT;
+ case SHADER_EVAL_DIFFUSE:
+ return BAKE_FILTER_DIFFUSE | component_flags;
+ case SHADER_EVAL_GLOSSY:
+ return BAKE_FILTER_GLOSSY | component_flags;
+ case SHADER_EVAL_TRANSMISSION:
+ return BAKE_FILTER_TRANSMISSION | component_flags;
+ case SHADER_EVAL_COMBINED:
+ return pass_filter;
+ default:
+ return 0;
+ }
}
BakeManager::BakeManager()
{
- m_bake_data = NULL;
- m_is_baking = false;
+ type = SHADER_EVAL_BAKE;
+ pass_filter = 0;
+
need_update = true;
- m_shader_limit = 512 * 512;
}
BakeManager::~BakeManager()
{
- if (m_bake_data)
- delete m_bake_data;
}
bool BakeManager::get_baking()
{
- return m_is_baking;
-}
-
-void BakeManager::set_baking(const bool value)
-{
- m_is_baking = value;
-}
-
-BakeData *BakeManager::init(const int object, const size_t tri_offset, const size_t num_pixels)
-{
- m_bake_data = new BakeData(object, tri_offset, num_pixels);
- return m_bake_data;
+ return !object_name.empty();
}
-void BakeManager::set_shader_limit(const size_t x, const size_t y)
+void BakeManager::set(Scene *scene,
+ const std::string &object_name_,
+ ShaderEvalType type_,
+ int pass_filter_)
{
- m_shader_limit = x * y;
- m_shader_limit = (size_t)pow(2, ceil(log(m_shader_limit) / log(2)));
-}
-
-bool BakeManager::bake(Device *device,
- DeviceScene *dscene,
- Scene *scene,
- Progress &progress,
- ShaderEvalType shader_type,
- const int pass_filter,
- BakeData *bake_data,
- float result[])
-{
- size_t num_pixels = bake_data->size();
+ object_name = object_name_;
+ type = type_;
+ pass_filter = shader_type_to_pass_filter(type_, pass_filter_);
- int num_samples = aa_samples(scene, bake_data, shader_type);
+ Pass::add(PASS_BAKE_PRIMITIVE, scene->film->passes);
+ Pass::add(PASS_BAKE_DIFFERENTIAL, scene->film->passes);
- /* calculate the total pixel samples for the progress bar */
- total_pixel_samples = 0;
- for (size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) {
- size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit);
- total_pixel_samples += shader_size * num_samples;
+ if (type == SHADER_EVAL_UV) {
+ /* force UV to be available */
+ Pass::add(PASS_UV, scene->film->passes);
}
- progress.reset_sample();
- progress.set_total_pixel_samples(total_pixel_samples);
-
- /* needs to be up to date for baking specific AA samples */
- dscene->data.integrator.aa_samples = num_samples;
- device->const_copy_to("__data", &dscene->data, sizeof(dscene->data));
- for (size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) {
- size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit);
-
- /* setup input for device task */
- device_vector<uint4> d_input(device, "bake_input", MEM_READ_ONLY);
- uint4 *d_input_data = d_input.alloc(shader_size * 2);
- size_t d_input_size = 0;
-
- for (size_t i = shader_offset; i < (shader_offset + shader_size); i++) {
- d_input_data[d_input_size++] = bake_data->data(i);
- d_input_data[d_input_size++] = bake_data->differentials(i);
- }
-
- if (d_input_size == 0) {
- m_is_baking = false;
- return false;
- }
-
- /* run device task */
- device_vector<float4> d_output(device, "bake_output", MEM_READ_WRITE);
- d_output.alloc(shader_size);
- d_output.zero_to_device();
- d_input.copy_to_device();
-
- DeviceTask task(DeviceTask::SHADER);
- task.shader_input = d_input.device_pointer;
- task.shader_output = d_output.device_pointer;
- task.shader_eval_type = shader_type;
- task.shader_filter = pass_filter;
- task.shader_x = 0;
- task.offset = shader_offset;
- task.shader_w = d_output.size();
- task.num_samples = num_samples;
- task.get_cancel = function_bind(&Progress::get_cancel, &progress);
- task.update_progress_sample = function_bind(&Progress::add_samples_update, &progress, _1, _2);
-
- device->task_add(task);
- device->task_wait();
-
- if (progress.get_cancel()) {
- d_input.free();
- d_output.free();
- m_is_baking = false;
- return false;
- }
-
- d_output.copy_from_device(0, 1, d_output.size());
- d_input.free();
-
- /* read result */
- int k = 0;
-
- float4 *offset = d_output.data();
-
- size_t depth = 4;
- for (size_t i = shader_offset; i < (shader_offset + shader_size); i++) {
- size_t index = i * depth;
- float4 out = offset[k++];
-
- if (bake_data->is_valid(i)) {
- for (size_t j = 0; j < 4; j++) {
- result[index + j] = out[j];
- }
- }
- }
-
- d_output.free();
+ /* force use_light_pass to be true if we bake more than just colors */
+ if (pass_filter & ~BAKE_FILTER_COLOR) {
+ Pass::add(PASS_LIGHT, scene->film->passes);
}
- m_is_baking = false;
- return true;
+ /* create device and update scene */
+ scene->film->tag_update(scene);
+ scene->integrator->tag_update(scene);
+
+ need_update = true;
}
void BakeManager::device_update(Device * /*device*/,
- DeviceScene * /*dscene*/,
- Scene * /*scene*/,
- Progress &progress)
+ DeviceScene *dscene,
+ Scene *scene,
+ Progress & /* progress */)
{
if (!need_update)
return;
- if (progress.get_cancel())
- return;
-
- need_update = false;
-}
-
-void BakeManager::device_free(Device * /*device*/, DeviceScene * /*dscene*/)
-{
-}
+ KernelIntegrator *kintegrator = &dscene->data.integrator;
+ KernelBake *kbake = &dscene->data.bake;
-int BakeManager::aa_samples(Scene *scene, BakeData *bake_data, ShaderEvalType type)
-{
- if (type == SHADER_EVAL_UV || type == SHADER_EVAL_ROUGHNESS) {
- return 1;
- }
- else if (type == SHADER_EVAL_NORMAL) {
- /* Only antialias normal if mesh has bump mapping. */
- Object *object = scene->objects[bake_data->object()];
+ kbake->type = type;
+ kbake->pass_filter = pass_filter;
- if (object->mesh) {
- foreach (Shader *shader, object->mesh->used_shaders) {
- if (shader->has_bump) {
- return scene->integrator->aa_samples;
- }
- }
+ int object_index = 0;
+ foreach (Object *object, scene->objects) {
+ const Geometry *geom = object->geometry;
+ if (object->name == object_name && geom->type == Geometry::MESH) {
+ kbake->object_index = object_index;
+ kbake->tri_offset = geom->prim_offset;
+ kintegrator->aa_samples = aa_samples(scene, object, type);
+ break;
}
- return 1;
- }
- else {
- return scene->integrator->aa_samples;
+ object_index++;
}
+
+ need_update = false;
}
-/* Keep it synced with kernel_bake.h logic */
-int BakeManager::shader_type_to_pass_filter(ShaderEvalType type, const int pass_filter)
+void BakeManager::device_free(Device * /*device*/, DeviceScene * /*dscene*/)
{
- const int component_flags = pass_filter &
- (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT | BAKE_FILTER_COLOR);
-
- switch (type) {
- case SHADER_EVAL_AO:
- return BAKE_FILTER_AO;
- case SHADER_EVAL_SHADOW:
- return BAKE_FILTER_DIRECT;
- case SHADER_EVAL_DIFFUSE:
- return BAKE_FILTER_DIFFUSE | component_flags;
- case SHADER_EVAL_GLOSSY:
- return BAKE_FILTER_GLOSSY | component_flags;
- case SHADER_EVAL_TRANSMISSION:
- return BAKE_FILTER_TRANSMISSION | component_flags;
- case SHADER_EVAL_SUBSURFACE:
- return BAKE_FILTER_SUBSURFACE | component_flags;
- case SHADER_EVAL_COMBINED:
- return pass_filter;
- default:
- return 0;
- }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/bake.h b/intern/cycles/render/bake.h
index 88537623efb..93e664c2ab1 100644
--- a/intern/cycles/render/bake.h
+++ b/intern/cycles/render/bake.h
@@ -25,67 +25,23 @@
CCL_NAMESPACE_BEGIN
-class BakeData {
- public:
- BakeData(const int object, const size_t tri_offset, const size_t num_pixels);
- ~BakeData();
-
- void set(int i, int prim, float uv[2], float dudx, float dudy, float dvdx, float dvdy);
- void set_null(int i);
- int object();
- size_t size();
- uint4 data(int i);
- uint4 differentials(int i);
- bool is_valid(int i);
-
- private:
- int m_object;
- size_t m_tri_offset;
- size_t m_num_pixels;
- vector<int> m_primitive;
- vector<float> m_u;
- vector<float> m_v;
- vector<float> m_dudx;
- vector<float> m_dudy;
- vector<float> m_dvdx;
- vector<float> m_dvdy;
-};
-
class BakeManager {
public:
BakeManager();
~BakeManager();
+ void set(Scene *scene, const std::string &object_name, ShaderEvalType type, int pass_filter);
bool get_baking();
- void set_baking(const bool value);
-
- BakeData *init(const int object, const size_t tri_offset, const size_t num_pixels);
-
- void set_shader_limit(const size_t x, const size_t y);
-
- bool bake(Device *device,
- DeviceScene *dscene,
- Scene *scene,
- Progress &progress,
- ShaderEvalType shader_type,
- const int pass_filter,
- BakeData *bake_data,
- float result[]);
void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
void device_free(Device *device, DeviceScene *dscene);
- static int shader_type_to_pass_filter(ShaderEvalType type, const int pass_filter);
- static int aa_samples(Scene *scene, BakeData *bake_data, ShaderEvalType type);
-
bool need_update;
- size_t total_pixel_samples;
-
private:
- BakeData *m_bake_data;
- bool m_is_baking;
- size_t m_shader_limit;
+ ShaderEvalType type;
+ int pass_filter;
+ std::string object_name;
};
CCL_NAMESPACE_END
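
Aside: shader_type_to_pass_filter() above composes a per-bake-type base filter with the user-requested direct/indirect/color component flags by bitwise OR. A small standalone sketch of that composition, with simplified enum values that are not the real BAKE_FILTER_* constants:

    #include <cstdio>

    /* Simplified stand-ins for the bake filter flags and ShaderEvalType. */
    enum PassFilter : int {
      FILTER_DIRECT = 1 << 0,
      FILTER_INDIRECT = 1 << 1,
      FILTER_COLOR = 1 << 2,
      FILTER_DIFFUSE = 1 << 3,
      FILTER_GLOSSY = 1 << 4,
    };

    enum EvalType { EVAL_DIFFUSE, EVAL_GLOSSY };

    static int type_to_pass_filter(EvalType type, int requested)
    {
      /* Keep only the shared component flags from the user request... */
      const int components = requested & (FILTER_DIRECT | FILTER_INDIRECT | FILTER_COLOR);
      /* ...and OR them onto the per-type base filter. */
      switch (type) {
        case EVAL_DIFFUSE:
          return FILTER_DIFFUSE | components;
        case EVAL_GLOSSY:
          return FILTER_GLOSSY | components;
      }
      return 0;
    }

    int main()
    {
      const int filter = type_to_pass_filter(EVAL_DIFFUSE, FILTER_DIRECT | FILTER_COLOR);
      std::printf("filter = 0x%x\n", filter); /* diffuse | direct | color */
      return 0;
    }
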
diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp
index 5405aaefc1d..b26366af852 100644
--- a/intern/cycles/render/buffers.cpp
+++ b/intern/cycles/render/buffers.cpp
@@ -16,8 +16,8 @@
#include <stdlib.h>
-#include "render/buffers.h"
#include "device/device.h"
+#include "render/buffers.h"
#include "util/util_foreach.h"
#include "util/util_hash.h"
@@ -57,7 +57,10 @@ bool BufferParams::modified(const BufferParams &params)
{
return !(full_x == params.full_x && full_y == params.full_y && width == params.width &&
height == params.height && full_width == params.full_width &&
- full_height == params.full_height && Pass::equals(passes, params.passes));
+ full_height == params.full_height && Pass::equals(passes, params.passes) &&
+ denoising_data_pass == params.denoising_data_pass &&
+ denoising_clean_pass == params.denoising_clean_pass &&
+ denoising_prefiltered_pass == params.denoising_prefiltered_pass);
}
int BufferParams::get_passes_size()
@@ -143,7 +146,7 @@ void RenderBuffers::reset(BufferParams &params_)
params = params_;
/* re-allocate buffer */
- buffer.alloc(params.width * params.height * params.get_passes_size());
+ buffer.alloc(params.width * params.get_passes_size(), params.height);
buffer.zero_to_device();
}
@@ -185,13 +188,28 @@ bool RenderBuffers::get_denoising_pass_rect(
offset = type + params.get_denoising_offset();
scale /= sample;
}
- else if (type == DENOISING_PASS_PREFILTERED_COLOR && !params.denoising_prefiltered_pass) {
- /* If we're not saving the prefiltering result, return the original noisy pass. */
- offset = params.get_denoising_offset() + DENOISING_PASS_COLOR;
- scale /= sample;
+ else if (params.denoising_prefiltered_pass) {
+ offset = type + params.get_denoising_prefiltered_offset();
}
else {
- offset = type + params.get_denoising_prefiltered_offset();
+ switch (type) {
+ case DENOISING_PASS_PREFILTERED_DEPTH:
+ offset = params.get_denoising_offset() + DENOISING_PASS_DEPTH;
+ break;
+ case DENOISING_PASS_PREFILTERED_NORMAL:
+ offset = params.get_denoising_offset() + DENOISING_PASS_NORMAL;
+ break;
+ case DENOISING_PASS_PREFILTERED_ALBEDO:
+ offset = params.get_denoising_offset() + DENOISING_PASS_ALBEDO;
+ break;
+ case DENOISING_PASS_PREFILTERED_COLOR:
+ /* If we're not saving the prefiltering result, return the original noisy pass. */
+ offset = params.get_denoising_offset() + DENOISING_PASS_COLOR;
+ break;
+ default:
+ return false;
+ }
+ scale /= sample;
}
int pass_stride = params.get_passes_size();
@@ -217,9 +235,14 @@ bool RenderBuffers::get_denoising_pass_rect(
float *in_combined = buffer.data();
for (int i = 0; i < size; i++, in += pass_stride, in_combined += pass_stride, pixels += 4) {
- pixels[0] = in[0] * scale;
- pixels[1] = in[1] * scale;
- pixels[2] = in[2] * scale;
+ float3 val = make_float3(in[0], in[1], in[2]);
+ if (type == DENOISING_PASS_PREFILTERED_COLOR && params.denoising_prefiltered_pass) {
+ /* Remove highlight compression from the image. */
+ val = color_highlight_uncompress(val);
+ }
+ pixels[0] = val.x * scale;
+ pixels[1] = val.y * scale;
+ pixels[2] = val.z * scale;
pixels[3] = saturate(in_combined[3] * alpha_scale);
}
}
@@ -231,29 +254,41 @@ bool RenderBuffers::get_denoising_pass_rect(
}
bool RenderBuffers::get_pass_rect(
- PassType type, float exposure, int sample, int components, float *pixels, const string &name)
+ const string &name, float exposure, int sample, int components, float *pixels)
{
if (buffer.data() == NULL) {
return false;
}
+ float *sample_count = NULL;
+ if (name == "Combined") {
+ int sample_offset = 0;
+ for (size_t j = 0; j < params.passes.size(); j++) {
+ Pass &pass = params.passes[j];
+ if (pass.type != PASS_SAMPLE_COUNT) {
+ sample_offset += pass.components;
+ continue;
+ }
+ else {
+ sample_count = buffer.data() + sample_offset;
+ break;
+ }
+ }
+ }
+
int pass_offset = 0;
for (size_t j = 0; j < params.passes.size(); j++) {
Pass &pass = params.passes[j];
- if (pass.type != type) {
+ /* Pass is identified by both type and name, multiple of the same type
+ * may exist with a different name. */
+ if (pass.name != name) {
pass_offset += pass.components;
continue;
}
- /* Tell Cryptomatte passes apart by their name. */
- if (pass.type == PASS_CRYPTOMATTE) {
- if (pass.name != name) {
- pass_offset += pass.components;
- continue;
- }
- }
+ PassType type = pass.type;
float *in = buffer.data() + pass_offset;
int pass_stride = params.get_passes_size();
@@ -401,6 +436,11 @@ bool RenderBuffers::get_pass_rect(
}
else {
for (int i = 0; i < size; i++, in += pass_stride, pixels += 4) {
+        if (sample_count && sample_count[i * pass_stride] < 0.0f) {
+          /* Negative sample count marks adaptive-sampling pixels; use its magnitude as the per-pixel sample count. */
+          scale = (pass.filter) ? -1.0f / (sample_count[i * pass_stride]) : 1.0f;
+          scale_exposure = (pass.exposure) ? scale * exposure : scale;
+        }
+
+
float4 f = make_float4(in[0], in[1], in[2], in[3]);
pixels[0] = f.x * scale_exposure;
@@ -419,6 +459,40 @@ bool RenderBuffers::get_pass_rect(
return false;
}
+bool RenderBuffers::set_pass_rect(PassType type, int components, float *pixels)
+{
+ if (buffer.data() == NULL) {
+ return false;
+ }
+
+ int pass_offset = 0;
+
+ for (size_t j = 0; j < params.passes.size(); j++) {
+ Pass &pass = params.passes[j];
+
+ if (pass.type != type) {
+ pass_offset += pass.components;
+ continue;
+ }
+
+ float *out = buffer.data() + pass_offset;
+ int pass_stride = params.get_passes_size();
+ int size = params.width * params.height;
+
+ assert(pass.components == components);
+
+ for (int i = 0; i < size; i++, out += pass_stride, pixels += components) {
+ for (int j = 0; j < components; j++) {
+ out[j] = pixels[j];
+ }
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
/* Display Buffer */
DisplayBuffer::DisplayBuffer(Device *device, bool linear)
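
Aside: both get_pass_rect() and the new set_pass_rect() walk the same interleaved buffer layout, where each pixel stores all pass components contiguously and a single pass is addressed with a component offset plus a stride equal to the total components per pixel. A standalone sketch of that layout with a made-up two-pass configuration:

    #include <cstdio>
    #include <vector>

    int main()
    {
      const int width = 4, height = 2;
      const int combined_components = 4, depth_components = 1;
      const int pass_stride = combined_components + depth_components; /* floats per pixel */

      std::vector<float> buffer(width * height * pass_stride, 0.0f);

      /* Write the depth pass, which starts right after the combined pass. */
      const int depth_offset = combined_components;
      float *out = buffer.data() + depth_offset;
      for (int i = 0; i < width * height; i++, out += pass_stride) {
        *out = 1.0f + i;
      }

      /* Read it back the same way: offset plus stride. */
      const float *in = buffer.data() + depth_offset;
      for (int i = 0; i < width * height; i++, in += pass_stride) {
        std::printf("%.1f ", *in);
      }
      std::printf("\n"); /* prints 1.0 ... 8.0 */
      return 0;
    }
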
diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h
index 1c49038cd4b..06b6094e6c9 100644
--- a/intern/cycles/render/buffers.h
+++ b/intern/cycles/render/buffers.h
@@ -52,7 +52,7 @@ class BufferParams {
/* passes */
vector<Pass> passes;
bool denoising_data_pass;
- /* If only some light path types should be denoised, an additional pass is needed. */
+  /* If only some light path types should be denoised, an additional pass is needed. */
bool denoising_clean_pass;
/* When we're prefiltering the passes during rendering, we need to keep both the
* original and the prefiltered data around because neighboring tiles might still
@@ -64,7 +64,6 @@ class BufferParams {
void get_offset_stride(int &offset, int &stride);
bool modified(const BufferParams &params);
- void add_pass(PassType type);
int get_passes_size();
int get_denoising_offset();
int get_denoising_prefiltered_offset();
@@ -89,14 +88,11 @@ class RenderBuffers {
void zero();
bool copy_from_device();
- bool get_pass_rect(PassType type,
- float exposure,
- int sample,
- int components,
- float *pixels,
- const string &name);
+ bool get_pass_rect(
+ const string &name, float exposure, int sample, int components, float *pixels);
bool get_denoising_pass_rect(
int offset, float exposure, int sample, int components, float *pixels);
+ bool set_pass_rect(PassType type, int components, float *pixels);
};
/* Display Buffer
@@ -135,7 +131,7 @@ class DisplayBuffer {
class RenderTile {
public:
- typedef enum { PATH_TRACE, DENOISE } Task;
+ typedef enum { PATH_TRACE = (1 << 0), BAKE = (1 << 1), DENOISE = (1 << 2) } Task;
Task task;
int x, y, w, h;
@@ -153,6 +149,50 @@ class RenderTile {
RenderBuffers *buffers;
RenderTile();
+
+ int4 bounds() const
+ {
+ return make_int4(x, /* xmin */
+ y, /* ymin */
+ x + w, /* xmax */
+ y + h); /* ymax */
+ }
+};
+
+/* Render Tile Neighbors
+ * Set of neighboring tiles used for denoising. Tile order:
+ * 0 1 2
+ * 3 4 5
+ * 6 7 8 */
+
+class RenderTileNeighbors {
+ public:
+ static const int SIZE = 9;
+ static const int CENTER = 4;
+
+ RenderTile tiles[SIZE];
+ RenderTile target;
+
+ RenderTileNeighbors(const RenderTile &center)
+ {
+ tiles[CENTER] = center;
+ }
+
+ int4 bounds() const
+ {
+ return make_int4(tiles[3].x, /* xmin */
+ tiles[1].y, /* ymin */
+ tiles[5].x + tiles[5].w, /* xmax */
+ tiles[7].y + tiles[7].h); /* ymax */
+ }
+
+ void set_bounds_from_center()
+ {
+ tiles[3].x = tiles[CENTER].x;
+ tiles[1].y = tiles[CENTER].y;
+ tiles[5].x = tiles[CENTER].x + tiles[CENTER].w;
+ tiles[7].y = tiles[CENTER].y + tiles[CENTER].h;
+ }
};
CCL_NAMESPACE_END
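
Aside: RenderTileNeighbors::bounds() relies on the fixed 3x3 ordering of the neighbor array, taking the left edge from tile 3, the top from tile 1, the right from tile 5 and the bottom from tile 7. A standalone sketch of that computation with a simplified tile type (not the real RenderTile):

    #include <cstdio>

    struct Tile {
      int x, y, w, h;
    };

    /* Stand-in for RenderTileNeighbors: 3x3 grid, center at index 4. */
    struct Neighbors {
      static const int SIZE = 9;
      Tile tiles[SIZE] = {};

      void bounds(int &xmin, int &ymin, int &xmax, int &ymax) const
      {
        xmin = tiles[3].x;              /* left neighbor */
        ymin = tiles[1].y;              /* top neighbor */
        xmax = tiles[5].x + tiles[5].w; /* right neighbor */
        ymax = tiles[7].y + tiles[7].h; /* bottom neighbor */
      }
    };

    int main()
    {
      Neighbors n;
      n.tiles[1] = {64, 0, 64, 64};   /* top */
      n.tiles[3] = {0, 64, 64, 64};   /* left */
      n.tiles[4] = {64, 64, 64, 64};  /* center */
      n.tiles[5] = {128, 64, 64, 64}; /* right */
      n.tiles[7] = {64, 128, 64, 64}; /* bottom */

      int xmin, ymin, xmax, ymax;
      n.bounds(xmin, ymin, xmax, ymax);
      std::printf("x: [%d, %d]  y: [%d, %d]\n", xmin, xmax, ymin, ymax); /* [0, 192] for both */
      return 0;
    }
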
diff --git a/intern/cycles/render/camera.cpp b/intern/cycles/render/camera.cpp
index 9c9070c8a90..bbc111cb798 100644
--- a/intern/cycles/render/camera.cpp
+++ b/intern/cycles/render/camera.cpp
@@ -26,9 +26,11 @@
#include "util/util_function.h"
#include "util/util_logging.h"
#include "util/util_math_cdf.h"
+#include "util/util_task.h"
#include "util/util_vector.h"
/* needed for calculating differentials */
+// clang-format off
#include "kernel/kernel_compat_cpu.h"
#include "kernel/split/kernel_split_data.h"
#include "kernel/kernel_globals.h"
@@ -36,6 +38,7 @@
#include "kernel/kernel_differential.h"
#include "kernel/kernel_montecarlo.h"
#include "kernel/kernel_camera.h"
+// clang-format on
CCL_NAMESPACE_BEGIN
@@ -118,6 +121,8 @@ NODE_DEFINE(Camera)
stereo_eye_enum.insert("right", STEREO_RIGHT);
SOCKET_ENUM(stereo_eye, "Stereo Eye", stereo_eye_enum, STEREO_NONE);
+ SOCKET_BOOLEAN(use_spherical_stereo, "Use Spherical Stereo", false);
+
SOCKET_FLOAT(interocular_distance, "Interocular Distance", 0.065f);
SOCKET_FLOAT(convergence_distance, "Convergence Distance", 30.0f * 0.065f);
@@ -492,20 +497,35 @@ void Camera::device_update_volume(Device * /*device*/, DeviceScene *dscene, Scen
if (!need_device_update && !need_flags_update) {
return;
}
- KernelCamera *kcam = &dscene->data.cam;
- BoundBox viewplane_boundbox = viewplane_bounds_get();
- for (size_t i = 0; i < scene->objects.size(); ++i) {
- Object *object = scene->objects[i];
- if (object->mesh->has_volume && viewplane_boundbox.intersects(object->bounds)) {
- /* TODO(sergey): Consider adding more grained check. */
- VLOG(1) << "Detected camera inside volume.";
- kcam->is_inside_volume = 1;
- break;
+
+ KernelIntegrator *kintegrator = &dscene->data.integrator;
+ if (kintegrator->use_volumes) {
+ KernelCamera *kcam = &dscene->data.cam;
+ BoundBox viewplane_boundbox = viewplane_bounds_get();
+
+ /* Parallel object update, with grain size to avoid too much threading overhead
+ * for individual objects. */
+ static const int OBJECTS_PER_TASK = 32;
+ parallel_for(blocked_range<size_t>(0, scene->objects.size(), OBJECTS_PER_TASK),
+ [&](const blocked_range<size_t> &r) {
+ for (size_t i = r.begin(); i != r.end(); i++) {
+ Object *object = scene->objects[i];
+ if (object->geometry->has_volume &&
+ viewplane_boundbox.intersects(object->bounds)) {
+                       /* TODO(sergey): Consider adding a more fine-grained check. */
+ VLOG(1) << "Detected camera inside volume.";
+ kcam->is_inside_volume = 1;
+ parallel_for_cancel();
+ break;
+ }
+ }
+ });
+
+ if (!kcam->is_inside_volume) {
+ VLOG(1) << "Camera is outside of the volume.";
}
}
- if (!kcam->is_inside_volume) {
- VLOG(1) << "Camera is outside of the volume.";
- }
+
need_device_update = false;
need_flags_update = false;
}
@@ -563,8 +583,7 @@ float3 Camera::transform_raster_to_world(float raster_x, float raster_y)
BoundBox Camera::viewplane_bounds_get()
{
/* TODO(sergey): This is all rather stupid, but is there a way to perform
- * checks we need in a more clear and smart fasion?
- */
+ * checks we need in a more clear and smart fashion? */
BoundBox bounds = BoundBox::empty;
if (type == CAMERA_PANORAMA) {
@@ -642,7 +661,8 @@ float Camera::world_to_raster_size(float3 P)
float3 D = normalize(Ddiff);
res = len(dist * dDdx - dot(dist * dDdx, D) * D);
- /* Decent approx distance to frustum (doesn't handle corners correctly, but not that big of a deal) */
+ /* Decent approx distance to frustum
+ * (doesn't handle corners correctly, but not that big of a deal) */
float f_dist = 0.0f;
if (offscreen_dicing_scale > 1.0f) {
@@ -686,7 +706,8 @@ float Camera::world_to_raster_size(float3 P)
f_dist = max(f_dist, *d);
}
else {
- /* Possibly far enough behind the frustum to use distance to origin instead of edge */
+ /* Possibly far enough behind the frustum to use distance to origin instead of edge
+ */
test_o = true;
}
}
@@ -716,10 +737,17 @@ float Camera::world_to_raster_size(float3 P)
float3 raster = transform_perspective(&full_cameratoraster, make_float3(dir.x, dir.y, 0.0f));
ray.t = 1.0f;
- camera_sample_panorama(&kernel_camera, kernel_camera_motion.data(), raster.x, raster.y, 0.0f, 0.0f, &ray);
- if(ray.t == 0.0f) {
+ camera_sample_panorama(
+ &kernel_camera, kernel_camera_motion.data(), raster.x, raster.y, 0.0f, 0.0f, &ray);
+ if (ray.t == 0.0f) {
/* No differentials, just use from directly ahead. */
- camera_sample_panorama(&kernel_camera, kernel_camera_motion.data(), 0.5f*full_width, 0.5f*full_height, 0.0f, 0.0f, &ray);
+ camera_sample_panorama(&kernel_camera,
+ kernel_camera_motion.data(),
+ 0.5f * full_width,
+ 0.5f * full_height,
+ 0.0f,
+ 0.0f,
+ &ray);
}
#else
camera_sample_panorama(&kernel_camera,
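
Aside: the camera volume check above is now a parallel scan over the object list with a grain size of 32 objects per task. The sketch below shows the same shape written directly against TBB, using an std::atomic<bool> for the early-out instead of the cancellation helper from Cycles' util_task layer; the object type here is made up:

    #include <atomic>
    #include <cstdio>
    #include <vector>

    #include <tbb/blocked_range.h>
    #include <tbb/parallel_for.h>

    struct FakeObject {
      bool has_volume;
      bool intersects_viewplane;
    };

    int main()
    {
      std::vector<FakeObject> objects(10000); /* value-initialized: all false */
      objects[7777] = {true, true};

      std::atomic<bool> inside_volume(false);
      const size_t OBJECTS_PER_TASK = 32; /* grain size to limit per-task overhead */

      tbb::parallel_for(tbb::blocked_range<size_t>(0, objects.size(), OBJECTS_PER_TASK),
                        [&](const tbb::blocked_range<size_t> &r) {
                          if (inside_volume.load(std::memory_order_relaxed)) {
                            return; /* another task already found a hit */
                          }
                          for (size_t i = r.begin(); i != r.end(); i++) {
                            const FakeObject &object = objects[i];
                            if (object.has_volume && object.intersects_viewplane) {
                              inside_volume.store(true, std::memory_order_relaxed);
                              break;
                            }
                          }
                        });

      std::printf("camera inside volume: %s\n", inside_volume ? "yes" : "no");
      return 0;
    }
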
diff --git a/intern/cycles/render/colorspace.cpp b/intern/cycles/render/colorspace.cpp
new file mode 100644
index 00000000000..57979d5f225
--- /dev/null
+++ b/intern/cycles/render/colorspace.cpp
@@ -0,0 +1,395 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "render/colorspace.h"
+
+#include "util/util_color.h"
+#include "util/util_half.h"
+#include "util/util_image.h"
+#include "util/util_logging.h"
+#include "util/util_math.h"
+#include "util/util_thread.h"
+#include "util/util_vector.h"
+
+#ifdef WITH_OCIO
+# include <OpenColorIO/OpenColorIO.h>
+namespace OCIO = OCIO_NAMESPACE;
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+/* Builtin colorspaces. */
+ustring u_colorspace_auto;
+ustring u_colorspace_raw("__builtin_raw");
+ustring u_colorspace_srgb("__builtin_srgb");
+
+/* Cached data. */
+#ifdef WITH_OCIO
+static thread_mutex cache_colorspaces_mutex;
+static thread_mutex cache_processors_mutex;
+static unordered_map<ustring, ustring, ustringHash> cached_colorspaces;
+static unordered_map<ustring, OCIO::ConstProcessorRcPtr, ustringHash> cached_processors;
+#endif
+
+ColorSpaceProcessor *ColorSpaceManager::get_processor(ustring colorspace)
+{
+#ifdef WITH_OCIO
+ /* Only use this for OpenColorIO color spaces, not the builtin ones. */
+ assert(colorspace != u_colorspace_srgb && colorspace != u_colorspace_auto);
+
+ if (colorspace == u_colorspace_raw) {
+ return NULL;
+ }
+
+ OCIO::ConstConfigRcPtr config = OCIO::GetCurrentConfig();
+ if (!config) {
+ return NULL;
+ }
+
+ /* Cache processor until free_memory(), memory overhead is expected to be
+ * small and the processor is likely to be reused. */
+ thread_scoped_lock cache_processors_lock(cache_processors_mutex);
+ if (cached_processors.find(colorspace) == cached_processors.end()) {
+ try {
+ cached_processors[colorspace] = config->getProcessor(colorspace.c_str(), "scene_linear");
+ }
+ catch (OCIO::Exception &exception) {
+ cached_processors[colorspace] = OCIO::ConstProcessorRcPtr();
+ VLOG(1) << "Colorspace " << colorspace.c_str()
+ << " can't be converted to scene_linear: " << exception.what();
+ }
+ }
+
+ const OCIO::Processor *processor = cached_processors[colorspace].get();
+ return (ColorSpaceProcessor *)processor;
+#else
+ /* No OpenColorIO. */
+ (void)colorspace;
+ return NULL;
+#endif
+}
+
+bool ColorSpaceManager::colorspace_is_data(ustring colorspace)
+{
+ if (colorspace == u_colorspace_auto || colorspace == u_colorspace_raw ||
+ colorspace == u_colorspace_srgb) {
+ return false;
+ }
+
+#ifdef WITH_OCIO
+ OCIO::ConstConfigRcPtr config = OCIO::GetCurrentConfig();
+ if (!config) {
+ return false;
+ }
+
+ try {
+ OCIO::ConstColorSpaceRcPtr space = config->getColorSpace(colorspace.c_str());
+ return space && space->isData();
+ }
+ catch (OCIO::Exception &) {
+ return false;
+ }
+#else
+ return false;
+#endif
+}
+
+ustring ColorSpaceManager::detect_known_colorspace(ustring colorspace,
+ const char *file_format,
+ bool is_float)
+{
+ if (colorspace == u_colorspace_auto) {
+ /* Auto detect sRGB or raw if none specified. */
+ if (is_float) {
+ bool srgb = (colorspace == "sRGB" || colorspace == "GammaCorrected" ||
+ (colorspace.empty() &&
+ (strcmp(file_format, "png") == 0 || strcmp(file_format, "tiff") == 0 ||
+ strcmp(file_format, "dpx") == 0 || strcmp(file_format, "jpeg2000") == 0)));
+ return srgb ? u_colorspace_srgb : u_colorspace_raw;
+ }
+ else {
+ return u_colorspace_srgb;
+ }
+ }
+ else if (colorspace == u_colorspace_srgb || colorspace == u_colorspace_raw) {
+ /* Builtin colorspaces. */
+ return colorspace;
+ }
+ else {
+ /* Use OpenColorIO. */
+#ifdef WITH_OCIO
+ {
+ thread_scoped_lock cache_lock(cache_colorspaces_mutex);
+ /* Cached lookup. */
+ if (cached_colorspaces.find(colorspace) != cached_colorspaces.end()) {
+ return cached_colorspaces[colorspace];
+ }
+ }
+
+ /* Detect if it matches a simple builtin colorspace. */
+ bool is_scene_linear, is_srgb;
+ is_builtin_colorspace(colorspace, is_scene_linear, is_srgb);
+
+ thread_scoped_lock cache_lock(cache_colorspaces_mutex);
+ if (is_scene_linear) {
+ VLOG(1) << "Colorspace " << colorspace.string() << " is no-op";
+ cached_colorspaces[colorspace] = u_colorspace_raw;
+ return u_colorspace_raw;
+ }
+ else if (is_srgb) {
+ VLOG(1) << "Colorspace " << colorspace.string() << " is sRGB";
+ cached_colorspaces[colorspace] = u_colorspace_srgb;
+ return u_colorspace_srgb;
+ }
+
+ /* Verify if we can convert from the requested color space. */
+ if (!get_processor(colorspace)) {
+ OCIO::ConstConfigRcPtr config = OCIO::GetCurrentConfig();
+ if (!config || !config->getColorSpace(colorspace.c_str())) {
+ VLOG(1) << "Colorspace " << colorspace.c_str() << " not found, using raw instead";
+ }
+ else {
+ VLOG(1) << "Colorspace " << colorspace.c_str()
+ << " can't be converted to scene_linear, using raw instead";
+ }
+ cached_colorspaces[colorspace] = u_colorspace_raw;
+ return u_colorspace_raw;
+ }
+
+ /* Convert to/from colorspace with OpenColorIO. */
+ VLOG(1) << "Colorspace " << colorspace.string() << " handled through OpenColorIO";
+ cached_colorspaces[colorspace] = colorspace;
+ return colorspace;
+#else
+ VLOG(1) << "Colorspace " << colorspace.c_str() << " not available, built without OpenColorIO";
+ return u_colorspace_raw;
+#endif
+ }
+}
+
+void ColorSpaceManager::is_builtin_colorspace(ustring colorspace,
+ bool &is_scene_linear,
+ bool &is_srgb)
+{
+#ifdef WITH_OCIO
+ const OCIO::Processor *processor = (const OCIO::Processor *)get_processor(colorspace);
+ if (!processor) {
+ is_scene_linear = false;
+ is_srgb = false;
+ return;
+ }
+
+ is_scene_linear = true;
+ is_srgb = true;
+ for (int i = 0; i < 256; i++) {
+ float v = i / 255.0f;
+
+ float cR[3] = {v, 0, 0};
+ float cG[3] = {0, v, 0};
+ float cB[3] = {0, 0, v};
+ float cW[3] = {v, v, v};
+ processor->applyRGB(cR);
+ processor->applyRGB(cG);
+ processor->applyRGB(cB);
+ processor->applyRGB(cW);
+
+ /* Make sure that there is no channel crosstalk. */
+ if (fabsf(cR[1]) > 1e-5f || fabsf(cR[2]) > 1e-5f || fabsf(cG[0]) > 1e-5f ||
+ fabsf(cG[2]) > 1e-5f || fabsf(cB[0]) > 1e-5f || fabsf(cB[1]) > 1e-5f) {
+ is_scene_linear = false;
+ is_srgb = false;
+ break;
+ }
+ /* Make sure that the three primaries combine linearly. */
+ if (!compare_floats(cR[0], cW[0], 1e-6f, 64) || !compare_floats(cG[1], cW[1], 1e-6f, 64) ||
+ !compare_floats(cB[2], cW[2], 1e-6f, 64)) {
+ is_scene_linear = false;
+ is_srgb = false;
+ break;
+ }
+ /* Make sure that the three channels behave identically. */
+ if (!compare_floats(cW[0], cW[1], 1e-6f, 64) || !compare_floats(cW[1], cW[2], 1e-6f, 64)) {
+ is_scene_linear = false;
+ is_srgb = false;
+ break;
+ }
+
+ float out_v = average(make_float3(cW[0], cW[1], cW[2]));
+ if (!compare_floats(v, out_v, 1e-6f, 64)) {
+ is_scene_linear = false;
+ }
+ if (!compare_floats(color_srgb_to_linear(v), out_v, 1e-6f, 64)) {
+ is_srgb = false;
+ }
+ }
+#else
+ (void)colorspace;
+ is_scene_linear = false;
+ is_srgb = false;
+#endif
+}
+
+#ifdef WITH_OCIO
+
+template<typename T> inline float4 cast_to_float4(T *data)
+{
+ return make_float4(util_image_cast_to_float(data[0]),
+ util_image_cast_to_float(data[1]),
+ util_image_cast_to_float(data[2]),
+ util_image_cast_to_float(data[3]));
+}
+
+template<typename T> inline void cast_from_float4(T *data, float4 value)
+{
+ data[0] = util_image_cast_from_float<T>(value.x);
+ data[1] = util_image_cast_from_float<T>(value.y);
+ data[2] = util_image_cast_from_float<T>(value.z);
+ data[3] = util_image_cast_from_float<T>(value.w);
+}
+
+/* Slower versions for all other data types, which need to convert to float and back. */
+template<typename T, bool compress_as_srgb = false>
+inline void processor_apply_pixels(const OCIO::Processor *processor, T *pixels, size_t num_pixels)
+{
+ /* TODO: implement faster version for when we know the conversion
+ * is a simple matrix transform between linear spaces. In that case
+ * un-premultiply is not needed. */
+
+ /* Process large images in chunks to keep temporary memory requirement down. */
+ const size_t chunk_size = std::min((size_t)(16 * 1024 * 1024), num_pixels);
+ vector<float4> float_pixels(chunk_size);
+
+ for (size_t j = 0; j < num_pixels; j += chunk_size) {
+ size_t width = std::min(chunk_size, num_pixels - j);
+
+ for (size_t i = 0; i < width; i++) {
+ float4 value = cast_to_float4(pixels + 4 * (j + i));
+
+ if (!(value.w <= 0.0f || value.w == 1.0f)) {
+ float inv_alpha = 1.0f / value.w;
+ value.x *= inv_alpha;
+ value.y *= inv_alpha;
+ value.z *= inv_alpha;
+ }
+
+ float_pixels[i] = value;
+ }
+
+ OCIO::PackedImageDesc desc((float *)float_pixels.data(), width, 1, 4);
+ processor->apply(desc);
+
+ for (size_t i = 0; i < width; i++) {
+ float4 value = float_pixels[i];
+
+ if (compress_as_srgb) {
+ value = color_linear_to_srgb_v4(value);
+ }
+
+ if (!(value.w <= 0.0f || value.w == 1.0f)) {
+ value.x *= value.w;
+ value.y *= value.w;
+ value.z *= value.w;
+ }
+
+ cast_from_float4(pixels + 4 * (j + i), value);
+ }
+ }
+}
+#endif
+
+template<typename T>
+void ColorSpaceManager::to_scene_linear(ustring colorspace,
+ T *pixels,
+ size_t num_pixels,
+ bool compress_as_srgb)
+{
+#ifdef WITH_OCIO
+ const OCIO::Processor *processor = (const OCIO::Processor *)get_processor(colorspace);
+
+ if (processor) {
+ if (compress_as_srgb) {
+ /* Compress output as sRGB. */
+ processor_apply_pixels<T, true>(processor, pixels, num_pixels);
+ }
+ else {
+ /* Write output as scene linear directly. */
+ processor_apply_pixels<T>(processor, pixels, num_pixels);
+ }
+ }
+#else
+ (void)colorspace;
+ (void)pixels;
+ (void)num_pixels;
+ (void)compress_as_srgb;
+#endif
+}
+
+void ColorSpaceManager::to_scene_linear(ColorSpaceProcessor *processor_,
+ float *pixel,
+ int channels)
+{
+#ifdef WITH_OCIO
+ const OCIO::Processor *processor = (const OCIO::Processor *)processor_;
+
+ if (processor) {
+ if (channels == 3) {
+ processor->applyRGB(pixel);
+ }
+ else if (channels == 4) {
+ if (pixel[3] == 1.0f || pixel[3] == 0.0f) {
+ /* Fast path for RGBA. */
+ processor->applyRGB(pixel);
+ }
+ else {
+ /* Un-associate and associate alpha since color management should not
+ * be affected by transparency. */
+ float alpha = pixel[3];
+ float inv_alpha = 1.0f / alpha;
+
+ pixel[0] *= inv_alpha;
+ pixel[1] *= inv_alpha;
+ pixel[2] *= inv_alpha;
+
+ processor->applyRGB(pixel);
+
+ pixel[0] *= alpha;
+ pixel[1] *= alpha;
+ pixel[2] *= alpha;
+ }
+ }
+ }
+#else
+ (void)processor_;
+ (void)pixel;
+ (void)channels;
+#endif
+}
+
+void ColorSpaceManager::free_memory()
+{
+#ifdef WITH_OCIO
+ map_free_memory(cached_colorspaces);
+  map_free_memory(cached_processors);
+#endif
+}
+
+/* Template instantiations so we don't have to inline functions. */
+template void ColorSpaceManager::to_scene_linear(ustring, uchar *, size_t, bool);
+template void ColorSpaceManager::to_scene_linear(ustring, ushort *, size_t, bool);
+template void ColorSpaceManager::to_scene_linear(ustring, half *, size_t, bool);
+template void ColorSpaceManager::to_scene_linear(ustring, float *, size_t, bool);
+
+CCL_NAMESPACE_END
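
Aside: get_processor() follows a cache-under-mutex pattern: look the colorspace up while holding the lock, build the expensive processor at most once on a miss, and keep it until free_memory(). A standalone sketch of that pattern with a stand-in Processor type instead of OCIO::ConstProcessorRcPtr:

    #include <cstdio>
    #include <memory>
    #include <mutex>
    #include <string>
    #include <unordered_map>

    struct Processor { /* stand-in for the OCIO processor handle */
      std::string from;
    };

    static std::mutex cache_mutex;
    static std::unordered_map<std::string, std::shared_ptr<Processor>> cached_processors;

    static Processor *get_processor(const std::string &colorspace)
    {
      std::lock_guard<std::mutex> lock(cache_mutex);
      auto it = cached_processors.find(colorspace);
      if (it == cached_processors.end()) {
        /* Expensive construction happens at most once per colorspace. */
        it = cached_processors.emplace(colorspace, std::make_shared<Processor>(Processor{colorspace}))
                 .first;
      }
      return it->second.get();
    }

    static void free_memory()
    {
      std::lock_guard<std::mutex> lock(cache_mutex);
      cached_processors.clear();
    }

    int main()
    {
      Processor *a = get_processor("ACEScg");
      Processor *b = get_processor("ACEScg");
      std::printf("same cached processor: %s\n", (a == b) ? "yes" : "no");
      free_memory();
      return 0;
    }
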
diff --git a/intern/cycles/render/colorspace.h b/intern/cycles/render/colorspace.h
new file mode 100644
index 00000000000..51d0b121cc0
--- /dev/null
+++ b/intern/cycles/render/colorspace.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COLORSPACE_H__
+#define __COLORSPACE_H__
+
+#include "util/util_map.h"
+#include "util/util_param.h"
+
+CCL_NAMESPACE_BEGIN
+
+extern ustring u_colorspace_auto;
+extern ustring u_colorspace_raw;
+extern ustring u_colorspace_srgb;
+
+class ColorSpaceProcessor;
+
+class ColorSpaceManager {
+ public:
+  /* Convert user-specified colorspace to a colorspace that we are able to
+   * convert to and from. If the colorspace is u_colorspace_auto, we auto
+   * detect a colorspace. */
+ static ustring detect_known_colorspace(ustring colorspace,
+ const char *file_format,
+ bool is_float);
+
+ /* Test if colorspace is for non-color data. */
+ static bool colorspace_is_data(ustring colorspace);
+
+ /* Convert pixels in the specified colorspace to scene linear color for
+ * rendering. Must be a colorspace returned from detect_known_colorspace. */
+ template<typename T>
+ static void to_scene_linear(ustring colorspace,
+ T *pixels,
+ size_t num_pixels,
+ bool compress_as_srgb);
+
+ /* Efficiently convert pixels to scene linear colorspace at render time,
+ * for OSL where the image texture cache contains original pixels. The
+ * handle is valid for the lifetime of the application. */
+ static ColorSpaceProcessor *get_processor(ustring colorspace);
+ static void to_scene_linear(ColorSpaceProcessor *processor, float *pixel, int channels);
+
+ /* Clear memory when the application exits. Invalidates all processors. */
+ static void free_memory();
+
+ private:
+ static void is_builtin_colorspace(ustring colorspace, bool &is_no_op, bool &is_srgb);
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __COLORSPACE_H__ */
diff --git a/intern/cycles/render/constant_fold.cpp b/intern/cycles/render/constant_fold.cpp
index e475ff60eef..f3809ee8d80 100644
--- a/intern/cycles/render/constant_fold.cpp
+++ b/intern/cycles/render/constant_fold.cpp
@@ -301,7 +301,7 @@ void ConstantFolder::fold_mix(NodeMix type, bool clamp) const
}
}
-void ConstantFolder::fold_math(NodeMath type, bool clamp) const
+void ConstantFolder::fold_math(NodeMathType type) const
{
ShaderInput *value1_in = node->input("Value1");
ShaderInput *value2_in = node->input("Value2");
@@ -310,25 +310,25 @@ void ConstantFolder::fold_math(NodeMath type, bool clamp) const
case NODE_MATH_ADD:
/* X + 0 == 0 + X == X */
if (is_zero(value1_in)) {
- try_bypass_or_make_constant(value2_in, clamp);
+ try_bypass_or_make_constant(value2_in);
}
else if (is_zero(value2_in)) {
- try_bypass_or_make_constant(value1_in, clamp);
+ try_bypass_or_make_constant(value1_in);
}
break;
case NODE_MATH_SUBTRACT:
/* X - 0 == X */
if (is_zero(value2_in)) {
- try_bypass_or_make_constant(value1_in, clamp);
+ try_bypass_or_make_constant(value1_in);
}
break;
case NODE_MATH_MULTIPLY:
/* X * 1 == 1 * X == X */
if (is_one(value1_in)) {
- try_bypass_or_make_constant(value2_in, clamp);
+ try_bypass_or_make_constant(value2_in);
}
else if (is_one(value2_in)) {
- try_bypass_or_make_constant(value1_in, clamp);
+ try_bypass_or_make_constant(value1_in);
}
/* X * 0 == 0 * X == 0 */
else if (is_zero(value1_in) || is_zero(value2_in)) {
@@ -338,7 +338,7 @@ void ConstantFolder::fold_math(NodeMath type, bool clamp) const
case NODE_MATH_DIVIDE:
/* X / 1 == X */
if (is_one(value2_in)) {
- try_bypass_or_make_constant(value1_in, clamp);
+ try_bypass_or_make_constant(value1_in);
}
/* 0 / X == 0 */
else if (is_zero(value1_in)) {
@@ -352,17 +352,18 @@ void ConstantFolder::fold_math(NodeMath type, bool clamp) const
}
/* X ^ 1 == X */
else if (is_one(value2_in)) {
- try_bypass_or_make_constant(value1_in, clamp);
+ try_bypass_or_make_constant(value1_in);
}
default:
break;
}
}
-void ConstantFolder::fold_vector_math(NodeVectorMath type) const
+void ConstantFolder::fold_vector_math(NodeVectorMathType type) const
{
ShaderInput *vector1_in = node->input("Vector1");
ShaderInput *vector2_in = node->input("Vector2");
+ ShaderInput *scale_in = node->input("Scale");
switch (type) {
case NODE_VECTOR_MATH_ADD:
@@ -380,6 +381,27 @@ void ConstantFolder::fold_vector_math(NodeVectorMath type) const
try_bypass_or_make_constant(vector1_in);
}
break;
+ case NODE_VECTOR_MATH_MULTIPLY:
+ /* X * 0 == 0 * X == 0 */
+ if (is_zero(vector1_in) || is_zero(vector2_in)) {
+ make_zero();
+ } /* X * 1 == 1 * X == X */
+ else if (is_one(vector1_in)) {
+ try_bypass_or_make_constant(vector2_in);
+ }
+ else if (is_one(vector2_in)) {
+ try_bypass_or_make_constant(vector1_in);
+ }
+ break;
+ case NODE_VECTOR_MATH_DIVIDE:
+ /* X / 0 == 0 / X == 0 */
+ if (is_zero(vector1_in) || is_zero(vector2_in)) {
+ make_zero();
+ } /* X / 1 == X */
+ else if (is_one(vector2_in)) {
+ try_bypass_or_make_constant(vector1_in);
+ }
+ break;
case NODE_VECTOR_MATH_DOT_PRODUCT:
case NODE_VECTOR_MATH_CROSS_PRODUCT:
/* X * 0 == 0 * X == 0 */
@@ -387,9 +409,41 @@ void ConstantFolder::fold_vector_math(NodeVectorMath type) const
make_zero();
}
break;
+ case NODE_VECTOR_MATH_LENGTH:
+ case NODE_VECTOR_MATH_ABSOLUTE:
+ if (is_zero(vector1_in)) {
+ make_zero();
+ }
+ break;
+ case NODE_VECTOR_MATH_SCALE:
+ /* X * 0 == 0 * X == 0 */
+ if (is_zero(vector1_in) || is_zero(scale_in)) {
+ make_zero();
+ } /* X * 1 == X */
+ else if (is_one(scale_in)) {
+ try_bypass_or_make_constant(vector1_in);
+ }
+ break;
default:
break;
}
}
+void ConstantFolder::fold_mapping(NodeMappingType type) const
+{
+ ShaderInput *vector_in = node->input("Vector");
+ ShaderInput *location_in = node->input("Location");
+ ShaderInput *rotation_in = node->input("Rotation");
+ ShaderInput *scale_in = node->input("Scale");
+
+ if (is_zero(scale_in)) {
+ make_zero();
+ }
+ else if ((is_zero(location_in) || type == NODE_MAPPING_TYPE_VECTOR ||
+ type == NODE_MAPPING_TYPE_NORMAL) &&
+ is_zero(rotation_in) && is_one(scale_in)) {
+ try_bypass_or_make_constant(vector_in);
+ }
+}
+
CCL_NAMESPACE_END
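
Aside: fold_vector_math() and fold_mapping() above apply simple algebraic identities: an input of zero collapses the node to a constant zero, and a neutral input (scale of one, zero rotation and, where allowed, zero location) lets the folder bypass the node entirely. A standalone sketch of that decision logic, with made-up enum names rather than the real folder API:

    #include <cstdio>

    enum FoldResult { FOLD_NONE, FOLD_ZERO, FOLD_BYPASS_INPUT };

    /* Decide what a scale-like node folds to, given what is known about its inputs. */
    static FoldResult fold_scale(bool vector_is_zero, bool scale_is_zero, bool scale_is_one)
    {
      if (vector_is_zero || scale_is_zero) {
        return FOLD_ZERO; /* X * 0 == 0 * X == 0 */
      }
      if (scale_is_one) {
        return FOLD_BYPASS_INPUT; /* X * 1 == X */
      }
      return FOLD_NONE;
    }

    int main()
    {
      std::printf("%d %d %d\n",
                  fold_scale(false, true, false),   /* FOLD_ZERO */
                  fold_scale(false, false, true),   /* FOLD_BYPASS_INPUT */
                  fold_scale(false, false, false)); /* FOLD_NONE */
      return 0;
    }
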
diff --git a/intern/cycles/render/constant_fold.h b/intern/cycles/render/constant_fold.h
index c14b94868dc..fec4123c361 100644
--- a/intern/cycles/render/constant_fold.h
+++ b/intern/cycles/render/constant_fold.h
@@ -17,8 +17,8 @@
#ifndef __CONSTANT_FOLD_H__
#define __CONSTANT_FOLD_H__
-#include "util/util_types.h"
#include "kernel/svm/svm_types.h"
+#include "util/util_types.h"
CCL_NAMESPACE_BEGIN
@@ -64,8 +64,9 @@ class ConstantFolder {
/* Specific nodes. */
void fold_mix(NodeMix type, bool clamp) const;
- void fold_math(NodeMath type, bool clamp) const;
- void fold_vector_math(NodeVectorMath type) const;
+ void fold_math(NodeMathType type) const;
+ void fold_vector_math(NodeVectorMathType type) const;
+ void fold_mapping(NodeMappingType type) const;
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/coverage.cpp b/intern/cycles/render/coverage.cpp
index 0a29903728a..99d4daa6961 100644
--- a/intern/cycles/render/coverage.cpp
+++ b/intern/cycles/render/coverage.cpp
@@ -15,13 +15,16 @@
*/
#include "render/coverage.h"
+#include "render/buffers.h"
+
#include "kernel/kernel_compat_cpu.h"
+#include "kernel/kernel_types.h"
#include "kernel/split/kernel_split_data.h"
+
#include "kernel/kernel_globals.h"
#include "kernel/kernel_id_passes.h"
-#include "kernel/kernel_types.h"
+
#include "util/util_map.h"
-#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/render/coverage.h b/intern/cycles/render/coverage.h
index 3d1f6a2b040..12182c614da 100644
--- a/intern/cycles/render/coverage.h
+++ b/intern/cycles/render/coverage.h
@@ -14,18 +14,19 @@
* limitations under the License.
*/
-#include "render/buffers.h"
-#include "kernel/kernel_compat_cpu.h"
-#include "kernel/split/kernel_split_data.h"
-#include "kernel/kernel_globals.h"
+#ifndef __COVERAGE_H__
+#define __COVERAGE_H__
+
#include "util/util_map.h"
#include "util/util_vector.h"
-#ifndef __COVERAGE_H__
-# define __COVERAGE_H__
-
CCL_NAMESPACE_BEGIN
+struct KernelGlobals;
+class RenderTile;
+
+typedef unordered_map<float, float> CoverageMap;
+
class Coverage {
public:
Coverage(KernelGlobals *kg_, RenderTile &tile_) : kg(kg_), tile(tile_)
diff --git a/intern/cycles/render/curves.cpp b/intern/cycles/render/curves.cpp
index 49ab70541c2..db48d8b6430 100644
--- a/intern/cycles/render/curves.cpp
+++ b/intern/cycles/render/curves.cpp
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "device/device.h"
#include "render/curves.h"
+#include "device/device.h"
#include "render/mesh.h"
#include "render/object.h"
#include "render/scene.h"
@@ -36,13 +36,12 @@ void curvebounds(float *lower, float *upper, float3 *p, int dim)
float *p2 = &p[2].x;
float *p3 = &p[3].x;
- float fc = 0.71f;
+ /* Catmull-Rom weights. */
float curve_coef[4];
curve_coef[0] = p1[dim];
- curve_coef[1] = -fc * p0[dim] + fc * p2[dim];
- curve_coef[2] = 2.0f * fc * p0[dim] + (fc - 3.0f) * p1[dim] + (3.0f - 2.0f * fc) * p2[dim] -
- fc * p3[dim];
- curve_coef[3] = -fc * p0[dim] + (2.0f - fc) * p1[dim] + (fc - 2.0f) * p2[dim] + fc * p3[dim];
+ curve_coef[1] = 0.5f * (-p0[dim] + p2[dim]);
+ curve_coef[2] = 0.5f * (2 * p0[dim] - 5 * p1[dim] + 4 * p2[dim] - p3[dim]);
+ curve_coef[3] = 0.5f * (-p0[dim] + 3 * p1[dim] - 3 * p2[dim] + p3[dim]);
float discroot = curve_coef[2] * curve_coef[2] - 3 * curve_coef[3] * curve_coef[1];
float ta = -1.0f;
@@ -77,112 +76,4 @@ void curvebounds(float *lower, float *upper, float3 *p, int dim)
*lower = min(*lower, min(exa, exb));
}
-/* Hair System Manager */
-
-CurveSystemManager::CurveSystemManager()
-{
- primitive = CURVE_LINE_SEGMENTS;
- curve_shape = CURVE_THICK;
- line_method = CURVE_CORRECTED;
- triangle_method = CURVE_CAMERA_TRIANGLES;
- resolution = 3;
- subdivisions = 3;
-
- minimum_width = 0.0f;
- maximum_width = 0.0f;
-
- use_curves = true;
- use_encasing = true;
- use_backfacing = false;
- use_tangent_normal_geometry = false;
-
- need_update = true;
- need_mesh_update = false;
-}
-
-CurveSystemManager::~CurveSystemManager()
-{
-}
-
-void CurveSystemManager::device_update(Device *device,
- DeviceScene *dscene,
- Scene * /*scene*/,
- Progress &progress)
-{
- if (!need_update)
- return;
-
- device_free(device, dscene);
-
- progress.set_status("Updating Hair settings", "Copying Hair settings to device");
-
- KernelCurves *kcurve = &dscene->data.curve;
-
- kcurve->curveflags = 0;
-
- if (use_curves) {
- if (primitive == CURVE_SEGMENTS || primitive == CURVE_RIBBONS)
- kcurve->curveflags |= CURVE_KN_INTERPOLATE;
- if (primitive == CURVE_RIBBONS)
- kcurve->curveflags |= CURVE_KN_RIBBONS;
-
- if (line_method == CURVE_ACCURATE)
- kcurve->curveflags |= CURVE_KN_ACCURATE;
- else if (line_method == CURVE_CORRECTED)
- kcurve->curveflags |= CURVE_KN_INTERSECTCORRECTION;
-
- if (use_tangent_normal_geometry)
- kcurve->curveflags |= CURVE_KN_TRUETANGENTGNORMAL;
- if (use_backfacing)
- kcurve->curveflags |= CURVE_KN_BACKFACING;
- if (use_encasing)
- kcurve->curveflags |= CURVE_KN_ENCLOSEFILTER;
-
- kcurve->minimum_width = minimum_width;
- kcurve->maximum_width = maximum_width;
- kcurve->subdivisions = subdivisions;
- }
-
- if (progress.get_cancel())
- return;
-
- need_update = false;
-}
-
-void CurveSystemManager::device_free(Device * /*device*/, DeviceScene * /*dscene*/)
-{
-}
-
-bool CurveSystemManager::modified(const CurveSystemManager &CurveSystemManager)
-{
- return !(
- curve_shape == CurveSystemManager.curve_shape &&
- line_method == CurveSystemManager.line_method && primitive == CurveSystemManager.primitive &&
- use_encasing == CurveSystemManager.use_encasing &&
- use_tangent_normal_geometry == CurveSystemManager.use_tangent_normal_geometry &&
- minimum_width == CurveSystemManager.minimum_width &&
- maximum_width == CurveSystemManager.maximum_width &&
- use_backfacing == CurveSystemManager.use_backfacing &&
- triangle_method == CurveSystemManager.triangle_method &&
- resolution == CurveSystemManager.resolution && use_curves == CurveSystemManager.use_curves &&
- subdivisions == CurveSystemManager.subdivisions);
-}
-
-bool CurveSystemManager::modified_mesh(const CurveSystemManager &CurveSystemManager)
-{
- return !(
- primitive == CurveSystemManager.primitive && curve_shape == CurveSystemManager.curve_shape &&
- triangle_method == CurveSystemManager.triangle_method &&
- resolution == CurveSystemManager.resolution && use_curves == CurveSystemManager.use_curves);
-}
-
-void CurveSystemManager::tag_update(Scene * /*scene*/)
-{
- need_update = true;
-}
-
-void CurveSystemManager::tag_update_mesh()
-{
- need_mesh_update = true;
-}
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/curves.h b/intern/cycles/render/curves.h
index 81e7b4ac88d..c52fcb9c882 100644
--- a/intern/cycles/render/curves.h
+++ b/intern/cycles/render/curves.h
@@ -20,6 +20,8 @@
#include "util/util_array.h"
#include "util/util_types.h"
+#include "render/hair.h"
+
CCL_NAMESPACE_BEGIN
class Device;
@@ -29,33 +31,6 @@ class Scene;
void curvebounds(float *lower, float *upper, float3 *p, int dim);
-typedef enum CurvePrimitiveType {
- CURVE_TRIANGLES = 0,
- CURVE_LINE_SEGMENTS = 1,
- CURVE_SEGMENTS = 2,
- CURVE_RIBBONS = 3,
-
- CURVE_NUM_PRIMITIVE_TYPES,
-} CurvePrimitiveType;
-
-typedef enum CurveShapeType {
- CURVE_RIBBON = 0,
- CURVE_THICK = 1,
-
- CURVE_NUM_SHAPE_TYPES,
-} CurveShapeType;
-
-typedef enum CurveTriangleMethod {
- CURVE_CAMERA_TRIANGLES,
- CURVE_TESSELATED_TRIANGLES
-} CurveTriangleMethod;
-
-typedef enum CurveLineMethod {
- CURVE_ACCURATE,
- CURVE_CORRECTED,
- CURVE_UNCORRECTED
-} CurveLineMethod;
-
class ParticleCurveData {
public:
@@ -75,46 +50,12 @@ class ParticleCurveData {
array<int> curve_keynum;
array<float> curve_length;
array<float2> curve_uv;
- array<float3> curve_vcol;
+ array<float4> curve_vcol;
array<float3> curvekey_co;
array<float> curvekey_time;
};
-/* HairSystem Manager */
-
-class CurveSystemManager {
- public:
- CurvePrimitiveType primitive;
- CurveShapeType curve_shape;
- CurveLineMethod line_method;
- CurveTriangleMethod triangle_method;
- int resolution;
- int subdivisions;
-
- float minimum_width;
- float maximum_width;
-
- bool use_curves;
- bool use_encasing;
- bool use_backfacing;
- bool use_tangent_normal_geometry;
-
- bool need_update;
- bool need_mesh_update;
-
- CurveSystemManager();
- ~CurveSystemManager();
-
- void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
- void device_free(Device *device, DeviceScene *dscene);
- bool modified(const CurveSystemManager &CurveSystemManager);
- bool modified_mesh(const CurveSystemManager &CurveSystemManager);
-
- void tag_update(Scene *scene);
- void tag_update_mesh();
-};
-
CCL_NAMESPACE_END
#endif /* __CURVES_H__ */
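Two things in the curves.h change above are easy to miss: the per-scene CurveSystemManager and the curve enums are removed (the surviving shape/primitive types now live in render/hair.h, and the shape becomes a per-Hair setting filled in from the scene parameters), and ParticleCurveData::curve_vcol widens from float3 to float4, so exported hair vertex colors now carry an alpha component. The sketch below only illustrates the RGB-to-RGBA widening; Col3/Col4 are simplified stand-ins for the Cycles float3/float4 types, not the actual API.

    #include <vector>

    /* Simplified stand-ins, for illustration only. */
    struct Col3 { float r, g, b; };
    struct Col4 { float r, g, b, a; };

    /* Widen RGB vertex colors to RGBA with an opaque alpha, the way an
     * exporter has to once curve_vcol stores float4 instead of float3. */
    static std::vector<Col4> widen_vertex_colors(const std::vector<Col3> &rgb)
    {
      std::vector<Col4> rgba;
      rgba.reserve(rgb.size());
      for (const Col3 &c : rgb) {
        rgba.push_back({c.r, c.g, c.b, 1.0f});
      }
      return rgba;
    }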
diff --git a/intern/cycles/render/denoising.cpp b/intern/cycles/render/denoising.cpp
index c4f21d9c771..76408ca4849 100644
--- a/intern/cycles/render/denoising.cpp
+++ b/intern/cycles/render/denoising.cpp
@@ -21,6 +21,7 @@
#include "util/util_foreach.h"
#include "util/util_map.h"
#include "util/util_system.h"
+#include "util/util_task.h"
#include "util/util_time.h"
#include <OpenImageIO/filesystem.h>
@@ -69,8 +70,8 @@ static void print_progress(int num, int total, int frame, int num_frames)
fflush(stdout);
}
-/* Splits in at its last dot, setting suffix to the part after the dot and in to the part before it.
- * Returns whether a dot was found. */
+/* Splits in at its last dot, setting suffix to the part after the dot and in to the part before
+ * it. Returns whether a dot was found. */
static bool split_last_dot(string &in, string &suffix)
{
size_t pos = in.rfind(".");
@@ -84,9 +85,8 @@ static bool split_last_dot(string &in, string &suffix)
/* Separate channel names as generated by Blender.
* If views is true:
- * Inputs are expected in the form RenderLayer.Pass.View.Channel, sets renderlayer to "RenderLayer.View"
- * Otherwise:
- * Inputs are expected in the form RenderLayer.Pass.Channel */
+ *   Inputs are expected in the form RenderLayer.Pass.View.Channel, and renderlayer is set to
+ *   "RenderLayer.View". Otherwise: Inputs are expected in the form RenderLayer.Pass.Channel */
static bool parse_channel_name(
string name, string &renderlayer, string &pass, string &channel, bool multiview_channels)
{
@@ -271,42 +271,45 @@ bool DenoiseTask::acquire_tile(Device *device, Device *tile_device, RenderTile &
*
* However, since there is only one large memory, the denoised result has to be written to
* a different buffer to avoid having to copy an entire horizontal slice of the image. */
-void DenoiseTask::map_neighboring_tiles(RenderTile *tiles, Device *tile_device)
+void DenoiseTask::map_neighboring_tiles(RenderTileNeighbors &neighbors, Device *tile_device)
{
+ RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER];
+ RenderTile &target_tile = neighbors.target;
+
/* Fill tile information. */
- for (int i = 0; i < 9; i++) {
- if (i == 4) {
+ for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
+ if (i == RenderTileNeighbors::CENTER) {
continue;
}
+ RenderTile &tile = neighbors.tiles[i];
int dx = (i % 3) - 1;
int dy = (i / 3) - 1;
- tiles[i].x = clamp(tiles[4].x + dx * denoiser->tile_size.x, 0, image.width);
- tiles[i].w = clamp(tiles[4].x + (dx + 1) * denoiser->tile_size.x, 0, image.width) - tiles[i].x;
- tiles[i].y = clamp(tiles[4].y + dy * denoiser->tile_size.y, 0, image.height);
- tiles[i].h = clamp(tiles[4].y + (dy + 1) * denoiser->tile_size.y, 0, image.height) -
- tiles[i].y;
+ tile.x = clamp(center_tile.x + dx * denoiser->tile_size.x, 0, image.width);
+ tile.w = clamp(center_tile.x + (dx + 1) * denoiser->tile_size.x, 0, image.width) - tile.x;
+ tile.y = clamp(center_tile.y + dy * denoiser->tile_size.y, 0, image.height);
+ tile.h = clamp(center_tile.y + (dy + 1) * denoiser->tile_size.y, 0, image.height) - tile.y;
- tiles[i].buffer = tiles[4].buffer;
- tiles[i].offset = tiles[4].offset;
- tiles[i].stride = image.width;
+ tile.buffer = center_tile.buffer;
+ tile.offset = center_tile.offset;
+ tile.stride = image.width;
}
/* Allocate output buffer. */
device_vector<float> *output_mem = new device_vector<float>(
tile_device, "denoising_output", MEM_READ_WRITE);
- output_mem->alloc(OUTPUT_NUM_CHANNELS * tiles[4].w * tiles[4].h);
+ output_mem->alloc(OUTPUT_NUM_CHANNELS * center_tile.w * center_tile.h);
/* Fill output buffer with noisy image, assumed by kernel_filter_finalize
* when skipping denoising of some pixels. */
float *result = output_mem->data();
- float *in = &image.pixels[image.num_channels * (tiles[4].y * image.width + tiles[4].x)];
+ float *in = &image.pixels[image.num_channels * (center_tile.y * image.width + center_tile.x)];
const DenoiseImageLayer &layer = image.layers[current_layer];
const int *input_to_image_channel = layer.input_to_image_channel.data();
- for (int y = 0; y < tiles[4].h; y++) {
- for (int x = 0; x < tiles[4].w; x++, result += OUTPUT_NUM_CHANNELS) {
+ for (int y = 0; y < center_tile.h; y++) {
+ for (int x = 0; x < center_tile.w; x++, result += OUTPUT_NUM_CHANNELS) {
for (int i = 0; i < OUTPUT_NUM_CHANNELS; i++) {
result[i] = in[image.num_channels * x + input_to_image_channel[INPUT_NOISY_IMAGE + i]];
}
@@ -317,35 +320,38 @@ void DenoiseTask::map_neighboring_tiles(RenderTile *tiles, Device *tile_device)
output_mem->copy_to_device();
/* Fill output tile info. */
- tiles[9] = tiles[4];
- tiles[9].buffer = output_mem->device_pointer;
- tiles[9].stride = tiles[9].w;
- tiles[9].offset -= tiles[9].x + tiles[9].y * tiles[9].stride;
+ target_tile = center_tile;
+ target_tile.buffer = output_mem->device_pointer;
+ target_tile.stride = target_tile.w;
+ target_tile.offset -= target_tile.x + target_tile.y * target_tile.stride;
thread_scoped_lock output_lock(output_mutex);
- assert(output_pixels.count(tiles[4].tile_index) == 0);
- output_pixels[tiles[9].tile_index] = output_mem;
+ assert(output_pixels.count(center_tile.tile_index) == 0);
+ output_pixels[target_tile.tile_index] = output_mem;
}
-void DenoiseTask::unmap_neighboring_tiles(RenderTile *tiles)
+void DenoiseTask::unmap_neighboring_tiles(RenderTileNeighbors &neighbors)
{
+ RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER];
+ RenderTile &target_tile = neighbors.target;
+
thread_scoped_lock output_lock(output_mutex);
- assert(output_pixels.count(tiles[4].tile_index) == 1);
- device_vector<float> *output_mem = output_pixels[tiles[9].tile_index];
- output_pixels.erase(tiles[4].tile_index);
+ assert(output_pixels.count(center_tile.tile_index) == 1);
+ device_vector<float> *output_mem = output_pixels[target_tile.tile_index];
+ output_pixels.erase(center_tile.tile_index);
output_lock.unlock();
/* Copy denoised pixels from device. */
- output_mem->copy_from_device(0, OUTPUT_NUM_CHANNELS * tiles[9].w, tiles[9].h);
+ output_mem->copy_from_device(0, OUTPUT_NUM_CHANNELS * target_tile.w, target_tile.h);
float *result = output_mem->data();
- float *out = &image.pixels[image.num_channels * (tiles[9].y * image.width + tiles[9].x)];
+ float *out = &image.pixels[image.num_channels * (target_tile.y * image.width + target_tile.x)];
const DenoiseImageLayer &layer = image.layers[current_layer];
const int *output_to_image_channel = layer.output_to_image_channel.data();
- for (int y = 0; y < tiles[9].h; y++) {
- for (int x = 0; x < tiles[9].w; x++, result += OUTPUT_NUM_CHANNELS) {
+ for (int y = 0; y < target_tile.h; y++) {
+ for (int x = 0; x < target_tile.w; x++, result += OUTPUT_NUM_CHANNELS) {
for (int i = 0; i < OUTPUT_NUM_CHANNELS; i++) {
out[image.num_channels * x + output_to_image_channel[i]] = result[i];
}
@@ -378,8 +384,9 @@ void DenoiseTask::create_task(DeviceTask &task)
/* Denoising parameters. */
task.denoising = denoiser->params;
- task.denoising_do_filter = true;
- task.denoising_write_passes = false;
+ task.denoising.type = DENOISER_NLM;
+ task.denoising.use = true;
+ task.denoising.store_passes = false;
task.denoising_from_render = false;
task.denoising_frames.resize(neighbor_frames.size());
@@ -492,6 +499,19 @@ bool DenoiseTask::load_input_pixels(int layer)
}
}
+ /* Highlight compression */
+ data = buffer_data + 8;
+ for (int y = 0; y < h; y++) {
+ for (int x = 0; x < w; x++) {
+ int idx = INPUT_NUM_CHANNELS * (y * w + x);
+ float3 color = make_float3(data[idx], data[idx + 1], data[idx + 2]);
+ color = color_highlight_compress(color, NULL);
+ data[idx] = color.x;
+ data[idx + 1] = color.y;
+ data[idx + 2] = color.z;
+ }
+ }
+
buffer_data += frame_stride;
}
@@ -631,7 +651,8 @@ bool DenoiseImage::parse_channels(const ImageSpec &in_spec, string &error)
layer.name = name;
layer.samples = samples;
- /* If the sample value isn't set yet, check if there is a layer-specific one in the input file. */
+ /* If the sample value isn't set yet, check if there is a layer-specific one in the input file.
+ */
if (layer.samples < 1) {
string sample_string = in_spec.get_string_attribute("cycles." + name + ".samples", "");
if (sample_string != "") {
@@ -853,8 +874,10 @@ Denoiser::Denoiser(DeviceInfo &device_info)
TaskScheduler::init();
/* Initialize device. */
- DeviceRequestedFeatures req;
device = Device::create(device_info, stats, profiler, true);
+
+ DeviceRequestedFeatures req;
+ req.use_denoising = true;
device->load_kernels(req);
}
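The map_neighboring_tiles() rework above keeps the same 3x3 neighborhood arithmetic, only addressed through RenderTileNeighbors instead of a raw tile array: neighbor i maps to the offset (i % 3 - 1, i / 3 - 1) around the center tile, and its rectangle is clamped to the image bounds. A minimal standalone sketch of that indexing, using hypothetical Rect/neighbor_rect names rather than the Cycles types:

    #include <algorithm>
    #include <cstdio>

    struct Rect { int x, y, w, h; };

    /* Clamped rectangle of neighbor i (0..8, 4 = center) around a center tile,
     * mirroring the (i % 3 - 1, i / 3 - 1) indexing used above. */
    static Rect neighbor_rect(const Rect &center, int i, int tile_w, int tile_h, int image_w, int image_h)
    {
      const int dx = (i % 3) - 1;
      const int dy = (i / 3) - 1;
      Rect r;
      r.x = std::min(std::max(center.x + dx * tile_w, 0), image_w);
      r.w = std::min(std::max(center.x + (dx + 1) * tile_w, 0), image_w) - r.x;
      r.y = std::min(std::max(center.y + dy * tile_h, 0), image_h);
      r.h = std::min(std::max(center.y + (dy + 1) * tile_h, 0), image_h) - r.y;
      return r;
    }

    int main()
    {
      /* A 64x64 center tile in the top-left corner of a 100x100 image: the
       * neighbors to the left and above clamp to zero width/height. */
      Rect center = {0, 0, 64, 64};
      for (int i = 0; i < 9; i++) {
        Rect r = neighbor_rect(center, i, 64, 64, 100, 100);
        std::printf("tile %d: x=%d y=%d w=%d h=%d\n", i, r.x, r.y, r.w, r.h);
      }
      return 0;
    }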
diff --git a/intern/cycles/render/denoising.h b/intern/cycles/render/denoising.h
index dcb842a4603..c1b4d0a5596 100644
--- a/intern/cycles/render/denoising.h
+++ b/intern/cycles/render/denoising.h
@@ -23,8 +23,8 @@
#include "render/buffers.h"
#include "util/util_string.h"
-#include "util/util_vector.h"
#include "util/util_unique_ptr.h"
+#include "util/util_vector.h"
#include <OpenImageIO/imageio.h>
@@ -87,14 +87,17 @@ struct DenoiseImageLayer {
/* input_to_image_channel of the secondary frames, if any are used. */
vector<vector<int>> neighbor_input_to_image_channel;
- /* Write i-th channel of the processing output to output_to_image_channel[i]-th channel of the file. */
+ /* Write i-th channel of the processing output to output_to_image_channel[i]-th channel of the
+ * file. */
vector<int> output_to_image_channel;
- /* Detect whether this layer contains a full set of channels and set up the offsets accordingly. */
+ /* Detect whether this layer contains a full set of channels and set up the offsets accordingly.
+ */
bool detect_denoising_channels();
/* Map the channels of a secondary frame to the channels that are required for processing,
- * fill neighbor_input_to_image_channel if all are present or return false if a channel are missing. */
+ * fill neighbor_input_to_image_channel if all are present, or return false if a channel is
+ * missing. */
bool match_channels(int neighbor,
const std::vector<string> &channelnames,
const std::vector<string> &neighbor_channelnames);
@@ -125,7 +128,8 @@ class DenoiseImage {
void free();
- /* Open the input image, parse its channels, open the output image and allocate the output buffer. */
+ /* Open the input image, parse its channels, open the output image and allocate the output
+ * buffer. */
bool load(const string &in_filepath, string &error);
/* Load neighboring frames. */
@@ -139,7 +143,8 @@ class DenoiseImage {
bool save_output(const string &out_filepath, string &error);
protected:
- /* Parse input file channels, separate them into DenoiseImageLayers, detect DenoiseImageLayers with full channel sets,
+ /* Parse input file channels, separate them into DenoiseImageLayers,
+ * detect DenoiseImageLayers with full channel sets,
* fill layers and set up the output channels and passthrough map. */
bool parse_channels(const ImageSpec &in_spec, string &error);
@@ -191,8 +196,8 @@ class DenoiseTask {
/* Device task callbacks */
bool acquire_tile(Device *device, Device *tile_device, RenderTile &tile);
- void map_neighboring_tiles(RenderTile *tiles, Device *tile_device);
- void unmap_neighboring_tiles(RenderTile *tiles);
+ void map_neighboring_tiles(RenderTileNeighbors &neighbors, Device *tile_device);
+ void unmap_neighboring_tiles(RenderTileNeighbors &neighbors);
void release_tile();
bool get_cancel();
};
diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp
index d6c44b66117..d7cbf4a3581 100644
--- a/intern/cycles/render/film.cpp
+++ b/intern/cycles/render/film.cpp
@@ -14,9 +14,9 @@
* limitations under the License.
*/
-#include "render/camera.h"
-#include "device/device.h"
#include "render/film.h"
+#include "device/device.h"
+#include "render/camera.h"
#include "render/integrator.h"
#include "render/mesh.h"
#include "render/scene.h"
@@ -41,7 +41,34 @@ static bool compare_pass_order(const Pass &a, const Pass &b)
void Pass::add(PassType type, vector<Pass> &passes, const char *name)
{
for (size_t i = 0; i < passes.size(); i++) {
- if (passes[i].type == type && (name ? (passes[i].name == name) : passes[i].name.empty())) {
+ if (passes[i].type != type) {
+ continue;
+ }
+
+ /* An empty name is used as a placeholder to signal that any pass of
+ * that type is fine (because the content always is the same).
+ * This is important to support divide_type: If the pass that has a
+ * divide_type is added first, a pass for divide_type with an empty
+ * name will be added. Then, if a matching pass with a name is later
+ * requested, the existing placeholder will be renamed to that.
+ * If the divide_type is explicitly allocated with a name first and
+ * then again as part of another pass, the second one will just be
+ * skipped because that type already exists. */
+
+ /* If no name is specified, any pass of the correct type will match. */
+ if (name == NULL) {
+ return;
+ }
+
+ /* If we already have a placeholder pass, rename that one. */
+ if (passes[i].name.empty()) {
+ passes[i].name = name;
+ return;
+ }
+
+ /* If neither existing nor requested pass have placeholder name, they
+ * must match. */
+ if (name == passes[i].name) {
return;
}
}
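The block above gives Pass::add() three outcomes when a pass of the requested type already exists: with no name, any existing pass of that type matches; with a name, an unnamed placeholder is renamed in place; otherwise only an exact name match is reused, and anything else falls through to appending a new pass. A small self-contained sketch of that decision, using a simplified pass struct rather than the real Pass type:

    #include <string>
    #include <vector>

    struct SimplePass { int type; std::string name; };

    /* Returns true if an existing pass matched (possibly after renaming a
     * placeholder), false if the caller should append a new pass. */
    static bool match_or_rename(std::vector<SimplePass> &passes, int type, const char *name)
    {
      for (SimplePass &p : passes) {
        if (p.type != type)
          continue;
        if (name == nullptr)
          return true;   /* any pass of this type is fine */
        if (p.name.empty()) {
          p.name = name; /* rename the placeholder */
          return true;
        }
        if (p.name == name)
          return true;   /* exact match already present */
      }
      return false;
    }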
@@ -128,7 +155,6 @@ void Pass::add(PassType type, vector<Pass> &passes, const char *name)
case PASS_DIFFUSE_COLOR:
case PASS_GLOSSY_COLOR:
case PASS_TRANSMISSION_COLOR:
- case PASS_SUBSURFACE_COLOR:
pass.components = 4;
break;
case PASS_DIFFUSE_DIRECT:
@@ -149,12 +175,6 @@ void Pass::add(PassType type, vector<Pass> &passes, const char *name)
pass.exposure = true;
pass.divide_type = PASS_TRANSMISSION_COLOR;
break;
- case PASS_SUBSURFACE_DIRECT:
- case PASS_SUBSURFACE_INDIRECT:
- pass.components = 4;
- pass.exposure = true;
- pass.divide_type = PASS_SUBSURFACE_COLOR;
- break;
case PASS_VOLUME_DIRECT:
case PASS_VOLUME_INDIRECT:
pass.components = 4;
@@ -163,6 +183,23 @@ void Pass::add(PassType type, vector<Pass> &passes, const char *name)
case PASS_CRYPTOMATTE:
pass.components = 4;
break;
+ case PASS_ADAPTIVE_AUX_BUFFER:
+ pass.components = 4;
+ break;
+ case PASS_SAMPLE_COUNT:
+ pass.components = 1;
+ pass.exposure = false;
+ break;
+ case PASS_AOV_COLOR:
+ pass.components = 4;
+ break;
+ case PASS_AOV_VALUE:
+ pass.components = 1;
+ break;
+ case PASS_BAKE_PRIMITIVE:
+ case PASS_BAKE_DIFFERENTIAL:
+ pass.components = 4;
+ break;
default:
assert(false);
break;
@@ -170,9 +207,10 @@ void Pass::add(PassType type, vector<Pass> &passes, const char *name)
passes.push_back(pass);
- /* order from by components, to ensure alignment so passes with size 4
- * come first and then passes with size 1 */
- sort(&passes[0], &passes[0] + passes.size(), compare_pass_order);
+ /* Order by components, to ensure alignment so passes with size 4
+ * come first and then passes with size 1. Note this must use stable sort
+ * so cryptomatte passes remain in the right order. */
+ stable_sort(&passes[0], &passes[0] + passes.size(), compare_pass_order);
if (pass.divide_type != PASS_NONE)
Pass::add(pass.divide_type, passes);
@@ -267,7 +305,7 @@ NODE_DEFINE(Film)
NodeType *type = NodeType::add("film", create);
SOCKET_FLOAT(exposure, "Exposure", 0.8f);
- SOCKET_FLOAT(pass_alpha_threshold, "Pass Alpha Threshold", 0.5f);
+ SOCKET_FLOAT(pass_alpha_threshold, "Pass Alpha Threshold", 0.0f);
static NodeEnum filter_enum;
filter_enum.insert("box", FILTER_BOX);
@@ -281,12 +319,11 @@ NODE_DEFINE(Film)
SOCKET_FLOAT(mist_depth, "Mist Depth", 100.0f);
SOCKET_FLOAT(mist_falloff, "Mist Falloff", 1.0f);
- SOCKET_BOOLEAN(use_sample_clamp, "Use Sample Clamp", false);
-
SOCKET_BOOLEAN(denoising_data_pass, "Generate Denoising Data Pass", false);
SOCKET_BOOLEAN(denoising_clean_pass, "Generate Denoising Clean Pass", false);
SOCKET_BOOLEAN(denoising_prefiltered_pass, "Generate Denoising Prefiltered Pass", false);
SOCKET_INT(denoising_flags, "Denoising Flags", 0);
+ SOCKET_BOOLEAN(use_adaptive_sampling, "Use Adaptive Sampling", false);
return type;
}
@@ -298,6 +335,7 @@ Film::Film() : Node(node_type)
use_light_visibility = false;
filter_table_offset = TABLE_OFFSET_INVALID;
cryptomatte_passes = CRYPT_NONE;
+ display_pass = PASS_COMBINED;
need_update = true;
}
@@ -318,9 +356,18 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
/* update __data */
kfilm->exposure = exposure;
kfilm->pass_flag = 0;
+
+ kfilm->display_pass_stride = -1;
+ kfilm->display_pass_components = 0;
+ kfilm->display_divide_pass_stride = -1;
+ kfilm->use_display_exposure = false;
+ kfilm->use_display_pass_alpha = (display_pass == PASS_COMBINED);
+
kfilm->light_pass_flag = 0;
kfilm->pass_stride = 0;
- kfilm->use_light_pass = use_light_visibility || use_sample_clamp;
+ kfilm->use_light_pass = use_light_visibility;
+ kfilm->pass_aov_value_num = 0;
+ kfilm->pass_aov_color_num = 0;
bool have_cryptomatte = false;
@@ -343,11 +390,13 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
if (pass.type <= PASS_CATEGORY_MAIN_END) {
kfilm->pass_flag |= pass_flag;
}
- else {
- assert(pass.type <= PASS_CATEGORY_LIGHT_END);
+ else if (pass.type <= PASS_CATEGORY_LIGHT_END) {
kfilm->use_light_pass = 1;
kfilm->light_pass_flag |= pass_flag;
}
+ else {
+ assert(pass.type <= PASS_CATEGORY_BAKE_END);
+ }
switch (pass.type) {
case PASS_COMBINED:
@@ -403,9 +452,6 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
case PASS_TRANSMISSION_COLOR:
kfilm->pass_transmission_color = kfilm->pass_stride;
break;
- case PASS_SUBSURFACE_COLOR:
- kfilm->pass_subsurface_color = kfilm->pass_stride;
- break;
case PASS_DIFFUSE_INDIRECT:
kfilm->pass_diffuse_indirect = kfilm->pass_stride;
break;
@@ -415,9 +461,6 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
case PASS_TRANSMISSION_INDIRECT:
kfilm->pass_transmission_indirect = kfilm->pass_stride;
break;
- case PASS_SUBSURFACE_INDIRECT:
- kfilm->pass_subsurface_indirect = kfilm->pass_stride;
- break;
case PASS_VOLUME_INDIRECT:
kfilm->pass_volume_indirect = kfilm->pass_stride;
break;
@@ -430,13 +473,17 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
case PASS_TRANSMISSION_DIRECT:
kfilm->pass_transmission_direct = kfilm->pass_stride;
break;
- case PASS_SUBSURFACE_DIRECT:
- kfilm->pass_subsurface_direct = kfilm->pass_stride;
- break;
case PASS_VOLUME_DIRECT:
kfilm->pass_volume_direct = kfilm->pass_stride;
break;
+ case PASS_BAKE_PRIMITIVE:
+ kfilm->pass_bake_primitive = kfilm->pass_stride;
+ break;
+ case PASS_BAKE_DIFFERENTIAL:
+ kfilm->pass_bake_differential = kfilm->pass_stride;
+ break;
+
#ifdef WITH_CYCLES_DEBUG
case PASS_BVH_TRAVERSED_NODES:
kfilm->pass_bvh_traversed_nodes = kfilm->pass_stride;
@@ -459,11 +506,39 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
kfilm->pass_stride;
have_cryptomatte = true;
break;
+ case PASS_ADAPTIVE_AUX_BUFFER:
+ kfilm->pass_adaptive_aux_buffer = kfilm->pass_stride;
+ break;
+ case PASS_SAMPLE_COUNT:
+ kfilm->pass_sample_count = kfilm->pass_stride;
+ break;
+ case PASS_AOV_COLOR:
+ if (kfilm->pass_aov_color_num == 0) {
+ kfilm->pass_aov_color = kfilm->pass_stride;
+ }
+ kfilm->pass_aov_color_num++;
+ break;
+ case PASS_AOV_VALUE:
+ if (kfilm->pass_aov_value_num == 0) {
+ kfilm->pass_aov_value = kfilm->pass_stride;
+ }
+ kfilm->pass_aov_value_num++;
+ break;
default:
assert(false);
break;
}
+ if (pass.type == display_pass) {
+ kfilm->display_pass_stride = kfilm->pass_stride;
+ kfilm->display_pass_components = pass.components;
+ kfilm->use_display_exposure = pass.exposure && (kfilm->exposure != 1.0f);
+ }
+ else if (pass.type == PASS_DIFFUSE_COLOR || pass.type == PASS_TRANSMISSION_COLOR ||
+ pass.type == PASS_GLOSSY_COLOR) {
+ kfilm->display_divide_pass_stride = kfilm->pass_stride;
+ }
+
kfilm->pass_stride += pass.components;
}
@@ -485,7 +560,18 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
}
kfilm->pass_stride = align_up(kfilm->pass_stride, 4);
- kfilm->pass_alpha_threshold = pass_alpha_threshold;
+
+ /* When displaying the normal/uv pass in the viewport we need to disable
+ * transparency.
+ *
+ * We also don't need to perform light accumulations. Later we want to optimize this to suppress
+ * light calculations. */
+ if (display_pass == PASS_NORMAL || display_pass == PASS_UV) {
+ kfilm->use_light_pass = 0;
+ }
+ else {
+ kfilm->pass_alpha_threshold = pass_alpha_threshold;
+ }
/* update filter table */
vector<float> table = filter_table(filter_type, filter_width);
@@ -518,18 +604,24 @@ bool Film::modified(const Film &film)
return !Node::equals(film) || !Pass::equals(passes, film.passes);
}
-void Film::tag_passes_update(Scene *scene, const vector<Pass> &passes_)
+void Film::tag_passes_update(Scene *scene, const vector<Pass> &passes_, bool update_passes)
{
if (Pass::contains(passes, PASS_UV) != Pass::contains(passes_, PASS_UV)) {
- scene->mesh_manager->tag_update(scene);
+ scene->geometry_manager->tag_update(scene);
foreach (Shader *shader, scene->shaders)
- shader->need_update_mesh = true;
+ shader->need_update_geometry = true;
+ }
+ else if (Pass::contains(passes, PASS_MOTION) != Pass::contains(passes_, PASS_MOTION)) {
+ scene->geometry_manager->tag_update(scene);
+ }
+ else if (Pass::contains(passes, PASS_AO) != Pass::contains(passes_, PASS_AO)) {
+ scene->integrator->tag_update(scene);
}
- else if (Pass::contains(passes, PASS_MOTION) != Pass::contains(passes_, PASS_MOTION))
- scene->mesh_manager->tag_update(scene);
- passes = passes_;
+ if (update_passes) {
+ passes = passes_;
+ }
}
void Film::tag_update(Scene * /*scene*/)
@@ -537,4 +629,27 @@ void Film::tag_update(Scene * /*scene*/)
need_update = true;
}
+int Film::get_aov_offset(string name, bool &is_color)
+{
+ int num_color = 0, num_value = 0;
+ foreach (const Pass &pass, passes) {
+ if (pass.type == PASS_AOV_COLOR) {
+ num_color++;
+ }
+ else if (pass.type == PASS_AOV_VALUE) {
+ num_value++;
+ }
+ else {
+ continue;
+ }
+
+ if (pass.name == name) {
+ is_color = (pass.type == PASS_AOV_COLOR);
+ return (is_color ? num_color : num_value) - 1;
+ }
+ }
+
+ return -1;
+}
+
CCL_NAMESPACE_END
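Film::get_aov_offset() above returns the position of a named AOV among the AOVs of its own kind, counting color and value passes separately. For example, with AOV passes ordered (color "Albedo", value "Mask", color "Heat"), "Heat" yields offset 1 with is_color set, "Mask" yields 0 with is_color cleared, and an unknown name returns -1. A stripped-down sketch of the same counting, with simplified types standing in for Pass:

    #include <string>
    #include <vector>

    enum SimpleAOVType { AOV_COLOR, AOV_VALUE };
    struct SimpleAOV { SimpleAOVType type; std::string name; };

    /* Offset of `name` among AOVs of its own kind, or -1 if absent. Assumes
     * the vector only holds AOV passes; the real function skips the other
     * pass types while iterating. */
    static int aov_offset(const std::vector<SimpleAOV> &aovs, const std::string &name, bool &is_color)
    {
      int num_color = 0, num_value = 0;
      for (const SimpleAOV &aov : aovs) {
        if (aov.type == AOV_COLOR)
          num_color++;
        else
          num_value++;
        if (aov.name == name) {
          is_color = (aov.type == AOV_COLOR);
          return (is_color ? num_color : num_value) - 1;
        }
      }
      return -1;
    }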
diff --git a/intern/cycles/render/film.h b/intern/cycles/render/film.h
index 1cfa7c3b77d..aae8fb404b0 100644
--- a/intern/cycles/render/film.h
+++ b/intern/cycles/render/film.h
@@ -64,6 +64,7 @@ class Film : public Node {
int denoising_flags;
float pass_alpha_threshold;
+ PassType display_pass;
int pass_stride;
int denoising_data_offset;
int denoising_clean_offset;
@@ -77,10 +78,11 @@ class Film : public Node {
float mist_falloff;
bool use_light_visibility;
- bool use_sample_clamp;
CryptomatteType cryptomatte_passes;
int cryptomatte_depth;
+ bool use_adaptive_sampling;
+
bool need_update;
Film();
@@ -90,8 +92,10 @@ class Film : public Node {
void device_free(Device *device, DeviceScene *dscene, Scene *scene);
bool modified(const Film &film);
- void tag_passes_update(Scene *scene, const vector<Pass> &passes_);
+ void tag_passes_update(Scene *scene, const vector<Pass> &passes_, bool update_passes = true);
void tag_update(Scene *scene);
+
+ int get_aov_offset(string name, bool &is_color);
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/geometry.cpp b/intern/cycles/render/geometry.cpp
new file mode 100644
index 00000000000..3d1b6e1d865
--- /dev/null
+++ b/intern/cycles/render/geometry.cpp
@@ -0,0 +1,1473 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "bvh/bvh.h"
+#include "bvh/bvh_build.h"
+#include "bvh/bvh_embree.h"
+
+#include "device/device.h"
+
+#include "render/attribute.h"
+#include "render/camera.h"
+#include "render/geometry.h"
+#include "render/hair.h"
+#include "render/light.h"
+#include "render/mesh.h"
+#include "render/nodes.h"
+#include "render/object.h"
+#include "render/scene.h"
+#include "render/shader.h"
+#include "render/stats.h"
+
+#include "subd/subd_patch_table.h"
+#include "subd/subd_split.h"
+
+#include "kernel/osl/osl_globals.h"
+
+#include "util/util_foreach.h"
+#include "util/util_logging.h"
+#include "util/util_progress.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Geometry */
+
+NODE_ABSTRACT_DEFINE(Geometry)
+{
+ NodeType *type = NodeType::add("geometry_base", NULL);
+
+ SOCKET_UINT(motion_steps, "Motion Steps", 3);
+ SOCKET_BOOLEAN(use_motion_blur, "Use Motion Blur", false);
+
+ return type;
+}
+
+Geometry::Geometry(const NodeType *node_type, const Type type)
+ : Node(node_type), type(type), attributes(this, ATTR_PRIM_GEOMETRY)
+{
+ need_update = true;
+ need_update_rebuild = false;
+
+ transform_applied = false;
+ transform_negative_scaled = false;
+ transform_normal = transform_identity();
+ bounds = BoundBox::empty;
+
+ has_volume = false;
+ has_surface_bssrdf = false;
+
+ bvh = NULL;
+ attr_map_offset = 0;
+ optix_prim_offset = 0;
+ prim_offset = 0;
+}
+
+Geometry::~Geometry()
+{
+ delete bvh;
+}
+
+void Geometry::clear()
+{
+ used_shaders.clear();
+ transform_applied = false;
+ transform_negative_scaled = false;
+ transform_normal = transform_identity();
+}
+
+bool Geometry::need_attribute(Scene *scene, AttributeStandard std)
+{
+ if (std == ATTR_STD_NONE)
+ return false;
+
+ if (scene->need_global_attribute(std))
+ return true;
+
+ foreach (Shader *shader, used_shaders)
+ if (shader->attributes.find(std))
+ return true;
+
+ return false;
+}
+
+bool Geometry::need_attribute(Scene * /*scene*/, ustring name)
+{
+ if (name == ustring())
+ return false;
+
+ foreach (Shader *shader, used_shaders)
+ if (shader->attributes.find(name))
+ return true;
+
+ return false;
+}
+
+float Geometry::motion_time(int step) const
+{
+ return (motion_steps > 1) ? 2.0f * step / (motion_steps - 1) - 1.0f : 0.0f;
+}
+
+int Geometry::motion_step(float time) const
+{
+ if (motion_steps > 1) {
+ int attr_step = 0;
+
+ for (int step = 0; step < motion_steps; step++) {
+ float step_time = motion_time(step);
+ if (step_time == time) {
+ return attr_step;
+ }
+
+ /* Center step is stored in a separate attribute. */
+ if (step != motion_steps / 2) {
+ attr_step++;
+ }
+ }
+ }
+
+ return -1;
+}
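The two helpers above define the mapping between motion step indices, shutter-relative times in [-1, 1], and indices into the motion attribute; the center step is excluded from the attribute index because it is the regular (undeformed) position. A tiny sketch that prints the mapping for motion_steps = 5 (an arbitrary example value):

    #include <cstdio>

    /* Reproduces the step <-> time mapping above for motion_steps = 5. */
    int main()
    {
      const int motion_steps = 5;
      int attr_step = 0;
      for (int step = 0; step < motion_steps; step++) {
        const float time = 2.0f * step / (motion_steps - 1) - 1.0f;
        if (step == motion_steps / 2)
          std::printf("step %d: time %+.2f (center, stored separately)\n", step, time);
        else
          std::printf("step %d: time %+.2f -> motion attribute index %d\n", step, time, attr_step++);
      }
      return 0;
    }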
+
+bool Geometry::need_build_bvh(BVHLayout layout) const
+{
+ return !transform_applied || has_surface_bssrdf || layout == BVH_LAYOUT_OPTIX;
+}
+
+bool Geometry::is_instanced() const
+{
+ /* Currently we treat subsurface objects as instanced.
+ *
+ * While it might be not very optimal for ray traversal, it avoids having
+ * duplicated BVH in the memory, saving quite some space.
+ */
+ return !transform_applied || has_surface_bssrdf;
+}
+
+bool Geometry::has_true_displacement() const
+{
+ foreach (Shader *shader, used_shaders) {
+ if (shader->has_displacement && shader->displacement_method != DISPLACE_BUMP) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void Geometry::compute_bvh(
+ Device *device, DeviceScene *dscene, SceneParams *params, Progress *progress, int n, int total)
+{
+ if (progress->get_cancel())
+ return;
+
+ compute_bounds();
+
+ const BVHLayout bvh_layout = BVHParams::best_bvh_layout(params->bvh_layout,
+ device->get_bvh_layout_mask());
+ if (need_build_bvh(bvh_layout)) {
+ string msg = "Updating Geometry BVH ";
+ if (name.empty())
+ msg += string_printf("%u/%u", (uint)(n + 1), (uint)total);
+ else
+ msg += string_printf("%s %u/%u", name.c_str(), (uint)(n + 1), (uint)total);
+
+ Object object;
+ object.geometry = this;
+
+ vector<Geometry *> geometry;
+ geometry.push_back(this);
+ vector<Object *> objects;
+ objects.push_back(&object);
+
+ if (bvh && !need_update_rebuild) {
+ progress->set_status(msg, "Refitting BVH");
+
+ bvh->geometry = geometry;
+ bvh->objects = objects;
+
+ bvh->refit(*progress);
+ }
+ else {
+ progress->set_status(msg, "Building BVH");
+
+ BVHParams bparams;
+ bparams.use_spatial_split = params->use_bvh_spatial_split;
+ bparams.bvh_layout = bvh_layout;
+ bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
+ params->use_bvh_unaligned_nodes;
+ bparams.num_motion_triangle_steps = params->num_bvh_time_steps;
+ bparams.num_motion_curve_steps = params->num_bvh_time_steps;
+ bparams.bvh_type = params->bvh_type;
+ bparams.curve_subdivisions = params->curve_subdivisions();
+
+ delete bvh;
+ bvh = BVH::create(bparams, geometry, objects);
+ MEM_GUARDED_CALL(progress, bvh->build, *progress);
+ }
+ }
+
+ need_update = false;
+ need_update_rebuild = false;
+}
+
+bool Geometry::has_motion_blur() const
+{
+ return (use_motion_blur && attributes.find(ATTR_STD_MOTION_VERTEX_POSITION));
+}
+
+bool Geometry::has_voxel_attributes() const
+{
+ foreach (const Attribute &attr, attributes.attributes) {
+ if (attr.element == ATTR_ELEMENT_VOXEL) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void Geometry::tag_update(Scene *scene, bool rebuild)
+{
+ need_update = true;
+
+ if (rebuild) {
+ need_update_rebuild = true;
+ scene->light_manager->need_update = true;
+ }
+ else {
+ foreach (Shader *shader, used_shaders)
+ if (shader->has_surface_emission)
+ scene->light_manager->need_update = true;
+ }
+
+ scene->geometry_manager->need_update = true;
+ scene->object_manager->need_update = true;
+}
+
+/* Geometry Manager */
+
+GeometryManager::GeometryManager()
+{
+ need_update = true;
+ need_flags_update = true;
+}
+
+GeometryManager::~GeometryManager()
+{
+}
+
+void GeometryManager::update_osl_attributes(Device *device,
+ Scene *scene,
+ vector<AttributeRequestSet> &geom_attributes)
+{
+#ifdef WITH_OSL
+ /* For OSL, a hash map is used to look up the attribute by name. */
+ OSLGlobals *og = (OSLGlobals *)device->osl_memory();
+
+ og->object_name_map.clear();
+ og->attribute_map.clear();
+ og->object_names.clear();
+
+ og->attribute_map.resize(scene->objects.size() * ATTR_PRIM_TYPES);
+
+ for (size_t i = 0; i < scene->objects.size(); i++) {
+ /* set object name to object index map */
+ Object *object = scene->objects[i];
+ og->object_name_map[object->name] = i;
+ og->object_names.push_back(object->name);
+
+ /* set object attributes */
+ foreach (ParamValue &attr, object->attributes) {
+ OSLGlobals::Attribute osl_attr;
+
+ osl_attr.type = attr.type();
+ osl_attr.desc.element = ATTR_ELEMENT_OBJECT;
+ osl_attr.value = attr;
+ osl_attr.desc.offset = 0;
+ osl_attr.desc.flags = 0;
+
+ og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_GEOMETRY][attr.name()] = osl_attr;
+ og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_SUBD][attr.name()] = osl_attr;
+ }
+
+ /* find geometry attributes */
+ size_t j;
+
+ for (j = 0; j < scene->geometry.size(); j++)
+ if (scene->geometry[j] == object->geometry)
+ break;
+
+ AttributeRequestSet &attributes = geom_attributes[j];
+
+ /* set object attributes */
+ foreach (AttributeRequest &req, attributes.requests) {
+ OSLGlobals::Attribute osl_attr;
+
+ if (req.desc.element != ATTR_ELEMENT_NONE) {
+ osl_attr.desc = req.desc;
+
+ if (req.type == TypeDesc::TypeFloat)
+ osl_attr.type = TypeDesc::TypeFloat;
+ else if (req.type == TypeDesc::TypeMatrix)
+ osl_attr.type = TypeDesc::TypeMatrix;
+ else if (req.type == TypeFloat2)
+ osl_attr.type = TypeFloat2;
+ else if (req.type == TypeRGBA)
+ osl_attr.type = TypeRGBA;
+ else
+ osl_attr.type = TypeDesc::TypeColor;
+
+ if (req.std != ATTR_STD_NONE) {
+ /* if standard attribute, add lookup by geom: name convention */
+ ustring stdname(string("geom:") + string(Attribute::standard_name(req.std)));
+ og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_GEOMETRY][stdname] = osl_attr;
+ }
+ else if (req.name != ustring()) {
+ /* add lookup by geometry attribute name */
+ og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_GEOMETRY][req.name] = osl_attr;
+ }
+ }
+
+ if (req.subd_desc.element != ATTR_ELEMENT_NONE) {
+ osl_attr.desc = req.subd_desc;
+
+ if (req.subd_type == TypeDesc::TypeFloat)
+ osl_attr.type = TypeDesc::TypeFloat;
+ else if (req.subd_type == TypeDesc::TypeMatrix)
+ osl_attr.type = TypeDesc::TypeMatrix;
+ else if (req.subd_type == TypeFloat2)
+ osl_attr.type = TypeFloat2;
+ else if (req.subd_type == TypeRGBA)
+ osl_attr.type = TypeRGBA;
+ else
+ osl_attr.type = TypeDesc::TypeColor;
+
+ if (req.std != ATTR_STD_NONE) {
+ /* if standard attribute, add lookup by geom: name convention */
+ ustring stdname(string("geom:") + string(Attribute::standard_name(req.std)));
+ og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_SUBD][stdname] = osl_attr;
+ }
+ else if (req.name != ustring()) {
+ /* add lookup by geometry attribute name */
+ og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_SUBD][req.name] = osl_attr;
+ }
+ }
+ }
+ }
+#else
+ (void)device;
+ (void)scene;
+ (void)geom_attributes;
+#endif
+}
+
+void GeometryManager::update_svm_attributes(Device *,
+ DeviceScene *dscene,
+ Scene *scene,
+ vector<AttributeRequestSet> &geom_attributes)
+{
+ /* For SVM, the attributes_map table is used to look up the offset of an
+ * attribute, based on a unique shader attribute id. */
+
+ /* compute array stride */
+ int attr_map_size = 0;
+
+ for (size_t i = 0; i < scene->geometry.size(); i++) {
+ Geometry *geom = scene->geometry[i];
+ geom->attr_map_offset = attr_map_size;
+ attr_map_size += (geom_attributes[i].size() + 1) * ATTR_PRIM_TYPES;
+ }
+
+ if (attr_map_size == 0)
+ return;
+
+ /* create attribute map */
+ uint4 *attr_map = dscene->attributes_map.alloc(attr_map_size);
+ memset(attr_map, 0, dscene->attributes_map.size() * sizeof(uint));
+
+ for (size_t i = 0; i < scene->geometry.size(); i++) {
+ Geometry *geom = scene->geometry[i];
+ AttributeRequestSet &attributes = geom_attributes[i];
+
+ /* set object attributes */
+ int index = geom->attr_map_offset;
+
+ foreach (AttributeRequest &req, attributes.requests) {
+ uint id;
+
+ if (req.std == ATTR_STD_NONE)
+ id = scene->shader_manager->get_attribute_id(req.name);
+ else
+ id = scene->shader_manager->get_attribute_id(req.std);
+
+ attr_map[index].x = id;
+ attr_map[index].y = req.desc.element;
+ attr_map[index].z = as_uint(req.desc.offset);
+
+ if (req.type == TypeDesc::TypeFloat)
+ attr_map[index].w = NODE_ATTR_FLOAT;
+ else if (req.type == TypeDesc::TypeMatrix)
+ attr_map[index].w = NODE_ATTR_MATRIX;
+ else if (req.type == TypeFloat2)
+ attr_map[index].w = NODE_ATTR_FLOAT2;
+ else if (req.type == TypeRGBA)
+ attr_map[index].w = NODE_ATTR_RGBA;
+ else
+ attr_map[index].w = NODE_ATTR_FLOAT3;
+
+ attr_map[index].w |= req.desc.flags << 8;
+
+ index++;
+
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (mesh->subd_faces.size()) {
+ attr_map[index].x = id;
+ attr_map[index].y = req.subd_desc.element;
+ attr_map[index].z = as_uint(req.subd_desc.offset);
+
+ if (req.subd_type == TypeDesc::TypeFloat)
+ attr_map[index].w = NODE_ATTR_FLOAT;
+ else if (req.subd_type == TypeDesc::TypeMatrix)
+ attr_map[index].w = NODE_ATTR_MATRIX;
+ else if (req.subd_type == TypeFloat2)
+ attr_map[index].w = NODE_ATTR_FLOAT2;
+ else if (req.subd_type == TypeRGBA)
+ attr_map[index].w = NODE_ATTR_RGBA;
+ else
+ attr_map[index].w = NODE_ATTR_FLOAT3;
+
+ attr_map[index].w |= req.subd_desc.flags << 8;
+ }
+ }
+
+ index++;
+ }
+
+ /* terminator */
+ for (int j = 0; j < ATTR_PRIM_TYPES; j++) {
+ attr_map[index].x = ATTR_STD_NONE;
+ attr_map[index].y = 0;
+ attr_map[index].z = 0;
+ attr_map[index].w = 0;
+
+ index++;
+ }
+ }
+
+ /* copy to device */
+ dscene->attributes_map.copy_to_device();
+}
+
+static void update_attribute_element_size(Geometry *geom,
+ Attribute *mattr,
+ AttributePrimitive prim,
+ size_t *attr_float_size,
+ size_t *attr_float2_size,
+ size_t *attr_float3_size,
+ size_t *attr_uchar4_size)
+{
+ if (mattr) {
+ size_t size = mattr->element_size(geom, prim);
+
+ if (mattr->element == ATTR_ELEMENT_VOXEL) {
+ /* pass */
+ }
+ else if (mattr->element == ATTR_ELEMENT_CORNER_BYTE) {
+ *attr_uchar4_size += size;
+ }
+ else if (mattr->type == TypeDesc::TypeFloat) {
+ *attr_float_size += size;
+ }
+ else if (mattr->type == TypeFloat2) {
+ *attr_float2_size += size;
+ }
+ else if (mattr->type == TypeDesc::TypeMatrix) {
+ *attr_float3_size += size * 4;
+ }
+ else {
+ *attr_float3_size += size;
+ }
+ }
+}
+
+static void update_attribute_element_offset(Geometry *geom,
+ device_vector<float> &attr_float,
+ size_t &attr_float_offset,
+ device_vector<float2> &attr_float2,
+ size_t &attr_float2_offset,
+ device_vector<float4> &attr_float3,
+ size_t &attr_float3_offset,
+ device_vector<uchar4> &attr_uchar4,
+ size_t &attr_uchar4_offset,
+ Attribute *mattr,
+ AttributePrimitive prim,
+ TypeDesc &type,
+ AttributeDescriptor &desc)
+{
+ if (mattr) {
+ /* store element and type */
+ desc.element = mattr->element;
+ desc.flags = mattr->flags;
+ type = mattr->type;
+
+ /* store attribute data in arrays */
+ size_t size = mattr->element_size(geom, prim);
+
+ AttributeElement &element = desc.element;
+ int &offset = desc.offset;
+
+ if (mattr->element == ATTR_ELEMENT_VOXEL) {
+ /* store slot in offset value */
+ ImageHandle &handle = mattr->data_voxel();
+ offset = handle.svm_slot();
+ }
+ else if (mattr->element == ATTR_ELEMENT_CORNER_BYTE) {
+ uchar4 *data = mattr->data_uchar4();
+ offset = attr_uchar4_offset;
+
+ assert(attr_uchar4.size() >= offset + size);
+ for (size_t k = 0; k < size; k++) {
+ attr_uchar4[offset + k] = data[k];
+ }
+ attr_uchar4_offset += size;
+ }
+ else if (mattr->type == TypeDesc::TypeFloat) {
+ float *data = mattr->data_float();
+ offset = attr_float_offset;
+
+ assert(attr_float.size() >= offset + size);
+ for (size_t k = 0; k < size; k++) {
+ attr_float[offset + k] = data[k];
+ }
+ attr_float_offset += size;
+ }
+ else if (mattr->type == TypeFloat2) {
+ float2 *data = mattr->data_float2();
+ offset = attr_float2_offset;
+
+ assert(attr_float2.size() >= offset + size);
+ for (size_t k = 0; k < size; k++) {
+ attr_float2[offset + k] = data[k];
+ }
+ attr_float2_offset += size;
+ }
+ else if (mattr->type == TypeDesc::TypeMatrix) {
+ Transform *tfm = mattr->data_transform();
+ offset = attr_float3_offset;
+
+ assert(attr_float3.size() >= offset + size * 3);
+ for (size_t k = 0; k < size * 3; k++) {
+ attr_float3[offset + k] = (&tfm->x)[k];
+ }
+ attr_float3_offset += size * 3;
+ }
+ else {
+ float4 *data = mattr->data_float4();
+ offset = attr_float3_offset;
+
+ assert(attr_float3.size() >= offset + size);
+ for (size_t k = 0; k < size; k++) {
+ attr_float3[offset + k] = data[k];
+ }
+ attr_float3_offset += size;
+ }
+
+ /* mesh vertex/curve index is global, not per object, so we sneak
+ * a correction for that in here */
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (mesh->subdivision_type == Mesh::SUBDIVISION_CATMULL_CLARK &&
+ desc.flags & ATTR_SUBDIVIDED) {
+ /* indices for subdivided attributes are retrieved
+ * from the patch table, so no correction is needed here. */
+ }
+ else if (element == ATTR_ELEMENT_VERTEX)
+ offset -= mesh->vert_offset;
+ else if (element == ATTR_ELEMENT_VERTEX_MOTION)
+ offset -= mesh->vert_offset;
+ else if (element == ATTR_ELEMENT_FACE) {
+ if (prim == ATTR_PRIM_GEOMETRY)
+ offset -= mesh->prim_offset;
+ else
+ offset -= mesh->face_offset;
+ }
+ else if (element == ATTR_ELEMENT_CORNER || element == ATTR_ELEMENT_CORNER_BYTE) {
+ if (prim == ATTR_PRIM_GEOMETRY)
+ offset -= 3 * mesh->prim_offset;
+ else
+ offset -= mesh->corner_offset;
+ }
+ }
+ else if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ if (element == ATTR_ELEMENT_CURVE)
+ offset -= hair->prim_offset;
+ else if (element == ATTR_ELEMENT_CURVE_KEY)
+ offset -= hair->curvekey_offset;
+ else if (element == ATTR_ELEMENT_CURVE_KEY_MOTION)
+ offset -= hair->curvekey_offset;
+ }
+ }
+ else {
+ /* attribute not found */
+ desc.element = ATTR_ELEMENT_NONE;
+ desc.offset = 0;
+ }
+}
+
+void GeometryManager::device_update_attributes(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ Progress &progress)
+{
+ progress.set_status("Updating Mesh", "Computing attributes");
+
+ /* gather per mesh requested attributes. as meshes may have multiple
+ * shaders assigned, this merges the requested attributes that have
+ * been set per shader by the shader manager */
+ vector<AttributeRequestSet> geom_attributes(scene->geometry.size());
+
+ for (size_t i = 0; i < scene->geometry.size(); i++) {
+ Geometry *geom = scene->geometry[i];
+
+ scene->need_global_attributes(geom_attributes[i]);
+
+ foreach (Shader *shader, geom->used_shaders) {
+ geom_attributes[i].add(shader->attributes);
+ }
+ }
+
+ /* mesh attributes are stored in a single array per data type. Here we fill
+ * those arrays, and set the offset and element type to create attribute
+ * maps next */
+
+ /* Pre-allocate attributes to avoid arrays re-allocation which would
+ * take 2x of overall attribute memory usage.
+ */
+ size_t attr_float_size = 0;
+ size_t attr_float2_size = 0;
+ size_t attr_float3_size = 0;
+ size_t attr_uchar4_size = 0;
+ for (size_t i = 0; i < scene->geometry.size(); i++) {
+ Geometry *geom = scene->geometry[i];
+ AttributeRequestSet &attributes = geom_attributes[i];
+ foreach (AttributeRequest &req, attributes.requests) {
+ Attribute *attr = geom->attributes.find(req);
+
+ update_attribute_element_size(geom,
+ attr,
+ ATTR_PRIM_GEOMETRY,
+ &attr_float_size,
+ &attr_float2_size,
+ &attr_float3_size,
+ &attr_uchar4_size);
+
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ Attribute *subd_attr = mesh->subd_attributes.find(req);
+
+ update_attribute_element_size(mesh,
+ subd_attr,
+ ATTR_PRIM_SUBD,
+ &attr_float_size,
+ &attr_float2_size,
+ &attr_float3_size,
+ &attr_uchar4_size);
+ }
+ }
+ }
+
+ dscene->attributes_float.alloc(attr_float_size);
+ dscene->attributes_float2.alloc(attr_float2_size);
+ dscene->attributes_float3.alloc(attr_float3_size);
+ dscene->attributes_uchar4.alloc(attr_uchar4_size);
+
+ size_t attr_float_offset = 0;
+ size_t attr_float2_offset = 0;
+ size_t attr_float3_offset = 0;
+ size_t attr_uchar4_offset = 0;
+
+ /* Fill in attributes. */
+ for (size_t i = 0; i < scene->geometry.size(); i++) {
+ Geometry *geom = scene->geometry[i];
+ AttributeRequestSet &attributes = geom_attributes[i];
+
+ /* todo: we now store std and name attributes from requests even if
+ * they actually refer to the same mesh attributes, optimize */
+ foreach (AttributeRequest &req, attributes.requests) {
+ Attribute *attr = geom->attributes.find(req);
+ update_attribute_element_offset(geom,
+ dscene->attributes_float,
+ attr_float_offset,
+ dscene->attributes_float2,
+ attr_float2_offset,
+ dscene->attributes_float3,
+ attr_float3_offset,
+ dscene->attributes_uchar4,
+ attr_uchar4_offset,
+ attr,
+ ATTR_PRIM_GEOMETRY,
+ req.type,
+ req.desc);
+
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ Attribute *subd_attr = mesh->subd_attributes.find(req);
+
+ update_attribute_element_offset(mesh,
+ dscene->attributes_float,
+ attr_float_offset,
+ dscene->attributes_float2,
+ attr_float2_offset,
+ dscene->attributes_float3,
+ attr_float3_offset,
+ dscene->attributes_uchar4,
+ attr_uchar4_offset,
+ subd_attr,
+ ATTR_PRIM_SUBD,
+ req.subd_type,
+ req.subd_desc);
+ }
+
+ if (progress.get_cancel())
+ return;
+ }
+ }
+
+ /* create attribute lookup maps */
+ if (scene->shader_manager->use_osl())
+ update_osl_attributes(device, scene, geom_attributes);
+
+ update_svm_attributes(device, dscene, scene, geom_attributes);
+
+ if (progress.get_cancel())
+ return;
+
+ /* copy to device */
+ progress.set_status("Updating Mesh", "Copying Attributes to device");
+
+ if (dscene->attributes_float.size()) {
+ dscene->attributes_float.copy_to_device();
+ }
+ if (dscene->attributes_float2.size()) {
+ dscene->attributes_float2.copy_to_device();
+ }
+ if (dscene->attributes_float3.size()) {
+ dscene->attributes_float3.copy_to_device();
+ }
+ if (dscene->attributes_uchar4.size()) {
+ dscene->attributes_uchar4.copy_to_device();
+ }
+
+ if (progress.get_cancel())
+ return;
+
+ /* After mesh attributes and patch tables have been copied to device memory,
+ * we need to update offsets in the objects. */
+ scene->object_manager->device_update_mesh_offsets(device, dscene, scene);
+}
+
+void GeometryManager::mesh_calc_offset(Scene *scene)
+{
+ size_t vert_size = 0;
+ size_t tri_size = 0;
+
+ size_t curve_key_size = 0;
+ size_t curve_size = 0;
+
+ size_t patch_size = 0;
+ size_t face_size = 0;
+ size_t corner_size = 0;
+
+ size_t optix_prim_size = 0;
+
+ foreach (Geometry *geom, scene->geometry) {
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+
+ mesh->vert_offset = vert_size;
+ mesh->prim_offset = tri_size;
+
+ mesh->patch_offset = patch_size;
+ mesh->face_offset = face_size;
+ mesh->corner_offset = corner_size;
+
+ vert_size += mesh->verts.size();
+ tri_size += mesh->num_triangles();
+
+ if (mesh->subd_faces.size()) {
+ Mesh::SubdFace &last = mesh->subd_faces[mesh->subd_faces.size() - 1];
+ patch_size += (last.ptex_offset + last.num_ptex_faces()) * 8;
+
+ /* patch tables are stored in same array so include them in patch_size */
+ if (mesh->patch_table) {
+ mesh->patch_table_offset = patch_size;
+ patch_size += mesh->patch_table->total_size();
+ }
+ }
+
+ face_size += mesh->subd_faces.size();
+ corner_size += mesh->subd_face_corners.size();
+
+ mesh->optix_prim_offset = optix_prim_size;
+ optix_prim_size += mesh->num_triangles();
+ }
+ else if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+
+ hair->curvekey_offset = curve_key_size;
+ hair->prim_offset = curve_size;
+
+ curve_key_size += hair->curve_keys.size();
+ curve_size += hair->num_curves();
+
+ hair->optix_prim_offset = optix_prim_size;
+ optix_prim_size += hair->num_segments();
+ }
+ }
+}
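mesh_calc_offset() above only accumulates running totals so that per-geometry indices can be turned into indices into the scene-global packed arrays (verts, triangles, curve keys, patches, and so on). A toy example with made-up sizes, showing how the offsets come out and how a local triangle index maps into the global arrays:

    #include <cstdio>
    #include <vector>

    /* Hypothetical sizes, only to illustrate how the running offsets are assigned. */
    struct GeomSizes { const char *name; int verts; int tris; };

    int main()
    {
      std::vector<GeomSizes> meshes = {{"MeshA", 8, 12}, {"MeshB", 4, 2}, {"MeshC", 100, 196}};

      size_t vert_size = 0, tri_size = 0;
      for (const GeomSizes &m : meshes) {
        /* vert_offset/prim_offset are the totals accumulated so far, exactly
         * as in GeometryManager::mesh_calc_offset() above. */
        std::printf("%s: vert_offset=%zu prim_offset=%zu\n", m.name, vert_size, tri_size);
        vert_size += m.verts;
        tri_size += m.tris;
      }
      /* The global triangle index of local triangle i of MeshB is prim_offset + i,
       * and its packed vertices start at 3 * (prim_offset + i) in prim_tri_verts. */
      return 0;
    }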
+
+void GeometryManager::device_update_mesh(
+ Device *, DeviceScene *dscene, Scene *scene, bool for_displacement, Progress &progress)
+{
+ /* Count. */
+ size_t vert_size = 0;
+ size_t tri_size = 0;
+
+ size_t curve_key_size = 0;
+ size_t curve_size = 0;
+
+ size_t patch_size = 0;
+
+ foreach (Geometry *geom, scene->geometry) {
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+
+ vert_size += mesh->verts.size();
+ tri_size += mesh->num_triangles();
+
+ if (mesh->subd_faces.size()) {
+ Mesh::SubdFace &last = mesh->subd_faces[mesh->subd_faces.size() - 1];
+ patch_size += (last.ptex_offset + last.num_ptex_faces()) * 8;
+
+ /* patch tables are stored in same array so include them in patch_size */
+ if (mesh->patch_table) {
+ mesh->patch_table_offset = patch_size;
+ patch_size += mesh->patch_table->total_size();
+ }
+ }
+ }
+ else if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+
+ curve_key_size += hair->curve_keys.size();
+ curve_size += hair->num_curves();
+ }
+ }
+
+ /* Create mapping from triangle to primitive triangle array. */
+ vector<uint> tri_prim_index(tri_size);
+ if (for_displacement) {
+ /* For displacement kernels we do some trickery to make them believe
+ * we've got all required data ready. However, that data is different
+ * from final render kernels since we don't have BVH yet, so can't
+ * really use same semantic of arrays.
+ */
+ foreach (Geometry *geom, scene->geometry) {
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ for (size_t i = 0; i < mesh->num_triangles(); ++i) {
+ tri_prim_index[i + mesh->prim_offset] = 3 * (i + mesh->prim_offset);
+ }
+ }
+ }
+ }
+ else {
+ for (size_t i = 0; i < dscene->prim_index.size(); ++i) {
+ if ((dscene->prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) {
+ tri_prim_index[dscene->prim_index[i]] = dscene->prim_tri_index[i];
+ }
+ }
+ }
+
+ /* Fill in all the arrays. */
+ if (tri_size != 0) {
+ /* normals */
+ progress.set_status("Updating Mesh", "Computing normals");
+
+ uint *tri_shader = dscene->tri_shader.alloc(tri_size);
+ float4 *vnormal = dscene->tri_vnormal.alloc(vert_size);
+ uint4 *tri_vindex = dscene->tri_vindex.alloc(tri_size);
+ uint *tri_patch = dscene->tri_patch.alloc(tri_size);
+ float2 *tri_patch_uv = dscene->tri_patch_uv.alloc(vert_size);
+
+ foreach (Geometry *geom, scene->geometry) {
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ mesh->pack_shaders(scene, &tri_shader[mesh->prim_offset]);
+ mesh->pack_normals(&vnormal[mesh->vert_offset]);
+ mesh->pack_verts(tri_prim_index,
+ &tri_vindex[mesh->prim_offset],
+ &tri_patch[mesh->prim_offset],
+ &tri_patch_uv[mesh->vert_offset],
+ mesh->vert_offset,
+ mesh->prim_offset);
+ if (progress.get_cancel())
+ return;
+ }
+ }
+
+ /* vertex coordinates */
+ progress.set_status("Updating Mesh", "Copying Mesh to device");
+
+ dscene->tri_shader.copy_to_device();
+ dscene->tri_vnormal.copy_to_device();
+ dscene->tri_vindex.copy_to_device();
+ dscene->tri_patch.copy_to_device();
+ dscene->tri_patch_uv.copy_to_device();
+ }
+
+ if (curve_size != 0) {
+ progress.set_status("Updating Mesh", "Copying Strands to device");
+
+ float4 *curve_keys = dscene->curve_keys.alloc(curve_key_size);
+ float4 *curves = dscene->curves.alloc(curve_size);
+
+ foreach (Geometry *geom, scene->geometry) {
+ if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ hair->pack_curves(scene,
+ &curve_keys[hair->curvekey_offset],
+ &curves[hair->prim_offset],
+ hair->curvekey_offset);
+ if (progress.get_cancel())
+ return;
+ }
+ }
+
+ dscene->curve_keys.copy_to_device();
+ dscene->curves.copy_to_device();
+ }
+
+ if (patch_size != 0) {
+ progress.set_status("Updating Mesh", "Copying Patches to device");
+
+ uint *patch_data = dscene->patches.alloc(patch_size);
+
+ foreach (Geometry *geom, scene->geometry) {
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ mesh->pack_patches(&patch_data[mesh->patch_offset],
+ mesh->vert_offset,
+ mesh->face_offset,
+ mesh->corner_offset);
+
+ if (mesh->patch_table) {
+ mesh->patch_table->copy_adjusting_offsets(&patch_data[mesh->patch_table_offset],
+ mesh->patch_table_offset);
+ }
+
+ if (progress.get_cancel())
+ return;
+ }
+ }
+
+ dscene->patches.copy_to_device();
+ }
+
+ if (for_displacement) {
+ float4 *prim_tri_verts = dscene->prim_tri_verts.alloc(tri_size * 3);
+ foreach (Geometry *geom, scene->geometry) {
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ for (size_t i = 0; i < mesh->num_triangles(); ++i) {
+ Mesh::Triangle t = mesh->get_triangle(i);
+ size_t offset = 3 * (i + mesh->prim_offset);
+ prim_tri_verts[offset + 0] = float3_to_float4(mesh->verts[t.v[0]]);
+ prim_tri_verts[offset + 1] = float3_to_float4(mesh->verts[t.v[1]]);
+ prim_tri_verts[offset + 2] = float3_to_float4(mesh->verts[t.v[2]]);
+ }
+ }
+ }
+ dscene->prim_tri_verts.copy_to_device();
+ }
+}
+
+void GeometryManager::device_update_bvh(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ Progress &progress)
+{
+ /* bvh build */
+ progress.set_status("Updating Scene BVH", "Building");
+
+ BVHParams bparams;
+ bparams.top_level = true;
+ bparams.bvh_layout = BVHParams::best_bvh_layout(scene->params.bvh_layout,
+ device->get_bvh_layout_mask());
+ bparams.use_spatial_split = scene->params.use_bvh_spatial_split;
+ bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
+ scene->params.use_bvh_unaligned_nodes;
+ bparams.num_motion_triangle_steps = scene->params.num_bvh_time_steps;
+ bparams.num_motion_curve_steps = scene->params.num_bvh_time_steps;
+ bparams.bvh_type = scene->params.bvh_type;
+ bparams.curve_subdivisions = scene->params.curve_subdivisions();
+
+ VLOG(1) << "Using " << bvh_layout_name(bparams.bvh_layout) << " layout.";
+
+ BVH *bvh = BVH::create(bparams, scene->geometry, scene->objects);
+ bvh->build(progress, &device->stats);
+
+ if (progress.get_cancel()) {
+#ifdef WITH_EMBREE
+ if (dscene->data.bvh.scene) {
+ BVHEmbree::destroy(dscene->data.bvh.scene);
+ dscene->data.bvh.scene = NULL;
+ }
+#endif
+ delete bvh;
+ return;
+ }
+
+ /* copy to device */
+ progress.set_status("Updating Scene BVH", "Copying BVH to device");
+
+ PackedBVH &pack = bvh->pack;
+
+ if (pack.nodes.size()) {
+ dscene->bvh_nodes.steal_data(pack.nodes);
+ dscene->bvh_nodes.copy_to_device();
+ }
+ if (pack.leaf_nodes.size()) {
+ dscene->bvh_leaf_nodes.steal_data(pack.leaf_nodes);
+ dscene->bvh_leaf_nodes.copy_to_device();
+ }
+ if (pack.object_node.size()) {
+ dscene->object_node.steal_data(pack.object_node);
+ dscene->object_node.copy_to_device();
+ }
+ if (pack.prim_tri_index.size()) {
+ dscene->prim_tri_index.steal_data(pack.prim_tri_index);
+ dscene->prim_tri_index.copy_to_device();
+ }
+ if (pack.prim_tri_verts.size()) {
+ dscene->prim_tri_verts.steal_data(pack.prim_tri_verts);
+ dscene->prim_tri_verts.copy_to_device();
+ }
+ if (pack.prim_type.size()) {
+ dscene->prim_type.steal_data(pack.prim_type);
+ dscene->prim_type.copy_to_device();
+ }
+ if (pack.prim_visibility.size()) {
+ dscene->prim_visibility.steal_data(pack.prim_visibility);
+ dscene->prim_visibility.copy_to_device();
+ }
+ if (pack.prim_index.size()) {
+ dscene->prim_index.steal_data(pack.prim_index);
+ dscene->prim_index.copy_to_device();
+ }
+ if (pack.prim_object.size()) {
+ dscene->prim_object.steal_data(pack.prim_object);
+ dscene->prim_object.copy_to_device();
+ }
+ if (pack.prim_time.size()) {
+ dscene->prim_time.steal_data(pack.prim_time);
+ dscene->prim_time.copy_to_device();
+ }
+
+ dscene->data.bvh.root = pack.root_index;
+ dscene->data.bvh.bvh_layout = bparams.bvh_layout;
+ dscene->data.bvh.use_bvh_steps = (scene->params.num_bvh_time_steps != 0);
+ dscene->data.bvh.curve_subdivisions = scene->params.curve_subdivisions();
+
+ bvh->copy_to_device(progress, dscene);
+
+ delete bvh;
+}
+
+void GeometryManager::device_update_preprocess(Device *device, Scene *scene, Progress &progress)
+{
+ if (!need_update && !need_flags_update) {
+ return;
+ }
+
+ progress.set_status("Updating Meshes Flags");
+
+ /* Update flags. */
+ bool volume_images_updated = false;
+
+ foreach (Geometry *geom, scene->geometry) {
+ geom->has_volume = false;
+
+ foreach (const Shader *shader, geom->used_shaders) {
+ if (shader->has_volume) {
+ geom->has_volume = true;
+ }
+ if (shader->has_surface_bssrdf) {
+ geom->has_surface_bssrdf = true;
+ }
+ }
+
+ if (need_update && geom->has_volume && geom->type == Geometry::MESH) {
+ /* Create volume meshes if there is voxel data. */
+ if (geom->has_voxel_attributes()) {
+ if (!volume_images_updated) {
+ progress.set_status("Updating Meshes Volume Bounds");
+ device_update_volume_images(device, scene, progress);
+ volume_images_updated = true;
+ }
+
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ create_volume_mesh(mesh, progress);
+ }
+ }
+
+ if (geom->type == Geometry::HAIR) {
+ /* Set curve shape, still a global scene setting for now. */
+ Hair *hair = static_cast<Hair *>(geom);
+ hair->curve_shape = scene->params.hair_shape;
+ }
+ }
+
+ need_flags_update = false;
+}
+
+void GeometryManager::device_update_displacement_images(Device *device,
+ Scene *scene,
+ Progress &progress)
+{
+ progress.set_status("Updating Displacement Images");
+ TaskPool pool;
+ ImageManager *image_manager = scene->image_manager;
+ set<int> bump_images;
+ foreach (Geometry *geom, scene->geometry) {
+ if (geom->need_update) {
+ foreach (Shader *shader, geom->used_shaders) {
+ if (!shader->has_displacement || shader->displacement_method == DISPLACE_BUMP) {
+ continue;
+ }
+ foreach (ShaderNode *node, shader->graph->nodes) {
+ if (node->special_type != SHADER_SPECIAL_TYPE_IMAGE_SLOT) {
+ continue;
+ }
+
+ ImageSlotTextureNode *image_node = static_cast<ImageSlotTextureNode *>(node);
+ for (int i = 0; i < image_node->handle.num_tiles(); i++) {
+ const int slot = image_node->handle.svm_slot(i);
+ if (slot != -1) {
+ bump_images.insert(slot);
+ }
+ }
+ }
+ }
+ }
+ }
+ foreach (int slot, bump_images) {
+ pool.push(function_bind(
+ &ImageManager::device_update_slot, image_manager, device, scene, slot, &progress));
+ }
+ pool.wait_work();
+}
+
+void GeometryManager::device_update_volume_images(Device *device, Scene *scene, Progress &progress)
+{
+ progress.set_status("Updating Volume Images");
+ TaskPool pool;
+ ImageManager *image_manager = scene->image_manager;
+ set<int> volume_images;
+
+ foreach (Geometry *geom, scene->geometry) {
+ if (!geom->need_update) {
+ continue;
+ }
+
+ foreach (Attribute &attr, geom->attributes.attributes) {
+ if (attr.element != ATTR_ELEMENT_VOXEL) {
+ continue;
+ }
+
+ ImageHandle &handle = attr.data_voxel();
+ const int slot = handle.svm_slot();
+ if (slot != -1) {
+ volume_images.insert(slot);
+ }
+ }
+ }
+
+ foreach (int slot, volume_images) {
+ pool.push(function_bind(
+ &ImageManager::device_update_slot, image_manager, device, scene, slot, &progress));
+ }
+ pool.wait_work();
+}
+
+void GeometryManager::device_update(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ Progress &progress)
+{
+ if (!need_update)
+ return;
+
+ VLOG(1) << "Total " << scene->geometry.size() << " meshes.";
+
+ bool true_displacement_used = false;
+ size_t total_tess_needed = 0;
+
+ foreach (Geometry *geom, scene->geometry) {
+ foreach (Shader *shader, geom->used_shaders) {
+ if (shader->need_update_geometry)
+ geom->need_update = true;
+ }
+
+ if (geom->need_update && geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+
+ /* Update normals. */
+ mesh->add_face_normals();
+ mesh->add_vertex_normals();
+
+ if (mesh->need_attribute(scene, ATTR_STD_POSITION_UNDISPLACED)) {
+ mesh->add_undisplaced();
+ }
+
+ /* Test if we need tessellation. */
+ if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE && mesh->num_subd_verts == 0 &&
+ mesh->subd_params) {
+ total_tess_needed++;
+ }
+
+ /* Test if we need displacement. */
+ if (mesh->has_true_displacement()) {
+ true_displacement_used = true;
+ }
+
+ if (progress.get_cancel())
+ return;
+ }
+ }
+
+ /* Tessellate meshes that are using subdivision */
+ if (total_tess_needed) {
+ Camera *dicing_camera = scene->dicing_camera;
+ dicing_camera->update(scene);
+
+ size_t i = 0;
+ foreach (Geometry *geom, scene->geometry) {
+ if (!(geom->need_update && geom->type == Geometry::MESH)) {
+ continue;
+ }
+
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE && mesh->num_subd_verts == 0 &&
+ mesh->subd_params) {
+ string msg = "Tessellating ";
+ if (mesh->name == "")
+ msg += string_printf("%u/%u", (uint)(i + 1), (uint)total_tess_needed);
+ else
+ msg += string_printf(
+ "%s %u/%u", mesh->name.c_str(), (uint)(i + 1), (uint)total_tess_needed);
+
+ progress.set_status("Updating Mesh", msg);
+
+ mesh->subd_params->camera = dicing_camera;
+ DiagSplit dsplit(*mesh->subd_params);
+ mesh->tessellate(&dsplit);
+
+ i++;
+
+ if (progress.get_cancel())
+ return;
+ }
+ }
+ }
+
+ /* Update images needed for true displacement. */
+ bool old_need_object_flags_update = false;
+ if (true_displacement_used) {
+ VLOG(1) << "Updating images used for true displacement.";
+ device_update_displacement_images(device, scene, progress);
+ old_need_object_flags_update = scene->object_manager->need_flags_update;
+ scene->object_manager->device_update_flags(device, dscene, scene, progress, false);
+ }
+
+ /* Device update. */
+ device_free(device, dscene);
+
+ mesh_calc_offset(scene);
+ if (true_displacement_used) {
+ device_update_mesh(device, dscene, scene, true, progress);
+ }
+ if (progress.get_cancel())
+ return;
+
+ device_update_attributes(device, dscene, scene, progress);
+ if (progress.get_cancel())
+ return;
+
+ /* Update displacement. */
+ bool displacement_done = false;
+ size_t num_bvh = 0;
+ BVHLayout bvh_layout = BVHParams::best_bvh_layout(scene->params.bvh_layout,
+ device->get_bvh_layout_mask());
+
+ foreach (Geometry *geom, scene->geometry) {
+ if (geom->need_update) {
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (displace(device, dscene, scene, mesh, progress)) {
+ displacement_done = true;
+ }
+ }
+
+ if (geom->need_build_bvh(bvh_layout)) {
+ num_bvh++;
+ }
+ }
+
+ if (progress.get_cancel())
+ return;
+ }
+
+ /* Device re-update after displacement. */
+ if (displacement_done) {
+ device_free(device, dscene);
+
+ device_update_attributes(device, dscene, scene, progress);
+ if (progress.get_cancel())
+ return;
+ }
+
+ TaskPool pool;
+
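+  /* Build per-geometry BVHs in parallel. `i` only advances for geometries that
+   * get their own BVH, presumably so the (n, total) progress passed to
+   * compute_bvh stays consistent with num_bvh. */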
+ size_t i = 0;
+ foreach (Geometry *geom, scene->geometry) {
+ if (geom->need_update) {
+ pool.push(function_bind(
+ &Geometry::compute_bvh, geom, device, dscene, &scene->params, &progress, i, num_bvh));
+ if (geom->need_build_bvh(bvh_layout)) {
+ i++;
+ }
+ }
+ }
+
+ TaskPool::Summary summary;
+ pool.wait_work(&summary);
+ VLOG(2) << "Objects BVH build pool statistics:\n" << summary.full_report();
+
+ foreach (Shader *shader, scene->shaders) {
+ shader->need_update_geometry = false;
+ }
+
+ Scene::MotionType need_motion = scene->need_motion();
+ bool motion_blur = need_motion == Scene::MOTION_BLUR;
+
+ /* Update objects. */
+ vector<Object *> volume_objects;
+ foreach (Object *object, scene->objects) {
+ object->compute_bounds(motion_blur);
+ }
+
+ if (progress.get_cancel())
+ return;
+
+ device_update_bvh(device, dscene, scene, progress);
+ if (progress.get_cancel())
+ return;
+
+ device_update_mesh(device, dscene, scene, false, progress);
+ if (progress.get_cancel())
+ return;
+
+ need_update = false;
+
+ if (true_displacement_used) {
+ /* Re-tag flags for update, so they're re-evaluated
+ * for meshes with correct bounding boxes.
+ *
+ * This wouldn't cause wrong results, just true
+     * displacement might be less optimal to calculate.
+ */
+ scene->object_manager->need_flags_update = old_need_object_flags_update;
+ }
+}
+
+void GeometryManager::device_free(Device *device, DeviceScene *dscene)
+{
+#ifdef WITH_EMBREE
+ if (dscene->data.bvh.scene) {
+ if (dscene->data.bvh.bvh_layout == BVH_LAYOUT_EMBREE)
+ BVHEmbree::destroy(dscene->data.bvh.scene);
+ dscene->data.bvh.scene = NULL;
+ }
+#endif
+
+ dscene->bvh_nodes.free();
+ dscene->bvh_leaf_nodes.free();
+ dscene->object_node.free();
+ dscene->prim_tri_verts.free();
+ dscene->prim_tri_index.free();
+ dscene->prim_type.free();
+ dscene->prim_visibility.free();
+ dscene->prim_index.free();
+ dscene->prim_object.free();
+ dscene->prim_time.free();
+ dscene->tri_shader.free();
+ dscene->tri_vnormal.free();
+ dscene->tri_vindex.free();
+ dscene->tri_patch.free();
+ dscene->tri_patch_uv.free();
+ dscene->curves.free();
+ dscene->curve_keys.free();
+ dscene->patches.free();
+ dscene->attributes_map.free();
+ dscene->attributes_float.free();
+ dscene->attributes_float2.free();
+ dscene->attributes_float3.free();
+ dscene->attributes_uchar4.free();
+
+ /* Signal for shaders like displacement not to do ray tracing. */
+ dscene->data.bvh.bvh_layout = BVH_LAYOUT_NONE;
+
+#ifdef WITH_OSL
+ OSLGlobals *og = (OSLGlobals *)device->osl_memory();
+
+ if (og) {
+ og->object_name_map.clear();
+ og->attribute_map.clear();
+ og->object_names.clear();
+ }
+#else
+ (void)device;
+#endif
+}
+
+void GeometryManager::tag_update(Scene *scene)
+{
+ need_update = true;
+ scene->object_manager->need_update = true;
+}
+
+void GeometryManager::collect_statistics(const Scene *scene, RenderStats *stats)
+{
+ foreach (Geometry *geometry, scene->geometry) {
+ stats->mesh.geometry.add_entry(
+ NamedSizeEntry(string(geometry->name.c_str()), geometry->get_total_size_in_bytes()));
+ }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/render/geometry.h b/intern/cycles/render/geometry.h
new file mode 100644
index 00000000000..b0284304843
--- /dev/null
+++ b/intern/cycles/render/geometry.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __GEOMETRY_H__
+#define __GEOMETRY_H__
+
+#include "graph/node.h"
+
+#include "bvh/bvh_params.h"
+
+#include "render/attribute.h"
+
+#include "util/util_boundbox.h"
+#include "util/util_set.h"
+#include "util/util_transform.h"
+#include "util/util_types.h"
+#include "util/util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+class BVH;
+class Device;
+class DeviceScene;
+class Mesh;
+class Progress;
+class RenderStats;
+class Scene;
+class SceneParams;
+class Shader;
+
+/* Geometry
+ *
+ * Base class for geometric types like Mesh and Hair. */
+
+class Geometry : public Node {
+ public:
+ NODE_ABSTRACT_DECLARE
+
+ enum Type {
+ MESH,
+ HAIR,
+ };
+
+ Type type;
+
+ /* Attributes */
+ AttributeSet attributes;
+
+ /* Shaders */
+ vector<Shader *> used_shaders;
+
+ /* Transform */
+ BoundBox bounds;
+ bool transform_applied;
+ bool transform_negative_scaled;
+ Transform transform_normal;
+
+ /* Motion Blur */
+ uint motion_steps;
+ bool use_motion_blur;
+
+ /* Maximum number of motion steps supported (due to Embree). */
+ static const uint MAX_MOTION_STEPS = 129;
+
+ /* BVH */
+ BVH *bvh;
+ size_t attr_map_offset;
+ size_t prim_offset;
+ size_t optix_prim_offset;
+
+ /* Shader Properties */
+ bool has_volume; /* Set in the device_update_flags(). */
+ bool has_surface_bssrdf; /* Set in the device_update_flags(). */
+
+ /* Update Flags */
+ bool need_update;
+ bool need_update_rebuild;
+
+ /* Constructor/Destructor */
+ explicit Geometry(const NodeType *node_type, const Type type);
+ virtual ~Geometry();
+
+ /* Geometry */
+ virtual void clear();
+ virtual void compute_bounds() = 0;
+ virtual void apply_transform(const Transform &tfm, const bool apply_to_motion) = 0;
+
+ /* Attribute Requests */
+ bool need_attribute(Scene *scene, AttributeStandard std);
+ bool need_attribute(Scene *scene, ustring name);
+
+ /* UDIM */
+ virtual void get_uv_tiles(ustring map, unordered_set<int> &tiles) = 0;
+
+ /* Convert between normalized -1..1 motion time and index in the
+ * VERTEX_MOTION attribute. */
+ float motion_time(int step) const;
+ int motion_step(float time) const;
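+  /* For illustration, assuming the usual linear mapping (defined in the .cpp):
+   * with motion_steps = 3, steps 0, 1, 2 map to times -1, 0 and +1. */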
+
+ /* BVH */
+ void compute_bvh(Device *device,
+ DeviceScene *dscene,
+ SceneParams *params,
+ Progress *progress,
+ int n,
+ int total);
+
+  /* Check whether the geometry should have its own BVH built separately.
+   * Briefly, a geometry needs its own BVH if:
+ *
+ * - It is instanced multiple times, so each instance object should share the
+ * same BVH tree.
+ * - Special ray intersection is needed, for example to limit subsurface rays
+ * to only the geometry itself.
+ * - The BVH layout requires the top level to only contain instances.
+ */
+ bool need_build_bvh(BVHLayout layout) const;
+
+ /* Test if the geometry should be treated as instanced. */
+ bool is_instanced() const;
+
+ bool has_true_displacement() const;
+ bool has_motion_blur() const;
+ bool has_voxel_attributes() const;
+
+ /* Updates */
+ void tag_update(Scene *scene, bool rebuild);
+};
+
+/* Geometry Manager */
+
+class GeometryManager {
+ public:
+ /* Update Flags */
+ bool need_update;
+ bool need_flags_update;
+
+ /* Constructor/Destructor */
+ GeometryManager();
+ ~GeometryManager();
+
+ /* Device Updates */
+ void device_update_preprocess(Device *device, Scene *scene, Progress &progress);
+ void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
+ void device_free(Device *device, DeviceScene *dscene);
+
+ /* Updates */
+ void tag_update(Scene *scene);
+
+ /* Statistics */
+ void collect_statistics(const Scene *scene, RenderStats *stats);
+
+ protected:
+ bool displace(Device *device, DeviceScene *dscene, Scene *scene, Mesh *mesh, Progress &progress);
+
+ void create_volume_mesh(Mesh *mesh, Progress &progress);
+
+ /* Attributes */
+ void update_osl_attributes(Device *device,
+ Scene *scene,
+ vector<AttributeRequestSet> &geom_attributes);
+ void update_svm_attributes(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ vector<AttributeRequestSet> &geom_attributes);
+
+ /* Compute verts/triangles/curves offsets in global arrays. */
+ void mesh_calc_offset(Scene *scene);
+
+ void device_update_object(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
+
+ void device_update_mesh(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ bool for_displacement,
+ Progress &progress);
+
+ void device_update_attributes(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ Progress &progress);
+
+ void device_update_bvh(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
+
+ void device_update_displacement_images(Device *device, Scene *scene, Progress &progress);
+
+ void device_update_volume_images(Device *device, Scene *scene, Progress &progress);
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __GEOMETRY_H__ */
diff --git a/intern/cycles/render/graph.cpp b/intern/cycles/render/graph.cpp
index c284c64b5bf..436324b00ba 100644
--- a/intern/cycles/render/graph.cpp
+++ b/intern/cycles/render/graph.cpp
@@ -14,12 +14,12 @@
* limitations under the License.
*/
-#include "render/attribute.h"
#include "render/graph.h"
+#include "render/attribute.h"
+#include "render/constant_fold.h"
#include "render/nodes.h"
#include "render/scene.h"
#include "render/shader.h"
-#include "render/constant_fold.h"
#include "util/util_algorithm.h"
#include "util/util_foreach.h"
@@ -55,6 +55,25 @@ bool check_node_inputs_traversed(const ShaderNode *node, const ShaderNodeSet &do
} /* namespace */
+/* Sockets */
+
+void ShaderInput::disconnect()
+{
+ if (link) {
+ link->links.erase(remove(link->links.begin(), link->links.end(), this), link->links.end());
+ }
+ link = NULL;
+}
+
+void ShaderOutput::disconnect()
+{
+ foreach (ShaderInput *sock, links) {
+ sock->link = NULL;
+ }
+
+ links.clear();
+}
+
/* Node */
ShaderNode::ShaderNode(const NodeType *type) : Node(type)
@@ -127,6 +146,13 @@ ShaderOutput *ShaderNode::output(ustring name)
return NULL;
}
+void ShaderNode::remove_input(ShaderInput *input)
+{
+ assert(input->link == NULL);
+ delete input;
+ inputs.erase(remove(inputs.begin(), inputs.end(), input), inputs.end());
+}
+
void ShaderNode::attributes(Shader *shader, AttributeRequestSet *attributes)
{
foreach (ShaderInput *input, inputs) {
@@ -278,11 +304,7 @@ void ShaderGraph::disconnect(ShaderOutput *from)
assert(!finalized);
simplified = false;
- foreach (ShaderInput *sock, from->links) {
- sock->link = NULL;
- }
-
- from->links.clear();
+ from->disconnect();
}
void ShaderGraph::disconnect(ShaderInput *to)
@@ -291,10 +313,29 @@ void ShaderGraph::disconnect(ShaderInput *to)
assert(to->link);
simplified = false;
- ShaderOutput *from = to->link;
+ to->disconnect();
+}
+
+void ShaderGraph::relink(ShaderInput *from, ShaderInput *to)
+{
+ ShaderOutput *out = from->link;
+ if (out) {
+ disconnect(from);
+ connect(out, to);
+ }
+ to->parent->copy_value(to->socket_type, *(from->parent), from->socket_type);
+}
- to->link = NULL;
- from->links.erase(remove(from->links.begin(), from->links.end(), to), from->links.end());
+void ShaderGraph::relink(ShaderOutput *from, ShaderOutput *to)
+{
+ /* Copy because disconnect modifies this list. */
+ vector<ShaderInput *> outputs = from->links;
+
+ foreach (ShaderInput *sock, outputs) {
+ disconnect(sock);
+ if (to)
+ connect(to, sock);
+ }
}
void ShaderGraph::relink(ShaderNode *node, ShaderOutput *from, ShaderOutput *to)
@@ -320,6 +361,7 @@ void ShaderGraph::relink(ShaderNode *node, ShaderOutput *from, ShaderOutput *to)
void ShaderGraph::simplify(Scene *scene)
{
if (!simplified) {
+ expand();
default_inputs(scene->shader_manager->use_osl());
clean(scene);
refine_bump_nodes();
@@ -721,6 +763,13 @@ void ShaderGraph::compute_displacement_hash()
int link_id = (input->link) ? input->link->parent->id : 0;
md5.append((uint8_t *)&link_id, sizeof(link_id));
}
+
+ if (node->special_type == SHADER_SPECIAL_TYPE_OSL) {
+      /* The hash takes socket values into account; to detect changes
+       * in the code of the node we need an exception here. */
+ OSLNode *oslnode = static_cast<OSLNode *>(node);
+ md5.append(oslnode->bytecode_hash);
+ }
}
displacement_hash = md5.get_hex();
@@ -745,6 +794,11 @@ void ShaderGraph::clean(Scene *scene)
/* break cycles */
break_cycles(output(), visited, on_stack);
+ foreach (ShaderNode *node, nodes) {
+ if (node->special_type == SHADER_SPECIAL_TYPE_OUTPUT_AOV) {
+ break_cycles(node, visited, on_stack);
+ }
+ }
/* disconnect unused nodes */
foreach (ShaderNode *node, nodes) {
@@ -773,6 +827,14 @@ void ShaderGraph::clean(Scene *scene)
nodes = newnodes;
}
+void ShaderGraph::expand()
+{
+ /* Call expand on all nodes, to generate additional nodes. */
+ foreach (ShaderNode *node, nodes) {
+ node->expand(this);
+ }
+}
+
void ShaderGraph::default_inputs(bool do_osl)
{
/* nodes can specify default texture coordinates, for now we give
@@ -880,12 +942,12 @@ void ShaderGraph::refine_bump_nodes()
foreach (NodePair &pair, nodes_dy)
add(pair.second);
- /* connect what is connected is bump to samplecenter input*/
+    /* Connect whatever was connected to bump to the sample-center input. */
connect(out, node->input("SampleCenter"));
- /* bump input is just for connectivity purpose for the graph input,
- * we re-connected this input to samplecenter, so lets disconnect it
- * from bump input */
+    /* The bump input was only used for graph connectivity; its source is now
+     * re-connected to sample-center, so let's disconnect it from the
+     * bump input. */
disconnect(bump_input);
}
}
@@ -933,7 +995,7 @@ void ShaderGraph::bump_from_displacement(bool use_object_space)
foreach (NodePair &pair, nodes_dy)
pair.second->bump = SHADER_BUMP_DY;
- /* add set normal node and connect the bump normal ouput to the set normal
+ /* add set normal node and connect the bump normal output to the set normal
* output, so it can finally set the shader normal, note we are only doing
* this for bump from displacement, this will be the only bump allowed to
* overwrite the shader normal */
diff --git a/intern/cycles/render/graph.h b/intern/cycles/render/graph.h
index a2c030fd226..febd7a76f03 100644
--- a/intern/cycles/render/graph.h
+++ b/intern/cycles/render/graph.h
@@ -61,12 +61,13 @@ enum ShaderNodeSpecialType {
SHADER_SPECIAL_TYPE_PROXY,
SHADER_SPECIAL_TYPE_AUTOCONVERT,
SHADER_SPECIAL_TYPE_GEOMETRY,
- SHADER_SPECIAL_TYPE_SCRIPT,
+ SHADER_SPECIAL_TYPE_OSL,
SHADER_SPECIAL_TYPE_IMAGE_SLOT,
SHADER_SPECIAL_TYPE_CLOSURE,
SHADER_SPECIAL_TYPE_COMBINE_CLOSURE,
SHADER_SPECIAL_TYPE_OUTPUT,
SHADER_SPECIAL_TYPE_BUMP,
+ SHADER_SPECIAL_TYPE_OUTPUT_AOV,
};
/* Input
@@ -104,6 +105,8 @@ class ShaderInput {
((Node *)parent)->set(socket_type, f);
}
+ void disconnect();
+
const SocketType &socket_type;
ShaderNode *parent;
ShaderOutput *link;
@@ -130,6 +133,8 @@ class ShaderOutput {
return socket_type.type;
}
+ void disconnect();
+
const SocketType &socket_type;
ShaderNode *parent;
vector<ShaderInput *> links;
@@ -147,6 +152,7 @@ class ShaderNode : public Node {
virtual ~ShaderNode();
void create_inputs_outputs(const NodeType *type);
+ void remove_input(ShaderInput *input);
ShaderInput *input(const char *name);
ShaderOutput *output(const char *name);
@@ -158,6 +164,11 @@ class ShaderNode : public Node {
virtual void compile(SVMCompiler &compiler) = 0;
virtual void compile(OSLCompiler &compiler) = 0;
+ /* Expand node into additional nodes. */
+ virtual void expand(ShaderGraph * /* graph */)
+ {
+ }
+
/* ** Node optimization ** */
/* Check whether the node can be replaced with single constant. */
virtual void constant_fold(const ConstantFolder & /*folder*/)
@@ -193,10 +204,6 @@ class ShaderNode : public Node {
{
return false;
}
- virtual bool has_object_dependency()
- {
- return false;
- }
virtual bool has_attribute_dependency()
{
return false;
@@ -322,6 +329,8 @@ class ShaderGraph {
void connect(ShaderOutput *from, ShaderInput *to);
void disconnect(ShaderOutput *from);
void disconnect(ShaderInput *to);
+ void relink(ShaderInput *from, ShaderInput *to);
+ void relink(ShaderOutput *from, ShaderOutput *to);
void relink(ShaderNode *node, ShaderOutput *from, ShaderOutput *to);
void remove_proxy_nodes();
@@ -346,6 +355,7 @@ class ShaderGraph {
void break_cycles(ShaderNode *node, vector<bool> &visited, vector<bool> &on_stack);
void bump_from_displacement(bool use_object_space);
void refine_bump_nodes();
+ void expand();
void default_inputs(bool do_osl);
void transform_multi_closure(ShaderNode *node, ShaderOutput *weight_out, bool volume);
diff --git a/intern/cycles/render/hair.cpp b/intern/cycles/render/hair.cpp
new file mode 100644
index 00000000000..816c15cf4ef
--- /dev/null
+++ b/intern/cycles/render/hair.cpp
@@ -0,0 +1,488 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "render/hair.h"
+#include "render/curves.h"
+#include "render/scene.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Hair Curve */
+
+void Hair::Curve::bounds_grow(const int k,
+ const float3 *curve_keys,
+ const float *curve_radius,
+ BoundBox &bounds) const
+{
+ float3 P[4];
+
+ P[0] = curve_keys[max(first_key + k - 1, first_key)];
+ P[1] = curve_keys[first_key + k];
+ P[2] = curve_keys[first_key + k + 1];
+ P[3] = curve_keys[min(first_key + k + 2, first_key + num_keys - 1)];
+
+ float3 lower;
+ float3 upper;
+
+ curvebounds(&lower.x, &upper.x, P, 0);
+ curvebounds(&lower.y, &upper.y, P, 1);
+ curvebounds(&lower.z, &upper.z, P, 2);
+
+ float mr = max(curve_radius[first_key + k], curve_radius[first_key + k + 1]);
+
+ bounds.grow(lower, mr);
+ bounds.grow(upper, mr);
+}
+
+void Hair::Curve::bounds_grow(const int k,
+ const float3 *curve_keys,
+ const float *curve_radius,
+ const Transform &aligned_space,
+ BoundBox &bounds) const
+{
+ float3 P[4];
+
+ P[0] = curve_keys[max(first_key + k - 1, first_key)];
+ P[1] = curve_keys[first_key + k];
+ P[2] = curve_keys[first_key + k + 1];
+ P[3] = curve_keys[min(first_key + k + 2, first_key + num_keys - 1)];
+
+ P[0] = transform_point(&aligned_space, P[0]);
+ P[1] = transform_point(&aligned_space, P[1]);
+ P[2] = transform_point(&aligned_space, P[2]);
+ P[3] = transform_point(&aligned_space, P[3]);
+
+ float3 lower;
+ float3 upper;
+
+ curvebounds(&lower.x, &upper.x, P, 0);
+ curvebounds(&lower.y, &upper.y, P, 1);
+ curvebounds(&lower.z, &upper.z, P, 2);
+
+ float mr = max(curve_radius[first_key + k], curve_radius[first_key + k + 1]);
+
+ bounds.grow(lower, mr);
+ bounds.grow(upper, mr);
+}
+
+void Hair::Curve::bounds_grow(float4 keys[4], BoundBox &bounds) const
+{
+ float3 P[4] = {
+ float4_to_float3(keys[0]),
+ float4_to_float3(keys[1]),
+ float4_to_float3(keys[2]),
+ float4_to_float3(keys[3]),
+ };
+
+ float3 lower;
+ float3 upper;
+
+ curvebounds(&lower.x, &upper.x, P, 0);
+ curvebounds(&lower.y, &upper.y, P, 1);
+ curvebounds(&lower.z, &upper.z, P, 2);
+
+ float mr = max(keys[1].w, keys[2].w);
+
+ bounds.grow(lower, mr);
+ bounds.grow(upper, mr);
+}
+
+void Hair::Curve::motion_keys(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ float time,
+ size_t k0,
+ size_t k1,
+ float4 r_keys[2]) const
+{
+ /* Figure out which steps we need to fetch and their interpolation factor. */
+ const size_t max_step = num_steps - 1;
+ const size_t step = min((int)(time * max_step), max_step - 1);
+ const float t = time * max_step - step;
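+  /* e.g. with num_steps = 3 (max_step = 2) and time = 0.75: step = 1, t = 0.5,
+   * blending the center and last motion steps equally. */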
+ /* Fetch vertex coordinates. */
+ float4 curr_keys[2];
+ float4 next_keys[2];
+ keys_for_step(
+ curve_keys, curve_radius, key_steps, num_curve_keys, num_steps, step, k0, k1, curr_keys);
+ keys_for_step(
+ curve_keys, curve_radius, key_steps, num_curve_keys, num_steps, step + 1, k0, k1, next_keys);
+ /* Interpolate between steps. */
+ r_keys[0] = (1.0f - t) * curr_keys[0] + t * next_keys[0];
+ r_keys[1] = (1.0f - t) * curr_keys[1] + t * next_keys[1];
+}
+
+void Hair::Curve::cardinal_motion_keys(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ float time,
+ size_t k0,
+ size_t k1,
+ size_t k2,
+ size_t k3,
+ float4 r_keys[4]) const
+{
+ /* Figure out which steps we need to fetch and their interpolation factor. */
+ const size_t max_step = num_steps - 1;
+ const size_t step = min((int)(time * max_step), max_step - 1);
+ const float t = time * max_step - step;
+ /* Fetch vertex coordinates. */
+ float4 curr_keys[4];
+ float4 next_keys[4];
+ cardinal_keys_for_step(curve_keys,
+ curve_radius,
+ key_steps,
+ num_curve_keys,
+ num_steps,
+ step,
+ k0,
+ k1,
+ k2,
+ k3,
+ curr_keys);
+ cardinal_keys_for_step(curve_keys,
+ curve_radius,
+ key_steps,
+ num_curve_keys,
+ num_steps,
+ step + 1,
+ k0,
+ k1,
+ k2,
+ k3,
+ next_keys);
+ /* Interpolate between steps. */
+ r_keys[0] = (1.0f - t) * curr_keys[0] + t * next_keys[0];
+ r_keys[1] = (1.0f - t) * curr_keys[1] + t * next_keys[1];
+ r_keys[2] = (1.0f - t) * curr_keys[2] + t * next_keys[2];
+ r_keys[3] = (1.0f - t) * curr_keys[3] + t * next_keys[3];
+}
+
+void Hair::Curve::keys_for_step(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ size_t step,
+ size_t k0,
+ size_t k1,
+ float4 r_keys[2]) const
+{
+ k0 = max(k0, 0);
+ k1 = min(k1, num_keys - 1);
+ const size_t center_step = ((num_steps - 1) / 2);
+ if (step == center_step) {
+ /* Center step: regular key location. */
+ /* TODO(sergey): Consider adding make_float4(float3, float)
+ * function.
+ */
+ r_keys[0] = make_float4(curve_keys[first_key + k0].x,
+ curve_keys[first_key + k0].y,
+ curve_keys[first_key + k0].z,
+ curve_radius[first_key + k0]);
+ r_keys[1] = make_float4(curve_keys[first_key + k1].x,
+ curve_keys[first_key + k1].y,
+ curve_keys[first_key + k1].z,
+ curve_radius[first_key + k1]);
+ }
+ else {
+ /* Center step is not stored in this array. */
+ if (step > center_step) {
+ step--;
+ }
+ const size_t offset = first_key + step * num_curve_keys;
+ r_keys[0] = make_float4(key_steps[offset + k0].x,
+ key_steps[offset + k0].y,
+ key_steps[offset + k0].z,
+ curve_radius[first_key + k0]);
+ r_keys[1] = make_float4(key_steps[offset + k1].x,
+ key_steps[offset + k1].y,
+ key_steps[offset + k1].z,
+ curve_radius[first_key + k1]);
+ }
+}
+
+void Hair::Curve::cardinal_keys_for_step(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ size_t step,
+ size_t k0,
+ size_t k1,
+ size_t k2,
+ size_t k3,
+ float4 r_keys[4]) const
+{
+ k0 = max(k0, 0);
+ k3 = min(k3, num_keys - 1);
+ const size_t center_step = ((num_steps - 1) / 2);
+ if (step == center_step) {
+ /* Center step: regular key location. */
+ r_keys[0] = make_float4(curve_keys[first_key + k0].x,
+ curve_keys[first_key + k0].y,
+ curve_keys[first_key + k0].z,
+ curve_radius[first_key + k0]);
+ r_keys[1] = make_float4(curve_keys[first_key + k1].x,
+ curve_keys[first_key + k1].y,
+ curve_keys[first_key + k1].z,
+ curve_radius[first_key + k1]);
+ r_keys[2] = make_float4(curve_keys[first_key + k2].x,
+ curve_keys[first_key + k2].y,
+ curve_keys[first_key + k2].z,
+ curve_radius[first_key + k2]);
+ r_keys[3] = make_float4(curve_keys[first_key + k3].x,
+ curve_keys[first_key + k3].y,
+ curve_keys[first_key + k3].z,
+ curve_radius[first_key + k3]);
+ }
+ else {
+ /* Center step is not stored in this array. */
+ if (step > center_step) {
+ step--;
+ }
+ const size_t offset = first_key + step * num_curve_keys;
+ r_keys[0] = make_float4(key_steps[offset + k0].x,
+ key_steps[offset + k0].y,
+ key_steps[offset + k0].z,
+ curve_radius[first_key + k0]);
+ r_keys[1] = make_float4(key_steps[offset + k1].x,
+ key_steps[offset + k1].y,
+ key_steps[offset + k1].z,
+ curve_radius[first_key + k1]);
+ r_keys[2] = make_float4(key_steps[offset + k2].x,
+ key_steps[offset + k2].y,
+ key_steps[offset + k2].z,
+ curve_radius[first_key + k2]);
+ r_keys[3] = make_float4(key_steps[offset + k3].x,
+ key_steps[offset + k3].y,
+ key_steps[offset + k3].z,
+ curve_radius[first_key + k3]);
+ }
+}
+
+/* Hair */
+
+NODE_DEFINE(Hair)
+{
+ NodeType *type = NodeType::add("hair", create, NodeType::NONE, Geometry::node_base_type);
+
+ SOCKET_POINT_ARRAY(curve_keys, "Curve Keys", array<float3>());
+ SOCKET_FLOAT_ARRAY(curve_radius, "Curve Radius", array<float>());
+ SOCKET_INT_ARRAY(curve_first_key, "Curve First Key", array<int>());
+ SOCKET_INT_ARRAY(curve_shader, "Curve Shader", array<int>());
+
+ return type;
+}
+
+Hair::Hair() : Geometry(node_type, Geometry::HAIR)
+{
+ curvekey_offset = 0;
+ curve_shape = CURVE_RIBBON;
+}
+
+Hair::~Hair()
+{
+}
+
+void Hair::resize_curves(int numcurves, int numkeys)
+{
+ curve_keys.resize(numkeys);
+ curve_radius.resize(numkeys);
+ curve_first_key.resize(numcurves);
+ curve_shader.resize(numcurves);
+
+ attributes.resize();
+}
+
+void Hair::reserve_curves(int numcurves, int numkeys)
+{
+ curve_keys.reserve(numkeys);
+ curve_radius.reserve(numkeys);
+ curve_first_key.reserve(numcurves);
+ curve_shader.reserve(numcurves);
+
+ attributes.resize(true);
+}
+
+void Hair::clear()
+{
+ Geometry::clear();
+
+ curve_keys.clear();
+ curve_radius.clear();
+ curve_first_key.clear();
+ curve_shader.clear();
+
+ attributes.clear();
+}
+
+void Hair::add_curve_key(float3 co, float radius)
+{
+ curve_keys.push_back_reserved(co);
+ curve_radius.push_back_reserved(radius);
+}
+
+void Hair::add_curve(int first_key, int shader)
+{
+ curve_first_key.push_back_reserved(first_key);
+ curve_shader.push_back_reserved(shader);
+}
+
+void Hair::copy_center_to_motion_step(const int motion_step)
+{
+ Attribute *attr_mP = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if (attr_mP) {
+ float3 *keys = &curve_keys[0];
+ size_t numkeys = curve_keys.size();
+ memcpy(attr_mP->data_float3() + motion_step * numkeys, keys, sizeof(float3) * numkeys);
+ }
+}
+
+void Hair::get_uv_tiles(ustring map, unordered_set<int> &tiles)
+{
+ Attribute *attr;
+
+ if (map.empty()) {
+ attr = attributes.find(ATTR_STD_UV);
+ }
+ else {
+ attr = attributes.find(map);
+ }
+
+ if (attr) {
+ attr->get_uv_tiles(this, ATTR_PRIM_GEOMETRY, tiles);
+ }
+}
+
+void Hair::compute_bounds()
+{
+ BoundBox bnds = BoundBox::empty;
+ size_t curve_keys_size = curve_keys.size();
+
+ if (curve_keys_size > 0) {
+ for (size_t i = 0; i < curve_keys_size; i++)
+ bnds.grow(curve_keys[i], curve_radius[i]);
+
+ Attribute *curve_attr = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if (use_motion_blur && curve_attr) {
+ size_t steps_size = curve_keys.size() * (motion_steps - 1);
+ float3 *key_steps = curve_attr->data_float3();
+
+ for (size_t i = 0; i < steps_size; i++)
+ bnds.grow(key_steps[i]);
+ }
+
+ if (!bnds.valid()) {
+ bnds = BoundBox::empty;
+
+ /* skip nan or inf coordinates */
+ for (size_t i = 0; i < curve_keys_size; i++)
+ bnds.grow_safe(curve_keys[i], curve_radius[i]);
+
+ if (use_motion_blur && curve_attr) {
+ size_t steps_size = curve_keys.size() * (motion_steps - 1);
+ float3 *key_steps = curve_attr->data_float3();
+
+ for (size_t i = 0; i < steps_size; i++)
+ bnds.grow_safe(key_steps[i]);
+ }
+ }
+ }
+
+ if (!bnds.valid()) {
+    /* empty hair */
+ bnds.grow(make_float3(0.0f, 0.0f, 0.0f));
+ }
+
+ bounds = bnds;
+}
+
+void Hair::apply_transform(const Transform &tfm, const bool apply_to_motion)
+{
+ /* compute uniform scale */
+ float3 c0 = transform_get_column(&tfm, 0);
+ float3 c1 = transform_get_column(&tfm, 1);
+ float3 c2 = transform_get_column(&tfm, 2);
+ float scalar = powf(fabsf(dot(cross(c0, c1), c2)), 1.0f / 3.0f);
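+  /* |dot(cross(c0, c1), c2)| is the determinant of the 3x3 part, i.e. the
+   * volume scale; its cube root serves as an average uniform scale for radii. */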
+
+ /* apply transform to curve keys */
+ for (size_t i = 0; i < curve_keys.size(); i++) {
+ float3 co = transform_point(&tfm, curve_keys[i]);
+ float radius = curve_radius[i] * scalar;
+
+ /* scale for curve radius is only correct for uniform scale */
+ curve_keys[i] = co;
+ curve_radius[i] = radius;
+ }
+
+ if (apply_to_motion) {
+ Attribute *curve_attr = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
+ if (curve_attr) {
+ /* apply transform to motion curve keys */
+ size_t steps_size = curve_keys.size() * (motion_steps - 1);
+ float4 *key_steps = curve_attr->data_float4();
+
+ for (size_t i = 0; i < steps_size; i++) {
+ float3 co = transform_point(&tfm, float4_to_float3(key_steps[i]));
+ float radius = key_steps[i].w * scalar;
+
+ /* scale for curve radius is only correct for uniform scale */
+ key_steps[i] = float3_to_float4(co);
+ key_steps[i].w = radius;
+ }
+ }
+ }
+}
+
+void Hair::pack_curves(Scene *scene,
+ float4 *curve_key_co,
+ float4 *curve_data,
+ size_t curvekey_offset)
+{
+ size_t curve_keys_size = curve_keys.size();
+
+ /* pack curve keys */
+ if (curve_keys_size) {
+ float3 *keys_ptr = curve_keys.data();
+ float *radius_ptr = curve_radius.data();
+
+ for (size_t i = 0; i < curve_keys_size; i++)
+ curve_key_co[i] = make_float4(keys_ptr[i].x, keys_ptr[i].y, keys_ptr[i].z, radius_ptr[i]);
+ }
+
+ /* pack curve segments */
+ size_t curve_num = num_curves();
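+  /* Each float4 stores integers bit-cast to float: the global first-key index,
+   * the key count and the shader id; the w component is unused padding. */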
+
+ for (size_t i = 0; i < curve_num; i++) {
+ Curve curve = get_curve(i);
+ int shader_id = curve_shader[i];
+ Shader *shader = (shader_id < used_shaders.size()) ? used_shaders[shader_id] :
+ scene->default_surface;
+ shader_id = scene->shader_manager->get_shader_id(shader, false);
+
+ curve_data[i] = make_float4(__int_as_float(curve.first_key + curvekey_offset),
+ __int_as_float(curve.num_keys),
+ __int_as_float(shader_id),
+ 0.0f);
+ }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/render/hair.h b/intern/cycles/render/hair.h
new file mode 100644
index 00000000000..39d6a34d799
--- /dev/null
+++ b/intern/cycles/render/hair.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __HAIR_H__
+#define __HAIR_H__
+
+#include "render/geometry.h"
+
+CCL_NAMESPACE_BEGIN
+
+class Hair : public Geometry {
+ public:
+ NODE_DECLARE
+
+ /* Hair Curve */
+ struct Curve {
+ int first_key;
+ int num_keys;
+
+ int num_segments() const
+ {
+ return num_keys - 1;
+ }
+
+ void bounds_grow(const int k,
+ const float3 *curve_keys,
+ const float *curve_radius,
+ BoundBox &bounds) const;
+ void bounds_grow(float4 keys[4], BoundBox &bounds) const;
+ void bounds_grow(const int k,
+ const float3 *curve_keys,
+ const float *curve_radius,
+ const Transform &aligned_space,
+ BoundBox &bounds) const;
+
+ void motion_keys(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ float time,
+ size_t k0,
+ size_t k1,
+ float4 r_keys[2]) const;
+ void cardinal_motion_keys(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ float time,
+ size_t k0,
+ size_t k1,
+ size_t k2,
+ size_t k3,
+ float4 r_keys[4]) const;
+
+ void keys_for_step(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ size_t step,
+ size_t k0,
+ size_t k1,
+ float4 r_keys[2]) const;
+ void cardinal_keys_for_step(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ size_t step,
+ size_t k0,
+ size_t k1,
+ size_t k2,
+ size_t k3,
+ float4 r_keys[4]) const;
+ };
+
+ array<float3> curve_keys;
+ array<float> curve_radius;
+ array<int> curve_first_key;
+ array<int> curve_shader;
+
+ /* BVH */
+ size_t curvekey_offset;
+ CurveShapeType curve_shape;
+
+ /* Constructor/Destructor */
+ Hair();
+ ~Hair();
+
+ /* Geometry */
+ void clear() override;
+
+ void resize_curves(int numcurves, int numkeys);
+ void reserve_curves(int numcurves, int numkeys);
+ void add_curve_key(float3 loc, float radius);
+ void add_curve(int first_key, int shader);
+
+ void copy_center_to_motion_step(const int motion_step);
+
+ void compute_bounds() override;
+ void apply_transform(const Transform &tfm, const bool apply_to_motion) override;
+
+ /* Curves */
+ Curve get_curve(size_t i) const
+ {
+ int first = curve_first_key[i];
+ int next_first = (i + 1 < curve_first_key.size()) ? curve_first_key[i + 1] : curve_keys.size();
+
+ Curve curve = {first, next_first - first};
+ return curve;
+ }
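+  /* Example: with curve_first_key = {0, 4} and 7 keys in total, get_curve(0)
+   * spans keys [0, 4) and get_curve(1) spans keys [4, 7). */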
+
+ size_t num_keys() const
+ {
+ return curve_keys.size();
+ }
+
+ size_t num_curves() const
+ {
+ return curve_first_key.size();
+ }
+
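+  /* A curve with k keys has k - 1 segments, so the total segment count is the
+   * key count minus the curve count. */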
+ size_t num_segments() const
+ {
+ return curve_keys.size() - curve_first_key.size();
+ }
+
+ /* UDIM */
+ void get_uv_tiles(ustring map, unordered_set<int> &tiles) override;
+
+ /* BVH */
+ void pack_curves(Scene *scene, float4 *curve_key_co, float4 *curve_data, size_t curvekey_offset);
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __HAIR_H__ */
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index ae219e912e0..8d187814d64 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -14,15 +14,20 @@
* limitations under the License.
*/
-#include "device/device.h"
#include "render/image.h"
+#include "device/device.h"
+#include "render/colorspace.h"
+#include "render/image_oiio.h"
#include "render/scene.h"
#include "render/stats.h"
#include "util/util_foreach.h"
+#include "util/util_image.h"
+#include "util/util_image_impl.h"
#include "util/util_logging.h"
#include "util/util_path.h"
#include "util/util_progress.h"
+#include "util/util_task.h"
#include "util/util_texture.h"
#include "util/util_unique_ptr.h"
@@ -48,21 +53,6 @@ bool isfinite(uint16_t /*value*/)
return true;
}
-/* The lower three bits of a device texture slot number indicate its type.
- * These functions convert the slot ids from ImageManager "images" ones
- * to device ones and vice verse.
- */
-int type_index_to_flattened_slot(int slot, ImageDataType type)
-{
- return (slot << IMAGE_DATA_TYPE_SHIFT) | (type);
-}
-
-int flattened_slot_to_type_index(int flat_slot, ImageDataType *type)
-{
- *type = (ImageDataType)(flat_slot & IMAGE_DATA_TYPE_MASK);
- return flat_slot >> IMAGE_DATA_TYPE_SHIFT;
-}
-
const char *name_from_type(ImageDataType type)
{
switch (type) {
@@ -92,301 +82,355 @@ const char *name_from_type(ImageDataType type)
} // namespace
-ImageManager::ImageManager(const DeviceInfo &info)
+/* Image Handle */
+
+ImageHandle::ImageHandle() : manager(NULL)
{
- need_update = true;
- osl_texture_system = NULL;
- animation_frame = 0;
+}
- /* Set image limits */
- max_num_images = TEX_NUM_MAX;
- has_half_images = info.has_half_images;
+ImageHandle::ImageHandle(const ImageHandle &other)
+ : tile_slots(other.tile_slots), manager(other.manager)
+{
+ /* Increase image user count. */
+ foreach (const int slot, tile_slots) {
+ manager->add_image_user(slot);
+ }
+}
+
+ImageHandle &ImageHandle::operator=(const ImageHandle &other)
+{
+ clear();
+ manager = other.manager;
+ tile_slots = other.tile_slots;
- for (size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- tex_num_images[type] = 0;
+ foreach (const int slot, tile_slots) {
+ manager->add_image_user(slot);
}
+
+ return *this;
}
-ImageManager::~ImageManager()
+ImageHandle::~ImageHandle()
{
- for (size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- for (size_t slot = 0; slot < images[type].size(); slot++)
- assert(!images[type][slot]);
+ clear();
+}
+
+void ImageHandle::clear()
+{
+ foreach (const int slot, tile_slots) {
+ manager->remove_image_user(slot);
}
+
+ tile_slots.clear();
+ manager = NULL;
}
-void ImageManager::set_osl_texture_system(void *texture_system)
+bool ImageHandle::empty()
{
- osl_texture_system = texture_system;
+ return tile_slots.empty();
}
-bool ImageManager::set_animation_frame_update(int frame)
+int ImageHandle::num_tiles()
{
- if (frame != animation_frame) {
- animation_frame = frame;
+ return tile_slots.size();
+}
- for (size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- for (size_t slot = 0; slot < images[type].size(); slot++) {
- if (images[type][slot] && images[type][slot]->animated)
- return true;
- }
- }
+ImageMetaData ImageHandle::metadata()
+{
+ if (tile_slots.empty()) {
+ return ImageMetaData();
}
- return false;
+ ImageManager::Image *img = manager->images[tile_slots.front()];
+ manager->load_image_metadata(img);
+ return img->metadata;
}
-device_memory *ImageManager::image_memory(int flat_slot)
+int ImageHandle::svm_slot(const int tile_index) const
{
- ImageDataType type;
- int slot = flattened_slot_to_type_index(flat_slot, &type);
+ if (tile_index >= tile_slots.size()) {
+ return -1;
+ }
- Image *img = images[type][slot];
+ if (manager->osl_texture_system) {
+ ImageManager::Image *img = manager->images[tile_slots[tile_index]];
+ if (!img->loader->osl_filepath().empty()) {
+ return -1;
+ }
+ }
- return img->mem;
+ return tile_slots[tile_index];
}
-bool ImageManager::get_image_metadata(int flat_slot, ImageMetaData &metadata)
+device_texture *ImageHandle::image_memory(const int tile_index) const
{
- if (flat_slot == -1) {
- return false;
+ if (tile_index >= tile_slots.size()) {
+ return NULL;
}
- ImageDataType type;
- int slot = flattened_slot_to_type_index(flat_slot, &type);
+ ImageManager::Image *img = manager->images[tile_slots[tile_index]];
+ return img ? img->mem : NULL;
+}
- Image *img = images[type][slot];
- if (img) {
- metadata = img->metadata;
- return true;
- }
+bool ImageHandle::operator==(const ImageHandle &other) const
+{
+ return manager == other.manager && tile_slots == other.tile_slots;
+}
- return false;
+/* Image MetaData */
+
+ImageMetaData::ImageMetaData()
+ : channels(0),
+ width(0),
+ height(0),
+ depth(0),
+ type(IMAGE_DATA_NUM_TYPES),
+ colorspace(u_colorspace_raw),
+ colorspace_file_format(""),
+ use_transform_3d(false),
+ compress_as_srgb(false)
+{
}
-bool ImageManager::get_image_metadata(const string &filename,
- void *builtin_data,
- ImageMetaData &metadata)
+bool ImageMetaData::operator==(const ImageMetaData &other) const
{
- memset(&metadata, 0, sizeof(metadata));
+ return channels == other.channels && width == other.width && height == other.height &&
+ depth == other.depth && use_transform_3d == other.use_transform_3d &&
+ (!use_transform_3d || transform_3d == other.transform_3d) && type == other.type &&
+ colorspace == other.colorspace && compress_as_srgb == other.compress_as_srgb;
+}
- if (builtin_data) {
- if (builtin_image_info_cb) {
- builtin_image_info_cb(filename, builtin_data, metadata);
- }
- else {
- return false;
- }
+bool ImageMetaData::is_float() const
+{
+ return (type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4 ||
+ type == IMAGE_DATA_TYPE_HALF || type == IMAGE_DATA_TYPE_HALF4);
+}
+
+void ImageMetaData::detect_colorspace()
+{
+  /* Convert user-specified color spaces to one we know how to handle. */
+ colorspace = ColorSpaceManager::detect_known_colorspace(
+ colorspace, colorspace_file_format, is_float());
+
+ if (colorspace == u_colorspace_raw) {
+ /* Nothing to do. */
+ }
+ else if (colorspace == u_colorspace_srgb) {
+ /* Keep sRGB colorspace stored as sRGB, to save memory and/or loading time
+ * for the common case of 8bit sRGB images like PNG. */
+ compress_as_srgb = true;
+ }
+ else {
+ /* Always compress non-raw 8bit images as scene linear + sRGB, as a
+ * heuristic to keep memory usage the same without too much data loss
+ * due to quantization in common cases. */
+ compress_as_srgb = (type == IMAGE_DATA_TYPE_BYTE || type == IMAGE_DATA_TYPE_BYTE4);
- if (metadata.is_float) {
- metadata.is_linear = true;
- metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_FLOAT;
+    /* If colorspace conversion is needed, use half instead of short so we can
+ * represent HDR values that might result from conversion. */
+ if (type == IMAGE_DATA_TYPE_USHORT) {
+ type = IMAGE_DATA_TYPE_HALF;
}
- else {
- metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_BYTE4 : IMAGE_DATA_TYPE_BYTE;
+ else if (type == IMAGE_DATA_TYPE_USHORT4) {
+ type = IMAGE_DATA_TYPE_HALF4;
}
-
- return true;
}
+}
- /* Perform preliminary checks, with meaningful logging. */
- if (!path_exists(filename)) {
- VLOG(1) << "File '" << filename << "' does not exist.";
- return false;
+/* Image Loader */
+
+ImageLoader::ImageLoader()
+{
+}
+
+ustring ImageLoader::osl_filepath() const
+{
+ return ustring();
+}
+
+bool ImageLoader::equals(const ImageLoader *a, const ImageLoader *b)
+{
+ if (a == NULL && b == NULL) {
+ return true;
}
- if (path_is_directory(filename)) {
- VLOG(1) << "File '" << filename << "' is a directory, can't use as image.";
- return false;
+ else {
+ return (a && b && typeid(*a) == typeid(*b) && a->equals(*b));
}
+}
- unique_ptr<ImageInput> in(ImageInput::create(filename));
+/* Image Manager */
- if (!in) {
- return false;
- }
+ImageManager::ImageManager(const DeviceInfo &info)
+{
+ need_update = true;
+ osl_texture_system = NULL;
+ animation_frame = 0;
- ImageSpec spec;
- if (!in->open(filename, spec)) {
- return false;
- }
+ /* Set image limits */
+ has_half_images = info.has_half_images;
+}
- metadata.width = spec.width;
- metadata.height = spec.height;
- metadata.depth = spec.depth;
+ImageManager::~ImageManager()
+{
+ for (size_t slot = 0; slot < images.size(); slot++)
+ assert(!images[slot]);
+}
- /* Check the main format, and channel formats. */
- size_t channel_size = spec.format.basesize();
+void ImageManager::set_osl_texture_system(void *texture_system)
+{
+ osl_texture_system = texture_system;
+}
- if (spec.format.is_floating_point()) {
- metadata.is_float = true;
- metadata.is_linear = true;
- }
+bool ImageManager::set_animation_frame_update(int frame)
+{
+ if (frame != animation_frame) {
+ thread_scoped_lock device_lock(images_mutex);
+ animation_frame = frame;
- for (size_t channel = 0; channel < spec.channelformats.size(); channel++) {
- channel_size = max(channel_size, spec.channelformats[channel].basesize());
- if (spec.channelformats[channel].is_floating_point()) {
- metadata.is_float = true;
- metadata.is_linear = true;
+ for (size_t slot = 0; slot < images.size(); slot++) {
+ if (images[slot] && images[slot]->params.animated)
+ return true;
}
}
- /* check if it's half float */
- if (spec.format == TypeDesc::HALF) {
- metadata.is_half = true;
+ return false;
+}
+
+void ImageManager::load_image_metadata(Image *img)
+{
+ if (!img->need_metadata) {
+ return;
}
- /* basic color space detection, not great but better than nothing
- * before we do OpenColorIO integration */
- if (metadata.is_float) {
- string colorspace = spec.get_string_attribute("oiio:ColorSpace");
+ thread_scoped_lock image_lock(img->mutex);
+ if (!img->need_metadata) {
+ return;
+ }
- metadata.is_linear = !(
- colorspace == "sRGB" || colorspace == "GammaCorrected" ||
- (colorspace == "" &&
- (strcmp(in->format_name(), "png") == 0 || strcmp(in->format_name(), "tiff") == 0 ||
- strcmp(in->format_name(), "dpx") == 0 || strcmp(in->format_name(), "jpeg2000") == 0)));
+ ImageMetaData &metadata = img->metadata;
+ metadata = ImageMetaData();
+ metadata.colorspace = img->params.colorspace;
+
+ if (img->loader->load_metadata(metadata)) {
+ assert(metadata.type != IMAGE_DATA_NUM_TYPES);
}
else {
- metadata.is_linear = false;
+ metadata.type = IMAGE_DATA_TYPE_BYTE4;
}
- /* set type and channels */
- metadata.channels = spec.nchannels;
+ metadata.detect_colorspace();
- if (metadata.is_half) {
- metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_HALF4 : IMAGE_DATA_TYPE_HALF;
- }
- else if (metadata.is_float) {
- metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_FLOAT;
- }
- else if (spec.format == TypeDesc::USHORT) {
- metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_USHORT4 : IMAGE_DATA_TYPE_USHORT;
- }
- else {
- metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_BYTE4 : IMAGE_DATA_TYPE_BYTE;
+ /* No half textures on OpenCL, use full float instead. */
+ if (!has_half_images) {
+ if (metadata.type == IMAGE_DATA_TYPE_HALF4) {
+ metadata.type = IMAGE_DATA_TYPE_FLOAT4;
+ }
+ else if (metadata.type == IMAGE_DATA_TYPE_HALF) {
+ metadata.type = IMAGE_DATA_TYPE_FLOAT;
+ }
}
- in->close();
+ img->need_metadata = false;
+}
- return true;
+ImageHandle ImageManager::add_image(const string &filename, const ImageParams &params)
+{
+ const int slot = add_image_slot(new OIIOImageLoader(filename), params, false);
+
+ ImageHandle handle;
+ handle.tile_slots.push_back(slot);
+ handle.manager = this;
+ return handle;
+}
+
+ImageHandle ImageManager::add_image(const string &filename,
+ const ImageParams &params,
+ const vector<int> &tiles)
+{
+ ImageHandle handle;
+ handle.manager = this;
+
+ foreach (int tile, tiles) {
+ string tile_filename = filename;
+ if (tile != 0) {
+ string_replace(tile_filename, "<UDIM>", string_printf("%04d", tile));
+ }
+ const int slot = add_image_slot(new OIIOImageLoader(tile_filename), params, false);
+ handle.tile_slots.push_back(slot);
+ }
+
+ return handle;
}
-static bool image_equals(ImageManager::Image *image,
- const string &filename,
- void *builtin_data,
- InterpolationType interpolation,
- ExtensionType extension,
- bool use_alpha)
+ImageHandle ImageManager::add_image(ImageLoader *loader, const ImageParams &params)
{
- return image->filename == filename && image->builtin_data == builtin_data &&
- image->interpolation == interpolation && image->extension == extension &&
- image->use_alpha == use_alpha;
+ const int slot = add_image_slot(loader, params, true);
+
+ ImageHandle handle;
+ handle.tile_slots.push_back(slot);
+ handle.manager = this;
+ return handle;
}
-int ImageManager::add_image(const string &filename,
- void *builtin_data,
- bool animated,
- float frame,
- InterpolationType interpolation,
- ExtensionType extension,
- bool use_alpha,
- ImageMetaData &metadata)
+int ImageManager::add_image_slot(ImageLoader *loader,
+ const ImageParams &params,
+ const bool builtin)
{
Image *img;
size_t slot;
- get_image_metadata(filename, builtin_data, metadata);
- ImageDataType type = metadata.type;
-
- thread_scoped_lock device_lock(device_mutex);
-
- /* No half textures on OpenCL, use full float instead. */
- if (!has_half_images) {
- if (type == IMAGE_DATA_TYPE_HALF4) {
- type = IMAGE_DATA_TYPE_FLOAT4;
- }
- else if (type == IMAGE_DATA_TYPE_HALF) {
- type = IMAGE_DATA_TYPE_FLOAT;
- }
- }
+ thread_scoped_lock device_lock(images_mutex);
   /* Find existing image. */
- for (slot = 0; slot < images[type].size(); slot++) {
- img = images[type][slot];
- if (img && image_equals(img, filename, builtin_data, interpolation, extension, use_alpha)) {
- if (img->frame != frame) {
- img->frame = frame;
- img->need_load = true;
- }
- if (img->use_alpha != use_alpha) {
- img->use_alpha = use_alpha;
- img->need_load = true;
- }
- if (!(img->metadata == metadata)) {
- img->metadata = metadata;
- img->need_load = true;
- }
+ for (slot = 0; slot < images.size(); slot++) {
+ img = images[slot];
+ if (img && ImageLoader::equals(img->loader, loader) && img->params == params) {
img->users++;
- return type_index_to_flattened_slot(slot, type);
+ delete loader;
+ return slot;
}
}
/* Find free slot. */
- for (slot = 0; slot < images[type].size(); slot++) {
- if (!images[type][slot])
+ for (slot = 0; slot < images.size(); slot++) {
+ if (!images[slot])
break;
}
- /* Count if we're over the limit.
- * Very unlikely, since max_num_images is insanely big. But better safe
- * than sorry.
- */
- int tex_count = 0;
- for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- tex_count += tex_num_images[type];
- }
- if (tex_count > max_num_images) {
- printf(
- "ImageManager::add_image: Reached image limit (%d), "
- "skipping '%s'\n",
- max_num_images,
- filename.c_str());
- return -1;
- }
-
- if (slot == images[type].size()) {
- images[type].resize(images[type].size() + 1);
+ if (slot == images.size()) {
+ images.resize(images.size() + 1);
}
/* Add new image. */
img = new Image();
- img->filename = filename;
- img->builtin_data = builtin_data;
- img->metadata = metadata;
- img->need_load = true;
- img->animated = animated;
- img->frame = frame;
- img->interpolation = interpolation;
- img->extension = extension;
+ img->params = params;
+ img->loader = loader;
+ img->need_metadata = true;
+ img->need_load = !(osl_texture_system && !img->loader->osl_filepath().empty());
+ img->builtin = builtin;
img->users = 1;
- img->use_alpha = use_alpha;
img->mem = NULL;
- images[type][slot] = img;
-
- ++tex_num_images[type];
+ images[slot] = img;
need_update = true;
- return type_index_to_flattened_slot(slot, type);
+ return slot;
}
-void ImageManager::remove_image(int flat_slot)
+void ImageManager::add_image_user(int slot)
{
- ImageDataType type;
- int slot = flattened_slot_to_type_index(flat_slot, &type);
+ thread_scoped_lock device_lock(images_mutex);
+ Image *image = images[slot];
+ assert(image && image->users >= 1);
+
+ image->users++;
+}
- Image *image = images[type][slot];
+void ImageManager::remove_image_user(int slot)
+{
+ thread_scoped_lock device_lock(images_mutex);
+ Image *image = images[slot];
assert(image && image->users >= 1);
/* decrement user count */
@@ -399,100 +443,20 @@ void ImageManager::remove_image(int flat_slot)
need_update = true;
}
-void ImageManager::remove_image(const string &filename,
- void *builtin_data,
- InterpolationType interpolation,
- ExtensionType extension,
- bool use_alpha)
+static bool image_associate_alpha(ImageManager::Image *img)
{
- size_t slot;
-
- for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- for (slot = 0; slot < images[type].size(); slot++) {
- if (images[type][slot] &&
- image_equals(
- images[type][slot], filename, builtin_data, interpolation, extension, use_alpha)) {
- remove_image(type_index_to_flattened_slot(slot, (ImageDataType)type));
- return;
- }
- }
- }
-}
-
-/* TODO(sergey): Deduplicate with the iteration above, but make it pretty,
- * without bunch of arguments passing around making code readability even
- * more cluttered.
- */
-void ImageManager::tag_reload_image(const string &filename,
- void *builtin_data,
- InterpolationType interpolation,
- ExtensionType extension,
- bool use_alpha)
-{
- for (size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- for (size_t slot = 0; slot < images[type].size(); slot++) {
- if (images[type][slot] &&
- image_equals(
- images[type][slot], filename, builtin_data, interpolation, extension, use_alpha)) {
- images[type][slot]->need_load = true;
- break;
- }
- }
- }
+ /* For typical RGBA images we let OIIO convert to associated alpha,
+   * but for some types we want to leave the RGB channels untouched. */
+ return !(ColorSpaceManager::colorspace_is_data(img->params.colorspace) ||
+ img->params.alpha_type == IMAGE_ALPHA_IGNORE ||
+ img->params.alpha_type == IMAGE_ALPHA_CHANNEL_PACKED);
}
-bool ImageManager::file_load_image_generic(Image *img, unique_ptr<ImageInput> *in)
+template<TypeDesc::BASETYPE FileFormat, typename StorageType>
+bool ImageManager::file_load_image(Image *img, int texture_limit)
{
- if (img->filename == "")
- return false;
-
- if (!img->builtin_data) {
- /* NOTE: Error logging is done in meta data acquisition. */
- if (!path_exists(img->filename) || path_is_directory(img->filename)) {
- return false;
- }
-
- /* load image from file through OIIO */
- *in = unique_ptr<ImageInput>(ImageInput::create(img->filename));
-
- if (!*in)
- return false;
-
- ImageSpec spec = ImageSpec();
- ImageSpec config = ImageSpec();
-
- if (img->use_alpha == false)
- config.attribute("oiio:UnassociatedAlpha", 1);
-
- if (!(*in)->open(img->filename, spec, config)) {
- return false;
- }
- }
- else {
- /* load image using builtin images callbacks */
- if (!builtin_image_info_cb || !builtin_image_pixels_cb)
- return false;
- }
-
/* we only handle certain number of components */
if (!(img->metadata.channels >= 1 && img->metadata.channels <= 4)) {
- if (*in) {
- (*in)->close();
- }
- return false;
- }
-
- return true;
-}
-
-template<TypeDesc::BASETYPE FileFormat, typename StorageType, typename DeviceType>
-bool ImageManager::file_load_image(Image *img,
- ImageDataType type,
- int texture_limit,
- device_vector<DeviceType> &tex_img)
-{
- unique_ptr<ImageInput> in = NULL;
- if (!file_load_image_generic(img, &in)) {
return false;
}
@@ -502,101 +466,46 @@ bool ImageManager::file_load_image(Image *img,
int depth = img->metadata.depth;
int components = img->metadata.channels;
- /* Read RGBA pixels. */
+ /* Read pixels. */
vector<StorageType> pixels_storage;
StorageType *pixels;
const size_t max_size = max(max(width, height), depth);
if (max_size == 0) {
- /* Don't bother with invalid images. */
+ /* Don't bother with empty images. */
return false;
}
+
+  /* Allocate pixel storage. If the image exceeds the texture size limit, read into a
+   * temporary buffer first so it can be scaled down before the device allocation. */
if (texture_limit > 0 && max_size > texture_limit) {
pixels_storage.resize(((size_t)width) * height * depth * 4);
pixels = &pixels_storage[0];
}
else {
thread_scoped_lock device_lock(device_mutex);
- pixels = (StorageType *)tex_img.alloc(width, height, depth);
+ pixels = (StorageType *)img->mem->alloc(width, height, depth);
}
+
if (pixels == NULL) {
/* Could be that we've run out of memory. */
return false;
}
- bool cmyk = false;
+
const size_t num_pixels = ((size_t)width) * height * depth;
- if (in) {
- StorageType *readpixels = pixels;
- vector<StorageType> tmppixels;
- if (components > 4) {
- tmppixels.resize(((size_t)width) * height * components);
- readpixels = &tmppixels[0];
- }
- if (depth <= 1) {
- size_t scanlinesize = ((size_t)width) * components * sizeof(StorageType);
- in->read_image(FileFormat,
- (uchar *)readpixels + (height - 1) * scanlinesize,
- AutoStride,
- -scanlinesize,
- AutoStride);
- }
- else {
- in->read_image(FileFormat, (uchar *)readpixels);
- }
- if (components > 4) {
- size_t dimensions = ((size_t)width) * height;
- for (size_t i = dimensions - 1, pixel = 0; pixel < dimensions; pixel++, i--) {
- pixels[i * 4 + 3] = tmppixels[i * components + 3];
- pixels[i * 4 + 2] = tmppixels[i * components + 2];
- pixels[i * 4 + 1] = tmppixels[i * components + 1];
- pixels[i * 4 + 0] = tmppixels[i * components + 0];
- }
- tmppixels.clear();
- }
- cmyk = strcmp(in->format_name(), "jpeg") == 0 && components == 4;
- in->close();
- }
- else {
- if (FileFormat == TypeDesc::FLOAT) {
- builtin_image_float_pixels_cb(img->filename,
- img->builtin_data,
- (float *)&pixels[0],
- num_pixels * components,
- img->metadata.builtin_free_cache);
- }
- else if (FileFormat == TypeDesc::UINT8) {
- builtin_image_pixels_cb(img->filename,
- img->builtin_data,
- (uchar *)&pixels[0],
- num_pixels * components,
- img->metadata.builtin_free_cache);
- }
- else {
- /* TODO(dingto): Support half for ImBuf. */
- }
- }
- /* Check if we actually have a float4 slot, in case components == 1,
- * but device doesn't support single channel textures.
- */
- bool is_rgba = (type == IMAGE_DATA_TYPE_FLOAT4 || type == IMAGE_DATA_TYPE_HALF4 ||
- type == IMAGE_DATA_TYPE_BYTE4 || type == IMAGE_DATA_TYPE_USHORT4);
+ img->loader->load_pixels(
+ img->metadata, pixels, num_pixels * components, image_associate_alpha(img));
+
+ /* The kernel can handle 1 and 4 channel images. Anything that is not a single
+ * channel image is converted to RGBA format. */
+ bool is_rgba = (img->metadata.type == IMAGE_DATA_TYPE_FLOAT4 ||
+ img->metadata.type == IMAGE_DATA_TYPE_HALF4 ||
+ img->metadata.type == IMAGE_DATA_TYPE_BYTE4 ||
+ img->metadata.type == IMAGE_DATA_TYPE_USHORT4);
+
if (is_rgba) {
const StorageType one = util_image_cast_from_float<StorageType>(1.0f);
- if (cmyk) {
- /* CMYK */
- for (size_t i = num_pixels - 1, pixel = 0; pixel < num_pixels; pixel++, i--) {
- float c = util_image_cast_to_float(pixels[i * 4 + 0]);
- float m = util_image_cast_to_float(pixels[i * 4 + 1]);
- float y = util_image_cast_to_float(pixels[i * 4 + 2]);
- float k = util_image_cast_to_float(pixels[i * 4 + 3]);
- pixels[i * 4 + 0] = util_image_cast_from_float<StorageType>((1.0f - c) * (1.0f - k));
- pixels[i * 4 + 1] = util_image_cast_from_float<StorageType>((1.0f - m) * (1.0f - k));
- pixels[i * 4 + 2] = util_image_cast_from_float<StorageType>((1.0f - y) * (1.0f - k));
- pixels[i * 4 + 3] = one;
- }
- }
- else if (components == 2) {
- /* grayscale + alpha */
+ if (components == 2) {
+ /* Grayscale + alpha to RGBA. */
for (size_t i = num_pixels - 1, pixel = 0; pixel < num_pixels; pixel++, i--) {
pixels[i * 4 + 3] = pixels[i * 2 + 1];
pixels[i * 4 + 2] = pixels[i * 2 + 0];
@@ -605,7 +514,7 @@ bool ImageManager::file_load_image(Image *img,
}
}
else if (components == 3) {
- /* RGB */
+ /* RGB to RGBA. */
for (size_t i = num_pixels - 1, pixel = 0; pixel < num_pixels; pixel++, i--) {
pixels[i * 4 + 3] = one;
pixels[i * 4 + 2] = pixels[i * 3 + 2];
@@ -614,7 +523,7 @@ bool ImageManager::file_load_image(Image *img,
}
}
else if (components == 1) {
- /* grayscale */
+ /* Grayscale to RGBA. */
for (size_t i = num_pixels - 1, pixel = 0; pixel < num_pixels; pixel++, i--) {
pixels[i * 4 + 3] = one;
pixels[i * 4 + 2] = pixels[i];
@@ -622,18 +531,27 @@ bool ImageManager::file_load_image(Image *img,
pixels[i * 4 + 0] = pixels[i];
}
}
- if (img->use_alpha == false) {
+
+ /* Disable alpha if requested by the user. */
+ if (img->params.alpha_type == IMAGE_ALPHA_IGNORE) {
for (size_t i = num_pixels - 1, pixel = 0; pixel < num_pixels; pixel++, i--) {
pixels[i * 4 + 3] = one;
}
}
+
+ if (img->metadata.colorspace != u_colorspace_raw &&
+ img->metadata.colorspace != u_colorspace_srgb) {
+ /* Convert to scene linear. */
+ ColorSpaceManager::to_scene_linear(
+ img->metadata.colorspace, pixels, num_pixels, img->metadata.compress_as_srgb);
+ }
}
+
/* Make sure we don't have buggy values. */
if (FileFormat == TypeDesc::FLOAT) {
/* For RGBA buffers we put all channels to 0 if either of them is not
* finite. This way we avoid possible artifacts caused by fully changed
- * hue.
- */
+ * hue. */
if (is_rgba) {
for (size_t i = 0; i < num_pixels; i += 4) {
StorageType *pixel = &pixels[i * 4];
@@ -655,13 +573,15 @@ bool ImageManager::file_load_image(Image *img,
}
}
}
+
/* Scale image down if needed. */
if (pixels_storage.size() > 0) {
float scale_factor = 1.0f;
while (max_size * scale_factor > texture_limit) {
scale_factor *= 0.5f;
}
- VLOG(1) << "Scaling image " << img->filename << " by a factor of " << scale_factor << ".";
+ VLOG(1) << "Scaling image " << img->loader->name() << " by a factor of " << scale_factor
+ << ".";
vector<StorageType> scaled_pixels;
size_t scaled_width, scaled_height, scaled_depth;
util_image_resize_pixels(pixels_storage,
@@ -679,33 +599,32 @@ bool ImageManager::file_load_image(Image *img,
{
thread_scoped_lock device_lock(device_mutex);
- texture_pixels = (StorageType *)tex_img.alloc(scaled_width, scaled_height, scaled_depth);
+ texture_pixels = (StorageType *)img->mem->alloc(scaled_width, scaled_height, scaled_depth);
}
memcpy(texture_pixels, &scaled_pixels[0], scaled_pixels.size() * sizeof(StorageType));
}
+
return true;
}
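A note on the channel-expansion loops above: they walk the buffer from the last pixel backwards because the expansion happens in place. For pixel i the writes land at index 4*i and above, while every pixel still to be processed (j < i) is read from index j*components, which stays below 4*i; within a single pixel the channels are written from alpha downwards, so each source value is read before its slot can be reused. The same idea as a standalone sketch for the grayscale case (hypothetical helper, not part of Cycles):

  /* Expand num_pixels grayscale values stored at the front of `data` (which has
   * room for num_pixels * 4 values) into RGBA, in place, back to front. */
  static void expand_gray_to_rgba(float *data, size_t num_pixels)
  {
    for (size_t i = num_pixels - 1, pixel = 0; pixel < num_pixels; pixel++, i--) {
      data[i * 4 + 3] = 1.0f;
      data[i * 4 + 2] = data[i];
      data[i * 4 + 1] = data[i];
      data[i * 4 + 0] = data[i];
    }
  }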
-void ImageManager::device_load_image(
- Device *device, Scene *scene, ImageDataType type, int slot, Progress *progress)
+void ImageManager::device_load_image(Device *device, Scene *scene, int slot, Progress *progress)
{
- if (progress->get_cancel())
+ if (progress->get_cancel()) {
return;
+ }
- Image *img = images[type][slot];
-
- if (osl_texture_system && !img->builtin_data)
- return;
+ Image *img = images[slot];
- string filename = path_filename(images[type][slot]->filename);
- progress->set_status("Updating Images", "Loading " + filename);
+ progress->set_status("Updating Images", "Loading " + img->loader->name());
const int texture_limit = scene->params.texture_limit;
- /* Slot assignment */
- int flat_slot = type_index_to_flattened_slot(slot, type);
- img->mem_name = string_printf("__tex_image_%s_%03d", name_from_type(type), flat_slot);
+ load_image_metadata(img);
+ ImageDataType type = img->metadata.type;
+
+ /* Name for debugging. */
+ img->mem_name = string_printf("__tex_image_%s_%03d", name_from_type(type), slot);
/* Free previous texture in slot. */
if (img->mem) {
@@ -714,195 +633,131 @@ void ImageManager::device_load_image(
img->mem = NULL;
}
+ img->mem = new device_texture(
+ device, img->mem_name.c_str(), slot, type, img->params.interpolation, img->params.extension);
+ img->mem->info.use_transform_3d = img->metadata.use_transform_3d;
+ img->mem->info.transform_3d = img->metadata.transform_3d;
+
/* Create new texture. */
if (type == IMAGE_DATA_TYPE_FLOAT4) {
- device_vector<float4> *tex_img = new device_vector<float4>(
- device, img->mem_name.c_str(), MEM_TEXTURE);
-
- if (!file_load_image<TypeDesc::FLOAT, float>(img, type, texture_limit, *tex_img)) {
+ if (!file_load_image<TypeDesc::FLOAT, float>(img, texture_limit)) {
/* on failure to load, we set a 1x1 pixels pink image */
thread_scoped_lock device_lock(device_mutex);
- float *pixels = (float *)tex_img->alloc(1, 1);
+ float *pixels = (float *)img->mem->alloc(1, 1);
pixels[0] = TEX_IMAGE_MISSING_R;
pixels[1] = TEX_IMAGE_MISSING_G;
pixels[2] = TEX_IMAGE_MISSING_B;
pixels[3] = TEX_IMAGE_MISSING_A;
}
-
- img->mem = tex_img;
- img->mem->interpolation = img->interpolation;
- img->mem->extension = img->extension;
-
- thread_scoped_lock device_lock(device_mutex);
- tex_img->copy_to_device();
}
else if (type == IMAGE_DATA_TYPE_FLOAT) {
- device_vector<float> *tex_img = new device_vector<float>(
- device, img->mem_name.c_str(), MEM_TEXTURE);
-
- if (!file_load_image<TypeDesc::FLOAT, float>(img, type, texture_limit, *tex_img)) {
+ if (!file_load_image<TypeDesc::FLOAT, float>(img, texture_limit)) {
/* on failure to load, we set a 1x1 pixels pink image */
thread_scoped_lock device_lock(device_mutex);
- float *pixels = (float *)tex_img->alloc(1, 1);
+ float *pixels = (float *)img->mem->alloc(1, 1);
pixels[0] = TEX_IMAGE_MISSING_R;
}
-
- img->mem = tex_img;
- img->mem->interpolation = img->interpolation;
- img->mem->extension = img->extension;
-
- thread_scoped_lock device_lock(device_mutex);
- tex_img->copy_to_device();
}
else if (type == IMAGE_DATA_TYPE_BYTE4) {
- device_vector<uchar4> *tex_img = new device_vector<uchar4>(
- device, img->mem_name.c_str(), MEM_TEXTURE);
-
- if (!file_load_image<TypeDesc::UINT8, uchar>(img, type, texture_limit, *tex_img)) {
+ if (!file_load_image<TypeDesc::UINT8, uchar>(img, texture_limit)) {
/* on failure to load, we set a 1x1 pixels pink image */
thread_scoped_lock device_lock(device_mutex);
- uchar *pixels = (uchar *)tex_img->alloc(1, 1);
+ uchar *pixels = (uchar *)img->mem->alloc(1, 1);
pixels[0] = (TEX_IMAGE_MISSING_R * 255);
pixels[1] = (TEX_IMAGE_MISSING_G * 255);
pixels[2] = (TEX_IMAGE_MISSING_B * 255);
pixels[3] = (TEX_IMAGE_MISSING_A * 255);
}
-
- img->mem = tex_img;
- img->mem->interpolation = img->interpolation;
- img->mem->extension = img->extension;
-
- thread_scoped_lock device_lock(device_mutex);
- tex_img->copy_to_device();
}
else if (type == IMAGE_DATA_TYPE_BYTE) {
- device_vector<uchar> *tex_img = new device_vector<uchar>(
- device, img->mem_name.c_str(), MEM_TEXTURE);
-
- if (!file_load_image<TypeDesc::UINT8, uchar>(img, type, texture_limit, *tex_img)) {
+ if (!file_load_image<TypeDesc::UINT8, uchar>(img, texture_limit)) {
/* on failure to load, we set a 1x1 pixels pink image */
thread_scoped_lock device_lock(device_mutex);
- uchar *pixels = (uchar *)tex_img->alloc(1, 1);
+ uchar *pixels = (uchar *)img->mem->alloc(1, 1);
pixels[0] = (TEX_IMAGE_MISSING_R * 255);
}
-
- img->mem = tex_img;
- img->mem->interpolation = img->interpolation;
- img->mem->extension = img->extension;
-
- thread_scoped_lock device_lock(device_mutex);
- tex_img->copy_to_device();
}
else if (type == IMAGE_DATA_TYPE_HALF4) {
- device_vector<half4> *tex_img = new device_vector<half4>(
- device, img->mem_name.c_str(), MEM_TEXTURE);
-
- if (!file_load_image<TypeDesc::HALF, half>(img, type, texture_limit, *tex_img)) {
+ if (!file_load_image<TypeDesc::HALF, half>(img, texture_limit)) {
/* on failure to load, we set a 1x1 pixels pink image */
thread_scoped_lock device_lock(device_mutex);
- half *pixels = (half *)tex_img->alloc(1, 1);
+ half *pixels = (half *)img->mem->alloc(1, 1);
pixels[0] = TEX_IMAGE_MISSING_R;
pixels[1] = TEX_IMAGE_MISSING_G;
pixels[2] = TEX_IMAGE_MISSING_B;
pixels[3] = TEX_IMAGE_MISSING_A;
}
-
- img->mem = tex_img;
- img->mem->interpolation = img->interpolation;
- img->mem->extension = img->extension;
-
- thread_scoped_lock device_lock(device_mutex);
- tex_img->copy_to_device();
}
else if (type == IMAGE_DATA_TYPE_USHORT) {
- device_vector<uint16_t> *tex_img = new device_vector<uint16_t>(
- device, img->mem_name.c_str(), MEM_TEXTURE);
-
- if (!file_load_image<TypeDesc::USHORT, uint16_t>(img, type, texture_limit, *tex_img)) {
+ if (!file_load_image<TypeDesc::USHORT, uint16_t>(img, texture_limit)) {
/* on failure to load, we set a 1x1 pixels pink image */
thread_scoped_lock device_lock(device_mutex);
- uint16_t *pixels = (uint16_t *)tex_img->alloc(1, 1);
+ uint16_t *pixels = (uint16_t *)img->mem->alloc(1, 1);
pixels[0] = (TEX_IMAGE_MISSING_R * 65535);
}
-
- img->mem = tex_img;
- img->mem->interpolation = img->interpolation;
- img->mem->extension = img->extension;
-
- thread_scoped_lock device_lock(device_mutex);
- tex_img->copy_to_device();
}
else if (type == IMAGE_DATA_TYPE_USHORT4) {
- device_vector<ushort4> *tex_img = new device_vector<ushort4>(
- device, img->mem_name.c_str(), MEM_TEXTURE);
-
- if (!file_load_image<TypeDesc::USHORT, uint16_t>(img, type, texture_limit, *tex_img)) {
+ if (!file_load_image<TypeDesc::USHORT, uint16_t>(img, texture_limit)) {
/* on failure to load, we set a 1x1 pixels pink image */
thread_scoped_lock device_lock(device_mutex);
- uint16_t *pixels = (uint16_t *)tex_img->alloc(1, 1);
+ uint16_t *pixels = (uint16_t *)img->mem->alloc(1, 1);
pixels[0] = (TEX_IMAGE_MISSING_R * 65535);
pixels[1] = (TEX_IMAGE_MISSING_G * 65535);
pixels[2] = (TEX_IMAGE_MISSING_B * 65535);
pixels[3] = (TEX_IMAGE_MISSING_A * 65535);
}
-
- img->mem = tex_img;
- img->mem->interpolation = img->interpolation;
- img->mem->extension = img->extension;
-
- thread_scoped_lock device_lock(device_mutex);
- tex_img->copy_to_device();
}
else if (type == IMAGE_DATA_TYPE_HALF) {
- device_vector<half> *tex_img = new device_vector<half>(
- device, img->mem_name.c_str(), MEM_TEXTURE);
-
- if (!file_load_image<TypeDesc::HALF, half>(img, type, texture_limit, *tex_img)) {
+ if (!file_load_image<TypeDesc::HALF, half>(img, texture_limit)) {
/* on failure to load, we set a 1x1 pixels pink image */
thread_scoped_lock device_lock(device_mutex);
- half *pixels = (half *)tex_img->alloc(1, 1);
+ half *pixels = (half *)img->mem->alloc(1, 1);
pixels[0] = TEX_IMAGE_MISSING_R;
}
+ }
- img->mem = tex_img;
- img->mem->interpolation = img->interpolation;
- img->mem->extension = img->extension;
-
+ {
thread_scoped_lock device_lock(device_mutex);
- tex_img->copy_to_device();
+ img->mem->copy_to_device();
}
+
+  /* Clean up memory held by the image loader. */
+ img->loader->cleanup();
img->need_load = false;
}
-void ImageManager::device_free_image(Device *, ImageDataType type, int slot)
+void ImageManager::device_free_image(Device *, int slot)
{
- Image *img = images[type][slot];
+ Image *img = images[slot];
+ if (img == NULL) {
+ return;
+ }
- if (img) {
- if (osl_texture_system && !img->builtin_data) {
+ if (osl_texture_system) {
#ifdef WITH_OSL
- ustring filename(images[type][slot]->filename);
- ((OSL::TextureSystem *)osl_texture_system)->invalidate(filename);
-#endif
- }
-
- if (img->mem) {
- thread_scoped_lock device_lock(device_mutex);
- delete img->mem;
+ ustring filepath = img->loader->osl_filepath();
+ if (!filepath.empty()) {
+ ((OSL::TextureSystem *)osl_texture_system)->invalidate(filepath);
}
+#endif
+ }
- delete img;
- images[type][slot] = NULL;
- --tex_num_images[type];
+ if (img->mem) {
+ thread_scoped_lock device_lock(device_mutex);
+ delete img->mem;
}
+
+ delete img->loader;
+ delete img;
+ images[slot] = NULL;
}
void ImageManager::device_update(Device *device, Scene *scene, Progress &progress)
@@ -912,24 +767,14 @@ void ImageManager::device_update(Device *device, Scene *scene, Progress &progres
}
TaskPool pool;
- for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- for (size_t slot = 0; slot < images[type].size(); slot++) {
- if (!images[type][slot])
- continue;
-
- if (images[type][slot]->users == 0) {
- device_free_image(device, (ImageDataType)type, slot);
- }
- else if (images[type][slot]->need_load) {
- if (!osl_texture_system || images[type][slot]->builtin_data)
- pool.push(function_bind(&ImageManager::device_load_image,
- this,
- device,
- scene,
- (ImageDataType)type,
- slot,
- &progress));
- }
+ for (size_t slot = 0; slot < images.size(); slot++) {
+ Image *img = images[slot];
+ if (img && img->users == 0) {
+ device_free_image(device, slot);
+ }
+ else if (img && img->need_load) {
+ pool.push(
+ function_bind(&ImageManager::device_load_image, this, device, scene, slot, &progress));
}
}
@@ -938,23 +783,16 @@ void ImageManager::device_update(Device *device, Scene *scene, Progress &progres
need_update = false;
}
-void ImageManager::device_update_slot(Device *device,
- Scene *scene,
- int flat_slot,
- Progress *progress)
+void ImageManager::device_update_slot(Device *device, Scene *scene, int slot, Progress *progress)
{
- ImageDataType type;
- int slot = flattened_slot_to_type_index(flat_slot, &type);
-
- Image *image = images[type][slot];
- assert(image != NULL);
+ Image *img = images[slot];
+ assert(img != NULL);
- if (image->users == 0) {
- device_free_image(device, type, slot);
+ if (img->users == 0) {
+ device_free_image(device, slot);
}
- else if (image->need_load) {
- if (!osl_texture_system || image->builtin_data)
- device_load_image(device, scene, type, slot, progress);
+ else if (img->need_load) {
+ device_load_image(device, scene, slot, progress);
}
}
@@ -967,22 +805,11 @@ void ImageManager::device_load_builtin(Device *device, Scene *scene, Progress &p
}
TaskPool pool;
- for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- for (size_t slot = 0; slot < images[type].size(); slot++) {
- if (!images[type][slot])
- continue;
-
- if (images[type][slot]->need_load) {
- if (images[type][slot]->builtin_data) {
- pool.push(function_bind(&ImageManager::device_load_image,
- this,
- device,
- scene,
- (ImageDataType)type,
- slot,
- &progress));
- }
- }
+ for (size_t slot = 0; slot < images.size(); slot++) {
+ Image *img = images[slot];
+ if (img && img->need_load && img->builtin) {
+ pool.push(
+ function_bind(&ImageManager::device_load_image, this, device, scene, slot, &progress));
}
}
@@ -991,31 +818,27 @@ void ImageManager::device_load_builtin(Device *device, Scene *scene, Progress &p
void ImageManager::device_free_builtin(Device *device)
{
- for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- for (size_t slot = 0; slot < images[type].size(); slot++) {
- if (images[type][slot] && images[type][slot]->builtin_data)
- device_free_image(device, (ImageDataType)type, slot);
+ for (size_t slot = 0; slot < images.size(); slot++) {
+ Image *img = images[slot];
+ if (img && img->builtin) {
+ device_free_image(device, slot);
}
}
}
void ImageManager::device_free(Device *device)
{
- for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- for (size_t slot = 0; slot < images[type].size(); slot++) {
- device_free_image(device, (ImageDataType)type, slot);
- }
- images[type].clear();
+ for (size_t slot = 0; slot < images.size(); slot++) {
+ device_free_image(device, slot);
}
+ images.clear();
}
void ImageManager::collect_statistics(RenderStats *stats)
{
- for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- foreach (const Image *image, images[type]) {
- stats->image.textures.add_entry(
- NamedSizeEntry(path_filename(image->filename), image->mem->memory_size()));
- }
+ foreach (const Image *image, images) {
+ stats->image.textures.add_entry(
+ NamedSizeEntry(image->loader->name(), image->mem->memory_size()));
}
}
diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h
index 34f046692f6..fffe7c5152a 100644
--- a/intern/cycles/render/image.h
+++ b/intern/cycles/render/image.h
@@ -17,71 +17,162 @@
#ifndef __IMAGE_H__
#define __IMAGE_H__
-#include "device/device.h"
#include "device/device_memory.h"
-#include "util/util_image.h"
+#include "render/colorspace.h"
+
#include "util/util_string.h"
#include "util/util_thread.h"
+#include "util/util_transform.h"
#include "util/util_unique_ptr.h"
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
class Device;
+class DeviceInfo;
+class ImageHandle;
+class ImageKey;
+class ImageMetaData;
+class ImageManager;
class Progress;
class RenderStats;
class Scene;
+class ColorSpaceProcessor;
+
+/* Image Parameters */
+class ImageParams {
+ public:
+ bool animated;
+ InterpolationType interpolation;
+ ExtensionType extension;
+ ImageAlphaType alpha_type;
+ ustring colorspace;
+ float frame;
+
+ ImageParams()
+ : animated(false),
+ interpolation(INTERPOLATION_LINEAR),
+ extension(EXTENSION_CLIP),
+ alpha_type(IMAGE_ALPHA_AUTO),
+ colorspace(u_colorspace_raw),
+ frame(0.0f)
+ {
+ }
+ bool operator==(const ImageParams &other) const
+ {
+ return (animated == other.animated && interpolation == other.interpolation &&
+ extension == other.extension && alpha_type == other.alpha_type &&
+ colorspace == other.colorspace && frame == other.frame);
+ }
+};
+
+/* Image MetaData
+ *
+ * Information about the image that is available before the image pixels are loaded. */
class ImageMetaData {
public:
- /* Must be set by image file or builtin callback. */
- bool is_float, is_half;
+ /* Set by ImageLoader.load_metadata(). */
int channels;
size_t width, height, depth;
- bool builtin_free_cache;
+ ImageDataType type;
+
+ /* Optional color space, defaults to raw. */
+ ustring colorspace;
+ const char *colorspace_file_format;
+
+ /* Optional transform for 3D images. */
+ bool use_transform_3d;
+ Transform transform_3d;
/* Automatically set. */
- ImageDataType type;
- bool is_linear;
+ bool compress_as_srgb;
- bool operator==(const ImageMetaData &other) const
- {
- return is_float == other.is_float && is_half == other.is_half && channels == other.channels &&
- width == other.width && height == other.height && depth == other.depth &&
- type == other.type && is_linear == other.is_linear;
- }
+ ImageMetaData();
+ bool operator==(const ImageMetaData &other) const;
+ bool is_float() const;
+ void detect_colorspace();
};
+/* Image loader base class that can be subclassed to load image data
+ * from custom sources (file, memory, procedurally generated, etc.). */
+class ImageLoader {
+ public:
+ ImageLoader();
+ virtual ~ImageLoader(){};
+
+  /* Load metadata without the actual image data; this should be fast. */
+ virtual bool load_metadata(ImageMetaData &metadata) = 0;
+
+ /* Load actual image contents. */
+ virtual bool load_pixels(const ImageMetaData &metadata,
+ void *pixels,
+ const size_t pixels_size,
+ const bool associate_alpha) = 0;
+
+ /* Name for logs and stats. */
+ virtual string name() const = 0;
+
+ /* Optional for OSL texture cache. */
+ virtual ustring osl_filepath() const;
+
+ /* Free any memory used for loading metadata and pixels. */
+ virtual void cleanup(){};
+
+  /* Compare loaders to avoid loading the same image multiple times. */
+ virtual bool equals(const ImageLoader &other) const = 0;
+ static bool equals(const ImageLoader *a, const ImageLoader *b);
+
+  /* Workaround for the lack of RTTI. */
+};
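As a concrete illustration of the interface, a minimal hypothetical loader producing a constant-color 1x1 float image could look like the sketch below (the class name and values are illustrative only; see OIIOImageLoader, SkyLoader and VDBImageLoader further down for the real implementations):

  class ConstantColorLoader : public ImageLoader {
   public:
    ConstantColorLoader(float r, float g, float b) : r(r), g(g), b(b)
    {
    }

    bool load_metadata(ImageMetaData &metadata) override
    {
      metadata.width = metadata.height = 1;
      metadata.depth = 1;
      metadata.channels = 4;
      metadata.type = IMAGE_DATA_TYPE_FLOAT4;
      return true;
    }

    bool load_pixels(const ImageMetaData &, void *pixels, const size_t, const bool) override
    {
      float *p = (float *)pixels;
      p[0] = r;
      p[1] = g;
      p[2] = b;
      p[3] = 1.0f;
      return true;
    }

    string name() const override
    {
      return "constant_color";
    }

    bool equals(const ImageLoader &other) const override
    {
      const ConstantColorLoader &o = (const ConstantColorLoader &)other;
      return r == o.r && g == o.g && b == o.b;
    }

   protected:
    float r, g, b;
  };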
+
+/* Image Handle
+ *
+ * Access handle for an image in the image manager. Multiple shader nodes may
+ * share the same image, and this class handles reference counting for that. */
+class ImageHandle {
+ public:
+ ImageHandle();
+ ImageHandle(const ImageHandle &other);
+ ImageHandle &operator=(const ImageHandle &other);
+ ~ImageHandle();
+
+ bool operator==(const ImageHandle &other) const;
+
+ void clear();
+
+ bool empty();
+ int num_tiles();
+
+ ImageMetaData metadata();
+ int svm_slot(const int tile_index = 0) const;
+ device_texture *image_memory(const int tile_index = 0) const;
+
+ protected:
+ vector<int> tile_slots;
+ ImageManager *manager;
+
+ friend class ImageManager;
+};
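The reference counting itself lives in image.cpp and is not part of this diff; conceptually the copy operations map onto the new add_image_user()/remove_image_user() slot helpers roughly as in this sketch (illustrative, not the literal implementation), with assignment similarly releasing the old slots before taking references on the new ones:

  ImageHandle::ImageHandle(const ImageHandle &other)
      : tile_slots(other.tile_slots), manager(other.manager)
  {
    /* Each copy of the handle counts as one extra user per slot. */
    foreach (int slot, tile_slots) {
      manager->add_image_user(slot);
    }
  }

  ImageHandle::~ImageHandle()
  {
    clear(); /* Drops one user per slot via remove_image_user(). */
  }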
+
+/* Image Manager
+ *
+ * Handles loading and storage of all images in the scene. This includes 2D
+ * texture images and 3D volume images. */
class ImageManager {
public:
explicit ImageManager(const DeviceInfo &info);
~ImageManager();
- int add_image(const string &filename,
- void *builtin_data,
- bool animated,
- float frame,
- InterpolationType interpolation,
- ExtensionType extension,
- bool use_alpha,
- ImageMetaData &metadata);
- void remove_image(int flat_slot);
- void remove_image(const string &filename,
- void *builtin_data,
- InterpolationType interpolation,
- ExtensionType extension,
- bool use_alpha);
- void tag_reload_image(const string &filename,
- void *builtin_data,
- InterpolationType interpolation,
- ExtensionType extension,
- bool use_alpha);
- bool get_image_metadata(const string &filename, void *builtin_data, ImageMetaData &metadata);
- bool get_image_metadata(int flat_slot, ImageMetaData &metadata);
+ ImageHandle add_image(const string &filename, const ImageParams &params);
+ ImageHandle add_image(const string &filename,
+ const ImageParams &params,
+ const vector<int> &tiles);
+ ImageHandle add_image(ImageLoader *loader, const ImageParams &params);
void device_update(Device *device, Scene *scene, Progress &progress);
- void device_update_slot(Device *device, Scene *scene, int flat_slot, Progress *progress);
+ void device_update_slot(Device *device, Scene *scene, int slot, Progress *progress);
void device_free(Device *device);
void device_load_builtin(Device *device, Scene *scene, Progress &progress);
@@ -90,71 +181,50 @@ class ImageManager {
void set_osl_texture_system(void *texture_system);
bool set_animation_frame_update(int frame);
- device_memory *image_memory(int flat_slot);
-
void collect_statistics(RenderStats *stats);
bool need_update;
- /* NOTE: Here pixels_size is a size of storage, which equals to
- * width * height * depth.
- * Use this to avoid some nasty memory corruptions.
- */
- function<void(const string &filename, void *data, ImageMetaData &metadata)>
- builtin_image_info_cb;
- function<bool(const string &filename,
- void *data,
- unsigned char *pixels,
- const size_t pixels_size,
- const bool free_cache)>
- builtin_image_pixels_cb;
- function<bool(const string &filename,
- void *data,
- float *pixels,
- const size_t pixels_size,
- const bool free_cache)>
- builtin_image_float_pixels_cb;
-
struct Image {
- string filename;
- void *builtin_data;
+ ImageParams params;
ImageMetaData metadata;
+ ImageLoader *loader;
- bool use_alpha;
- bool need_load;
- bool animated;
float frame;
- InterpolationType interpolation;
- ExtensionType extension;
+ bool need_metadata;
+ bool need_load;
+ bool builtin;
string mem_name;
- device_memory *mem;
+ device_texture *mem;
int users;
+ thread_mutex mutex;
};
private:
- int tex_num_images[IMAGE_DATA_NUM_TYPES];
- int max_num_images;
bool has_half_images;
thread_mutex device_mutex;
+ thread_mutex images_mutex;
int animation_frame;
- vector<Image *> images[IMAGE_DATA_NUM_TYPES];
+ vector<Image *> images;
void *osl_texture_system;
- bool file_load_image_generic(Image *img, unique_ptr<ImageInput> *in);
+ int add_image_slot(ImageLoader *loader, const ImageParams &params, const bool builtin);
+ void add_image_user(int slot);
+ void remove_image_user(int slot);
+
+ void load_image_metadata(Image *img);
+
+ template<TypeDesc::BASETYPE FileFormat, typename StorageType>
+ bool file_load_image(Image *img, int texture_limit);
- template<TypeDesc::BASETYPE FileFormat, typename StorageType, typename DeviceType>
- bool file_load_image(Image *img,
- ImageDataType type,
- int texture_limit,
- device_vector<DeviceType> &tex_img);
+ void device_load_image(Device *device, Scene *scene, int slot, Progress *progress);
+ void device_free_image(Device *device, int slot);
- void device_load_image(
- Device *device, Scene *scene, ImageDataType type, int slot, Progress *progress);
- void device_free_image(Device *device, ImageDataType type, int slot);
+ friend class ImageHandle;
};
CCL_NAMESPACE_END
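With the new API, callers no longer juggle flattened slot indices and manual remove_image() calls; a shader node simply keeps an ImageHandle alive for as long as it needs the texture. A hedged usage sketch (the scene variable and the file path are assumptions for illustration):

  ImageParams params;
  params.interpolation = INTERPOLATION_LINEAR;
  params.extension = EXTENSION_REPEAT;
  params.alpha_type = IMAGE_ALPHA_AUTO;
  params.colorspace = u_colorspace_srgb;

  /* Deduplicated against existing images with an equal loader and equal params. */
  ImageHandle handle = scene->image_manager->add_image("/tmp/checker.png", params);

  int slot = handle.svm_slot();           /* slot index to store in the SVM node */
  ImageMetaData metadata = handle.metadata();

  /* When `handle` goes out of scope the user count drops, and the image is
   * freed on the next device_update() if nothing else references it. */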
diff --git a/intern/cycles/render/image_oiio.cpp b/intern/cycles/render/image_oiio.cpp
new file mode 100644
index 00000000000..c4f95c6b4bc
--- /dev/null
+++ b/intern/cycles/render/image_oiio.cpp
@@ -0,0 +1,236 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "render/image_oiio.h"
+
+#include "util/util_image.h"
+#include "util/util_logging.h"
+#include "util/util_path.h"
+
+CCL_NAMESPACE_BEGIN
+
+OIIOImageLoader::OIIOImageLoader(const string &filepath) : filepath(filepath)
+{
+}
+
+OIIOImageLoader::~OIIOImageLoader()
+{
+}
+
+bool OIIOImageLoader::load_metadata(ImageMetaData &metadata)
+{
+ /* Perform preliminary checks, with meaningful logging. */
+ if (!path_exists(filepath.string())) {
+ VLOG(1) << "File '" << filepath.string() << "' does not exist.";
+ return false;
+ }
+ if (path_is_directory(filepath.string())) {
+ VLOG(1) << "File '" << filepath.string() << "' is a directory, can't use as image.";
+ return false;
+ }
+
+ unique_ptr<ImageInput> in(ImageInput::create(filepath.string()));
+
+ if (!in) {
+ return false;
+ }
+
+ ImageSpec spec;
+ if (!in->open(filepath.string(), spec)) {
+ return false;
+ }
+
+ metadata.width = spec.width;
+ metadata.height = spec.height;
+ metadata.depth = spec.depth;
+ metadata.compress_as_srgb = false;
+
+ /* Check the main format, and channel formats. */
+ size_t channel_size = spec.format.basesize();
+
+ bool is_float = false;
+ bool is_half = false;
+
+ if (spec.format.is_floating_point()) {
+ is_float = true;
+ }
+
+ for (size_t channel = 0; channel < spec.channelformats.size(); channel++) {
+ channel_size = max(channel_size, spec.channelformats[channel].basesize());
+ if (spec.channelformats[channel].is_floating_point()) {
+ is_float = true;
+ }
+ }
+
+ /* check if it's half float */
+ if (spec.format == TypeDesc::HALF) {
+ is_half = true;
+ }
+
+ /* set type and channels */
+ metadata.channels = spec.nchannels;
+
+ if (is_half) {
+ metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_HALF4 : IMAGE_DATA_TYPE_HALF;
+ }
+ else if (is_float) {
+ metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_FLOAT;
+ }
+ else if (spec.format == TypeDesc::USHORT) {
+ metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_USHORT4 : IMAGE_DATA_TYPE_USHORT;
+ }
+ else {
+ metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_BYTE4 : IMAGE_DATA_TYPE_BYTE;
+ }
+
+ metadata.colorspace_file_format = in->format_name();
+
+ in->close();
+
+ return true;
+}
+
+template<TypeDesc::BASETYPE FileFormat, typename StorageType>
+static void oiio_load_pixels(const ImageMetaData &metadata,
+ const unique_ptr<ImageInput> &in,
+ StorageType *pixels)
+{
+ const int width = metadata.width;
+ const int height = metadata.height;
+ const int depth = metadata.depth;
+ const int components = metadata.channels;
+
+ /* Read pixels through OpenImageIO. */
+ StorageType *readpixels = pixels;
+ vector<StorageType> tmppixels;
+ if (components > 4) {
+ tmppixels.resize(((size_t)width) * height * components);
+ readpixels = &tmppixels[0];
+ }
+
+ if (depth <= 1) {
+ size_t scanlinesize = ((size_t)width) * components * sizeof(StorageType);
+ in->read_image(FileFormat,
+ (uchar *)readpixels + (height - 1) * scanlinesize,
+ AutoStride,
+ -scanlinesize,
+ AutoStride);
+ }
+ else {
+ in->read_image(FileFormat, (uchar *)readpixels);
+ }
+
+ if (components > 4) {
+ size_t dimensions = ((size_t)width) * height;
+ for (size_t i = dimensions - 1, pixel = 0; pixel < dimensions; pixel++, i--) {
+ pixels[i * 4 + 3] = tmppixels[i * components + 3];
+ pixels[i * 4 + 2] = tmppixels[i * components + 2];
+ pixels[i * 4 + 1] = tmppixels[i * components + 1];
+ pixels[i * 4 + 0] = tmppixels[i * components + 0];
+ }
+ tmppixels.clear();
+ }
+
+ /* CMYK to RGBA. */
+ const bool cmyk = strcmp(in->format_name(), "jpeg") == 0 && components == 4;
+ if (cmyk) {
+ const StorageType one = util_image_cast_from_float<StorageType>(1.0f);
+
+ const size_t num_pixels = ((size_t)width) * height * depth;
+ for (size_t i = num_pixels - 1, pixel = 0; pixel < num_pixels; pixel++, i--) {
+ float c = util_image_cast_to_float(pixels[i * 4 + 0]);
+ float m = util_image_cast_to_float(pixels[i * 4 + 1]);
+ float y = util_image_cast_to_float(pixels[i * 4 + 2]);
+ float k = util_image_cast_to_float(pixels[i * 4 + 3]);
+ pixels[i * 4 + 0] = util_image_cast_from_float<StorageType>((1.0f - c) * (1.0f - k));
+ pixels[i * 4 + 1] = util_image_cast_from_float<StorageType>((1.0f - m) * (1.0f - k));
+ pixels[i * 4 + 2] = util_image_cast_from_float<StorageType>((1.0f - y) * (1.0f - k));
+ pixels[i * 4 + 3] = one;
+ }
+ }
+}
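The read_image() call above is worth a note: for 2D images (depth <= 1) it passes a pointer to the last scanline of the destination together with a negative y-stride, so while OIIO reads the file top-to-bottom the rows land bottom-to-top in memory, matching Cycles' texture orientation. Roughly the same effect written out by hand, using the scanlinesize defined above (illustrative only; the single read_image() call lets OIIO handle the whole image at once):

  /* Hypothetical explicit flip, shown only to illustrate what the negative stride achieves. */
  for (int y = 0; y < height; y++) {
    in->read_scanline(y, 0, FileFormat, (uchar *)pixels + ((size_t)height - 1 - y) * scanlinesize);
  }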
+
+bool OIIOImageLoader::load_pixels(const ImageMetaData &metadata,
+ void *pixels,
+ const size_t,
+ const bool associate_alpha)
+{
+ unique_ptr<ImageInput> in = NULL;
+
+ /* NOTE: Error logging is done in meta data acquisition. */
+ if (!path_exists(filepath.string()) || path_is_directory(filepath.string())) {
+ return false;
+ }
+
+ /* load image from file through OIIO */
+ in = unique_ptr<ImageInput>(ImageInput::create(filepath.string()));
+ if (!in) {
+ return false;
+ }
+
+ ImageSpec spec = ImageSpec();
+ ImageSpec config = ImageSpec();
+
+ if (!associate_alpha) {
+ config.attribute("oiio:UnassociatedAlpha", 1);
+ }
+
+ if (!in->open(filepath.string(), spec, config)) {
+ return false;
+ }
+
+ switch (metadata.type) {
+ case IMAGE_DATA_TYPE_BYTE:
+ case IMAGE_DATA_TYPE_BYTE4:
+ oiio_load_pixels<TypeDesc::UINT8, uchar>(metadata, in, (uchar *)pixels);
+ break;
+ case IMAGE_DATA_TYPE_USHORT:
+ case IMAGE_DATA_TYPE_USHORT4:
+ oiio_load_pixels<TypeDesc::USHORT, uint16_t>(metadata, in, (uint16_t *)pixels);
+ break;
+ case IMAGE_DATA_TYPE_HALF:
+ case IMAGE_DATA_TYPE_HALF4:
+ oiio_load_pixels<TypeDesc::HALF, half>(metadata, in, (half *)pixels);
+ break;
+ case IMAGE_DATA_TYPE_FLOAT:
+ case IMAGE_DATA_TYPE_FLOAT4:
+ oiio_load_pixels<TypeDesc::FLOAT, float>(metadata, in, (float *)pixels);
+ break;
+ case IMAGE_DATA_NUM_TYPES:
+ break;
+ }
+
+ in->close();
+ return true;
+}
+
+string OIIOImageLoader::name() const
+{
+ return path_filename(filepath.string());
+}
+
+ustring OIIOImageLoader::osl_filepath() const
+{
+ return filepath;
+}
+
+bool OIIOImageLoader::equals(const ImageLoader &other) const
+{
+ const OIIOImageLoader &other_loader = (const OIIOImageLoader &)other;
+ return filepath == other_loader.filepath;
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/render/image_oiio.h b/intern/cycles/render/image_oiio.h
new file mode 100644
index 00000000000..a234b968557
--- /dev/null
+++ b/intern/cycles/render/image_oiio.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __IMAGE_OIIO__
+#define __IMAGE_OIIO__
+
+#include "render/image.h"
+
+CCL_NAMESPACE_BEGIN
+
+class OIIOImageLoader : public ImageLoader {
+ public:
+ OIIOImageLoader(const string &filepath);
+ ~OIIOImageLoader();
+
+ bool load_metadata(ImageMetaData &metadata) override;
+
+ bool load_pixels(const ImageMetaData &metadata,
+ void *pixels,
+ const size_t pixels_size,
+ const bool associate_alpha) override;
+
+ string name() const override;
+
+ ustring osl_filepath() const override;
+
+ bool equals(const ImageLoader &other) const override;
+
+ protected:
+ ustring filepath;
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __IMAGE_OIIO__ */
diff --git a/intern/cycles/render/image_sky.cpp b/intern/cycles/render/image_sky.cpp
new file mode 100644
index 00000000000..0560907c63e
--- /dev/null
+++ b/intern/cycles/render/image_sky.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "render/image_sky.h"
+
+#include "sky_model.h"
+
+#include "util/util_image.h"
+#include "util/util_logging.h"
+#include "util/util_path.h"
+#include "util/util_task.h"
+
+CCL_NAMESPACE_BEGIN
+
+SkyLoader::SkyLoader(float sun_elevation,
+ float altitude,
+ float air_density,
+ float dust_density,
+ float ozone_density)
+ : sun_elevation(sun_elevation),
+ altitude(altitude),
+ air_density(air_density),
+ dust_density(dust_density),
+ ozone_density(ozone_density)
+{
+}
+
+SkyLoader::~SkyLoader(){};
+
+bool SkyLoader::load_metadata(ImageMetaData &metadata)
+{
+ metadata.width = 512;
+ metadata.height = 128;
+ metadata.channels = 3;
+ metadata.depth = 1;
+ metadata.type = IMAGE_DATA_TYPE_FLOAT4;
+ metadata.compress_as_srgb = false;
+ return true;
+}
+
+bool SkyLoader::load_pixels(const ImageMetaData &metadata,
+ void *pixels,
+ const size_t /*pixels_size*/,
+ const bool /*associate_alpha*/)
+{
+ /* definitions */
+ int width = metadata.width;
+ int height = metadata.height;
+ float *pixel_data = (float *)pixels;
+
+ /* precompute sky texture */
+ const int rows_per_task = divide_up(1024, width);
+ parallel_for(blocked_range<size_t>(0, height, rows_per_task),
+ [&](const blocked_range<size_t> &r) {
+ SKY_nishita_skymodel_precompute_texture(pixel_data,
+ metadata.channels,
+ r.begin(),
+ r.end(),
+ width,
+ height,
+ sun_elevation,
+ altitude,
+ air_density,
+ dust_density,
+ ozone_density);
+ });
+
+ return true;
+}
+
+string SkyLoader::name() const
+{
+ return "sky_nishita";
+}
+
+bool SkyLoader::equals(const ImageLoader & /*other*/) const
+{
+ return false;
+}
+
+CCL_NAMESPACE_END
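Note that SkyLoader::equals() always returns false, so a procedurally generated sky texture is never deduplicated against another image; every sky node gets its own slot. A hedged sketch of how a sky texture node could hook this up (parameter values are placeholders):

  ImageParams params;
  params.interpolation = INTERPOLATION_LINEAR;
  params.extension = EXTENSION_EXTEND;

  ImageLoader *loader = new SkyLoader(/*sun_elevation*/ 0.5f,
                                      /*altitude*/ 0.0f,
                                      /*air_density*/ 1.0f,
                                      /*dust_density*/ 1.0f,
                                      /*ozone_density*/ 1.0f);

  /* The manager takes ownership of the loader and frees it in device_free_image(). */
  ImageHandle handle = scene->image_manager->add_image(loader, params);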
diff --git a/intern/cycles/render/image_sky.h b/intern/cycles/render/image_sky.h
new file mode 100644
index 00000000000..686f4e5b885
--- /dev/null
+++ b/intern/cycles/render/image_sky.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "render/image.h"
+
+CCL_NAMESPACE_BEGIN
+
+class SkyLoader : public ImageLoader {
+ private:
+ float sun_elevation;
+ float altitude;
+ float air_density;
+ float dust_density;
+ float ozone_density;
+
+ public:
+ SkyLoader(float sun_elevation,
+ float altitude,
+ float air_density,
+ float dust_density,
+ float ozone_density);
+ ~SkyLoader();
+
+ bool load_metadata(ImageMetaData &metadata) override;
+
+ bool load_pixels(const ImageMetaData &metadata,
+ void *pixels,
+ const size_t /*pixels_size*/,
+ const bool /*associate_alpha*/) override;
+
+ string name() const override;
+
+ bool equals(const ImageLoader & /*other*/) const override;
+};
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/render/image_vdb.cpp b/intern/cycles/render/image_vdb.cpp
new file mode 100644
index 00000000000..500131c2d84
--- /dev/null
+++ b/intern/cycles/render/image_vdb.cpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "render/image_vdb.h"
+
+#ifdef WITH_OPENVDB
+# include <openvdb/openvdb.h>
+# include <openvdb/tools/Dense.h>
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+VDBImageLoader::VDBImageLoader(const string &grid_name) : grid_name(grid_name)
+{
+}
+
+VDBImageLoader::~VDBImageLoader()
+{
+}
+
+bool VDBImageLoader::load_metadata(ImageMetaData &metadata)
+{
+#ifdef WITH_OPENVDB
+ if (!grid) {
+ return false;
+ }
+
+ bbox = grid->evalActiveVoxelBoundingBox();
+ if (bbox.empty()) {
+ return false;
+ }
+
+ /* Set dimensions. */
+ openvdb::Coord dim = bbox.dim();
+ openvdb::Coord min = bbox.min();
+ metadata.width = dim.x();
+ metadata.height = dim.y();
+ metadata.depth = dim.z();
+
+ /* Set data type. */
+ if (grid->isType<openvdb::FloatGrid>()) {
+ metadata.channels = 1;
+ }
+ else if (grid->isType<openvdb::Vec3fGrid>()) {
+ metadata.channels = 3;
+ }
+ else if (grid->isType<openvdb::BoolGrid>()) {
+ metadata.channels = 1;
+ }
+ else if (grid->isType<openvdb::DoubleGrid>()) {
+ metadata.channels = 1;
+ }
+ else if (grid->isType<openvdb::Int32Grid>()) {
+ metadata.channels = 1;
+ }
+ else if (grid->isType<openvdb::Int64Grid>()) {
+ metadata.channels = 1;
+ }
+ else if (grid->isType<openvdb::Vec3IGrid>()) {
+ metadata.channels = 3;
+ }
+ else if (grid->isType<openvdb::Vec3dGrid>()) {
+ metadata.channels = 3;
+ }
+ else if (grid->isType<openvdb::MaskGrid>()) {
+ metadata.channels = 1;
+ }
+ else {
+ return false;
+ }
+
+ if (metadata.channels == 1) {
+ metadata.type = IMAGE_DATA_TYPE_FLOAT;
+ }
+ else {
+ metadata.type = IMAGE_DATA_TYPE_FLOAT4;
+ }
+
+ /* Set transform from object space to voxel index. */
+ openvdb::math::Mat4f grid_matrix = grid->transform().baseMap()->getAffineMap()->getMat4();
+ Transform index_to_object;
+ for (int col = 0; col < 4; col++) {
+ for (int row = 0; row < 3; row++) {
+ index_to_object[row][col] = (float)grid_matrix[col][row];
+ }
+ }
+
+ Transform texture_to_index = transform_translate(min.x(), min.y(), min.z()) *
+ transform_scale(dim.x(), dim.y(), dim.z());
+
+ metadata.transform_3d = transform_inverse(index_to_object * texture_to_index);
+ metadata.use_transform_3d = true;
+
+ return true;
+#else
+ (void)metadata;
+ return false;
+#endif
+}
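The transform composition above can be read as follows: texture_to_index maps normalized [0,1]^3 texture coordinates to voxel indices inside the active bounding box, index_to_object maps voxel indices to object space, and the stored transform_3d is the inverse of their product, i.e. the mapping from object space back to texture coordinates that the kernel needs when sampling the volume. As a small sketch (hypothetical helper, mirroring the math above):

  /* Object-space position -> normalized texture coordinate used for the lookup. */
  static float3 object_to_texture(const ImageMetaData &metadata, const float3 P_object)
  {
    return transform_point(&metadata.transform_3d, P_object);
  }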
+
+bool VDBImageLoader::load_pixels(const ImageMetaData &, void *pixels, const size_t, const bool)
+{
+#ifdef WITH_OPENVDB
+ if (grid->isType<openvdb::FloatGrid>()) {
+ openvdb::tools::Dense<float, openvdb::tools::LayoutXYZ> dense(bbox, (float *)pixels);
+ openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::FloatGrid>(grid), dense);
+ }
+ else if (grid->isType<openvdb::Vec3fGrid>()) {
+ openvdb::tools::Dense<openvdb::Vec3f, openvdb::tools::LayoutXYZ> dense(
+ bbox, (openvdb::Vec3f *)pixels);
+ openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::Vec3fGrid>(grid), dense);
+ }
+ else if (grid->isType<openvdb::BoolGrid>()) {
+ openvdb::tools::Dense<float, openvdb::tools::LayoutXYZ> dense(bbox, (float *)pixels);
+ openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::BoolGrid>(grid), dense);
+ }
+ else if (grid->isType<openvdb::DoubleGrid>()) {
+ openvdb::tools::Dense<float, openvdb::tools::LayoutXYZ> dense(bbox, (float *)pixels);
+ openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::DoubleGrid>(grid), dense);
+ }
+ else if (grid->isType<openvdb::Int32Grid>()) {
+ openvdb::tools::Dense<float, openvdb::tools::LayoutXYZ> dense(bbox, (float *)pixels);
+ openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::Int32Grid>(grid), dense);
+ }
+ else if (grid->isType<openvdb::Int64Grid>()) {
+ openvdb::tools::Dense<float, openvdb::tools::LayoutXYZ> dense(bbox, (float *)pixels);
+ openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::Int64Grid>(grid), dense);
+ }
+ else if (grid->isType<openvdb::Vec3IGrid>()) {
+ openvdb::tools::Dense<openvdb::Vec3f, openvdb::tools::LayoutXYZ> dense(
+ bbox, (openvdb::Vec3f *)pixels);
+ openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::Vec3IGrid>(grid), dense);
+ }
+ else if (grid->isType<openvdb::Vec3dGrid>()) {
+ openvdb::tools::Dense<openvdb::Vec3f, openvdb::tools::LayoutXYZ> dense(
+ bbox, (openvdb::Vec3f *)pixels);
+ openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::Vec3dGrid>(grid), dense);
+ }
+ else if (grid->isType<openvdb::MaskGrid>()) {
+ openvdb::tools::Dense<float, openvdb::tools::LayoutXYZ> dense(bbox, (float *)pixels);
+ openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::MaskGrid>(grid), dense);
+ }
+
+ return true;
+#else
+ (void)pixels;
+ return false;
+#endif
+}
+
+string VDBImageLoader::name() const
+{
+ return grid_name;
+}
+
+bool VDBImageLoader::equals(const ImageLoader &other) const
+{
+#ifdef WITH_OPENVDB
+ const VDBImageLoader &other_loader = (const VDBImageLoader &)other;
+ return grid == other_loader.grid;
+#else
+ (void)other;
+ return true;
+#endif
+}
+
+void VDBImageLoader::cleanup()
+{
+#ifdef WITH_OPENVDB
+ /* Free OpenVDB grid memory as soon as we can. */
+ grid.reset();
+#endif
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/render/image_vdb.h b/intern/cycles/render/image_vdb.h
new file mode 100644
index 00000000000..7dec63b11e6
--- /dev/null
+++ b/intern/cycles/render/image_vdb.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __IMAGE_VDB__
+#define __IMAGE_VDB__
+
+#ifdef WITH_OPENVDB
+# include <openvdb/openvdb.h>
+#endif
+
+#include "render/image.h"
+
+CCL_NAMESPACE_BEGIN
+
+class VDBImageLoader : public ImageLoader {
+ public:
+ VDBImageLoader(const string &grid_name);
+ ~VDBImageLoader();
+
+ virtual bool load_metadata(ImageMetaData &metadata) override;
+
+ virtual bool load_pixels(const ImageMetaData &metadata,
+ void *pixels,
+ const size_t pixels_size,
+ const bool associate_alpha) override;
+
+ virtual string name() const override;
+
+ virtual bool equals(const ImageLoader &other) const override;
+
+ virtual void cleanup() override;
+
+ protected:
+ string grid_name;
+#ifdef WITH_OPENVDB
+ openvdb::GridBase::ConstPtr grid;
+ openvdb::CoordBBox bbox;
+#endif
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __IMAGE_VDB__ */
diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp
index d21fc3f757c..93e50fd170c 100644
--- a/intern/cycles/render/integrator.cpp
+++ b/intern/cycles/render/integrator.cpp
@@ -14,17 +14,22 @@
* limitations under the License.
*/
+#include "render/integrator.h"
#include "device/device.h"
#include "render/background.h"
-#include "render/integrator.h"
#include "render/film.h"
+#include "render/jitter.h"
#include "render/light.h"
#include "render/scene.h"
#include "render/shader.h"
#include "render/sobol.h"
+#include "kernel/kernel_types.h"
+
#include "util/util_foreach.h"
#include "util/util_hash.h"
+#include "util/util_logging.h"
+#include "util/util_task.h"
CCL_NAMESPACE_BEGIN
@@ -32,6 +37,7 @@ NODE_DEFINE(Integrator)
{
NodeType *type = NodeType::add("integrator", create);
+ SOCKET_INT(min_bounce, "Min Bounce", 0);
SOCKET_INT(max_bounce, "Max Bounce", 7);
SOCKET_INT(max_diffuse_bounce, "Max Diffuse Bounce", 7);
@@ -39,12 +45,13 @@ NODE_DEFINE(Integrator)
SOCKET_INT(max_transmission_bounce, "Max Transmission Bounce", 7);
SOCKET_INT(max_volume_bounce, "Max Volume Bounce", 7);
+ SOCKET_INT(transparent_min_bounce, "Transparent Min Bounce", 0);
SOCKET_INT(transparent_max_bounce, "Transparent Max Bounce", 7);
SOCKET_INT(ao_bounces, "AO Bounces", 0);
SOCKET_INT(volume_max_steps, "Volume Max Steps", 1024);
- SOCKET_FLOAT(volume_step_size, "Volume Step Size", 0.1f);
+ SOCKET_FLOAT(volume_step_rate, "Volume Step Rate", 1.0f);
SOCKET_BOOLEAN(caustics_reflective, "Reflective Caustics", true);
SOCKET_BOOLEAN(caustics_refractive, "Refractive Caustics", true);
@@ -64,6 +71,9 @@ NODE_DEFINE(Integrator)
SOCKET_INT(volume_samples, "Volume Samples", 1);
SOCKET_INT(start_sample, "Start Sample", 0);
+ SOCKET_FLOAT(adaptive_threshold, "Adaptive Threshold", 0.0f);
+ SOCKET_INT(adaptive_min_samples, "Adaptive Min Samples", 0);
+
SOCKET_BOOLEAN(sample_all_lights_direct, "Sample All Lights Direct", true);
SOCKET_BOOLEAN(sample_all_lights_indirect, "Sample All Lights Indirect", true);
SOCKET_FLOAT(light_sampling_threshold, "Light Sampling Threshold", 0.05f);
@@ -81,6 +91,7 @@ NODE_DEFINE(Integrator)
static NodeEnum sampling_pattern_enum;
sampling_pattern_enum.insert("sobol", SAMPLING_PATTERN_SOBOL);
sampling_pattern_enum.insert("cmj", SAMPLING_PATTERN_CMJ);
+ sampling_pattern_enum.insert("pmj", SAMPLING_PATTERN_PMJ);
SOCKET_ENUM(sampling_pattern, "Sampling Pattern", sampling_pattern_enum, SAMPLING_PATTERN_SOBOL);
return type;
@@ -105,6 +116,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
KernelIntegrator *kintegrator = &dscene->data.integrator;
/* integrator parameters */
+ kintegrator->min_bounce = min_bounce + 1;
kintegrator->max_bounce = max_bounce + 1;
kintegrator->max_diffuse_bounce = max_diffuse_bounce + 1;
@@ -112,6 +124,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
kintegrator->max_transmission_bounce = max_transmission_bounce + 1;
kintegrator->max_volume_bounce = max_volume_bounce + 1;
+ kintegrator->transparent_min_bounce = transparent_min_bounce + 1;
kintegrator->transparent_max_bounce = transparent_max_bounce + 1;
if (ao_bounces == 0) {
@@ -136,13 +149,13 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
}
kintegrator->volume_max_steps = volume_max_steps;
- kintegrator->volume_step_size = volume_step_size;
+ kintegrator->volume_step_rate = volume_step_rate;
kintegrator->caustics_reflective = caustics_reflective;
kintegrator->caustics_refractive = caustics_refractive;
kintegrator->filter_glossy = (filter_glossy == 0.0f) ? FLT_MAX : 1.0f / filter_glossy;
- kintegrator->seed = hash_int(seed);
+ kintegrator->seed = hash_uint2(seed, 0);
kintegrator->use_ambient_occlusion = ((Pass::contains(scene->film->passes, PASS_AO)) ||
dscene->data.background.ao_factor != 0.0f);
@@ -175,6 +188,29 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
kintegrator->sampling_pattern = sampling_pattern;
kintegrator->aa_samples = aa_samples;
+ if (aa_samples > 0 && adaptive_min_samples == 0) {
+ kintegrator->adaptive_min_samples = max(4, (int)sqrtf(aa_samples));
+ VLOG(1) << "Cycles adaptive sampling: automatic min samples = "
+ << kintegrator->adaptive_min_samples;
+ }
+ else {
+ kintegrator->adaptive_min_samples = max(4, adaptive_min_samples);
+ }
+
+ kintegrator->adaptive_step = 4;
+ kintegrator->adaptive_stop_per_sample = device->info.has_adaptive_stop_per_sample;
+
+ /* Adaptive step must be a power of two for bitwise operations to work. */
+ assert((kintegrator->adaptive_step & (kintegrator->adaptive_step - 1)) == 0);
+
+ if (aa_samples > 0 && adaptive_threshold == 0.0f) {
+ kintegrator->adaptive_threshold = max(0.001f, 1.0f / (float)aa_samples);
+ VLOG(1) << "Cycles adaptive sampling: automatic threshold = "
+ << kintegrator->adaptive_threshold;
+ }
+ else {
+ kintegrator->adaptive_threshold = adaptive_threshold;
+ }
if (light_sampling_threshold > 0.0f) {
kintegrator->light_inv_rr_threshold = 1.0f / light_sampling_threshold;
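A worked example of the automatic adaptive-sampling defaults above, for a hypothetical render with aa_samples = 256 and both adaptive settings left at their 0 defaults:

  kintegrator->adaptive_min_samples = max(4, (int)sqrtf(256.0f)); /* = 16 */
  kintegrator->adaptive_threshold = max(0.001f, 1.0f / 256.0f);   /* ~= 0.0039 */

so the adaptive stopping criterion is only checked after 16 samples per pixel, against a noise threshold of roughly 0.004.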
@@ -204,17 +240,26 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
int dimensions = PRNG_BASE_NUM + max_samples * PRNG_BOUNCE_NUM;
dimensions = min(dimensions, SOBOL_MAX_DIMENSIONS);
- uint *directions = dscene->sobol_directions.alloc(SOBOL_BITS * dimensions);
+ if (sampling_pattern == SAMPLING_PATTERN_SOBOL) {
+ uint *directions = dscene->sample_pattern_lut.alloc(SOBOL_BITS * dimensions);
- sobol_generate_direction_vectors((uint(*)[SOBOL_BITS])directions, dimensions);
+ sobol_generate_direction_vectors((uint(*)[SOBOL_BITS])directions, dimensions);
- dscene->sobol_directions.copy_to_device();
-
- /* Clamping. */
- bool use_sample_clamp = (sample_clamp_direct != 0.0f || sample_clamp_indirect != 0.0f);
- if (use_sample_clamp != scene->film->use_sample_clamp) {
- scene->film->use_sample_clamp = use_sample_clamp;
- scene->film->tag_update(scene);
+ dscene->sample_pattern_lut.copy_to_device();
+ }
+ else {
+ constexpr int sequence_size = NUM_PMJ_SAMPLES;
+ constexpr int num_sequences = NUM_PMJ_PATTERNS;
+ float2 *directions = (float2 *)dscene->sample_pattern_lut.alloc(sequence_size * num_sequences *
+ 2);
+ TaskPool pool;
+ for (int j = 0; j < num_sequences; ++j) {
+ float2 *sequence = directions + j * sequence_size;
+ pool.push(
+ function_bind(&progressive_multi_jitter_02_generate_2D, sequence, sequence_size, j));
+ }
+ pool.wait_work();
+ dscene->sample_pattern_lut.copy_to_device();
}
need_update = false;
@@ -222,7 +267,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
void Integrator::device_free(Device *, DeviceScene *dscene)
{
- dscene->sobol_directions.free();
+ dscene->sample_pattern_lut.free();
}
bool Integrator::modified(const Integrator &integrator)
diff --git a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h
index 8270c78b94e..847cb6b3538 100644
--- a/intern/cycles/render/integrator.h
+++ b/intern/cycles/render/integrator.h
@@ -31,6 +31,7 @@ class Integrator : public Node {
public:
NODE_DECLARE
+ int min_bounce;
int max_bounce;
int max_diffuse_bounce;
@@ -38,12 +39,13 @@ class Integrator : public Node {
int max_transmission_bounce;
int max_volume_bounce;
+ int transparent_min_bounce;
int transparent_max_bounce;
int ao_bounces;
int volume_max_steps;
- float volume_step_size;
+ float volume_step_rate;
bool caustics_reflective;
bool caustics_refractive;
@@ -75,6 +77,9 @@ class Integrator : public Node {
bool use_light_tree;
float splitting_threshold;
+ int adaptive_min_samples;
+ float adaptive_threshold;
+
enum Method {
BRANCHED_PATH = 0,
PATH = 1,
diff --git a/intern/cycles/render/jitter.cpp b/intern/cycles/render/jitter.cpp
new file mode 100644
index 00000000000..fc47b0e8f0a
--- /dev/null
+++ b/intern/cycles/render/jitter.cpp
@@ -0,0 +1,287 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This file is based on "Progressive Multi-Jittered Sample Sequences"
+ * by Per Christensen, Andrew Kensler and Charlie Kilpatrick.
+ * http://graphics.pixar.com/library/ProgressiveMultiJitteredSampling/paper.pdf
+ *
+ * Performance can be improved in the future by implementing the new
+ * algorithm from Matt Pharr in http://jcgt.org/published/0008/01/04/
+ * "Efficient Generation of Points that Satisfy Two-Dimensional Elementary Intervals"
+ */
+
+#include "render/jitter.h"
+
+#include <math.h>
+#include <vector>
+
+CCL_NAMESPACE_BEGIN
+
+static uint cmj_hash(uint i, uint p)
+{
+ i ^= p;
+ i ^= i >> 17;
+ i ^= i >> 10;
+ i *= 0xb36534e5;
+ i ^= i >> 12;
+ i ^= i >> 21;
+ i *= 0x93fc4795;
+ i ^= 0xdf6e307f;
+ i ^= i >> 17;
+ i *= 1 | p >> 18;
+
+ return i;
+}
+
+static float cmj_randfloat(uint i, uint p)
+{
+ return cmj_hash(i, p) * (1.0f / 4294967808.0f);
+}
+
+class PMJ_Generator {
+ public:
+ static void generate_2D(float2 points[], int size, int rng_seed_in)
+ {
+ PMJ_Generator g(rng_seed_in);
+ points[0].x = g.rnd();
+ points[0].y = g.rnd();
+ int N = 1;
+ while (N < size) {
+ g.extend_sequence_even(points, N);
+ g.extend_sequence_odd(points, 2 * N);
+ N = 4 * N;
+ }
+ }
+
+ protected:
+ PMJ_Generator(int rnd_seed_in) : num_samples(1), rnd_index(2), rnd_seed(rnd_seed_in)
+ {
+ }
+
+ float rnd()
+ {
+ return cmj_randfloat(++rnd_index, rnd_seed);
+ }
+
+ virtual void mark_occupied_strata(float2 points[], int N)
+ {
+ int NN = 2 * N;
+ for (int s = 0; s < NN; ++s) {
+ occupied1Dx[s] = occupied1Dy[s] = false;
+ }
+ for (int s = 0; s < N; ++s) {
+ int xstratum = (int)(NN * points[s].x);
+ int ystratum = (int)(NN * points[s].y);
+ occupied1Dx[xstratum] = true;
+ occupied1Dy[ystratum] = true;
+ }
+ }
+
+ virtual void generate_sample_point(
+ float2 points[], float i, float j, float xhalf, float yhalf, int n, int N)
+ {
+ int NN = 2 * N;
+ float2 pt;
+ int xstratum, ystratum;
+ do {
+ pt.x = (i + 0.5f * (xhalf + rnd())) / n;
+ xstratum = (int)(NN * pt.x);
+ } while (occupied1Dx[xstratum]);
+ do {
+ pt.y = (j + 0.5f * (yhalf + rnd())) / n;
+ ystratum = (int)(NN * pt.y);
+ } while (occupied1Dy[ystratum]);
+ occupied1Dx[xstratum] = true;
+ occupied1Dy[ystratum] = true;
+ points[num_samples] = pt;
+ ++num_samples;
+ }
+
+ void extend_sequence_even(float2 points[], int N)
+ {
+ int n = (int)sqrtf(N);
+ occupied1Dx.resize(2 * N);
+ occupied1Dy.resize(2 * N);
+ mark_occupied_strata(points, N);
+ for (int s = 0; s < N; ++s) {
+ float2 oldpt = points[s];
+ float i = floorf(n * oldpt.x);
+ float j = floorf(n * oldpt.y);
+ float xhalf = floorf(2.0f * (n * oldpt.x - i));
+ float yhalf = floorf(2.0f * (n * oldpt.y - j));
+ xhalf = 1.0f - xhalf;
+ yhalf = 1.0f - yhalf;
+ generate_sample_point(points, i, j, xhalf, yhalf, n, N);
+ }
+ }
+
+ void extend_sequence_odd(float2 points[], int N)
+ {
+ int n = (int)sqrtf(N / 2);
+ occupied1Dx.resize(2 * N);
+ occupied1Dy.resize(2 * N);
+ mark_occupied_strata(points, N);
+ std::vector<float> xhalves(N / 2);
+ std::vector<float> yhalves(N / 2);
+ for (int s = 0; s < N / 2; ++s) {
+ float2 oldpt = points[s];
+ float i = floorf(n * oldpt.x);
+ float j = floorf(n * oldpt.y);
+ float xhalf = floorf(2.0f * (n * oldpt.x - i));
+ float yhalf = floorf(2.0f * (n * oldpt.y - j));
+ if (rnd() > 0.5f) {
+ xhalf = 1.0f - xhalf;
+ }
+ else {
+ yhalf = 1.0f - yhalf;
+ }
+ xhalves[s] = xhalf;
+ yhalves[s] = yhalf;
+ generate_sample_point(points, i, j, xhalf, yhalf, n, N);
+ }
+ for (int s = 0; s < N / 2; ++s) {
+ float2 oldpt = points[s];
+ float i = floorf(n * oldpt.x);
+ float j = floorf(n * oldpt.y);
+ float xhalf = 1.0f - xhalves[s];
+ float yhalf = 1.0f - yhalves[s];
+ generate_sample_point(points, i, j, xhalf, yhalf, n, N);
+ }
+ }
+
+ std::vector<bool> occupied1Dx, occupied1Dy;
+ int num_samples;
+ int rnd_index, rnd_seed;
+};
+
+class PMJ02_Generator : public PMJ_Generator {
+ protected:
+ void generate_sample_point(
+ float2 points[], float i, float j, float xhalf, float yhalf, int n, int N) override
+ {
+ int NN = 2 * N;
+ float2 pt;
+ do {
+ pt.x = (i + 0.5f * (xhalf + rnd())) / n;
+ pt.y = (j + 0.5f * (yhalf + rnd())) / n;
+ } while (is_occupied(pt, NN));
+ mark_occupied_strata1(pt, NN);
+ points[num_samples] = pt;
+ ++num_samples;
+ }
+
+ void mark_occupied_strata(float2 points[], int N) override
+ {
+ int NN = 2 * N;
+ int num_shapes = (int)log2f(NN) + 1;
+ occupiedStrata.resize(num_shapes);
+ for (int shape = 0; shape < num_shapes; ++shape) {
+ occupiedStrata[shape].resize(NN);
+ for (int n = 0; n < NN; ++n) {
+ occupiedStrata[shape][n] = false;
+ }
+ }
+ for (int s = 0; s < N; ++s) {
+ mark_occupied_strata1(points[s], NN);
+ }
+ }
+
+ void mark_occupied_strata1(float2 pt, int NN)
+ {
+ int shape = 0;
+ int xdivs = NN;
+ int ydivs = 1;
+ do {
+ int xstratum = (int)(xdivs * pt.x);
+ int ystratum = (int)(ydivs * pt.y);
+ size_t index = ystratum * xdivs + xstratum;
+ assert(index < NN);
+ occupiedStrata[shape][index] = true;
+ shape = shape + 1;
+ xdivs = xdivs / 2;
+ ydivs = ydivs * 2;
+ } while (xdivs > 0);
+ }
+
+ bool is_occupied(float2 pt, int NN)
+ {
+ int shape = 0;
+ int xdivs = NN;
+ int ydivs = 1;
+ do {
+ int xstratum = (int)(xdivs * pt.x);
+ int ystratum = (int)(ydivs * pt.y);
+ size_t index = ystratum * xdivs + xstratum;
+ assert(index < NN);
+ if (occupiedStrata[shape][index]) {
+ return true;
+ }
+ shape = shape + 1;
+ xdivs = xdivs / 2;
+ ydivs = ydivs * 2;
+ } while (xdivs > 0);
+ return false;
+ }
+
+ private:
+ std::vector<std::vector<bool>> occupiedStrata;
+};
+
+static void shuffle(float2 points[], int size, int rng_seed)
+{
+ /* Offset samples by 1.0 for faster scrambling in kernel_random.h */
+ for (int i = 0; i < size; ++i) {
+ points[i].x += 1.0f;
+ points[i].y += 1.0f;
+ }
+
+ if (rng_seed == 0) {
+ return;
+ }
+
+ constexpr int odd[8] = {0, 1, 4, 5, 10, 11, 14, 15};
+ constexpr int even[8] = {2, 3, 6, 7, 8, 9, 12, 13};
+
+ int rng_index = 0;
+ for (int yy = 0; yy < size / 16; ++yy) {
+ for (int xx = 0; xx < 8; ++xx) {
+ int other = (int)(cmj_randfloat(++rng_index, rng_seed) * (8.0f - xx) + xx);
+ float2 tmp = points[odd[other] + yy * 16];
+ points[odd[other] + yy * 16] = points[odd[xx] + yy * 16];
+ points[odd[xx] + yy * 16] = tmp;
+ }
+ for (int xx = 0; xx < 8; ++xx) {
+ int other = (int)(cmj_randfloat(++rng_index, rng_seed) * (8.0f - xx) + xx);
+ float2 tmp = points[even[other] + yy * 16];
+ points[even[other] + yy * 16] = points[even[xx] + yy * 16];
+ points[even[xx] + yy * 16] = tmp;
+ }
+ }
+}
+
+void progressive_multi_jitter_generate_2D(float2 points[], int size, int rng_seed)
+{
+ PMJ_Generator::generate_2D(points, size, rng_seed);
+ shuffle(points, size, rng_seed);
+}
+
+void progressive_multi_jitter_02_generate_2D(float2 points[], int size, int rng_seed)
+{
+ PMJ02_Generator::generate_2D(points, size, rng_seed);
+ shuffle(points, size, rng_seed);
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/render/jitter.h b/intern/cycles/render/jitter.h
new file mode 100644
index 00000000000..ed34c7a4f4d
--- /dev/null
+++ b/intern/cycles/render/jitter.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __JITTER_H__
+#define __JITTER_H__
+
+#include "util/util_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+void progressive_multi_jitter_generate_2D(float2 points[], int size, int rng_seed);
+void progressive_multi_jitter_02_generate_2D(float2 points[], int size, int rng_seed);
+
+CCL_NAMESPACE_END
+
+#endif /* __JITTER_H__ */
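A minimal usage sketch for the new generators, assuming it is compiled inside the Cycles source tree; fill_example_pattern and the sample count of 1024 are illustrative, not part of the API:

  #include "render/jitter.h"

  #include "util/util_types.h"
  #include "util/util_vector.h"

  CCL_NAMESPACE_BEGIN

  /* Fill one PMJ02 pattern; the rng_seed argument doubles as the pattern index,
   * exactly as Integrator::device_update() does above. */
  static vector<float2> fill_example_pattern(int pattern_index)
  {
    /* The generator quadruples the sample count each round, so the size
     * should be a power of four; 1024 is an arbitrary choice for this sketch. */
    const int num_samples = 1024;
    vector<float2> points(num_samples);
    progressive_multi_jitter_02_generate_2D(points.data(), num_samples, pattern_index);
    /* Note: shuffle() offsets the samples by +1.0 on both axes, so the points
     * lie in [1, 2) x [1, 2) rather than in the unit square. */
    return points;
  }

  CCL_NAMESPACE_END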
diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp
index 1ed7e6967ac..7afa28b29fb 100644
--- a/intern/cycles/render/light.cpp
+++ b/intern/cycles/render/light.cpp
@@ -14,12 +14,12 @@
* limitations under the License.
*/
-#include "render/background.h"
+#include "render/light.h"
#include "device/device.h"
-#include "render/integrator.h"
+#include "render/background.h"
#include "render/film.h"
#include "render/graph.h"
-#include "render/light.h"
+#include "render/integrator.h"
#include "render/light_tree.h"
#include "render/mesh.h"
#include "render/nodes.h"
@@ -29,9 +29,10 @@
#include "util/util_foreach.h"
#include "util/util_hash.h"
+#include "util/util_logging.h"
#include "util/util_path.h"
#include "util/util_progress.h"
-#include "util/util_logging.h"
+#include "util/util_task.h"
CCL_NAMESPACE_BEGIN
@@ -115,10 +116,13 @@ NODE_DEFINE(Light)
type_enum.insert("spot", LIGHT_SPOT);
SOCKET_ENUM(type, "Type", type_enum, LIGHT_POINT);
+ SOCKET_COLOR(strength, "Strength", make_float3(1.0f, 1.0f, 1.0f));
+
SOCKET_POINT(co, "Co", make_float3(0.0f, 0.0f, 0.0f));
SOCKET_VECTOR(dir, "Dir", make_float3(0.0f, 0.0f, 0.0f));
SOCKET_FLOAT(size, "Size", 0.0f);
+ SOCKET_FLOAT(angle, "Angle", 0.0f);
SOCKET_VECTOR(axisu, "Axis U", make_float3(0.0f, 0.0f, 0.0f));
SOCKET_FLOAT(sizeu, "Size U", 1.0f);
@@ -163,6 +167,9 @@ void Light::tag_update(Scene *scene)
bool Light::has_contribution(Scene *scene)
{
+ if (strength == make_float3(0.0f, 0.0f, 0.0f)) {
+ return false;
+ }
if (is_portal) {
return false;
}
@@ -177,7 +184,10 @@ bool Light::has_contribution(Scene *scene)
LightManager::LightManager()
{
need_update = true;
+ need_update_background = true;
use_light_visibility = false;
+ last_background_enabled = false;
+ last_background_resolution = 0;
}
LightManager::~LightManager()
@@ -197,7 +207,7 @@ bool LightManager::has_background_light(Scene *scene)
return false;
}
-void LightManager::disable_ineffective_light(Scene *scene)
+void LightManager::test_enabled_lights(Scene *scene)
{
/* Make all lights enabled by default, and perform some preliminary checks
* needed for finer-tuning of settings (for example, check whether we've
@@ -210,28 +220,40 @@ void LightManager::disable_ineffective_light(Scene *scene)
has_background |= light->type == LIGHT_BACKGROUND;
}
+ bool background_enabled = false;
+ int background_resolution = 0;
+
if (has_background) {
/* Ignore background light if:
* - If unsupported on a device
* - If we don't need it (no HDRs etc.)
*/
- Shader *shader = (scene->background->shader) ? scene->background->shader :
- scene->default_background;
- bool disable_mis = !(has_portal || shader->has_surface_spatial_varying);
- if (disable_mis) {
- VLOG(1) << "Background MIS has been disabled.\n";
- foreach (Light *light, scene->lights) {
- if (light->type == LIGHT_BACKGROUND) {
- light->is_enabled = false;
- }
+ Shader *shader = scene->background->get_shader(scene);
+ const bool disable_mis = !(has_portal || shader->has_surface_spatial_varying);
+ VLOG_IF(1, disable_mis) << "Background MIS has been disabled.\n";
+ foreach (Light *light, scene->lights) {
+ if (light->type == LIGHT_BACKGROUND) {
+ light->is_enabled = !disable_mis;
+ background_enabled = !disable_mis;
+ background_resolution = light->map_resolution;
}
}
}
+
+ if (last_background_enabled != background_enabled ||
+ last_background_resolution != background_resolution) {
+ last_background_enabled = background_enabled;
+ last_background_resolution = background_resolution;
+ need_update_background = true;
+ }
}
bool LightManager::object_usable_as_light(Object *object)
{
- Mesh *mesh = object->mesh;
+ Geometry *geom = object->geometry;
+ if (geom->type != Geometry::MESH) {
+ return false;
+ }
/* Skip objects with NaNs */
if (!object->bounds.valid()) {
return false;
@@ -242,10 +264,10 @@ bool LightManager::object_usable_as_light(Object *object)
}
/* Skip if we have no emission shaders. */
/* TODO(sergey): Ideally we want to avoid such duplicated loop, since it'll
- * iterate all mesh shaders twice (when counting and when calculating
+ * iterate all geometry shaders twice (when counting and when calculating
* triangle area.
*/
- foreach (const Shader *shader, mesh->used_shaders) {
+ foreach (const Shader *shader, geom->used_shaders) {
if (shader->use_mis && shader->has_surface_emission) {
return true;
}
@@ -375,7 +397,7 @@ void LightManager::device_update_distribution(Device *device,
continue;
}
/* Count emissive triangles. */
- Mesh *mesh = object->mesh;
+ Mesh *mesh = static_cast<Mesh *>(object->geometry);
size_t mesh_num_triangles = mesh->num_triangles();
for (size_t i = 0; i < mesh_num_triangles; i++) {
int shader_index = mesh->shader[i];
@@ -649,7 +671,7 @@ void LightManager::device_update_distribution(Device *device,
const Object *object = scene->objects[prim.object_id];
/* Sum area. */
- const Mesh *mesh = object->mesh;
+ Mesh *mesh = static_cast<Mesh *>(object->geometry);
int shader_flag = 0;
if (!(object->visibility & PATH_RAY_DIFFUSE)) {
@@ -707,14 +729,19 @@ void LightManager::device_update_distribution(Device *device,
distribution[offset].lamp.size = light->size;
totarea += lightarea;
- if (light->size > 0.0f && light->use_mis)
- use_lamp_mis = true;
- if (light->type == LIGHT_BACKGROUND) {
- num_background_lights++;
- background_mis = light->use_mis;
+ if (light->type == LIGHT_DISTANT) {
+ num_distant_lights++;
+ use_lamp_mis |= (light->angle > 0.0f && light->use_mis);
}
- else if (light->type == LIGHT_DISTANT) {
- num_distant_lights++;
+ else if (light->type == LIGHT_POINT || light->type == LIGHT_SPOT) {
+ use_lamp_mis |= (light->size > 0.0f && light->use_mis);
+ }
+ else if (light->type == LIGHT_AREA) {
+ use_lamp_mis |= light->use_mis;
+ }
+ else if (light->type == LIGHT_BACKGROUND) {
+ num_background_lights++;
+ background_mis |= light->use_mis;
}
offset++;
@@ -738,6 +765,7 @@ void LightManager::device_update_distribution(Device *device,
/* update device */
KernelIntegrator *kintegrator = &dscene->data.integrator;
+ KernelBackground *kbackground = &dscene->data.background;
KernelFilm *kfilm = &dscene->data.film;
kintegrator->use_direct_light = (totarea > 0.0f);
@@ -803,15 +831,18 @@ void LightManager::device_update_distribution(Device *device,
/* Portals */
if (num_portals > 0) {
- kintegrator->portal_offset = light_index;
- kintegrator->num_portals = num_portals;
- kintegrator->portal_pdf = background_mis ? 0.5f : 1.0f;
+ kbackground->portal_offset = light_index;
+ kbackground->num_portals = num_portals;
+ kbackground->portal_weight = 1.0f;
}
else {
- kintegrator->num_portals = 0;
- kintegrator->portal_offset = 0;
- kintegrator->portal_pdf = 0.0f;
+ kbackground->num_portals = 0;
+ kbackground->portal_offset = 0;
+ kbackground->portal_weight = 0.0f;
}
+
+ /* Map */
+ kbackground->map_weight = background_mis ? 1.0f : 0.0f;
}
else {
dscene->light_group_sample_cdf.free();
@@ -835,11 +866,14 @@ void LightManager::device_update_distribution(Device *device,
kintegrator->pdf_triangles = 0.0f;
kintegrator->pdf_lights = 0.0f;
kintegrator->use_lamp_mis = false;
- kintegrator->num_portals = 0;
- kintegrator->portal_offset = 0;
- kintegrator->portal_pdf = 0.0f;
+
+ kbackground->num_portals = 0;
+ kbackground->portal_offset = 0;
+ kbackground->portal_weight = 0.0f;
+ kbackground->sun_weight = 0.0f;
kintegrator->distant_lights_offset = 0;
kintegrator->background_light_index = 0;
+ kbackground->map_weight = 0.0f;
kfilm->pass_shadow_scale = 1.0f;
}
}
@@ -887,7 +921,7 @@ void LightManager::device_update_background(Device *device,
Scene *scene,
Progress &progress)
{
- KernelIntegrator *kintegrator = &dscene->data.integrator;
+ KernelBackground *kbackground = &dscene->data.background;
Light *background_light = NULL;
/* find background light */
@@ -900,20 +934,87 @@ void LightManager::device_update_background(Device *device,
/* no background light found, signal renderer to skip sampling */
if (!background_light || !background_light->is_enabled) {
- kintegrator->pdf_background_res_x = 0;
- kintegrator->pdf_background_res_y = 0;
+ kbackground->map_res_x = 0;
+ kbackground->map_res_y = 0;
+ kbackground->map_weight = 0.0f;
+ kbackground->sun_weight = 0.0f;
+ kbackground->use_mis = (kbackground->portal_weight > 0.0f);
return;
}
progress.set_status("Updating Lights", "Importance map");
- assert(kintegrator->use_direct_light);
+ assert(dscene->data.integrator.use_direct_light);
+
+ int2 environment_res = make_int2(0, 0);
+ Shader *shader = scene->background->get_shader(scene);
+ int num_suns = 0;
+ foreach (ShaderNode *node, shader->graph->nodes) {
+ if (node->type == EnvironmentTextureNode::node_type) {
+ EnvironmentTextureNode *env = (EnvironmentTextureNode *)node;
+ ImageMetaData metadata;
+ if (!env->handle.empty()) {
+ metadata = env->handle.metadata();
+ environment_res.x = max(environment_res.x, metadata.width);
+ environment_res.y = max(environment_res.y, metadata.height);
+ }
+ }
+ if (node->type == SkyTextureNode::node_type) {
+ SkyTextureNode *sky = (SkyTextureNode *)node;
+ if (sky->type == NODE_SKY_NISHITA && sky->sun_disc) {
+ /* Ensure that the input coordinates aren't transformed before they reach the node.
+ * If that is the case, the logic used for sampling the sun's location does not work
+ * and we have to fall back to map-based sampling. */
+ const ShaderInput *vec_in = sky->input("Vector");
+ if (vec_in && vec_in->link && vec_in->link->parent) {
+ ShaderNode *vec_src = vec_in->link->parent;
+ if ((vec_src->type != TextureCoordinateNode::node_type) ||
+ (vec_in->link != vec_src->output("Generated"))) {
+ environment_res.x = max(environment_res.x, 4096);
+ environment_res.y = max(environment_res.y, 2048);
+ continue;
+ }
+ }
- /* get the resolution from the light's size (we stuff it in there) */
- int2 res = get_background_map_resolution(background_light, scene);
+ /* Determine sun direction from lat/long and texture mapping. */
+ float latitude = sky->sun_elevation;
+ float longitude = M_2PI_F - sky->sun_rotation + M_PI_2_F;
+ float3 sun_direction = make_float3(
+ cosf(latitude) * cosf(longitude), cosf(latitude) * sinf(longitude), sinf(latitude));
+ Transform sky_transform = transform_inverse(sky->tex_mapping.compute_transform());
+ sun_direction = transform_direction(&sky_transform, sun_direction);
+
+ /* Pack sun direction and size. */
+ float half_angle = sky->sun_size * 0.5f;
+ kbackground->sun = make_float4(
+ sun_direction.x, sun_direction.y, sun_direction.z, half_angle);
+
+ kbackground->sun_weight = 4.0f;
+ environment_res.x = max(environment_res.x, 512);
+ environment_res.y = max(environment_res.y, 256);
+ num_suns++;
+ }
+ }
+ }
- kintegrator->pdf_background_res_x = res.x;
- kintegrator->pdf_background_res_y = res.y;
+ /* If there's more than one sun, fall back to map sampling instead. */
+ if (num_suns != 1) {
+ kbackground->sun_weight = 0.0f;
+ environment_res.x = max(environment_res.x, 4096);
+ environment_res.y = max(environment_res.y, 2048);
+ }
+
+ /* Enable MIS for background sampling if any strategy is active. */
+ kbackground->use_mis = (kbackground->portal_weight + kbackground->map_weight +
+ kbackground->sun_weight) > 0.0f;
+
+ /* get the resolution from the light's size (we stuff it in there) */
+ int2 res = make_int2(background_light->map_resolution, background_light->map_resolution / 2);
+ /* If the resolution isn't set manually, try to find an environment texture. */
+ if (res.x == 0) {
+ res = environment_res;
+ }
+
+ kbackground->map_res_x = res.x;
+ kbackground->map_res_y = res.y;
vector<float3> pixels;
shade_background_pixels(device, dscene, res.x, res.y, pixels, progress);
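The sun direction above is a plain spherical-to-Cartesian conversion of the Nishita sky node's elevation and rotation; a standalone sketch of the same math, with hypothetical angles and without the tex_mapping transform (which needs Cycles' Transform type):

  #include <cmath>
  #include <cstdio>

  int main()
  {
    const float kPi = 3.14159265358979f;

    /* Hypothetical sun placement: 15 degrees above the horizon, rotated 30 degrees. */
    float sun_elevation = 15.0f * kPi / 180.0f;
    float sun_rotation = 30.0f * kPi / 180.0f;

    /* Same convention as device_update_background() above. */
    float latitude = sun_elevation;
    float longitude = 2.0f * kPi - sun_rotation + kPi / 2.0f;

    float dir_x = cosf(latitude) * cosf(longitude);
    float dir_y = cosf(latitude) * sinf(longitude);
    float dir_z = sinf(latitude);

    printf("sun direction: (%f, %f, %f)\n", dir_x, dir_y, dir_z);
    return 0;
  }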
@@ -927,29 +1028,13 @@ void LightManager::device_update_background(Device *device,
float2 *cond_cdf = dscene->light_background_conditional_cdf.alloc(cdf_width * res.y);
double time_start = time_dt();
- if (max(res.x, res.y) < 512) {
- /* Small enough resolution, faster to do single-threaded. */
- background_cdf(0, res.y, res.x, res.y, &pixels, cond_cdf);
- }
- else {
- /* Threaded evaluation for large resolution. */
- const int num_blocks = TaskScheduler::num_threads();
- const int chunk_size = res.y / num_blocks;
- int start_row = 0;
- TaskPool pool;
- for (int i = 0; i < num_blocks; ++i) {
- const int current_chunk_size = (i != num_blocks - 1) ? chunk_size : (res.y - i * chunk_size);
- pool.push(function_bind(&background_cdf,
- start_row,
- start_row + current_chunk_size,
- res.x,
- res.y,
- &pixels,
- cond_cdf));
- start_row += current_chunk_size;
- }
- pool.wait_work();
- }
+
+ /* Create CDF in parallel. */
+ const int rows_per_task = divide_up(10240, res.x);
+ parallel_for(blocked_range<size_t>(0, res.y, rows_per_task),
+ [&](const blocked_range<size_t> &r) {
+ background_cdf(r.begin(), r.end(), res.x, res.y, &pixels, cond_cdf);
+ });
/* marginal CDFs (column, V direction, sum of rows) */
marg_cdf[0].x = cond_cdf[res.x].x;
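The grain size above targets roughly 10240 pixels per task. Assuming divide_up() is the usual round-up integer division from Cycles' math utilities, the rows handled per task work out as in this small sketch:

  #include <cstdio>

  /* Assumed to match Cycles' helper: round-up integer division. */
  static int divide_up(int x, int y)
  {
    return (x + y - 1) / y;
  }

  int main()
  {
    /* For a few map widths, how many CDF rows each parallel_for task processes. */
    const int widths[] = {512, 1024, 4096, 8192};
    for (int w : widths) {
      printf("res.x = %4d -> rows_per_task = %d\n", w, divide_up(10240, w));
    }
    return 0;
  }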
@@ -1004,7 +1089,6 @@ void LightManager::device_update_points(Device *, DeviceScene *dscene, Scene *sc
float3 co = light->co;
Shader *shader = (light->shader) ? light->shader : scene->default_light;
int shader_id = scene->shader_manager->get_shader_id(shader);
- int samples = light->samples;
int max_bounces = light->max_bounces;
float random = (float)light->random_id * (1.0f / (float)0xFFFFFFFF);
@@ -1029,7 +1113,10 @@ void LightManager::device_update_points(Device *, DeviceScene *dscene, Scene *sc
}
klights[light_index].type = light->type;
- klights[light_index].samples = samples;
+ klights[light_index].samples = light->samples;
+ klights[light_index].strength[0] = light->strength.x;
+ klights[light_index].strength[1] = light->strength.y;
+ klights[light_index].strength[2] = light->strength.z;
if (light->type == LIGHT_POINT) {
shader_id &= ~SHADER_AREA_LIGHT;
@@ -1050,8 +1137,8 @@ void LightManager::device_update_points(Device *, DeviceScene *dscene, Scene *sc
else if (light->type == LIGHT_DISTANT) {
shader_id &= ~SHADER_AREA_LIGHT;
- float radius = light->size;
- float angle = atanf(radius);
+ float angle = light->angle / 2.0f;
+ float radius = tanf(angle);
float cosangle = cosf(angle);
float area = M_PI_F * radius * radius;
float invarea = (area > 0.0f) ? 1.0f / area : 1.0f;
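For a sense of scale, with a hypothetical distant light using the real sun's angular diameter (about 0.526 degrees), the new angle-based code yields a half-angle of roughly 0.0046 rad and a tangent-space radius of about the same value; a small standalone sketch of the derivation above:

  #include <cmath>
  #include <cstdio>

  int main()
  {
    const float kPi = 3.14159265f;

    /* Hypothetical distant light with the sun's angular diameter (~0.526 degrees). */
    float light_angle = 0.526f * kPi / 180.0f;

    /* Same derivation as LightManager::device_update_points() above. */
    float angle = light_angle / 2.0f;
    float radius = tanf(angle);
    float cosangle = cosf(angle);
    float area = kPi * radius * radius;
    float invarea = (area > 0.0f) ? 1.0f / area : 1.0f;

    printf("half angle %g, radius %g, cos %g, area %g, 1/area %g\n",
           angle, radius, cosangle, area, invarea);
    return 0;
  }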
@@ -1218,11 +1305,12 @@ void LightManager::device_update(Device *device,
VLOG(1) << "Total " << scene->lights.size() << " lights.";
- device_free(device, dscene);
+ /* Detect which lights are enabled; this also determines if we need to update the background. */
+ test_enabled_lights(scene);
- use_light_visibility = false;
+ device_free(device, dscene, need_update_background);
- disable_ineffective_light(scene);
+ use_light_visibility = false;
device_update_points(device, dscene, scene);
if (progress.get_cancel())
@@ -1232,9 +1320,11 @@ void LightManager::device_update(Device *device,
if (progress.get_cancel())
return;
- device_update_background(device, dscene, scene, progress);
- if (progress.get_cancel())
- return;
+ if (need_update_background) {
+ device_update_background(device, dscene, scene, progress);
+ if (progress.get_cancel())
+ return;
+ }
device_update_ies(dscene);
if (progress.get_cancel())
@@ -1246,9 +1336,10 @@ void LightManager::device_update(Device *device,
}
need_update = false;
+ need_update_background = false;
}
-void LightManager::device_free(Device *, DeviceScene *dscene)
+void LightManager::device_free(Device *, DeviceScene *dscene, const bool free_background)
{
dscene->light_distribution.free();
dscene->light_tree_nodes.free();
@@ -1256,8 +1347,10 @@ void LightManager::device_free(Device *, DeviceScene *dscene)
dscene->lamp_to_distribution.free();
dscene->triangle_to_distribution.free();
dscene->lights.free();
- dscene->light_background_marginal_cdf.free();
- dscene->light_background_conditional_cdf.free();
+ if (free_background) {
+ dscene->light_background_marginal_cdf.free();
+ dscene->light_background_conditional_cdf.free();
+ }
dscene->light_group_sample_prob.free();
dscene->light_group_sample_cdf.free();
dscene->leaf_to_first_emitter.free();
@@ -1270,7 +1363,7 @@ void LightManager::tag_update(Scene * /*scene*/)
need_update = true;
}
-int LightManager::add_ies_from_file(ustring filename)
+int LightManager::add_ies_from_file(const string &filename)
{
string content;
@@ -1279,10 +1372,10 @@ int LightManager::add_ies_from_file(ustring filename)
content = "\n";
}
- return add_ies(ustring(content));
+ return add_ies(content);
}
-int LightManager::add_ies(ustring content)
+int LightManager::add_ies(const string &content)
{
uint hash = hash_string(content.c_str());
@@ -1314,6 +1407,7 @@ int LightManager::add_ies(ustring content)
ies_slots[slot]->hash = hash;
need_update = true;
+ need_update_background = true;
return slot;
}
@@ -1332,6 +1426,7 @@ void LightManager::remove_ies(int slot)
/* If the slot has no more users, update the device to remove it. */
need_update |= (ies_slots[slot]->users == 0);
+ need_update_background |= need_update;
}
void LightManager::device_update_ies(DeviceScene *dscene)
diff --git a/intern/cycles/render/light.h b/intern/cycles/render/light.h
index de84f6a5457..41a627882bd 100644
--- a/intern/cycles/render/light.h
+++ b/intern/cycles/render/light.h
@@ -43,10 +43,12 @@ class Light : public Node {
Light();
LightType type;
+ float3 strength;
float3 co;
float3 dir;
float size;
+ float angle;
float3 axisu;
float sizeu;
@@ -78,7 +80,7 @@ class Light : public Node {
void tag_update(Scene *scene);
- /* Check whether the light has contribution the the scene. */
+ /* Check whether the light has contribution to the scene. */
bool has_contribution(Scene *scene);
};
@@ -87,16 +89,19 @@ class LightManager {
bool use_light_visibility;
bool need_update;
+ /* Need to update background (including the multiple importance sampling map). */
+ bool need_update_background;
+
LightManager();
~LightManager();
/* IES texture management */
- int add_ies(ustring ies);
- int add_ies_from_file(ustring filename);
+ int add_ies(const string &ies);
+ int add_ies_from_file(const string &filename);
void remove_ies(int slot);
void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
- void device_free(Device *device, DeviceScene *dscene);
+ void device_free(Device *device, DeviceScene *dscene, const bool free_background = true);
void tag_update(Scene *scene);
@@ -108,7 +113,7 @@ class LightManager {
* which doesn't contribute to the scene or which is only used for MIS
* and scene doesn't need MIS.
*/
- void disable_ineffective_light(Scene *scene);
+ void test_enabled_lights(Scene *scene);
void device_update_points(Device *device, DeviceScene *dscene, Scene *scene);
void device_update_distribution(Device *device,
@@ -144,6 +149,9 @@ class LightManager {
vector<IESSlot *> ies_slots;
thread_mutex ies_mutex;
+
+ bool last_background_enabled;
+ int last_background_resolution;
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/merge.cpp b/intern/cycles/render/merge.cpp
index cac07e59fe3..3ea3952b96c 100644
--- a/intern/cycles/render/merge.cpp
+++ b/intern/cycles/render/merge.cpp
@@ -22,8 +22,8 @@
#include "util/util_time.h"
#include "util/util_unique_ptr.h"
-#include <OpenImageIO/imageio.h>
#include <OpenImageIO/filesystem.h>
+#include <OpenImageIO/imageio.h>
OIIO_NAMESPACE_USING
diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp
index 54dacf5d1f4..c262d770331 100644
--- a/intern/cycles/render/mesh.cpp
+++ b/intern/cycles/render/mesh.cpp
@@ -17,32 +17,22 @@
#include "bvh/bvh.h"
#include "bvh/bvh_build.h"
-#include "render/camera.h"
-#include "render/curves.h"
#include "device/device.h"
+
#include "render/graph.h"
-#include "render/shader.h"
-#include "render/light.h"
+#include "render/hair.h"
#include "render/mesh.h"
-#include "render/nodes.h"
#include "render/object.h"
#include "render/scene.h"
-#include "render/stats.h"
-
-#include "kernel/osl/osl_globals.h"
-#include "subd/subd_split.h"
#include "subd/subd_patch_table.h"
+#include "subd/subd_split.h"
#include "util/util_foreach.h"
#include "util/util_logging.h"
#include "util/util_progress.h"
#include "util/util_set.h"
-#ifdef WITH_EMBREE
-# include "bvh/bvh_embree.h"
-#endif
-
CCL_NAMESPACE_BEGIN
/* Triangle */
@@ -120,263 +110,6 @@ bool Mesh::Triangle::valid(const float3 *verts) const
return isfinite3_safe(verts[v[0]]) && isfinite3_safe(verts[v[1]]) && isfinite3_safe(verts[v[2]]);
}
-/* Curve */
-
-void Mesh::Curve::bounds_grow(const int k,
- const float3 *curve_keys,
- const float *curve_radius,
- BoundBox &bounds) const
-{
- float3 P[4];
-
- P[0] = curve_keys[max(first_key + k - 1, first_key)];
- P[1] = curve_keys[first_key + k];
- P[2] = curve_keys[first_key + k + 1];
- P[3] = curve_keys[min(first_key + k + 2, first_key + num_keys - 1)];
-
- float3 lower;
- float3 upper;
-
- curvebounds(&lower.x, &upper.x, P, 0);
- curvebounds(&lower.y, &upper.y, P, 1);
- curvebounds(&lower.z, &upper.z, P, 2);
-
- float mr = max(curve_radius[first_key + k], curve_radius[first_key + k + 1]);
-
- bounds.grow(lower, mr);
- bounds.grow(upper, mr);
-}
-
-void Mesh::Curve::bounds_grow(const int k,
- const float3 *curve_keys,
- const float *curve_radius,
- const Transform &aligned_space,
- BoundBox &bounds) const
-{
- float3 P[4];
-
- P[0] = curve_keys[max(first_key + k - 1, first_key)];
- P[1] = curve_keys[first_key + k];
- P[2] = curve_keys[first_key + k + 1];
- P[3] = curve_keys[min(first_key + k + 2, first_key + num_keys - 1)];
-
- P[0] = transform_point(&aligned_space, P[0]);
- P[1] = transform_point(&aligned_space, P[1]);
- P[2] = transform_point(&aligned_space, P[2]);
- P[3] = transform_point(&aligned_space, P[3]);
-
- float3 lower;
- float3 upper;
-
- curvebounds(&lower.x, &upper.x, P, 0);
- curvebounds(&lower.y, &upper.y, P, 1);
- curvebounds(&lower.z, &upper.z, P, 2);
-
- float mr = max(curve_radius[first_key + k], curve_radius[first_key + k + 1]);
-
- bounds.grow(lower, mr);
- bounds.grow(upper, mr);
-}
-
-void Mesh::Curve::bounds_grow(float4 keys[4], BoundBox &bounds) const
-{
- float3 P[4] = {
- float4_to_float3(keys[0]),
- float4_to_float3(keys[1]),
- float4_to_float3(keys[2]),
- float4_to_float3(keys[3]),
- };
-
- float3 lower;
- float3 upper;
-
- curvebounds(&lower.x, &upper.x, P, 0);
- curvebounds(&lower.y, &upper.y, P, 1);
- curvebounds(&lower.z, &upper.z, P, 2);
-
- float mr = max(keys[1].w, keys[2].w);
-
- bounds.grow(lower, mr);
- bounds.grow(upper, mr);
-}
-
-void Mesh::Curve::motion_keys(const float3 *curve_keys,
- const float *curve_radius,
- const float3 *key_steps,
- size_t num_curve_keys,
- size_t num_steps,
- float time,
- size_t k0,
- size_t k1,
- float4 r_keys[2]) const
-{
- /* Figure out which steps we need to fetch and their interpolation factor. */
- const size_t max_step = num_steps - 1;
- const size_t step = min((int)(time * max_step), max_step - 1);
- const float t = time * max_step - step;
- /* Fetch vertex coordinates. */
- float4 curr_keys[2];
- float4 next_keys[2];
- keys_for_step(
- curve_keys, curve_radius, key_steps, num_curve_keys, num_steps, step, k0, k1, curr_keys);
- keys_for_step(
- curve_keys, curve_radius, key_steps, num_curve_keys, num_steps, step + 1, k0, k1, next_keys);
- /* Interpolate between steps. */
- r_keys[0] = (1.0f - t) * curr_keys[0] + t * next_keys[0];
- r_keys[1] = (1.0f - t) * curr_keys[1] + t * next_keys[1];
-}
-
-void Mesh::Curve::cardinal_motion_keys(const float3 *curve_keys,
- const float *curve_radius,
- const float3 *key_steps,
- size_t num_curve_keys,
- size_t num_steps,
- float time,
- size_t k0,
- size_t k1,
- size_t k2,
- size_t k3,
- float4 r_keys[4]) const
-{
- /* Figure out which steps we need to fetch and their interpolation factor. */
- const size_t max_step = num_steps - 1;
- const size_t step = min((int)(time * max_step), max_step - 1);
- const float t = time * max_step - step;
- /* Fetch vertex coordinates. */
- float4 curr_keys[4];
- float4 next_keys[4];
- cardinal_keys_for_step(curve_keys,
- curve_radius,
- key_steps,
- num_curve_keys,
- num_steps,
- step,
- k0,
- k1,
- k2,
- k3,
- curr_keys);
- cardinal_keys_for_step(curve_keys,
- curve_radius,
- key_steps,
- num_curve_keys,
- num_steps,
- step + 1,
- k0,
- k1,
- k2,
- k3,
- next_keys);
- /* Interpolate between steps. */
- r_keys[0] = (1.0f - t) * curr_keys[0] + t * next_keys[0];
- r_keys[1] = (1.0f - t) * curr_keys[1] + t * next_keys[1];
- r_keys[2] = (1.0f - t) * curr_keys[2] + t * next_keys[2];
- r_keys[3] = (1.0f - t) * curr_keys[3] + t * next_keys[3];
-}
-
-void Mesh::Curve::keys_for_step(const float3 *curve_keys,
- const float *curve_radius,
- const float3 *key_steps,
- size_t num_curve_keys,
- size_t num_steps,
- size_t step,
- size_t k0,
- size_t k1,
- float4 r_keys[2]) const
-{
- k0 = max(k0, 0);
- k1 = min(k1, num_keys - 1);
- const size_t center_step = ((num_steps - 1) / 2);
- if (step == center_step) {
- /* Center step: regular key location. */
- /* TODO(sergey): Consider adding make_float4(float3, float)
- * function.
- */
- r_keys[0] = make_float4(curve_keys[first_key + k0].x,
- curve_keys[first_key + k0].y,
- curve_keys[first_key + k0].z,
- curve_radius[first_key + k0]);
- r_keys[1] = make_float4(curve_keys[first_key + k1].x,
- curve_keys[first_key + k1].y,
- curve_keys[first_key + k1].z,
- curve_radius[first_key + k1]);
- }
- else {
- /* Center step is not stored in this array. */
- if (step > center_step) {
- step--;
- }
- const size_t offset = first_key + step * num_curve_keys;
- r_keys[0] = make_float4(key_steps[offset + k0].x,
- key_steps[offset + k0].y,
- key_steps[offset + k0].z,
- curve_radius[first_key + k0]);
- r_keys[1] = make_float4(key_steps[offset + k1].x,
- key_steps[offset + k1].y,
- key_steps[offset + k1].z,
- curve_radius[first_key + k1]);
- }
-}
-
-void Mesh::Curve::cardinal_keys_for_step(const float3 *curve_keys,
- const float *curve_radius,
- const float3 *key_steps,
- size_t num_curve_keys,
- size_t num_steps,
- size_t step,
- size_t k0,
- size_t k1,
- size_t k2,
- size_t k3,
- float4 r_keys[4]) const
-{
- k0 = max(k0, 0);
- k3 = min(k3, num_keys - 1);
- const size_t center_step = ((num_steps - 1) / 2);
- if (step == center_step) {
- /* Center step: regular key location. */
- r_keys[0] = make_float4(curve_keys[first_key + k0].x,
- curve_keys[first_key + k0].y,
- curve_keys[first_key + k0].z,
- curve_radius[first_key + k0]);
- r_keys[1] = make_float4(curve_keys[first_key + k1].x,
- curve_keys[first_key + k1].y,
- curve_keys[first_key + k1].z,
- curve_radius[first_key + k1]);
- r_keys[2] = make_float4(curve_keys[first_key + k2].x,
- curve_keys[first_key + k2].y,
- curve_keys[first_key + k2].z,
- curve_radius[first_key + k2]);
- r_keys[3] = make_float4(curve_keys[first_key + k3].x,
- curve_keys[first_key + k3].y,
- curve_keys[first_key + k3].z,
- curve_radius[first_key + k3]);
- }
- else {
- /* Center step is not stored in this array. */
- if (step > center_step) {
- step--;
- }
- const size_t offset = first_key + step * num_curve_keys;
- r_keys[0] = make_float4(key_steps[offset + k0].x,
- key_steps[offset + k0].y,
- key_steps[offset + k0].z,
- curve_radius[first_key + k0]);
- r_keys[1] = make_float4(key_steps[offset + k1].x,
- key_steps[offset + k1].y,
- key_steps[offset + k1].z,
- curve_radius[first_key + k1]);
- r_keys[2] = make_float4(key_steps[offset + k2].x,
- key_steps[offset + k2].y,
- key_steps[offset + k2].z,
- curve_radius[first_key + k2]);
- r_keys[3] = make_float4(key_steps[offset + k3].x,
- key_steps[offset + k3].y,
- key_steps[offset + k3].z,
- curve_radius[first_key + k3]);
- }
-}
-
/* SubdFace */
float3 Mesh::SubdFace::normal(const Mesh *mesh) const
@@ -392,58 +125,29 @@ float3 Mesh::SubdFace::normal(const Mesh *mesh) const
NODE_DEFINE(Mesh)
{
- NodeType *type = NodeType::add("mesh", create);
-
- SOCKET_UINT(motion_steps, "Motion Steps", 3);
- SOCKET_BOOLEAN(use_motion_blur, "Use Motion Blur", false);
+ NodeType *type = NodeType::add("mesh", create, NodeType::NONE, Geometry::node_base_type);
SOCKET_INT_ARRAY(triangles, "Triangles", array<int>());
SOCKET_POINT_ARRAY(verts, "Vertices", array<float3>());
SOCKET_INT_ARRAY(shader, "Shader", array<int>());
SOCKET_BOOLEAN_ARRAY(smooth, "Smooth", array<bool>());
- SOCKET_POINT_ARRAY(curve_keys, "Curve Keys", array<float3>());
- SOCKET_FLOAT_ARRAY(curve_radius, "Curve Radius", array<float>());
- SOCKET_INT_ARRAY(curve_first_key, "Curve First Key", array<int>());
- SOCKET_INT_ARRAY(curve_shader, "Curve Shader", array<int>());
-
return type;
}
-Mesh::Mesh() : Node(node_type)
+Mesh::Mesh() : Geometry(node_type, Geometry::MESH), subd_attributes(this, ATTR_PRIM_SUBD)
{
- need_update = true;
- need_update_rebuild = false;
- transform_applied = false;
- transform_negative_scaled = false;
- transform_normal = transform_identity();
- bounds = BoundBox::empty;
-
- bvh = NULL;
-
- tri_offset = 0;
vert_offset = 0;
- curve_offset = 0;
- curvekey_offset = 0;
-
patch_offset = 0;
face_offset = 0;
corner_offset = 0;
- attr_map_offset = 0;
-
num_subd_verts = 0;
- attributes.triangle_mesh = this;
- curve_attributes.curve_mesh = this;
- subd_attributes.subd_mesh = this;
-
- geometry_flags = GEOMETRY_NONE;
-
- volume_isovalue = 0.001f;
- has_volume = false;
- has_surface_bssrdf = false;
+ volume_clipping = 0.001f;
+ volume_step_size = 0.0f;
+ volume_object_space = false;
num_ngons = 0;
@@ -455,7 +159,6 @@ Mesh::Mesh() : Node(node_type)
Mesh::~Mesh()
{
- delete bvh;
delete patch_table;
delete subd_params;
}
@@ -491,26 +194,6 @@ void Mesh::reserve_mesh(int numverts, int numtris)
attributes.resize(true);
}
-void Mesh::resize_curves(int numcurves, int numkeys)
-{
- curve_keys.resize(numkeys);
- curve_radius.resize(numkeys);
- curve_first_key.resize(numcurves);
- curve_shader.resize(numcurves);
-
- curve_attributes.resize();
-}
-
-void Mesh::reserve_curves(int numcurves, int numkeys)
-{
- curve_keys.reserve(numkeys);
- curve_radius.reserve(numkeys);
- curve_first_key.reserve(numcurves);
- curve_shader.reserve(numcurves);
-
- curve_attributes.resize(true);
-}
-
void Mesh::resize_subd_faces(int numfaces, int num_ngons_, int numcorners)
{
subd_faces.resize(numfaces);
@@ -531,6 +214,8 @@ void Mesh::reserve_subd_faces(int numfaces, int num_ngons_, int numcorners)
void Mesh::clear(bool preserve_voxel_data)
{
+ Geometry::clear();
+
/* clear all verts and triangles */
verts.clear();
triangles.clear();
@@ -540,11 +225,6 @@ void Mesh::clear(bool preserve_voxel_data)
triangle_patch.clear();
vert_patch_uv.clear();
- curve_keys.clear();
- curve_radius.clear();
- curve_first_key.clear();
- curve_shader.clear();
-
subd_faces.clear();
subd_face_corners.clear();
@@ -552,24 +232,21 @@ void Mesh::clear(bool preserve_voxel_data)
subd_creases.clear();
- curve_attributes.clear();
subd_attributes.clear();
attributes.clear(preserve_voxel_data);
- used_shaders.clear();
-
- if (!preserve_voxel_data) {
- geometry_flags = GEOMETRY_NONE;
- }
-
- transform_applied = false;
- transform_negative_scaled = false;
- transform_normal = transform_identity();
+ vert_to_stitching_key_map.clear();
+ vert_stitching_map.clear();
delete patch_table;
patch_table = NULL;
}
+void Mesh::clear()
+{
+ clear(false);
+}
+
void Mesh::add_vertex(float3 P)
{
verts.push_back_reserved(P);
@@ -601,18 +278,6 @@ void Mesh::add_triangle(int v0, int v1, int v2, int shader_, bool smooth_)
}
}
-void Mesh::add_curve_key(float3 co, float radius)
-{
- curve_keys.push_back_reserved(co);
- curve_radius.push_back_reserved(radius);
-}
-
-void Mesh::add_curve(int first_key, int shader)
-{
- curve_first_key.push_back_reserved(first_key);
- curve_shader.push_back_reserved(shader);
-}
-
void Mesh::add_subd_face(int *corners, int num_corners, int shader_, bool smooth_)
{
int start_corner = subd_face_corners.size();
@@ -632,19 +297,53 @@ void Mesh::add_subd_face(int *corners, int num_corners, int shader_, bool smooth
subd_faces.push_back_reserved(face);
}
+void Mesh::copy_center_to_motion_step(const int motion_step)
+{
+ Attribute *attr_mP = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
+ if (attr_mP) {
+ Attribute *attr_mN = attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL);
+ Attribute *attr_N = attributes.find(ATTR_STD_VERTEX_NORMAL);
+ float3 *P = &verts[0];
+ float3 *N = (attr_N) ? attr_N->data_float3() : NULL;
+ size_t numverts = verts.size();
+
+ memcpy(attr_mP->data_float3() + motion_step * numverts, P, sizeof(float3) * numverts);
+ if (attr_mN)
+ memcpy(attr_mN->data_float3() + motion_step * numverts, N, sizeof(float3) * numverts);
+ }
+}
+
+void Mesh::get_uv_tiles(ustring map, unordered_set<int> &tiles)
+{
+ Attribute *attr, *subd_attr;
+
+ if (map.empty()) {
+ attr = attributes.find(ATTR_STD_UV);
+ subd_attr = subd_attributes.find(ATTR_STD_UV);
+ }
+ else {
+ attr = attributes.find(map);
+ subd_attr = subd_attributes.find(map);
+ }
+
+ if (attr) {
+ attr->get_uv_tiles(this, ATTR_PRIM_GEOMETRY, tiles);
+ }
+ if (subd_attr) {
+ subd_attr->get_uv_tiles(this, ATTR_PRIM_SUBD, tiles);
+ }
+}
+
void Mesh::compute_bounds()
{
BoundBox bnds = BoundBox::empty;
size_t verts_size = verts.size();
- size_t curve_keys_size = curve_keys.size();
- if (verts_size + curve_keys_size > 0) {
+ if (verts_size > 0) {
for (size_t i = 0; i < verts_size; i++)
bnds.grow(verts[i]);
- for (size_t i = 0; i < curve_keys_size; i++)
- bnds.grow(curve_keys[i], curve_radius[i]);
-
Attribute *attr = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if (use_motion_blur && attr) {
size_t steps_size = verts.size() * (motion_steps - 1);
@@ -654,15 +353,6 @@ void Mesh::compute_bounds()
bnds.grow(vert_steps[i]);
}
- Attribute *curve_attr = curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
- if (use_motion_blur && curve_attr) {
- size_t steps_size = curve_keys.size() * (motion_steps - 1);
- float3 *key_steps = curve_attr->data_float3();
-
- for (size_t i = 0; i < steps_size; i++)
- bnds.grow(key_steps[i]);
- }
-
if (!bnds.valid()) {
bnds = BoundBox::empty;
@@ -670,9 +360,6 @@ void Mesh::compute_bounds()
for (size_t i = 0; i < verts_size; i++)
bnds.grow_safe(verts[i]);
- for (size_t i = 0; i < curve_keys_size; i++)
- bnds.grow_safe(curve_keys[i], curve_radius[i]);
-
if (use_motion_blur && attr) {
size_t steps_size = verts.size() * (motion_steps - 1);
float3 *vert_steps = attr->data_float3();
@@ -680,14 +367,6 @@ void Mesh::compute_bounds()
for (size_t i = 0; i < steps_size; i++)
bnds.grow_safe(vert_steps[i]);
}
-
- if (use_motion_blur && curve_attr) {
- size_t steps_size = curve_keys.size() * (motion_steps - 1);
- float3 *key_steps = curve_attr->data_float3();
-
- for (size_t i = 0; i < steps_size; i++)
- bnds.grow_safe(key_steps[i]);
- }
}
}
@@ -699,6 +378,38 @@ void Mesh::compute_bounds()
bounds = bnds;
}
+void Mesh::apply_transform(const Transform &tfm, const bool apply_to_motion)
+{
+ transform_normal = transform_transposed_inverse(tfm);
+
+ /* apply to mesh vertices */
+ for (size_t i = 0; i < verts.size(); i++)
+ verts[i] = transform_point(&tfm, verts[i]);
+
+ if (apply_to_motion) {
+ Attribute *attr = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
+ if (attr) {
+ size_t steps_size = verts.size() * (motion_steps - 1);
+ float3 *vert_steps = attr->data_float3();
+
+ for (size_t i = 0; i < steps_size; i++)
+ vert_steps[i] = transform_point(&tfm, vert_steps[i]);
+ }
+
+ Attribute *attr_N = attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL);
+
+ if (attr_N) {
+ Transform ntfm = transform_normal;
+ size_t steps_size = verts.size() * (motion_steps - 1);
+ float3 *normal_steps = attr_N->data_float3();
+
+ for (size_t i = 0; i < steps_size; i++)
+ normal_steps[i] = normalize(transform_direction(&ntfm, normal_steps[i]));
+ }
+ }
+}
+
void Mesh::add_face_normals()
{
/* don't compute if already there */
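The transform_transposed_inverse() used in apply_transform() above reflects the standard rule that normals transform by the inverse transpose of the point transform, which keeps them perpendicular to the surface under non-uniform scaling. A tiny self-contained illustration with hypothetical 2D values and plain arrays instead of Cycles' Transform:

  #include <cstdio>

  int main()
  {
    /* Non-uniform scale M = diag(2, 1); its inverse-transpose is diag(0.5, 1). */
    const float M[2] = {2.0f, 1.0f};
    const float Mit[2] = {0.5f, 1.0f};

    /* A tangent along the surface and the matching normal. */
    const float t[2] = {1.0f, 1.0f};
    const float n[2] = {1.0f, -1.0f};

    float t2[2] = {M[0] * t[0], M[1] * t[1]};          /* transformed tangent */
    float n_wrong[2] = {M[0] * n[0], M[1] * n[1]};     /* normal transformed like a point */
    float n_right[2] = {Mit[0] * n[0], Mit[1] * n[1]}; /* inverse-transpose transform */

    printf("dot with plain transform: %g (no longer perpendicular)\n",
           t2[0] * n_wrong[0] + t2[1] * n_wrong[1]);
    printf("dot with inverse-transpose: %g (still perpendicular)\n",
           t2[0] * n_right[0] + t2[1] * n_right[1]);
    return 0;
  }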
@@ -836,11 +547,11 @@ void Mesh::add_undisplaced()
float3 *data = attr->data_float3();
/* copy verts */
- size_t size = attr->buffer_size(
- this, (subdivision_type == SUBDIVISION_NONE) ? ATTR_PRIM_TRIANGLE : ATTR_PRIM_SUBD);
+ size_t size = attr->buffer_size(this, attrs.prim);
- /* Center points for ngons aren't stored in Mesh::verts but are included in size since they will be
- * calculated later, we subtract them from size here so we don't have an overflow while copying.
+ /* Center points for ngons aren't stored in Mesh::verts but are included in size since they will
+ * be calculated later, we subtract them from size here so we don't have an overflow while
+ * copying.
*/
size -= num_ngons * attr->data_sizeof();
@@ -925,39 +636,6 @@ void Mesh::pack_verts(const vector<uint> &tri_prim_index,
}
}
-void Mesh::pack_curves(Scene *scene,
- float4 *curve_key_co,
- float4 *curve_data,
- size_t curvekey_offset)
-{
- size_t curve_keys_size = curve_keys.size();
-
- /* pack curve keys */
- if (curve_keys_size) {
- float3 *keys_ptr = curve_keys.data();
- float *radius_ptr = curve_radius.data();
-
- for (size_t i = 0; i < curve_keys_size; i++)
- curve_key_co[i] = make_float4(keys_ptr[i].x, keys_ptr[i].y, keys_ptr[i].z, radius_ptr[i]);
- }
-
- /* pack curve segments */
- size_t curve_num = num_curves();
-
- for (size_t i = 0; i < curve_num; i++) {
- Curve curve = get_curve(i);
- int shader_id = curve_shader[i];
- Shader *shader = (shader_id < used_shaders.size()) ? used_shaders[shader_id] :
- scene->default_surface;
- shader_id = scene->shader_manager->get_shader_id(shader, false);
-
- curve_data[i] = make_float4(__int_as_float(curve.first_key + curvekey_offset),
- __int_as_float(curve.num_keys),
- __int_as_float(shader_id),
- 0.0f);
- }
-}
-
void Mesh::pack_patches(uint *patch_data, uint vert_offset, uint face_offset, uint corner_offset)
{
size_t num_faces = subd_faces.size();
@@ -1004,1364 +682,4 @@ void Mesh::pack_patches(uint *patch_data, uint vert_offset, uint face_offset, ui
}
}
-void Mesh::compute_bvh(
- Device *device, DeviceScene *dscene, SceneParams *params, Progress *progress, int n, int total)
-{
- if (progress->get_cancel())
- return;
-
- compute_bounds();
-
- if (need_build_bvh()) {
- string msg = "Updating Mesh BVH ";
- if (name == "")
- msg += string_printf("%u/%u", (uint)(n + 1), (uint)total);
- else
- msg += string_printf("%s %u/%u", name.c_str(), (uint)(n + 1), (uint)total);
-
- Object object;
- object.mesh = this;
-
- vector<Object *> objects;
- objects.push_back(&object);
-
- if (bvh && !need_update_rebuild) {
- progress->set_status(msg, "Refitting BVH");
- bvh->objects = objects;
- bvh->refit(*progress);
- }
- else {
- progress->set_status(msg, "Building BVH");
-
- BVHParams bparams;
- bparams.use_spatial_split = params->use_bvh_spatial_split;
- bparams.bvh_layout = BVHParams::best_bvh_layout(params->bvh_layout,
- device->get_bvh_layout_mask());
- bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
- params->use_bvh_unaligned_nodes;
- bparams.num_motion_triangle_steps = params->num_bvh_time_steps;
- bparams.num_motion_curve_steps = params->num_bvh_time_steps;
- bparams.bvh_type = params->bvh_type;
- bparams.curve_flags = dscene->data.curve.curveflags;
- bparams.curve_subdivisions = dscene->data.curve.subdivisions;
-
- delete bvh;
- bvh = BVH::create(bparams, objects);
- MEM_GUARDED_CALL(progress, bvh->build, *progress);
- }
- }
-
- need_update = false;
- need_update_rebuild = false;
-}
-
-void Mesh::tag_update(Scene *scene, bool rebuild)
-{
- need_update = true;
-
- if (rebuild) {
- need_update_rebuild = true;
- scene->light_manager->need_update = true;
- }
- else {
- foreach (Shader *shader, used_shaders)
- if (shader->has_surface_emission)
- scene->light_manager->need_update = true;
- }
-
- scene->mesh_manager->need_update = true;
- scene->object_manager->need_update = true;
-}
-
-bool Mesh::has_motion_blur() const
-{
- return (use_motion_blur && (attributes.find(ATTR_STD_MOTION_VERTEX_POSITION) ||
- curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)));
-}
-
-bool Mesh::has_true_displacement() const
-{
- foreach (Shader *shader, used_shaders) {
- if (shader->has_displacement && shader->displacement_method != DISPLACE_BUMP) {
- return true;
- }
- }
-
- return false;
-}
-
-float Mesh::motion_time(int step) const
-{
- return (motion_steps > 1) ? 2.0f * step / (motion_steps - 1) - 1.0f : 0.0f;
-}
-
-int Mesh::motion_step(float time) const
-{
- if (motion_steps > 1) {
- int attr_step = 0;
-
- for (int step = 0; step < motion_steps; step++) {
- float step_time = motion_time(step);
- if (step_time == time) {
- return attr_step;
- }
-
- /* Center step is stored in a separate attribute. */
- if (step != motion_steps / 2) {
- attr_step++;
- }
- }
- }
-
- return -1;
-}
-
-bool Mesh::need_build_bvh() const
-{
- return !transform_applied || has_surface_bssrdf;
-}
-
-bool Mesh::is_instanced() const
-{
- /* Currently we treat subsurface objects as instanced.
- *
- * While it might be not very optimal for ray traversal, it avoids having
- * duplicated BVH in the memory, saving quite some space.
- */
- return !transform_applied || has_surface_bssrdf;
-}
-
-/* Mesh Manager */
-
-MeshManager::MeshManager()
-{
- need_update = true;
- need_flags_update = true;
-}
-
-MeshManager::~MeshManager()
-{
-}
-
-void MeshManager::update_osl_attributes(Device *device,
- Scene *scene,
- vector<AttributeRequestSet> &mesh_attributes)
-{
-#ifdef WITH_OSL
- /* for OSL, a hash map is used to lookup the attribute by name. */
- OSLGlobals *og = (OSLGlobals *)device->osl_memory();
-
- og->object_name_map.clear();
- og->attribute_map.clear();
- og->object_names.clear();
-
- og->attribute_map.resize(scene->objects.size() * ATTR_PRIM_TYPES);
-
- for (size_t i = 0; i < scene->objects.size(); i++) {
- /* set object name to object index map */
- Object *object = scene->objects[i];
- og->object_name_map[object->name] = i;
- og->object_names.push_back(object->name);
-
- /* set object attributes */
- foreach (ParamValue &attr, object->attributes) {
- OSLGlobals::Attribute osl_attr;
-
- osl_attr.type = attr.type();
- osl_attr.desc.element = ATTR_ELEMENT_OBJECT;
- osl_attr.value = attr;
- osl_attr.desc.offset = 0;
- osl_attr.desc.flags = 0;
-
- og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_TRIANGLE][attr.name()] = osl_attr;
- og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_CURVE][attr.name()] = osl_attr;
- og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_SUBD][attr.name()] = osl_attr;
- }
-
- /* find mesh attributes */
- size_t j;
-
- for (j = 0; j < scene->meshes.size(); j++)
- if (scene->meshes[j] == object->mesh)
- break;
-
- AttributeRequestSet &attributes = mesh_attributes[j];
-
- /* set object attributes */
- foreach (AttributeRequest &req, attributes.requests) {
- OSLGlobals::Attribute osl_attr;
-
- if (req.triangle_desc.element != ATTR_ELEMENT_NONE) {
- osl_attr.desc = req.triangle_desc;
-
- if (req.triangle_type == TypeDesc::TypeFloat)
- osl_attr.type = TypeDesc::TypeFloat;
- else if (req.triangle_type == TypeDesc::TypeMatrix)
- osl_attr.type = TypeDesc::TypeMatrix;
- else if (req.triangle_type == TypeFloat2)
- osl_attr.type = TypeFloat2;
- else
- osl_attr.type = TypeDesc::TypeColor;
-
- if (req.std != ATTR_STD_NONE) {
- /* if standard attribute, add lookup by geom: name convention */
- ustring stdname(string("geom:") + string(Attribute::standard_name(req.std)));
- og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_TRIANGLE][stdname] = osl_attr;
- }
- else if (req.name != ustring()) {
- /* add lookup by mesh attribute name */
- og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_TRIANGLE][req.name] = osl_attr;
- }
- }
-
- if (req.curve_desc.element != ATTR_ELEMENT_NONE) {
- osl_attr.desc = req.curve_desc;
-
- if (req.curve_type == TypeDesc::TypeFloat)
- osl_attr.type = TypeDesc::TypeFloat;
- else if (req.curve_type == TypeDesc::TypeMatrix)
- osl_attr.type = TypeDesc::TypeMatrix;
- else
- osl_attr.type = TypeDesc::TypeColor;
-
- if (req.std != ATTR_STD_NONE) {
- /* if standard attribute, add lookup by geom: name convention */
- ustring stdname(string("geom:") + string(Attribute::standard_name(req.std)));
- og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_CURVE][stdname] = osl_attr;
- }
- else if (req.name != ustring()) {
- /* add lookup by mesh attribute name */
- og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_CURVE][req.name] = osl_attr;
- }
- }
-
- if (req.subd_desc.element != ATTR_ELEMENT_NONE) {
- osl_attr.desc = req.subd_desc;
-
- if (req.subd_type == TypeDesc::TypeFloat)
- osl_attr.type = TypeDesc::TypeFloat;
- else if (req.subd_type == TypeDesc::TypeMatrix)
- osl_attr.type = TypeDesc::TypeMatrix;
- else
- osl_attr.type = TypeDesc::TypeColor;
-
- if (req.std != ATTR_STD_NONE) {
- /* if standard attribute, add lookup by geom: name convention */
- ustring stdname(string("geom:") + string(Attribute::standard_name(req.std)));
- og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_SUBD][stdname] = osl_attr;
- }
- else if (req.name != ustring()) {
- /* add lookup by mesh attribute name */
- og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_SUBD][req.name] = osl_attr;
- }
- }
- }
- }
-#else
- (void)device;
- (void)scene;
- (void)mesh_attributes;
-#endif
-}
-
-void MeshManager::update_svm_attributes(Device *,
- DeviceScene *dscene,
- Scene *scene,
- vector<AttributeRequestSet> &mesh_attributes)
-{
- /* for SVM, the attributes_map table is used to lookup the offset of an
- * attribute, based on a unique shader attribute id. */
-
- /* compute array stride */
- int attr_map_size = 0;
-
- for (size_t i = 0; i < scene->meshes.size(); i++) {
- Mesh *mesh = scene->meshes[i];
- mesh->attr_map_offset = attr_map_size;
- attr_map_size += (mesh_attributes[i].size() + 1) * ATTR_PRIM_TYPES;
- }
-
- if (attr_map_size == 0)
- return;
-
- /* create attribute map */
- uint4 *attr_map = dscene->attributes_map.alloc(attr_map_size);
- memset(attr_map, 0, dscene->attributes_map.size() * sizeof(uint));
-
- for (size_t i = 0; i < scene->meshes.size(); i++) {
- Mesh *mesh = scene->meshes[i];
- AttributeRequestSet &attributes = mesh_attributes[i];
-
- /* set object attributes */
- int index = mesh->attr_map_offset;
-
- foreach (AttributeRequest &req, attributes.requests) {
- uint id;
-
- if (req.std == ATTR_STD_NONE)
- id = scene->shader_manager->get_attribute_id(req.name);
- else
- id = scene->shader_manager->get_attribute_id(req.std);
-
- if (mesh->num_triangles()) {
- attr_map[index].x = id;
- attr_map[index].y = req.triangle_desc.element;
- attr_map[index].z = as_uint(req.triangle_desc.offset);
-
- if (req.triangle_type == TypeDesc::TypeFloat)
- attr_map[index].w = NODE_ATTR_FLOAT;
- else if (req.triangle_type == TypeDesc::TypeMatrix)
- attr_map[index].w = NODE_ATTR_MATRIX;
- else if (req.triangle_type == TypeFloat2)
- attr_map[index].w = NODE_ATTR_FLOAT2;
- else
- attr_map[index].w = NODE_ATTR_FLOAT3;
-
- attr_map[index].w |= req.triangle_desc.flags << 8;
- }
-
- index++;
-
- if (mesh->num_curves()) {
- attr_map[index].x = id;
- attr_map[index].y = req.curve_desc.element;
- attr_map[index].z = as_uint(req.curve_desc.offset);
-
- if (req.curve_type == TypeDesc::TypeFloat)
- attr_map[index].w = NODE_ATTR_FLOAT;
- else if (req.curve_type == TypeDesc::TypeMatrix)
- attr_map[index].w = NODE_ATTR_MATRIX;
- else if (req.curve_type == TypeFloat2)
- attr_map[index].w = NODE_ATTR_FLOAT2;
- else
- attr_map[index].w = NODE_ATTR_FLOAT3;
-
- attr_map[index].w |= req.curve_desc.flags << 8;
- }
-
- index++;
-
- if (mesh->subd_faces.size()) {
- attr_map[index].x = id;
- attr_map[index].y = req.subd_desc.element;
- attr_map[index].z = as_uint(req.subd_desc.offset);
-
- if (req.subd_type == TypeDesc::TypeFloat)
- attr_map[index].w = NODE_ATTR_FLOAT;
- else if (req.subd_type == TypeDesc::TypeMatrix)
- attr_map[index].w = NODE_ATTR_MATRIX;
- else if (req.subd_type == TypeFloat2)
- attr_map[index].w = NODE_ATTR_FLOAT2;
- else
- attr_map[index].w = NODE_ATTR_FLOAT3;
-
- attr_map[index].w |= req.subd_desc.flags << 8;
- }
-
- index++;
- }
-
- /* terminator */
- for (int j = 0; j < ATTR_PRIM_TYPES; j++) {
- attr_map[index].x = ATTR_STD_NONE;
- attr_map[index].y = 0;
- attr_map[index].z = 0;
- attr_map[index].w = 0;
-
- index++;
- }
- }
-
- /* copy to device */
- dscene->attributes_map.copy_to_device();
-}
-
-static void update_attribute_element_size(Mesh *mesh,
- Attribute *mattr,
- AttributePrimitive prim,
- size_t *attr_float_size,
- size_t *attr_float2_size,
- size_t *attr_float3_size,
- size_t *attr_uchar4_size)
-{
- if (mattr) {
- size_t size = mattr->element_size(mesh, prim);
-
- if (mattr->element == ATTR_ELEMENT_VOXEL) {
- /* pass */
- }
- else if (mattr->element == ATTR_ELEMENT_CORNER_BYTE) {
- *attr_uchar4_size += size;
- }
- else if (mattr->type == TypeDesc::TypeFloat) {
- *attr_float_size += size;
- }
- else if (mattr->type == TypeFloat2) {
- *attr_float2_size += size;
- }
- else if (mattr->type == TypeDesc::TypeMatrix) {
- *attr_float3_size += size * 4;
- }
- else {
- *attr_float3_size += size;
- }
- }
-}
-
-static void update_attribute_element_offset(Mesh *mesh,
- device_vector<float> &attr_float,
- size_t &attr_float_offset,
- device_vector<float2> &attr_float2,
- size_t &attr_float2_offset,
- device_vector<float4> &attr_float3,
- size_t &attr_float3_offset,
- device_vector<uchar4> &attr_uchar4,
- size_t &attr_uchar4_offset,
- Attribute *mattr,
- AttributePrimitive prim,
- TypeDesc &type,
- AttributeDescriptor &desc)
-{
- if (mattr) {
- /* store element and type */
- desc.element = mattr->element;
- desc.flags = mattr->flags;
- type = mattr->type;
-
- /* store attribute data in arrays */
- size_t size = mattr->element_size(mesh, prim);
-
- AttributeElement &element = desc.element;
- int &offset = desc.offset;
-
- if (mattr->element == ATTR_ELEMENT_VOXEL) {
- /* store slot in offset value */
- VoxelAttribute *voxel_data = mattr->data_voxel();
- offset = voxel_data->slot;
- }
- else if (mattr->element == ATTR_ELEMENT_CORNER_BYTE) {
- uchar4 *data = mattr->data_uchar4();
- offset = attr_uchar4_offset;
-
- assert(attr_uchar4.size() >= offset + size);
- for (size_t k = 0; k < size; k++) {
- attr_uchar4[offset + k] = data[k];
- }
- attr_uchar4_offset += size;
- }
- else if (mattr->type == TypeDesc::TypeFloat) {
- float *data = mattr->data_float();
- offset = attr_float_offset;
-
- assert(attr_float.size() >= offset + size);
- for (size_t k = 0; k < size; k++) {
- attr_float[offset + k] = data[k];
- }
- attr_float_offset += size;
- }
- else if (mattr->type == TypeFloat2) {
- float2 *data = mattr->data_float2();
- offset = attr_float2_offset;
-
- assert(attr_float2.size() >= offset + size);
- for (size_t k = 0; k < size; k++) {
- attr_float2[offset + k] = data[k];
- }
- attr_float2_offset += size;
- }
- else if (mattr->type == TypeDesc::TypeMatrix) {
- Transform *tfm = mattr->data_transform();
- offset = attr_float3_offset;
-
- assert(attr_float3.size() >= offset + size * 3);
- for (size_t k = 0; k < size * 3; k++) {
- attr_float3[offset + k] = (&tfm->x)[k];
- }
- attr_float3_offset += size * 3;
- }
- else {
- float4 *data = mattr->data_float4();
- offset = attr_float3_offset;
-
- assert(attr_float3.size() >= offset + size);
- for (size_t k = 0; k < size; k++) {
- attr_float3[offset + k] = data[k];
- }
- attr_float3_offset += size;
- }
-
- /* mesh vertex/curve index is global, not per object, so we sneak
- * a correction for that in here */
- if (mesh->subdivision_type == Mesh::SUBDIVISION_CATMULL_CLARK &&
- desc.flags & ATTR_SUBDIVIDED) {
- /* indices for subdivided attributes are retrieved
- * from the patch table, so no need for correction here */
- }
- else if (element == ATTR_ELEMENT_VERTEX)
- offset -= mesh->vert_offset;
- else if (element == ATTR_ELEMENT_VERTEX_MOTION)
- offset -= mesh->vert_offset;
- else if (element == ATTR_ELEMENT_FACE) {
- if (prim == ATTR_PRIM_TRIANGLE)
- offset -= mesh->tri_offset;
- else
- offset -= mesh->face_offset;
- }
- else if (element == ATTR_ELEMENT_CORNER || element == ATTR_ELEMENT_CORNER_BYTE) {
- if (prim == ATTR_PRIM_TRIANGLE)
- offset -= 3 * mesh->tri_offset;
- else
- offset -= mesh->corner_offset;
- }
- else if (element == ATTR_ELEMENT_CURVE)
- offset -= mesh->curve_offset;
- else if (element == ATTR_ELEMENT_CURVE_KEY)
- offset -= mesh->curvekey_offset;
- else if (element == ATTR_ELEMENT_CURVE_KEY_MOTION)
- offset -= mesh->curvekey_offset;
- }
- else {
- /* attribute not found */
- desc.element = ATTR_ELEMENT_NONE;
- desc.offset = 0;
- }
-}
-
-void MeshManager::device_update_attributes(Device *device,
- DeviceScene *dscene,
- Scene *scene,
- Progress &progress)
-{
- progress.set_status("Updating Mesh", "Computing attributes");
-
- /* gather per-mesh requested attributes. As meshes may have multiple
- * shaders assigned, this merges the requested attributes that have
- * been set per shader by the shader manager */
- vector<AttributeRequestSet> mesh_attributes(scene->meshes.size());
-
- for (size_t i = 0; i < scene->meshes.size(); i++) {
- Mesh *mesh = scene->meshes[i];
-
- scene->need_global_attributes(mesh_attributes[i]);
-
- foreach (Shader *shader, mesh->used_shaders) {
- mesh_attributes[i].add(shader->attributes);
- }
- }
-
- /* mesh attributes are stored in a single array per data type. Here we fill
- * those arrays, and set the offset and element type to create attribute
- * maps next */
-
- /* Pre-allocate attributes to avoid array re-allocation, which would
- * take 2x the overall attribute memory usage.
- */
- size_t attr_float_size = 0;
- size_t attr_float2_size = 0;
- size_t attr_float3_size = 0;
- size_t attr_uchar4_size = 0;
- for (size_t i = 0; i < scene->meshes.size(); i++) {
- Mesh *mesh = scene->meshes[i];
- AttributeRequestSet &attributes = mesh_attributes[i];
- foreach (AttributeRequest &req, attributes.requests) {
- Attribute *triangle_mattr = mesh->attributes.find(req);
- Attribute *curve_mattr = mesh->curve_attributes.find(req);
- Attribute *subd_mattr = mesh->subd_attributes.find(req);
-
- update_attribute_element_size(mesh,
- triangle_mattr,
- ATTR_PRIM_TRIANGLE,
- &attr_float_size,
- &attr_float2_size,
- &attr_float3_size,
- &attr_uchar4_size);
- update_attribute_element_size(mesh,
- curve_mattr,
- ATTR_PRIM_CURVE,
- &attr_float_size,
- &attr_float2_size,
- &attr_float3_size,
- &attr_uchar4_size);
- update_attribute_element_size(mesh,
- subd_mattr,
- ATTR_PRIM_SUBD,
- &attr_float_size,
- &attr_float2_size,
- &attr_float3_size,
- &attr_uchar4_size);
- }
- }
-
- dscene->attributes_float.alloc(attr_float_size);
- dscene->attributes_float2.alloc(attr_float2_size);
- dscene->attributes_float3.alloc(attr_float3_size);
- dscene->attributes_uchar4.alloc(attr_uchar4_size);
-
- size_t attr_float_offset = 0;
- size_t attr_float2_offset = 0;
- size_t attr_float3_offset = 0;
- size_t attr_uchar4_offset = 0;
-
- /* Fill in attributes. */
- for (size_t i = 0; i < scene->meshes.size(); i++) {
- Mesh *mesh = scene->meshes[i];
- AttributeRequestSet &attributes = mesh_attributes[i];
-
- /* todo: we now store std and name attributes from requests even if
- * they actually refer to the same mesh attributes, optimize */
- foreach (AttributeRequest &req, attributes.requests) {
- Attribute *triangle_mattr = mesh->attributes.find(req);
- Attribute *curve_mattr = mesh->curve_attributes.find(req);
- Attribute *subd_mattr = mesh->subd_attributes.find(req);
-
- update_attribute_element_offset(mesh,
- dscene->attributes_float,
- attr_float_offset,
- dscene->attributes_float2,
- attr_float2_offset,
- dscene->attributes_float3,
- attr_float3_offset,
- dscene->attributes_uchar4,
- attr_uchar4_offset,
- triangle_mattr,
- ATTR_PRIM_TRIANGLE,
- req.triangle_type,
- req.triangle_desc);
-
- update_attribute_element_offset(mesh,
- dscene->attributes_float,
- attr_float_offset,
- dscene->attributes_float2,
- attr_float2_offset,
- dscene->attributes_float3,
- attr_float3_offset,
- dscene->attributes_uchar4,
- attr_uchar4_offset,
- curve_mattr,
- ATTR_PRIM_CURVE,
- req.curve_type,
- req.curve_desc);
-
- update_attribute_element_offset(mesh,
- dscene->attributes_float,
- attr_float_offset,
- dscene->attributes_float2,
- attr_float2_offset,
- dscene->attributes_float3,
- attr_float3_offset,
- dscene->attributes_uchar4,
- attr_uchar4_offset,
- subd_mattr,
- ATTR_PRIM_SUBD,
- req.subd_type,
- req.subd_desc);
-
- if (progress.get_cancel())
- return;
- }
- }
-
- /* create attribute lookup maps */
- if (scene->shader_manager->use_osl())
- update_osl_attributes(device, scene, mesh_attributes);
-
- update_svm_attributes(device, dscene, scene, mesh_attributes);
-
- if (progress.get_cancel())
- return;
-
- /* copy to device */
- progress.set_status("Updating Mesh", "Copying Attributes to device");
-
- if (dscene->attributes_float.size()) {
- dscene->attributes_float.copy_to_device();
- }
- if (dscene->attributes_float2.size()) {
- dscene->attributes_float2.copy_to_device();
- }
- if (dscene->attributes_float3.size()) {
- dscene->attributes_float3.copy_to_device();
- }
- if (dscene->attributes_uchar4.size()) {
- dscene->attributes_uchar4.copy_to_device();
- }
-
- if (progress.get_cancel())
- return;
-
- /* After mesh attributes and patch tables have been copied to device memory,
- * we need to update offsets in the objects. */
- scene->object_manager->device_update_mesh_offsets(device, dscene, scene);
-}
-
-void MeshManager::mesh_calc_offset(Scene *scene)
-{
- size_t vert_size = 0;
- size_t tri_size = 0;
-
- size_t curve_key_size = 0;
- size_t curve_size = 0;
-
- size_t patch_size = 0;
- size_t face_size = 0;
- size_t corner_size = 0;
-
- foreach (Mesh *mesh, scene->meshes) {
- mesh->vert_offset = vert_size;
- mesh->tri_offset = tri_size;
-
- mesh->curvekey_offset = curve_key_size;
- mesh->curve_offset = curve_size;
-
- mesh->patch_offset = patch_size;
- mesh->face_offset = face_size;
- mesh->corner_offset = corner_size;
-
- vert_size += mesh->verts.size();
- tri_size += mesh->num_triangles();
-
- curve_key_size += mesh->curve_keys.size();
- curve_size += mesh->num_curves();
-
- if (mesh->subd_faces.size()) {
- Mesh::SubdFace &last = mesh->subd_faces[mesh->subd_faces.size() - 1];
- patch_size += (last.ptex_offset + last.num_ptex_faces()) * 8;
-
- /* patch tables are stored in the same array, so include them in patch_size */
- if (mesh->patch_table) {
- mesh->patch_table_offset = patch_size;
- patch_size += mesh->patch_table->total_size();
- }
- }
- face_size += mesh->subd_faces.size();
- corner_size += mesh->subd_face_corners.size();
- }
-}
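The offset bookkeeping above is an exclusive prefix sum over per-mesh element counts: each mesh's offset is the total size of all meshes processed before it. A generic sketch of the same pattern (hypothetical helper, not from the source):

/* Hypothetical illustration of the exclusive prefix-sum pattern used by
 * mesh_calc_offset(). */
#include <cstddef>
#include <vector>

static std::vector<size_t> exclusive_offsets(const std::vector<size_t> &sizes)
{
  std::vector<size_t> offsets(sizes.size());
  size_t running = 0;
  for (size_t i = 0; i < sizes.size(); i++) {
    offsets[i] = running; /* each mesh starts where the previous ones end */
    running += sizes[i];
  }
  return offsets;
}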
-
-void MeshManager::device_update_mesh(
- Device *, DeviceScene *dscene, Scene *scene, bool for_displacement, Progress &progress)
-{
- /* Count. */
- size_t vert_size = 0;
- size_t tri_size = 0;
-
- size_t curve_key_size = 0;
- size_t curve_size = 0;
-
- size_t patch_size = 0;
-
- foreach (Mesh *mesh, scene->meshes) {
- vert_size += mesh->verts.size();
- tri_size += mesh->num_triangles();
-
- curve_key_size += mesh->curve_keys.size();
- curve_size += mesh->num_curves();
-
- if (mesh->subd_faces.size()) {
- Mesh::SubdFace &last = mesh->subd_faces[mesh->subd_faces.size() - 1];
- patch_size += (last.ptex_offset + last.num_ptex_faces()) * 8;
-
- /* patch tables are stored in the same array, so include them in patch_size */
- if (mesh->patch_table) {
- mesh->patch_table_offset = patch_size;
- patch_size += mesh->patch_table->total_size();
- }
- }
- }
-
- /* Create mapping from triangle to primitive triangle array. */
- vector<uint> tri_prim_index(tri_size);
- if (for_displacement) {
- /* For displacement kernels we do some trickery to make them believe
- * we've got all required data ready. However, that data is different
- * from final render kernels since we don't have a BVH yet, so we can't
- * really use the same semantics for the arrays.
- */
- foreach (Mesh *mesh, scene->meshes) {
- for (size_t i = 0; i < mesh->num_triangles(); ++i) {
- tri_prim_index[i + mesh->tri_offset] = 3 * (i + mesh->tri_offset);
- }
- }
- }
- else {
- for (size_t i = 0; i < dscene->prim_index.size(); ++i) {
- if ((dscene->prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) {
- tri_prim_index[dscene->prim_index[i]] = dscene->prim_tri_index[i];
- }
- }
- }
-
- /* Fill in all the arrays. */
- if (tri_size != 0) {
- /* normals */
- progress.set_status("Updating Mesh", "Computing normals");
-
- uint *tri_shader = dscene->tri_shader.alloc(tri_size);
- float4 *vnormal = dscene->tri_vnormal.alloc(vert_size);
- uint4 *tri_vindex = dscene->tri_vindex.alloc(tri_size);
- uint *tri_patch = dscene->tri_patch.alloc(tri_size);
- float2 *tri_patch_uv = dscene->tri_patch_uv.alloc(vert_size);
-
- foreach (Mesh *mesh, scene->meshes) {
- mesh->pack_shaders(scene, &tri_shader[mesh->tri_offset]);
- mesh->pack_normals(&vnormal[mesh->vert_offset]);
- mesh->pack_verts(tri_prim_index,
- &tri_vindex[mesh->tri_offset],
- &tri_patch[mesh->tri_offset],
- &tri_patch_uv[mesh->vert_offset],
- mesh->vert_offset,
- mesh->tri_offset);
- if (progress.get_cancel())
- return;
- }
-
- /* vertex coordinates */
- progress.set_status("Updating Mesh", "Copying Mesh to device");
-
- dscene->tri_shader.copy_to_device();
- dscene->tri_vnormal.copy_to_device();
- dscene->tri_vindex.copy_to_device();
- dscene->tri_patch.copy_to_device();
- dscene->tri_patch_uv.copy_to_device();
- }
-
- if (curve_size != 0) {
- progress.set_status("Updating Mesh", "Copying Strands to device");
-
- float4 *curve_keys = dscene->curve_keys.alloc(curve_key_size);
- float4 *curves = dscene->curves.alloc(curve_size);
-
- foreach (Mesh *mesh, scene->meshes) {
- mesh->pack_curves(scene,
- &curve_keys[mesh->curvekey_offset],
- &curves[mesh->curve_offset],
- mesh->curvekey_offset);
- if (progress.get_cancel())
- return;
- }
-
- dscene->curve_keys.copy_to_device();
- dscene->curves.copy_to_device();
- }
-
- if (patch_size != 0) {
- progress.set_status("Updating Mesh", "Copying Patches to device");
-
- uint *patch_data = dscene->patches.alloc(patch_size);
-
- foreach (Mesh *mesh, scene->meshes) {
- mesh->pack_patches(&patch_data[mesh->patch_offset],
- mesh->vert_offset,
- mesh->face_offset,
- mesh->corner_offset);
-
- if (mesh->patch_table) {
- mesh->patch_table->copy_adjusting_offsets(&patch_data[mesh->patch_table_offset],
- mesh->patch_table_offset);
- }
-
- if (progress.get_cancel())
- return;
- }
-
- dscene->patches.copy_to_device();
- }
-
- if (for_displacement) {
- float4 *prim_tri_verts = dscene->prim_tri_verts.alloc(tri_size * 3);
- foreach (Mesh *mesh, scene->meshes) {
- for (size_t i = 0; i < mesh->num_triangles(); ++i) {
- Mesh::Triangle t = mesh->get_triangle(i);
- size_t offset = 3 * (i + mesh->tri_offset);
- prim_tri_verts[offset + 0] = float3_to_float4(mesh->verts[t.v[0]]);
- prim_tri_verts[offset + 1] = float3_to_float4(mesh->verts[t.v[1]]);
- prim_tri_verts[offset + 2] = float3_to_float4(mesh->verts[t.v[2]]);
- }
- }
- dscene->prim_tri_verts.copy_to_device();
- }
-}
-
-void MeshManager::device_update_bvh(Device *device,
- DeviceScene *dscene,
- Scene *scene,
- Progress &progress)
-{
- /* bvh build */
- progress.set_status("Updating Scene BVH", "Building");
-
- BVHParams bparams;
- bparams.top_level = true;
- bparams.bvh_layout = BVHParams::best_bvh_layout(scene->params.bvh_layout,
- device->get_bvh_layout_mask());
- bparams.use_spatial_split = scene->params.use_bvh_spatial_split;
- bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
- scene->params.use_bvh_unaligned_nodes;
- bparams.num_motion_triangle_steps = scene->params.num_bvh_time_steps;
- bparams.num_motion_curve_steps = scene->params.num_bvh_time_steps;
- bparams.bvh_type = scene->params.bvh_type;
- bparams.curve_flags = dscene->data.curve.curveflags;
- bparams.curve_subdivisions = dscene->data.curve.subdivisions;
-
- VLOG(1) << "Using " << bvh_layout_name(bparams.bvh_layout) << " layout.";
-
-#ifdef WITH_EMBREE
- if (bparams.bvh_layout == BVH_LAYOUT_EMBREE) {
- if (dscene->data.bvh.scene) {
- BVHEmbree::destroy(dscene->data.bvh.scene);
- }
- }
-#endif
-
- BVH *bvh = BVH::create(bparams, scene->objects);
- bvh->build(progress, &device->stats);
-
- if (progress.get_cancel()) {
-#ifdef WITH_EMBREE
- if (bparams.bvh_layout == BVH_LAYOUT_EMBREE) {
- if (dscene->data.bvh.scene) {
- BVHEmbree::destroy(dscene->data.bvh.scene);
- }
- }
-#endif
- delete bvh;
- return;
- }
-
- /* copy to device */
- progress.set_status("Updating Scene BVH", "Copying BVH to device");
-
- PackedBVH &pack = bvh->pack;
-
- if (pack.nodes.size()) {
- dscene->bvh_nodes.steal_data(pack.nodes);
- dscene->bvh_nodes.copy_to_device();
- }
- if (pack.leaf_nodes.size()) {
- dscene->bvh_leaf_nodes.steal_data(pack.leaf_nodes);
- dscene->bvh_leaf_nodes.copy_to_device();
- }
- if (pack.object_node.size()) {
- dscene->object_node.steal_data(pack.object_node);
- dscene->object_node.copy_to_device();
- }
- if (pack.prim_tri_index.size()) {
- dscene->prim_tri_index.steal_data(pack.prim_tri_index);
- dscene->prim_tri_index.copy_to_device();
- }
- if (pack.prim_tri_verts.size()) {
- dscene->prim_tri_verts.steal_data(pack.prim_tri_verts);
- dscene->prim_tri_verts.copy_to_device();
- }
- if (pack.prim_type.size()) {
- dscene->prim_type.steal_data(pack.prim_type);
- dscene->prim_type.copy_to_device();
- }
- if (pack.prim_visibility.size()) {
- dscene->prim_visibility.steal_data(pack.prim_visibility);
- dscene->prim_visibility.copy_to_device();
- }
- if (pack.prim_index.size()) {
- dscene->prim_index.steal_data(pack.prim_index);
- dscene->prim_index.copy_to_device();
- }
- if (pack.prim_object.size()) {
- dscene->prim_object.steal_data(pack.prim_object);
- dscene->prim_object.copy_to_device();
- }
- if (pack.prim_time.size()) {
- dscene->prim_time.steal_data(pack.prim_time);
- dscene->prim_time.copy_to_device();
- }
-
- dscene->data.bvh.root = pack.root_index;
- dscene->data.bvh.bvh_layout = bparams.bvh_layout;
- dscene->data.bvh.use_bvh_steps = (scene->params.num_bvh_time_steps != 0);
-
-#ifdef WITH_EMBREE
- if (bparams.bvh_layout == BVH_LAYOUT_EMBREE) {
- dscene->data.bvh.scene = ((BVHEmbree *)bvh)->scene;
- }
- else {
- dscene->data.bvh.scene = NULL;
- }
-#endif
-
- delete bvh;
-}
-
-void MeshManager::device_update_preprocess(Device *device, Scene *scene, Progress &progress)
-{
- if (!need_update && !need_flags_update) {
- return;
- }
-
- progress.set_status("Updating Meshes Flags");
-
- /* Update flags. */
- bool volume_images_updated = false;
-
- foreach (Mesh *mesh, scene->meshes) {
- mesh->has_volume = false;
-
- foreach (const Shader *shader, mesh->used_shaders) {
- if (shader->has_volume) {
- mesh->has_volume = true;
- }
- if (shader->has_surface_bssrdf) {
- mesh->has_surface_bssrdf = true;
- }
- }
-
- if (need_update && mesh->has_volume) {
- /* Create volume meshes if there is voxel data. */
- bool has_voxel_attributes = false;
-
- foreach (Attribute &attr, mesh->attributes.attributes) {
- if (attr.element == ATTR_ELEMENT_VOXEL) {
- has_voxel_attributes = true;
- }
- }
-
- if (has_voxel_attributes) {
- if (!volume_images_updated) {
- progress.set_status("Updating Meshes Volume Bounds");
- device_update_volume_images(device, scene, progress);
- volume_images_updated = true;
- }
-
- create_volume_mesh(scene, mesh, progress);
- }
- }
- }
-
- need_flags_update = false;
-}
-
-void MeshManager::device_update_displacement_images(Device *device,
- Scene *scene,
- Progress &progress)
-{
- progress.set_status("Updating Displacement Images");
- TaskPool pool;
- ImageManager *image_manager = scene->image_manager;
- set<int> bump_images;
- foreach (Mesh *mesh, scene->meshes) {
- if (mesh->need_update) {
- foreach (Shader *shader, mesh->used_shaders) {
- if (!shader->has_displacement || shader->displacement_method == DISPLACE_BUMP) {
- continue;
- }
- foreach (ShaderNode *node, shader->graph->nodes) {
- if (node->special_type != SHADER_SPECIAL_TYPE_IMAGE_SLOT) {
- continue;
- }
-
- ImageSlotTextureNode *image_node = static_cast<ImageSlotTextureNode *>(node);
- int slot = image_node->slot;
- if (slot != -1) {
- bump_images.insert(slot);
- }
- }
- }
- }
- }
- foreach (int slot, bump_images) {
- pool.push(function_bind(
- &ImageManager::device_update_slot, image_manager, device, scene, slot, &progress));
- }
- pool.wait_work();
-}
-
-void MeshManager::device_update_volume_images(Device *device, Scene *scene, Progress &progress)
-{
- progress.set_status("Updating Volume Images");
- TaskPool pool;
- ImageManager *image_manager = scene->image_manager;
- set<int> volume_images;
-
- foreach (Mesh *mesh, scene->meshes) {
- if (!mesh->need_update) {
- continue;
- }
-
- foreach (Attribute &attr, mesh->attributes.attributes) {
- if (attr.element != ATTR_ELEMENT_VOXEL) {
- continue;
- }
-
- VoxelAttribute *voxel = attr.data_voxel();
-
- if (voxel->slot != -1) {
- volume_images.insert(voxel->slot);
- }
- }
- }
-
- foreach (int slot, volume_images) {
- pool.push(function_bind(
- &ImageManager::device_update_slot, image_manager, device, scene, slot, &progress));
- }
- pool.wait_work();
-}
-
-void MeshManager::device_update(Device *device,
- DeviceScene *dscene,
- Scene *scene,
- Progress &progress)
-{
- if (!need_update)
- return;
-
- VLOG(1) << "Total " << scene->meshes.size() << " meshes.";
-
- bool true_displacement_used = false;
- size_t total_tess_needed = 0;
-
- foreach (Mesh *mesh, scene->meshes) {
- foreach (Shader *shader, mesh->used_shaders) {
- if (shader->need_update_mesh)
- mesh->need_update = true;
- }
-
- if (mesh->need_update) {
- /* Update normals. */
- mesh->add_face_normals();
- mesh->add_vertex_normals();
-
- if (mesh->need_attribute(scene, ATTR_STD_POSITION_UNDISPLACED)) {
- mesh->add_undisplaced();
- }
-
- /* Test if we need tessellation. */
- if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE && mesh->num_subd_verts == 0 &&
- mesh->subd_params) {
- total_tess_needed++;
- }
-
- /* Test if we need displacement. */
- if (mesh->has_true_displacement()) {
- true_displacement_used = true;
- }
-
- if (progress.get_cancel())
- return;
- }
- }
-
- /* Tessellate meshes that are using subdivision */
- if (total_tess_needed) {
- Camera *dicing_camera = scene->dicing_camera;
- dicing_camera->update(scene);
-
- size_t i = 0;
- foreach (Mesh *mesh, scene->meshes) {
- if (mesh->need_update && mesh->subdivision_type != Mesh::SUBDIVISION_NONE &&
- mesh->num_subd_verts == 0 && mesh->subd_params) {
- string msg = "Tessellating ";
- if (mesh->name == "")
- msg += string_printf("%u/%u", (uint)(i + 1), (uint)total_tess_needed);
- else
- msg += string_printf(
- "%s %u/%u", mesh->name.c_str(), (uint)(i + 1), (uint)total_tess_needed);
-
- progress.set_status("Updating Mesh", msg);
-
- mesh->subd_params->camera = dicing_camera;
- DiagSplit dsplit(*mesh->subd_params);
- mesh->tessellate(&dsplit);
-
- i++;
-
- if (progress.get_cancel())
- return;
- }
- }
- }
-
- /* Update images needed for true displacement. */
- bool old_need_object_flags_update = false;
- if (true_displacement_used) {
- VLOG(1) << "Updating images used for true displacement.";
- device_update_displacement_images(device, scene, progress);
- old_need_object_flags_update = scene->object_manager->need_flags_update;
- scene->object_manager->device_update_flags(device, dscene, scene, progress, false);
- }
-
- /* Device update. */
- device_free(device, dscene);
-
- mesh_calc_offset(scene);
- if (true_displacement_used) {
- device_update_mesh(device, dscene, scene, true, progress);
- }
- if (progress.get_cancel())
- return;
-
- device_update_attributes(device, dscene, scene, progress);
- if (progress.get_cancel())
- return;
-
- /* Update displacement. */
- bool displacement_done = false;
- size_t num_bvh = 0;
-
- foreach (Mesh *mesh, scene->meshes) {
- if (mesh->need_update) {
- if (displace(device, dscene, scene, mesh, progress)) {
- displacement_done = true;
- }
-
- if (mesh->need_build_bvh()) {
- num_bvh++;
- }
- }
-
- if (progress.get_cancel())
- return;
- }
-
- /* Device re-update after displacement. */
- if (displacement_done) {
- device_free(device, dscene);
-
- device_update_attributes(device, dscene, scene, progress);
- if (progress.get_cancel())
- return;
- }
-
- TaskPool pool;
-
- size_t i = 0;
- foreach (Mesh *mesh, scene->meshes) {
- if (mesh->need_update) {
- pool.push(function_bind(
- &Mesh::compute_bvh, mesh, device, dscene, &scene->params, &progress, i, num_bvh));
- if (mesh->need_build_bvh()) {
- i++;
- }
- }
- }
-
- TaskPool::Summary summary;
- pool.wait_work(&summary);
- VLOG(2) << "Objects BVH build pool statistics:\n" << summary.full_report();
-
- foreach (Shader *shader, scene->shaders) {
- shader->need_update_mesh = false;
- }
-
- Scene::MotionType need_motion = scene->need_motion();
- bool motion_blur = need_motion == Scene::MOTION_BLUR;
-
- /* Update objects. */
- vector<Object *> volume_objects;
- foreach (Object *object, scene->objects) {
- object->compute_bounds(motion_blur);
- }
-
- if (progress.get_cancel())
- return;
-
- device_update_bvh(device, dscene, scene, progress);
- if (progress.get_cancel())
- return;
-
- device_update_mesh(device, dscene, scene, false, progress);
- if (progress.get_cancel())
- return;
-
- need_update = false;
-
- if (true_displacement_used) {
- /* Re-tag flags for update, so they're re-evaluated
- * for meshes with correct bounding boxes.
- *
- * This wouldn't cause wrong results, just that true
- * displacement might be less optimal to calculate.
- */
- scene->object_manager->need_flags_update = old_need_object_flags_update;
- }
-}
-
-void MeshManager::device_free(Device *device, DeviceScene *dscene)
-{
- dscene->bvh_nodes.free();
- dscene->bvh_leaf_nodes.free();
- dscene->object_node.free();
- dscene->prim_tri_verts.free();
- dscene->prim_tri_index.free();
- dscene->prim_type.free();
- dscene->prim_visibility.free();
- dscene->prim_index.free();
- dscene->prim_object.free();
- dscene->prim_time.free();
- dscene->tri_shader.free();
- dscene->tri_vnormal.free();
- dscene->tri_vindex.free();
- dscene->tri_patch.free();
- dscene->tri_patch_uv.free();
- dscene->curves.free();
- dscene->curve_keys.free();
- dscene->patches.free();
- dscene->attributes_map.free();
- dscene->attributes_float.free();
- dscene->attributes_float2.free();
- dscene->attributes_float3.free();
- dscene->attributes_uchar4.free();
-
-#ifdef WITH_OSL
- OSLGlobals *og = (OSLGlobals *)device->osl_memory();
-
- if (og) {
- og->object_name_map.clear();
- og->attribute_map.clear();
- og->object_names.clear();
- }
-#else
- (void)device;
-#endif
-}
-
-void MeshManager::tag_update(Scene *scene)
-{
- need_update = true;
- scene->object_manager->need_update = true;
-}
-
-void MeshManager::collect_statistics(const Scene *scene, RenderStats *stats)
-{
- foreach (Mesh *mesh, scene->meshes) {
- stats->mesh.geometry.add_entry(
- NamedSizeEntry(string(mesh->name.c_str()), mesh->get_total_size_in_bytes()));
- }
-}
-
-bool Mesh::need_attribute(Scene *scene, AttributeStandard std)
-{
- if (std == ATTR_STD_NONE)
- return false;
-
- if (scene->need_global_attribute(std))
- return true;
-
- foreach (Shader *shader, used_shaders)
- if (shader->attributes.find(std))
- return true;
-
- return false;
-}
-
-bool Mesh::need_attribute(Scene * /*scene*/, ustring name)
-{
- if (name == ustring())
- return false;
-
- foreach (Shader *shader, used_shaders)
- if (shader->attributes.find(name))
- return true;
-
- return false;
-}
-
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/mesh.h b/intern/cycles/render/mesh.h
index 2bf7b3972e5..3654732b13d 100644
--- a/intern/cycles/render/mesh.h
+++ b/intern/cycles/render/mesh.h
@@ -19,7 +19,9 @@
#include "graph/node.h"
+#include "bvh/bvh_params.h"
#include "render/attribute.h"
+#include "render/geometry.h"
#include "render/shader.h"
#include "util/util_array.h"
@@ -27,7 +29,7 @@
#include "util/util_list.h"
#include "util/util_map.h"
#include "util/util_param.h"
-#include "util/util_transform.h"
+#include "util/util_set.h"
#include "util/util_types.h"
#include "util/util_vector.h"
@@ -49,7 +51,7 @@ struct PackedPatchTable;
/* Mesh */
-class Mesh : public Node {
+class Mesh : public Geometry {
public:
NODE_DECLARE
@@ -108,84 +110,6 @@ class Mesh : public Node {
return triangles.size() / 3;
}
- /* Mesh Curve */
- struct Curve {
- int first_key;
- int num_keys;
-
- int num_segments()
- {
- return num_keys - 1;
- }
-
- void bounds_grow(const int k,
- const float3 *curve_keys,
- const float *curve_radius,
- BoundBox &bounds) const;
- void bounds_grow(float4 keys[4], BoundBox &bounds) const;
- void bounds_grow(const int k,
- const float3 *curve_keys,
- const float *curve_radius,
- const Transform &aligned_space,
- BoundBox &bounds) const;
-
- void motion_keys(const float3 *curve_keys,
- const float *curve_radius,
- const float3 *key_steps,
- size_t num_curve_keys,
- size_t num_steps,
- float time,
- size_t k0,
- size_t k1,
- float4 r_keys[2]) const;
- void cardinal_motion_keys(const float3 *curve_keys,
- const float *curve_radius,
- const float3 *key_steps,
- size_t num_curve_keys,
- size_t num_steps,
- float time,
- size_t k0,
- size_t k1,
- size_t k2,
- size_t k3,
- float4 r_keys[4]) const;
-
- void keys_for_step(const float3 *curve_keys,
- const float *curve_radius,
- const float3 *key_steps,
- size_t num_curve_keys,
- size_t num_steps,
- size_t step,
- size_t k0,
- size_t k1,
- float4 r_keys[2]) const;
- void cardinal_keys_for_step(const float3 *curve_keys,
- const float *curve_radius,
- const float3 *key_steps,
- size_t num_curve_keys,
- size_t num_steps,
- size_t step,
- size_t k0,
- size_t k1,
- size_t k2,
- size_t k3,
- float4 r_keys[4]) const;
- };
-
- Curve get_curve(size_t i) const
- {
- int first = curve_first_key[i];
- int next_first = (i + 1 < curve_first_key.size()) ? curve_first_key[i + 1] : curve_keys.size();
-
- Curve curve = {first, next_first - first};
- return curve;
- }
-
- size_t num_curves() const
- {
- return curve_first_key.size();
- }
-
/* Mesh SubdFace */
struct SubdFace {
int start_corner;
@@ -219,14 +143,6 @@ class Mesh : public Node {
SubdivisionType subdivision_type;
/* Mesh Data */
- enum GeometryFlags {
- GEOMETRY_NONE = 0,
- GEOMETRY_TRIANGLES = (1 << 0),
- GEOMETRY_CURVES = (1 << 1),
- };
- int geometry_flags; /* used to distinguish meshes with no verts
- and meshes for which geometry is not created */
-
array<int> triangles;
array<float3> verts;
array<int> shader;
@@ -236,14 +152,9 @@ class Mesh : public Node {
array<int> triangle_patch; /* must be < 0 for non subd triangles */
array<float2> vert_patch_uv;
- float volume_isovalue;
- bool has_volume; /* Set in the device_update_flags(). */
- bool has_surface_bssrdf; /* Set in the device_update_flags(). */
-
- array<float3> curve_keys;
- array<float> curve_radius;
- array<int> curve_first_key;
- array<int> curve_shader;
+ float volume_clipping;
+ float volume_step_size;
+ bool volume_object_space;
array<SubdFace> subd_faces;
array<int> subd_face_corners;
@@ -253,65 +164,53 @@ class Mesh : public Node {
SubdParams *subd_params;
- vector<Shader *> used_shaders;
- AttributeSet attributes;
- AttributeSet curve_attributes;
AttributeSet subd_attributes;
- BoundBox bounds;
- bool transform_applied;
- bool transform_negative_scaled;
- Transform transform_normal;
-
PackedPatchTable *patch_table;
- uint motion_steps;
- bool use_motion_blur;
-
- /* Update Flags */
- bool need_update;
- bool need_update_rebuild;
-
/* BVH */
- BVH *bvh;
- size_t tri_offset;
size_t vert_offset;
- size_t curve_offset;
- size_t curvekey_offset;
-
size_t patch_offset;
size_t patch_table_offset;
size_t face_offset;
size_t corner_offset;
- size_t attr_map_offset;
-
size_t num_subd_verts;
+ private:
+ unordered_map<int, int> vert_to_stitching_key_map; /* real vert index -> stitching index */
+ unordered_multimap<int, int>
+ vert_stitching_map; /* stitching index -> multiple real vert indices */
+ friend class DiagSplit;
+ friend class GeometryManager;
+
+ public:
/* Functions */
Mesh();
~Mesh();
void resize_mesh(int numverts, int numfaces);
void reserve_mesh(int numverts, int numfaces);
- void resize_curves(int numcurves, int numkeys);
- void reserve_curves(int numcurves, int numkeys);
void resize_subd_faces(int numfaces, int num_ngons, int numcorners);
void reserve_subd_faces(int numfaces, int num_ngons, int numcorners);
- void clear(bool preserve_voxel_data = false);
+ void clear(bool preserve_voxel_data);
+ void clear() override;
void add_vertex(float3 P);
void add_vertex_slow(float3 P);
void add_triangle(int v0, int v1, int v2, int shader, bool smooth);
- void add_curve_key(float3 loc, float radius);
- void add_curve(int first_key, int shader);
void add_subd_face(int *corners, int num_corners, int shader_, bool smooth_);
- void compute_bounds();
+ void copy_center_to_motion_step(const int motion_step);
+
+ void compute_bounds() override;
+ void apply_transform(const Transform &tfm, const bool apply_to_motion) override;
void add_face_normals();
void add_vertex_normals();
void add_undisplaced();
+ void get_uv_tiles(ustring map, unordered_set<int> &tiles) override;
+
void pack_shaders(Scene *scene, uint *shader);
void pack_normals(float4 *vnormal);
void pack_verts(const vector<uint> &tri_prim_index,
@@ -320,101 +219,11 @@ class Mesh : public Node {
float2 *tri_patch_uv,
size_t vert_offset,
size_t tri_offset);
- void pack_curves(Scene *scene, float4 *curve_key_co, float4 *curve_data, size_t curvekey_offset);
void pack_patches(uint *patch_data, uint vert_offset, uint face_offset, uint corner_offset);
- void compute_bvh(Device *device,
- DeviceScene *dscene,
- SceneParams *params,
- Progress *progress,
- int n,
- int total);
-
- bool need_attribute(Scene *scene, AttributeStandard std);
- bool need_attribute(Scene *scene, ustring name);
-
- void tag_update(Scene *scene, bool rebuild);
-
- bool has_motion_blur() const;
- bool has_true_displacement() const;
-
- /* Convert between normalized -1..1 motion time and index
- * in the VERTEX_MOTION attribute. */
- float motion_time(int step) const;
- int motion_step(float time) const;
-
- /* Check whether the mesh should have its own BVH built separately. Briefly,
- * its own BVH is needed for a mesh if:
- *
- * - It is instanced multiple times, so each instance object should share the
- * same BVH tree.
- * - Special ray intersection is needed, for example to limit subsurface rays
- * to only the mesh itself.
- */
- bool need_build_bvh() const;
-
- /* Check if the mesh should be treated as instanced. */
- bool is_instanced() const;
-
void tessellate(DiagSplit *split);
};
-/* Mesh Manager */
-
-class MeshManager {
- public:
- bool need_update;
- bool need_flags_update;
-
- MeshManager();
- ~MeshManager();
-
- bool displace(Device *device, DeviceScene *dscene, Scene *scene, Mesh *mesh, Progress &progress);
-
- /* attributes */
- void update_osl_attributes(Device *device,
- Scene *scene,
- vector<AttributeRequestSet> &mesh_attributes);
- void update_svm_attributes(Device *device,
- DeviceScene *dscene,
- Scene *scene,
- vector<AttributeRequestSet> &mesh_attributes);
-
- void device_update_preprocess(Device *device, Scene *scene, Progress &progress);
- void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
-
- void device_free(Device *device, DeviceScene *dscene);
-
- void tag_update(Scene *scene);
-
- void create_volume_mesh(Scene *scene, Mesh *mesh, Progress &progress);
-
- void collect_statistics(const Scene *scene, RenderStats *stats);
-
- protected:
- /* Calculate verts/triangles/curves offsets in global arrays. */
- void mesh_calc_offset(Scene *scene);
-
- void device_update_object(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
-
- void device_update_mesh(Device *device,
- DeviceScene *dscene,
- Scene *scene,
- bool for_displacement,
- Progress &progress);
-
- void device_update_attributes(Device *device,
- DeviceScene *dscene,
- Scene *scene,
- Progress &progress);
-
- void device_update_bvh(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
-
- void device_update_displacement_images(Device *device, Scene *scene, Progress &progress);
-
- void device_update_volume_images(Device *device, Scene *scene, Progress &progress);
-};
-
CCL_NAMESPACE_END
#endif /* __MESH_H__ */
diff --git a/intern/cycles/render/mesh_displace.cpp b/intern/cycles/render/mesh_displace.cpp
index 5ae9348d83e..467810f9273 100644
--- a/intern/cycles/render/mesh_displace.cpp
+++ b/intern/cycles/render/mesh_displace.cpp
@@ -22,7 +22,9 @@
#include "render/shader.h"
#include "util/util_foreach.h"
+#include "util/util_map.h"
#include "util/util_progress.h"
+#include "util/util_set.h"
CCL_NAMESPACE_BEGIN
@@ -41,7 +43,7 @@ static float3 compute_face_normal(const Mesh::Triangle &t, float3 *verts)
return norm / normlen;
}
-bool MeshManager::displace(
+bool GeometryManager::displace(
Device *device, DeviceScene *dscene, Scene *scene, Mesh *mesh, Progress &progress)
{
/* verify if we have a displacement shader */
@@ -56,7 +58,7 @@ bool MeshManager::displace(
size_t object_index = OBJECT_NONE;
for (size_t i = 0; i < scene->objects.size(); i++) {
- if (scene->objects[i]->mesh == mesh) {
+ if (scene->objects[i]->geometry == mesh) {
object_index = i;
break;
}
@@ -89,7 +91,7 @@ bool MeshManager::displace(
/* set up object, primitive and barycentric coordinates */
int object = object_index;
- int prim = mesh->tri_offset + i;
+ int prim = mesh->prim_offset + i;
float u, v;
switch (j) {
@@ -184,6 +186,38 @@ bool MeshManager::displace(
d_output.free();
+ /* stitch */
+ unordered_set<int> stitch_keys;
+ for (pair<int, int> i : mesh->vert_to_stitching_key_map) {
+ stitch_keys.insert(i.second); /* stitching index */
+ }
+
+ typedef unordered_multimap<int, int>::iterator map_it_t;
+
+ for (int key : stitch_keys) {
+ pair<map_it_t, map_it_t> verts = mesh->vert_stitching_map.equal_range(key);
+
+ float3 pos = make_float3(0.0f, 0.0f, 0.0f);
+ int num = 0;
+
+ for (map_it_t v = verts.first; v != verts.second; ++v) {
+ int vert = v->second;
+
+ pos += mesh->verts[vert];
+ num++;
+ }
+
+ if (num <= 1) {
+ continue;
+ }
+
+ pos *= 1.0f / num;
+
+ for (map_it_t v = verts.first; v != verts.second; ++v) {
+ mesh->verts[v->second] = pos;
+ }
+ }
+
/* for displacement method both, we only need to recompute the face
* normals, as bump mapping in the shader will already alter the
* vertex normal, so we start from the non-displaced vertex normals
@@ -238,7 +272,25 @@ bool MeshManager::displace(
for (size_t i = 0; i < num_triangles; i++) {
if (tri_has_true_disp[i]) {
for (size_t j = 0; j < 3; j++) {
- vN[mesh->get_triangle(i).v[j]] += fN[i];
+ int vert = mesh->get_triangle(i).v[j];
+ vN[vert] += fN[i];
+
+ /* add face normals to stitched vertices */
+ if (stitch_keys.size()) {
+ map_it_t key = mesh->vert_to_stitching_key_map.find(vert);
+
+ if (key != mesh->vert_to_stitching_key_map.end()) {
+ pair<map_it_t, map_it_t> verts = mesh->vert_stitching_map.equal_range(key->second);
+
+ for (map_it_t v = verts.first; v != verts.second; ++v) {
+ if (v->second == vert) {
+ continue;
+ }
+
+ vN[v->second] += fN[i];
+ }
+ }
+ }
}
}
}
@@ -289,8 +341,27 @@ bool MeshManager::displace(
for (size_t i = 0; i < num_triangles; i++) {
if (tri_has_true_disp[i]) {
for (size_t j = 0; j < 3; j++) {
+ int vert = mesh->get_triangle(i).v[j];
float3 fN = compute_face_normal(mesh->get_triangle(i), mP);
- mN[mesh->get_triangle(i).v[j]] += fN;
+ mN[vert] += fN;
+
+ /* add face normals to stitched vertices */
+ if (stitch_keys.size()) {
+ map_it_t key = mesh->vert_to_stitching_key_map.find(vert);
+
+ if (key != mesh->vert_to_stitching_key_map.end()) {
+ pair<map_it_t, map_it_t> verts = mesh->vert_stitching_map.equal_range(
+ key->second);
+
+ for (map_it_t v = verts.first; v != verts.second; ++v) {
+ if (v->second == vert) {
+ continue;
+ }
+
+ mN[v->second] += fN;
+ }
+ }
+ }
}
}
}
diff --git a/intern/cycles/render/mesh_subdivision.cpp b/intern/cycles/render/mesh_subdivision.cpp
index 46c8240fb71..3d72b2fab91 100644
--- a/intern/cycles/render/mesh_subdivision.cpp
+++ b/intern/cycles/render/mesh_subdivision.cpp
@@ -14,16 +14,17 @@
* limitations under the License.
*/
-#include "render/mesh.h"
#include "render/attribute.h"
#include "render/camera.h"
+#include "render/mesh.h"
-#include "subd/subd_split.h"
#include "subd/subd_patch.h"
#include "subd/subd_patch_table.h"
+#include "subd/subd_split.h"
-#include "util/util_foreach.h"
#include "util/util_algorithm.h"
+#include "util/util_foreach.h"
+#include "util/util_hash.h"
CCL_NAMESPACE_BEGIN
@@ -31,10 +32,10 @@ CCL_NAMESPACE_BEGIN
CCL_NAMESPACE_END
-# include <opensubdiv/far/topologyRefinerFactory.h>
-# include <opensubdiv/far/primvarRefiner.h>
-# include <opensubdiv/far/patchTableFactory.h>
# include <opensubdiv/far/patchMap.h>
+# include <opensubdiv/far/patchTableFactory.h>
+# include <opensubdiv/far/primvarRefiner.h>
+# include <opensubdiv/far/topologyRefinerFactory.h>
/* specializations of TopologyRefinerFactory for ccl::Mesh */
@@ -318,6 +319,9 @@ class OsdData {
struct OsdPatch : Patch {
OsdData *osd_data;
+ OsdPatch()
+ {
+ }
OsdPatch(OsdData *data) : osd_data(data)
{
}
@@ -358,11 +362,6 @@ struct OsdPatch : Patch {
*N = (t != 0.0f) ? *N / t : make_float3(0.0f, 0.0f, 1.0f);
}
}
-
- BoundBox bound()
- {
- return BoundBox::empty;
- }
};
#endif
@@ -395,31 +394,64 @@ void Mesh::tessellate(DiagSplit *split)
int num_faces = subd_faces.size();
Attribute *attr_vN = subd_attributes.find(ATTR_STD_VERTEX_NORMAL);
- float3 *vN = attr_vN->data_float3();
+ float3 *vN = (attr_vN) ? attr_vN->data_float3() : NULL;
+ /* count patches */
+ int num_patches = 0;
for (int f = 0; f < num_faces; f++) {
SubdFace &face = subd_faces[f];
if (face.is_quad()) {
- /* quad */
- QuadDice::SubPatch subpatch;
+ num_patches++;
+ }
+ else {
+ num_patches += face.num_corners;
+ }
+ }
- LinearQuadPatch quad_patch;
+ /* build patches from faces */
#ifdef WITH_OPENSUBDIV
- OsdPatch osd_patch(&osd_data);
+ if (subdivision_type == SUBDIVISION_CATMULL_CLARK) {
+ vector<OsdPatch> osd_patches(num_patches, &osd_data);
+ OsdPatch *patch = osd_patches.data();
- if (subdivision_type == SUBDIVISION_CATMULL_CLARK) {
- osd_patch.patch_index = face.ptex_offset;
+ for (int f = 0; f < num_faces; f++) {
+ SubdFace &face = subd_faces[f];
- subpatch.patch = &osd_patch;
+ if (face.is_quad()) {
+ patch->patch_index = face.ptex_offset;
+ patch->from_ngon = false;
+ patch->shader = face.shader;
+ patch++;
+ }
+ else {
+ for (int corner = 0; corner < face.num_corners; corner++) {
+ patch->patch_index = face.ptex_offset + corner;
+ patch->from_ngon = true;
+ patch->shader = face.shader;
+ patch++;
+ }
}
- else
+ }
+
+ /* split patches */
+ split->split_patches(osd_patches.data(), sizeof(OsdPatch));
+ }
+ else
#endif
- {
- float3 *hull = quad_patch.hull;
- float3 *normals = quad_patch.normals;
+ {
+ vector<LinearQuadPatch> linear_patches(num_patches);
+ LinearQuadPatch *patch = linear_patches.data();
+
+ for (int f = 0; f < num_faces; f++) {
+ SubdFace &face = subd_faces[f];
- quad_patch.patch_index = face.ptex_offset;
+ if (face.is_quad()) {
+ float3 *hull = patch->hull;
+ float3 *normals = patch->normals;
+
+ patch->patch_index = face.ptex_offset;
+ patch->from_ngon = false;
for (int i = 0; i < 4; i++) {
hull[i] = verts[subd_face_corners[face.start_corner + i]];
@@ -440,55 +472,11 @@ void Mesh::tessellate(DiagSplit *split)
swap(hull[2], hull[3]);
swap(normals[2], normals[3]);
- subpatch.patch = &quad_patch;
+ patch->shader = face.shader;
+ patch++;
}
-
- subpatch.patch->shader = face.shader;
-
- /* Quad faces need to be split at least once to line up with split ngons; we do this
- * here in this manner because if we do it later, edge factors may end up slightly off.
- */
- subpatch.P00 = make_float2(0.0f, 0.0f);
- subpatch.P10 = make_float2(0.5f, 0.0f);
- subpatch.P01 = make_float2(0.0f, 0.5f);
- subpatch.P11 = make_float2(0.5f, 0.5f);
- split->split_quad(subpatch.patch, &subpatch);
-
- subpatch.P00 = make_float2(0.5f, 0.0f);
- subpatch.P10 = make_float2(1.0f, 0.0f);
- subpatch.P01 = make_float2(0.5f, 0.5f);
- subpatch.P11 = make_float2(1.0f, 0.5f);
- split->split_quad(subpatch.patch, &subpatch);
-
- subpatch.P00 = make_float2(0.0f, 0.5f);
- subpatch.P10 = make_float2(0.5f, 0.5f);
- subpatch.P01 = make_float2(0.0f, 1.0f);
- subpatch.P11 = make_float2(0.5f, 1.0f);
- split->split_quad(subpatch.patch, &subpatch);
-
- subpatch.P00 = make_float2(0.5f, 0.5f);
- subpatch.P10 = make_float2(1.0f, 0.5f);
- subpatch.P01 = make_float2(0.5f, 1.0f);
- subpatch.P11 = make_float2(1.0f, 1.0f);
- split->split_quad(subpatch.patch, &subpatch);
- }
- else {
- /* ngon */
-#ifdef WITH_OPENSUBDIV
- if (subdivision_type == SUBDIVISION_CATMULL_CLARK) {
- OsdPatch patch(&osd_data);
-
- patch.shader = face.shader;
-
- for (int corner = 0; corner < face.num_corners; corner++) {
- patch.patch_index = face.ptex_offset + corner;
-
- split->split_quad(&patch);
- }
- }
- else
-#endif
- {
+ else {
+ /* ngon */
float3 center_vert = make_float3(0.0f, 0.0f, 0.0f);
float3 center_normal = make_float3(0.0f, 0.0f, 0.0f);
@@ -499,13 +487,13 @@ void Mesh::tessellate(DiagSplit *split)
}
for (int corner = 0; corner < face.num_corners; corner++) {
- LinearQuadPatch patch;
- float3 *hull = patch.hull;
- float3 *normals = patch.normals;
+ float3 *hull = patch->hull;
+ float3 *normals = patch->normals;
- patch.patch_index = face.ptex_offset + corner;
+ patch->patch_index = face.ptex_offset + corner;
+ patch->from_ngon = true;
- patch.shader = face.shader;
+ patch->shader = face.shader;
hull[0] =
verts[subd_face_corners[face.start_corner + mod(corner + 0, face.num_corners)]];
@@ -537,10 +525,13 @@ void Mesh::tessellate(DiagSplit *split)
}
}
- split->split_quad(&patch);
+ patch++;
}
}
}
+
+ /* split patches */
+ split->split_patches(linear_patches.data(), sizeof(LinearQuadPatch));
}
/* interpolate center points for attributes */
diff --git a/intern/cycles/render/mesh_volume.cpp b/intern/cycles/render/mesh_volume.cpp
index a1d61fd4db7..607363d01c6 100644
--- a/intern/cycles/render/mesh_volume.cpp
+++ b/intern/cycles/render/mesh_volume.cpp
@@ -14,29 +14,30 @@
* limitations under the License.
*/
-#include "render/mesh.h"
#include "render/attribute.h"
+#include "render/mesh.h"
#include "render/scene.h"
#include "util/util_foreach.h"
+#include "util/util_hash.h"
#include "util/util_logging.h"
#include "util/util_progress.h"
#include "util/util_types.h"
CCL_NAMESPACE_BEGIN
-static size_t compute_voxel_index(const int3 &resolution, size_t x, size_t y, size_t z)
+const int64_t VOXEL_INDEX_NONE = -1;
+
+static int64_t compute_voxel_index(const int3 &resolution, int64_t x, int64_t y, int64_t z)
{
- if (x == -1 || x >= resolution.x) {
- return -1;
+ if (x < 0 || x >= resolution.x) {
+ return VOXEL_INDEX_NONE;
}
-
- if (y == -1 || y >= resolution.y) {
- return -1;
+ else if (y < 0 || y >= resolution.y) {
+ return VOXEL_INDEX_NONE;
}
-
- if (z == -1 || z >= resolution.z) {
- return -1;
+ else if (z < 0 || z >= resolution.z) {
+ return VOXEL_INDEX_NONE;
}
return x + y * resolution.x + z * resolution.x * resolution.y;
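As a quick sanity check of the indexing above: for a resolution of (4, 4, 4), voxel (1, 2, 3) maps to index 1 + 2 * 4 + 3 * 4 * 4 = 57, while any coordinate outside [0, resolution) now returns VOXEL_INDEX_NONE instead of relying on the previous unsigned wrap-around of -1.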
@@ -135,18 +136,18 @@ static const int CUBE_SIZE = 8;
*
* The way the algorithm works is as follows:
*
- * - the coordinates of active voxels from a dense volume (or 3d image) are
- * gathered inside an auxialliary volume.
- * - each set of coordinates of an CUBE_SIZE cube are mapped to the same
- * coordinate of the auxilliary volume.
- * - quads are created between active and non-active voxels in the auxialliary
- * volume to generate a tight mesh around the volume.
+ * - The coordinates of active voxels from a dense volume (or 3d image) are
+ * gathered inside an auxiliary volume.
+ * - Each set of coordinates of a CUBE_SIZE cube is mapped to the same
+ * coordinate of the auxiliary volume.
+ * - Quads are created between active and non-active voxels in the auxiliary
+ * volume to generate a tight mesh around the volume.
*/
class VolumeMeshBuilder {
- /* Auxilliary volume that is used to check if a node already added. */
+ /* Auxiliary volume that is used to check if a node was already added. */
vector<char> grid;
- /* The resolution of the auxilliary volume, set to be equal to 1/CUBE_SIZE
+ /* The resolution of the auxiliary volume, set to be equal to 1/CUBE_SIZE
* of the original volume on each axis. */
int3 res;
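In other words, with CUBE_SIZE = 8 the builder downsamples the dense grid by 8 on each axis (ignoring the padding offset): voxels (17, 3, 40) and (23, 7, 47) both land in auxiliary cell (2, 0, 5), and a quad is later emitted wherever such an occupied cell borders an empty one.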
@@ -184,15 +185,15 @@ VolumeMeshBuilder::VolumeMeshBuilder(VolumeParams *volume_params)
params = volume_params;
number_of_nodes = 0;
- const size_t x = divide_up(params->resolution.x, CUBE_SIZE);
- const size_t y = divide_up(params->resolution.y, CUBE_SIZE);
- const size_t z = divide_up(params->resolution.z, CUBE_SIZE);
+ const int64_t x = divide_up(params->resolution.x, CUBE_SIZE);
+ const int64_t y = divide_up(params->resolution.y, CUBE_SIZE);
+ const int64_t z = divide_up(params->resolution.z, CUBE_SIZE);
/* Adding 2*pad_size since we pad in both positive and negative directions
* along the axis. */
- const size_t px = divide_up(params->resolution.x + 2 * params->pad_size, CUBE_SIZE);
- const size_t py = divide_up(params->resolution.y + 2 * params->pad_size, CUBE_SIZE);
- const size_t pz = divide_up(params->resolution.z + 2 * params->pad_size, CUBE_SIZE);
+ const int64_t px = divide_up(params->resolution.x + 2 * params->pad_size, CUBE_SIZE);
+ const int64_t py = divide_up(params->resolution.y + 2 * params->pad_size, CUBE_SIZE);
+ const int64_t pz = divide_up(params->resolution.z + 2 * params->pad_size, CUBE_SIZE);
res = make_int3(px, py, pz);
pad_offset = make_int3(px - x, py - y, pz - z);
@@ -209,7 +210,10 @@ void VolumeMeshBuilder::add_node(int x, int y, int z)
assert((index_x >= 0) && (index_y >= 0) && (index_z >= 0));
- const size_t index = compute_voxel_index(res, index_x, index_y, index_z);
+ const int64_t index = compute_voxel_index(res, index_x, index_y, index_z);
+ if (index == VOXEL_INDEX_NONE) {
+ return;
+ }
/* We already have a node here. */
if (grid[index] == 1) {
@@ -256,7 +260,7 @@ void VolumeMeshBuilder::generate_vertices_and_quads(vector<ccl::int3> &vertices_
for (int z = 0; z < res.z; ++z) {
for (int y = 0; y < res.y; ++y) {
for (int x = 0; x < res.x; ++x) {
- size_t voxel_index = compute_voxel_index(res, x, y, z);
+ int64_t voxel_index = compute_voxel_index(res, x, y, z);
if (grid[voxel_index] == 0) {
continue;
}
@@ -285,32 +289,32 @@ void VolumeMeshBuilder::generate_vertices_and_quads(vector<ccl::int3> &vertices_
*/
voxel_index = compute_voxel_index(res, x - 1, y, z);
- if (voxel_index == -1 || grid[voxel_index] == 0) {
+ if (voxel_index == VOXEL_INDEX_NONE || grid[voxel_index] == 0) {
create_quad(corners, vertices_is, quads, res, used_verts, QUAD_X_MIN);
}
voxel_index = compute_voxel_index(res, x + 1, y, z);
- if (voxel_index == -1 || grid[voxel_index] == 0) {
+ if (voxel_index == VOXEL_INDEX_NONE || grid[voxel_index] == 0) {
create_quad(corners, vertices_is, quads, res, used_verts, QUAD_X_MAX);
}
voxel_index = compute_voxel_index(res, x, y - 1, z);
- if (voxel_index == -1 || grid[voxel_index] == 0) {
+ if (voxel_index == VOXEL_INDEX_NONE || grid[voxel_index] == 0) {
create_quad(corners, vertices_is, quads, res, used_verts, QUAD_Y_MIN);
}
voxel_index = compute_voxel_index(res, x, y + 1, z);
- if (voxel_index == -1 || grid[voxel_index] == 0) {
+ if (voxel_index == VOXEL_INDEX_NONE || grid[voxel_index] == 0) {
create_quad(corners, vertices_is, quads, res, used_verts, QUAD_Y_MAX);
}
voxel_index = compute_voxel_index(res, x, y, z - 1);
- if (voxel_index == -1 || grid[voxel_index] == 0) {
+ if (voxel_index == VOXEL_INDEX_NONE || grid[voxel_index] == 0) {
create_quad(corners, vertices_is, quads, res, used_verts, QUAD_Z_MIN);
}
voxel_index = compute_voxel_index(res, x, y, z + 1);
- if (voxel_index == -1 || grid[voxel_index] == 0) {
+ if (voxel_index == VOXEL_INDEX_NONE || grid[voxel_index] == 0) {
create_quad(corners, vertices_is, quads, res, used_verts, QUAD_Z_MAX);
}
}
@@ -362,7 +366,7 @@ struct VoxelAttributeGrid {
int channels;
};
-void MeshManager::create_volume_mesh(Scene *scene, Mesh *mesh, Progress &progress)
+void GeometryManager::create_volume_mesh(Mesh *mesh, Progress &progress)
{
string msg = string_printf("Computing Volume Mesh %s", mesh->name.c_str());
progress.set_status("Updating Mesh", msg);
@@ -373,13 +377,15 @@ void MeshManager::create_volume_mesh(Scene *scene, Mesh *mesh, Progress &progres
VolumeParams volume_params;
volume_params.resolution = make_int3(0, 0, 0);
+ Transform transform = transform_identity();
+
foreach (Attribute &attr, mesh->attributes.attributes) {
if (attr.element != ATTR_ELEMENT_VOXEL) {
continue;
}
- VoxelAttribute *voxel = attr.data_voxel();
- device_memory *image_memory = scene->image_manager->image_memory(voxel->slot);
+ ImageHandle &handle = attr.data_voxel();
+ device_texture *image_memory = handle.image_memory();
int3 resolution = make_int3(
image_memory->data_width, image_memory->data_height, image_memory->data_depth);
@@ -387,14 +393,20 @@ void MeshManager::create_volume_mesh(Scene *scene, Mesh *mesh, Progress &progres
volume_params.resolution = resolution;
}
else if (volume_params.resolution != resolution) {
- VLOG(1) << "Can't create volume mesh, all voxel grid resolutions must be equal\n";
- return;
+ /* TODO: support this as it's common for OpenVDB. */
+ VLOG(1) << "Can't create accurate volume mesh, all voxel grid resolutions must be equal\n";
+ continue;
}
VoxelAttributeGrid voxel_grid;
voxel_grid.data = static_cast<float *>(image_memory->host_pointer);
voxel_grid.channels = image_memory->data_elements;
voxel_grids.push_back(voxel_grid);
+
+ /* TODO: support multiple transforms. */
+ if (image_memory->info.use_transform_3d) {
+ transform = image_memory->info.transform_3d;
+ }
}
if (voxel_grids.empty()) {
@@ -427,37 +439,41 @@ void MeshManager::create_volume_mesh(Scene *scene, Mesh *mesh, Progress &progres
}
/* Compute start point and cell size from transform. */
- Attribute *attr = mesh->attributes.find(ATTR_STD_GENERATED_TRANSFORM);
const int3 resolution = volume_params.resolution;
float3 start_point = make_float3(0.0f, 0.0f, 0.0f);
float3 cell_size = make_float3(1.0f / resolution.x, 1.0f / resolution.y, 1.0f / resolution.z);
- if (attr) {
- const Transform *tfm = attr->data_transform();
- const Transform itfm = transform_inverse(*tfm);
- start_point = transform_point(&itfm, start_point);
- cell_size = transform_direction(&itfm, cell_size);
- }
+ /* TODO: support arbitrary transforms, not just scale + translate. */
+ const Transform itfm = transform_inverse(transform);
+ start_point = transform_point(&itfm, start_point);
+ cell_size = transform_direction(&itfm, cell_size);
+
+ /* Slightly offset vertex coordinates to avoid overlapping faces with other
+ * volumes or meshes. The proper solution would be to improve intersection in
+ * the kernel to support robust handling of multiple overlapping faces or use
+ * an all-hit intersection similar to shadows. */
+ const float3 face_overlap_avoidance = cell_size * 0.1f *
+ hash_uint_to_float(hash_string(mesh->name.c_str()));
- volume_params.start_point = start_point;
+ volume_params.start_point = start_point + face_overlap_avoidance;
volume_params.cell_size = cell_size;
volume_params.pad_size = pad_size;
/* Build bounding mesh around non-empty volume cells. */
VolumeMeshBuilder builder(&volume_params);
- const float isovalue = mesh->volume_isovalue;
+ const float clipping = mesh->volume_clipping;
for (int z = 0; z < resolution.z; ++z) {
for (int y = 0; y < resolution.y; ++y) {
for (int x = 0; x < resolution.x; ++x) {
- size_t voxel_index = compute_voxel_index(resolution, x, y, z);
+ int64_t voxel_index = compute_voxel_index(resolution, x, y, z);
for (size_t i = 0; i < voxel_grids.size(); ++i) {
const VoxelAttributeGrid &voxel_grid = voxel_grids[i];
const int channels = voxel_grid.channels;
for (int c = 0; c < channels; c++) {
- if (voxel_grid.data[voxel_index * channels + c] >= isovalue) {
+ if (voxel_grid.data[voxel_index * channels + c] >= clipping) {
builder.add_node_with_padding(x, y, z);
break;
}
diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp
index f3572ee1585..d5f65fb54db 100644
--- a/intern/cycles/render/nodes.cpp
+++ b/intern/cycles/render/nodes.cpp
@@ -14,24 +14,30 @@
* limitations under the License.
*/
+#include "render/nodes.h"
+#include "render/colorspace.h"
+#include "render/constant_fold.h"
#include "render/film.h"
#include "render/image.h"
+#include "render/image_sky.h"
#include "render/integrator.h"
#include "render/light.h"
-#include "render/nodes.h"
+#include "render/mesh.h"
+#include "render/osl.h"
#include "render/scene.h"
#include "render/svm.h"
-#include "kernel/svm/svm_color_util.h"
-#include "kernel/svm/svm_ramp_util.h"
-#include "kernel/svm/svm_math_util.h"
-#include "render/osl.h"
-#include "render/constant_fold.h"
-#include "util/util_sky_model.h"
+#include "sky_model.h"
+
#include "util/util_foreach.h"
#include "util/util_logging.h"
#include "util/util_transform.h"
+#include "kernel/svm/svm_color_util.h"
+#include "kernel/svm/svm_mapping_util.h"
+#include "kernel/svm/svm_math_util.h"
+#include "kernel/svm/svm_ramp_util.h"
+
CCL_NAMESPACE_BEGIN
/* Texture Mapping */
@@ -148,7 +154,7 @@ bool TextureMapping::skip()
void TextureMapping::compile(SVMCompiler &compiler, int offset_in, int offset_out)
{
- compiler.add_node(NODE_MAPPING, offset_in, offset_out);
+ compiler.add_node(NODE_TEXTURE_MAPPING, offset_in, offset_out);
Transform tfm = compute_transform();
compiler.add_node(tfm.x);
@@ -162,8 +168,10 @@ void TextureMapping::compile(SVMCompiler &compiler, int offset_in, int offset_ou
}
if (type == NORMAL) {
- compiler.add_node(NODE_VECTOR_MATH, NODE_VECTOR_MATH_NORMALIZE, offset_out, offset_out);
- compiler.add_node(NODE_VECTOR_MATH, SVM_STACK_INVALID, offset_out);
+ compiler.add_node(NODE_VECTOR_MATH,
+ NODE_VECTOR_MATH_NORMALIZE,
+ compiler.encode_uchar4(offset_out, offset_out, offset_out),
+ compiler.encode_uchar4(SVM_STACK_INVALID, offset_out));
}
}
@@ -207,13 +215,15 @@ NODE_DEFINE(ImageTextureNode)
TEXTURE_MAPPING_DEFINE(ImageTextureNode);
SOCKET_STRING(filename, "Filename", ustring());
+ SOCKET_STRING(colorspace, "Colorspace", u_colorspace_auto);
- static NodeEnum color_space_enum;
- color_space_enum.insert("none", NODE_COLOR_SPACE_NONE);
- color_space_enum.insert("color", NODE_COLOR_SPACE_COLOR);
- SOCKET_ENUM(color_space, "Color Space", color_space_enum, NODE_COLOR_SPACE_COLOR);
-
- SOCKET_BOOLEAN(use_alpha, "Use Alpha", true);
+ static NodeEnum alpha_type_enum;
+ alpha_type_enum.insert("auto", IMAGE_ALPHA_AUTO);
+ alpha_type_enum.insert("unassociated", IMAGE_ALPHA_UNASSOCIATED);
+ alpha_type_enum.insert("associated", IMAGE_ALPHA_ASSOCIATED);
+ alpha_type_enum.insert("channel_packed", IMAGE_ALPHA_CHANNEL_PACKED);
+ alpha_type_enum.insert("ignore", IMAGE_ALPHA_IGNORE);
+ SOCKET_ENUM(alpha_type, "Alpha Type", alpha_type_enum, IMAGE_ALPHA_AUTO);
static NodeEnum interpolation_enum;
interpolation_enum.insert("closest", INTERPOLATION_CLOSEST);
@@ -247,32 +257,90 @@ NODE_DEFINE(ImageTextureNode)
ImageTextureNode::ImageTextureNode() : ImageSlotTextureNode(node_type)
{
- image_manager = NULL;
- slot = -1;
- is_float = -1;
- is_linear = false;
- builtin_data = NULL;
+ colorspace = u_colorspace_raw;
animated = false;
-}
-
-ImageTextureNode::~ImageTextureNode()
-{
- if (image_manager) {
- image_manager->remove_image(
- filename.string(), builtin_data, interpolation, extension, use_alpha);
- }
+ tiles.push_back(1001);
}
ShaderNode *ImageTextureNode::clone() const
{
ImageTextureNode *node = new ImageTextureNode(*this);
- node->image_manager = NULL;
- node->slot = -1;
- node->is_float = -1;
- node->is_linear = false;
+ node->handle = handle;
return node;
}
+ImageParams ImageTextureNode::image_params() const
+{
+ ImageParams params;
+ params.animated = animated;
+ params.interpolation = interpolation;
+ params.extension = extension;
+ params.alpha_type = alpha_type;
+ params.colorspace = colorspace;
+ return params;
+}
+
+void ImageTextureNode::cull_tiles(Scene *scene, ShaderGraph *graph)
+{
+ /* Box projection computes its own UVs that always lie in the
+ * 1001 tile, so there's no point in loading any others. */
+ if (projection == NODE_IMAGE_PROJ_BOX) {
+ tiles.clear();
+ tiles.push_back(1001);
+ return;
+ }
+
+ if (!scene->params.background) {
+ /* During interactive renders, all tiles are loaded.
+ * While we could support updating this when UVs change, that could lead
+ * to annoying interruptions when loading images while editing UVs. */
+ return;
+ }
+
+ /* Only check UVs for tile culling if there are multiple tiles. */
+ if (tiles.size() < 2) {
+ return;
+ }
+
+ ShaderInput *vector_in = input("Vector");
+ ustring attribute;
+ if (vector_in->link) {
+ ShaderNode *node = vector_in->link->parent;
+ if (node->type == UVMapNode::node_type) {
+ UVMapNode *uvmap = (UVMapNode *)node;
+ attribute = uvmap->attribute;
+ }
+ else if (node->type == TextureCoordinateNode::node_type) {
+ if (vector_in->link != node->output("UV")) {
+ return;
+ }
+ }
+ else {
+ return;
+ }
+ }
+
+ unordered_set<int> used_tiles;
+ /* TODO(lukas): This is quite inefficient. A fairly simple improvement would
+ * be to have a cache in each mesh that is indexed by attribute.
+ * Additionally, building a graph-to-meshes list once could help. */
+ foreach (Geometry *geom, scene->geometry) {
+ foreach (Shader *shader, geom->used_shaders) {
+ if (shader->graph == graph) {
+ geom->get_uv_tiles(attribute, used_tiles);
+ }
+ }
+ }
+
+ ccl::vector<int> new_tiles;
+ foreach (int tile, tiles) {
+ if (used_tiles.count(tile)) {
+ new_tiles.push_back(tile);
+ }
+ }
+ tiles.swap(new_tiles);
+}
+
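cull_tiles() above asks every mesh that uses this shader graph which UDIM tiles its UVs actually touch and keeps only those. For reference, a minimal sketch of the conventional UDIM numbering that get_uv_tiles() is assumed to follow (10 tiles per row, starting at 1001); the helper name is hypothetical:

#include <cmath>

/* Map a UV coordinate to its UDIM tile number under the common 10-column
 * convention: tile = 1001 + floor(u) + 10 * floor(v). Coordinates outside
 * the supported grid return -1. */
static int udim_tile_from_uv(float u, float v)
{
  const int x = (int)std::floor(u);
  const int y = (int)std::floor(v);
  if (x < 0 || x > 9 || y < 0) {
    return -1;
  }
  return 1001 + x + y * 10;
}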
void ImageTextureNode::attributes(Shader *shader, AttributeRequestSet *attributes)
{
#ifdef WITH_PTEX
@@ -294,58 +362,80 @@ void ImageTextureNode::compile(SVMCompiler &compiler)
ShaderOutput *color_out = output("Color");
ShaderOutput *alpha_out = output("Alpha");
- image_manager = compiler.image_manager;
- if (is_float == -1) {
- ImageMetaData metadata;
- slot = image_manager->add_image(filename.string(),
- builtin_data,
- animated,
- 0,
- interpolation,
- extension,
- use_alpha,
- metadata);
- is_float = metadata.is_float;
- is_linear = metadata.is_linear;
- }
-
- if (slot != -1) {
- int srgb = (is_linear || color_space != NODE_COLOR_SPACE_COLOR) ? 0 : 1;
- int vector_offset = tex_mapping.compile_begin(compiler, vector_in);
-
- if (projection != NODE_IMAGE_PROJ_BOX) {
- compiler.add_node(NODE_TEX_IMAGE,
- slot,
- compiler.encode_uchar4(vector_offset,
- compiler.stack_assign_if_linked(color_out),
- compiler.stack_assign_if_linked(alpha_out),
- srgb),
- projection);
+ if (handle.empty()) {
+ cull_tiles(compiler.scene, compiler.current_graph);
+ ImageManager *image_manager = compiler.scene->image_manager;
+ handle = image_manager->add_image(filename.string(), image_params(), tiles);
+ }
+
+ /* All tiles have the same metadata. */
+ const ImageMetaData metadata = handle.metadata();
+ const bool compress_as_srgb = metadata.compress_as_srgb;
+ const ustring known_colorspace = metadata.colorspace;
+
+ int vector_offset = tex_mapping.compile_begin(compiler, vector_in);
+ uint flags = 0;
+
+ if (compress_as_srgb) {
+ flags |= NODE_IMAGE_COMPRESS_AS_SRGB;
+ }
+ if (!alpha_out->links.empty()) {
+ const bool unassociate_alpha = !(ColorSpaceManager::colorspace_is_data(colorspace) ||
+ alpha_type == IMAGE_ALPHA_CHANNEL_PACKED ||
+ alpha_type == IMAGE_ALPHA_IGNORE);
+
+ if (unassociate_alpha) {
+ flags |= NODE_IMAGE_ALPHA_UNASSOCIATE;
+ }
+ }
+
+ if (projection != NODE_IMAGE_PROJ_BOX) {
+ /* If there is only one image (a very common case), we encode it as a negative value (see the decoding sketch after this function). */
+ int num_nodes;
+ if (handle.num_tiles() == 1) {
+ num_nodes = -handle.svm_slot();
}
else {
- compiler.add_node(NODE_TEX_IMAGE_BOX,
- slot,
- compiler.encode_uchar4(vector_offset,
- compiler.stack_assign_if_linked(color_out),
- compiler.stack_assign_if_linked(alpha_out),
- srgb),
- __float_as_int(projection_blend));
+ num_nodes = divide_up(handle.num_tiles(), 2);
}
- tex_mapping.compile_end(compiler, vector_in, vector_offset);
+ compiler.add_node(NODE_TEX_IMAGE,
+ num_nodes,
+ compiler.encode_uchar4(vector_offset,
+ compiler.stack_assign_if_linked(color_out),
+ compiler.stack_assign_if_linked(alpha_out),
+ flags),
+ projection);
+
+ if (num_nodes > 0) {
+ for (int i = 0; i < num_nodes; i++) {
+ int4 node;
+ node.x = tiles[2 * i];
+ node.y = handle.svm_slot(2 * i);
+ if (2 * i + 1 < tiles.size()) {
+ node.z = tiles[2 * i + 1];
+ node.w = handle.svm_slot(2 * i + 1);
+ }
+ else {
+ node.z = -1;
+ node.w = -1;
+ }
+ compiler.add_node(node.x, node.y, node.z, node.w);
+ }
+ }
}
else {
- /* image not found */
- if (!color_out->links.empty()) {
- compiler.add_node(NODE_VALUE_V, compiler.stack_assign(color_out));
- compiler.add_node(
- NODE_VALUE_V,
- make_float3(TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B));
- }
- if (!alpha_out->links.empty())
- compiler.add_node(
- NODE_VALUE_F, __float_as_int(TEX_IMAGE_MISSING_A), compiler.stack_assign(alpha_out));
+ assert(handle.num_tiles() == 1);
+ compiler.add_node(NODE_TEX_IMAGE_BOX,
+ handle.svm_slot(),
+ compiler.encode_uchar4(vector_offset,
+ compiler.stack_assign_if_linked(color_out),
+ compiler.stack_assign_if_linked(alpha_out),
+ flags),
+ __float_as_int(projection_blend));
}
+
+ tex_mapping.compile_end(compiler, vector_in, vector_offset);
}
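The branch above writes the UDIM table two (tile, slot) pairs per extra SVM node, with a single image signalled by a negative slot in the header word. A rough sketch of how that packing can be read back; the Int4 type and the decode function are illustrative only, and the actual kernel-side reader may differ:

#include <utility>
#include <vector>

struct Int4 {
  int x, y, z, w;
};

/* Expand the packed tile table back into (tile, slot) pairs.
 * num_nodes < 0 encodes a single image whose slot is -num_nodes (its tile
 * number is implicit; 1001 is used here purely for illustration). Otherwise
 * each extra node carries up to two pairs, with z == -1 marking an unused
 * second entry. */
static std::vector<std::pair<int, int>> decode_tile_slots(int num_nodes, const Int4 *nodes)
{
  std::vector<std::pair<int, int>> result;
  if (num_nodes < 0) {
    result.push_back({1001, -num_nodes});
    return result;
  }
  for (int i = 0; i < num_nodes; i++) {
    result.push_back({nodes[i].x, nodes[i].y});
    if (nodes[i].z != -1) {
      result.push_back({nodes[i].z, nodes[i].w});
    }
  }
  return result;
}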
void ImageTextureNode::compile(OSLCompiler &compiler)
@@ -354,46 +444,36 @@ void ImageTextureNode::compile(OSLCompiler &compiler)
tex_mapping.compile(compiler);
- image_manager = compiler.image_manager;
- if (is_float == -1) {
- ImageMetaData metadata;
- if (builtin_data == NULL) {
- image_manager->get_image_metadata(filename.string(), NULL, metadata);
- }
- else {
- slot = image_manager->add_image(filename.string(),
- builtin_data,
- animated,
- 0,
- interpolation,
- extension,
- use_alpha,
- metadata);
- }
- is_float = metadata.is_float;
- is_linear = metadata.is_linear;
+ if (handle.empty()) {
+ ImageManager *image_manager = compiler.scene->image_manager;
+ handle = image_manager->add_image(filename.string(), image_params());
}
- if (slot == -1) {
- compiler.parameter(this, "filename");
+ const ImageMetaData metadata = handle.metadata();
+ const bool is_float = metadata.is_float();
+ const bool compress_as_srgb = metadata.compress_as_srgb;
+ const ustring known_colorspace = metadata.colorspace;
+
+ if (handle.svm_slot() == -1) {
+ compiler.parameter_texture(
+ "filename", filename, compress_as_srgb ? u_colorspace_raw : known_colorspace);
}
else {
- /* TODO(sergey): It's not so simple to pass custom attribute
- * to the texture() function in order to make builtin images
- * support more clear. So we use special file name which is
- * "@i<slot_number>" and check whether file name matches this
- * mask in the OSLRenderServices::texture().
- */
- compiler.parameter("filename", string_printf("@i%d", slot).c_str());
+ compiler.parameter_texture("filename", handle.svm_slot());
}
- if (is_linear || color_space != NODE_COLOR_SPACE_COLOR)
- compiler.parameter("color_space", "linear");
- else
- compiler.parameter("color_space", "sRGB");
+
+ const bool unassociate_alpha = !(ColorSpaceManager::colorspace_is_data(colorspace) ||
+ alpha_type == IMAGE_ALPHA_CHANNEL_PACKED ||
+ alpha_type == IMAGE_ALPHA_IGNORE);
+ const bool is_tiled = (filename.find("<UDIM>") != string::npos);
+
compiler.parameter(this, "projection");
compiler.parameter(this, "projection_blend");
+ compiler.parameter("compress_as_srgb", compress_as_srgb);
+ compiler.parameter("ignore_alpha", alpha_type == IMAGE_ALPHA_IGNORE);
+ compiler.parameter("unassociate_alpha", !alpha_out->links.empty() && unassociate_alpha);
compiler.parameter("is_float", is_float);
- compiler.parameter("use_alpha", !alpha_out->links.empty());
+ compiler.parameter("is_tiled", is_tiled);
compiler.parameter(this, "interpolation");
compiler.parameter(this, "extension");
@@ -409,13 +489,15 @@ NODE_DEFINE(EnvironmentTextureNode)
TEXTURE_MAPPING_DEFINE(EnvironmentTextureNode);
SOCKET_STRING(filename, "Filename", ustring());
+ SOCKET_STRING(colorspace, "Colorspace", u_colorspace_auto);
- static NodeEnum color_space_enum;
- color_space_enum.insert("none", NODE_COLOR_SPACE_NONE);
- color_space_enum.insert("color", NODE_COLOR_SPACE_COLOR);
- SOCKET_ENUM(color_space, "Color Space", color_space_enum, NODE_COLOR_SPACE_COLOR);
-
- SOCKET_BOOLEAN(use_alpha, "Use Alpha", true);
+ static NodeEnum alpha_type_enum;
+ alpha_type_enum.insert("auto", IMAGE_ALPHA_AUTO);
+ alpha_type_enum.insert("unassociated", IMAGE_ALPHA_UNASSOCIATED);
+ alpha_type_enum.insert("associated", IMAGE_ALPHA_ASSOCIATED);
+ alpha_type_enum.insert("channel_packed", IMAGE_ALPHA_CHANNEL_PACKED);
+ alpha_type_enum.insert("ignore", IMAGE_ALPHA_IGNORE);
+ SOCKET_ENUM(alpha_type, "Alpha Type", alpha_type_enum, IMAGE_ALPHA_AUTO);
static NodeEnum interpolation_enum;
interpolation_enum.insert("closest", INTERPOLATION_CLOSEST);
@@ -439,32 +521,28 @@ NODE_DEFINE(EnvironmentTextureNode)
EnvironmentTextureNode::EnvironmentTextureNode() : ImageSlotTextureNode(node_type)
{
- image_manager = NULL;
- slot = -1;
- is_float = -1;
- is_linear = false;
- builtin_data = NULL;
+ colorspace = u_colorspace_raw;
animated = false;
}
-EnvironmentTextureNode::~EnvironmentTextureNode()
-{
- if (image_manager) {
- image_manager->remove_image(
- filename.string(), builtin_data, interpolation, EXTENSION_REPEAT, use_alpha);
- }
-}
-
ShaderNode *EnvironmentTextureNode::clone() const
{
EnvironmentTextureNode *node = new EnvironmentTextureNode(*this);
- node->image_manager = NULL;
- node->slot = -1;
- node->is_float = -1;
- node->is_linear = false;
+ node->handle = handle;
return node;
}
+ImageParams EnvironmentTextureNode::image_params() const
+{
+ ImageParams params;
+ params.animated = animated;
+ params.interpolation = interpolation;
+ params.extension = EXTENSION_REPEAT;
+ params.alpha_type = alpha_type;
+ params.colorspace = colorspace;
+ return params;
+}
+
void EnvironmentTextureNode::attributes(Shader *shader, AttributeRequestSet *attributes)
{
#ifdef WITH_PTEX
@@ -484,93 +562,60 @@ void EnvironmentTextureNode::compile(SVMCompiler &compiler)
ShaderOutput *color_out = output("Color");
ShaderOutput *alpha_out = output("Alpha");
- image_manager = compiler.image_manager;
- if (slot == -1) {
- ImageMetaData metadata;
- slot = image_manager->add_image(filename.string(),
- builtin_data,
- animated,
- 0,
- interpolation,
- EXTENSION_REPEAT,
- use_alpha,
- metadata);
- is_float = metadata.is_float;
- is_linear = metadata.is_linear;
- }
-
- if (slot != -1) {
- int srgb = (is_linear || color_space != NODE_COLOR_SPACE_COLOR) ? 0 : 1;
- int vector_offset = tex_mapping.compile_begin(compiler, vector_in);
-
- compiler.add_node(NODE_TEX_ENVIRONMENT,
- slot,
- compiler.encode_uchar4(vector_offset,
- compiler.stack_assign_if_linked(color_out),
- compiler.stack_assign_if_linked(alpha_out),
- srgb),
- projection);
-
- tex_mapping.compile_end(compiler, vector_in, vector_offset);
+ if (handle.empty()) {
+ ImageManager *image_manager = compiler.scene->image_manager;
+ handle = image_manager->add_image(filename.string(), image_params());
}
- else {
- /* image not found */
- if (!color_out->links.empty()) {
- compiler.add_node(NODE_VALUE_V, compiler.stack_assign(color_out));
- compiler.add_node(
- NODE_VALUE_V,
- make_float3(TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B));
- }
- if (!alpha_out->links.empty())
- compiler.add_node(
- NODE_VALUE_F, __float_as_int(TEX_IMAGE_MISSING_A), compiler.stack_assign(alpha_out));
+
+ const ImageMetaData metadata = handle.metadata();
+ const bool compress_as_srgb = metadata.compress_as_srgb;
+ const ustring known_colorspace = metadata.colorspace;
+
+ int vector_offset = tex_mapping.compile_begin(compiler, vector_in);
+ uint flags = 0;
+
+ if (compress_as_srgb) {
+ flags |= NODE_IMAGE_COMPRESS_AS_SRGB;
}
+
+ compiler.add_node(NODE_TEX_ENVIRONMENT,
+ handle.svm_slot(),
+ compiler.encode_uchar4(vector_offset,
+ compiler.stack_assign_if_linked(color_out),
+ compiler.stack_assign_if_linked(alpha_out),
+ flags),
+ projection);
+
+ tex_mapping.compile_end(compiler, vector_in, vector_offset);
}
void EnvironmentTextureNode::compile(OSLCompiler &compiler)
{
- ShaderOutput *alpha_out = output("Alpha");
+ if (handle.empty()) {
+ ImageManager *image_manager = compiler.scene->image_manager;
+ handle = image_manager->add_image(filename.string(), image_params());
+ }
tex_mapping.compile(compiler);
- /* See comments in ImageTextureNode::compile about support
- * of builtin images.
- */
- image_manager = compiler.image_manager;
- if (is_float == -1) {
- ImageMetaData metadata;
- if (builtin_data == NULL) {
- image_manager->get_image_metadata(filename.string(), NULL, metadata);
- }
- else {
- slot = image_manager->add_image(filename.string(),
- builtin_data,
- animated,
- 0,
- interpolation,
- EXTENSION_REPEAT,
- use_alpha,
- metadata);
- }
- is_float = metadata.is_float;
- is_linear = metadata.is_linear;
- }
+ const ImageMetaData metadata = handle.metadata();
+ const bool is_float = metadata.is_float();
+ const bool compress_as_srgb = metadata.compress_as_srgb;
+ const ustring known_colorspace = metadata.colorspace;
- if (slot == -1) {
- compiler.parameter(this, "filename");
+ if (handle.svm_slot() == -1) {
+ compiler.parameter_texture(
+ "filename", filename, compress_as_srgb ? u_colorspace_raw : known_colorspace);
}
else {
- compiler.parameter("filename", string_printf("@i%d", slot).c_str());
+ compiler.parameter_texture("filename", handle.svm_slot());
}
- compiler.parameter(this, "projection");
- if (is_linear || color_space != NODE_COLOR_SPACE_COLOR)
- compiler.parameter("color_space", "linear");
- else
- compiler.parameter("color_space", "sRGB");
+ compiler.parameter(this, "projection");
compiler.parameter(this, "interpolation");
+ compiler.parameter("compress_as_srgb", compress_as_srgb);
+ compiler.parameter("ignore_alpha", alpha_type == IMAGE_ALPHA_IGNORE);
compiler.parameter("is_float", is_float);
- compiler.parameter("use_alpha", !alpha_out->links.empty());
compiler.add(this, "node_environment_texture");
}
@@ -587,7 +632,7 @@ typedef struct SunSky {
/* Parameter */
float radiance_x, radiance_y, radiance_z;
- float config_x[9], config_y[9], config_z[9];
+ float config_x[9], config_y[9], config_z[9], nishita_data[10];
} SunSky;
/* Preetham model */
@@ -597,7 +642,7 @@ static float sky_perez_function(float lam[6], float theta, float gamma)
(1.0f + lam[2] * expf(lam[3] * gamma) + lam[4] * cosf(gamma) * cosf(gamma));
}
-static void sky_texture_precompute_old(SunSky *sunsky, float3 dir, float turbidity)
+static void sky_texture_precompute_preetham(SunSky *sunsky, float3 dir, float turbidity)
{
/*
* We re-use the SunSky struct of the new model, to avoid extra variables
@@ -660,10 +705,10 @@ static void sky_texture_precompute_old(SunSky *sunsky, float3 dir, float turbidity)
}
/* Hosek / Wilkie */
-static void sky_texture_precompute_new(SunSky *sunsky,
- float3 dir,
- float turbidity,
- float ground_albedo)
+static void sky_texture_precompute_hosek(SunSky *sunsky,
+ float3 dir,
+ float turbidity,
+ float ground_albedo)
{
/* Calculate Sun Direction and save coordinates */
float2 spherical = sky_spherical_coordinates(dir);
@@ -682,8 +727,8 @@ static void sky_texture_precompute_new(SunSky *sunsky,
float solarElevation = M_PI_2_F - theta;
/* Initialize Sky Model */
- ArHosekSkyModelState *sky_state;
- sky_state = arhosek_xyz_skymodelstate_alloc_init(
+ SKY_ArHosekSkyModelState *sky_state;
+ sky_state = SKY_arhosek_xyz_skymodelstate_alloc_init(
(double)turbidity, (double)ground_albedo, (double)solarElevation);
/* Copy values from sky_state to SunSky */
@@ -697,7 +742,42 @@ static void sky_texture_precompute_new(SunSky *sunsky,
sunsky->radiance_z = (float)sky_state->radiances[2];
/* Free sky_state */
- arhosekskymodelstate_free(sky_state);
+ SKY_arhosekskymodelstate_free(sky_state);
+}
+
+/* Nishita improved */
+static void sky_texture_precompute_nishita(SunSky *sunsky,
+ bool sun_disc,
+ float sun_size,
+ float sun_intensity,
+ float sun_elevation,
+ float sun_rotation,
+ float altitude,
+ float air_density,
+ float dust_density)
+{
+ /* sample 2 sun pixels */
+ float pixel_bottom[3];
+ float pixel_top[3];
+ SKY_nishita_skymodel_precompute_sun(
+ sun_elevation, sun_size, altitude, air_density, dust_density, pixel_bottom, pixel_top);
+ /* Wrap the sun rotation into the range [0, 2*pi). */
+ sun_rotation = fmodf(sun_rotation, M_2PI_F);
+ if (sun_rotation < 0.0f) {
+ sun_rotation += M_2PI_F;
+ }
+ sun_rotation = M_2PI_F - sun_rotation;
+ /* send data to svm_sky */
+ sunsky->nishita_data[0] = pixel_bottom[0];
+ sunsky->nishita_data[1] = pixel_bottom[1];
+ sunsky->nishita_data[2] = pixel_bottom[2];
+ sunsky->nishita_data[3] = pixel_top[0];
+ sunsky->nishita_data[4] = pixel_top[1];
+ sunsky->nishita_data[5] = pixel_top[2];
+ sunsky->nishita_data[6] = sun_elevation;
+ sunsky->nishita_data[7] = sun_rotation;
+ sunsky->nishita_data[8] = sun_disc ? sun_size : 0.0f;
+ sunsky->nishita_data[9] = sun_intensity;
}
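For reference, the ten floats packed into nishita_data above are the bottom and top sun pixel colors (RGB each), followed by the sun elevation, the wrapped sun rotation, the sun disc size (zero when the disc is disabled) and the sun intensity. A small enum makes the layout explicit; the names are illustrative, not part of this patch:

/* Illustrative index names for the nishita_data[10] layout filled in above. */
enum NishitaDataIndex {
  NISHITA_PIXEL_BOTTOM_R = 0,
  NISHITA_PIXEL_BOTTOM_G = 1,
  NISHITA_PIXEL_BOTTOM_B = 2,
  NISHITA_PIXEL_TOP_R = 3,
  NISHITA_PIXEL_TOP_G = 4,
  NISHITA_PIXEL_TOP_B = 5,
  NISHITA_SUN_ELEVATION = 6,
  NISHITA_SUN_ROTATION = 7,
  NISHITA_SUN_SIZE = 8, /* 0.0f when the sun disc is disabled. */
  NISHITA_SUN_INTENSITY = 9,
};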
NODE_DEFINE(SkyTextureNode)
@@ -707,13 +787,23 @@ NODE_DEFINE(SkyTextureNode)
TEXTURE_MAPPING_DEFINE(SkyTextureNode);
static NodeEnum type_enum;
- type_enum.insert("preetham", NODE_SKY_OLD);
- type_enum.insert("hosek_wilkie", NODE_SKY_NEW);
- SOCKET_ENUM(type, "Type", type_enum, NODE_SKY_NEW);
+ type_enum.insert("preetham", NODE_SKY_PREETHAM);
+ type_enum.insert("hosek_wilkie", NODE_SKY_HOSEK);
+ type_enum.insert("nishita_improved", NODE_SKY_NISHITA);
+ SOCKET_ENUM(type, "Type", type_enum, NODE_SKY_NISHITA);
SOCKET_VECTOR(sun_direction, "Sun Direction", make_float3(0.0f, 0.0f, 1.0f));
SOCKET_FLOAT(turbidity, "Turbidity", 2.2f);
SOCKET_FLOAT(ground_albedo, "Ground Albedo", 0.3f);
+ SOCKET_BOOLEAN(sun_disc, "Sun Disc", true);
+ SOCKET_FLOAT(sun_size, "Sun Size", 0.009512f);
+ SOCKET_FLOAT(sun_intensity, "Sun Intensity", 1.0f);
+ SOCKET_FLOAT(sun_elevation, "Sun Elevation", 15.0f * M_PI_F / 180.0f);
+ SOCKET_FLOAT(sun_rotation, "Sun Rotation", 0.0f);
+ SOCKET_FLOAT(altitude, "Altitude", 1.0f);
+ SOCKET_FLOAT(air_density, "Air", 1.0f);
+ SOCKET_FLOAT(dust_density, "Dust", 1.0f);
+ SOCKET_FLOAT(ozone_density, "Ozone", 1.0f);
SOCKET_IN_POINT(
vector, "Vector", make_float3(0.0f, 0.0f, 0.0f), SocketType::LINK_TEXTURE_GENERATED);
@@ -733,10 +823,37 @@ void SkyTextureNode::compile(SVMCompiler &compiler)
ShaderOutput *color_out = output("Color");
SunSky sunsky;
- if (type == NODE_SKY_OLD)
- sky_texture_precompute_old(&sunsky, sun_direction, turbidity);
- else if (type == NODE_SKY_NEW)
- sky_texture_precompute_new(&sunsky, sun_direction, turbidity, ground_albedo);
+ if (type == NODE_SKY_PREETHAM)
+ sky_texture_precompute_preetham(&sunsky, sun_direction, turbidity);
+ else if (type == NODE_SKY_HOSEK)
+ sky_texture_precompute_hosek(&sunsky, sun_direction, turbidity, ground_albedo);
+ else if (type == NODE_SKY_NISHITA) {
+ /* Clamp altitude to reasonable values.
+ * Below 1m causes numerical issues and above 60km is space. */
+ float clamped_altitude = clamp(altitude, 1.0f, 59999.0f);
+
+ sky_texture_precompute_nishita(&sunsky,
+ sun_disc,
+ sun_size,
+ sun_intensity,
+ sun_elevation,
+ sun_rotation,
+ clamped_altitude,
+ air_density,
+ dust_density);
+ /* precomputed texture image parameters */
+ ImageManager *image_manager = compiler.scene->image_manager;
+ ImageParams impar;
+ impar.interpolation = INTERPOLATION_LINEAR;
+ impar.extension = EXTENSION_EXTEND;
+
+ /* precompute sky texture */
+ if (handle.empty()) {
+ SkyLoader *loader = new SkyLoader(
+ sun_elevation, clamped_altitude, air_density, dust_density, ozone_density);
+ handle = image_manager->add_image(loader, impar);
+ }
+ }
else
assert(false);
@@ -744,38 +861,55 @@ void SkyTextureNode::compile(SVMCompiler &compiler)
compiler.stack_assign(color_out);
compiler.add_node(NODE_TEX_SKY, vector_offset, compiler.stack_assign(color_out), type);
- compiler.add_node(__float_as_uint(sunsky.phi),
- __float_as_uint(sunsky.theta),
- __float_as_uint(sunsky.radiance_x),
- __float_as_uint(sunsky.radiance_y));
- compiler.add_node(__float_as_uint(sunsky.radiance_z),
- __float_as_uint(sunsky.config_x[0]),
- __float_as_uint(sunsky.config_x[1]),
- __float_as_uint(sunsky.config_x[2]));
- compiler.add_node(__float_as_uint(sunsky.config_x[3]),
- __float_as_uint(sunsky.config_x[4]),
- __float_as_uint(sunsky.config_x[5]),
- __float_as_uint(sunsky.config_x[6]));
- compiler.add_node(__float_as_uint(sunsky.config_x[7]),
- __float_as_uint(sunsky.config_x[8]),
- __float_as_uint(sunsky.config_y[0]),
- __float_as_uint(sunsky.config_y[1]));
- compiler.add_node(__float_as_uint(sunsky.config_y[2]),
- __float_as_uint(sunsky.config_y[3]),
- __float_as_uint(sunsky.config_y[4]),
- __float_as_uint(sunsky.config_y[5]));
- compiler.add_node(__float_as_uint(sunsky.config_y[6]),
- __float_as_uint(sunsky.config_y[7]),
- __float_as_uint(sunsky.config_y[8]),
- __float_as_uint(sunsky.config_z[0]));
- compiler.add_node(__float_as_uint(sunsky.config_z[1]),
- __float_as_uint(sunsky.config_z[2]),
- __float_as_uint(sunsky.config_z[3]),
- __float_as_uint(sunsky.config_z[4]));
- compiler.add_node(__float_as_uint(sunsky.config_z[5]),
- __float_as_uint(sunsky.config_z[6]),
- __float_as_uint(sunsky.config_z[7]),
- __float_as_uint(sunsky.config_z[8]));
+ /* The Nishita model does not need this data. */
+ if (type != NODE_SKY_NISHITA) {
+ compiler.add_node(__float_as_uint(sunsky.phi),
+ __float_as_uint(sunsky.theta),
+ __float_as_uint(sunsky.radiance_x),
+ __float_as_uint(sunsky.radiance_y));
+ compiler.add_node(__float_as_uint(sunsky.radiance_z),
+ __float_as_uint(sunsky.config_x[0]),
+ __float_as_uint(sunsky.config_x[1]),
+ __float_as_uint(sunsky.config_x[2]));
+ compiler.add_node(__float_as_uint(sunsky.config_x[3]),
+ __float_as_uint(sunsky.config_x[4]),
+ __float_as_uint(sunsky.config_x[5]),
+ __float_as_uint(sunsky.config_x[6]));
+ compiler.add_node(__float_as_uint(sunsky.config_x[7]),
+ __float_as_uint(sunsky.config_x[8]),
+ __float_as_uint(sunsky.config_y[0]),
+ __float_as_uint(sunsky.config_y[1]));
+ compiler.add_node(__float_as_uint(sunsky.config_y[2]),
+ __float_as_uint(sunsky.config_y[3]),
+ __float_as_uint(sunsky.config_y[4]),
+ __float_as_uint(sunsky.config_y[5]));
+ compiler.add_node(__float_as_uint(sunsky.config_y[6]),
+ __float_as_uint(sunsky.config_y[7]),
+ __float_as_uint(sunsky.config_y[8]),
+ __float_as_uint(sunsky.config_z[0]));
+ compiler.add_node(__float_as_uint(sunsky.config_z[1]),
+ __float_as_uint(sunsky.config_z[2]),
+ __float_as_uint(sunsky.config_z[3]),
+ __float_as_uint(sunsky.config_z[4]));
+ compiler.add_node(__float_as_uint(sunsky.config_z[5]),
+ __float_as_uint(sunsky.config_z[6]),
+ __float_as_uint(sunsky.config_z[7]),
+ __float_as_uint(sunsky.config_z[8]));
+ }
+ else {
+ compiler.add_node(__float_as_uint(sunsky.nishita_data[0]),
+ __float_as_uint(sunsky.nishita_data[1]),
+ __float_as_uint(sunsky.nishita_data[2]),
+ __float_as_uint(sunsky.nishita_data[3]));
+ compiler.add_node(__float_as_uint(sunsky.nishita_data[4]),
+ __float_as_uint(sunsky.nishita_data[5]),
+ __float_as_uint(sunsky.nishita_data[6]),
+ __float_as_uint(sunsky.nishita_data[7]));
+ compiler.add_node(__float_as_uint(sunsky.nishita_data[8]),
+ __float_as_uint(sunsky.nishita_data[9]),
+ handle.svm_slot(),
+ 0);
+ }
tex_mapping.compile_end(compiler, vector_in, vector_offset);
}
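The add_node() calls above pass float constants through uint slots via __float_as_uint. That helper is assumed to be a plain bit reinterpretation, roughly as sketched below (memcpy keeps the cast within defined behaviour); the kernel side would mirror it with the inverse __uint_as_float:

#include <cstdint>
#include <cstring>

/* Reinterpret the bits of a float as a 32-bit unsigned integer without
 * changing them, so float parameters survive the uint4 SVM nodes intact. */
static uint32_t float_as_uint_sketch(float f)
{
  uint32_t u;
  std::memcpy(&u, &f, sizeof(u));
  return u;
}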
@@ -785,10 +919,37 @@ void SkyTextureNode::compile(OSLCompiler &compiler)
tex_mapping.compile(compiler);
SunSky sunsky;
- if (type == NODE_SKY_OLD)
- sky_texture_precompute_old(&sunsky, sun_direction, turbidity);
- else if (type == NODE_SKY_NEW)
- sky_texture_precompute_new(&sunsky, sun_direction, turbidity, ground_albedo);
+ if (type == NODE_SKY_PREETHAM)
+ sky_texture_precompute_preetham(&sunsky, sun_direction, turbidity);
+ else if (type == NODE_SKY_HOSEK)
+ sky_texture_precompute_hosek(&sunsky, sun_direction, turbidity, ground_albedo);
+ else if (type == NODE_SKY_NISHITA) {
+ /* Clamp altitude to reasonable values.
+ * Below 1m causes numerical issues and above 60km is space. */
+ float clamped_altitude = clamp(altitude, 1.0f, 59999.0f);
+
+ sky_texture_precompute_nishita(&sunsky,
+ sun_disc,
+ sun_size,
+ sun_intensity,
+ sun_elevation,
+ sun_rotation,
+ clamped_altitude,
+ air_density,
+ dust_density);
+ /* precomputed texture image parameters */
+ ImageManager *image_manager = compiler.scene->image_manager;
+ ImageParams impar;
+ impar.interpolation = INTERPOLATION_LINEAR;
+ impar.extension = EXTENSION_EXTEND;
+
+ /* precompute sky texture */
+ if (handle.empty()) {
+ SkyLoader *loader = new SkyLoader(
+ sun_elevation, clamped_altitude, air_density, dust_density, ozone_density);
+ handle = image_manager->add_image(loader, impar);
+ }
+ }
else
assert(false);
@@ -800,6 +961,11 @@ void SkyTextureNode::compile(OSLCompiler &compiler)
compiler.parameter_array("config_x", sunsky.config_x, 9);
compiler.parameter_array("config_y", sunsky.config_y, 9);
compiler.parameter_array("config_z", sunsky.config_z, 9);
+ compiler.parameter_array("nishita_data", sunsky.nishita_data, 10);
+ /* Precomputed Nishita sky texture. */
+ if (type == NODE_SKY_NISHITA) {
+ compiler.parameter_texture("filename", handle.svm_slot());
+ }
compiler.add(this, "node_sky_texture");
}
@@ -867,14 +1033,23 @@ NODE_DEFINE(NoiseTextureNode)
TEXTURE_MAPPING_DEFINE(NoiseTextureNode);
+ static NodeEnum dimensions_enum;
+ dimensions_enum.insert("1D", 1);
+ dimensions_enum.insert("2D", 2);
+ dimensions_enum.insert("3D", 3);
+ dimensions_enum.insert("4D", 4);
+ SOCKET_ENUM(dimensions, "Dimensions", dimensions_enum, 3);
+
+ SOCKET_IN_POINT(
+ vector, "Vector", make_float3(0.0f, 0.0f, 0.0f), SocketType::LINK_TEXTURE_GENERATED);
+ SOCKET_IN_FLOAT(w, "W", 0.0f);
SOCKET_IN_FLOAT(scale, "Scale", 1.0f);
SOCKET_IN_FLOAT(detail, "Detail", 2.0f);
+ SOCKET_IN_FLOAT(roughness, "Roughness", 0.5f);
SOCKET_IN_FLOAT(distortion, "Distortion", 0.0f);
- SOCKET_IN_POINT(
- vector, "Vector", make_float3(0.0f, 0.0f, 0.0f), SocketType::LINK_TEXTURE_GENERATED);
- SOCKET_OUT_COLOR(color, "Color");
SOCKET_OUT_FLOAT(fac, "Fac");
+ SOCKET_OUT_COLOR(color, "Color");
return type;
}
@@ -885,31 +1060,44 @@ NoiseTextureNode::NoiseTextureNode() : TextureNode(node_type)
void NoiseTextureNode::compile(SVMCompiler &compiler)
{
- ShaderInput *distortion_in = input("Distortion");
- ShaderInput *detail_in = input("Detail");
- ShaderInput *scale_in = input("Scale");
ShaderInput *vector_in = input("Vector");
- ShaderOutput *color_out = output("Color");
+ ShaderInput *w_in = input("W");
+ ShaderInput *scale_in = input("Scale");
+ ShaderInput *detail_in = input("Detail");
+ ShaderInput *roughness_in = input("Roughness");
+ ShaderInput *distortion_in = input("Distortion");
ShaderOutput *fac_out = output("Fac");
+ ShaderOutput *color_out = output("Color");
- int vector_offset = tex_mapping.compile_begin(compiler, vector_in);
+ int vector_stack_offset = tex_mapping.compile_begin(compiler, vector_in);
+ int w_stack_offset = compiler.stack_assign_if_linked(w_in);
+ int scale_stack_offset = compiler.stack_assign_if_linked(scale_in);
+ int detail_stack_offset = compiler.stack_assign_if_linked(detail_in);
+ int roughness_stack_offset = compiler.stack_assign_if_linked(roughness_in);
+ int distortion_stack_offset = compiler.stack_assign_if_linked(distortion_in);
+ int fac_stack_offset = compiler.stack_assign_if_linked(fac_out);
+ int color_stack_offset = compiler.stack_assign_if_linked(color_out);
- compiler.add_node(NODE_TEX_NOISE,
- compiler.encode_uchar4(vector_offset,
- compiler.stack_assign_if_linked(scale_in),
- compiler.stack_assign_if_linked(detail_in),
- compiler.stack_assign_if_linked(distortion_in)),
- compiler.encode_uchar4(compiler.stack_assign_if_linked(color_out),
- compiler.stack_assign_if_linked(fac_out)));
- compiler.add_node(__float_as_int(scale), __float_as_int(detail), __float_as_int(distortion));
+ compiler.add_node(
+ NODE_TEX_NOISE,
+ dimensions,
+ compiler.encode_uchar4(
+ vector_stack_offset, w_stack_offset, scale_stack_offset, detail_stack_offset),
+ compiler.encode_uchar4(
+ roughness_stack_offset, distortion_stack_offset, fac_stack_offset, color_stack_offset));
+ compiler.add_node(
+ __float_as_int(w), __float_as_int(scale), __float_as_int(detail), __float_as_int(roughness));
- tex_mapping.compile_end(compiler, vector_in, vector_offset);
+ compiler.add_node(
+ __float_as_int(distortion), SVM_STACK_INVALID, SVM_STACK_INVALID, SVM_STACK_INVALID);
+
+ tex_mapping.compile_end(compiler, vector_in, vector_stack_offset);
}
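Like most compile() methods in this patch, the noise node funnels its stack offsets through encode_uchar4(), which is assumed to pack four small values into one 32-bit SVM word, one byte each; a sketch under that assumption (the byte order and the _sketch helper name are not from the patch):

/* Pack four 8-bit values into a single uint, first argument in the lowest
 * byte. SVM stack offsets fit in a byte, so four of them share one word. */
static unsigned int encode_uchar4_sketch(unsigned int x,
                                         unsigned int y,
                                         unsigned int z,
                                         unsigned int w)
{
  return (x & 0xFF) | ((y & 0xFF) << 8) | ((z & 0xFF) << 16) | ((w & 0xFF) << 24);
}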
void NoiseTextureNode::compile(OSLCompiler &compiler)
{
tex_mapping.compile(compiler);
-
+ compiler.parameter(this, "dimensions");
compiler.add(this, "node_noise_texture");
}
@@ -921,33 +1109,41 @@ NODE_DEFINE(VoronoiTextureNode)
TEXTURE_MAPPING_DEFINE(VoronoiTextureNode);
- static NodeEnum coloring_enum;
- coloring_enum.insert("intensity", NODE_VORONOI_INTENSITY);
- coloring_enum.insert("cells", NODE_VORONOI_CELLS);
- SOCKET_ENUM(coloring, "Coloring", coloring_enum, NODE_VORONOI_INTENSITY);
+ static NodeEnum dimensions_enum;
+ dimensions_enum.insert("1D", 1);
+ dimensions_enum.insert("2D", 2);
+ dimensions_enum.insert("3D", 3);
+ dimensions_enum.insert("4D", 4);
+ SOCKET_ENUM(dimensions, "Dimensions", dimensions_enum, 3);
- static NodeEnum metric;
- metric.insert("distance", NODE_VORONOI_DISTANCE);
- metric.insert("manhattan", NODE_VORONOI_MANHATTAN);
- metric.insert("chebychev", NODE_VORONOI_CHEBYCHEV);
- metric.insert("minkowski", NODE_VORONOI_MINKOWSKI);
- SOCKET_ENUM(metric, "Distance Metric", metric, NODE_VORONOI_INTENSITY);
+ static NodeEnum metric_enum;
+ metric_enum.insert("euclidean", NODE_VORONOI_EUCLIDEAN);
+ metric_enum.insert("manhattan", NODE_VORONOI_MANHATTAN);
+ metric_enum.insert("chebychev", NODE_VORONOI_CHEBYCHEV);
+ metric_enum.insert("minkowski", NODE_VORONOI_MINKOWSKI);
+ SOCKET_ENUM(metric, "Distance Metric", metric_enum, NODE_VORONOI_EUCLIDEAN);
static NodeEnum feature_enum;
- feature_enum.insert("F1", NODE_VORONOI_F1);
- feature_enum.insert("F2", NODE_VORONOI_F2);
- feature_enum.insert("F3", NODE_VORONOI_F3);
- feature_enum.insert("F4", NODE_VORONOI_F4);
- feature_enum.insert("F2F1", NODE_VORONOI_F2F1);
- SOCKET_ENUM(feature, "Feature", feature_enum, NODE_VORONOI_INTENSITY);
+ feature_enum.insert("f1", NODE_VORONOI_F1);
+ feature_enum.insert("f2", NODE_VORONOI_F2);
+ feature_enum.insert("smooth_f1", NODE_VORONOI_SMOOTH_F1);
+ feature_enum.insert("distance_to_edge", NODE_VORONOI_DISTANCE_TO_EDGE);
+ feature_enum.insert("n_sphere_radius", NODE_VORONOI_N_SPHERE_RADIUS);
+ SOCKET_ENUM(feature, "Feature", feature_enum, NODE_VORONOI_F1);
- SOCKET_IN_FLOAT(scale, "Scale", 1.0f);
- SOCKET_IN_FLOAT(exponent, "Exponent", 0.5f);
SOCKET_IN_POINT(
vector, "Vector", make_float3(0.0f, 0.0f, 0.0f), SocketType::LINK_TEXTURE_GENERATED);
+ SOCKET_IN_FLOAT(w, "W", 0.0f);
+ SOCKET_IN_FLOAT(scale, "Scale", 5.0f);
+ SOCKET_IN_FLOAT(smoothness, "Smoothness", 5.0f);
+ SOCKET_IN_FLOAT(exponent, "Exponent", 0.5f);
+ SOCKET_IN_FLOAT(randomness, "Randomness", 1.0f);
+ SOCKET_OUT_FLOAT(distance, "Distance");
SOCKET_OUT_COLOR(color, "Color");
- SOCKET_OUT_FLOAT(fac, "Fac");
+ SOCKET_OUT_POINT(position, "Position");
+ SOCKET_OUT_FLOAT(w, "W");
+ SOCKET_OUT_FLOAT(radius, "Radius");
return type;
}
@@ -958,39 +1154,57 @@ VoronoiTextureNode::VoronoiTextureNode() : TextureNode(node_type)
void VoronoiTextureNode::compile(SVMCompiler &compiler)
{
- ShaderInput *scale_in = input("Scale");
ShaderInput *vector_in = input("Vector");
+ ShaderInput *w_in = input("W");
+ ShaderInput *scale_in = input("Scale");
+ ShaderInput *smoothness_in = input("Smoothness");
ShaderInput *exponent_in = input("Exponent");
- ShaderOutput *color_out = output("Color");
- ShaderOutput *fac_out = output("Fac");
+ ShaderInput *randomness_in = input("Randomness");
- if (vector_in->link)
- compiler.stack_assign(vector_in);
- if (scale_in->link)
- compiler.stack_assign(scale_in);
- if (exponent_in->link)
- compiler.stack_assign(exponent_in);
-
- int vector_offset = tex_mapping.compile_begin(compiler, vector_in);
+ ShaderOutput *distance_out = output("Distance");
+ ShaderOutput *color_out = output("Color");
+ ShaderOutput *position_out = output("Position");
+ ShaderOutput *w_out = output("W");
+ ShaderOutput *radius_out = output("Radius");
+
+ int vector_stack_offset = tex_mapping.compile_begin(compiler, vector_in);
+ int w_in_stack_offset = compiler.stack_assign_if_linked(w_in);
+ int scale_stack_offset = compiler.stack_assign_if_linked(scale_in);
+ int smoothness_stack_offset = compiler.stack_assign_if_linked(smoothness_in);
+ int exponent_stack_offset = compiler.stack_assign_if_linked(exponent_in);
+ int randomness_stack_offset = compiler.stack_assign_if_linked(randomness_in);
+ int distance_stack_offset = compiler.stack_assign_if_linked(distance_out);
+ int color_stack_offset = compiler.stack_assign_if_linked(color_out);
+ int position_stack_offset = compiler.stack_assign_if_linked(position_out);
+ int w_out_stack_offset = compiler.stack_assign_if_linked(w_out);
+ int radius_stack_offset = compiler.stack_assign_if_linked(radius_out);
+
+ compiler.add_node(NODE_TEX_VORONOI, dimensions, feature, metric);
+ compiler.add_node(
+ compiler.encode_uchar4(
+ vector_stack_offset, w_in_stack_offset, scale_stack_offset, smoothness_stack_offset),
+ compiler.encode_uchar4(exponent_stack_offset,
+ randomness_stack_offset,
+ distance_stack_offset,
+ color_stack_offset),
+ compiler.encode_uchar4(position_stack_offset, w_out_stack_offset, radius_stack_offset),
+ __float_as_int(w));
- compiler.add_node(NODE_TEX_VORONOI,
- compiler.encode_uchar4(vector_offset, coloring, metric, feature),
- compiler.encode_uchar4(compiler.stack_assign_if_linked(scale_in),
- compiler.stack_assign_if_linked(exponent_in),
- compiler.stack_assign(fac_out),
- compiler.stack_assign(color_out)));
- compiler.add_node(__float_as_int(scale), __float_as_int(exponent));
+ compiler.add_node(__float_as_int(scale),
+ __float_as_int(smoothness),
+ __float_as_int(exponent),
+ __float_as_int(randomness));
- tex_mapping.compile_end(compiler, vector_in, vector_offset);
+ tex_mapping.compile_end(compiler, vector_in, vector_stack_offset);
}
void VoronoiTextureNode::compile(OSLCompiler &compiler)
{
tex_mapping.compile(compiler);
- compiler.parameter(this, "coloring");
- compiler.parameter(this, "metric");
+ compiler.parameter(this, "dimensions");
compiler.parameter(this, "feature");
+ compiler.parameter(this, "metric");
compiler.add(this, "node_voronoi_texture");
}
@@ -1043,17 +1257,17 @@ void IESLightNode::get_slot()
if (slot == -1) {
if (ies.empty()) {
- slot = light_manager->add_ies_from_file(filename);
+ slot = light_manager->add_ies_from_file(filename.string());
}
else {
- slot = light_manager->add_ies(ies);
+ slot = light_manager->add_ies(ies.string());
}
}
}
void IESLightNode::compile(SVMCompiler &compiler)
{
- light_manager = compiler.light_manager;
+ light_manager = compiler.scene->light_manager;
get_slot();
ShaderInput *strength_in = input("Strength");
@@ -1075,15 +1289,65 @@ void IESLightNode::compile(SVMCompiler &compiler)
void IESLightNode::compile(OSLCompiler &compiler)
{
- light_manager = compiler.light_manager;
+ light_manager = compiler.scene->light_manager;
get_slot();
tex_mapping.compile(compiler);
- compiler.parameter("slot", slot);
+ compiler.parameter_texture_ies("filename", slot);
compiler.add(this, "node_ies_light");
}
+/* White Noise Texture */
+
+NODE_DEFINE(WhiteNoiseTextureNode)
+{
+ NodeType *type = NodeType::add("white_noise_texture", create, NodeType::SHADER);
+
+ static NodeEnum dimensions_enum;
+ dimensions_enum.insert("1D", 1);
+ dimensions_enum.insert("2D", 2);
+ dimensions_enum.insert("3D", 3);
+ dimensions_enum.insert("4D", 4);
+ SOCKET_ENUM(dimensions, "Dimensions", dimensions_enum, 3);
+
+ SOCKET_IN_POINT(vector, "Vector", make_float3(0.0f, 0.0f, 0.0f));
+ SOCKET_IN_FLOAT(w, "W", 0.0f);
+
+ SOCKET_OUT_FLOAT(value, "Value");
+ SOCKET_OUT_COLOR(color, "Color");
+
+ return type;
+}
+
+WhiteNoiseTextureNode::WhiteNoiseTextureNode() : ShaderNode(node_type)
+{
+}
+
+void WhiteNoiseTextureNode::compile(SVMCompiler &compiler)
+{
+ ShaderInput *vector_in = input("Vector");
+ ShaderInput *w_in = input("W");
+ ShaderOutput *value_out = output("Value");
+ ShaderOutput *color_out = output("Color");
+
+ int vector_stack_offset = compiler.stack_assign(vector_in);
+ int w_stack_offset = compiler.stack_assign(w_in);
+ int value_stack_offset = compiler.stack_assign(value_out);
+ int color_stack_offset = compiler.stack_assign(color_out);
+
+ compiler.add_node(NODE_TEX_WHITE_NOISE,
+ dimensions,
+ compiler.encode_uchar4(vector_stack_offset, w_stack_offset),
+ compiler.encode_uchar4(value_stack_offset, color_stack_offset));
+}
+
+void WhiteNoiseTextureNode::compile(OSLCompiler &compiler)
+{
+ compiler.parameter(this, "dimensions");
+ compiler.add(this, "node_white_noise_texture");
+}
+
/* Musgrave Texture */
NODE_DEFINE(MusgraveTextureNode)
@@ -1092,6 +1356,13 @@ NODE_DEFINE(MusgraveTextureNode)
TEXTURE_MAPPING_DEFINE(MusgraveTextureNode);
+ static NodeEnum dimensions_enum;
+ dimensions_enum.insert("1D", 1);
+ dimensions_enum.insert("2D", 2);
+ dimensions_enum.insert("3D", 3);
+ dimensions_enum.insert("4D", 4);
+ SOCKET_ENUM(dimensions, "Dimensions", dimensions_enum, 3);
+
static NodeEnum type_enum;
type_enum.insert("multifractal", NODE_MUSGRAVE_MULTIFRACTAL);
type_enum.insert("fBM", NODE_MUSGRAVE_FBM);
@@ -1100,16 +1371,16 @@ NODE_DEFINE(MusgraveTextureNode)
type_enum.insert("hetero_terrain", NODE_MUSGRAVE_HETERO_TERRAIN);
SOCKET_ENUM(type, "Type", type_enum, NODE_MUSGRAVE_FBM);
+ SOCKET_IN_POINT(
+ vector, "Vector", make_float3(0.0f, 0.0f, 0.0f), SocketType::LINK_TEXTURE_GENERATED);
+ SOCKET_IN_FLOAT(w, "W", 0.0f);
SOCKET_IN_FLOAT(scale, "Scale", 1.0f);
SOCKET_IN_FLOAT(detail, "Detail", 2.0f);
SOCKET_IN_FLOAT(dimension, "Dimension", 2.0f);
- SOCKET_IN_FLOAT(lacunarity, "Lacunarity", 1.0f);
+ SOCKET_IN_FLOAT(lacunarity, "Lacunarity", 2.0f);
SOCKET_IN_FLOAT(offset, "Offset", 0.0f);
SOCKET_IN_FLOAT(gain, "Gain", 1.0f);
- SOCKET_IN_POINT(
- vector, "Vector", make_float3(0.0f, 0.0f, 0.0f), SocketType::LINK_TEXTURE_GENERATED);
- SOCKET_OUT_COLOR(color, "Color");
SOCKET_OUT_FLOAT(fac, "Fac");
return type;
@@ -1122,35 +1393,38 @@ MusgraveTextureNode::MusgraveTextureNode() : TextureNode(node_type)
void MusgraveTextureNode::compile(SVMCompiler &compiler)
{
ShaderInput *vector_in = input("Vector");
+ ShaderInput *w_in = input("W");
ShaderInput *scale_in = input("Scale");
+ ShaderInput *detail_in = input("Detail");
ShaderInput *dimension_in = input("Dimension");
ShaderInput *lacunarity_in = input("Lacunarity");
- ShaderInput *detail_in = input("Detail");
ShaderInput *offset_in = input("Offset");
ShaderInput *gain_in = input("Gain");
ShaderOutput *fac_out = output("Fac");
- ShaderOutput *color_out = output("Color");
- int vector_offset = tex_mapping.compile_begin(compiler, vector_in);
+ int vector_stack_offset = tex_mapping.compile_begin(compiler, vector_in);
+ int w_stack_offset = compiler.stack_assign_if_linked(w_in);
+ int scale_stack_offset = compiler.stack_assign_if_linked(scale_in);
+ int detail_stack_offset = compiler.stack_assign_if_linked(detail_in);
+ int dimension_stack_offset = compiler.stack_assign_if_linked(dimension_in);
+ int lacunarity_stack_offset = compiler.stack_assign_if_linked(lacunarity_in);
+ int offset_stack_offset = compiler.stack_assign_if_linked(offset_in);
+ int gain_stack_offset = compiler.stack_assign_if_linked(gain_in);
+ int fac_stack_offset = compiler.stack_assign(fac_out);
- compiler.add_node(NODE_TEX_MUSGRAVE,
- compiler.encode_uchar4(type,
- vector_offset,
- compiler.stack_assign_if_linked(color_out),
- compiler.stack_assign_if_linked(fac_out)),
- compiler.encode_uchar4(compiler.stack_assign_if_linked(dimension_in),
- compiler.stack_assign_if_linked(lacunarity_in),
- compiler.stack_assign_if_linked(detail_in),
- compiler.stack_assign_if_linked(offset_in)),
- compiler.encode_uchar4(compiler.stack_assign_if_linked(gain_in),
- compiler.stack_assign_if_linked(scale_in)));
- compiler.add_node(__float_as_int(dimension),
- __float_as_int(lacunarity),
- __float_as_int(detail),
- __float_as_int(offset));
- compiler.add_node(__float_as_int(gain), __float_as_int(scale));
+ compiler.add_node(
+ NODE_TEX_MUSGRAVE,
+ compiler.encode_uchar4(type, dimensions, vector_stack_offset, w_stack_offset),
+ compiler.encode_uchar4(scale_stack_offset,
+ detail_stack_offset,
+ dimension_stack_offset,
+ lacunarity_stack_offset),
+ compiler.encode_uchar4(offset_stack_offset, gain_stack_offset, fac_stack_offset));
+ compiler.add_node(
+ __float_as_int(w), __float_as_int(scale), __float_as_int(detail), __float_as_int(dimension));
+ compiler.add_node(__float_as_int(lacunarity), __float_as_int(offset), __float_as_int(gain));
- tex_mapping.compile_end(compiler, vector_in, vector_offset);
+ tex_mapping.compile_end(compiler, vector_in, vector_stack_offset);
}
void MusgraveTextureNode::compile(OSLCompiler &compiler)
@@ -1158,6 +1432,7 @@ void MusgraveTextureNode::compile(OSLCompiler &compiler)
tex_mapping.compile(compiler);
compiler.parameter(this, "type");
+ compiler.parameter(this, "dimensions");
compiler.add(this, "node_musgrave_texture");
}
@@ -1174,18 +1449,36 @@ NODE_DEFINE(WaveTextureNode)
type_enum.insert("rings", NODE_WAVE_RINGS);
SOCKET_ENUM(type, "Type", type_enum, NODE_WAVE_BANDS);
+ static NodeEnum bands_direction_enum;
+ bands_direction_enum.insert("x", NODE_WAVE_BANDS_DIRECTION_X);
+ bands_direction_enum.insert("y", NODE_WAVE_BANDS_DIRECTION_Y);
+ bands_direction_enum.insert("z", NODE_WAVE_BANDS_DIRECTION_Z);
+ bands_direction_enum.insert("diagonal", NODE_WAVE_BANDS_DIRECTION_DIAGONAL);
+ SOCKET_ENUM(
+ bands_direction, "Bands Direction", bands_direction_enum, NODE_WAVE_BANDS_DIRECTION_X);
+
+ static NodeEnum rings_direction_enum;
+ rings_direction_enum.insert("x", NODE_WAVE_RINGS_DIRECTION_X);
+ rings_direction_enum.insert("y", NODE_WAVE_RINGS_DIRECTION_Y);
+ rings_direction_enum.insert("z", NODE_WAVE_RINGS_DIRECTION_Z);
+ rings_direction_enum.insert("spherical", NODE_WAVE_RINGS_DIRECTION_SPHERICAL);
+ SOCKET_ENUM(
+ rings_direction, "Rings Direction", rings_direction_enum, NODE_WAVE_RINGS_DIRECTION_X);
+
static NodeEnum profile_enum;
profile_enum.insert("sine", NODE_WAVE_PROFILE_SIN);
profile_enum.insert("saw", NODE_WAVE_PROFILE_SAW);
+ profile_enum.insert("tri", NODE_WAVE_PROFILE_TRI);
SOCKET_ENUM(profile, "Profile", profile_enum, NODE_WAVE_PROFILE_SIN);
+ SOCKET_IN_POINT(
+ vector, "Vector", make_float3(0.0f, 0.0f, 0.0f), SocketType::LINK_TEXTURE_GENERATED);
SOCKET_IN_FLOAT(scale, "Scale", 1.0f);
SOCKET_IN_FLOAT(distortion, "Distortion", 0.0f);
SOCKET_IN_FLOAT(detail, "Detail", 2.0f);
SOCKET_IN_FLOAT(detail_scale, "Detail Scale", 0.0f);
- SOCKET_IN_POINT(
- vector, "Vector", make_float3(0.0f, 0.0f, 0.0f), SocketType::LINK_TEXTURE_GENERATED);
-
+ SOCKET_IN_FLOAT(detail_roughness, "Detail Roughness", 0.5f);
+ SOCKET_IN_FLOAT(phase, "Phase Offset", 0.0f);
SOCKET_OUT_COLOR(color, "Color");
SOCKET_OUT_FLOAT(fac, "Fac");
@@ -1198,31 +1491,38 @@ WaveTextureNode::WaveTextureNode() : TextureNode(node_type)
void WaveTextureNode::compile(SVMCompiler &compiler)
{
+ ShaderInput *vector_in = input("Vector");
ShaderInput *scale_in = input("Scale");
ShaderInput *distortion_in = input("Distortion");
- ShaderInput *dscale_in = input("Detail Scale");
ShaderInput *detail_in = input("Detail");
- ShaderInput *vector_in = input("Vector");
- ShaderOutput *fac_out = output("Fac");
+ ShaderInput *dscale_in = input("Detail Scale");
+ ShaderInput *droughness_in = input("Detail Roughness");
+ ShaderInput *phase_in = input("Phase Offset");
ShaderOutput *color_out = output("Color");
+ ShaderOutput *fac_out = output("Fac");
int vector_offset = tex_mapping.compile_begin(compiler, vector_in);
compiler.add_node(NODE_TEX_WAVE,
- compiler.encode_uchar4(type,
- compiler.stack_assign_if_linked(color_out),
- compiler.stack_assign_if_linked(fac_out),
- compiler.stack_assign_if_linked(dscale_in)),
+ compiler.encode_uchar4(type, bands_direction, rings_direction, profile),
compiler.encode_uchar4(vector_offset,
compiler.stack_assign_if_linked(scale_in),
- compiler.stack_assign_if_linked(detail_in),
compiler.stack_assign_if_linked(distortion_in)),
- profile);
+ compiler.encode_uchar4(compiler.stack_assign_if_linked(detail_in),
+ compiler.stack_assign_if_linked(dscale_in),
+ compiler.stack_assign_if_linked(droughness_in),
+ compiler.stack_assign_if_linked(phase_in)));
- compiler.add_node(__float_as_int(scale),
- __float_as_int(detail),
+ compiler.add_node(compiler.encode_uchar4(compiler.stack_assign_if_linked(color_out),
+ compiler.stack_assign_if_linked(fac_out)),
+ __float_as_int(scale),
__float_as_int(distortion),
- __float_as_int(detail_scale));
+ __float_as_int(detail));
+
+ compiler.add_node(__float_as_int(detail_scale),
+ __float_as_int(detail_roughness),
+ __float_as_int(phase),
+ SVM_STACK_INVALID);
tex_mapping.compile_end(compiler, vector_in, vector_offset);
}
@@ -1232,6 +1532,8 @@ void WaveTextureNode::compile(OSLCompiler &compiler)
tex_mapping.compile(compiler);
compiler.parameter(this, "type");
+ compiler.parameter(this, "bands_direction");
+ compiler.parameter(this, "rings_direction");
compiler.parameter(this, "profile");
compiler.add(this, "node_wave_texture");
@@ -1474,24 +1776,19 @@ NODE_DEFINE(PointDensityTextureNode)
PointDensityTextureNode::PointDensityTextureNode() : ShaderNode(node_type)
{
- image_manager = NULL;
- slot = -1;
- builtin_data = NULL;
}
PointDensityTextureNode::~PointDensityTextureNode()
{
- if (image_manager) {
- image_manager->remove_image(
- filename.string(), builtin_data, interpolation, EXTENSION_CLIP, true);
- }
}
ShaderNode *PointDensityTextureNode::clone() const
{
+ /* Increase image user count for new node. We need to ensure we do not call
+ * add_image again, to work around access of freed data on the Blender
+ * side. A better solution should be found to avoid this. */
PointDensityTextureNode *node = new PointDensityTextureNode(*this);
- node->image_manager = NULL;
- node->slot = -1;
+ node->handle = handle; /* TODO: not needed? */
return node;
}
@@ -1503,13 +1800,11 @@ void PointDensityTextureNode::attributes(Shader *shader, AttributeRequestSet *attributes)
ShaderNode::attributes(shader, attributes);
}
-void PointDensityTextureNode::add_image()
+ImageParams PointDensityTextureNode::image_params() const
{
- if (slot == -1) {
- ImageMetaData metadata;
- slot = image_manager->add_image(
- filename.string(), builtin_data, false, 0, interpolation, EXTENSION_CLIP, true, metadata);
- }
+ ImageParams params;
+ params.interpolation = interpolation;
+ return params;
}
void PointDensityTextureNode::compile(SVMCompiler &compiler)
@@ -1521,11 +1816,13 @@ void PointDensityTextureNode::compile(SVMCompiler &compiler)
const bool use_density = !density_out->links.empty();
const bool use_color = !color_out->links.empty();
- image_manager = compiler.image_manager;
-
if (use_density || use_color) {
- add_image();
+ if (handle.empty()) {
+ ImageManager *image_manager = compiler.scene->image_manager;
+ handle = image_manager->add_image(filename.string(), image_params());
+ }
+ const int slot = handle.svm_slot();
if (slot != -1) {
compiler.stack_assign(vector_in);
compiler.add_node(NODE_TEX_VOXEL,
@@ -1562,14 +1859,13 @@ void PointDensityTextureNode::compile(OSLCompiler &compiler)
const bool use_density = !density_out->links.empty();
const bool use_color = !color_out->links.empty();
- image_manager = compiler.image_manager;
-
if (use_density || use_color) {
- add_image();
-
- if (slot != -1) {
- compiler.parameter("filename", string_printf("@i%d", slot).c_str());
+ if (handle.empty()) {
+ ImageManager *image_manager = compiler.scene->image_manager;
+ handle = image_manager->add_image(filename.string(), image_params());
}
+
+ compiler.parameter_texture("filename", handle.svm_slot());
if (space == NODE_TEX_VOXEL_SPACE_WORLD) {
compiler.parameter("mapping", tfm);
compiler.parameter("use_mapping", 1);
@@ -1625,9 +1921,18 @@ NODE_DEFINE(MappingNode)
{
NodeType *type = NodeType::add("mapping", create, NodeType::SHADER);
- TEXTURE_MAPPING_DEFINE(MappingNode);
+ static NodeEnum type_enum;
+ type_enum.insert("point", NODE_MAPPING_TYPE_POINT);
+ type_enum.insert("texture", NODE_MAPPING_TYPE_TEXTURE);
+ type_enum.insert("vector", NODE_MAPPING_TYPE_VECTOR);
+ type_enum.insert("normal", NODE_MAPPING_TYPE_NORMAL);
+ SOCKET_ENUM(type, "Type", type_enum, NODE_MAPPING_TYPE_POINT);
SOCKET_IN_POINT(vector, "Vector", make_float3(0.0f, 0.0f, 0.0f));
+ SOCKET_IN_POINT(location, "Location", make_float3(0.0f, 0.0f, 0.0f));
+ SOCKET_IN_POINT(rotation, "Rotation", make_float3(0.0f, 0.0f, 0.0f));
+ SOCKET_IN_POINT(scale, "Scale", make_float3(1.0f, 1.0f, 1.0f));
+
SOCKET_OUT_POINT(vector, "Vector");
return type;
@@ -1637,22 +1942,42 @@ MappingNode::MappingNode() : ShaderNode(node_type)
{
}
+void MappingNode::constant_fold(const ConstantFolder &folder)
+{
+ if (folder.all_inputs_constant()) {
+ float3 result = svm_mapping((NodeMappingType)type, vector, location, rotation, scale);
+ folder.make_constant(result);
+ }
+ else {
+ folder.fold_mapping((NodeMappingType)type);
+ }
+}
+
void MappingNode::compile(SVMCompiler &compiler)
{
ShaderInput *vector_in = input("Vector");
+ ShaderInput *location_in = input("Location");
+ ShaderInput *rotation_in = input("Rotation");
+ ShaderInput *scale_in = input("Scale");
ShaderOutput *vector_out = output("Vector");
- tex_mapping.compile(
- compiler, compiler.stack_assign(vector_in), compiler.stack_assign(vector_out));
+ int vector_stack_offset = compiler.stack_assign(vector_in);
+ int location_stack_offset = compiler.stack_assign(location_in);
+ int rotation_stack_offset = compiler.stack_assign(rotation_in);
+ int scale_stack_offset = compiler.stack_assign(scale_in);
+ int result_stack_offset = compiler.stack_assign(vector_out);
+
+ compiler.add_node(
+ NODE_MAPPING,
+ type,
+ compiler.encode_uchar4(
+ vector_stack_offset, location_stack_offset, rotation_stack_offset, scale_stack_offset),
+ result_stack_offset);
}
void MappingNode::compile(OSLCompiler &compiler)
{
- compiler.parameter("Matrix", tex_mapping.compute_transform());
- compiler.parameter_point("mapping_min", tex_mapping.min);
- compiler.parameter_point("mapping_max", tex_mapping.max);
- compiler.parameter("use_minmax", tex_mapping.use_minmax);
-
+ compiler.parameter(this, "type");
compiler.add(this, "node_mapping");
}
@@ -1964,12 +2289,11 @@ NODE_DEFINE(AnisotropicBsdfNode)
SOCKET_IN_FLOAT(surface_mix_weight, "SurfaceMixWeight", 0.0f, SocketType::SVM_INTERNAL);
static NodeEnum distribution_enum;
- distribution_enum.insert("beckmann", CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID);
- distribution_enum.insert("GGX", CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID);
- distribution_enum.insert("Multiscatter GGX", CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID);
- distribution_enum.insert("ashikhmin_shirley", CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID);
- SOCKET_ENUM(
- distribution, "Distribution", distribution_enum, CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID);
+ distribution_enum.insert("beckmann", CLOSURE_BSDF_MICROFACET_BECKMANN_ID);
+ distribution_enum.insert("GGX", CLOSURE_BSDF_MICROFACET_GGX_ID);
+ distribution_enum.insert("Multiscatter GGX", CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID);
+ distribution_enum.insert("ashikhmin_shirley", CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID);
+ SOCKET_ENUM(distribution, "Distribution", distribution_enum, CLOSURE_BSDF_MICROFACET_GGX_ID);
SOCKET_IN_VECTOR(tangent, "Tangent", make_float3(0.0f, 0.0f, 0.0f), SocketType::LINK_TANGENT);
@@ -1984,7 +2308,7 @@ NODE_DEFINE(AnisotropicBsdfNode)
AnisotropicBsdfNode::AnisotropicBsdfNode() : BsdfNode(node_type)
{
- closure = CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID;
+ closure = CLOSURE_BSDF_MICROFACET_GGX_ID;
}
void AnisotropicBsdfNode::attributes(Shader *shader, AttributeRequestSet *attributes)
@@ -2003,7 +2327,7 @@ void AnisotropicBsdfNode::compile(SVMCompiler &compiler)
{
closure = distribution;
- if (closure == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID)
+ if (closure == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID)
BsdfNode::compile(
compiler, input("Roughness"), input("Anisotropy"), input("Rotation"), input("Color"));
else
@@ -2097,7 +2421,7 @@ void GlossyBsdfNode::compile(SVMCompiler &compiler)
if (closure == CLOSURE_BSDF_REFLECTION_ID)
BsdfNode::compile(compiler, NULL, NULL);
else if (closure == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID)
- BsdfNode::compile(compiler, input("Roughness"), NULL, input("Color"));
+ BsdfNode::compile(compiler, input("Roughness"), NULL, NULL, input("Color"));
else
BsdfNode::compile(compiler, input("Roughness"), NULL);
}
@@ -2430,6 +2754,8 @@ NODE_DEFINE(PrincipledBsdfNode)
SOCKET_IN_FLOAT(transmission, "Transmission", 0.0f);
SOCKET_IN_FLOAT(transmission_roughness, "Transmission Roughness", 0.0f);
SOCKET_IN_FLOAT(anisotropic_rotation, "Anisotropic Rotation", 0.0f);
+ SOCKET_IN_COLOR(emission, "Emission", make_float3(0.0f, 0.0f, 0.0f));
+ SOCKET_IN_FLOAT(alpha, "Alpha", 1.0f);
SOCKET_IN_NORMAL(normal, "Normal", make_float3(0.0f, 0.0f, 0.0f), SocketType::LINK_NORMAL);
SOCKET_IN_NORMAL(clearcoat_normal,
"Clearcoat Normal",
@@ -2450,6 +2776,48 @@ PrincipledBsdfNode::PrincipledBsdfNode() : BsdfBaseNode(node_type)
distribution_orig = NBUILTIN_CLOSURES;
}
+void PrincipledBsdfNode::expand(ShaderGraph *graph)
+{
+ ShaderOutput *principled_out = output("BSDF");
+
+ ShaderInput *emission_in = input("Emission");
+ if (emission_in->link || emission != make_float3(0.0f, 0.0f, 0.0f)) {
+ /* Create add closure and emission. */
+ AddClosureNode *add = new AddClosureNode();
+ EmissionNode *emission_node = new EmissionNode();
+ ShaderOutput *new_out = add->output("Closure");
+
+ graph->add(add);
+ graph->add(emission_node);
+
+ emission_node->strength = 1.0f;
+ graph->relink(emission_in, emission_node->input("Color"));
+ graph->relink(principled_out, new_out);
+ graph->connect(emission_node->output("Emission"), add->input("Closure1"));
+ graph->connect(principled_out, add->input("Closure2"));
+
+ principled_out = new_out;
+ }
+
+ ShaderInput *alpha_in = input("Alpha");
+ if (alpha_in->link || alpha != 1.0f) {
+ /* Create mix and transparent BSDF for alpha transparency. */
+ MixClosureNode *mix = new MixClosureNode();
+ TransparentBsdfNode *transparent = new TransparentBsdfNode();
+
+ graph->add(mix);
+ graph->add(transparent);
+
+ graph->relink(alpha_in, mix->input("Fac"));
+ graph->relink(principled_out, mix->output("Closure"));
+ graph->connect(transparent->output("BSDF"), mix->input("Closure1"));
+ graph->connect(principled_out, mix->input("Closure2"));
+ }
+
+ remove_input(emission_in);
+ remove_input(alpha_in);
+}
+
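The expansion above handles emission and alpha by rewiring the graph rather than extending the closure itself. Sketch of the resulting wiring (an illustration, not code from the patch):

/*
 *   Principled BSDF ------------------+
 *                                     +--> AddClosure -----> Closure2 --+
 *   Emission color (strength 1.0) ----+                                 +--> MixClosure --> original BSDF links
 *   Transparent BSDF ---------------------------------------> Closure1 --+
 *   Alpha input -------------------------------------------------> Fac --+
 *
 * With Alpha = 1 the mix returns the emissive Principled result unchanged;
 * with Alpha = 0 only the Transparent BSDF remains.
 */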
bool PrincipledBsdfNode::has_surface_bssrdf()
{
ShaderInput *subsurface_in = input("Subsurface");
@@ -2630,7 +2998,7 @@ NODE_DEFINE(TransparentBsdfNode)
{
NodeType *type = NodeType::add("transparent_bsdf", create, NodeType::SHADER);
- SOCKET_IN_COLOR(color, "Color", make_float3(0.8f, 0.8f, 0.8f));
+ SOCKET_IN_COLOR(color, "Color", make_float3(1.0f, 1.0f, 1.0f));
SOCKET_IN_FLOAT(surface_mix_weight, "SurfaceMixWeight", 0.0f, SocketType::SVM_INTERNAL);
SOCKET_OUT_CLOSURE(BSDF, "BSDF");
@@ -3010,7 +3378,7 @@ NODE_DEFINE(PrincipledVolumeNode)
SOCKET_IN_COLOR(emission_color, "Emission Color", make_float3(1.0f, 1.0f, 1.0f));
SOCKET_IN_FLOAT(blackbody_intensity, "Blackbody Intensity", 0.0f);
SOCKET_IN_COLOR(blackbody_tint, "Blackbody Tint", make_float3(1.0f, 1.0f, 1.0f));
- SOCKET_IN_FLOAT(temperature, "Temperature", 1500.0f);
+ SOCKET_IN_FLOAT(temperature, "Temperature", 1000.0f);
SOCKET_IN_FLOAT(volume_mix_weight, "VolumeMixWeight", 0.0f, SocketType::SVM_INTERNAL);
SOCKET_OUT_CLOSURE(volume, "Volume");
@@ -3021,6 +3389,8 @@ NODE_DEFINE(PrincipledVolumeNode)
PrincipledVolumeNode::PrincipledVolumeNode() : VolumeNode(node_type)
{
closure = CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID;
+ density_attribute = ustring("density");
+ temperature_attribute = ustring("temperature");
}
void PrincipledVolumeNode::attributes(Shader *shader, AttributeRequestSet *attributes)
@@ -3289,6 +3659,7 @@ NODE_DEFINE(GeometryNode)
SOCKET_OUT_POINT(parametric, "Parametric");
SOCKET_OUT_FLOAT(backfacing, "Backfacing");
SOCKET_OUT_FLOAT(pointiness, "Pointiness");
+ SOCKET_OUT_FLOAT(random_per_island, "Random Per Island");
return type;
}
@@ -3307,6 +3678,9 @@ void GeometryNode::attributes(Shader *shader, AttributeRequestSet *attributes)
if (!output("Pointiness")->links.empty()) {
attributes->add(ATTR_STD_POINTINESS);
}
+ if (!output("Random Per Island")->links.empty()) {
+ attributes->add(ATTR_STD_RANDOM_PER_ISLAND);
+ }
}
ShaderNode::attributes(shader, attributes);
@@ -3372,6 +3746,17 @@ void GeometryNode::compile(SVMCompiler &compiler)
compiler.add_node(NODE_VALUE_F, __float_as_int(0.0f), compiler.stack_assign(out));
}
}
+
+ out = output("Random Per Island");
+ if (!out->links.empty()) {
+ if (compiler.output_type() != SHADER_TYPE_VOLUME) {
+ compiler.add_node(
+ attr_node, ATTR_STD_RANDOM_PER_ISLAND, compiler.stack_assign(out), NODE_ATTR_FLOAT);
+ }
+ else {
+ compiler.add_node(NODE_VALUE_F, __float_as_int(0.0f), compiler.stack_assign(out));
+ }
+ }
}
void GeometryNode::compile(OSLCompiler &compiler)
@@ -3811,6 +4196,7 @@ NODE_DEFINE(ObjectInfoNode)
NodeType *type = NodeType::add("object_info", create, NodeType::SHADER);
SOCKET_OUT_VECTOR(location, "Location");
+ SOCKET_OUT_COLOR(color, "Color");
SOCKET_OUT_FLOAT(object_index, "Object Index");
SOCKET_OUT_FLOAT(material_index, "Material Index");
SOCKET_OUT_FLOAT(random, "Random");
@@ -3829,6 +4215,11 @@ void ObjectInfoNode::compile(SVMCompiler &compiler)
compiler.add_node(NODE_OBJECT_INFO, NODE_INFO_OB_LOCATION, compiler.stack_assign(out));
}
+ out = output("Color");
+ if (!out->links.empty()) {
+ compiler.add_node(NODE_OBJECT_INFO, NODE_INFO_OB_COLOR, compiler.stack_assign(out));
+ }
+
out = output("Object Index");
if (!out->links.empty()) {
compiler.add_node(NODE_OBJECT_INFO, NODE_INFO_OB_INDEX, compiler.stack_assign(out));
@@ -3888,7 +4279,7 @@ void ParticleInfoNode::attributes(Shader *shader, AttributeRequestSet *attribute
if (!output("Location")->links.empty())
attributes->add(ATTR_STD_PARTICLE);
#if 0 /* not yet supported */
- if(!output("Rotation")->links.empty())
+ if (!output("Rotation")->links.empty())
attributes->add(ATTR_STD_PARTICLE);
#endif
if (!output("Size")->links.empty())
@@ -3933,7 +4324,7 @@ void ParticleInfoNode::compile(SVMCompiler &compiler)
/* quaternion data is not yet supported by Cycles */
#if 0
out = output("Rotation");
- if(!out->links.empty()) {
+ if (!out->links.empty()) {
compiler.add_node(NODE_PARTICLE_INFO, NODE_INFO_PAR_ROTATION, compiler.stack_assign(out));
}
#endif
@@ -4039,6 +4430,170 @@ void HairInfoNode::compile(OSLCompiler &compiler)
compiler.add(this, "node_hair_info");
}
+/* Volume Info */
+
+NODE_DEFINE(VolumeInfoNode)
+{
+ NodeType *type = NodeType::add("volume_info", create, NodeType::SHADER);
+
+ SOCKET_OUT_COLOR(color, "Color");
+ SOCKET_OUT_FLOAT(density, "Density");
+ SOCKET_OUT_FLOAT(flame, "Flame");
+ SOCKET_OUT_FLOAT(temperature, "Temperature");
+
+ return type;
+}
+
+VolumeInfoNode::VolumeInfoNode() : ShaderNode(node_type)
+{
+}
+
+/* The requested attributes are not updated after node expansion.
+ * So we explicitly request the required attributes.
+ */
+void VolumeInfoNode::attributes(Shader *shader, AttributeRequestSet *attributes)
+{
+ if (shader->has_volume) {
+ if (!output("Color")->links.empty()) {
+ attributes->add(ATTR_STD_VOLUME_COLOR);
+ }
+ if (!output("Density")->links.empty()) {
+ attributes->add(ATTR_STD_VOLUME_DENSITY);
+ }
+ if (!output("Flame")->links.empty()) {
+ attributes->add(ATTR_STD_VOLUME_FLAME);
+ }
+ if (!output("Temperature")->links.empty()) {
+ attributes->add(ATTR_STD_VOLUME_TEMPERATURE);
+ }
+ attributes->add(ATTR_STD_GENERATED_TRANSFORM);
+ }
+ ShaderNode::attributes(shader, attributes);
+}
+
+void VolumeInfoNode::expand(ShaderGraph *graph)
+{
+ ShaderOutput *color_out = output("Color");
+ if (!color_out->links.empty()) {
+ AttributeNode *attr = new AttributeNode();
+ attr->attribute = "color";
+ graph->add(attr);
+ graph->relink(color_out, attr->output("Color"));
+ }
+
+ ShaderOutput *density_out = output("Density");
+ if (!density_out->links.empty()) {
+ AttributeNode *attr = new AttributeNode();
+ attr->attribute = "density";
+ graph->add(attr);
+ graph->relink(density_out, attr->output("Fac"));
+ }
+
+ ShaderOutput *flame_out = output("Flame");
+ if (!flame_out->links.empty()) {
+ AttributeNode *attr = new AttributeNode();
+ attr->attribute = "flame";
+ graph->add(attr);
+ graph->relink(flame_out, attr->output("Fac"));
+ }
+
+ ShaderOutput *temperature_out = output("Temperature");
+ if (!temperature_out->links.empty()) {
+ AttributeNode *attr = new AttributeNode();
+ attr->attribute = "temperature";
+ graph->add(attr);
+ graph->relink(temperature_out, attr->output("Fac"));
+ }
+}
+
+void VolumeInfoNode::compile(SVMCompiler &)
+{
+}
+
+void VolumeInfoNode::compile(OSLCompiler &)
+{
+}
+
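All four outputs follow the same pattern: each linked output is replaced by an AttributeNode lookup on a conventionally named volume grid, with Color relinked to the attribute's Color output and the scalar sockets relinked to Fac. A compact restatement of that mapping, as a sketch of the table the code above effectively encodes:

// Sketch only: summarizes the relinking done in VolumeInfoNode::expand() above.
struct VolumeInfoMapping {
  const char *output;       // VolumeInfoNode output socket
  const char *attribute;    // volume attribute requested from the grid
  const char *attr_output;  // AttributeNode output used for the relink
};

static const VolumeInfoMapping volume_info_map[] = {
    {"Color", "color", "Color"},
    {"Density", "density", "Fac"},
    {"Flame", "flame", "Fac"},
    {"Temperature", "temperature", "Fac"},
};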
+NODE_DEFINE(VertexColorNode)
+{
+ NodeType *type = NodeType::add("vertex_color", create, NodeType::SHADER);
+
+ SOCKET_STRING(layer_name, "Layer Name", ustring());
+ SOCKET_OUT_COLOR(color, "Color");
+ SOCKET_OUT_FLOAT(alpha, "Alpha");
+
+ return type;
+}
+
+VertexColorNode::VertexColorNode() : ShaderNode(node_type)
+{
+}
+
+void VertexColorNode::attributes(Shader *shader, AttributeRequestSet *attributes)
+{
+ if (!(output("Color")->links.empty() && output("Alpha")->links.empty())) {
+ if (layer_name != "")
+ attributes->add_standard(layer_name);
+ else
+ attributes->add(ATTR_STD_VERTEX_COLOR);
+ }
+ ShaderNode::attributes(shader, attributes);
+}
+
+void VertexColorNode::compile(SVMCompiler &compiler)
+{
+ ShaderOutput *color_out = output("Color");
+ ShaderOutput *alpha_out = output("Alpha");
+ int layer_id = 0;
+
+ if (layer_name != "") {
+ layer_id = compiler.attribute(layer_name);
+ }
+ else {
+ layer_id = compiler.attribute(ATTR_STD_VERTEX_COLOR);
+ }
+
+ ShaderNodeType node;
+
+ if (bump == SHADER_BUMP_DX)
+ node = NODE_VERTEX_COLOR_BUMP_DX;
+ else if (bump == SHADER_BUMP_DY)
+ node = NODE_VERTEX_COLOR_BUMP_DY;
+ else {
+ node = NODE_VERTEX_COLOR;
+ }
+
+ compiler.add_node(
+ node, layer_id, compiler.stack_assign(color_out), compiler.stack_assign(alpha_out));
+}
+
+void VertexColorNode::compile(OSLCompiler &compiler)
+{
+ if (bump == SHADER_BUMP_DX) {
+ compiler.parameter("bump_offset", "dx");
+ }
+ else if (bump == SHADER_BUMP_DY) {
+ compiler.parameter("bump_offset", "dy");
+ }
+ else {
+ compiler.parameter("bump_offset", "center");
+ }
+
+ if (layer_name.empty()) {
+ compiler.parameter("layer_name", ustring("geom:vertex_color"));
+ }
+ else {
+ if (Attribute::name_standard(layer_name.c_str()) != ATTR_STD_NONE) {
+ compiler.parameter("name", (string("geom:") + layer_name.c_str()).c_str());
+ }
+ else {
+ compiler.parameter("layer_name", layer_name.c_str());
+ }
+ }
+
+ compiler.add(this, "node_vertex_color");
+}
+
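The OSL path above resolves the layer name in three ways: an empty name falls back to the standard vertex color attribute, a name recognized as a standard attribute is passed with a geom: prefix through the "name" parameter, and any other name is passed through as a per-mesh layer name. A small sketch of that resolution, where name_is_standard stands in for the Attribute::name_standard() check:

#include <string>

// Sketch of the layer-name resolution in VertexColorNode::compile(OSLCompiler &) above.
static std::string resolve_vertex_color_name(const std::string &layer_name, bool name_is_standard)
{
  if (layer_name.empty())
    return "geom:vertex_color";   // default standard attribute
  if (name_is_standard)
    return "geom:" + layer_name;  // standard attribute, passed via the "name" parameter
  return layer_name;              // custom layer, passed via "layer_name"
}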
/* Value */
NODE_DEFINE(ValueNode)
@@ -5179,6 +5734,212 @@ void OutputNode::compile(OSLCompiler &compiler)
compiler.add(this, "node_output_displacement");
}
+/* Map Range Node */
+
+NODE_DEFINE(MapRangeNode)
+{
+ NodeType *type = NodeType::add("map_range", create, NodeType::SHADER);
+
+ static NodeEnum type_enum;
+ type_enum.insert("linear", NODE_MAP_RANGE_LINEAR);
+ type_enum.insert("stepped", NODE_MAP_RANGE_STEPPED);
+ type_enum.insert("smoothstep", NODE_MAP_RANGE_SMOOTHSTEP);
+ type_enum.insert("smootherstep", NODE_MAP_RANGE_SMOOTHERSTEP);
+ SOCKET_ENUM(type, "Type", type_enum, NODE_MAP_RANGE_LINEAR);
+
+ SOCKET_IN_FLOAT(value, "Value", 1.0f);
+ SOCKET_IN_FLOAT(from_min, "From Min", 0.0f);
+ SOCKET_IN_FLOAT(from_max, "From Max", 1.0f);
+ SOCKET_IN_FLOAT(to_min, "To Min", 0.0f);
+ SOCKET_IN_FLOAT(to_max, "To Max", 1.0f);
+ SOCKET_IN_FLOAT(steps, "Steps", 4.0f);
+
+ SOCKET_OUT_FLOAT(result, "Result");
+
+ return type;
+}
+
+MapRangeNode::MapRangeNode() : ShaderNode(node_type)
+{
+}
+
+void MapRangeNode::expand(ShaderGraph *graph)
+{
+ if (clamp) {
+ ShaderOutput *result_out = output("Result");
+ if (!result_out->links.empty()) {
+ ClampNode *clamp_node = new ClampNode();
+ clamp_node->type = NODE_CLAMP_RANGE;
+ graph->add(clamp_node);
+ graph->relink(result_out, clamp_node->output("Result"));
+ graph->connect(result_out, clamp_node->input("Value"));
+ if (input("To Min")->link) {
+ graph->connect(input("To Min")->link, clamp_node->input("Min"));
+ }
+ else {
+ clamp_node->min = to_min;
+ }
+ if (input("To Max")->link) {
+ graph->connect(input("To Max")->link, clamp_node->input("Max"));
+ }
+ else {
+ clamp_node->max = to_max;
+ }
+ }
+ }
+}
+
+void MapRangeNode::compile(SVMCompiler &compiler)
+{
+ ShaderInput *value_in = input("Value");
+ ShaderInput *from_min_in = input("From Min");
+ ShaderInput *from_max_in = input("From Max");
+ ShaderInput *to_min_in = input("To Min");
+ ShaderInput *to_max_in = input("To Max");
+ ShaderInput *steps_in = input("Steps");
+ ShaderOutput *result_out = output("Result");
+
+ int value_stack_offset = compiler.stack_assign(value_in);
+ int from_min_stack_offset = compiler.stack_assign_if_linked(from_min_in);
+ int from_max_stack_offset = compiler.stack_assign_if_linked(from_max_in);
+ int to_min_stack_offset = compiler.stack_assign_if_linked(to_min_in);
+ int to_max_stack_offset = compiler.stack_assign_if_linked(to_max_in);
+ int steps_stack_offset = compiler.stack_assign(steps_in);
+ int result_stack_offset = compiler.stack_assign(result_out);
+
+ compiler.add_node(
+ NODE_MAP_RANGE,
+ value_stack_offset,
+ compiler.encode_uchar4(
+ from_min_stack_offset, from_max_stack_offset, to_min_stack_offset, to_max_stack_offset),
+ compiler.encode_uchar4(type, steps_stack_offset, result_stack_offset));
+
+ compiler.add_node(__float_as_int(from_min),
+ __float_as_int(from_max),
+ __float_as_int(to_min),
+ __float_as_int(to_max));
+ compiler.add_node(__float_as_int(steps));
+}
+
+void MapRangeNode::compile(OSLCompiler &compiler)
+{
+ compiler.parameter(this, "type");
+ compiler.add(this, "node_map_range");
+}
+
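In the linear mode the node evaluates the usual remap, result = to_min + (value - from_min) * (to_max - to_min) / (from_max - from_min); clamping is not done here but by the ClampNode that expand() appends. A standalone sketch of that math, not the SVM kernel itself, and assuming the degenerate from_min == from_max case is guarded elsewhere:

#include <algorithm>

// Linear map range plus the optional range clamp appended by MapRangeNode::expand().
static float map_range_linear(
    float value, float from_min, float from_max, float to_min, float to_max, bool use_clamp)
{
  float factor = (value - from_min) / (from_max - from_min);
  float result = to_min + factor * (to_max - to_min);
  if (use_clamp) {
    // NODE_CLAMP_RANGE clamps against the ordered bounds even when to_min > to_max.
    result = std::clamp(result, std::min(to_min, to_max), std::max(to_min, to_max));
  }
  return result;
}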
+/* Clamp Node */
+
+NODE_DEFINE(ClampNode)
+{
+ NodeType *type = NodeType::add("clamp", create, NodeType::SHADER);
+
+ static NodeEnum type_enum;
+ type_enum.insert("minmax", NODE_CLAMP_MINMAX);
+ type_enum.insert("range", NODE_CLAMP_RANGE);
+ SOCKET_ENUM(type, "Type", type_enum, NODE_CLAMP_MINMAX);
+
+ SOCKET_IN_FLOAT(value, "Value", 1.0f);
+ SOCKET_IN_FLOAT(min, "Min", 0.0f);
+ SOCKET_IN_FLOAT(max, "Max", 1.0f);
+
+ SOCKET_OUT_FLOAT(result, "Result");
+
+ return type;
+}
+
+ClampNode::ClampNode() : ShaderNode(node_type)
+{
+}
+
+void ClampNode::constant_fold(const ConstantFolder &folder)
+{
+ if (folder.all_inputs_constant()) {
+ if (type == NODE_CLAMP_RANGE && (min > max)) {
+ folder.make_constant(clamp(value, max, min));
+ }
+ else {
+ folder.make_constant(clamp(value, min, max));
+ }
+ }
+}
+
+void ClampNode::compile(SVMCompiler &compiler)
+{
+ ShaderInput *value_in = input("Value");
+ ShaderInput *min_in = input("Min");
+ ShaderInput *max_in = input("Max");
+ ShaderOutput *result_out = output("Result");
+
+ int value_stack_offset = compiler.stack_assign(value_in);
+ int min_stack_offset = compiler.stack_assign(min_in);
+ int max_stack_offset = compiler.stack_assign(max_in);
+ int result_stack_offset = compiler.stack_assign(result_out);
+
+ compiler.add_node(NODE_CLAMP,
+ value_stack_offset,
+ compiler.encode_uchar4(min_stack_offset, max_stack_offset, type),
+ result_stack_offset);
+ compiler.add_node(__float_as_int(min), __float_as_int(max));
+}
+
+void ClampNode::compile(OSLCompiler &compiler)
+{
+ compiler.parameter(this, "type");
+ compiler.add(this, "node_clamp");
+}
+
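The two clamp modes only differ when the bounds are inverted. For example, with value = 0.5, min = 1.0 and max = 0.0, and assuming the usual clamp(v, lo, hi) = min(max(v, lo), hi): the Min Max mode pins the result to 0.0, while the Range mode takes the min > max branch above, swaps the bounds, and returns 0.5.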
+/* AOV Output */
+
+NODE_DEFINE(OutputAOVNode)
+{
+ NodeType *type = NodeType::add("aov_output", create, NodeType::SHADER);
+
+ SOCKET_IN_COLOR(color, "Color", make_float3(0.0f, 0.0f, 0.0f));
+ SOCKET_IN_FLOAT(value, "Value", 0.0f);
+
+ SOCKET_STRING(name, "AOV Name", ustring(""));
+
+ return type;
+}
+
+OutputAOVNode::OutputAOVNode() : ShaderNode(node_type)
+{
+ special_type = SHADER_SPECIAL_TYPE_OUTPUT_AOV;
+ slot = -1;
+}
+
+void OutputAOVNode::simplify_settings(Scene *scene)
+{
+ slot = scene->film->get_aov_offset(name.string(), is_color);
+ if (slot == -1) {
+ slot = scene->film->get_aov_offset(name.string(), is_color);
+ }
+
+ if (slot == -1 || is_color) {
+ input("Value")->disconnect();
+ }
+ if (slot == -1 || !is_color) {
+ input("Color")->disconnect();
+ }
+}
+
+void OutputAOVNode::compile(SVMCompiler &compiler)
+{
+ assert(slot >= 0);
+
+ if (is_color) {
+ compiler.add_node(NODE_AOV_COLOR, compiler.stack_assign(input("Color")), slot);
+ }
+ else {
+ compiler.add_node(NODE_AOV_VALUE, compiler.stack_assign(input("Value")), slot);
+ }
+}
+
+void OutputAOVNode::compile(OSLCompiler & /*compiler*/)
+{
+ /* TODO */
+}
+
/* Math */
NODE_DEFINE(MathNode)
@@ -5190,9 +5951,13 @@ NODE_DEFINE(MathNode)
type_enum.insert("subtract", NODE_MATH_SUBTRACT);
type_enum.insert("multiply", NODE_MATH_MULTIPLY);
type_enum.insert("divide", NODE_MATH_DIVIDE);
+ type_enum.insert("multiply_add", NODE_MATH_MULTIPLY_ADD);
type_enum.insert("sine", NODE_MATH_SINE);
type_enum.insert("cosine", NODE_MATH_COSINE);
type_enum.insert("tangent", NODE_MATH_TANGENT);
+ type_enum.insert("sinh", NODE_MATH_SINH);
+ type_enum.insert("cosh", NODE_MATH_COSH);
+ type_enum.insert("tanh", NODE_MATH_TANH);
type_enum.insert("arcsine", NODE_MATH_ARCSINE);
type_enum.insert("arccosine", NODE_MATH_ARCCOSINE);
type_enum.insert("arctangent", NODE_MATH_ARCTANGENT);
@@ -5208,14 +5973,27 @@ NODE_DEFINE(MathNode)
type_enum.insert("arctan2", NODE_MATH_ARCTAN2);
type_enum.insert("floor", NODE_MATH_FLOOR);
type_enum.insert("ceil", NODE_MATH_CEIL);
- type_enum.insert("fract", NODE_MATH_FRACT);
+ type_enum.insert("fraction", NODE_MATH_FRACTION);
+ type_enum.insert("trunc", NODE_MATH_TRUNC);
+ type_enum.insert("snap", NODE_MATH_SNAP);
+ type_enum.insert("wrap", NODE_MATH_WRAP);
+ type_enum.insert("pingpong", NODE_MATH_PINGPONG);
type_enum.insert("sqrt", NODE_MATH_SQRT);
+ type_enum.insert("inversesqrt", NODE_MATH_INV_SQRT);
+ type_enum.insert("sign", NODE_MATH_SIGN);
+ type_enum.insert("exponent", NODE_MATH_EXPONENT);
+ type_enum.insert("radians", NODE_MATH_RADIANS);
+ type_enum.insert("degrees", NODE_MATH_DEGREES);
+ type_enum.insert("smoothmin", NODE_MATH_SMOOTH_MIN);
+ type_enum.insert("smoothmax", NODE_MATH_SMOOTH_MAX);
+ type_enum.insert("compare", NODE_MATH_COMPARE);
SOCKET_ENUM(type, "Type", type_enum, NODE_MATH_ADD);
SOCKET_BOOLEAN(use_clamp, "Use Clamp", false);
- SOCKET_IN_FLOAT(value1, "Value1", 0.0f);
- SOCKET_IN_FLOAT(value2, "Value2", 0.0f);
+ SOCKET_IN_FLOAT(value1, "Value1", 0.5f);
+ SOCKET_IN_FLOAT(value2, "Value2", 0.5f);
+ SOCKET_IN_FLOAT(value3, "Value3", 0.0f);
SOCKET_OUT_FLOAT(value, "Value");
@@ -5226,13 +6004,29 @@ MathNode::MathNode() : ShaderNode(node_type)
{
}
+void MathNode::expand(ShaderGraph *graph)
+{
+ if (use_clamp) {
+ ShaderOutput *result_out = output("Value");
+ if (!result_out->links.empty()) {
+ ClampNode *clamp_node = new ClampNode();
+ clamp_node->type = NODE_CLAMP_MINMAX;
+ clamp_node->min = 0.0f;
+ clamp_node->max = 1.0f;
+ graph->add(clamp_node);
+ graph->relink(result_out, clamp_node->output("Result"));
+ graph->connect(result_out, clamp_node->input("Value"));
+ }
+ }
+}
+
void MathNode::constant_fold(const ConstantFolder &folder)
{
if (folder.all_inputs_constant()) {
- folder.make_constant_clamp(svm_math(type, value1, value2), use_clamp);
+ folder.make_constant(svm_math(type, value1, value2, value3));
}
else {
- folder.fold_math(type, use_clamp);
+ folder.fold_math(type);
}
}
@@ -5240,22 +6034,24 @@ void MathNode::compile(SVMCompiler &compiler)
{
ShaderInput *value1_in = input("Value1");
ShaderInput *value2_in = input("Value2");
+ ShaderInput *value3_in = input("Value3");
ShaderOutput *value_out = output("Value");
- compiler.add_node(
- NODE_MATH, type, compiler.stack_assign(value1_in), compiler.stack_assign(value2_in));
- compiler.add_node(NODE_MATH, compiler.stack_assign(value_out));
+ int value1_stack_offset = compiler.stack_assign(value1_in);
+ int value2_stack_offset = compiler.stack_assign(value2_in);
+ int value3_stack_offset = compiler.stack_assign(value3_in);
+ int value_stack_offset = compiler.stack_assign(value_out);
- if (use_clamp) {
- compiler.add_node(NODE_MATH, NODE_MATH_CLAMP, compiler.stack_assign(value_out));
- compiler.add_node(NODE_MATH, compiler.stack_assign(value_out));
- }
+ compiler.add_node(
+ NODE_MATH,
+ type,
+ compiler.encode_uchar4(value1_stack_offset, value2_stack_offset, value3_stack_offset),
+ value_stack_offset);
}
void MathNode::compile(OSLCompiler &compiler)
{
compiler.parameter(this, "type");
- compiler.parameter(this, "use_clamp");
compiler.add(this, "node_math");
}
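The rewritten SVM emission packs the three value stack offsets into one uint with compiler.encode_uchar4(), which is why each offset has to fit in a byte. A rough sketch of that kind of packing, assuming a low-byte-first layout (illustration only, not the actual Cycles helper):

#include <cassert>
#include <cstdint>

// Four 8-bit operands packed into a 32-bit word; stack offsets are limited to 0..255.
static uint32_t pack_uchar4(uint32_t x, uint32_t y, uint32_t z, uint32_t w = 0)
{
  assert(x <= 0xFF && y <= 0xFF && z <= 0xFF && w <= 0xFF);
  return x | (y << 8) | (z << 16) | (w << 24);
}
// The kernel side would unpack with the matching shifts and masks.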
@@ -5268,14 +6064,38 @@ NODE_DEFINE(VectorMathNode)
static NodeEnum type_enum;
type_enum.insert("add", NODE_VECTOR_MATH_ADD);
type_enum.insert("subtract", NODE_VECTOR_MATH_SUBTRACT);
- type_enum.insert("average", NODE_VECTOR_MATH_AVERAGE);
- type_enum.insert("dot_product", NODE_VECTOR_MATH_DOT_PRODUCT);
+ type_enum.insert("multiply", NODE_VECTOR_MATH_MULTIPLY);
+ type_enum.insert("divide", NODE_VECTOR_MATH_DIVIDE);
+
type_enum.insert("cross_product", NODE_VECTOR_MATH_CROSS_PRODUCT);
+ type_enum.insert("project", NODE_VECTOR_MATH_PROJECT);
+ type_enum.insert("reflect", NODE_VECTOR_MATH_REFLECT);
+ type_enum.insert("dot_product", NODE_VECTOR_MATH_DOT_PRODUCT);
+
+ type_enum.insert("distance", NODE_VECTOR_MATH_DISTANCE);
+ type_enum.insert("length", NODE_VECTOR_MATH_LENGTH);
+ type_enum.insert("scale", NODE_VECTOR_MATH_SCALE);
type_enum.insert("normalize", NODE_VECTOR_MATH_NORMALIZE);
+
+ type_enum.insert("snap", NODE_VECTOR_MATH_SNAP);
+ type_enum.insert("floor", NODE_VECTOR_MATH_FLOOR);
+ type_enum.insert("ceil", NODE_VECTOR_MATH_CEIL);
+ type_enum.insert("modulo", NODE_VECTOR_MATH_MODULO);
+ type_enum.insert("wrap", NODE_VECTOR_MATH_WRAP);
+ type_enum.insert("fraction", NODE_VECTOR_MATH_FRACTION);
+ type_enum.insert("absolute", NODE_VECTOR_MATH_ABSOLUTE);
+ type_enum.insert("minimum", NODE_VECTOR_MATH_MINIMUM);
+ type_enum.insert("maximum", NODE_VECTOR_MATH_MAXIMUM);
+
+ type_enum.insert("sine", NODE_VECTOR_MATH_SINE);
+ type_enum.insert("cosine", NODE_VECTOR_MATH_COSINE);
+ type_enum.insert("tangent", NODE_VECTOR_MATH_TANGENT);
SOCKET_ENUM(type, "Type", type_enum, NODE_VECTOR_MATH_ADD);
SOCKET_IN_VECTOR(vector1, "Vector1", make_float3(0.0f, 0.0f, 0.0f));
SOCKET_IN_VECTOR(vector2, "Vector2", make_float3(0.0f, 0.0f, 0.0f));
+ SOCKET_IN_VECTOR(vector3, "Vector3", make_float3(0.0f, 0.0f, 0.0f));
+ SOCKET_IN_FLOAT(scale, "Scale", 1.0f);
SOCKET_OUT_FLOAT(value, "Value");
SOCKET_OUT_VECTOR(vector, "Vector");
@@ -5289,12 +6109,11 @@ VectorMathNode::VectorMathNode() : ShaderNode(node_type)
void VectorMathNode::constant_fold(const ConstantFolder &folder)
{
- float value;
- float3 vector;
+ float value = 0.0f;
+ float3 vector = make_float3(0.0f, 0.0f, 0.0f);
if (folder.all_inputs_constant()) {
- svm_vector_math(&value, &vector, type, vector1, vector2);
-
+ svm_vector_math(&value, &vector, type, vector1, vector2, vector3, scale);
if (folder.output == output("Value")) {
folder.make_constant(value);
}
@@ -5311,15 +6130,34 @@ void VectorMathNode::compile(SVMCompiler &compiler)
{
ShaderInput *vector1_in = input("Vector1");
ShaderInput *vector2_in = input("Vector2");
+ ShaderInput *scale_in = input("Scale");
ShaderOutput *value_out = output("Value");
ShaderOutput *vector_out = output("Vector");
- compiler.add_node(NODE_VECTOR_MATH,
- type,
- compiler.stack_assign(vector1_in),
- compiler.stack_assign(vector2_in));
- compiler.add_node(
- NODE_VECTOR_MATH, compiler.stack_assign(value_out), compiler.stack_assign(vector_out));
+ int vector1_stack_offset = compiler.stack_assign(vector1_in);
+ int vector2_stack_offset = compiler.stack_assign(vector2_in);
+ int scale_stack_offset = compiler.stack_assign(scale_in);
+ int value_stack_offset = compiler.stack_assign_if_linked(value_out);
+ int vector_stack_offset = compiler.stack_assign_if_linked(vector_out);
+
+ /* 3 Vector Operators */
+ if (type == NODE_VECTOR_MATH_WRAP) {
+ ShaderInput *vector3_in = input("Vector3");
+ int vector3_stack_offset = compiler.stack_assign(vector3_in);
+ compiler.add_node(
+ NODE_VECTOR_MATH,
+ type,
+ compiler.encode_uchar4(vector1_stack_offset, vector2_stack_offset, scale_stack_offset),
+ compiler.encode_uchar4(value_stack_offset, vector_stack_offset));
+ compiler.add_node(vector3_stack_offset);
+ }
+ else {
+ compiler.add_node(
+ NODE_VECTOR_MATH,
+ type,
+ compiler.encode_uchar4(vector1_stack_offset, vector2_stack_offset, scale_stack_offset),
+ compiler.encode_uchar4(value_stack_offset, vector_stack_offset));
+ }
}
void VectorMathNode::compile(OSLCompiler &compiler)
@@ -5328,6 +6166,62 @@ void VectorMathNode::compile(OSLCompiler &compiler)
compiler.add(this, "node_vector_math");
}
+/* Vector Rotate */
+
+NODE_DEFINE(VectorRotateNode)
+{
+ NodeType *type = NodeType::add("vector_rotate", create, NodeType::SHADER);
+
+ static NodeEnum type_enum;
+ type_enum.insert("axis", NODE_VECTOR_ROTATE_TYPE_AXIS);
+ type_enum.insert("x_axis", NODE_VECTOR_ROTATE_TYPE_AXIS_X);
+ type_enum.insert("y_axis", NODE_VECTOR_ROTATE_TYPE_AXIS_Y);
+ type_enum.insert("z_axis", NODE_VECTOR_ROTATE_TYPE_AXIS_Z);
+ type_enum.insert("euler_xyz", NODE_VECTOR_ROTATE_TYPE_EULER_XYZ);
+ SOCKET_ENUM(type, "Type", type_enum, NODE_VECTOR_ROTATE_TYPE_AXIS);
+
+ SOCKET_BOOLEAN(invert, "Invert", false);
+
+ SOCKET_IN_VECTOR(vector, "Vector", make_float3(0.0f, 0.0f, 0.0f));
+ SOCKET_IN_POINT(rotation, "Rotation", make_float3(0.0f, 0.0f, 0.0f));
+ SOCKET_IN_POINT(center, "Center", make_float3(0.0f, 0.0f, 0.0f));
+ SOCKET_IN_VECTOR(axis, "Axis", make_float3(0.0f, 0.0f, 1.0f));
+ SOCKET_IN_FLOAT(angle, "Angle", 0.0f);
+ SOCKET_OUT_VECTOR(vector, "Vector");
+
+ return type;
+}
+
+VectorRotateNode::VectorRotateNode() : ShaderNode(node_type)
+{
+}
+
+void VectorRotateNode::compile(SVMCompiler &compiler)
+{
+ ShaderInput *vector_in = input("Vector");
+ ShaderInput *rotation_in = input("Rotation");
+ ShaderInput *center_in = input("Center");
+ ShaderInput *axis_in = input("Axis");
+ ShaderInput *angle_in = input("Angle");
+ ShaderOutput *vector_out = output("Vector");
+
+ compiler.add_node(
+ NODE_VECTOR_ROTATE,
+ compiler.encode_uchar4(
+ type, compiler.stack_assign(vector_in), compiler.stack_assign(rotation_in), invert),
+ compiler.encode_uchar4(compiler.stack_assign(center_in),
+ compiler.stack_assign(axis_in),
+ compiler.stack_assign(angle_in)),
+ compiler.stack_assign(vector_out));
+}
+
+void VectorRotateNode::compile(OSLCompiler &compiler)
+{
+ compiler.parameter(this, "type");
+ compiler.parameter(this, "invert");
+ compiler.add(this, "node_vector_rotate");
+}
+
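For the axis/angle mode the node conceptually translates the input to the rotation center, rotates it about the (normalized) axis, and translates back; the Invert option can be modeled by negating the angle. A standalone sketch using Rodrigues' rotation formula, as an illustration of the convention rather than the kernel code:

#include <array>
#include <cmath>

using float3_t = std::array<float, 3>;

static float3_t cross3(const float3_t &a, const float3_t &b)
{
  return {a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]};
}

static float dot3(const float3_t &a, const float3_t &b)
{
  return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
}

// Rotate p around `axis` (assumed normalized) by `angle`, about point `center`.
static float3_t rotate_around_axis(const float3_t &p,
                                   const float3_t &center,
                                   const float3_t &axis,
                                   float angle)
{
  float3_t v = {p[0] - center[0], p[1] - center[1], p[2] - center[2]};
  const float c = std::cos(angle), s = std::sin(angle);
  const float3_t k_cross_v = cross3(axis, v);
  const float k_dot_v = dot3(axis, v);
  float3_t r;
  for (int i = 0; i < 3; i++) {
    // Rodrigues: v' = v cos + (k x v) sin + k (k . v)(1 - cos), then translate back.
    r[i] = v[i] * c + k_cross_v[i] * s + axis[i] * k_dot_v * (1.0f - c) + center[i];
  }
  return r;
}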
/* VectorTransform */
NODE_DEFINE(VectorTransformNode)
@@ -5718,7 +6612,7 @@ void SetNormalNode::compile(OSLCompiler &compiler)
OSLNode::OSLNode() : ShaderNode(new NodeType(NodeType::SHADER))
{
- special_type = SHADER_SPECIAL_TYPE_SCRIPT;
+ special_type = SHADER_SPECIAL_TYPE_OSL;
}
OSLNode::~OSLNode()
diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h
index 7796711115e..326f1d14168 100644
--- a/intern/cycles/render/nodes.h
+++ b/intern/cycles/render/nodes.h
@@ -17,8 +17,9 @@
#ifndef __NODES_H__
#define __NODES_H__
-#include "render/graph.h"
#include "graph/node.h"
+#include "render/graph.h"
+#include "render/image.h"
#include "util/util_array.h"
#include "util/util_string.h"
@@ -78,13 +79,19 @@ class ImageSlotTextureNode : public TextureNode {
{
special_type = SHADER_SPECIAL_TYPE_IMAGE_SLOT;
}
- int slot;
+
+ virtual bool equals(const ShaderNode &other)
+ {
+ const ImageSlotTextureNode &other_node = (const ImageSlotTextureNode &)other;
+ return TextureNode::equals(other) && handle == other_node.handle;
+ }
+
+ ImageHandle handle;
};
class ImageTextureNode : public ImageSlotTextureNode {
public:
SHADER_NODE_NO_CLONE_CLASS(ImageTextureNode)
- ~ImageTextureNode();
ShaderNode *clone() const;
void attributes(Shader *shader, AttributeRequestSet *attributes);
bool has_attribute_dependency()
@@ -92,32 +99,33 @@ class ImageTextureNode : public ImageSlotTextureNode {
return true;
}
- ImageManager *image_manager;
- int is_float;
- bool is_linear;
- bool use_alpha;
+ virtual bool equals(const ShaderNode &other)
+ {
+ const ImageTextureNode &other_node = (const ImageTextureNode &)other;
+ return ImageSlotTextureNode::equals(other) && animated == other_node.animated;
+ }
+
+ ImageParams image_params() const;
+
+ /* Parameters. */
ustring filename;
- void *builtin_data;
- NodeImageColorSpace color_space;
+ ustring colorspace;
+ ImageAlphaType alpha_type;
NodeImageProjection projection;
InterpolationType interpolation;
ExtensionType extension;
float projection_blend;
bool animated;
float3 vector;
+ ccl::vector<int> tiles;
- virtual bool equals(const ShaderNode &other)
- {
- const ImageTextureNode &image_node = (const ImageTextureNode &)other;
- return ImageSlotTextureNode::equals(other) && builtin_data == image_node.builtin_data &&
- animated == image_node.animated;
- }
+ protected:
+ void cull_tiles(Scene *scene, ShaderGraph *graph);
};
class EnvironmentTextureNode : public ImageSlotTextureNode {
public:
SHADER_NODE_NO_CLONE_CLASS(EnvironmentTextureNode)
- ~EnvironmentTextureNode();
ShaderNode *clone() const;
void attributes(Shader *shader, AttributeRequestSet *attributes);
bool has_attribute_dependency()
@@ -129,24 +137,22 @@ class EnvironmentTextureNode : public ImageSlotTextureNode {
return NODE_GROUP_LEVEL_2;
}
- ImageManager *image_manager;
- int is_float;
- bool is_linear;
- bool use_alpha;
+ virtual bool equals(const ShaderNode &other)
+ {
+ const EnvironmentTextureNode &other_node = (const EnvironmentTextureNode &)other;
+ return ImageSlotTextureNode::equals(other) && animated == other_node.animated;
+ }
+
+ ImageParams image_params() const;
+
+ /* Parameters. */
ustring filename;
- void *builtin_data;
- NodeImageColorSpace color_space;
+ ustring colorspace;
+ ImageAlphaType alpha_type;
NodeEnvironmentProjection projection;
InterpolationType interpolation;
bool animated;
float3 vector;
-
- virtual bool equals(const ShaderNode &other)
- {
- const EnvironmentTextureNode &env_node = (const EnvironmentTextureNode &)other;
- return ImageSlotTextureNode::equals(other) && builtin_data == env_node.builtin_data &&
- animated == env_node.animated;
- }
};
class SkyTextureNode : public TextureNode {
@@ -162,7 +168,17 @@ class SkyTextureNode : public TextureNode {
float3 sun_direction;
float turbidity;
float ground_albedo;
+ bool sun_disc;
+ float sun_size;
+ float sun_intensity;
+ float sun_elevation;
+ float sun_rotation;
+ float altitude;
+ float air_density;
+ float dust_density;
+ float ozone_density;
float3 vector;
+ ImageHandle handle;
};
class OutputNode : public ShaderNode {
@@ -181,6 +197,31 @@ class OutputNode : public ShaderNode {
}
};
+class OutputAOVNode : public ShaderNode {
+ public:
+ SHADER_NODE_CLASS(OutputAOVNode)
+ virtual void simplify_settings(Scene *scene);
+
+ float value;
+ float3 color;
+
+ ustring name;
+
+ virtual int get_group()
+ {
+ return NODE_GROUP_LEVEL_4;
+ }
+
+ /* Don't allow output node de-duplication. */
+ virtual bool equals(const ShaderNode & /*other*/)
+ {
+ return false;
+ }
+
+ int slot;
+ bool is_color;
+};
+
class GradientTextureNode : public TextureNode {
public:
SHADER_NODE_CLASS(GradientTextureNode)
@@ -198,7 +239,8 @@ class NoiseTextureNode : public TextureNode {
public:
SHADER_NODE_CLASS(NoiseTextureNode)
- float scale, detail, distortion;
+ int dimensions;
+ float w, scale, detail, roughness, distortion;
float3 vector;
};
@@ -211,10 +253,22 @@ class VoronoiTextureNode : public TextureNode {
return NODE_GROUP_LEVEL_2;
}
- NodeVoronoiColoring coloring;
+ virtual int get_feature()
+ {
+ int result = ShaderNode::get_feature();
+ if (dimensions == 4) {
+ result |= NODE_FEATURE_VORONOI_EXTRA;
+ }
+ else if (dimensions >= 2 && feature == NODE_VORONOI_SMOOTH_F1) {
+ result |= NODE_FEATURE_VORONOI_EXTRA;
+ }
+ return result;
+ }
+
+ int dimensions;
NodeVoronoiDistanceMetric metric;
NodeVoronoiFeature feature;
- float scale, exponent;
+ float w, scale, exponent, smoothness, randomness;
float3 vector;
};
@@ -227,8 +281,9 @@ class MusgraveTextureNode : public TextureNode {
return NODE_GROUP_LEVEL_2;
}
+ int dimensions;
NodeMusgraveType type;
- float scale, detail, dimension, lacunarity, offset, gain;
+ float w, scale, detail, dimension, lacunarity, offset, gain;
float3 vector;
};
@@ -242,9 +297,11 @@ class WaveTextureNode : public TextureNode {
}
NodeWaveType type;
+ NodeWaveBandsDirection bands_direction;
+ NodeWaveRingsDirection rings_direction;
NodeWaveProfile profile;
- float scale, distortion, detail, detail_scale;
+ float scale, distortion, detail, detail_scale, detail_roughness, phase;
float3 vector;
};
@@ -297,7 +354,7 @@ class PointDensityTextureNode : public ShaderNode {
SHADER_NODE_NO_CLONE_CLASS(PointDensityTextureNode)
virtual int get_group()
{
- return NODE_GROUP_LEVEL_3;
+ return NODE_GROUP_LEVEL_4;
}
~PointDensityTextureNode();
@@ -312,27 +369,23 @@ class PointDensityTextureNode : public ShaderNode {
{
return true;
}
- bool has_object_dependency()
- {
- return true;
- }
-
- void add_image();
+ /* Parameters. */
ustring filename;
NodeTexVoxelSpace space;
InterpolationType interpolation;
Transform tfm;
float3 vector;
- ImageManager *image_manager;
- int slot;
- void *builtin_data;
+ /* Runtime. */
+ ImageHandle handle;
+
+ ImageParams image_params() const;
virtual bool equals(const ShaderNode &other)
{
- const PointDensityTextureNode &point_dendity_node = (const PointDensityTextureNode &)other;
- return ShaderNode::equals(other) && builtin_data == point_dendity_node.builtin_data;
+ const PointDensityTextureNode &other_node = (const PointDensityTextureNode &)other;
+ return ShaderNode::equals(other) && handle == other_node.handle;
}
};
@@ -360,6 +413,19 @@ class IESLightNode : public TextureNode {
void get_slot();
};
+class WhiteNoiseTextureNode : public ShaderNode {
+ public:
+ SHADER_NODE_CLASS(WhiteNoiseTextureNode)
+ virtual int get_group()
+ {
+ return NODE_GROUP_LEVEL_2;
+ }
+
+ int dimensions;
+ float3 vector;
+ float w;
+};
+
class MappingNode : public ShaderNode {
public:
SHADER_NODE_CLASS(MappingNode)
@@ -367,9 +433,10 @@ class MappingNode : public ShaderNode {
{
return NODE_GROUP_LEVEL_2;
}
+ void constant_fold(const ConstantFolder &folder);
- float3 vector;
- TextureMapping tex_mapping;
+ float3 vector, location, rotation, scale;
+ NodeMappingType type;
};
class RGBToBWNode : public ShaderNode {
@@ -477,6 +544,7 @@ class PrincipledBsdfNode : public BsdfBaseNode {
public:
SHADER_NODE_CLASS(PrincipledBsdfNode)
+ void expand(ShaderGraph *graph);
bool has_surface_bssrdf();
bool has_bssrdf_bump();
void compile(SVMCompiler &compiler,
@@ -505,6 +573,8 @@ class PrincipledBsdfNode : public BsdfBaseNode {
float surface_mix_weight;
ClosureType distribution, distribution_orig;
ClosureType subsurface_method;
+ float3 emission;
+ float alpha;
bool has_integrator_dependency();
void attributes(Shader *shader, AttributeRequestSet *attributes);
@@ -832,10 +902,6 @@ class TextureCoordinateNode : public ShaderNode {
{
return true;
}
- bool has_object_dependency()
- {
- return use_transform;
- }
float3 normal_osl;
bool from_dupli;
@@ -935,6 +1001,37 @@ class HairInfoNode : public ShaderNode {
}
};
+class VolumeInfoNode : public ShaderNode {
+ public:
+ SHADER_NODE_CLASS(VolumeInfoNode)
+ void attributes(Shader *shader, AttributeRequestSet *attributes);
+ bool has_attribute_dependency()
+ {
+ return true;
+ }
+ bool has_spatial_varying()
+ {
+ return true;
+ }
+ void expand(ShaderGraph *graph);
+};
+
+class VertexColorNode : public ShaderNode {
+ public:
+ SHADER_NODE_CLASS(VertexColorNode)
+ void attributes(Shader *shader, AttributeRequestSet *attributes);
+ bool has_attribute_dependency()
+ {
+ return true;
+ }
+ bool has_spatial_varying()
+ {
+ return true;
+ }
+
+ ustring layer_name;
+};
+
class ValueNode : public ShaderNode {
public:
SHADER_NODE_CLASS(ValueNode)
@@ -1215,6 +1312,32 @@ class BlackbodyNode : public ShaderNode {
float temperature;
};
+class MapRangeNode : public ShaderNode {
+ public:
+ SHADER_NODE_CLASS(MapRangeNode)
+ virtual int get_group()
+ {
+ return NODE_GROUP_LEVEL_3;
+ }
+ void expand(ShaderGraph *graph);
+
+ float value, from_min, from_max, to_min, to_max, steps;
+ NodeMapRangeType type;
+ bool clamp;
+};
+
+class ClampNode : public ShaderNode {
+ public:
+ SHADER_NODE_CLASS(ClampNode)
+ void constant_fold(const ConstantFolder &folder);
+ virtual int get_group()
+ {
+ return NODE_GROUP_LEVEL_3;
+ }
+ float value, min, max;
+ NodeClampType type;
+};
+
class MathNode : public ShaderNode {
public:
SHADER_NODE_CLASS(MathNode)
@@ -1222,11 +1345,13 @@ class MathNode : public ShaderNode {
{
return NODE_GROUP_LEVEL_1;
}
+ void expand(ShaderGraph *graph);
void constant_fold(const ConstantFolder &folder);
float value1;
float value2;
- NodeMath type;
+ float value3;
+ NodeMathType type;
bool use_clamp;
};
@@ -1253,7 +1378,26 @@ class VectorMathNode : public ShaderNode {
float3 vector1;
float3 vector2;
- NodeVectorMath type;
+ float3 vector3;
+ float scale;
+ NodeVectorMathType type;
+};
+
+class VectorRotateNode : public ShaderNode {
+ public:
+ SHADER_NODE_CLASS(VectorRotateNode)
+
+ virtual int get_group()
+ {
+ return NODE_GROUP_LEVEL_3;
+ }
+ NodeVectorRotateType type;
+ bool invert;
+ float3 vector;
+ float3 center;
+ float3 axis;
+ float angle;
+ float3 rotation;
};
class VectorTransformNode : public ShaderNode {
diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp
index 6c6f8810412..f200e409b9e 100644
--- a/intern/cycles/render/object.cpp
+++ b/intern/cycles/render/object.cpp
@@ -14,22 +14,25 @@
* limitations under the License.
*/
-#include "render/camera.h"
+#include "render/object.h"
#include "device/device.h"
+#include "render/camera.h"
+#include "render/curves.h"
+#include "render/hair.h"
+#include "render/integrator.h"
#include "render/light.h"
#include "render/mesh.h"
-#include "render/curves.h"
-#include "render/object.h"
#include "render/particles.h"
#include "render/scene.h"
#include "util/util_foreach.h"
#include "util/util_logging.h"
#include "util/util_map.h"
+#include "util/util_murmurhash.h"
#include "util/util_progress.h"
#include "util/util_set.h"
+#include "util/util_task.h"
#include "util/util_vector.h"
-#include "util/util_murmurhash.h"
#include "subd/subd_patch_table.h"
@@ -64,6 +67,7 @@ struct UpdateObjectTransformState {
KernelObject *objects;
Transform *object_motion_pass;
DecomposedTransform *object_motion;
+ float *object_volume_step;
/* Flags which will be synchronized to Integrator. */
bool have_motion;
@@ -74,7 +78,6 @@ struct UpdateObjectTransformState {
Scene *scene;
/* Some locks to keep everything thread-safe. */
- thread_spin_lock queue_lock;
thread_spin_lock surface_area_lock;
/* First unused object index in the queue. */
@@ -87,9 +90,10 @@ NODE_DEFINE(Object)
{
NodeType *type = NodeType::add("object", create);
- SOCKET_NODE(mesh, "Mesh", &Mesh::node_type);
+ SOCKET_NODE(geometry, "Geometry", &Geometry::node_base_type);
SOCKET_TRANSFORM(tfm, "Transform", transform_identity());
SOCKET_UINT(visibility, "Visibility", ~0);
+ SOCKET_COLOR(color, "Color", make_float3(0.0f, 0.0f, 0.0f));
SOCKET_UINT(random_id, "Random ID", 0);
SOCKET_INT(pass_id, "Pass ID", 0);
SOCKET_BOOLEAN(use_holdout, "Use Holdout", false);
@@ -97,6 +101,7 @@ NODE_DEFINE(Object)
SOCKET_POINT(dupli_generated, "Dupli Generated", make_float3(0.0f, 0.0f, 0.0f));
SOCKET_POINT2(dupli_uv, "Dupli UV", make_float2(0.0f, 0.0f));
SOCKET_TRANSFORM_ARRAY(motion, "Motion", array<Transform>());
+ SOCKET_FLOAT(shadow_terminator_offset, "Terminator Offset", 0.0f);
SOCKET_BOOLEAN(is_shadow_catcher, "Shadow Catcher", false);
@@ -151,7 +156,7 @@ void Object::update_motion()
void Object::compute_bounds(bool motion_blur)
{
- BoundBox mbounds = mesh->bounds;
+ BoundBox mbounds = geometry->bounds;
if (motion_blur && use_motion()) {
array<DecomposedTransform> decomp(motion.size());
@@ -171,7 +176,7 @@ void Object::compute_bounds(bool motion_blur)
}
else {
/* No motion blur case. */
- if (mesh->transform_applied) {
+ if (geometry->transform_applied) {
bounds = mbounds;
}
else {
@@ -182,89 +187,18 @@ void Object::compute_bounds(bool motion_blur)
void Object::apply_transform(bool apply_to_motion)
{
- if (!mesh || tfm == transform_identity())
+ if (!geometry || tfm == transform_identity())
return;
- /* triangles */
- if (mesh->verts.size()) {
- /* store matrix to transform later. when accessing these as attributes we
- * do not want the transform to be applied for consistency between static
- * and dynamic BVH, so we do it on packing. */
- mesh->transform_normal = transform_transposed_inverse(tfm);
-
- /* apply to mesh vertices */
- for (size_t i = 0; i < mesh->verts.size(); i++)
- mesh->verts[i] = transform_point(&tfm, mesh->verts[i]);
-
- if (apply_to_motion) {
- Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-
- if (attr) {
- size_t steps_size = mesh->verts.size() * (mesh->motion_steps - 1);
- float3 *vert_steps = attr->data_float3();
-
- for (size_t i = 0; i < steps_size; i++)
- vert_steps[i] = transform_point(&tfm, vert_steps[i]);
- }
-
- Attribute *attr_N = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL);
-
- if (attr_N) {
- Transform ntfm = mesh->transform_normal;
- size_t steps_size = mesh->verts.size() * (mesh->motion_steps - 1);
- float3 *normal_steps = attr_N->data_float3();
-
- for (size_t i = 0; i < steps_size; i++)
- normal_steps[i] = normalize(transform_direction(&ntfm, normal_steps[i]));
- }
- }
- }
-
- /* curves */
- if (mesh->curve_keys.size()) {
- /* compute uniform scale */
- float3 c0 = transform_get_column(&tfm, 0);
- float3 c1 = transform_get_column(&tfm, 1);
- float3 c2 = transform_get_column(&tfm, 2);
- float scalar = powf(fabsf(dot(cross(c0, c1), c2)), 1.0f / 3.0f);
-
- /* apply transform to curve keys */
- for (size_t i = 0; i < mesh->curve_keys.size(); i++) {
- float3 co = transform_point(&tfm, mesh->curve_keys[i]);
- float radius = mesh->curve_radius[i] * scalar;
-
- /* scale for curve radius is only correct for uniform scale */
- mesh->curve_keys[i] = co;
- mesh->curve_radius[i] = radius;
- }
-
- if (apply_to_motion) {
- Attribute *curve_attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-
- if (curve_attr) {
- /* apply transform to motion curve keys */
- size_t steps_size = mesh->curve_keys.size() * (mesh->motion_steps - 1);
- float4 *key_steps = curve_attr->data_float4();
-
- for (size_t i = 0; i < steps_size; i++) {
- float3 co = transform_point(&tfm, float4_to_float3(key_steps[i]));
- float radius = key_steps[i].w * scalar;
-
- /* scale for curve radius is only correct for uniform scale */
- key_steps[i] = float3_to_float4(co);
- key_steps[i].w = radius;
- }
- }
- }
- }
+ geometry->apply_transform(tfm, apply_to_motion);
/* we keep normals pointing in same direction on negative scale, notify
- * mesh about this in it (re)calculates normals */
+ * geometry about this in it (re)calculates normals */
if (transform_negative_scale(tfm))
- mesh->transform_negative_scaled = true;
+ geometry->transform_negative_scaled = true;
if (bounds.valid()) {
- mesh->compute_bounds();
+ geometry->compute_bounds();
compute_bounds(false);
}
@@ -274,19 +208,18 @@ void Object::apply_transform(bool apply_to_motion)
void Object::tag_update(Scene *scene)
{
- if (mesh) {
- if (mesh->transform_applied)
- mesh->need_update = true;
+ if (geometry) {
+ if (geometry->transform_applied)
+ geometry->need_update = true;
- foreach (Shader *shader, mesh->used_shaders) {
+ foreach (Shader *shader, geometry->used_shaders) {
if (shader->use_mis && shader->has_surface_emission)
scene->light_manager->need_update = true;
}
}
scene->camera->need_flags_update = true;
- scene->curve_system_manager->need_update = true;
- scene->mesh_manager->need_update = true;
+ scene->geometry_manager->need_update = true;
scene->object_manager->need_update = true;
}
@@ -335,6 +268,82 @@ uint Object::visibility_for_tracing() const
return trace_visibility;
}
+float Object::compute_volume_step_size() const
+{
+ if (geometry->type != Geometry::MESH) {
+ return FLT_MAX;
+ }
+
+ Mesh *mesh = static_cast<Mesh *>(geometry);
+
+ if (!mesh->has_volume) {
+ return FLT_MAX;
+ }
+
+ /* Compute step rate from shaders. */
+ float step_rate = FLT_MAX;
+
+ foreach (Shader *shader, mesh->used_shaders) {
+ if (shader->has_volume) {
+ if ((shader->heterogeneous_volume && shader->has_volume_spatial_varying) ||
+ (shader->has_volume_attribute_dependency)) {
+ step_rate = fminf(shader->volume_step_rate, step_rate);
+ }
+ }
+ }
+
+ if (step_rate == FLT_MAX) {
+ return FLT_MAX;
+ }
+
+ /* Compute step size from voxel grids. */
+ float step_size = FLT_MAX;
+
+ foreach (Attribute &attr, mesh->attributes.attributes) {
+ if (attr.element == ATTR_ELEMENT_VOXEL) {
+ ImageHandle &handle = attr.data_voxel();
+ const ImageMetaData &metadata = handle.metadata();
+ if (metadata.width == 0 || metadata.height == 0 || metadata.depth == 0) {
+ continue;
+ }
+
+ /* User specified step size. */
+ float voxel_step_size = mesh->volume_step_size;
+
+ if (voxel_step_size == 0.0f) {
+ /* Auto detect step size. */
+ float3 size = make_float3(
+ 1.0f / metadata.width, 1.0f / metadata.height, 1.0f / metadata.depth);
+
+ /* Step size is transformed from voxel to world space. */
+ Transform voxel_tfm = tfm;
+ if (metadata.use_transform_3d) {
+ voxel_tfm = tfm * transform_inverse(metadata.transform_3d);
+ }
+ voxel_step_size = min3(fabs(transform_direction(&voxel_tfm, size)));
+ }
+ else if (mesh->volume_object_space) {
+ /* User specified step size in object space. */
+ float3 size = make_float3(voxel_step_size, voxel_step_size, voxel_step_size);
+ voxel_step_size = min3(fabs(transform_direction(&tfm, size)));
+ }
+
+ if (voxel_step_size > 0.0f) {
+ step_size = fminf(voxel_step_size, step_size);
+ }
+ }
+ }
+
+ if (step_size == FLT_MAX) {
+ /* Fall back to 1/10th of bounds for procedural volumes. */
+ step_size = 0.1f * average(bounds.size());
+ }
+
+ step_size *= step_rate;
+
+ return step_size;
+}
+
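As a concrete reading of the auto-detect branch above: the voxel size in texture space is (1/width, 1/height, 1/depth), it is pushed through the object (and optional grid) transform, the smallest resulting component becomes the base step, and the shader's step rate scales it at the end. A worked sketch for the simple case of an axis-aligned object scale and no extra grid transform:

#include <algorithm>
#include <cmath>

// Illustration only; the real code uses the full object/grid transforms.
static float auto_volume_step(int width, int height, int depth,
                              float sx, float sy, float sz,  // per-axis object scale
                              float step_rate)
{
  const float wx = std::fabs(sx) / width;
  const float wy = std::fabs(sy) / height;
  const float wz = std::fabs(sz) / depth;
  // min3(fabs(transform_direction(...))) collapses to the smallest component here.
  return std::min({wx, wy, wz}) * step_rate;
}
// e.g. a 64^3 grid, uniform scale 2.0 and step rate 1.0 gives 2.0 / 64 = 0.03125.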
int Object::get_device_index() const
{
return index;
@@ -352,31 +361,33 @@ ObjectManager::~ObjectManager()
{
}
-void ObjectManager::device_update_object_transform(UpdateObjectTransformState *state, Object *ob)
+static float object_surface_area(UpdateObjectTransformState *state,
+ const Transform &tfm,
+ Geometry *geom)
{
- KernelObject &kobject = state->objects[ob->index];
- Transform *object_motion_pass = state->object_motion_pass;
-
- Mesh *mesh = ob->mesh;
- uint flag = 0;
+ if (geom->type != Geometry::MESH) {
+ return 0.0f;
+ }
- /* Compute transformations. */
- Transform tfm = ob->tfm;
- Transform itfm = transform_inverse(tfm);
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (mesh->has_volume) {
+ /* Volume density automatically adjusts to object scale. */
+ if (mesh->volume_object_space) {
+ const float3 unit = normalize(make_float3(1.0f, 1.0f, 1.0f));
+ return 1.0f / len(transform_direction(&tfm, unit));
+ }
+ else {
+ return 1.0f;
+ }
+ }
  /* Compute surface area. For uniform scale we can avoid the many
* transform calls and share computation for instances.
*
* TODO(brecht): Correct for displacement, and move to a better place.
*/
- float uniform_scale;
float surface_area = 0.0f;
- float pass_id = ob->pass_id;
- float random_number = (float)ob->random_id * (1.0f / (float)0xFFFFFFFF);
- int particle_index = (ob->particle_system) ?
- ob->particle_index + state->particle_offset[ob->particle_system] :
- 0;
-
+ float uniform_scale;
if (transform_uniform_scale(tfm, uniform_scale)) {
map<Mesh *, float>::iterator it;
@@ -422,19 +433,49 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
}
}
+ return surface_area;
+}
+
+void ObjectManager::device_update_object_transform(UpdateObjectTransformState *state, Object *ob)
+{
+ KernelObject &kobject = state->objects[ob->index];
+ Transform *object_motion_pass = state->object_motion_pass;
+
+ Geometry *geom = ob->geometry;
+ uint flag = 0;
+
+ /* Compute transformations. */
+ Transform tfm = ob->tfm;
+ Transform itfm = transform_inverse(tfm);
+
+ float3 color = ob->color;
+ float pass_id = ob->pass_id;
+ float random_number = (float)ob->random_id * (1.0f / (float)0xFFFFFFFF);
+ int particle_index = (ob->particle_system) ?
+ ob->particle_index + state->particle_offset[ob->particle_system] :
+ 0;
+
kobject.tfm = tfm;
kobject.itfm = itfm;
- kobject.surface_area = surface_area;
+ kobject.surface_area = object_surface_area(state, tfm, geom);
+ kobject.color[0] = color.x;
+ kobject.color[1] = color.y;
+ kobject.color[2] = color.z;
kobject.pass_id = pass_id;
kobject.random_number = random_number;
kobject.particle_index = particle_index;
kobject.motion_offset = 0;
- if (mesh->use_motion_blur) {
+ if (geom->use_motion_blur) {
state->have_motion = true;
}
- if (mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) {
- flag |= SD_OBJECT_HAS_VERTEX_MOTION;
+
+ if (geom->type == Geometry::MESH) {
+ /* TODO: why only mesh? */
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) {
+ flag |= SD_OBJECT_HAS_VERTEX_MOTION;
+ }
}
if (state->need_motion == Scene::MOTION_PASS) {
@@ -455,7 +496,7 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
/* Motion transformations, is world/object space depending if mesh
* comes with deformed position in object space, or if we transform
* the shading point in world space. */
- if (!mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) {
+ if (!(flag & SD_OBJECT_HAS_VERTEX_MOTION)) {
tfm_pre = tfm_pre * itfm;
tfm_post = tfm_post * itfm;
}
@@ -480,66 +521,34 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
kobject.dupli_generated[0] = ob->dupli_generated[0];
kobject.dupli_generated[1] = ob->dupli_generated[1];
kobject.dupli_generated[2] = ob->dupli_generated[2];
- kobject.numkeys = mesh->curve_keys.size();
+ kobject.numkeys = (geom->type == Geometry::HAIR) ? static_cast<Hair *>(geom)->curve_keys.size() :
+ 0;
kobject.dupli_uv[0] = ob->dupli_uv[0];
kobject.dupli_uv[1] = ob->dupli_uv[1];
- int totalsteps = mesh->motion_steps;
+ int totalsteps = geom->motion_steps;
kobject.numsteps = (totalsteps - 1) / 2;
- kobject.numverts = mesh->verts.size();
+ kobject.numverts = (geom->type == Geometry::MESH) ? static_cast<Mesh *>(geom)->verts.size() : 0;
kobject.patch_map_offset = 0;
kobject.attribute_map_offset = 0;
uint32_t hash_name = util_murmur_hash3(ob->name.c_str(), ob->name.length(), 0);
uint32_t hash_asset = util_murmur_hash3(ob->asset_name.c_str(), ob->asset_name.length(), 0);
kobject.cryptomatte_object = util_hash_to_float(hash_name);
kobject.cryptomatte_asset = util_hash_to_float(hash_asset);
+ kobject.shadow_terminator_offset = 1.0f / (1.0f - 0.5f * ob->shadow_terminator_offset);
/* Object flag. */
if (ob->use_holdout) {
flag |= SD_OBJECT_HOLDOUT_MASK;
}
state->object_flag[ob->index] = flag;
+ state->object_volume_step[ob->index] = FLT_MAX;
/* Have curves. */
- if (mesh->num_curves()) {
+ if (geom->type == Geometry::HAIR) {
state->have_curves = true;
}
}
-bool ObjectManager::device_update_object_transform_pop_work(UpdateObjectTransformState *state,
- int *start_index,
- int *num_objects)
-{
- /* Tweakable parameter, number of objects per chunk.
- * Too small value will cause some extra overhead due to spin lock,
- * too big value might not use all threads nicely.
- */
- static const int OBJECTS_PER_TASK = 32;
- bool have_work = false;
- state->queue_lock.lock();
- int num_scene_objects = state->scene->objects.size();
- if (state->queue_start_object < num_scene_objects) {
- int count = min(OBJECTS_PER_TASK, num_scene_objects - state->queue_start_object);
- *start_index = state->queue_start_object;
- *num_objects = count;
- state->queue_start_object += count;
- have_work = true;
- }
- state->queue_lock.unlock();
- return have_work;
-}
-
-void ObjectManager::device_update_object_transform_task(UpdateObjectTransformState *state)
-{
- int start_index, num_objects;
- while (device_update_object_transform_pop_work(state, &start_index, &num_objects)) {
- for (int i = 0; i < num_objects; ++i) {
- const int object_index = start_index + i;
- Object *ob = state->scene->objects[object_index];
- device_update_object_transform(state, ob);
- }
- }
-}
-
void ObjectManager::device_update_transforms(DeviceScene *dscene, Scene *scene, Progress &progress)
{
UpdateObjectTransformState state;
@@ -551,6 +560,7 @@ void ObjectManager::device_update_transforms(DeviceScene *dscene, Scene *scene,
state.objects = dscene->objects.alloc(scene->objects.size());
state.object_flag = dscene->object_flag.alloc(scene->objects.size());
+ state.object_volume_step = dscene->object_volume_step.alloc(scene->objects.size());
state.object_motion = NULL;
state.object_motion_pass = NULL;
@@ -584,28 +594,19 @@ void ObjectManager::device_update_transforms(DeviceScene *dscene, Scene *scene,
numparticles += psys->particles.size();
}
- /* NOTE: If it's just a handful of objects we deal with them in a single
- * thread to avoid threading overhead. However, this threshold is might
- * need some tweaks to make mid-complex scenes optimal.
- */
- if (scene->objects.size() < 64) {
- foreach (Object *ob, scene->objects) {
- device_update_object_transform(&state, ob);
- if (progress.get_cancel()) {
- return;
- }
- }
- }
- else {
- const int num_threads = TaskScheduler::num_threads();
- TaskPool pool;
- for (int i = 0; i < num_threads; ++i) {
- pool.push(function_bind(&ObjectManager::device_update_object_transform_task, this, &state));
- }
- pool.wait_work();
- if (progress.get_cancel()) {
- return;
- }
+ /* Parallel object update, with grain size to avoid too much threading overhead
+ * for individual objects. */
+ static const int OBJECTS_PER_TASK = 32;
+ parallel_for(blocked_range<size_t>(0, scene->objects.size(), OBJECTS_PER_TASK),
+ [&](const blocked_range<size_t> &r) {
+ for (size_t i = r.begin(); i != r.end(); i++) {
+ Object *ob = state.scene->objects[i];
+ device_update_object_transform(&state, ob);
+ }
+ });
+
+ if (progress.get_cancel()) {
+ return;
}
dscene->objects.copy_to_device();
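The grain-sized parallel_for replaces the old hand-rolled work queue: small scenes effectively stay on one thread, while large scenes split into chunks of OBJECTS_PER_TASK objects. A standalone sketch of the same pattern written directly against TBB, which exposes the blocked_range/grain-size interface used above:

#include <tbb/blocked_range.h>
#include <tbb/parallel_for.h>
#include <vector>

int main()
{
  std::vector<float> results(1000);
  const size_t GRAIN = 32;  // analogous to OBJECTS_PER_TASK

  tbb::parallel_for(tbb::blocked_range<size_t>(0, results.size(), GRAIN),
                    [&](const tbb::blocked_range<size_t> &r) {
                      for (size_t i = r.begin(); i != r.end(); i++) {
                        results[i] = float(i) * 2.0f;  // per-object update goes here
                      }
                    });
  return 0;
}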
@@ -618,7 +619,6 @@ void ObjectManager::device_update_transforms(DeviceScene *dscene, Scene *scene,
dscene->data.bvh.have_motion = state.have_motion;
dscene->data.bvh.have_curves = state.have_curves;
- dscene->data.bvh.have_instancing = true;
}
void ObjectManager::device_update(Device *device,
@@ -671,25 +671,30 @@ void ObjectManager::device_update_flags(
/* Object info flag. */
uint *object_flag = dscene->object_flag.data();
+ float *object_volume_step = dscene->object_volume_step.data();
/* Object volume intersection. */
vector<Object *> volume_objects;
bool has_volume_objects = false;
foreach (Object *object, scene->objects) {
- if (object->mesh->has_volume) {
+ if (object->geometry->has_volume) {
if (bounds_valid) {
volume_objects.push_back(object);
}
has_volume_objects = true;
+ object_volume_step[object->index] = object->compute_volume_step_size();
+ }
+ else {
+ object_volume_step[object->index] = FLT_MAX;
}
}
foreach (Object *object, scene->objects) {
- if (object->mesh->has_volume) {
+ if (object->geometry->has_volume) {
object_flag[object->index] |= SD_OBJECT_HAS_VOLUME;
object_flag[object->index] &= ~SD_OBJECT_HAS_VOLUME_ATTRIBUTES;
- foreach (Attribute &attr, object->mesh->attributes.attributes) {
+ foreach (Attribute &attr, object->geometry->attributes.attributes) {
if (attr.element == ATTR_ELEMENT_VOXEL) {
object_flag[object->index] |= SD_OBJECT_HAS_VOLUME_ATTRIBUTES;
}
@@ -698,6 +703,7 @@ void ObjectManager::device_update_flags(
else {
object_flag[object->index] &= ~(SD_OBJECT_HAS_VOLUME | SD_OBJECT_HAS_VOLUME_ATTRIBUTES);
}
+
if (object->is_shadow_catcher) {
object_flag[object->index] |= SD_OBJECT_SHADOW_CATCHER;
}
@@ -726,6 +732,7 @@ void ObjectManager::device_update_flags(
/* Copy object flag. */
dscene->object_flag.copy_to_device();
+ dscene->object_volume_step.copy_to_device();
}
void ObjectManager::device_update_mesh_offsets(Device *, DeviceScene *dscene, Scene *scene)
@@ -739,21 +746,24 @@ void ObjectManager::device_update_mesh_offsets(Device *, DeviceScene *dscene, Sc
bool update = false;
foreach (Object *object, scene->objects) {
- Mesh *mesh = object->mesh;
-
- if (mesh->patch_table) {
- uint patch_map_offset = 2 * (mesh->patch_table_offset + mesh->patch_table->total_size() -
- mesh->patch_table->num_nodes * PATCH_NODE_SIZE) -
- mesh->patch_offset;
-
- if (kobjects[object->index].patch_map_offset != patch_map_offset) {
- kobjects[object->index].patch_map_offset = patch_map_offset;
- update = true;
+ Geometry *geom = object->geometry;
+
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (mesh->patch_table) {
+ uint patch_map_offset = 2 * (mesh->patch_table_offset + mesh->patch_table->total_size() -
+ mesh->patch_table->num_nodes * PATCH_NODE_SIZE) -
+ mesh->patch_offset;
+
+ if (kobjects[object->index].patch_map_offset != patch_map_offset) {
+ kobjects[object->index].patch_map_offset = patch_map_offset;
+ update = true;
+ }
}
}
- if (kobjects[object->index].attribute_map_offset != mesh->attr_map_offset) {
- kobjects[object->index].attribute_map_offset = mesh->attr_map_offset;
+ if (kobjects[object->index].attribute_map_offset != geom->attr_map_offset) {
+ kobjects[object->index].attribute_map_offset = geom->attr_map_offset;
update = true;
}
}
@@ -769,26 +779,26 @@ void ObjectManager::device_free(Device *, DeviceScene *dscene)
dscene->object_motion_pass.free();
dscene->object_motion.free();
dscene->object_flag.free();
+ dscene->object_volume_step.free();
}
void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, Progress &progress)
{
/* todo: normals and displacement should be done before applying transform! */
- /* todo: create objects/meshes in right order! */
+ /* todo: create objects/geometry in right order! */
- /* counter mesh users */
- map<Mesh *, int> mesh_users;
+ /* counter geometry users */
+ map<Geometry *, int> geometry_users;
Scene::MotionType need_motion = scene->need_motion();
bool motion_blur = need_motion == Scene::MOTION_BLUR;
bool apply_to_motion = need_motion != Scene::MOTION_PASS;
int i = 0;
- bool have_instancing = false;
foreach (Object *object, scene->objects) {
- map<Mesh *, int>::iterator it = mesh_users.find(object->mesh);
+ map<Geometry *, int>::iterator it = geometry_users.find(object->geometry);
- if (it == mesh_users.end())
- mesh_users[object->mesh] = 1;
+ if (it == geometry_users.end())
+ geometry_users[object->geometry] = 1;
else
it->second++;
}
@@ -798,46 +808,52 @@ void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, P
uint *object_flag = dscene->object_flag.data();
- /* apply transforms for objects with single user meshes */
+ /* apply transforms for objects with single user geometry */
foreach (Object *object, scene->objects) {
/* Annoying feedback loop here: we can't use is_instanced() because
* it'll use uninitialized transform_applied flag.
*
- * Could be solved by moving reference counter to Mesh.
+ * Could be solved by moving reference counter to Geometry.
*/
- if ((mesh_users[object->mesh] == 1 && !object->mesh->has_surface_bssrdf) &&
- !object->mesh->has_true_displacement() &&
- object->mesh->subdivision_type == Mesh::SUBDIVISION_NONE) {
+ Geometry *geom = object->geometry;
+ bool apply = (geometry_users[geom] == 1) && !geom->has_surface_bssrdf &&
+ !geom->has_true_displacement();
+
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ apply = apply && mesh->subdivision_type == Mesh::SUBDIVISION_NONE;
+ }
+ else if (geom->type == Geometry::HAIR) {
+ /* Can't apply non-uniform scale to curves, this can't be represented by
+ * control points and radius alone. */
+ float scale;
+ apply = apply && transform_uniform_scale(object->tfm, scale);
+ }
+
+ if (apply) {
if (!(motion_blur && object->use_motion())) {
- if (!object->mesh->transform_applied) {
+ if (!geom->transform_applied) {
object->apply_transform(apply_to_motion);
- object->mesh->transform_applied = true;
+ geom->transform_applied = true;
if (progress.get_cancel())
return;
}
object_flag[i] |= SD_OBJECT_TRANSFORM_APPLIED;
- if (object->mesh->transform_negative_scaled)
+ if (geom->transform_negative_scaled)
object_flag[i] |= SD_OBJECT_NEGATIVE_SCALE_APPLIED;
}
- else
- have_instancing = true;
}
- else
- have_instancing = true;
i++;
}
-
- dscene->data.bvh.have_instancing = have_instancing;
}
void ObjectManager::tag_update(Scene *scene)
{
need_update = true;
- scene->curve_system_manager->need_update = true;
- scene->mesh_manager->need_update = true;
+ scene->geometry_manager->need_update = true;
scene->light_manager->need_update = true;
}
diff --git a/intern/cycles/render/object.h b/intern/cycles/render/object.h
index 2fd43900da1..ac9b4c331f5 100644
--- a/intern/cycles/render/object.h
+++ b/intern/cycles/render/object.h
@@ -23,8 +23,8 @@
#include "util/util_array.h"
#include "util/util_boundbox.h"
#include "util/util_param.h"
-#include "util/util_transform.h"
#include "util/util_thread.h"
+#include "util/util_transform.h"
#include "util/util_types.h"
#include "util/util_vector.h"
@@ -32,7 +32,7 @@ CCL_NAMESPACE_BEGIN
class Device;
class DeviceScene;
-class Mesh;
+class Geometry;
class ParticleSystem;
class Progress;
class Scene;
@@ -46,11 +46,12 @@ class Object : public Node {
public:
NODE_DECLARE
- Mesh *mesh;
+ Geometry *geometry;
Transform tfm;
BoundBox bounds;
uint random_id;
int pass_id;
+ float3 color;
ustring asset_name;
vector<ParamValue> attributes;
uint visibility;
@@ -58,6 +59,7 @@ class Object : public Node {
bool hide_on_missing_motion;
bool use_holdout;
bool is_shadow_catcher;
+ float shadow_terminator_offset;
float3 dupli_generated;
float2 dupli_uv;
@@ -80,6 +82,9 @@ class Object : public Node {
int motion_step(float time) const;
void update_motion();
+ /* Maximum number of motion steps supported (due to Embree). */
+ static const uint MAX_MOTION_STEPS = 129;
+
/* Check whether object is traceable and worth adding it to
* kernel scene.
*/
@@ -93,6 +98,9 @@ class Object : public Node {
/* Returns the index that is used in the kernel for this object. */
int get_device_index() const;
+ /* Compute step size from attributes, shaders, transforms. */
+ float compute_volume_step_size() const;
+
protected:
/* Specifies the position of the object in scene->objects and
* in the device vectors. Gets set in device_update. */
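The object_volume_step device vector freed in object.cpp above pairs with the new Object::compute_volume_step_size() declaration. The code that fills it is not part of this excerpt; a hypothetical sketch of how the object manager could populate it during its device update (names taken from this diff, the loop itself is assumed):

/* Hypothetical sketch: upload one volume ray-marching step size per object. */
float *object_volume_step = dscene->object_volume_step.alloc(scene->objects.size());

int index = 0;
foreach (Object *object, scene->objects) {
  object_volume_step[index++] = object->compute_volume_step_size();
}

dscene->object_volume_step.copy_to_device();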
diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp
index b66a46938be..5c62ae73e47 100644
--- a/intern/cycles/render/osl.cpp
+++ b/intern/cycles/render/osl.cpp
@@ -16,12 +16,14 @@
#include "device/device.h"
+#include "render/background.h"
+#include "render/colorspace.h"
#include "render/graph.h"
#include "render/light.h"
+#include "render/nodes.h"
#include "render/osl.h"
#include "render/scene.h"
#include "render/shader.h"
-#include "render/nodes.h"
#ifdef WITH_OSL
@@ -29,6 +31,7 @@
# include "kernel/osl/osl_services.h"
# include "kernel/osl/osl_shader.h"
+# include "util/util_aligned_malloc.h"
# include "util/util_foreach.h"
# include "util/util_logging.h"
# include "util/util_md5.h"
@@ -53,6 +56,7 @@ OSLRenderServices *OSLShaderManager::services_shared = NULL;
int OSLShaderManager::ss_shared_users = 0;
thread_mutex OSLShaderManager::ss_shared_mutex;
thread_mutex OSLShaderManager::ss_mutex;
+int OSLCompiler::texture_shared_unique_id = 0;
/* Shader Manager */
@@ -98,11 +102,12 @@ void OSLShaderManager::device_update(Device *device,
device_free(device, dscene, scene);
- /* determine which shaders are in use */
- device_update_shaders_used(scene);
+ /* set texture system */
+ scene->image_manager->set_osl_texture_system((void *)ts);
/* create shaders */
OSLGlobals *og = (OSLGlobals *)device->osl_memory();
+ Shader *background_shader = scene->background->get_shader(scene);
foreach (Shader *shader, scene->shaders) {
assert(shader->graph);
@@ -115,9 +120,9 @@ void OSLShaderManager::device_update(Device *device,
* compile shaders alternating */
thread_scoped_lock lock(ss_mutex);
- OSLCompiler compiler((void *)this, (void *)ss, scene->image_manager, scene->light_manager);
- compiler.background = (shader == scene->default_background);
- compiler.compile(scene, og, shader);
+ OSLCompiler compiler(this, services, ss, scene);
+ compiler.background = (shader == background_shader);
+ compiler.compile(og, shader);
if (shader->use_mis && shader->has_surface_emission)
scene->light_manager->need_update = true;
@@ -128,7 +133,7 @@ void OSLShaderManager::device_update(Device *device,
og->ts = ts;
og->services = services;
- int background_id = scene->shader_manager->get_shader_id(scene->default_background);
+ int background_id = scene->shader_manager->get_shader_id(background_shader);
og->background_state = og->surface_state[background_id & SHADER_MASK];
og->use = true;
@@ -137,15 +142,16 @@ void OSLShaderManager::device_update(Device *device,
need_update = false;
- /* set texture system */
- scene->image_manager->set_osl_texture_system((void *)ts);
+ /* add special builtin texture types */
+ services->textures.insert(ustring("@ao"), new OSLTextureHandle(OSLTextureHandle::AO));
+ services->textures.insert(ustring("@bevel"), new OSLTextureHandle(OSLTextureHandle::BEVEL));
device_update_common(device, dscene, scene, progress);
{
/* Perform greedyjit optimization.
*
- * This might waste time on optimizing gorups which are never actually
+ * This might waste time on optimizing groups which are never actually
* used, but this prevents OSL from allocating data on TLS at render
* time.
*
@@ -218,7 +224,8 @@ void OSLShaderManager::shading_system_init()
thread_scoped_lock lock(ss_shared_mutex);
if (ss_shared_users == 0) {
- services_shared = new OSLRenderServices();
+ /* Must use aligned new due to concurrent hash map. */
+ services_shared = util_aligned_new<OSLRenderServices>(ts_shared);
string shader_path = path_get("shader");
# ifdef _WIN32
@@ -287,7 +294,7 @@ void OSLShaderManager::shading_system_free()
delete ss_shared;
ss_shared = NULL;
- delete services_shared;
+ util_aligned_delete(services_shared);
services_shared = NULL;
}
@@ -309,7 +316,7 @@ bool OSLShaderManager::osl_compile(const string &inputfile, const string &output
string include_path_arg = string("-I") + shader_path;
options.push_back(include_path_arg);
- stdosl_path = path_get("shader/stdosl.h");
+ stdosl_path = path_get("shader/stdcycles.h");
/* compile */
OSL::OSLCompiler *compiler = new OSL::OSLCompiler(&OSL::ErrorHandler::default_handler());
@@ -430,27 +437,35 @@ const char *OSLShaderManager::shader_load_bytecode(const string &hash, const str
return loaded_shaders.find(hash)->first.c_str();
}
-OSLNode *OSLShaderManager::osl_node(const std::string &filepath,
+/* This is a static function to avoid RTTI link errors with only this
+ * file being compiled without RTTI to match OSL and LLVM libraries. */
+OSLNode *OSLShaderManager::osl_node(ShaderManager *manager,
+ const std::string &filepath,
const std::string &bytecode_hash,
const std::string &bytecode)
{
+ if (!manager->use_osl()) {
+ return NULL;
+ }
+
/* create query */
+ OSLShaderManager *osl_manager = static_cast<OSLShaderManager *>(manager);
const char *hash;
if (!filepath.empty()) {
- hash = shader_load_filepath(filepath);
+ hash = osl_manager->shader_load_filepath(filepath);
}
else {
- hash = shader_test_loaded(bytecode_hash);
+ hash = osl_manager->shader_test_loaded(bytecode_hash);
if (!hash)
- hash = shader_load_bytecode(bytecode_hash, bytecode);
+ hash = osl_manager->shader_load_bytecode(bytecode_hash, bytecode);
}
if (!hash) {
return NULL;
}
- OSLShaderInfo *info = shader_loaded_info(hash);
+ OSLShaderInfo *info = osl_manager->shader_loaded_info(hash);
/* count number of inputs */
size_t num_inputs = 0;
@@ -555,15 +570,12 @@ OSLNode *OSLShaderManager::osl_node(const std::string &filepath,
/* Graph Compiler */
-OSLCompiler::OSLCompiler(void *manager_,
- void *shadingsys_,
- ImageManager *image_manager_,
- LightManager *light_manager_)
+OSLCompiler::OSLCompiler(OSLShaderManager *manager,
+ OSLRenderServices *services,
+ OSL::ShadingSystem *ss,
+ Scene *scene)
+ : scene(scene), manager(manager), services(services), ss(ss)
{
- manager = manager_;
- shadingsys = shadingsys_;
- image_manager = image_manager_;
- light_manager = light_manager_;
current_type = SHADER_TYPE_SURFACE;
current_shader = NULL;
background = false;
@@ -649,11 +661,9 @@ bool OSLCompiler::node_skip_input(ShaderNode *node, ShaderInput *input)
void OSLCompiler::add(ShaderNode *node, const char *name, bool isfilepath)
{
- OSL::ShadingSystem *ss = (OSL::ShadingSystem *)shadingsys;
-
/* load filepath */
if (isfilepath) {
- name = ((OSLShaderManager *)manager)->shader_load_filepath(name);
+ name = manager->shader_load_filepath(name);
if (name == NULL)
return;
@@ -665,9 +675,6 @@ void OSLCompiler::add(ShaderNode *node, const char *name, bool isfilepath)
/* checks to untangle graphs */
if (node_skip_input(node, input))
continue;
- /* already has default value assigned */
- else if (input->flags() & SocketType::DEFAULT_LINK_MASK)
- continue;
string param_name = compatible_name(node, input);
const SocketType &socket = input->socket_type;
@@ -731,7 +738,7 @@ void OSLCompiler::add(ShaderNode *node, const char *name, bool isfilepath)
}
/* test if the shader contains specific closures */
- OSLShaderInfo *info = ((OSLShaderManager *)manager)->shader_loaded_info(name);
+ OSLShaderInfo *info = manager->shader_loaded_info(name);
if (current_type == SHADER_TYPE_SURFACE) {
if (info) {
@@ -753,14 +760,8 @@ void OSLCompiler::add(ShaderNode *node, const char *name, bool isfilepath)
else if (current_type == SHADER_TYPE_VOLUME) {
if (node->has_spatial_varying())
current_shader->has_volume_spatial_varying = true;
- }
-
- if (node->has_object_dependency()) {
- current_shader->has_object_dependency = true;
- }
-
- if (node->has_attribute_dependency()) {
- current_shader->has_attribute_dependency = true;
+ if (node->has_attribute_dependency())
+ current_shader->has_volume_attribute_dependency = true;
}
if (node->has_integrator_dependency()) {
@@ -778,7 +779,6 @@ static TypeDesc array_typedesc(TypeDesc typedesc, int arraylength)
void OSLCompiler::parameter(ShaderNode *node, const char *name)
{
- OSL::ShadingSystem *ss = (OSL::ShadingSystem *)shadingsys;
ustring uname = ustring(name);
const SocketType &socket = *(node->type->find_input(uname));
@@ -930,56 +930,47 @@ void OSLCompiler::parameter(ShaderNode *node, const char *name)
void OSLCompiler::parameter(const char *name, float f)
{
- OSL::ShadingSystem *ss = (OSL::ShadingSystem *)shadingsys;
ss->Parameter(name, TypeDesc::TypeFloat, &f);
}
void OSLCompiler::parameter_color(const char *name, float3 f)
{
- OSL::ShadingSystem *ss = (OSL::ShadingSystem *)shadingsys;
ss->Parameter(name, TypeDesc::TypeColor, &f);
}
void OSLCompiler::parameter_point(const char *name, float3 f)
{
- OSL::ShadingSystem *ss = (OSL::ShadingSystem *)shadingsys;
ss->Parameter(name, TypeDesc::TypePoint, &f);
}
void OSLCompiler::parameter_normal(const char *name, float3 f)
{
- OSL::ShadingSystem *ss = (OSL::ShadingSystem *)shadingsys;
ss->Parameter(name, TypeDesc::TypeNormal, &f);
}
void OSLCompiler::parameter_vector(const char *name, float3 f)
{
- OSL::ShadingSystem *ss = (OSL::ShadingSystem *)shadingsys;
ss->Parameter(name, TypeDesc::TypeVector, &f);
}
void OSLCompiler::parameter(const char *name, int f)
{
- OSL::ShadingSystem *ss = (OSL::ShadingSystem *)shadingsys;
ss->Parameter(name, TypeDesc::TypeInt, &f);
}
void OSLCompiler::parameter(const char *name, const char *s)
{
- OSL::ShadingSystem *ss = (OSL::ShadingSystem *)shadingsys;
ss->Parameter(name, TypeDesc::TypeString, &s);
}
void OSLCompiler::parameter(const char *name, ustring s)
{
- OSL::ShadingSystem *ss = (OSL::ShadingSystem *)shadingsys;
const char *str = s.c_str();
ss->Parameter(name, TypeDesc::TypeString, &str);
}
void OSLCompiler::parameter(const char *name, const Transform &tfm)
{
- OSL::ShadingSystem *ss = (OSL::ShadingSystem *)shadingsys;
ProjectionTransform projection(tfm);
projection = projection_transpose(projection);
ss->Parameter(name, TypeDesc::TypeMatrix, (float *)&projection);
@@ -987,7 +978,6 @@ void OSLCompiler::parameter(const char *name, const Transform &tfm)
void OSLCompiler::parameter_array(const char *name, const float f[], int arraylen)
{
- OSL::ShadingSystem *ss = (OSL::ShadingSystem *)shadingsys;
TypeDesc type = TypeDesc::TypeFloat;
type.arraylen = arraylen;
ss->Parameter(name, type, f);
@@ -1004,7 +994,6 @@ void OSLCompiler::parameter_color_array(const char *name, const array<float3> &f
table[i][2] = f[i].z;
}
- OSL::ShadingSystem *ss = (OSL::ShadingSystem *)shadingsys;
TypeDesc type = TypeDesc::TypeColor;
type.arraylen = table.size();
ss->Parameter(name, type, table.data());
@@ -1082,8 +1071,6 @@ void OSLCompiler::generate_nodes(const ShaderNodeSet &nodes)
OSL::ShaderGroupRef OSLCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType type)
{
- OSL::ShadingSystem *ss = (OSL::ShadingSystem *)shadingsys;
-
current_type = type;
OSL::ShaderGroupRef group = ss->ShaderGroupBegin(shader->name.c_str());
@@ -1123,7 +1110,7 @@ OSL::ShaderGroupRef OSLCompiler::compile_type(Shader *shader, ShaderGraph *graph
return group;
}
-void OSLCompiler::compile(Scene *scene, OSLGlobals *og, Shader *shader)
+void OSLCompiler::compile(OSLGlobals *og, Shader *shader)
{
if (shader->need_update) {
ShaderGraph *graph = shader->graph;
@@ -1150,8 +1137,7 @@ void OSLCompiler::compile(Scene *scene, OSLGlobals *og, Shader *shader)
shader->has_displacement = false;
shader->has_surface_spatial_varying = false;
shader->has_volume_spatial_varying = false;
- shader->has_object_dependency = false;
- shader->has_attribute_dependency = false;
+ shader->has_volume_attribute_dependency = false;
shader->has_integrator_dependency = false;
/* generate surface shader */
@@ -1194,6 +1180,35 @@ void OSLCompiler::compile(Scene *scene, OSLGlobals *og, Shader *shader)
og->bump_state.push_back(shader->osl_surface_bump_ref);
}
+void OSLCompiler::parameter_texture(const char *name, ustring filename, ustring colorspace)
+{
+ /* Texture loaded through the OpenImageIO texture cache. For this
+ * case we need to do runtime color space conversion. */
+ OSLTextureHandle *handle = new OSLTextureHandle(OSLTextureHandle::OIIO);
+ handle->processor = ColorSpaceManager::get_processor(colorspace);
+ services->textures.insert(filename, handle);
+ parameter(name, filename);
+}
+
+void OSLCompiler::parameter_texture(const char *name, int svm_slot)
+{
+ /* Texture loaded through SVM image texture system. We generate a unique
+ * name, which ends up being used in OSLRenderServices::get_texture_handle
+ * to get handle again. Note that this name must be unique between multiple
+ * render sessions as the render services are shared. */
+ ustring filename(string_printf("@svm%d", texture_shared_unique_id++).c_str());
+ services->textures.insert(filename, new OSLTextureHandle(OSLTextureHandle::SVM, svm_slot));
+ parameter(name, filename);
+}
+
+void OSLCompiler::parameter_texture_ies(const char *name, int svm_slot)
+{
+ /* IES light textures stored in SVM. */
+ ustring filename(string_printf("@svm%d", texture_shared_unique_id++).c_str());
+ services->textures.insert(filename, new OSLTextureHandle(OSLTextureHandle::IES, svm_slot));
+ parameter(name, filename);
+}
+
#else
void OSLCompiler::add(ShaderNode * /*node*/, const char * /*name*/, bool /*isfilepath*/)
@@ -1248,6 +1263,20 @@ void OSLCompiler::parameter_color_array(const char * /*name*/, const array<float
{
}
+void OSLCompiler::parameter_texture(const char * /* name */,
+ ustring /* filename */,
+ ustring /* colorspace */)
+{
+}
+
+void OSLCompiler::parameter_texture(const char * /* name */, int /* svm_slot */)
+{
+}
+
+void OSLCompiler::parameter_texture_ies(const char * /* name */, int /* svm_slot */)
+{
+}
+
#endif /* WITH_OSL */
CCL_NAMESPACE_END
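The parameter_texture() overloads added above are called from shader node compilation; a hedged usage sketch showing how a node could pick between the OIIO path and the SVM path (the function and variable names here are illustrative, not the actual image node code):

/* Hypothetical caller from a ShaderNode::compile(OSLCompiler &compiler) override. */
static void compile_image_parameter_sketch(OSLCompiler &compiler,
                                           ustring filename,
                                           ustring colorspace,
                                           int svm_slot)
{
  if (svm_slot >= 0) {
    /* Image lives in the SVM image system: OSL only sees a unique "@svm<N>" name
     * that OSLRenderServices later maps back to the slot. */
    compiler.parameter_texture("filename", svm_slot);
  }
  else {
    /* Image goes through the OpenImageIO texture cache: record the color space
     * so it can be converted at texture lookup time. */
    compiler.parameter_texture("filename", filename, colorspace);
  }
}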
diff --git a/intern/cycles/render/osl.h b/intern/cycles/render/osl.h
index aec518a6c2b..4dd9f6630f2 100644
--- a/intern/cycles/render/osl.h
+++ b/intern/cycles/render/osl.h
@@ -93,9 +93,10 @@ class OSLShaderManager : public ShaderManager {
OSLShaderInfo *shader_loaded_info(const string &hash);
/* create OSL node using OSLQuery */
- OSLNode *osl_node(const std::string &filepath,
- const std::string &bytecode_hash = "",
- const std::string &bytecode = "");
+ static OSLNode *osl_node(ShaderManager *manager,
+ const std::string &filepath,
+ const std::string &bytecode_hash = "",
+ const std::string &bytecode = "");
protected:
void texture_system_init();
@@ -127,11 +128,13 @@ class OSLShaderManager : public ShaderManager {
class OSLCompiler {
public:
- OSLCompiler(void *manager,
- void *shadingsys,
- ImageManager *image_manager,
- LightManager *light_manager);
- void compile(Scene *scene, OSLGlobals *og, Shader *shader);
+#ifdef WITH_OSL
+ OSLCompiler(OSLShaderManager *manager,
+ OSLRenderServices *services,
+ OSL::ShadingSystem *shadingsys,
+ Scene *scene);
+#endif
+ void compile(OSLGlobals *og, Shader *shader);
void add(ShaderNode *node, const char *name, bool isfilepath = false);
@@ -152,14 +155,17 @@ class OSLCompiler {
void parameter_attribute(const char *name, ustring s);
+ void parameter_texture(const char *name, ustring filename, ustring colorspace);
+ void parameter_texture(const char *name, int svm_slot);
+ void parameter_texture_ies(const char *name, int svm_slot);
+
ShaderType output_type()
{
return current_type;
}
bool background;
- ImageManager *image_manager;
- LightManager *light_manager;
+ Scene *scene;
private:
#ifdef WITH_OSL
@@ -171,12 +177,16 @@ class OSLCompiler {
void find_dependencies(ShaderNodeSet &dependencies, ShaderInput *input);
void generate_nodes(const ShaderNodeSet &nodes);
+
+ OSLShaderManager *manager;
+ OSLRenderServices *services;
+ OSL::ShadingSystem *ss;
#endif
- void *shadingsys;
- void *manager;
ShaderType current_type;
Shader *current_shader;
+
+ static int texture_shared_unique_id;
};
CCL_NAMESPACE_END
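With osl_node() turned into a static factory above, callers only need a ShaderManager pointer and let the function reject the call when OSL is not in use. A hedged sketch of the calling pattern (the wrapper name is invented; the real caller is the script node sync on the Blender side):

/* Hypothetical wrapper: returns NULL when the scene is not using OSL or the
 * shader failed to load. */
static OSLNode *script_node_sketch(Scene *scene, const std::string &filepath)
{
  OSLNode *node = OSLShaderManager::osl_node(scene->shader_manager, filepath);
  if (node == NULL) {
    /* SVM backend or compile failure: caller falls back or skips the node. */
    return NULL;
  }
  return node;
}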
diff --git a/intern/cycles/render/particles.cpp b/intern/cycles/render/particles.cpp
index 8335404b197..ec9276eff86 100644
--- a/intern/cycles/render/particles.cpp
+++ b/intern/cycles/render/particles.cpp
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "device/device.h"
#include "render/particles.h"
+#include "device/device.h"
#include "render/scene.h"
#include "util/util_foreach.h"
diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp
index 98fccf406d4..843d4738a95 100644
--- a/intern/cycles/render/scene.cpp
+++ b/intern/cycles/render/scene.cpp
@@ -16,11 +16,11 @@
#include <stdlib.h>
+#include "device/device.h"
#include "render/background.h"
#include "render/bake.h"
#include "render/camera.h"
#include "render/curves.h"
-#include "device/device.h"
#include "render/film.h"
#include "render/integrator.h"
#include "render/light.h"
@@ -41,38 +41,45 @@
CCL_NAMESPACE_BEGIN
DeviceScene::DeviceScene(Device *device)
- : bvh_nodes(device, "__bvh_nodes", MEM_TEXTURE),
- bvh_leaf_nodes(device, "__bvh_leaf_nodes", MEM_TEXTURE),
- object_node(device, "__object_node", MEM_TEXTURE),
- prim_tri_index(device, "__prim_tri_index", MEM_TEXTURE),
- prim_tri_verts(device, "__prim_tri_verts", MEM_TEXTURE),
- prim_type(device, "__prim_type", MEM_TEXTURE),
- prim_visibility(device, "__prim_visibility", MEM_TEXTURE),
- prim_index(device, "__prim_index", MEM_TEXTURE),
- prim_object(device, "__prim_object", MEM_TEXTURE),
- prim_time(device, "__prim_time", MEM_TEXTURE),
- tri_shader(device, "__tri_shader", MEM_TEXTURE),
- tri_vnormal(device, "__tri_vnormal", MEM_TEXTURE),
- tri_vindex(device, "__tri_vindex", MEM_TEXTURE),
- tri_patch(device, "__tri_patch", MEM_TEXTURE),
- tri_patch_uv(device, "__tri_patch_uv", MEM_TEXTURE),
- curves(device, "__curves", MEM_TEXTURE),
- curve_keys(device, "__curve_keys", MEM_TEXTURE),
- patches(device, "__patches", MEM_TEXTURE),
- objects(device, "__objects", MEM_TEXTURE),
- object_motion_pass(device, "__object_motion_pass", MEM_TEXTURE),
- object_motion(device, "__object_motion", MEM_TEXTURE),
- object_flag(device, "__object_flag", MEM_TEXTURE),
- camera_motion(device, "__camera_motion", MEM_TEXTURE),
- attributes_map(device, "__attributes_map", MEM_TEXTURE),
- attributes_float(device, "__attributes_float", MEM_TEXTURE),
- attributes_float2(device, "__attributes_float2", MEM_TEXTURE),
- attributes_float3(device, "__attributes_float3", MEM_TEXTURE),
- attributes_uchar4(device, "__attributes_uchar4", MEM_TEXTURE),
- light_distribution(device, "__light_distribution", MEM_TEXTURE),
- lights(device, "__lights", MEM_TEXTURE),
- light_background_marginal_cdf(device, "__light_background_marginal_cdf", MEM_TEXTURE),
- light_background_conditional_cdf(device, "__light_background_conditional_cdf", MEM_TEXTURE),
+ : bvh_nodes(device, "__bvh_nodes", MEM_GLOBAL),
+ bvh_leaf_nodes(device, "__bvh_leaf_nodes", MEM_GLOBAL),
+ object_node(device, "__object_node", MEM_GLOBAL),
+ prim_tri_index(device, "__prim_tri_index", MEM_GLOBAL),
+ prim_tri_verts(device, "__prim_tri_verts", MEM_GLOBAL),
+ prim_type(device, "__prim_type", MEM_GLOBAL),
+ prim_visibility(device, "__prim_visibility", MEM_GLOBAL),
+ prim_index(device, "__prim_index", MEM_GLOBAL),
+ prim_object(device, "__prim_object", MEM_GLOBAL),
+ prim_time(device, "__prim_time", MEM_GLOBAL),
+ tri_shader(device, "__tri_shader", MEM_GLOBAL),
+ tri_vnormal(device, "__tri_vnormal", MEM_GLOBAL),
+ tri_vindex(device, "__tri_vindex", MEM_GLOBAL),
+ tri_patch(device, "__tri_patch", MEM_GLOBAL),
+ tri_patch_uv(device, "__tri_patch_uv", MEM_GLOBAL),
+ curves(device, "__curves", MEM_GLOBAL),
+ curve_keys(device, "__curve_keys", MEM_GLOBAL),
+ patches(device, "__patches", MEM_GLOBAL),
+ objects(device, "__objects", MEM_GLOBAL),
+ object_motion_pass(device, "__object_motion_pass", MEM_GLOBAL),
+ object_motion(device, "__object_motion", MEM_GLOBAL),
+ object_flag(device, "__object_flag", MEM_GLOBAL),
+ object_volume_step(device, "__object_volume_step", MEM_GLOBAL),
+ camera_motion(device, "__camera_motion", MEM_GLOBAL),
+ attributes_map(device, "__attributes_map", MEM_GLOBAL),
+ attributes_float(device, "__attributes_float", MEM_GLOBAL),
+ attributes_float2(device, "__attributes_float2", MEM_GLOBAL),
+ attributes_float3(device, "__attributes_float3", MEM_GLOBAL),
+ attributes_uchar4(device, "__attributes_uchar4", MEM_GLOBAL),
+ light_distribution(device, "__light_distribution", MEM_GLOBAL),
+ lights(device, "__lights", MEM_GLOBAL),
+ light_background_marginal_cdf(device, "__light_background_marginal_cdf", MEM_GLOBAL),
+ light_background_conditional_cdf(device, "__light_background_conditional_cdf", MEM_GLOBAL),
+ particles(device, "__particles", MEM_GLOBAL),
+ svm_nodes(device, "__svm_nodes", MEM_GLOBAL),
+ shaders(device, "__shaders", MEM_GLOBAL),
+ lookup_table(device, "__lookup_table", MEM_GLOBAL),
+ sample_pattern_lut(device, "__sample_pattern_lut", MEM_GLOBAL),
+ ies_lights(device, "__ies", MEM_GLOBAL),
light_tree_nodes(device, "__light_tree_nodes", MEM_TEXTURE),
light_distribution_to_node(device, "__light_distribution_to_node", MEM_TEXTURE),
lamp_to_distribution(device, "__lamp_to_distribution", MEM_TEXTURE),
@@ -81,18 +88,20 @@ DeviceScene::DeviceScene(Device *device)
light_group_sample_prob(device, "__light_group_sample_prob", MEM_TEXTURE),
leaf_to_first_emitter(device, "__leaf_to_first_emitter", MEM_TEXTURE),
light_tree_leaf_emitters(device, "__light_tree_leaf_emitters", MEM_TEXTURE),
- particles(device, "__particles", MEM_TEXTURE),
- svm_nodes(device, "__svm_nodes", MEM_TEXTURE),
- shaders(device, "__shaders", MEM_TEXTURE),
- lookup_table(device, "__lookup_table", MEM_TEXTURE),
- sobol_directions(device, "__sobol_directions", MEM_TEXTURE),
- ies_lights(device, "__ies", MEM_TEXTURE)
{
memset((void *)&data, 0, sizeof(data));
}
Scene::Scene(const SceneParams &params_, Device *device)
- : name("Scene"), device(device), dscene(device), params(params_)
+ : name("Scene"),
+ default_surface(NULL),
+ default_volume(NULL),
+ default_light(NULL),
+ default_background(NULL),
+ default_empty(NULL),
+ device(device),
+ dscene(device),
+ params(params_)
{
memset((void *)&dscene.data, 0, sizeof(dscene.data));
@@ -102,19 +111,20 @@ Scene::Scene(const SceneParams &params_, Device *device)
film = new Film();
background = new Background();
light_manager = new LightManager();
- mesh_manager = new MeshManager();
+ geometry_manager = new GeometryManager();
object_manager = new ObjectManager();
integrator = new Integrator();
image_manager = new ImageManager(device->info);
particle_system_manager = new ParticleSystemManager();
- curve_system_manager = new CurveSystemManager();
bake_manager = new BakeManager();
/* OSL only works on the CPU */
if (device->info.has_osl)
- shader_manager = ShaderManager::create(this, params.shadingsystem);
+ shader_manager = ShaderManager::create(params.shadingsystem);
else
- shader_manager = ShaderManager::create(this, SHADINGSYSTEM_SVM);
+ shader_manager = ShaderManager::create(SHADINGSYSTEM_SVM);
+
+ shader_manager->add_default(this);
}
Scene::~Scene()
@@ -126,8 +136,8 @@ void Scene::free_memory(bool final)
{
foreach (Shader *s, shaders)
delete s;
- foreach (Mesh *m, meshes)
- delete m;
+ foreach (Geometry *g, geometry)
+ delete g;
foreach (Object *o, objects)
delete o;
foreach (Light *l, lights)
@@ -136,7 +146,7 @@ void Scene::free_memory(bool final)
delete p;
shaders.clear();
- meshes.clear();
+ geometry.clear();
objects.clear();
lights.clear();
particle_systems.clear();
@@ -148,12 +158,11 @@ void Scene::free_memory(bool final)
integrator->device_free(device, &dscene);
object_manager->device_free(device, &dscene);
- mesh_manager->device_free(device, &dscene);
+ geometry_manager->device_free(device, &dscene);
shader_manager->device_free(device, &dscene, this);
light_manager->device_free(device, &dscene);
particle_system_manager->device_free(device, &dscene);
- curve_system_manager->device_free(device, &dscene);
bake_manager->device_free(device, &dscene);
@@ -173,11 +182,10 @@ void Scene::free_memory(bool final)
delete background;
delete integrator;
delete object_manager;
- delete mesh_manager;
+ delete geometry_manager;
delete shader_manager;
delete light_manager;
delete particle_system_manager;
- delete curve_system_manager;
delete image_manager;
delete bake_manager;
}
@@ -219,7 +227,7 @@ void Scene::device_update(Device *device_, Progress &progress)
if (progress.get_cancel() || device->have_error())
return;
- mesh_manager->device_update_preprocess(device, this, progress);
+ geometry_manager->device_update_preprocess(device, this, progress);
if (progress.get_cancel() || device->have_error())
return;
@@ -230,12 +238,6 @@ void Scene::device_update(Device *device_, Progress &progress)
if (progress.get_cancel() || device->have_error())
return;
- progress.set_status("Updating Hair Systems");
- curve_system_manager->device_update(device, &dscene, this, progress);
-
- if (progress.get_cancel() || device->have_error())
- return;
-
progress.set_status("Updating Particle Systems");
particle_system_manager->device_update(device, &dscene, this, progress);
@@ -243,7 +245,7 @@ void Scene::device_update(Device *device_, Progress &progress)
return;
progress.set_status("Updating Meshes");
- mesh_manager->device_update(device, &dscene, this, progress);
+ geometry_manager->device_update(device, &dscene, this, progress);
if (progress.get_cancel() || device->have_error())
return;
@@ -364,10 +366,9 @@ bool Scene::need_update()
bool Scene::need_data_update()
{
return (background->need_update || image_manager->need_update || object_manager->need_update ||
- mesh_manager->need_update || light_manager->need_update || lookup_tables->need_update ||
- integrator->need_update || shader_manager->need_update ||
- particle_system_manager->need_update || curve_system_manager->need_update ||
- bake_manager->need_update || film->need_update);
+ geometry_manager->need_update || light_manager->need_update ||
+ lookup_tables->need_update || integrator->need_update || shader_manager->need_update ||
+ particle_system_manager->need_update || bake_manager->need_update || film->need_update);
}
bool Scene::need_reset()
@@ -387,10 +388,9 @@ void Scene::reset()
background->tag_update(this);
integrator->tag_update(this);
object_manager->tag_update(this);
- mesh_manager->tag_update(this);
+ geometry_manager->tag_update(this);
light_manager->tag_update(this);
particle_system_manager->tag_update(this);
- curve_system_manager->tag_update(this);
}
void Scene::device_free()
@@ -400,7 +400,7 @@ void Scene::device_free()
void Scene::collect_statistics(RenderStats *stats)
{
- mesh_manager->collect_statistics(this, stats);
+ geometry_manager->collect_statistics(this, stats);
image_manager->collect_statistics(stats);
}
diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h
index e48cec1fd57..721ecbe14ac 100644
--- a/intern/cycles/render/scene.h
+++ b/intern/cycles/render/scene.h
@@ -44,8 +44,8 @@ class Integrator;
class Light;
class LightManager;
class LookupTables;
-class Mesh;
-class MeshManager;
+class Geometry;
+class GeometryManager;
class Object;
class ObjectManager;
class ParticleSystemManager;
@@ -91,6 +91,7 @@ class DeviceScene {
device_vector<Transform> object_motion_pass;
device_vector<DecomposedTransform> object_motion;
device_vector<uint> object_flag;
+ device_vector<float> object_volume_step;
/* cameras */
device_vector<DecomposedTransform> camera_motion;
@@ -127,7 +128,7 @@ class DeviceScene {
device_vector<float> lookup_table;
/* integrator */
- device_vector<uint> sobol_directions;
+ device_vector<uint> sample_pattern_lut;
/* ies lights */
device_vector<float> ies_lights;
@@ -175,9 +176,13 @@ class SceneParams {
bool use_bvh_spatial_split;
bool use_bvh_unaligned_nodes;
int num_bvh_time_steps;
+ int hair_subdivisions;
+ CurveShapeType hair_shape;
bool persistent_data;
int texture_limit;
+ bool background;
+
SceneParams()
{
shadingsystem = SHADINGSYSTEM_SVM;
@@ -186,8 +191,11 @@ class SceneParams {
use_bvh_spatial_split = false;
use_bvh_unaligned_nodes = true;
num_bvh_time_steps = 0;
+ hair_subdivisions = 3;
+ hair_shape = CURVE_RIBBON;
persistent_data = false;
texture_limit = 0;
+ background = true;
}
bool modified(const SceneParams &params)
@@ -197,8 +205,15 @@ class SceneParams {
use_bvh_spatial_split == params.use_bvh_spatial_split &&
use_bvh_unaligned_nodes == params.use_bvh_unaligned_nodes &&
num_bvh_time_steps == params.num_bvh_time_steps &&
+ hair_subdivisions == params.hair_subdivisions && hair_shape == params.hair_shape &&
persistent_data == params.persistent_data && texture_limit == params.texture_limit);
}
+
+ int curve_subdivisions()
+ {
+ /* Matching the tessellation rate limit in Embree. */
+ return clamp(1 << hair_subdivisions, 1, 16);
+ }
};
/* Scene */
@@ -218,7 +233,7 @@ class Scene {
/* data lists */
vector<Object *> objects;
- vector<Mesh *> meshes;
+ vector<Geometry *> geometry;
vector<Shader *> shaders;
vector<Light *> lights;
vector<ParticleSystem *> particle_systems;
@@ -227,14 +242,14 @@ class Scene {
ImageManager *image_manager;
LightManager *light_manager;
ShaderManager *shader_manager;
- MeshManager *mesh_manager;
+ GeometryManager *geometry_manager;
ObjectManager *object_manager;
ParticleSystemManager *particle_system_manager;
- CurveSystemManager *curve_system_manager;
BakeManager *bake_manager;
/* default shaders */
Shader *default_surface;
+ Shader *default_volume;
Shader *default_light;
Shader *default_background;
Shader *default_empty;
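A quick check of the new SceneParams::curve_subdivisions() helper above, which clamps the power-of-two subdivision count to Embree's tessellation limit:

/* Worked examples (values are illustrative):
 *   hair_subdivisions = 0  ->  1 << 0 = 1   ->  clamp(1, 1, 16)  = 1
 *   hair_subdivisions = 3  ->  1 << 3 = 8   ->  clamp(8, 1, 16)  = 8   (default)
 *   hair_subdivisions = 5  ->  1 << 5 = 32  ->  clamp(32, 1, 16) = 16
 */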
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index 29eb779a7d6..c5033359c6b 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -14,12 +14,13 @@
* limitations under the License.
*/
-#include <string.h>
#include <limits.h>
+#include <string.h>
+#include "device/device.h"
+#include "render/bake.h"
#include "render/buffers.h"
#include "render/camera.h"
-#include "device/device.h"
#include "render/graph.h"
#include "render/integrator.h"
#include "render/light.h"
@@ -27,7 +28,6 @@
#include "render/object.h"
#include "render/scene.h"
#include "render/session.h"
-#include "render/bake.h"
#include "util/util_foreach.h"
#include "util/util_function.h"
@@ -61,8 +61,10 @@ Session::Session(const SessionParams &params_)
TaskScheduler::init(params.threads);
+ /* Create CPU/GPU devices. */
device = Device::create(params.device, stats, profiler, params.background);
+ /* Create buffers for interactive rendering. */
if (params.background && !params.write_render_cb) {
buffers = NULL;
display = NULL;
@@ -72,6 +74,9 @@ Session::Session(const SessionParams &params_)
display = new DisplayBuffer(device, params.display_buffer_linear);
}
+ /* Validate denoising parameters. */
+ set_denoising(params.denoising);
+
session_thread = NULL;
scene = NULL;
@@ -83,7 +88,7 @@ Session::Session(const SessionParams &params_)
display_outdated = false;
gpu_draw_ready = false;
- gpu_need_tonemap = false;
+ gpu_need_display_buffer_update = false;
pause = false;
kernels_loaded = false;
@@ -97,8 +102,8 @@ Session::~Session()
/* wait for session thread to end */
progress.set_cancel("Exiting");
- gpu_need_tonemap = false;
- gpu_need_tonemap_cond.notify_all();
+ gpu_need_display_buffer_update = false;
+ gpu_need_display_buffer_update_cond.notify_all();
{
thread_scoped_lock pause_lock(pause_mutex);
@@ -110,12 +115,12 @@ Session::~Session()
}
if (params.write_render_cb) {
- /* tonemap and write out image if requested */
+ /* Copy to display buffer and write out image if requested */
delete display;
display = new DisplayBuffer(device, false);
display->reset(buffers->params);
- tonemap(params.samples);
+ copy_to_display_buffer(params.samples);
int w = display->draw_width;
int h = display->draw_height;
@@ -168,8 +173,8 @@ void Session::reset_gpu(BufferParams &buffer_params, int samples)
reset_(buffer_params, samples);
- gpu_need_tonemap = false;
- gpu_need_tonemap_cond.notify_all();
+ gpu_need_display_buffer_update = false;
+ gpu_need_display_buffer_update_cond.notify_all();
pause_cond.notify_all();
}
@@ -183,14 +188,15 @@ bool Session::draw_gpu(BufferParams &buffer_params, DeviceDrawParams &draw_param
if (gpu_draw_ready) {
/* then verify the buffers have the expected size, so we don't
* draw previous results in a resized window */
- if (!buffer_params.modified(display->params)) {
- /* for CUDA we need to do tonemapping still, since we can
- * only access GL buffers from the main thread */
- if (gpu_need_tonemap) {
+ if (buffer_params.width == display->params.width &&
+ buffer_params.height == display->params.height) {
+ /* for CUDA we need to do tone-mapping still, since we can
+ * only access GL buffers from the main thread. */
+ if (gpu_need_display_buffer_update) {
thread_scoped_lock buffers_lock(buffers_mutex);
- tonemap(tile_manager.state.sample);
- gpu_need_tonemap = false;
- gpu_need_tonemap_cond.notify_all();
+ copy_to_display_buffer(tile_manager.state.sample);
+ gpu_need_display_buffer_update = false;
+ gpu_need_display_buffer_update_cond.notify_all();
}
display->draw(device, draw_params);
@@ -211,6 +217,7 @@ void Session::run_gpu()
reset_time = time_dt();
last_update_time = time_dt();
+ last_display_time = last_update_time;
progress.set_render_start_time();
@@ -232,7 +239,7 @@ void Session::run_gpu()
}
/* Don't go in pause mode when image was rendered with preview kernels
- * When feature kernels become available the session will be resetted. */
+ * When feature kernels become available the session will be reset. */
else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
time_sleep(0.1);
}
@@ -285,9 +292,7 @@ void Session::run_gpu()
if (progress.get_cancel())
break;
- }
- if (!no_tiles) {
/* buffers mutex is locked entirely while rendering each
* sample, and released/reacquired on each iteration to allow
* reset and draw in between */
@@ -297,7 +302,9 @@ void Session::run_gpu()
update_status_time();
/* render */
- render();
+ bool delayed_denoise = false;
+ const bool need_denoise = render_need_denoise(delayed_denoise);
+ render(need_denoise);
device->task_wait();
@@ -307,17 +314,17 @@ void Session::run_gpu()
/* update status and timing */
update_status_time();
- gpu_need_tonemap = true;
+ gpu_need_display_buffer_update = !delayed_denoise;
gpu_draw_ready = true;
progress.set_update();
- /* wait for tonemap */
+ /* wait until the display buffer is updated */
if (!params.background) {
- while (gpu_need_tonemap) {
+ while (gpu_need_display_buffer_update) {
if (progress.get_cancel())
break;
- gpu_need_tonemap_cond.wait(buffers_lock);
+ gpu_need_display_buffer_update_cond.wait(buffers_lock);
}
}
@@ -361,7 +368,8 @@ bool Session::draw_cpu(BufferParams &buffer_params, DeviceDrawParams &draw_param
if (display->draw_ready()) {
/* then verify the buffers have the expected size, so we don't
* draw previous results in a resized window */
- if (!buffer_params.modified(display->params)) {
+ if (buffer_params.width == display->params.width &&
+ buffer_params.height == display->params.height) {
display->draw(device, draw_params);
if (display_outdated && (time_dt() - reset_time) > params.text_timeout)
@@ -374,7 +382,7 @@ bool Session::draw_cpu(BufferParams &buffer_params, DeviceDrawParams &draw_param
return false;
}
-bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
+bool Session::acquire_tile(RenderTile &rtile, Device *tile_device, uint tile_types)
{
if (progress.get_cancel()) {
if (params.progressive_refine == false) {
@@ -389,8 +397,14 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
Tile *tile;
int device_num = device->device_number(tile_device);
- if (!tile_manager.next_tile(tile, device_num))
+ while (!tile_manager.next_tile(tile, device_num, tile_types)) {
+ /* Wait for denoising tiles to become available */
+ if ((tile_types & RenderTile::DENOISE) && !progress.get_cancel() && tile_manager.has_tiles()) {
+ denoising_cond.wait(tile_lock);
+ continue;
+ }
return false;
+ }
/* fill render tile */
rtile.x = tile_manager.state.buffer.full_x + tile->x;
@@ -401,7 +415,16 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
rtile.num_samples = tile_manager.state.num_samples;
rtile.resolution = tile_manager.state.resolution_divider;
rtile.tile_index = tile->index;
- rtile.task = (tile->state == Tile::DENOISE) ? RenderTile::DENOISE : RenderTile::PATH_TRACE;
+
+ if (tile->state == Tile::DENOISE) {
+ rtile.task = RenderTile::DENOISE;
+ }
+ else if (read_bake_tile_cb) {
+ rtile.task = RenderTile::BAKE;
+ }
+ else {
+ rtile.task = RenderTile::PATH_TRACE;
+ }
tile_lock.unlock();
@@ -415,6 +438,15 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
device->map_tile(tile_device, rtile);
+ /* Reset copy state, since buffer contents change after the tile was acquired */
+ buffers->map_neighbor_copied = false;
+
+ /* This hack ensures that the copy in 'MultiDevice::map_neighbor_tiles' accounts
+ * for the buffer resolution divider. */
+ buffers->buffer.data_width = (buffers->params.width * buffers->params.get_passes_size()) /
+ tile_manager.state.resolution_divider;
+ buffers->buffer.data_height = buffers->params.height / tile_manager.state.resolution_divider;
+
return true;
}
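To make the data_width/data_height remapping above concrete, a worked example with assumed numbers (a 1920x1080 buffer, 16 floats of passes per pixel, resolution divider 2):

/* Illustration only, hypothetical values. */
const int width = 1920, height = 1080;    /* buffers->params */
const int passes_size = 16;               /* floats per pixel across all passes */
const int resolution_divider = 2;         /* tile_manager.state.resolution_divider */

const int data_width = (width * passes_size) / resolution_divider;  /* 15360 */
const int data_height = height / resolution_divider;                /* 540 */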
@@ -431,17 +463,28 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
tile->buffers->reset(buffer_params);
}
+ tile->buffers->map_neighbor_copied = false;
+
tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride);
rtile.buffer = tile->buffers->buffer.device_pointer;
rtile.buffers = tile->buffers;
rtile.sample = tile_manager.state.sample;
- /* this will tag tile as IN PROGRESS in blender-side render pipeline,
- * which is needed to highlight currently rendering tile before first
- * sample was processed for it
- */
- update_tile_sample(rtile);
+ if (read_bake_tile_cb) {
+ /* This will read any passes needed as input for baking. */
+ {
+ thread_scoped_lock tile_lock(tile_mutex);
+ read_bake_tile_cb(rtile);
+ }
+ rtile.buffers->buffer.copy_to_device();
+ }
+ else {
+ /* This will tag tile as IN PROGRESS in blender-side render pipeline,
+ * which is needed to highlight currently rendering tile before first
+ * sample was processed for it. */
+ update_tile_sample(rtile);
+ }
return true;
}
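The wait loop added to acquire_tile() above is the consumer half of a small producer/consumer handshake: it blocks on denoising_cond until the tile manager can hand out a tile of the requested type, and release_tile() (later in this diff) notifies the condition once a finished render tile may have made a neighbor eligible for denoising. Condensed, using the names from this diff:

/* Consumer, inside acquire_tile(): */
while (!tile_manager.next_tile(tile, device_num, tile_types)) {
  if ((tile_types & RenderTile::DENOISE) && !progress.get_cancel() && tile_manager.has_tiles()) {
    denoising_cond.wait(tile_lock); /* woken by release_tile() */
    continue;
  }
  return false;
}

/* Producer, at the end of release_tile(): */
denoising_cond.notify_all();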
@@ -461,7 +504,7 @@ void Session::update_tile_sample(RenderTile &rtile)
update_status_time();
}
-void Session::release_tile(RenderTile &rtile)
+void Session::release_tile(RenderTile &rtile, const bool need_denoise)
{
thread_scoped_lock tile_lock(tile_mutex);
@@ -469,7 +512,8 @@ void Session::release_tile(RenderTile &rtile)
bool delete_tile;
- if (tile_manager.finish_tile(rtile.tile_index, delete_tile)) {
+ if (tile_manager.finish_tile(rtile.tile_index, need_denoise, delete_tile)) {
+ /* Finished tile pixels write. */
if (write_render_tile_cb && params.progressive_refine == false) {
write_render_tile_cb(rtile);
}
@@ -480,66 +524,99 @@ void Session::release_tile(RenderTile &rtile)
}
}
else {
+ /* In progress tile pixels update. */
if (update_render_tile_cb && params.progressive_refine == false) {
update_render_tile_cb(rtile, false);
}
}
update_status_time();
+
+ /* Notify denoising thread that a tile was finished. */
+ denoising_cond.notify_all();
}
-void Session::map_neighbor_tiles(RenderTile *tiles, Device *tile_device)
+void Session::map_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device)
{
thread_scoped_lock tile_lock(tile_mutex);
- int center_idx = tiles[4].tile_index;
- assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE);
- BufferParams buffer_params = tile_manager.params;
- int4 image_region = make_int4(buffer_params.full_x,
- buffer_params.full_y,
- buffer_params.full_x + buffer_params.width,
- buffer_params.full_y + buffer_params.height);
-
- for (int dy = -1, i = 0; dy <= 1; dy++) {
- for (int dx = -1; dx <= 1; dx++, i++) {
- int px = tiles[4].x + dx * params.tile_size.x;
- int py = tiles[4].y + dy * params.tile_size.y;
- if (px >= image_region.x && py >= image_region.y && px < image_region.z &&
- py < image_region.w) {
- int tile_index = center_idx + dy * tile_manager.state.tile_stride + dx;
- Tile *tile = &tile_manager.state.tiles[tile_index];
- assert(tile->buffers);
-
- tiles[i].buffer = tile->buffers->buffer.device_pointer;
- tiles[i].x = tile_manager.state.buffer.full_x + tile->x;
- tiles[i].y = tile_manager.state.buffer.full_y + tile->y;
- tiles[i].w = tile->w;
- tiles[i].h = tile->h;
- tiles[i].buffers = tile->buffers;
-
- tile->buffers->params.get_offset_stride(tiles[i].offset, tiles[i].stride);
- }
- else {
- tiles[i].buffer = (device_ptr)NULL;
- tiles[i].buffers = NULL;
- tiles[i].x = clamp(px, image_region.x, image_region.z);
- tiles[i].y = clamp(py, image_region.y, image_region.w);
- tiles[i].w = tiles[i].h = 0;
+ const int4 image_region = make_int4(
+ tile_manager.state.buffer.full_x,
+ tile_manager.state.buffer.full_y,
+ tile_manager.state.buffer.full_x + tile_manager.state.buffer.width,
+ tile_manager.state.buffer.full_y + tile_manager.state.buffer.height);
+
+ RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER];
+
+ if (!tile_manager.schedule_denoising) {
+ /* Fix up tile slices with overlap. */
+ if (tile_manager.slice_overlap != 0) {
+ int y = max(center_tile.y - tile_manager.slice_overlap, image_region.y);
+ center_tile.h = min(center_tile.y + center_tile.h + tile_manager.slice_overlap,
+ image_region.w) -
+ y;
+ center_tile.y = y;
+ }
+
+ /* Tiles are not being denoised individually, which means the entire image is processed. */
+ neighbors.set_bounds_from_center();
+ }
+ else {
+ int center_idx = center_tile.tile_index;
+ assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE);
+
+ for (int dy = -1, i = 0; dy <= 1; dy++) {
+ for (int dx = -1; dx <= 1; dx++, i++) {
+ RenderTile &rtile = neighbors.tiles[i];
+ int nindex = tile_manager.get_neighbor_index(center_idx, i);
+ if (nindex >= 0) {
+ Tile *tile = &tile_manager.state.tiles[nindex];
+
+ rtile.x = image_region.x + tile->x;
+ rtile.y = image_region.y + tile->y;
+ rtile.w = tile->w;
+ rtile.h = tile->h;
+
+ if (buffers) {
+ tile_manager.state.buffer.get_offset_stride(rtile.offset, rtile.stride);
+
+ rtile.buffer = buffers->buffer.device_pointer;
+ rtile.buffers = buffers;
+ }
+ else {
+ assert(tile->buffers);
+ tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride);
+
+ rtile.buffer = tile->buffers->buffer.device_pointer;
+ rtile.buffers = tile->buffers;
+ }
+ }
+ else {
+ int px = center_tile.x + dx * params.tile_size.x;
+ int py = center_tile.y + dy * params.tile_size.y;
+
+ rtile.x = clamp(px, image_region.x, image_region.z);
+ rtile.y = clamp(py, image_region.y, image_region.w);
+ rtile.w = rtile.h = 0;
+
+ rtile.buffer = (device_ptr)NULL;
+ rtile.buffers = NULL;
+ }
}
}
}
- assert(tiles[4].buffers);
- device->map_neighbor_tiles(tile_device, tiles);
+ assert(center_tile.buffers);
+ device->map_neighbor_tiles(tile_device, neighbors);
/* The denoised result is written back to the original tile. */
- tiles[9] = tiles[4];
+ neighbors.target = center_tile;
}
-void Session::unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device)
+void Session::unmap_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device)
{
thread_scoped_lock tile_lock(tile_mutex);
- device->unmap_neighbor_tiles(tile_device, tiles);
+ device->unmap_neighbor_tiles(tile_device, neighbors);
}
void Session::run_cpu()
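For the 3x3 loop in map_neighbor_tiles() above, the flat index i relates to the offsets (dx, dy) as i = (dy + 1) * 3 + (dx + 1), so the center tile is index 4 (RenderTileNeighbors::CENTER) and the denoised result is written back to it via neighbors.target. A small sketch of that mapping, with hypothetical helper names:

/* Hypothetical helpers illustrating the neighbor indexing used above. */
inline int neighbor_index_sketch(int dx, int dy) /* dx, dy in [-1, 1] */
{
  return (dy + 1) * 3 + (dx + 1); /* 0..8, with (0, 0) -> 4, the center */
}

inline void neighbor_offset_sketch(int i, int &dx, int &dy)
{
  dx = (i % 3) - 1;
  dy = (i / 3) - 1;
}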
@@ -547,6 +624,7 @@ void Session::run_cpu()
bool tiles_written = false;
last_update_time = time_dt();
+ last_display_time = last_update_time;
{
/* reset once to start */
@@ -561,7 +639,7 @@ void Session::run_cpu()
while (!progress.get_cancel()) {
/* advance to next tile */
bool no_tiles = !tile_manager.next();
- bool need_tonemap = false;
+ bool need_copy_to_display_buffer = false;
DeviceKernelStatus kernel_state = DEVICE_KERNEL_UNKNOWN;
if (no_tiles) {
@@ -577,7 +655,7 @@ void Session::run_cpu()
}
/* Don't go in pause mode when preview kernels are used
- * When feature kernels become available the session will be resetted. */
+ * When feature kernels become available the session will be reset. */
else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
time_sleep(0.1);
}
@@ -622,11 +700,6 @@ void Session::run_cpu()
}
if (!no_tiles) {
- /* buffers mutex is locked entirely while rendering each
- * sample, and released/reacquired on each iteration to allow
- * reset and draw in between */
- thread_scoped_lock buffers_lock(buffers_mutex);
-
/* update scene */
scoped_timer update_timer;
if (update_scene()) {
@@ -640,17 +713,24 @@ void Session::run_cpu()
if (progress.get_cancel())
break;
+ /* buffers mutex is locked entirely while rendering each
+ * sample, and released/reacquired on each iteration to allow
+ * reset and draw in between */
+ thread_scoped_lock buffers_lock(buffers_mutex);
+
/* update status and timing */
update_status_time();
/* render */
- render();
+ bool delayed_denoise = false;
+ const bool need_denoise = render_need_denoise(delayed_denoise);
+ render(need_denoise);
/* update status and timing */
update_status_time();
if (!params.background)
- need_tonemap = true;
+ need_copy_to_display_buffer = !delayed_denoise;
if (!device->error_message().empty())
progress.set_error(device->error_message());
@@ -668,10 +748,10 @@ void Session::run_cpu()
delayed_reset.do_reset = false;
reset_(delayed_reset.params, delayed_reset.samples);
}
- else if (need_tonemap) {
- /* tonemap only if we do not reset, we don't we don't
+ else if (need_copy_to_display_buffer) {
+ /* Only copy to display_buffer if we do not reset, we don't
* want to show the result of an incomplete sample */
- tonemap(tile_manager.state.sample);
+ copy_to_display_buffer(tile_manager.state.sample);
}
if (!device->error_message().empty())
@@ -700,26 +780,30 @@ DeviceRequestedFeatures Session::get_requested_device_features()
*/
bool use_motion = scene->need_motion() == Scene::MotionType::MOTION_BLUR;
requested_features.use_hair = false;
+ requested_features.use_hair_thick = (scene->params.hair_shape == CURVE_THICK);
requested_features.use_object_motion = false;
requested_features.use_camera_motion = use_motion && scene->camera->use_motion();
foreach (Object *object, scene->objects) {
- Mesh *mesh = object->mesh;
- if (mesh->num_curves()) {
- requested_features.use_hair = true;
- }
+ Geometry *geom = object->geometry;
if (use_motion) {
- requested_features.use_object_motion |= object->use_motion() | mesh->use_motion_blur;
- requested_features.use_camera_motion |= mesh->use_motion_blur;
+ requested_features.use_object_motion |= object->use_motion() | geom->use_motion_blur;
+ requested_features.use_camera_motion |= geom->use_motion_blur;
}
-#ifdef WITH_OPENSUBDIV
- if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE) {
- requested_features.use_patch_evaluation = true;
- }
-#endif
if (object->is_shadow_catcher) {
requested_features.use_shadow_tricks = true;
}
- requested_features.use_true_displacement |= mesh->has_true_displacement();
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+#ifdef WITH_OPENSUBDIV
+ if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE) {
+ requested_features.use_patch_evaluation = true;
+ }
+#endif
+ requested_features.use_true_displacement |= mesh->has_true_displacement();
+ }
+ else if (geom->type == Geometry::HAIR) {
+ requested_features.use_hair = true;
+ }
}
requested_features.use_background_light = scene->light_manager->has_background_light(scene);
@@ -728,7 +812,7 @@ DeviceRequestedFeatures Session::get_requested_device_features()
requested_features.use_baking = bake_manager->get_baking();
requested_features.use_integrator_branched = (scene->integrator->method ==
Integrator::BRANCHED_PATH);
- if (params.run_denoising) {
+ if (params.denoising.use || params.denoising.store_passes) {
requested_features.use_denoising = true;
requested_features.use_shadow_tricks = true;
}
@@ -757,7 +841,7 @@ bool Session::load_kernels(bool lock_scene)
message = "Failed loading render kernel, see console for errors";
progress.set_error(message);
- progress.set_status("Error", message);
+ progress.set_status(message);
progress.set_update();
return false;
}
@@ -796,7 +880,7 @@ void Session::run()
/* progress update */
if (progress.get_cancel())
- progress.set_status("Cancel", progress.get_cancel_message());
+ progress.set_status(progress.get_cancel_message());
else
progress.set_update();
}
@@ -844,9 +928,6 @@ void Session::set_samples(int samples)
params.samples = samples;
tile_manager.set_samples(samples);
- {
- thread_scoped_lock pause_lock(pause_mutex);
- }
pause_cond.notify_all();
}
}
@@ -868,6 +949,40 @@ void Session::set_pause(bool pause_)
pause_cond.notify_all();
}
+void Session::set_denoising(const DenoiseParams &denoising)
+{
+ bool need_denoise = denoising.need_denoising_task();
+
+ /* Lock buffers so no denoising operation is triggered while the settings are changed here. */
+ thread_scoped_lock buffers_lock(buffers_mutex);
+ params.denoising = denoising;
+
+ if (!(params.device.denoisers & denoising.type)) {
+ if (need_denoise) {
+ progress.set_error("Denoiser type not supported by compute device");
+ }
+
+ params.denoising.use = false;
+ need_denoise = false;
+ }
+
+ // TODO(pmours): Query the required overlap value for denoising from the device?
+ tile_manager.slice_overlap = need_denoise && !params.background ? 64 : 0;
+
+ /* Schedule per tile denoising for final renders if we are either denoising or
+ * need prefiltered passes for the native denoiser. */
+ tile_manager.schedule_denoising = need_denoise && !buffers;
+}
+
+void Session::set_denoising_start_sample(int sample)
+{
+ if (sample != params.denoising.start_sample) {
+ params.denoising.start_sample = sample;
+
+ pause_cond.notify_all();
+ }
+}
+
void Session::wait()
{
if (session_thread) {
@@ -890,7 +1005,7 @@ bool Session::update_scene()
int height = tile_manager.state.buffer.full_height;
int resolution = tile_manager.state.resolution_divider;
- if (width != cam->width || height != cam->height) {
+ if (width != cam->width || height != cam->height || resolution != cam->resolution) {
cam->width = width;
cam->height = height;
cam->resolution = resolution;
@@ -902,7 +1017,7 @@ bool Session::update_scene()
Integrator *integrator = scene->integrator;
BakeManager *bake_manager = scene->bake_manager;
- if (integrator->sampling_pattern == SAMPLING_PATTERN_CMJ || bake_manager->get_baking()) {
+ if (integrator->sampling_pattern != SAMPLING_PATTERN_SOBOL || bake_manager->get_baking()) {
int aa_samples = tile_manager.num_samples;
if (aa_samples != integrator->aa_samples) {
@@ -913,7 +1028,8 @@ bool Session::update_scene()
/* update scene */
if (scene->need_update()) {
- bool new_kernels_needed = load_kernels(false);
+ /* Update the used-shaders tag so we know which features are needed for the kernel. */
+ scene->shader_manager->update_shaders_used(scene);
/* Update max_closures. */
KernelIntegrator *kintegrator = &scene->dscene.data.integrator;
@@ -925,6 +1041,9 @@ bool Session::update_scene()
kintegrator->max_closures = MAX_CLOSURE;
}
+ /* Load render kernels, before device update where we upload data to the GPU. */
+ bool new_kernels_needed = load_kernels(false);
+
progress.set_status("Updating Scene");
MEM_GUARDED_CALL(&progress, scene->device_update, device, progress);
@@ -978,14 +1097,14 @@ void Session::update_status_time(bool show_pause, bool show_done)
*/
substatus += string_printf(", Sample %d/%d", progress.get_current_sample(), num_samples);
}
- if (params.full_denoising) {
+ if (params.denoising.use && params.denoising.type != DENOISER_OPENIMAGEDENOISE) {
substatus += string_printf(", Denoised %d tiles", progress.get_denoised_tiles());
}
- else if (params.run_denoising) {
+ else if (params.denoising.store_passes && params.denoising.type == DENOISER_NLM) {
substatus += string_printf(", Prefiltered %d tiles", progress.get_denoised_tiles());
}
}
- else if (tile_manager.num_samples == INT_MAX)
+ else if (tile_manager.num_samples == Integrator::MAX_SAMPLES)
substatus = string_printf("Path Tracing Sample %d", progressive_sample + 1);
else
substatus = string_printf("Path Tracing Sample %d/%d", progressive_sample + 1, num_samples);
@@ -1005,18 +1124,59 @@ void Session::update_status_time(bool show_pause, bool show_done)
progress.set_status(status, substatus);
}
-void Session::render()
+bool Session::render_need_denoise(bool &delayed)
+{
+ delayed = false;
+
+ /* Not supported yet for baking. */
+ if (read_bake_tile_cb) {
+ return false;
+ }
+
+ /* Denoising enabled? */
+ if (!params.denoising.need_denoising_task()) {
+ return false;
+ }
+
+ if (params.background) {
+ /* Background render, only denoise when rendering the last sample. */
+ return tile_manager.done();
+ }
+
+ /* Viewport render. */
+
+ /* It can happen that denoising was already enabled, but the scene still needs an update. */
+ if (scene->film->need_update || !scene->film->denoising_data_offset) {
+ return false;
+ }
+
+ /* Do not denoise until the sample at which denoising should start is reached. */
+ if (tile_manager.state.sample < min(params.denoising.start_sample, params.samples - 1)) {
+ return false;
+ }
+
+ /* Avoid excessive denoising in viewport after reaching a certain amount of samples. */
+ delayed = (tile_manager.state.sample >= 20 &&
+ (time_dt() - last_display_time) < params.progressive_update_timeout);
+ return !delayed;
+}
+
+void Session::render(bool need_denoise)
{
- /* Clear buffers. */
if (buffers && tile_manager.state.sample == tile_manager.range_start_sample) {
+ /* Clear buffers. */
buffers->zero();
}
+ if (tile_manager.state.buffer.width == 0 || tile_manager.state.buffer.height == 0) {
+ return; /* Avoid empty launches. */
+ }
+
/* Add path trace task. */
DeviceTask task(DeviceTask::RENDER);
- task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2);
- task.release_tile = function_bind(&Session::release_tile, this, _1);
+ task.acquire_tile = function_bind(&Session::acquire_tile, this, _2, _1, _3);
+ task.release_tile = function_bind(&Session::release_tile, this, _1, need_denoise);
task.map_neighbor_tiles = function_bind(&Session::map_neighbor_tiles, this, _1, _2);
task.unmap_neighbor_tiles = function_bind(&Session::unmap_neighbor_tiles, this, _1, _2);
task.get_cancel = function_bind(&Progress::get_cancel, &this->progress);
@@ -1024,29 +1184,56 @@ void Session::render()
task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2);
task.need_finish_queue = params.progressive_refine;
task.integrator_branched = scene->integrator->method == Integrator::BRANCHED_PATH;
- task.requested_tile_size = params.tile_size;
- task.passes_size = tile_manager.params.get_passes_size();
- if (params.run_denoising) {
+ task.adaptive_sampling.use = (scene->integrator->sampling_pattern == SAMPLING_PATTERN_PMJ) &&
+ scene->dscene.data.film.pass_adaptive_aux_buffer;
+ task.adaptive_sampling.min_samples = scene->dscene.data.integrator.adaptive_min_samples;
+ task.adaptive_sampling.adaptive_step = scene->dscene.data.integrator.adaptive_step;
+
+ /* Acquire render tiles by default. */
+ task.tile_types = RenderTile::PATH_TRACE;
+
+ if (need_denoise) {
task.denoising = params.denoising;
- assert(!scene->film->need_update);
task.pass_stride = scene->film->pass_stride;
task.target_pass_stride = task.pass_stride;
task.pass_denoising_data = scene->film->denoising_data_offset;
task.pass_denoising_clean = scene->film->denoising_clean_offset;
task.denoising_from_render = true;
- task.denoising_do_filter = params.full_denoising;
- task.denoising_write_passes = params.write_denoising_passes;
+
+ if (tile_manager.schedule_denoising) {
+ /* Acquire denoising tiles during rendering. */
+ task.tile_types |= RenderTile::DENOISE;
+ }
+ else {
+ assert(buffers);
+
+ /* Schedule rendering and wait for it to finish. */
+ device->task_add(task);
+ device->task_wait();
+
+ /* Then run denoising on the whole image at once. */
+ task.type = DeviceTask::DENOISE_BUFFER;
+ task.x = tile_manager.state.buffer.full_x;
+ task.y = tile_manager.state.buffer.full_y;
+ task.w = tile_manager.state.buffer.width;
+ task.h = tile_manager.state.buffer.height;
+ task.buffer = buffers->buffer.device_pointer;
+ task.sample = tile_manager.state.sample;
+ task.num_samples = tile_manager.state.num_samples;
+ tile_manager.state.buffer.get_offset_stride(task.offset, task.stride);
+ task.buffers = buffers;
+ }
}
device->task_add(task);
}
-void Session::tonemap(int sample)
+void Session::copy_to_display_buffer(int sample)
{
- /* add tonemap task */
+ /* add film conversion task */
DeviceTask task(DeviceTask::FILM_CONVERT);
task.x = tile_manager.state.buffer.full_x;
@@ -1065,6 +1252,8 @@ void Session::tonemap(int sample)
/* set display to new size */
display->draw_set(task.w, task.h);
+
+ last_display_time = time_dt();
}
display_outdated = false;
@@ -1142,8 +1331,11 @@ int Session::get_max_closure_count()
int max_closures = 0;
for (int i = 0; i < scene->shaders.size(); i++) {
- int num_closures = scene->shaders[i]->graph->get_num_closures();
- max_closures = max(max_closures, num_closures);
+ Shader *shader = scene->shaders[i];
+ if (shader->used) {
+ int num_closures = shader->graph->get_num_closures();
+ max_closures = max(max_closures, num_closures);
+ }
}
max_closure_global = max(max_closure_global, max_closures);
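
For reference, the viewport branch of the new render_need_denoise() above reduces to a start-sample gate plus a rate limit once 20 samples have accumulated. A minimal standalone sketch of that decision, using plain parameters in place of the Session/TileManager state (the function and parameter names here are illustrative, not the Cycles API):

#include <algorithm>

/* Decide whether to denoise the current viewport sample: wait until the
 * configured start sample is reached, then, after 20 samples, only denoise
 * when enough wall-clock time has passed since the last display update.
 * Otherwise report the denoise as delayed so the caller can retry later. */
static bool need_denoise_viewport(int sample,
                                  int denoising_start_sample,
                                  int total_samples,
                                  double seconds_since_last_display,
                                  double progressive_update_timeout,
                                  bool &delayed)
{
  delayed = false;

  if (sample < std::min(denoising_start_sample, total_samples - 1)) {
    return false; /* Denoising has not started yet. */
  }

  /* Avoid excessive denoising once many samples have accumulated. */
  delayed = (sample >= 20 && seconds_since_last_display < progressive_update_timeout);
  return !delayed;
}
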
diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h
index 60d8f7a8b14..e3ac054ead3 100644
--- a/intern/cycles/render/session.h
+++ b/intern/cycles/render/session.h
@@ -17,8 +17,8 @@
#ifndef __SESSION_H__
#define __SESSION_H__
-#include "render/buffers.h"
#include "device/device.h"
+#include "render/buffers.h"
#include "render/shader.h"
#include "render/stats.h"
#include "render/tile.h"
@@ -53,16 +53,15 @@ class SessionParams {
int2 tile_size;
TileOrder tile_order;
int start_resolution;
+ int denoising_start_sample;
int pixel_size;
int threads;
+ bool adaptive_sampling;
bool use_profiling;
bool display_buffer_linear;
- bool run_denoising;
- bool write_denoising_passes;
- bool full_denoising;
DenoiseParams denoising;
double cancel_timeout;
@@ -84,15 +83,13 @@ class SessionParams {
samples = 1024;
tile_size = make_int2(64, 64);
start_resolution = INT_MAX;
+ denoising_start_sample = 0;
pixel_size = 1;
threads = 0;
+ adaptive_sampling = false;
use_profiling = false;
- run_denoising = false;
- write_denoising_passes = false;
- full_denoising = false;
-
display_buffer_linear = false;
cancel_timeout = 0.1;
@@ -107,17 +104,20 @@ class SessionParams {
bool modified(const SessionParams &params)
{
return !(device == params.device && background == params.background &&
- progressive_refine == params.progressive_refine
- /* && samples == params.samples */
- && progressive == params.progressive && experimental == params.experimental &&
+ progressive_refine == params.progressive_refine &&
+ /* samples == params.samples && denoising_start_sample ==
+ params.denoising_start_sample && */
+ progressive == params.progressive && experimental == params.experimental &&
tile_size == params.tile_size && start_resolution == params.start_resolution &&
pixel_size == params.pixel_size && threads == params.threads &&
+ adaptive_sampling == params.adaptive_sampling &&
use_profiling == params.use_profiling &&
display_buffer_linear == params.display_buffer_linear &&
cancel_timeout == params.cancel_timeout && reset_timeout == params.reset_timeout &&
text_timeout == params.text_timeout &&
progressive_update_timeout == params.progressive_update_timeout &&
- tile_order == params.tile_order && shadingsystem == params.shadingsystem);
+ tile_order == params.tile_order && shadingsystem == params.shadingsystem &&
+ denoising.type == params.denoising.type);
}
};
@@ -140,6 +140,7 @@ class Session {
function<void(RenderTile &)> write_render_tile_cb;
function<void(RenderTile &, bool)> update_render_tile_cb;
+ function<void(RenderTile &)> read_bake_tile_cb;
explicit Session(const SessionParams &params);
~Session();
@@ -150,8 +151,10 @@ class Session {
bool ready_to_reset();
void reset(BufferParams &params, int samples);
- void set_samples(int samples);
void set_pause(bool pause);
+ void set_samples(int samples);
+ void set_denoising(const DenoiseParams &denoising);
+ void set_denoising_start_sample(int sample);
bool update_scene();
bool load_kernels(bool lock_scene = true);
@@ -176,8 +179,9 @@ class Session {
void update_status_time(bool show_pause = false, bool show_done = false);
- void tonemap(int sample);
- void render();
+ void render(bool use_denoise);
+ void copy_to_display_buffer(int sample);
+
void reset_(BufferParams &params, int samples);
void run_cpu();
@@ -188,12 +192,14 @@ class Session {
bool draw_gpu(BufferParams &params, DeviceDrawParams &draw_params);
void reset_gpu(BufferParams &params, int samples);
- bool acquire_tile(Device *tile_device, RenderTile &tile);
+ bool render_need_denoise(bool &delayed);
+
+ bool acquire_tile(RenderTile &tile, Device *tile_device, uint tile_types);
void update_tile_sample(RenderTile &tile);
- void release_tile(RenderTile &tile);
+ void release_tile(RenderTile &tile, const bool need_denoise);
- void map_neighbor_tiles(RenderTile *tiles, Device *tile_device);
- void unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device);
+ void map_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device);
+ void unmap_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device);
bool device_use_gl;
@@ -202,8 +208,8 @@ class Session {
volatile bool display_outdated;
volatile bool gpu_draw_ready;
- volatile bool gpu_need_tonemap;
- thread_condition_variable gpu_need_tonemap_cond;
+ volatile bool gpu_need_display_buffer_update;
+ thread_condition_variable gpu_need_display_buffer_update_cond;
bool pause;
thread_condition_variable pause_cond;
@@ -211,14 +217,16 @@ class Session {
thread_mutex tile_mutex;
thread_mutex buffers_mutex;
thread_mutex display_mutex;
+ thread_condition_variable denoising_cond;
bool kernels_loaded;
DeviceRequestedFeatures loaded_kernel_features;
double reset_time;
+ double last_update_time;
+ double last_display_time;
/* progressive refine */
- double last_update_time;
bool update_progressive_refine(bool cancel);
DeviceRequestedFeatures get_requested_device_features();
diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp
index ac3303cbfeb..1120d909e98 100644
--- a/intern/cycles/render/shader.cpp
+++ b/intern/cycles/render/shader.cpp
@@ -14,9 +14,11 @@
* limitations under the License.
*/
+#include "device/device.h"
+
#include "render/background.h"
#include "render/camera.h"
-#include "device/device.h"
+#include "render/colorspace.h"
#include "render/graph.h"
#include "render/integrator.h"
#include "render/light.h"
@@ -31,6 +33,7 @@
#include "util/util_foreach.h"
#include "util/util_murmurhash.h"
+#include "util/util_task.h"
#ifdef WITH_OCIO
# include <OpenColorIO/OpenColorIO.h>
@@ -166,7 +169,7 @@ NODE_DEFINE(Shader)
SOCKET_ENUM(volume_sampling_method,
"Volume Sampling Method",
volume_sampling_method_enum,
- VOLUME_SAMPLING_DISTANCE);
+ VOLUME_SAMPLING_MULTIPLE_IMPORTANCE);
static NodeEnum volume_interpolation_method_enum;
volume_interpolation_method_enum.insert("linear", VOLUME_INTERPOLATION_LINEAR);
@@ -176,6 +179,8 @@ NODE_DEFINE(Shader)
volume_interpolation_method_enum,
VOLUME_INTERPOLATION_LINEAR);
+ SOCKET_FLOAT(volume_step_rate, "Volume Step Rate", 1.0f);
+
static NodeEnum displacement_method_enum;
displacement_method_enum.insert("bump", DISPLACE_BUMP);
displacement_method_enum.insert("true", DISPLACE_TRUE);
@@ -201,10 +206,10 @@ Shader::Shader() : Node(node_type)
has_bssrdf_bump = false;
has_surface_spatial_varying = false;
has_volume_spatial_varying = false;
- has_object_dependency = false;
- has_attribute_dependency = false;
+ has_volume_attribute_dependency = false;
has_integrator_dependency = false;
has_volume_connected = false;
+ prev_volume_step_rate = 0.0f;
displacement_method = DISPLACE_BUMP;
@@ -212,8 +217,7 @@ Shader::Shader() : Node(node_type)
used = false;
need_update = true;
- need_update_mesh = true;
- need_sync_object = false;
+ need_update_geometry = true;
}
Shader::~Shader()
@@ -223,6 +227,13 @@ Shader::~Shader()
bool Shader::is_constant_emission(float3 *emission)
{
+ /* If the shader has AOVs, they need to be evaluated, so we can't skip the shader. */
+ foreach (ShaderNode *node, graph->nodes) {
+ if (node->special_type == SHADER_SPECIAL_TYPE_OUTPUT_AOV) {
+ return false;
+ }
+ }
+
ShaderInput *surf = graph->output()->input("Surface");
if (surf->link == NULL) {
@@ -279,7 +290,7 @@ void Shader::set_graph(ShaderGraph *graph_)
const char *new_hash = (graph_) ? graph_->displacement_hash.c_str() : "";
if (strcmp(old_hash, new_hash) != 0) {
- need_update_mesh = true;
+ need_update_geometry = true;
}
}
@@ -308,8 +319,11 @@ void Shader::tag_update(Scene *scene)
* has use_mis set to false. We are quite close to release now, so
* better to be safe.
*/
- if (this == scene->default_background && scene->light_manager->has_background_light(scene)) {
- scene->light_manager->need_update = true;
+ if (this == scene->background->get_shader(scene)) {
+ scene->light_manager->need_update_background = true;
+ if (scene->light_manager->has_background_light(scene)) {
+ scene->light_manager->need_update = true;
+ }
}
/* quick detection of which kind of shaders we have to avoid loading
@@ -337,15 +351,16 @@ void Shader::tag_update(Scene *scene)
}
/* compare if the attributes changed, mesh manager will check
- * need_update_mesh, update the relevant meshes and clear it. */
+ * need_update_geometry, update the relevant meshes and clear it. */
if (attributes.modified(prev_attributes)) {
- need_update_mesh = true;
- scene->mesh_manager->need_update = true;
+ need_update_geometry = true;
+ scene->geometry_manager->need_update = true;
}
- if (has_volume != prev_has_volume) {
- scene->mesh_manager->need_flags_update = true;
+ if (has_volume != prev_has_volume || volume_step_rate != prev_volume_step_rate) {
+ scene->geometry_manager->need_flags_update = true;
scene->object_manager->need_flags_update = true;
+ prev_volume_step_rate = volume_step_rate;
}
}
@@ -405,7 +420,7 @@ ShaderManager::~ShaderManager()
{
}
-ShaderManager *ShaderManager::create(Scene *scene, int shadingsystem)
+ShaderManager *ShaderManager::create(int shadingsystem)
{
ShaderManager *manager;
@@ -421,8 +436,6 @@ ShaderManager *ShaderManager::create(Scene *scene, int shadingsystem)
manager = new SVMShaderManager();
}
- add_default(scene);
-
return manager;
}
@@ -461,8 +474,12 @@ int ShaderManager::get_shader_id(Shader *shader, bool smooth)
return id;
}
-void ShaderManager::device_update_shaders_used(Scene *scene)
+void ShaderManager::update_shaders_used(Scene *scene)
{
+ if (!need_update) {
+ return;
+ }
+
/* figure out which shaders are in use, so SVM/OSL can skip compiling them
* for speed and avoid loading image textures into memory */
uint id = 0;
@@ -479,8 +496,8 @@ void ShaderManager::device_update_shaders_used(Scene *scene)
if (scene->background->shader)
scene->background->shader->used = true;
- foreach (Mesh *mesh, scene->meshes)
- foreach (Shader *shader, mesh->used_shaders)
+ foreach (Geometry *geom, scene->geometry)
+ foreach (Shader *shader, geom->used_shaders)
shader->used = true;
foreach (Light *light, scene->lights)
@@ -521,10 +538,12 @@ void ShaderManager::device_update_common(Device *device,
/* in this case we can assume transparent surface */
if (shader->has_volume_connected && !shader->has_surface)
flag |= SD_HAS_ONLY_VOLUME;
- if (shader->heterogeneous_volume && shader->has_volume_spatial_varying)
- flag |= SD_HETEROGENEOUS_VOLUME;
- if (shader->has_attribute_dependency)
- flag |= SD_NEED_ATTRIBUTES;
+ if (shader->has_volume) {
+ if (shader->heterogeneous_volume && shader->has_volume_spatial_varying)
+ flag |= SD_HETEROGENEOUS_VOLUME;
+ }
+ if (shader->has_volume_attribute_dependency)
+ flag |= SD_NEED_VOLUME_ATTRIBUTES;
if (shader->has_bssrdf_bump)
flag |= SD_HAS_BSSRDF_BUMP;
if (device->info.has_volume_decoupled) {
@@ -613,9 +632,27 @@ void ShaderManager::add_default(Scene *scene)
Shader *shader = new Shader();
shader->name = "default_surface";
- shader->graph = graph;
+ shader->set_graph(graph);
scene->shaders.push_back(shader);
scene->default_surface = shader;
+ shader->tag_update(scene);
+ }
+
+ /* default volume */
+ {
+ ShaderGraph *graph = new ShaderGraph();
+
+ PrincipledVolumeNode *principled = new PrincipledVolumeNode();
+ graph->add(principled);
+
+ graph->connect(principled->output("Volume"), graph->output()->input("Volume"));
+
+ Shader *shader = new Shader();
+ shader->name = "default_volume";
+ shader->set_graph(graph);
+ scene->shaders.push_back(shader);
+ scene->default_volume = shader;
+ shader->tag_update(scene);
}
/* default light */
@@ -631,9 +668,10 @@ void ShaderManager::add_default(Scene *scene)
Shader *shader = new Shader();
shader->name = "default_light";
- shader->graph = graph;
+ shader->set_graph(graph);
scene->shaders.push_back(shader);
scene->default_light = shader;
+ shader->tag_update(scene);
}
/* default background */
@@ -642,9 +680,10 @@ void ShaderManager::add_default(Scene *scene)
Shader *shader = new Shader();
shader->name = "default_background";
- shader->graph = graph;
+ shader->set_graph(graph);
scene->shaders.push_back(shader);
scene->default_background = shader;
+ shader->tag_update(scene);
}
/* default empty */
@@ -653,9 +692,10 @@ void ShaderManager::add_default(Scene *scene)
Shader *shader = new Shader();
shader->name = "default_empty";
- shader->graph = graph;
+ shader->set_graph(graph);
scene->shaders.push_back(shader);
scene->default_empty = shader;
+ shader->tag_update(scene);
}
}
@@ -694,6 +734,10 @@ void ShaderManager::get_requested_features(Scene *scene,
requested_features->nodes_features = 0;
for (int i = 0; i < scene->shaders.size(); i++) {
Shader *shader = scene->shaders[i];
+ if (!shader->used) {
+ continue;
+ }
+
/* Gather requested features from all the nodes from the graph nodes. */
get_requested_graph_features(shader->graph, requested_features);
ShaderNode *output_node = shader->graph->output();
@@ -701,6 +745,8 @@ void ShaderManager::get_requested_features(Scene *scene,
requested_features->nodes_features |= NODE_FEATURE_BUMP;
if (shader->displacement_method == DISPLACE_BOTH) {
requested_features->nodes_features |= NODE_FEATURE_BUMP_STATE;
+ requested_features->max_nodes_group = max(requested_features->max_nodes_group,
+ NODE_GROUP_LEVEL_1);
}
}
/* On top of volume nodes, also check if we need volume sampling because
@@ -717,6 +763,8 @@ void ShaderManager::free_memory()
#ifdef WITH_OSL
OSLShaderManager::free_memory();
#endif
+
+ ColorSpaceManager::free_memory();
}
float ShaderManager::linear_rgb_to_gray(float3 c)
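
The AOV early-out added to Shader::is_constant_emission() above means the constant-emission shortcut (using color * strength directly during light sampling) is only attempted when the graph contains no AOV output node. A hedged sketch of just that check, with a stand-in node-kind enum instead of the real ShaderGraph types:

#include <vector>

/* Illustrative stand-in for the graph node list; in Cycles the check walks
 * graph->nodes looking for SHADER_SPECIAL_TYPE_OUTPUT_AOV. */
enum class NodeKind { Emission, OutputAOV, Other };

static bool may_use_constant_emission(const std::vector<NodeKind> &nodes)
{
  for (NodeKind kind : nodes) {
    if (kind == NodeKind::OutputAOV) {
      return false; /* AOVs must be written, so the shader cannot be skipped. */
    }
  }
  return true; /* Safe to try the constant-emission shortcut. */
}
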
diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h
index 600b0cc59d4..993b467b396 100644
--- a/intern/cycles/render/shader.h
+++ b/intern/cycles/render/shader.h
@@ -23,8 +23,8 @@
# include <OSL/oslexec.h>
#endif
-#include "render/attribute.h"
#include "kernel/kernel_types.h"
+#include "render/attribute.h"
#include "graph/node.h"
@@ -92,11 +92,12 @@ class Shader : public Node {
bool heterogeneous_volume;
VolumeSampling volume_sampling_method;
int volume_interpolation_method;
+ float volume_step_rate;
+ float prev_volume_step_rate;
/* synchronization */
bool need_update;
- bool need_update_mesh;
- bool need_sync_object;
+ bool need_update_geometry;
/* If the shader has only volume components, the surface is assumed to
* be transparent.
@@ -118,8 +119,7 @@ class Shader : public Node {
bool has_bssrdf_bump;
bool has_surface_spatial_varying;
bool has_volume_spatial_varying;
- bool has_object_dependency;
- bool has_attribute_dependency;
+ bool has_volume_attribute_dependency;
bool has_integrator_dependency;
/* displacement */
@@ -143,8 +143,10 @@ class Shader : public Node {
Shader();
~Shader();
- /* Checks whether the shader consists of just a emission node with fixed inputs that's connected directly to the output.
- * If yes, it sets the content of emission to the constant value (color * strength), which is then used for speeding up light evaluation. */
+ /* Checks whether the shader consists of just an emission node with fixed inputs that's connected
+ * directly to the output.
+ * If yes, it sets the content of emission to the constant value (color * strength), which is
+ * then used for speeding up light evaluation. */
bool is_constant_emission(float3 *emission);
void set_graph(ShaderGraph *graph);
@@ -161,7 +163,7 @@ class ShaderManager {
public:
bool need_update;
- static ShaderManager *create(Scene *scene, int shadingsystem);
+ static ShaderManager *create(int shadingsystem);
virtual ~ShaderManager();
virtual void reset(Scene *scene) = 0;
@@ -178,7 +180,6 @@ class ShaderManager {
Progress &progress) = 0;
virtual void device_free(Device *device, DeviceScene *dscene, Scene *scene) = 0;
- void device_update_shaders_used(Scene *scene);
void device_update_common(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
void device_free_common(Device *device, DeviceScene *dscene, Scene *scene);
@@ -194,6 +195,7 @@ class ShaderManager {
static void add_default(Scene *scene);
/* Selective nodes compilation. */
+ void update_shaders_used(Scene *scene);
void get_requested_features(Scene *scene, DeviceRequestedFeatures *requested_features);
static void free_memory();
diff --git a/intern/cycles/render/sobol.cpp b/intern/cycles/render/sobol.cpp
index 487599476d4..c821249b239 100644
--- a/intern/cycles/render/sobol.cpp
+++ b/intern/cycles/render/sobol.cpp
@@ -54,15 +54,15 @@ CCL_NAMESPACE_BEGIN
#define SOBOL_MAX_NUMBER 32
typedef struct SobolDirectionNumbers {
- uint d, s, a;
- uint m[SOBOL_MAX_NUMBER];
+ uint d, s, a;
+ uint m[SOBOL_MAX_NUMBER];
} SobolDirectionNumbers;
/* Note: this file is skipped by clang-format. */
/* Keep simple alignment. */
/* clang-format off */
-static SobolDirectionNumbers SOBOL_NUMBERS[SOBOL_MAX_DIMENSIONS - 1] = {
+static const SobolDirectionNumbers SOBOL_NUMBERS[SOBOL_MAX_DIMENSIONS - 1] = {
{2, 1, 0, {1}},
{3, 2, 1, {1, 3}},
{4, 3, 1, {1, 3, 1}},
@@ -21268,40 +21268,40 @@ static SobolDirectionNumbers SOBOL_NUMBERS[SOBOL_MAX_DIMENSIONS - 1] = {
void sobol_generate_direction_vectors(uint vectors[][SOBOL_BITS], int dimensions)
{
- assert(dimensions <= SOBOL_MAX_DIMENSIONS);
+ assert(dimensions <= SOBOL_MAX_DIMENSIONS);
- const uint L = SOBOL_BITS;
+ const uint L = SOBOL_BITS;
- /* first dimension is exception */
- uint *v = vectors[0];
+ /* first dimension is exception */
+ uint *v = vectors[0];
- for(uint i = 0; i < L; i++)
- v[i] = 1 << (31-i); // all m's = 1
+ for (uint i = 0; i < L; i++)
+ v[i] = 1 << (31 - i); // all m's = 1
- for(int dim = 1; dim < dimensions; dim++) {
- SobolDirectionNumbers *numbers = &SOBOL_NUMBERS[dim-1];
- uint s = numbers->s;
- uint a = numbers->a;
- uint *m = numbers->m;
+ for (int dim = 1; dim < dimensions; dim++) {
+ const SobolDirectionNumbers *numbers = &SOBOL_NUMBERS[dim - 1];
+ const uint s = numbers->s;
+ const uint a = numbers->a;
+ const uint *m = numbers->m;
- v = vectors[dim];
+ v = vectors[dim];
- if(L <= s) {
- for(uint i = 0; i < L; i++)
- v[i] = m[i] << (31-i);
- }
- else {
- for(uint i = 0; i < s; i++)
- v[i] = m[i] << (31-i);
+ if (L <= s) {
+ for (uint i = 0; i < L; i++)
+ v[i] = m[i] << (31 - i);
+ }
+ else {
+ for (uint i = 0; i < s; i++)
+ v[i] = m[i] << (31 - i);
- for(uint i = s; i < L; i++) {
- v[i] = v[i-s] ^ (v[i-s] >> s);
+ for (uint i = s; i < L; i++) {
+ v[i] = v[i - s] ^ (v[i - s] >> s);
- for(uint k = 1; k < s; k++)
- v[i] ^= (((a >> (s-1-k)) & 1) * v[i-k]);
- }
- }
- }
+ for (uint k = 1; k < s; k++)
+ v[i] ^= (((a >> (s - 1 - k)) & 1) * v[i - k]);
+ }
+ }
+ }
}
CCL_NAMESPACE_END
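
The direction vectors built above follow the standard Sobol construction, with each v[i] stored so its leading bit sits at bit 31. As a generic illustration of how such vectors are typically consumed (this is not the Cycles kernel code), the index-th value of one dimension is the XOR of v[j] over the set bits of the index:

#include <cstdint>

/* Generic Sobol evaluation from 32 direction vectors of one dimension:
 * XOR together v[j] for every set bit j of the sample index, then map the
 * 32-bit result to [0, 1). */
static float sobol_sample(const uint32_t v[32], uint32_t index)
{
  uint32_t result = 0;
  for (uint32_t j = 0; index; index >>= 1, j++) {
    if (index & 1) {
      result ^= v[j];
    }
  }
  return result * (1.0f / 4294967296.0f); /* 2^-32 */
}
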
diff --git a/intern/cycles/render/stats.h b/intern/cycles/render/stats.h
index f1bf1903483..e45403a3754 100644
--- a/intern/cycles/render/stats.h
+++ b/intern/cycles/render/stats.h
@@ -29,7 +29,7 @@ CCL_NAMESPACE_BEGIN
* semantic around the units of size, it just should be the same for all
* entries.
*
- * This is a generic entry foi all size-related statistics, which helps
+ * This is a generic entry for all size-related statistics, which helps
* avoiding duplicating code for things like sorting.
*/
class NamedSizeEntry {
diff --git a/intern/cycles/render/svm.cpp b/intern/cycles/render/svm.cpp
index d8e3e24f39e..88714e20a90 100644
--- a/intern/cycles/render/svm.cpp
+++ b/intern/cycles/render/svm.cpp
@@ -15,6 +15,8 @@
*/
#include "device/device.h"
+
+#include "render/background.h"
#include "render/graph.h"
#include "render/light.h"
#include "render/mesh.h"
@@ -23,8 +25,8 @@
#include "render/shader.h"
#include "render/svm.h"
-#include "util/util_logging.h"
#include "util/util_foreach.h"
+#include "util/util_logging.h"
#include "util/util_progress.h"
#include "util/util_task.h"
@@ -47,46 +49,23 @@ void SVMShaderManager::reset(Scene * /*scene*/)
void SVMShaderManager::device_update_shader(Scene *scene,
Shader *shader,
Progress *progress,
- array<int4> *global_svm_nodes)
+ array<int4> *svm_nodes)
{
if (progress->get_cancel()) {
return;
}
assert(shader->graph);
- array<int4> svm_nodes;
- svm_nodes.push_back_slow(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
+ svm_nodes->push_back_slow(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
SVMCompiler::Summary summary;
- SVMCompiler compiler(scene->shader_manager, scene->image_manager, scene->light_manager);
- compiler.background = (shader == scene->default_background);
- compiler.compile(scene, shader, svm_nodes, 0, &summary);
+ SVMCompiler compiler(scene);
+ compiler.background = (shader == scene->background->get_shader(scene));
+ compiler.compile(shader, *svm_nodes, 0, &summary);
VLOG(2) << "Compilation summary:\n"
<< "Shader name: " << shader->name << "\n"
<< summary.full_report();
-
- nodes_lock_.lock();
- if (shader->use_mis && shader->has_surface_emission) {
- scene->light_manager->need_update = true;
- }
-
- /* The copy needs to be done inside the lock, if another thread resizes the array
- * while memcpy is running, it'll be copying into possibly invalid/freed ram.
- */
- size_t global_nodes_size = global_svm_nodes->size();
- global_svm_nodes->resize(global_nodes_size + svm_nodes.size());
-
- /* Offset local SVM nodes to a global address space. */
- int4 &jump_node = (*global_svm_nodes)[shader->id];
- jump_node.y = svm_nodes[0].y + global_nodes_size - 1;
- jump_node.z = svm_nodes[0].z + global_nodes_size - 1;
- jump_node.w = svm_nodes[0].w + global_nodes_size - 1;
- /* Copy new nodes to global storage. */
- memcpy(&(*global_svm_nodes)[global_nodes_size],
- &svm_nodes[1],
- sizeof(int4) * (svm_nodes.size() - 1));
- nodes_lock_.unlock();
}
void SVMShaderManager::device_update(Device *device,
@@ -97,30 +76,25 @@ void SVMShaderManager::device_update(Device *device,
if (!need_update)
return;
- VLOG(1) << "Total " << scene->shaders.size() << " shaders.";
+ const int num_shaders = scene->shaders.size();
+
+ VLOG(1) << "Total " << num_shaders << " shaders.";
double start_time = time_dt();
/* test if we need to update */
device_free(device, dscene, scene);
- /* determine which shaders are in use */
- device_update_shaders_used(scene);
-
- /* svm_nodes */
- array<int4> svm_nodes;
- size_t i;
-
- for (i = 0; i < scene->shaders.size(); i++) {
- svm_nodes.push_back_slow(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
- }
-
+ /* Build all shaders. */
TaskPool task_pool;
- foreach (Shader *shader, scene->shaders) {
- task_pool.push(
- function_bind(
- &SVMShaderManager::device_update_shader, this, scene, shader, &progress, &svm_nodes),
- false);
+ vector<array<int4>> shader_svm_nodes(num_shaders);
+ for (int i = 0; i < num_shaders; i++) {
+ task_pool.push(function_bind(&SVMShaderManager::device_update_shader,
+ this,
+ scene,
+ scene->shaders[i],
+ &progress,
+ &shader_svm_nodes[i]));
}
task_pool.wait_work();
@@ -128,20 +102,60 @@ void SVMShaderManager::device_update(Device *device,
return;
}
- dscene->svm_nodes.steal_data(svm_nodes);
- dscene->svm_nodes.copy_to_device();
+ /* The global node list contains a jump table (one node per shader)
+ * followed by the nodes of all shaders. */
+ int svm_nodes_size = num_shaders;
+ for (int i = 0; i < num_shaders; i++) {
+ /* Since we're not copying the local jump node, the size ends up being one node lower. */
+ svm_nodes_size += shader_svm_nodes[i].size() - 1;
+ }
+
+ int4 *svm_nodes = dscene->svm_nodes.alloc(svm_nodes_size);
- for (i = 0; i < scene->shaders.size(); i++) {
+ int node_offset = num_shaders;
+ for (int i = 0; i < num_shaders; i++) {
Shader *shader = scene->shaders[i];
+
shader->need_update = false;
+ if (shader->use_mis && shader->has_surface_emission) {
+ scene->light_manager->need_update = true;
+ }
+
+ /* Update the global jump table.
+ * Each compiled shader starts with a jump node that has offsets local
+ * to the shader, so copy those and add the offset into the global node list. */
+ int4 &global_jump_node = svm_nodes[shader->id];
+ int4 &local_jump_node = shader_svm_nodes[i][0];
+
+ global_jump_node.x = NODE_SHADER_JUMP;
+ global_jump_node.y = local_jump_node.y - 1 + node_offset;
+ global_jump_node.z = local_jump_node.z - 1 + node_offset;
+ global_jump_node.w = local_jump_node.w - 1 + node_offset;
+
+ node_offset += shader_svm_nodes[i].size() - 1;
+ }
+
+ /* Copy the nodes of each shader into the correct location. */
+ svm_nodes += num_shaders;
+ for (int i = 0; i < num_shaders; i++) {
+ int shader_size = shader_svm_nodes[i].size() - 1;
+
+ memcpy(svm_nodes, &shader_svm_nodes[i][1], sizeof(int4) * shader_size);
+ svm_nodes += shader_size;
+ }
+
+ if (progress.get_cancel()) {
+ return;
}
+ dscene->svm_nodes.copy_to_device();
+
device_update_common(device, dscene, scene, progress);
need_update = false;
- VLOG(1) << "Shader manager updated " << scene->shaders.size() << " shaders in "
- << time_dt() - start_time << " seconds.";
+ VLOG(1) << "Shader manager updated " << num_shaders << " shaders in " << time_dt() - start_time
+ << " seconds.";
}
void SVMShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *scene)
@@ -153,13 +167,8 @@ void SVMShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *s
/* Graph Compiler */
-SVMCompiler::SVMCompiler(ShaderManager *shader_manager_,
- ImageManager *image_manager_,
- LightManager *light_manager_)
+SVMCompiler::SVMCompiler(Scene *scene) : scene(scene)
{
- shader_manager = shader_manager_;
- image_manager = image_manager_;
- light_manager = light_manager_;
max_stack_use = 0;
current_type = SHADER_TYPE_SURFACE;
current_shader = NULL;
@@ -392,12 +401,12 @@ void SVMCompiler::add_node(const float4 &f)
uint SVMCompiler::attribute(ustring name)
{
- return shader_manager->get_attribute_id(name);
+ return scene->shader_manager->get_attribute_id(name);
}
uint SVMCompiler::attribute(AttributeStandard std)
{
- return shader_manager->get_attribute_id(std);
+ return scene->shader_manager->get_attribute_id(std);
}
uint SVMCompiler::attribute_standard(ustring name)
@@ -434,14 +443,8 @@ void SVMCompiler::generate_node(ShaderNode *node, ShaderNodeSet &done)
else if (current_type == SHADER_TYPE_VOLUME) {
if (node->has_spatial_varying())
current_shader->has_volume_spatial_varying = true;
- }
-
- if (node->has_object_dependency()) {
- current_shader->has_object_dependency = true;
- }
-
- if (node->has_attribute_dependency()) {
- current_shader->has_attribute_dependency = true;
+ if (node->has_attribute_dependency())
+ current_shader->has_volume_attribute_dependency = true;
}
if (node->has_integrator_dependency()) {
@@ -538,6 +541,24 @@ void SVMCompiler::generated_shared_closure_nodes(ShaderNode *root_node,
}
}
+void SVMCompiler::generate_aov_node(ShaderNode *node, CompilerState *state)
+{
+ /* execute dependencies for node */
+ foreach (ShaderInput *in, node->inputs) {
+ if (in->link != NULL) {
+ ShaderNodeSet dependencies;
+ find_dependencies(dependencies, state->nodes_done, in);
+ generate_svm_nodes(dependencies, state);
+ }
+ }
+
+ /* compile node itself */
+ generate_node(node, state->nodes_done);
+
+ state->nodes_done.insert(node);
+ state->nodes_done_flag[node->id] = true;
+}
+
void SVMCompiler::generate_multi_closure(ShaderNode *root_node,
ShaderNode *node,
CompilerState *state)
@@ -687,21 +708,21 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty
current_graph = graph;
/* get input in output node */
- ShaderNode *node = graph->output();
+ ShaderNode *output = graph->output();
ShaderInput *clin = NULL;
switch (type) {
case SHADER_TYPE_SURFACE:
- clin = node->input("Surface");
+ clin = output->input("Surface");
break;
case SHADER_TYPE_VOLUME:
- clin = node->input("Volume");
+ clin = output->input("Volume");
break;
case SHADER_TYPE_DISPLACEMENT:
- clin = node->input("Displacement");
+ clin = output->input("Displacement");
break;
case SHADER_TYPE_BUMP:
- clin = node->input("Normal");
+ clin = output->input("Normal");
break;
default:
assert(0);
@@ -712,10 +733,10 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty
memset((void *)&active_stack, 0, sizeof(active_stack));
current_svm_nodes.clear();
- foreach (ShaderNode *node_iter, graph->nodes) {
- foreach (ShaderInput *input, node_iter->inputs)
+ foreach (ShaderNode *node, graph->nodes) {
+ foreach (ShaderInput *input, node->inputs)
input->stack_offset = SVM_STACK_INVALID;
- foreach (ShaderOutput *output, node_iter->outputs)
+ foreach (ShaderOutput *output, node->outputs)
output->stack_offset = SVM_STACK_INVALID;
}
@@ -729,6 +750,7 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty
}
if (shader->used) {
+ CompilerState state(graph);
if (clin->link) {
bool generate = false;
@@ -753,13 +775,36 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty
}
if (generate) {
- CompilerState state(graph);
generate_multi_closure(clin->link->parent, clin->link->parent, &state);
}
}
/* compile output node */
- node->compile(*this);
+ output->compile(*this);
+
+ if (type == SHADER_TYPE_SURFACE) {
+ vector<OutputAOVNode *> aov_outputs;
+ foreach (ShaderNode *node, graph->nodes) {
+ if (node->special_type == SHADER_SPECIAL_TYPE_OUTPUT_AOV) {
+ OutputAOVNode *aov_node = static_cast<OutputAOVNode *>(node);
+ if (aov_node->slot >= 0) {
+ aov_outputs.push_back(aov_node);
+ }
+ }
+ }
+ if (aov_outputs.size() > 0) {
+ /* AOV passes are only written if the object is directly visible, so
+ * there is no point in evaluating all the nodes generated only for the
+ * AOV outputs if that's not the case. Therefore, we insert
+ * NODE_AOV_START into the shader before the AOV-only nodes are
+ * generated which tells the kernel that it can stop evaluation
+ * early if AOVs will not be written. */
+ add_node(NODE_AOV_START, 0, 0, 0);
+ foreach (OutputAOVNode *node, aov_outputs) {
+ generate_aov_node(node, &state);
+ }
+ }
+ }
}
/* add node to restore state after bump shader has finished */
@@ -773,14 +818,14 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty
compile_failed = false;
}
- /* for bump shaders we fall thru to the surface shader, but if this is any other kind of shader it ends here */
+ /* for bump shaders we fall thru to the surface shader, but if this is any other kind of shader
+ * it ends here */
if (type != SHADER_TYPE_BUMP) {
add_node(NODE_END, 0, 0, 0);
}
}
-void SVMCompiler::compile(
- Scene *scene, Shader *shader, array<int4> &svm_nodes, int index, Summary *summary)
+void SVMCompiler::compile(Shader *shader, array<int4> &svm_nodes, int index, Summary *summary)
{
/* copy graph for shader with bump mapping */
ShaderNode *output = shader->graph->output();
@@ -812,8 +857,7 @@ void SVMCompiler::compile(
shader->has_displacement = false;
shader->has_surface_spatial_varying = false;
shader->has_volume_spatial_varying = false;
- shader->has_object_dependency = false;
- shader->has_attribute_dependency = false;
+ shader->has_volume_attribute_dependency = false;
shader->has_integrator_dependency = false;
/* generate bump shader */
@@ -828,7 +872,8 @@ void SVMCompiler::compile(
{
scoped_timer timer((summary != NULL) ? &summary->time_generate_surface : NULL);
compile_type(shader, shader->graph, SHADER_TYPE_SURFACE);
- /* only set jump offset if there's no bump shader, as the bump shader will fall thru to this one if it exists */
+ /* only set jump offset if there's no bump shader, as the bump shader will fall thru to this
+ * one if it exists */
if (!has_bump) {
svm_nodes[index].y = svm_nodes.size();
}
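
The device_update() rewrite above lays the global node array out as a jump table with one entry per shader, followed by each shader's nodes minus its local jump node; a local offset L is rebased to L - 1 + node_offset. A small worked example of that arithmetic, with made-up shader sizes (illustrative only):

#include <cstdio>
#include <vector>

int main()
{
  /* Per-shader node counts as produced by device_update_shader(),
   * including each shader's local jump node. Sizes here are made up. */
  const std::vector<int> shader_node_counts = {6, 9};
  const int num_shaders = (int)shader_node_counts.size();

  /* Shader nodes start right after the jump table. */
  int node_offset = num_shaders;
  for (int i = 0; i < num_shaders; i++) {
    /* A local jump offset of 1 (the first node after the local jump node)
     * lands at 1 - 1 + node_offset in the global array. */
    printf("shader %d starts at global node %d\n", i, 1 - 1 + node_offset);
    node_offset += shader_node_counts[i] - 1; /* Local jump node is dropped. */
  }
  return 0;
}
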
diff --git a/intern/cycles/render/svm.h b/intern/cycles/render/svm.h
index d6964fb158b..61923fc40ac 100644
--- a/intern/cycles/render/svm.h
+++ b/intern/cycles/render/svm.h
@@ -50,13 +50,10 @@ class SVMShaderManager : public ShaderManager {
void device_free(Device *device, DeviceScene *dscene, Scene *scene);
protected:
- /* Lock used to synchronize threaded nodes compilation. */
- thread_spin_lock nodes_lock_;
-
void device_update_shader(Scene *scene,
Shader *shader,
Progress *progress,
- array<int4> *global_svm_nodes);
+ array<int4> *svm_nodes);
};
/* Graph Compiler */
@@ -96,11 +93,8 @@ class SVMCompiler {
string full_report() const;
};
- SVMCompiler(ShaderManager *shader_manager,
- ImageManager *image_manager,
- LightManager *light_manager);
- void compile(
- Scene *scene, Shader *shader, array<int4> &svm_nodes, int index, Summary *summary = NULL);
+ SVMCompiler(Scene *scene);
+ void compile(Shader *shader, array<int4> &svm_nodes, int index, Summary *summary = NULL);
int stack_assign(ShaderOutput *output);
int stack_assign(ShaderInput *input);
@@ -129,9 +123,8 @@ class SVMCompiler {
return current_type;
}
- ImageManager *image_manager;
- ShaderManager *shader_manager;
- LightManager *light_manager;
+ Scene *scene;
+ ShaderGraph *current_graph;
bool background;
protected:
@@ -207,6 +200,7 @@ class SVMCompiler {
ShaderInput *input,
ShaderNode *skip_node = NULL);
void generate_node(ShaderNode *node, ShaderNodeSet &done);
+ void generate_aov_node(ShaderNode *node, CompilerState *state);
void generate_closure_node(ShaderNode *node, CompilerState *state);
void generated_shared_closure_nodes(ShaderNode *root_node,
ShaderNode *node,
@@ -223,7 +217,6 @@ class SVMCompiler {
array<int4> current_svm_nodes;
ShaderType current_type;
Shader *current_shader;
- ShaderGraph *current_graph;
Stack active_stack;
int max_stack_use;
uint mix_weight_offset;
diff --git a/intern/cycles/render/tables.cpp b/intern/cycles/render/tables.cpp
index d88925939e3..270e05abe29 100644
--- a/intern/cycles/render/tables.cpp
+++ b/intern/cycles/render/tables.cpp
@@ -14,9 +14,9 @@
* limitations under the License.
*/
+#include "render/tables.h"
#include "device/device.h"
#include "render/scene.h"
-#include "render/tables.h"
#include "util/util_logging.h"
diff --git a/intern/cycles/render/tables.h b/intern/cycles/render/tables.h
index 12b59bb0aeb..3ed2959ae59 100644
--- a/intern/cycles/render/tables.h
+++ b/intern/cycles/render/tables.h
@@ -18,6 +18,7 @@
#define __TABLES_H__
#include "util/util_list.h"
+#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp
index 3148b5ef664..375c9fd8e09 100644
--- a/intern/cycles/render/tile.cpp
+++ b/intern/cycles/render/tile.cpp
@@ -101,6 +101,7 @@ TileManager::TileManager(bool progressive_,
tile_order = tile_order_;
start_resolution = start_resolution_;
pixel_size = pixel_size_;
+ slice_overlap = 0;
num_samples = num_samples_;
num_devices = num_devices_;
preserve_tile_device = preserve_tile_device_;
@@ -170,8 +171,9 @@ void TileManager::set_samples(int num_samples_)
}
else {
uint64_t pixel_samples = 0;
- /* While rendering in the viewport, the initial preview resolution is increased to the native resolution
- * before the actual rendering begins. Therefore, additional pixel samples will be rendered. */
+ /* While rendering in the viewport, the initial preview resolution is increased to the native
+ * resolution before the actual rendering begins. Therefore, additional pixel samples will be
+ * rendered. */
int divider = max(get_divider(params.width, params.height, start_resolution) / 2, pixel_size);
while (divider > pixel_size) {
int image_w = max(1, params.width / divider);
@@ -190,8 +192,9 @@ void TileManager::set_samples(int num_samples_)
}
}
-/* If sliced is false, splits image into tiles and assigns equal amount of tiles to every render device.
- * If sliced is true, slice image into as much pieces as how many devices are rendering this image. */
+/* If sliced is false, splits the image into tiles and assigns an equal number of tiles to every
+ * render device. If sliced is true, slices the image into as many pieces as there are devices
+ * rendering this image. */
int TileManager::gen_tiles(bool sliced)
{
int resolution = state.resolution_divider;
@@ -199,8 +202,7 @@ int TileManager::gen_tiles(bool sliced)
int image_h = max(1, params.height / resolution);
int2 center = make_int2(image_w / 2, image_h / 2);
- int num_logical_devices = preserve_tile_device ? num_devices : 1;
- int num = min(image_h, num_logical_devices);
+ int num = preserve_tile_device || sliced ? min(image_h, num_devices) : 1;
int slice_num = sliced ? num : 1;
int tile_w = (tile_size.x >= image_w) ? 1 : divide_up(image_w, tile_size.x);
@@ -214,7 +216,7 @@ int TileManager::gen_tiles(bool sliced)
tile_list = state.render_tiles.begin();
if (tile_order == TILE_HILBERT_SPIRAL) {
- assert(!sliced);
+ assert(!sliced && slice_overlap == 0);
int tile_h = (tile_size.y >= image_h) ? 1 : divide_up(image_h, tile_size.y);
state.tiles.resize(tile_w * tile_h);
@@ -255,7 +257,8 @@ int TileManager::gen_tiles(bool sliced)
}
int2 pos = block * block_size + tile * tile_size + offset;
- /* Only add tiles which are in the image (tiles outside of the image can be generated since the spiral is always square). */
+ /* Only add tiles which are in the image (tiles outside of the image can be generated since
+ * the spiral is always square). */
if (pos.x >= 0 && pos.y >= 0 && pos.x < image_w && pos.y < image_h) {
int w = min(tile_size.x, image_w - pos.x);
int h = min(tile_size.y, image_h - pos.y);
@@ -316,6 +319,12 @@ int TileManager::gen_tiles(bool sliced)
int slice_h = (slice == slice_num - 1) ? image_h - slice * (image_h / slice_num) :
image_h / slice_num;
+ if (slice_overlap != 0) {
+ int slice_y_offset = max(slice_y - slice_overlap, 0);
+ slice_h = min(slice_y + slice_h + slice_overlap, image_h) - slice_y_offset;
+ slice_y = slice_y_offset;
+ }
+
int tile_h = (tile_size.y >= slice_h) ? 1 : divide_up(slice_h, tile_size.y);
int tiles_per_device = divide_up(tile_w * tile_h, num);
@@ -336,7 +345,8 @@ int TileManager::gen_tiles(bool sliced)
cur_tiles++;
if (cur_tiles == tiles_per_device) {
- /* Tiles are already generated in Bottom-to-Top order, so no sort is necessary in that case. */
+ /* Tiles are already generated in Bottom-to-Top order, so no sort is necessary in that
+ * case. */
if (tile_order != TILE_BOTTOM_TO_TOP) {
tile_list->sort(TileComparator(tile_order, center, &state.tiles[0]));
}
@@ -359,6 +369,7 @@ void TileManager::gen_render_tiles()
{
/* Regenerate just the render tiles for progressive render. */
foreach (Tile &tile, state.tiles) {
+ tile.state = Tile::RENDER;
state.render_tiles[tile.device].push_back(tile.index);
}
}
@@ -382,23 +393,36 @@ void TileManager::set_tiles()
int TileManager::get_neighbor_index(int index, int neighbor)
{
- static const int dx[] = {-1, 0, 1, -1, 1, -1, 0, 1, 0}, dy[] = {-1, -1, -1, 0, 0, 1, 1, 1, 0};
+ /* Neighbor indices:
+ * 0 1 2
+ * 3 4 5
+ * 6 7 8
+ */
+ static const int dx[] = {-1, 0, 1, -1, 0, 1, -1, 0, 1};
+ static const int dy[] = {-1, -1, -1, 0, 0, 0, 1, 1, 1};
int resolution = state.resolution_divider;
int image_w = max(1, params.width / resolution);
int image_h = max(1, params.height / resolution);
+
+ int num = min(image_h, num_devices);
+ int slice_num = !background ? num : 1;
+ int slice_h = image_h / slice_num;
+
int tile_w = (tile_size.x >= image_w) ? 1 : divide_up(image_w, tile_size.x);
- int tile_h = (tile_size.y >= image_h) ? 1 : divide_up(image_h, tile_size.y);
+ int tile_h = (tile_size.y >= slice_h) ? 1 : divide_up(slice_h, tile_size.y);
- int nx = state.tiles[index].x / tile_size.x + dx[neighbor],
- ny = state.tiles[index].y / tile_size.y + dy[neighbor];
- if (nx < 0 || ny < 0 || nx >= tile_w || ny >= tile_h)
+ /* Tiles in the state tile list are always indexed from left to right, top to bottom. */
+ int nx = (index % tile_w) + dx[neighbor];
+ int ny = (index / tile_w) + dy[neighbor];
+ if (nx < 0 || ny < 0 || nx >= tile_w || ny >= tile_h * slice_num)
return -1;
return ny * state.tile_stride + nx;
}
-/* Checks whether all neighbors of a tile (as well as the tile itself) are at least at state min_state. */
+/* Checks whether all neighbors of a tile (as well as the tile itself) are at least at state
+ * min_state. */
bool TileManager::check_neighbor_state(int index, Tile::State min_state)
{
if (index < 0 || state.tiles[index].state < min_state) {
@@ -415,24 +439,22 @@ bool TileManager::check_neighbor_state(int index, Tile::State min_state)
return true;
}
-/* Returns whether the tile should be written (and freed if no denoising is used) instead of updating. */
-bool TileManager::finish_tile(int index, bool &delete_tile)
+/* Returns whether the tile should be written (and freed if no denoising is used) instead of
+ * updating. */
+bool TileManager::finish_tile(const int index, const bool need_denoise, bool &delete_tile)
{
delete_tile = false;
- if (progressive) {
- return true;
- }
-
switch (state.tiles[index].state) {
case Tile::RENDER: {
- if (!schedule_denoising) {
+ if (!(schedule_denoising && need_denoise)) {
state.tiles[index].state = Tile::DONE;
- delete_tile = true;
+ delete_tile = !progressive;
return true;
}
state.tiles[index].state = Tile::RENDERED;
- /* For each neighbor and the tile itself, check whether all of its neighbors have been rendered. If yes, it can be denoised. */
+ /* For each neighbor and the tile itself, check whether all of its neighbors have been
+ * rendered. If yes, it can be denoised. */
for (int neighbor = 0; neighbor < 9; neighbor++) {
int nindex = get_neighbor_index(index, neighbor);
if (check_neighbor_state(nindex, Tile::RENDERED)) {
@@ -444,19 +466,24 @@ bool TileManager::finish_tile(int index, bool &delete_tile)
}
case Tile::DENOISE: {
state.tiles[index].state = Tile::DENOISED;
- /* For each neighbor and the tile itself, check whether all of its neighbors have been denoised. If yes, it can be freed. */
+ /* For each neighbor and the tile itself, check whether all of its neighbors have been
+ * denoised. If yes, it can be freed. */
for (int neighbor = 0; neighbor < 9; neighbor++) {
int nindex = get_neighbor_index(index, neighbor);
if (check_neighbor_state(nindex, Tile::DENOISED)) {
state.tiles[nindex].state = Tile::DONE;
- /* It can happen that the tile just finished denoising and already can be freed here.
- * However, in that case it still has to be written before deleting, so we can't delete it yet. */
- if (neighbor == 8) {
- delete_tile = true;
- }
- else {
- delete state.tiles[nindex].buffers;
- state.tiles[nindex].buffers = NULL;
+ /* Do not delete finished tiles in progressive mode. */
+ if (!progressive) {
+ /* It can happen that the tile just finished denoising and already can be freed here.
+ * However, in that case it still has to be written before deleting, so we can't delete
+ * it yet. */
+ if (neighbor == 4) {
+ delete_tile = true;
+ }
+ else {
+ delete state.tiles[nindex].buffers;
+ state.tiles[nindex].buffers = NULL;
+ }
}
}
}
@@ -468,27 +495,65 @@ bool TileManager::finish_tile(int index, bool &delete_tile)
}
}
-bool TileManager::next_tile(Tile *&tile, int device)
+bool TileManager::next_tile(Tile *&tile, int device, uint tile_types)
{
- int logical_device = preserve_tile_device ? device : 0;
+ /* Preserve device if requested, unless this is a separate denoising device that just wants to
+ * grab any available tile. */
+ const bool preserve_device = preserve_tile_device && device < num_devices;
- if (logical_device >= state.render_tiles.size())
- return false;
+ if (tile_types & RenderTile::DENOISE) {
+ int tile_index = -1;
+ int logical_device = preserve_device ? device : 0;
- if (!state.denoising_tiles[logical_device].empty()) {
- int idx = state.denoising_tiles[logical_device].front();
- state.denoising_tiles[logical_device].pop_front();
- tile = &state.tiles[idx];
- return true;
+ while (logical_device < state.denoising_tiles.size()) {
+ if (state.denoising_tiles[logical_device].empty()) {
+ if (preserve_device) {
+ break;
+ }
+ else {
+ logical_device++;
+ continue;
+ }
+ }
+
+ tile_index = state.denoising_tiles[logical_device].front();
+ state.denoising_tiles[logical_device].pop_front();
+ break;
+ }
+
+ if (tile_index >= 0) {
+ tile = &state.tiles[tile_index];
+ return true;
+ }
}
- if (state.render_tiles[logical_device].empty())
- return false;
+ if (tile_types & RenderTile::PATH_TRACE) {
+ int tile_index = -1;
+ int logical_device = preserve_device ? device : 0;
- int idx = state.render_tiles[logical_device].front();
- state.render_tiles[logical_device].pop_front();
- tile = &state.tiles[idx];
- return true;
+ while (logical_device < state.render_tiles.size()) {
+ if (state.render_tiles[logical_device].empty()) {
+ if (preserve_device) {
+ break;
+ }
+ else {
+ logical_device++;
+ continue;
+ }
+ }
+
+ tile_index = state.render_tiles[logical_device].front();
+ state.render_tiles[logical_device].pop_front();
+ break;
+ }
+
+ if (tile_index >= 0) {
+ tile = &state.tiles[tile_index];
+ return true;
+ }
+ }
+
+ return false;
}
bool TileManager::done()
@@ -499,6 +564,16 @@ bool TileManager::done()
(state.sample + state.num_samples >= end_sample);
}
+bool TileManager::has_tiles()
+{
+ foreach (Tile &tile, state.tiles) {
+ if (tile.state != Tile::DONE) {
+ return true;
+ }
+ }
+ return false;
+}
+
bool TileManager::next()
{
if (done())
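
The rewritten get_neighbor_index() above documents the 3x3 neighbor layout (0..8, with 4 being the tile itself) and derives the tile's grid position from its list index. A simplified version for a plain row-major grid, without the slice handling (illustrative only):

/* Neighbor layout, matching the comment above:
 *   0 1 2
 *   3 4 5
 *   6 7 8
 * Index 4 is the tile itself. Returns -1 for neighbors outside the grid. */
static int neighbor_index(int index, int neighbor, int tile_w, int tile_h)
{
  static const int dx[] = {-1, 0, 1, -1, 0, 1, -1, 0, 1};
  static const int dy[] = {-1, -1, -1, 0, 0, 0, 1, 1, 1};

  const int nx = (index % tile_w) + dx[neighbor];
  const int ny = (index / tile_w) + dy[neighbor];
  if (nx < 0 || ny < 0 || nx >= tile_w || ny >= tile_h) {
    return -1;
  }
  return ny * tile_w + nx;
}
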
diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h
index 017c1af0ead..4858a275d5c 100644
--- a/intern/cycles/render/tile.h
+++ b/intern/cycles/render/tile.h
@@ -89,6 +89,7 @@ class TileManager {
} state;
int num_samples;
+ int slice_overlap;
TileManager(bool progressive,
int num_samples,
@@ -105,15 +106,19 @@ class TileManager {
void reset(BufferParams &params, int num_samples);
void set_samples(int num_samples);
bool next();
- bool next_tile(Tile *&tile, int device = 0);
- bool finish_tile(int index, bool &delete_tile);
+ bool next_tile(Tile *&tile, int device, uint tile_types);
+ bool finish_tile(const int index, const bool need_denoise, bool &delete_tile);
bool done();
+ bool has_tiles();
void set_tile_order(TileOrder tile_order_)
{
tile_order = tile_order_;
}
+ int get_neighbor_index(int index, int neighbor);
+ bool check_neighbor_state(int index, Tile::State state);
+
/* ** Sample range rendering. ** */
/* Start sample in the range. */
@@ -160,9 +165,6 @@ class TileManager {
/* Generate tile list, return number of tiles. */
int gen_tiles(bool sliced);
void gen_render_tiles();
-
- int get_neighbor_index(int index, int neighbor);
- bool check_neighbor_state(int index, Tile::State state);
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/subd/CMakeLists.txt b/intern/cycles/subd/CMakeLists.txt
index f5ceaa0436d..b874c5c3c2d 100644
--- a/intern/cycles/subd/CMakeLists.txt
+++ b/intern/cycles/subd/CMakeLists.txt
@@ -19,6 +19,7 @@ set(SRC_HEADERS
subd_patch.h
subd_patch_table.h
subd_split.h
+ subd_subpatch.h
)
set(LIB
diff --git a/intern/cycles/subd/subd_dice.cpp b/intern/cycles/subd/subd_dice.cpp
index 6b062ecfea2..91c7f4bea05 100644
--- a/intern/cycles/subd/subd_dice.cpp
+++ b/intern/cycles/subd/subd_dice.cpp
@@ -38,107 +38,91 @@ EdgeDice::EdgeDice(const SubdParams &params_) : params(params_)
}
}
-void EdgeDice::reserve(int num_verts)
+void EdgeDice::reserve(int num_verts, int num_triangles)
{
Mesh *mesh = params.mesh;
vert_offset = mesh->verts.size();
tri_offset = mesh->num_triangles();
- /* todo: optimize so we can reserve in advance, this is like push_back_slow() */
- if (vert_offset + num_verts > mesh->verts.capacity()) {
- mesh->reserve_mesh(size_t((vert_offset + num_verts) * 1.2), mesh->num_triangles());
- }
-
- mesh->resize_mesh(vert_offset + num_verts, tri_offset);
+ mesh->resize_mesh(mesh->verts.size() + num_verts, mesh->num_triangles());
+ mesh->reserve_mesh(mesh->verts.size() + num_verts, mesh->num_triangles() + num_triangles);
Attribute *attr_vN = mesh->attributes.add(ATTR_STD_VERTEX_NORMAL);
- mesh_P = mesh->verts.data();
- mesh_N = attr_vN->data_float3();
+ mesh_P = mesh->verts.data() + vert_offset;
+ mesh_N = attr_vN->data_float3() + vert_offset;
+
+ params.mesh->num_subd_verts += num_verts;
}
-int EdgeDice::add_vert(Patch *patch, float2 uv)
+void EdgeDice::set_vert(Patch *patch, int index, float2 uv)
{
float3 P, N;
patch->eval(&P, NULL, NULL, &N, uv.x, uv.y);
- assert(vert_offset < params.mesh->verts.size());
-
- mesh_P[vert_offset] = P;
- mesh_N[vert_offset] = N;
- params.mesh->vert_patch_uv[vert_offset] = make_float2(uv.x, uv.y);
-
- if (params.ptex) {
- Attribute *attr_ptex_uv = params.mesh->attributes.add(ATTR_STD_PTEX_UV);
- params.mesh->attributes.resize();
+ assert(index < params.mesh->verts.size());
- float3 *ptex_uv = attr_ptex_uv->data_float3();
- ptex_uv[vert_offset] = make_float3(uv.x, uv.y, 0.0f);
- }
-
- params.mesh->num_subd_verts++;
-
- return vert_offset++;
+ mesh_P[index] = P;
+ mesh_N[index] = N;
+ params.mesh->vert_patch_uv[index + vert_offset] = make_float2(uv.x, uv.y);
}
void EdgeDice::add_triangle(Patch *patch, int v0, int v1, int v2)
{
Mesh *mesh = params.mesh;
- /* todo: optimize so we can reserve in advance, this is like push_back_slow() */
- if (mesh->triangles.size() == mesh->triangles.capacity())
- mesh->reserve_mesh(mesh->verts.size(), size_t(max(mesh->num_triangles() + 1, 1) * 1.2));
-
- mesh->add_triangle(v0, v1, v2, patch->shader, true);
+ mesh->add_triangle(v0 + vert_offset, v1 + vert_offset, v2 + vert_offset, patch->shader, true);
params.mesh->triangle_patch[params.mesh->num_triangles() - 1] = patch->patch_index;
- if (params.ptex) {
- Attribute *attr_ptex_face_id = params.mesh->attributes.add(ATTR_STD_PTEX_FACE_ID);
- params.mesh->attributes.resize();
-
- float *ptex_face_id = attr_ptex_face_id->data_float();
- ptex_face_id[tri_offset] = (float)patch->ptex_face_id();
- }
-
tri_offset++;
}
-void EdgeDice::stitch_triangles(Patch *patch, vector<int> &outer, vector<int> &inner)
+void EdgeDice::stitch_triangles(Subpatch &sub, int edge)
{
- if (inner.size() == 0 || outer.size() == 0)
+ int Mu = max(sub.edge_u0.T, sub.edge_u1.T);
+ int Mv = max(sub.edge_v0.T, sub.edge_v1.T);
+ Mu = max(Mu, 2);
+ Mv = max(Mv, 2);
+
+ int outer_T = sub.edges[edge].T;
+ int inner_T = ((edge % 2) == 0) ? Mv - 2 : Mu - 2;
+
+ if (inner_T < 0 || outer_T < 0)
return; // XXX avoid crashes for Mu or Mv == 1, missing polygons
/* stitch together two arrays of verts with triangles. at each step,
* we compare using the next verts on both sides, to find the split
* direction with the smallest diagonal, and use that in order to keep
* the triangle shape reasonable. */
- for (size_t i = 0, j = 0; i + 1 < inner.size() || j + 1 < outer.size();) {
+ for (size_t i = 0, j = 0; i < inner_T || j < outer_T;) {
int v0, v1, v2;
- v0 = inner[i];
- v1 = outer[j];
+ v0 = sub.get_vert_along_grid_edge(edge, i);
+ v1 = sub.get_vert_along_edge(edge, j);
- if (j + 1 == outer.size()) {
- v2 = inner[++i];
+ if (j == outer_T) {
+ v2 = sub.get_vert_along_grid_edge(edge, ++i);
}
- else if (i + 1 == inner.size()) {
- v2 = outer[++j];
+ else if (i == inner_T) {
+ v2 = sub.get_vert_along_edge(edge, ++j);
}
else {
/* length of diagonals */
- float len1 = len_squared(mesh_P[inner[i]] - mesh_P[outer[j + 1]]);
- float len2 = len_squared(mesh_P[outer[j]] - mesh_P[inner[i + 1]]);
+ float len1 = len_squared(mesh_P[sub.get_vert_along_grid_edge(edge, i)] -
+ mesh_P[sub.get_vert_along_edge(edge, j + 1)]);
+ float len2 = len_squared(mesh_P[sub.get_vert_along_edge(edge, j)] -
+ mesh_P[sub.get_vert_along_grid_edge(edge, i + 1)]);
/* use smallest diagonal */
if (len1 < len2)
- v2 = outer[++j];
+ v2 = sub.get_vert_along_edge(edge, ++j);
else
- v2 = inner[++i];
+ v2 = sub.get_vert_along_grid_edge(edge, ++i);
}
- add_triangle(patch, v0, v1, v2);
+ add_triangle(sub.patch, v1, v0, v2);
}
}
@@ -148,22 +132,15 @@ QuadDice::QuadDice(const SubdParams &params_) : EdgeDice(params_)
{
}
-void QuadDice::reserve(EdgeFactors &ef, int Mu, int Mv)
-{
- /* XXX need to make this also work for edge factor 0 and 1 */
- int num_verts = (ef.tu0 + ef.tu1 + ef.tv0 + ef.tv1) + (Mu - 1) * (Mv - 1);
- EdgeDice::reserve(num_verts);
-}
-
-float2 QuadDice::map_uv(SubPatch &sub, float u, float v)
+float2 QuadDice::map_uv(Subpatch &sub, float u, float v)
{
/* map UV from subpatch to patch parametric coordinates */
- float2 d0 = interp(sub.P00, sub.P01, v);
- float2 d1 = interp(sub.P10, sub.P11, v);
+ float2 d0 = interp(sub.c00, sub.c01, v);
+ float2 d1 = interp(sub.c10, sub.c11, v);
return interp(d0, d1, u);
}
-float3 QuadDice::eval_projected(SubPatch &sub, float u, float v)
+float3 QuadDice::eval_projected(Subpatch &sub, float u, float v)
{
float2 uv = map_uv(sub, u, v);
float3 P;
@@ -175,70 +152,41 @@ float3 QuadDice::eval_projected(SubPatch &sub, float u, float v)
return P;
}
-int QuadDice::add_vert(SubPatch &sub, float u, float v)
+void QuadDice::set_vert(Subpatch &sub, int index, float u, float v)
{
- return EdgeDice::add_vert(sub.patch, map_uv(sub, u, v));
+ EdgeDice::set_vert(sub.patch, index, map_uv(sub, u, v));
}
-void QuadDice::add_side_u(SubPatch &sub,
- vector<int> &outer,
- vector<int> &inner,
- int Mu,
- int Mv,
- int tu,
- int side,
- int offset)
+void QuadDice::set_side(Subpatch &sub, int edge)
{
- outer.clear();
- inner.clear();
+ int t = sub.edges[edge].T;
/* set verts on the edge of the patch */
- outer.push_back(offset + ((side) ? 2 : 0));
-
- for (int i = 1; i < tu; i++) {
- float u = i / (float)tu;
- float v = (side) ? 1.0f : 0.0f;
-
- outer.push_back(add_vert(sub, u, v));
- }
-
- outer.push_back(offset + ((side) ? 3 : 1));
-
- /* set verts on the edge of the inner grid */
- for (int i = 0; i < Mu - 1; i++) {
- int j = (side) ? Mv - 1 - 1 : 0;
- inner.push_back(offset + 4 + i + j * (Mu - 1));
- }
-}
-
-void QuadDice::add_side_v(SubPatch &sub,
- vector<int> &outer,
- vector<int> &inner,
- int Mu,
- int Mv,
- int tv,
- int side,
- int offset)
-{
- outer.clear();
- inner.clear();
-
- /* set verts on the edge of the patch */
- outer.push_back(offset + ((side) ? 1 : 0));
-
- for (int j = 1; j < tv; j++) {
- float u = (side) ? 1.0f : 0.0f;
- float v = j / (float)tv;
-
- outer.push_back(add_vert(sub, u, v));
- }
-
- outer.push_back(offset + ((side) ? 3 : 2));
+ for (int i = 0; i < t; i++) {
+ float f = i / (float)t;
+
+ float u, v;
+ switch (edge) {
+ case 0:
+ u = 0;
+ v = f;
+ break;
+ case 1:
+ u = f;
+ v = 1;
+ break;
+ case 2:
+ u = 1;
+ v = 1.0f - f;
+ break;
+ case 3:
+ default:
+ u = 1.0f - f;
+ v = 0;
+ break;
+ }
- /* set verts on the edge of the inner grid */
- for (int j = 0; j < Mv - 1; j++) {
- int i = (side) ? Mu - 1 - 1 : 0;
- inner.push_back(offset + 4 + i + j * (Mu - 1));
+ set_vert(sub, sub.get_vert_along_edge(edge, i), u, v);
}
}
@@ -247,7 +195,7 @@ float QuadDice::quad_area(const float3 &a, const float3 &b, const float3 &c, con
return triangle_area(a, b, d) + triangle_area(a, d, c);
}
-float QuadDice::scale_factor(SubPatch &sub, EdgeFactors &ef, int Mu, int Mv)
+float QuadDice::scale_factor(Subpatch &sub, int Mu, int Mv)
{
/* estimate area as 4x largest of 4 quads */
float3 P[3][3];
@@ -269,23 +217,14 @@ float QuadDice::scale_factor(SubPatch &sub, EdgeFactors &ef, int Mu, int Mv)
// XXX does the -sqrt solution matter
// XXX max(D, 0.0) is highly suspicious, need to test cases
// where D goes negative
- float N = 0.5f * (Ntris - (ef.tu0 + ef.tu1 + ef.tv0 + ef.tv1));
+ float N = 0.5f * (Ntris - (sub.edge_u0.T + sub.edge_u1.T + sub.edge_v0.T + sub.edge_v1.T));
float D = 4.0f * N * Mu * Mv + (Mu + Mv) * (Mu + Mv);
float S = (Mu + Mv + sqrtf(max(D, 0.0f))) / (2 * Mu * Mv);
return S;
}
-void QuadDice::add_corners(SubPatch &sub)
-{
- /* add verts for patch corners */
- add_vert(sub, 0.0f, 0.0f);
- add_vert(sub, 1.0f, 0.0f);
- add_vert(sub, 0.0f, 1.0f);
- add_vert(sub, 1.0f, 1.0f);
-}
-
-void QuadDice::add_grid(SubPatch &sub, int Mu, int Mv, int offset)
+void QuadDice::add_grid(Subpatch &sub, int Mu, int Mv, int offset)
{
/* create inner grid */
float du = 1.0f / (float)Mu;
@@ -296,13 +235,13 @@ void QuadDice::add_grid(SubPatch &sub, int Mu, int Mv, int offset)
float u = i * du;
float v = j * dv;
- add_vert(sub, u, v);
+ set_vert(sub, offset + (i - 1) + (j - 1) * (Mu - 1), u, v);
if (i < Mu - 1 && j < Mv - 1) {
- int i1 = offset + 4 + (i - 1) + (j - 1) * (Mu - 1);
- int i2 = offset + 4 + i + (j - 1) * (Mu - 1);
- int i3 = offset + 4 + i + j * (Mu - 1);
- int i4 = offset + 4 + (i - 1) + j * (Mu - 1);
+ int i1 = offset + (i - 1) + (j - 1) * (Mu - 1);
+ int i2 = offset + i + (j - 1) * (Mu - 1);
+ int i3 = offset + i + j * (Mu - 1);
+ int i4 = offset + (i - 1) + j * (Mu - 1);
add_triangle(sub.patch, i1, i2, i3);
add_triangle(sub.patch, i1, i3, i4);
@@ -311,48 +250,34 @@ void QuadDice::add_grid(SubPatch &sub, int Mu, int Mv, int offset)
}
}
-void QuadDice::dice(SubPatch &sub, EdgeFactors &ef)
+void QuadDice::dice(Subpatch &sub)
{
/* compute inner grid size with scale factor */
- int Mu = max(ef.tu0, ef.tu1);
- int Mv = max(ef.tv0, ef.tv1);
+ int Mu = max(sub.edge_u0.T, sub.edge_u1.T);
+ int Mv = max(sub.edge_v0.T, sub.edge_v1.T);
-#if 0 /* Doesnt work very well, especially at grazing angles. */
+#if 0 /* Doesn't work very well, especially at grazing angles. */
float S = scale_factor(sub, ef, Mu, Mv);
#else
float S = 1.0f;
#endif
- Mu = max((int)ceil(S * Mu), 2); // XXX handle 0 & 1?
- Mv = max((int)ceil(S * Mv), 2); // XXX handle 0 & 1?
-
- /* reserve space for new verts */
- int offset = params.mesh->verts.size();
- reserve(ef, Mu, Mv);
-
- /* corners and inner grid */
- add_corners(sub);
- add_grid(sub, Mu, Mv, offset);
-
- /* bottom side */
- vector<int> outer, inner;
-
- add_side_u(sub, outer, inner, Mu, Mv, ef.tu0, 0, offset);
- stitch_triangles(sub.patch, outer, inner);
-
- /* top side */
- add_side_u(sub, outer, inner, Mu, Mv, ef.tu1, 1, offset);
- stitch_triangles(sub.patch, inner, outer);
+ Mu = max((int)ceilf(S * Mu), 2); // XXX handle 0 & 1?
+ Mv = max((int)ceilf(S * Mv), 2); // XXX handle 0 & 1?
- /* left side */
- add_side_v(sub, outer, inner, Mu, Mv, ef.tv0, 0, offset);
- stitch_triangles(sub.patch, inner, outer);
+ /* inner grid */
+ add_grid(sub, Mu, Mv, sub.inner_grid_vert_offset);
- /* right side */
- add_side_v(sub, outer, inner, Mu, Mv, ef.tv1, 1, offset);
- stitch_triangles(sub.patch, outer, inner);
+ /* sides */
+ set_side(sub, 0);
+ set_side(sub, 1);
+ set_side(sub, 2);
+ set_side(sub, 3);
- assert(vert_offset == params.mesh->verts.size());
+ stitch_triangles(sub, 0);
+ stitch_triangles(sub, 1);
+ stitch_triangles(sub, 2);
+ stitch_triangles(sub, 3);
}
CCL_NAMESPACE_END
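
The switch in QuadDice::set_side() above parameterizes each subpatch edge by a single fraction f in [0, 1). A minimal standalone sketch of that mapping (std::pair stands in for Cycles' float2; that substitution is an assumption of this illustration only):

  #include <utility>

  /* Maps (edge index, fraction along edge) to subpatch UV, mirroring the switch in
   * QuadDice::set_side(): the four edges walk the boundary c00 -> c01 -> c11 -> c10,
   * each edge starting at the corner with the same index. */
  static std::pair<float, float> edge_uv(int edge, float f)
  {
    switch (edge) {
      case 0: return {0.0f, f};          /* edge_v0: u = 0, v increases */
      case 1: return {f, 1.0f};          /* edge_u1: v = 1, u increases */
      case 2: return {1.0f, 1.0f - f};   /* edge_v1: u = 1, v decreases */
      default: return {1.0f - f, 0.0f};  /* edge_u0: v = 0, u decreases */
    }
  }
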
diff --git a/intern/cycles/subd/subd_dice.h b/intern/cycles/subd/subd_dice.h
index eee54e01861..ee63403d40c 100644
--- a/intern/cycles/subd/subd_dice.h
+++ b/intern/cycles/subd/subd_dice.h
@@ -25,6 +25,8 @@
#include "util/util_types.h"
#include "util/util_vector.h"
+#include "subd/subd_subpatch.h"
+
CCL_NAMESPACE_BEGIN
class Camera;
@@ -67,78 +69,33 @@ class EdgeDice {
explicit EdgeDice(const SubdParams &params);
- void reserve(int num_verts);
+ void reserve(int num_verts, int num_triangles);
- int add_vert(Patch *patch, float2 uv);
+ void set_vert(Patch *patch, int index, float2 uv);
void add_triangle(Patch *patch, int v0, int v1, int v2);
- void stitch_triangles(Patch *patch, vector<int> &outer, vector<int> &inner);
+ void stitch_triangles(Subpatch &sub, int edge);
};
-/* Quad EdgeDice
- *
- * Edge tessellation factors and subpatch coordinates are as follows:
- *
- * tu1
- * P01 --------- P11
- * | |
- * tv0 | | tv1
- * | |
- * P00 --------- P10
- * tu0
- */
+/* Quad EdgeDice */
class QuadDice : public EdgeDice {
public:
- struct SubPatch {
- Patch *patch;
-
- float2 P00;
- float2 P10;
- float2 P01;
- float2 P11;
- };
-
- struct EdgeFactors {
- int tu0;
- int tu1;
- int tv0;
- int tv1;
- };
-
explicit QuadDice(const SubdParams &params);
- void reserve(EdgeFactors &ef, int Mu, int Mv);
- float3 eval_projected(SubPatch &sub, float u, float v);
-
- float2 map_uv(SubPatch &sub, float u, float v);
- int add_vert(SubPatch &sub, float u, float v);
-
- void add_corners(SubPatch &sub);
- void add_grid(SubPatch &sub, int Mu, int Mv, int offset);
-
- void add_side_u(SubPatch &sub,
- vector<int> &outer,
- vector<int> &inner,
- int Mu,
- int Mv,
- int tu,
- int side,
- int offset);
-
- void add_side_v(SubPatch &sub,
- vector<int> &outer,
- vector<int> &inner,
- int Mu,
- int Mv,
- int tv,
- int side,
- int offset);
+ float3 eval_projected(Subpatch &sub, float u, float v);
+
+ float2 map_uv(Subpatch &sub, float u, float v);
+ void set_vert(Subpatch &sub, int index, float u, float v);
+
+ void add_grid(Subpatch &sub, int Mu, int Mv, int offset);
+
+ void set_side(Subpatch &sub, int edge);
float quad_area(const float3 &a, const float3 &b, const float3 &c, const float3 &d);
- float scale_factor(SubPatch &sub, EdgeFactors &ef, int Mu, int Mv);
+ float scale_factor(Subpatch &sub, int Mu, int Mv);
- void dice(SubPatch &sub, EdgeFactors &ef);
+ void dice(Subpatch &sub);
};
CCL_NAMESPACE_END
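
With the old SubPatch/EdgeFactors structs gone, vertex slots are pre-assigned rather than appended: the inner (Mu - 1) x (Mv - 1) grid of a subpatch is stored row-major starting at inner_grid_vert_offset, which is what both QuadDice::add_grid() and Subpatch::get_vert_along_grid_edge() index into. A small illustrative sketch of that indexing (not part of the patch):

  /* Index of inner grid vertex (i, j), with 1 <= i <= Mu - 1 and 1 <= j <= Mv - 1,
   * matching the expression used in QuadDice::add_grid(). */
  inline int inner_grid_vert_index(int offset, int i, int j, int Mu)
  {
    return offset + (i - 1) + (j - 1) * (Mu - 1); /* row-major, row stride Mu - 1 */
  }
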
diff --git a/intern/cycles/subd/subd_patch.h b/intern/cycles/subd/subd_patch.h
index 5209d4d0b07..8fe423bc94d 100644
--- a/intern/cycles/subd/subd_patch.h
+++ b/intern/cycles/subd/subd_patch.h
@@ -24,18 +24,17 @@ CCL_NAMESPACE_BEGIN
class Patch {
public:
- virtual ~Patch()
+ Patch() : patch_index(0), shader(0), from_ngon(false)
{
}
+
+ virtual ~Patch() = default;
+
virtual void eval(float3 *P, float3 *dPdu, float3 *dPdv, float3 *N, float u, float v) = 0;
- virtual BoundBox bound() = 0;
- virtual int ptex_face_id()
- {
- return -1;
- }
int patch_index;
int shader;
+ bool from_ngon;
};
/* Linear Quad Patch */
diff --git a/intern/cycles/subd/subd_split.cpp b/intern/cycles/subd/subd_split.cpp
index 803363bc240..1a8c182510c 100644
--- a/intern/cycles/subd/subd_split.cpp
+++ b/intern/cycles/subd/subd_split.cpp
@@ -21,6 +21,9 @@
#include "subd/subd_patch.h"
#include "subd/subd_split.h"
+#include "util/util_algorithm.h"
+#include "util/util_foreach.h"
+#include "util/util_hash.h"
#include "util/util_math.h"
#include "util/util_types.h"
@@ -28,14 +31,12 @@ CCL_NAMESPACE_BEGIN
/* DiagSplit */
-DiagSplit::DiagSplit(const SubdParams &params_) : params(params_)
-{
-}
+#define DSPLIT_NON_UNIFORM -1
+#define STITCH_NGON_CENTER_VERT_INDEX_OFFSET 0x60000000
+#define STITCH_NGON_SPLIT_EDGE_CENTER_VERT_TAG (0x60000000 - 1)
-void DiagSplit::dispatch(QuadDice::SubPatch &sub, QuadDice::EdgeFactors &ef)
+DiagSplit::DiagSplit(const SubdParams &params_) : params(params_)
{
- subpatches_quad.push_back(sub);
- edgefactors_quad.push_back(ef);
}
float3 DiagSplit::to_world(Patch *patch, float2 uv)
@@ -49,45 +50,62 @@ float3 DiagSplit::to_world(Patch *patch, float2 uv)
return P;
}
-int DiagSplit::T(Patch *patch, float2 Pstart, float2 Pend)
+static void order_float2(float2 &a, float2 &b)
{
- float3 Plast = make_float3(0.0f, 0.0f, 0.0f);
+ if (b.x < a.x || b.y < a.y) {
+ swap(a, b);
+ }
+}
+
+int DiagSplit::T(Patch *patch, float2 Pstart, float2 Pend, bool recursive_resolve)
+{
+ order_float2(Pstart, Pend); /* May not be necessary, but better to be safe. */
+
float Lsum = 0.0f;
float Lmax = 0.0f;
- for (int i = 0; i < params.test_steps; i++) {
+ float3 Plast = to_world(patch, Pstart);
+
+ for (int i = 1; i < params.test_steps; i++) {
float t = i / (float)(params.test_steps - 1);
float3 P = to_world(patch, Pstart + t * (Pend - Pstart));
- if (i > 0) {
- float L;
+ float L;
- if (!params.camera) {
- L = len(P - Plast);
- }
- else {
- Camera *cam = params.camera;
-
- float pixel_width = cam->world_to_raster_size((P + Plast) * 0.5f);
- L = len(P - Plast) / pixel_width;
- }
+ if (!params.camera) {
+ L = len(P - Plast);
+ }
+ else {
+ Camera *cam = params.camera;
- Lsum += L;
- Lmax = max(L, Lmax);
+ float pixel_width = cam->world_to_raster_size((P + Plast) * 0.5f);
+ L = len(P - Plast) / pixel_width;
}
+ Lsum += L;
+ Lmax = max(L, Lmax);
+
Plast = P;
}
- int tmin = (int)ceil(Lsum / params.dicing_rate);
- int tmax = (int)ceil((params.test_steps - 1) * Lmax /
- params.dicing_rate); // XXX paper says N instead of N-1, seems wrong?
+ int tmin = (int)ceilf(Lsum / params.dicing_rate);
+ int tmax = (int)ceilf((params.test_steps - 1) * Lmax /
+ params.dicing_rate); // XXX paper says N instead of N-1, seems wrong?
+ int res = max(tmax, 1);
- if (tmax - tmin > params.split_threshold)
- return DSPLIT_NON_UNIFORM;
+ if (tmax - tmin > params.split_threshold) {
+ if (!recursive_resolve) {
+ res = DSPLIT_NON_UNIFORM;
+ }
+ else {
+ float2 P = (Pstart + Pend) * 0.5f;
+ res = T(patch, Pstart, P, true) + T(patch, P, Pend, true);
+ }
+ }
- return tmax;
+ limit_edge_factor(res, patch, Pstart, Pend);
+ return res;
}
void DiagSplit::partition_edge(
@@ -99,159 +117,632 @@ void DiagSplit::partition_edge(
*t1 = T(patch, *P, Pend);
}
else {
- int I = (int)floor((float)t * 0.5f);
- *P = interp(Pstart, Pend, (t == 0) ? 0 : I / (float)t); /* XXX is t faces or verts */
+ assert(t >= 2); /* Need at least two segments to partition into. */
+
+ int I = (int)floorf((float)t * 0.5f);
+ *P = interp(Pstart, Pend, I / (float)t);
*t0 = I;
*t1 = t - I;
}
}
-static void limit_edge_factors(const QuadDice::SubPatch &sub, QuadDice::EdgeFactors &ef, int max_t)
+void DiagSplit::limit_edge_factor(int &T, Patch *patch, float2 Pstart, float2 Pend)
+{
+ int max_t = 1 << params.max_level;
+ int max_t_for_edge = int(max_t * len(Pstart - Pend));
+
+ if (patch->from_ngon) {
+ max_t_for_edge >>= 1; /* Initial split of ngon causes edges to extend half the distance. */
+ }
+
+ T = (max_t_for_edge <= 1) ? 1 : min(T, max_t_for_edge);
+
+ assert(T >= 1 || T == DSPLIT_NON_UNIFORM);
+}
+
+void DiagSplit::resolve_edge_factors(Subpatch &sub)
{
- float2 P00 = sub.P00;
- float2 P01 = sub.P01;
- float2 P10 = sub.P10;
- float2 P11 = sub.P11;
-
- int tu0 = int(max_t * len(P10 - P00));
- int tu1 = int(max_t * len(P11 - P01));
- int tv0 = int(max_t * len(P01 - P00));
- int tv1 = int(max_t * len(P11 - P10));
-
- ef.tu0 = tu0 <= 1 ? 1 : min(ef.tu0, tu0);
- ef.tu1 = tu1 <= 1 ? 1 : min(ef.tu1, tu1);
- ef.tv0 = tv0 <= 1 ? 1 : min(ef.tv0, tv0);
- ef.tv1 = tv1 <= 1 ? 1 : min(ef.tv1, tv1);
+ /* Resolve DSPLIT_NON_UNIFORM to actual T value if splitting is no longer possible. */
+ if (sub.edge_u0.T == 1 && sub.edge_u1.T == DSPLIT_NON_UNIFORM) {
+ sub.edge_u1.T = T(sub.patch, sub.c01, sub.c11, true);
+ }
+ if (sub.edge_u1.T == 1 && sub.edge_u0.T == DSPLIT_NON_UNIFORM) {
+ sub.edge_u0.T = T(sub.patch, sub.c00, sub.c10, true);
+ }
+ if (sub.edge_v0.T == 1 && sub.edge_v1.T == DSPLIT_NON_UNIFORM) {
+ sub.edge_v1.T = T(sub.patch, sub.c11, sub.c10, true);
+ }
+ if (sub.edge_v1.T == 1 && sub.edge_v0.T == DSPLIT_NON_UNIFORM) {
+ sub.edge_v0.T = T(sub.patch, sub.c01, sub.c00, true);
+ }
}
-void DiagSplit::split(QuadDice::SubPatch &sub, QuadDice::EdgeFactors &ef, int depth)
+void DiagSplit::split(Subpatch &sub, int depth)
{
if (depth > 32) {
/* We should never get here, but just in case end recursion safely. */
- ef.tu0 = 1;
- ef.tu1 = 1;
- ef.tv0 = 1;
- ef.tv1 = 1;
+ assert(!"diagsplit recursion limit reached");
+
+ sub.edge_u0.T = 1;
+ sub.edge_u1.T = 1;
+ sub.edge_v0.T = 1;
+ sub.edge_v1.T = 1;
- dispatch(sub, ef);
+ subpatches.push_back(sub);
return;
}
- bool split_u = (ef.tu0 == DSPLIT_NON_UNIFORM || ef.tu1 == DSPLIT_NON_UNIFORM);
- bool split_v = (ef.tv0 == DSPLIT_NON_UNIFORM || ef.tv1 == DSPLIT_NON_UNIFORM);
+ bool split_u = (sub.edge_u0.T == DSPLIT_NON_UNIFORM || sub.edge_u1.T == DSPLIT_NON_UNIFORM);
+ bool split_v = (sub.edge_v0.T == DSPLIT_NON_UNIFORM || sub.edge_v1.T == DSPLIT_NON_UNIFORM);
/* Split subpatches such that the ratio of T for opposite edges doesn't
- * exceed 1.5, this reduces over tessellation for some patches
+ * exceed 1.5, this reduces over tessellation for some patches
*/
- bool tmp_split_v = split_v;
- if (!split_u && min(ef.tu0, ef.tu1) > 8 && min(ef.tu0, ef.tu1) * 1.5f < max(ef.tu0, ef.tu1))
+ /* clang-format off */
+ if (min(sub.edge_u0.T, sub.edge_u1.T) > 8 && /* Must be uniform and preferably greater than 8 to split. */
+ min(sub.edge_v0.T, sub.edge_v1.T) >= 2 && /* Must be uniform and at least 2 to split. */
+ max(sub.edge_u0.T, sub.edge_u1.T) / min(sub.edge_u0.T, sub.edge_u1.T) > 1.5f)
+ {
split_v = true;
- if (!tmp_split_v && min(ef.tu0, ef.tu1) > 8 && min(ef.tv0, ef.tv1) * 1.5f < max(ef.tv0, ef.tv1))
+ }
+ if (min(sub.edge_v0.T, sub.edge_v1.T) > 8 &&
+ min(sub.edge_u0.T, sub.edge_u1.T) >= 2 &&
+ max(sub.edge_v0.T, sub.edge_v1.T) / min(sub.edge_v0.T, sub.edge_v1.T) > 1.5f)
+ {
split_u = true;
+ }
+ /* clang-format on */
- /* alternate axis */
+ /* Alternate axis. */
if (split_u && split_v) {
split_u = depth % 2;
}
- if (split_u) {
- /* partition edges */
- QuadDice::EdgeFactors ef0, ef1;
- float2 Pu0, Pu1;
+ if (!split_u && !split_v) {
+ /* Add the unsplit subpatch. */
+ subpatches.push_back(sub);
+ Subpatch &subpatch = subpatches[subpatches.size() - 1];
+
+ /* Update T values and offsets. */
+ for (int i = 0; i < 4; i++) {
+ Subpatch::edge_t &edge = subpatch.edges[i];
+
+ edge.offset = edge.edge->T;
+ edge.edge->T += edge.T;
+ }
+ }
+ else {
+ /* Copy into new subpatches. */
+ Subpatch sub_a = sub;
+ Subpatch sub_b = sub;
+
+ /* Pointers to various subpatch elements. */
+ Subpatch::edge_t *sub_across_0, *sub_across_1;
+ Subpatch::edge_t *sub_a_across_0, *sub_a_across_1;
+ Subpatch::edge_t *sub_b_across_0, *sub_b_across_1;
+
+ Subpatch::edge_t *sub_a_split, *sub_b_split;
+
+ float2 *Pa, *Pb, *Pc, *Pd;
+
+ /* Set pointers based on split axis. */
+ if (split_u) {
+ sub_across_0 = &sub.edge_u0;
+ sub_across_1 = &sub.edge_u1;
+ sub_a_across_0 = &sub_a.edge_u0;
+ sub_a_across_1 = &sub_a.edge_u1;
+ sub_b_across_0 = &sub_b.edge_u0;
+ sub_b_across_1 = &sub_b.edge_u1;
+
+ sub_a_split = &sub_a.edge_v1;
+ sub_b_split = &sub_b.edge_v0;
+
+ Pa = &sub_a.c11;
+ Pb = &sub_a.c10;
+ Pc = &sub_b.c01;
+ Pd = &sub_b.c00;
+ }
+ else {
+ sub_across_0 = &sub.edge_v0;
+ sub_across_1 = &sub.edge_v1;
+ sub_a_across_0 = &sub_a.edge_v0;
+ sub_a_across_1 = &sub_a.edge_v1;
+ sub_b_across_0 = &sub_b.edge_v0;
+ sub_b_across_1 = &sub_b.edge_v1;
+
+ sub_a_split = &sub_a.edge_u0;
+ sub_b_split = &sub_b.edge_u1;
+
+ Pa = &sub_a.c10;
+ Pb = &sub_a.c00;
+ Pc = &sub_b.c11;
+ Pd = &sub_b.c01;
+ }
- partition_edge(sub.patch, &Pu0, &ef0.tu0, &ef1.tu0, sub.P00, sub.P10, ef.tu0);
- partition_edge(sub.patch, &Pu1, &ef0.tu1, &ef1.tu1, sub.P01, sub.P11, ef.tu1);
+ /* Partition edges */
+ float2 P0, P1;
- /* split */
- int tsplit = T(sub.patch, Pu0, Pu1);
- ef0.tv0 = ef.tv0;
- ef0.tv1 = tsplit;
+ partition_edge(
+ sub.patch, &P0, &sub_a_across_0->T, &sub_b_across_0->T, *Pd, *Pb, sub_across_0->T);
+ partition_edge(
+ sub.patch, &P1, &sub_a_across_1->T, &sub_b_across_1->T, *Pc, *Pa, sub_across_1->T);
- ef1.tv0 = tsplit;
- ef1.tv1 = ef.tv1;
+ /* Split */
+ *Pa = P1;
+ *Pb = P0;
- /* create subpatches */
- QuadDice::SubPatch sub0 = {sub.patch, sub.P00, Pu0, sub.P01, Pu1};
- QuadDice::SubPatch sub1 = {sub.patch, Pu0, sub.P10, Pu1, sub.P11};
+ *Pc = P1;
+ *Pd = P0;
- limit_edge_factors(sub0, ef0, 1 << params.max_level);
- limit_edge_factors(sub1, ef1, 1 << params.max_level);
+ int tsplit = T(sub.patch, P0, P1);
- split(sub0, ef0, depth + 1);
- split(sub1, ef1, depth + 1);
+ if (depth == -2 && tsplit == 1) {
+ tsplit = 2; /* Ensure we can always split at depth -1. */
+ }
+
+ sub_a_split->T = tsplit;
+ sub_b_split->T = tsplit;
+
+ resolve_edge_factors(sub_a);
+ resolve_edge_factors(sub_b);
+
+ /* Create new edge */
+ Edge &edge = *alloc_edge();
+
+ sub_a_split->edge = &edge;
+ sub_b_split->edge = &edge;
+
+ sub_a_split->offset = 0;
+ sub_b_split->offset = 0;
+
+ sub_a_split->indices_decrease_along_edge = false;
+ sub_b_split->indices_decrease_along_edge = true;
+
+ sub_a_split->sub_edges_created_in_reverse_order = !split_u;
+ sub_b_split->sub_edges_created_in_reverse_order = !split_u;
+
+ edge.top_indices_decrease = sub_across_1->sub_edges_created_in_reverse_order;
+ edge.bottom_indices_decrease = sub_across_0->sub_edges_created_in_reverse_order;
+
+ /* Recurse */
+ edge.T = 0;
+ split(sub_a, depth + 1);
+
+ int edge_t = edge.T;
+ (void)edge_t;
+
+ edge.top_offset = sub_across_1->edge->T;
+ edge.bottom_offset = sub_across_0->edge->T;
+
+ edge.T = 0; /* We calculate T twice along each edge. :/ */
+ split(sub_b, depth + 1);
+
+ assert(edge.T == edge_t); /* If this fails we will crash at some later point! */
+
+ edge.top = sub_across_1->edge;
+ edge.bottom = sub_across_0->edge;
}
- else if (split_v) {
- /* partition edges */
- QuadDice::EdgeFactors ef0, ef1;
- float2 Pv0, Pv1;
+}
+
+int DiagSplit::alloc_verts(int n)
+{
+ int a = num_alloced_verts;
+ num_alloced_verts += n;
+ return a;
+}
- partition_edge(sub.patch, &Pv0, &ef0.tv0, &ef1.tv0, sub.P00, sub.P01, ef.tv0);
- partition_edge(sub.patch, &Pv1, &ef0.tv1, &ef1.tv1, sub.P10, sub.P11, ef.tv1);
+Edge *DiagSplit::alloc_edge()
+{
+ edges.emplace_back();
+ return &edges.back();
+}
- /* split */
- int tsplit = T(sub.patch, Pv0, Pv1);
- ef0.tu0 = ef.tu0;
- ef0.tu1 = tsplit;
+void DiagSplit::split_patches(Patch *patches, size_t patches_byte_stride)
+{
+ int patch_index = 0;
- ef1.tu0 = tsplit;
- ef1.tu1 = ef.tu1;
+ for (int f = 0; f < params.mesh->subd_faces.size(); f++) {
+ Mesh::SubdFace &face = params.mesh->subd_faces[f];
- /* create subpatches */
- QuadDice::SubPatch sub0 = {sub.patch, sub.P00, sub.P10, Pv0, Pv1};
- QuadDice::SubPatch sub1 = {sub.patch, Pv0, Pv1, sub.P01, sub.P11};
+ Patch *patch = (Patch *)(((char *)patches) + patch_index * patches_byte_stride);
- limit_edge_factors(sub0, ef0, 1 << params.max_level);
- limit_edge_factors(sub1, ef1, 1 << params.max_level);
+ if (face.is_quad()) {
+ patch_index++;
- split(sub0, ef0, depth + 1);
- split(sub1, ef1, depth + 1);
+ split_quad(face, patch);
+ }
+ else {
+ patch_index += face.num_corners;
+
+ split_ngon(face, patch, patches_byte_stride);
+ }
}
- else {
- dispatch(sub, ef);
+
+ params.mesh->vert_to_stitching_key_map.clear();
+ params.mesh->vert_stitching_map.clear();
+
+ post_split();
+}
+
+static Edge *create_edge_from_corner(DiagSplit *split,
+ const Mesh *mesh,
+ const Mesh::SubdFace &face,
+ int corner,
+ bool &reversed,
+ int v0,
+ int v1)
+{
+ int a = mesh->subd_face_corners[face.start_corner + mod(corner + 0, face.num_corners)];
+ int b = mesh->subd_face_corners[face.start_corner + mod(corner + 1, face.num_corners)];
+
+ reversed = !(b < a);
+
+ if (b < a) {
+ swap(a, b);
+ swap(v0, v1);
}
+
+ Edge *edge = split->alloc_edge();
+
+ edge->is_stitch_edge = true;
+ edge->stitch_start_vert_index = a;
+ edge->stitch_end_vert_index = b;
+
+ edge->start_vert_index = v0;
+ edge->end_vert_index = v1;
+
+ edge->stitch_edge_key = {a, b};
+
+ return edge;
}
-void DiagSplit::split_quad(Patch *patch, QuadDice::SubPatch *subpatch)
+void DiagSplit::split_quad(const Mesh::SubdFace &face, Patch *patch)
{
- QuadDice::SubPatch sub_split;
- QuadDice::EdgeFactors ef_split;
+ Subpatch subpatch(patch);
+
+ int v = alloc_verts(4);
+
+ bool v0_reversed, u1_reversed, v1_reversed, u0_reversed;
+ subpatch.edge_v0.edge = create_edge_from_corner(
+ this, params.mesh, face, 3, v0_reversed, v + 3, v + 0);
+ subpatch.edge_u1.edge = create_edge_from_corner(
+ this, params.mesh, face, 2, u1_reversed, v + 2, v + 3);
+ subpatch.edge_v1.edge = create_edge_from_corner(
+ this, params.mesh, face, 1, v1_reversed, v + 1, v + 2);
+ subpatch.edge_u0.edge = create_edge_from_corner(
+ this, params.mesh, face, 0, u0_reversed, v + 0, v + 1);
+
+ subpatch.edge_v0.sub_edges_created_in_reverse_order = !v0_reversed;
+ subpatch.edge_u1.sub_edges_created_in_reverse_order = u1_reversed;
+ subpatch.edge_v1.sub_edges_created_in_reverse_order = v1_reversed;
+ subpatch.edge_u0.sub_edges_created_in_reverse_order = !u0_reversed;
+
+ subpatch.edge_v0.indices_decrease_along_edge = v0_reversed;
+ subpatch.edge_u1.indices_decrease_along_edge = u1_reversed;
+ subpatch.edge_v1.indices_decrease_along_edge = v1_reversed;
+ subpatch.edge_u0.indices_decrease_along_edge = u0_reversed;
+
+ /* Forces a split in both axis for quads, needed to match split of ngons into quads. */
+ subpatch.edge_u0.T = DSPLIT_NON_UNIFORM;
+ subpatch.edge_u1.T = DSPLIT_NON_UNIFORM;
+ subpatch.edge_v0.T = DSPLIT_NON_UNIFORM;
+ subpatch.edge_v1.T = DSPLIT_NON_UNIFORM;
+
+ split(subpatch, -2);
+}
+
+static Edge *create_split_edge_from_corner(DiagSplit *split,
+ const Mesh *mesh,
+ const Mesh::SubdFace &face,
+ int corner,
+ int side,
+ bool &reversed,
+ int v0,
+ int v1,
+ int vc)
+{
+ Edge *edge = split->alloc_edge();
+
+ int a = mesh->subd_face_corners[face.start_corner + mod(corner + 0, face.num_corners)];
+ int b = mesh->subd_face_corners[face.start_corner + mod(corner + 1, face.num_corners)];
- if (subpatch) {
- sub_split = *subpatch;
+ if (b < a) {
+ edge->stitch_edge_key = {b, a};
}
else {
- sub_split.patch = patch;
- sub_split.P00 = make_float2(0.0f, 0.0f);
- sub_split.P10 = make_float2(1.0f, 0.0f);
- sub_split.P01 = make_float2(0.0f, 1.0f);
- sub_split.P11 = make_float2(1.0f, 1.0f);
+ edge->stitch_edge_key = {a, b};
}
- ef_split.tu0 = T(patch, sub_split.P00, sub_split.P10);
- ef_split.tu1 = T(patch, sub_split.P01, sub_split.P11);
- ef_split.tv0 = T(patch, sub_split.P00, sub_split.P01);
- ef_split.tv1 = T(patch, sub_split.P10, sub_split.P11);
+ reversed = !(b < a);
- limit_edge_factors(sub_split, ef_split, 1 << params.max_level);
+ if (side == 0) {
+ a = vc;
+ }
+ else {
+ b = vc;
+ }
+
+ if (!reversed) {
+ swap(a, b);
+ swap(v0, v1);
+ }
+
+ edge->is_stitch_edge = true;
+ edge->stitch_start_vert_index = a;
+ edge->stitch_end_vert_index = b;
+
+ edge->start_vert_index = v0;
+ edge->end_vert_index = v1;
+
+ return edge;
+}
+
+void DiagSplit::split_ngon(const Mesh::SubdFace &face, Patch *patches, size_t patches_byte_stride)
+{
+ Edge *prev_edge_u0 = nullptr;
+ Edge *first_edge_v0 = nullptr;
+
+ for (int corner = 0; corner < face.num_corners; corner++) {
+ Patch *patch = (Patch *)(((char *)patches) + corner * patches_byte_stride);
+
+ Subpatch subpatch(patch);
+
+ int v = alloc_verts(4);
+
+ /* Setup edges. */
+ Edge *edge_u1 = alloc_edge();
+ Edge *edge_v1 = alloc_edge();
+
+ edge_v1->is_stitch_edge = true;
+ edge_u1->is_stitch_edge = true;
+
+ edge_u1->stitch_start_vert_index = -(face.start_corner + mod(corner + 0, face.num_corners)) -
+ 1;
+ edge_u1->stitch_end_vert_index = STITCH_NGON_CENTER_VERT_INDEX_OFFSET + face.ptex_offset;
+
+ edge_u1->start_vert_index = v + 3;
+ edge_u1->end_vert_index = v + 2;
+
+ edge_u1->stitch_edge_key = {edge_u1->stitch_start_vert_index, edge_u1->stitch_end_vert_index};
+
+ edge_v1->stitch_start_vert_index = -(face.start_corner + mod(corner + 1, face.num_corners)) -
+ 1;
+ edge_v1->stitch_end_vert_index = STITCH_NGON_CENTER_VERT_INDEX_OFFSET + face.ptex_offset;
+
+ edge_v1->start_vert_index = v + 1;
+ edge_v1->end_vert_index = v + 2;
+
+ edge_v1->stitch_edge_key = {edge_v1->stitch_start_vert_index, edge_v1->stitch_end_vert_index};
+
+ bool v0_reversed, u0_reversed;
+
+ subpatch.edge_v0.edge = create_split_edge_from_corner(this,
+ params.mesh,
+ face,
+ corner - 1,
+ 0,
+ v0_reversed,
+ v + 3,
+ v + 0,
+ STITCH_NGON_SPLIT_EDGE_CENTER_VERT_TAG);
+
+ subpatch.edge_u1.edge = edge_u1;
+ subpatch.edge_v1.edge = edge_v1;
+
+ subpatch.edge_u0.edge = create_split_edge_from_corner(this,
+ params.mesh,
+ face,
+ corner + 0,
+ 1,
+ u0_reversed,
+ v + 0,
+ v + 1,
+ STITCH_NGON_SPLIT_EDGE_CENTER_VERT_TAG);
+
+ subpatch.edge_v0.sub_edges_created_in_reverse_order = !v0_reversed;
+ subpatch.edge_u1.sub_edges_created_in_reverse_order = false;
+ subpatch.edge_v1.sub_edges_created_in_reverse_order = true;
+ subpatch.edge_u0.sub_edges_created_in_reverse_order = !u0_reversed;
+
+ subpatch.edge_v0.indices_decrease_along_edge = v0_reversed;
+ subpatch.edge_u1.indices_decrease_along_edge = false;
+ subpatch.edge_v1.indices_decrease_along_edge = true;
+ subpatch.edge_u0.indices_decrease_along_edge = u0_reversed;
+
+ /* Perform split. */
+ {
+ subpatch.edge_u0.T = T(subpatch.patch, subpatch.c00, subpatch.c10);
+ subpatch.edge_u1.T = T(subpatch.patch, subpatch.c01, subpatch.c11);
+ subpatch.edge_v0.T = T(subpatch.patch, subpatch.c00, subpatch.c01);
+ subpatch.edge_v1.T = T(subpatch.patch, subpatch.c10, subpatch.c11);
+
+ resolve_edge_factors(subpatch);
+
+ split(subpatch, 0);
+ }
+
+ /* Update offsets after T is known from split. */
+ edge_u1->top = subpatch.edge_v0.edge;
+ edge_u1->stitch_top_offset = edge_u1->top->T * (v0_reversed ? -1 : 1);
+ edge_v1->top = subpatch.edge_u0.edge;
+ edge_v1->stitch_top_offset = edge_v1->top->T * (!u0_reversed ? -1 : 1);
+
+ if (corner == 0) {
+ first_edge_v0 = subpatch.edge_v0.edge;
+ }
+
+ if (prev_edge_u0) {
+ if (v0_reversed) {
+ subpatch.edge_v0.edge->stitch_offset = prev_edge_u0->T;
+ }
+ else {
+ prev_edge_u0->stitch_offset = subpatch.edge_v0.edge->T;
+ }
+
+ int T = subpatch.edge_v0.edge->T + prev_edge_u0->T;
+ subpatch.edge_v0.edge->stitch_edge_T = T;
+ prev_edge_u0->stitch_edge_T = T;
+ }
- split(sub_split, ef_split);
+ if (corner == face.num_corners - 1) {
+ if (v0_reversed) {
+ subpatch.edge_u0.edge->stitch_offset = first_edge_v0->T;
+ }
+ else {
+ first_edge_v0->stitch_offset = subpatch.edge_u0.edge->T;
+ }
+
+ int T = first_edge_v0->T + subpatch.edge_u0.edge->T;
+ first_edge_v0->stitch_edge_T = T;
+ subpatch.edge_u0.edge->stitch_edge_T = T;
+ }
+
+ prev_edge_u0 = subpatch.edge_u0.edge;
+ }
+}
+
+void DiagSplit::post_split()
+{
+ int num_stitch_verts = 0;
+
+ /* All patches are now split, and all T values known. */
+ foreach (Edge &edge, edges) {
+ if (edge.second_vert_index < 0) {
+ edge.second_vert_index = alloc_verts(edge.T - 1);
+ }
+
+ if (edge.is_stitch_edge) {
+ num_stitch_verts = max(num_stitch_verts,
+ max(edge.stitch_start_vert_index, edge.stitch_end_vert_index));
+ }
+ }
+
+ num_stitch_verts += 1;
+
+ /* Map of edge key to edge stitching vert offset. */
+ struct pair_hasher {
+ size_t operator()(const pair<int, int> &k) const
+ {
+ return hash_uint2(k.first, k.second);
+ }
+ };
+ typedef unordered_map<pair<int, int>, int, pair_hasher> edge_stitch_verts_map_t;
+ edge_stitch_verts_map_t edge_stitch_verts_map;
+
+ foreach (Edge &edge, edges) {
+ if (edge.is_stitch_edge) {
+ if (edge.stitch_edge_T == 0) {
+ edge.stitch_edge_T = edge.T;
+ }
+
+ if (edge_stitch_verts_map.find(edge.stitch_edge_key) == edge_stitch_verts_map.end()) {
+ edge_stitch_verts_map[edge.stitch_edge_key] = num_stitch_verts;
+ num_stitch_verts += edge.stitch_edge_T - 1;
+ }
+ }
+ }
+
+ /* Set start and end indices for edges generated from a split. */
+ foreach (Edge &edge, edges) {
+ if (edge.start_vert_index < 0) {
+ /* Fixup offsets. */
+ if (edge.top_indices_decrease) {
+ edge.top_offset = edge.top->T - edge.top_offset;
+ }
+
+ edge.start_vert_index = edge.top->get_vert_along_edge(edge.top_offset);
+ }
+
+ if (edge.end_vert_index < 0) {
+ if (edge.bottom_indices_decrease) {
+ edge.bottom_offset = edge.bottom->T - edge.bottom_offset;
+ }
+
+ edge.end_vert_index = edge.bottom->get_vert_along_edge(edge.bottom_offset);
+ }
+ }
+
+ int vert_offset = params.mesh->verts.size();
+
+ /* Add verts to stitching map. */
+ foreach (const Edge &edge, edges) {
+ if (edge.is_stitch_edge) {
+ int second_stitch_vert_index = edge_stitch_verts_map[edge.stitch_edge_key];
+
+ for (int i = 0; i <= edge.T; i++) {
+ /* Get proper stitching key. */
+ int key;
+
+ if (i == 0) {
+ key = edge.stitch_start_vert_index;
+ }
+ else if (i == edge.T) {
+ key = edge.stitch_end_vert_index;
+ }
+ else {
+ key = second_stitch_vert_index + i - 1 + edge.stitch_offset;
+ }
+
+ if (key == STITCH_NGON_SPLIT_EDGE_CENTER_VERT_TAG) {
+ if (i == 0) {
+ key = second_stitch_vert_index - 1 + edge.stitch_offset;
+ }
+ else if (i == edge.T) {
+ key = second_stitch_vert_index - 1 + edge.T;
+ }
+ }
+ else if (key < 0 && edge.top) { /* ngon spoke edge */
+ int s = edge_stitch_verts_map[edge.top->stitch_edge_key];
+ if (edge.stitch_top_offset >= 0) {
+ key = s - 1 + edge.stitch_top_offset;
+ }
+ else {
+ key = s - 1 + edge.top->stitch_edge_T + edge.stitch_top_offset;
+ }
+ }
+
+ /* Get real vert index. */
+ int vert = edge.get_vert_along_edge(i) + vert_offset;
+
+ /* Add to map */
+ if (params.mesh->vert_to_stitching_key_map.find(vert) ==
+ params.mesh->vert_to_stitching_key_map.end()) {
+ params.mesh->vert_to_stitching_key_map[vert] = key;
+ params.mesh->vert_stitching_map.insert({key, vert});
+ }
+ }
+ }
+ }
+
+ /* Dice; TODO(mai): Move this out of split. */
QuadDice dice(params);
- for (size_t i = 0; i < subpatches_quad.size(); i++) {
- QuadDice::SubPatch &sub = subpatches_quad[i];
- QuadDice::EdgeFactors &ef = edgefactors_quad[i];
+ int num_verts = num_alloced_verts;
+ int num_triangles = 0;
+
+ for (size_t i = 0; i < subpatches.size(); i++) {
+ subpatches[i].inner_grid_vert_offset = num_verts;
+ num_verts += subpatches[i].calc_num_inner_verts();
+ num_triangles += subpatches[i].calc_num_triangles();
+ }
+
+ dice.reserve(num_verts, num_triangles);
+
+ for (size_t i = 0; i < subpatches.size(); i++) {
+ Subpatch &sub = subpatches[i];
- ef.tu0 = max(ef.tu0, 1);
- ef.tu1 = max(ef.tu1, 1);
- ef.tv0 = max(ef.tv0, 1);
- ef.tv1 = max(ef.tv1, 1);
+ sub.edge_u0.T = max(sub.edge_u0.T, 1);
+ sub.edge_u1.T = max(sub.edge_u1.T, 1);
+ sub.edge_v0.T = max(sub.edge_v0.T, 1);
+ sub.edge_v1.T = max(sub.edge_v1.T, 1);
- dice.dice(sub, ef);
+ dice.dice(sub);
}
- subpatches_quad.clear();
- edgefactors_quad.clear();
+ /* Cleanup */
+ subpatches.clear();
+ edges.clear();
}
CCL_NAMESPACE_END
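
For reference, the edge metric computed by DiagSplit::T() above can be summarized as follows (a restatement of the code, not part of the patch). With N = params.test_steps samples P_0 .. P_{N-1} along the edge, per-segment lengths L_i = |P_i - P_{i-1}| (divided by the raster pixel width at the segment midpoint when a camera is set), and dicing rate r = params.dicing_rate:

  T_{\min} = \left\lceil \frac{\sum_{i=1}^{N-1} L_i}{r} \right\rceil,
  \qquad
  T_{\max} = \left\lceil \frac{(N-1)\,\max_i L_i}{r} \right\rceil

The edge receives T = max(T_max, 1), unless T_max - T_min > params.split_threshold, in which case it is either tagged DSPLIT_NON_UNIFORM (forcing a split) or, with recursive_resolve, resolved as the sum of T over the two halves of the edge. limit_edge_factor() then clamps the result to roughly (1 << max_level) * len(Pstart - Pend), halved for patches that came from an ngon.
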
diff --git a/intern/cycles/subd/subd_split.h b/intern/cycles/subd/subd_split.h
index 6e68d8ee598..773f4ddf120 100644
--- a/intern/cycles/subd/subd_split.h
+++ b/intern/cycles/subd/subd_split.h
@@ -23,35 +23,51 @@
* for more details. */
#include "subd/subd_dice.h"
+#include "subd/subd_subpatch.h"
+#include "util/util_deque.h"
#include "util/util_types.h"
#include "util/util_vector.h"
+#include <deque>
+
CCL_NAMESPACE_BEGIN
class Mesh;
class Patch;
-#define DSPLIT_NON_UNIFORM -1
-
class DiagSplit {
- public:
- vector<QuadDice::SubPatch> subpatches_quad;
- vector<QuadDice::EdgeFactors> edgefactors_quad;
-
SubdParams params;
- explicit DiagSplit(const SubdParams &params);
+ vector<Subpatch> subpatches;
+ /* deque is used so that element pointers remain valid when size is changed. */
+ deque<Edge> edges;
float3 to_world(Patch *patch, float2 uv);
- int T(Patch *patch, float2 Pstart, float2 Pend);
+ int T(Patch *patch, float2 Pstart, float2 Pend, bool recursive_resolve = false);
+
+ void limit_edge_factor(int &T, Patch *patch, float2 Pstart, float2 Pend);
+ void resolve_edge_factors(Subpatch &sub);
+
void partition_edge(
Patch *patch, float2 *P, int *t0, int *t1, float2 Pstart, float2 Pend, int t);
- void dispatch(QuadDice::SubPatch &sub, QuadDice::EdgeFactors &ef);
- void split(QuadDice::SubPatch &sub, QuadDice::EdgeFactors &ef, int depth = 0);
+ void split(Subpatch &sub, int depth = 0);
+
+ int num_alloced_verts = 0;
+ int alloc_verts(int n); /* Returns start index of new verts. */
+
+ public:
+ Edge *alloc_edge();
+
+ explicit DiagSplit(const SubdParams &params);
+
+ void split_patches(Patch *patches, size_t patches_byte_stride);
+
+ void split_quad(const Mesh::SubdFace &face, Patch *patch);
+ void split_ngon(const Mesh::SubdFace &face, Patch *patches, size_t patches_byte_stride);
- void split_quad(Patch *patch, QuadDice::SubPatch *subpatch = NULL);
+ void post_split();
};
CCL_NAMESPACE_END
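
A hypothetical caller sketch for the reshaped public interface above; the real call site lives in the mesh tessellation code outside this diff, and `subd_params`, `build_patches()` and the use of a packed array of linear quad patches (the type declared under the "Linear Quad Patch" comment in subd_patch.h) are assumptions of the example:

  /* All per-face patches live in one contiguous buffer; split_patches() steps through
   * it with the given byte stride, splitting quads and ngons, then dices everything
   * in post_split(). */
  vector<LinearQuadPatch> patches = build_patches(mesh); /* hypothetical helper */

  DiagSplit dsplit(subd_params);
  dsplit.split_patches(patches.data(), sizeof(LinearQuadPatch));
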
diff --git a/intern/cycles/subd/subd_subpatch.h b/intern/cycles/subd/subd_subpatch.h
new file mode 100644
index 00000000000..1a32b763cb8
--- /dev/null
+++ b/intern/cycles/subd/subd_subpatch.h
@@ -0,0 +1,219 @@
+/*
+ * Copyright 2011-2018 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SUBD_SUBPATCH_H__
+#define __SUBD_SUBPATCH_H__
+
+#include "util/util_map.h"
+#include "util/util_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Subpatch */
+
+class Subpatch {
+ public:
+ class Patch *patch; /* Patch this is a subpatch of. */
+ int inner_grid_vert_offset;
+
+ struct edge_t {
+ int T;
+ int offset; /* Offset along main edge, interpretation depends on the two flags below. */
+
+ bool indices_decrease_along_edge;
+ bool sub_edges_created_in_reverse_order;
+
+ struct Edge *edge;
+
+ int get_vert_along_edge(int n) const;
+ };
+
+ /*
+ * eu1
+ * c01 --------- c11
+ * | |
+ * ev0 | | ev1
+ * | |
+ * c00 --------- c10
+ * eu0
+ */
+
+ union {
+ float2 corners[4]; /* UV within patch, clockwise starting from uv (0, 0) towards (0, 1) etc. */
+ struct {
+ float2 c00, c01, c11, c10;
+ };
+ };
+
+ union {
+ edge_t
+ edges[4]; /* Edges of this subpatch, each edge starts at the corner of the same index. */
+ struct {
+ edge_t edge_v0, edge_u1, edge_v1, edge_u0;
+ };
+ };
+
+ explicit Subpatch(Patch *patch = nullptr)
+ : patch(patch),
+ c00(make_float2(0.0f, 0.0f)),
+ c01(make_float2(0.0f, 1.0f)),
+ c11(make_float2(1.0f, 1.0f)),
+ c10(make_float2(1.0f, 0.0f))
+ {
+ }
+
+ Subpatch(Patch *patch, float2 c00, float2 c01, float2 c11, float2 c10)
+ : patch(patch), c00(c00), c01(c01), c11(c11), c10(c10)
+ {
+ }
+
+ int calc_num_inner_verts() const
+ {
+ int Mu = max(edge_u0.T, edge_u1.T);
+ int Mv = max(edge_v0.T, edge_v1.T);
+ Mu = max(Mu, 2);
+ Mv = max(Mv, 2);
+ return (Mu - 1) * (Mv - 1);
+ }
+
+ int calc_num_triangles() const
+ {
+ int Mu = max(edge_u0.T, edge_u1.T);
+ int Mv = max(edge_v0.T, edge_v1.T);
+ Mu = max(Mu, 2);
+ Mv = max(Mv, 2);
+
+ int inner_triangles = (Mu - 2) * (Mv - 2) * 2;
+ int edge_triangles = edge_u0.T + edge_u1.T + edge_v0.T + edge_v1.T + (Mu - 2) * 2 +
+ (Mv - 2) * 2;
+
+ return inner_triangles + edge_triangles;
+ }
+
+ int get_vert_along_edge(int e, int n) const;
+
+ int get_vert_along_grid_edge(int edge, int n) const
+ {
+ int Mu = max(edge_u0.T, edge_u1.T);
+ int Mv = max(edge_v0.T, edge_v1.T);
+ Mu = max(Mu, 2);
+ Mv = max(Mv, 2);
+
+ switch (edge) {
+ case 0:
+ return inner_grid_vert_offset + n * (Mu - 1);
+ case 1:
+ return inner_grid_vert_offset + (Mu - 1) * (Mv - 2) + n;
+ case 2:
+ return inner_grid_vert_offset + ((Mu - 1) * (Mv - 1) - 1) - n * (Mu - 1);
+ case 3:
+ return inner_grid_vert_offset + (Mu - 2) - n;
+ }
+
+ return -1;
+ }
+};
+
+struct Edge {
+ /* Number of segments the edge will be diced into, see DiagSplit paper. */
+ int T;
+
+ /* top is edge adjacent to start, bottom is adjacent to end. */
+ Edge *top, *bottom;
+
+ int top_offset, bottom_offset;
+ bool top_indices_decrease, bottom_indices_decrease;
+
+ int start_vert_index;
+ int end_vert_index;
+
+ /* Index of the second vert from this edge's corner along the edge towards the next corner. */
+ int second_vert_index;
+
+ /* Vertices on edge are to be stitched. */
+ bool is_stitch_edge;
+
+ /* Key to match this edge with others to be stitched with.
+ * The ints in the pair are ordered stitching indices */
+ pair<int, int> stitch_edge_key;
+
+ /* Full T along edge (may be larger than T for edges split from ngon edges) */
+ int stitch_edge_T;
+ int stitch_offset;
+ int stitch_top_offset;
+ int stitch_start_vert_index;
+ int stitch_end_vert_index;
+
+ Edge()
+ : T(0),
+ top(nullptr),
+ bottom(nullptr),
+ top_offset(-1),
+ bottom_offset(-1),
+ top_indices_decrease(false),
+ bottom_indices_decrease(false),
+ start_vert_index(-1),
+ end_vert_index(-1),
+ second_vert_index(-1),
+ is_stitch_edge(false),
+ stitch_edge_T(0),
+ stitch_offset(0)
+ {
+ }
+
+ int get_vert_along_edge(int n) const
+ {
+ assert(n >= 0 && n <= T);
+
+ if (n == 0) {
+ return start_vert_index;
+ }
+ else if (n == T) {
+ return end_vert_index;
+ }
+
+ return second_vert_index + n - 1;
+ }
+};
+
+inline int Subpatch::edge_t::get_vert_along_edge(int n) const
+{
+ assert(n >= 0 && n <= T);
+
+ if (!indices_decrease_along_edge && !sub_edges_created_in_reverse_order) {
+ n = offset + n;
+ }
+ else if (!indices_decrease_along_edge && sub_edges_created_in_reverse_order) {
+ n = edge->T - offset - T + n;
+ }
+ else if (indices_decrease_along_edge && !sub_edges_created_in_reverse_order) {
+ n = offset + T - n;
+ }
+ else if (indices_decrease_along_edge && sub_edges_created_in_reverse_order) {
+ n = edge->T - offset - n;
+ }
+
+ return edge->get_vert_along_edge(n);
+}
+
+inline int Subpatch::get_vert_along_edge(int edge, int n) const
+{
+ return edges[edge].get_vert_along_edge(n);
+}
+
+CCL_NAMESPACE_END
+
+#endif /* __SUBD_SUBPATCH_H__ */
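
A quick standalone check of the counting formulas above, for a subpatch whose four edge factors are all T = 4 (so Mu = Mv = 4); DiagSplit::post_split() sums exactly these per-subpatch quantities before calling QuadDice::reserve():

  #include <algorithm>
  #include <cassert>

  int main()
  {
    const int T = 4;
    const int Mu = std::max(T, 2), Mv = std::max(T, 2);

    const int inner_verts = (Mu - 1) * (Mv - 1);               /* calc_num_inner_verts(): 9 */
    const int inner_tris = (Mu - 2) * (Mv - 2) * 2;            /* 8 */
    const int edge_tris = 4 * T + (Mu - 2) * 2 + (Mv - 2) * 2; /* 24 */

    assert(inner_verts == 9 && inner_tris + edge_tris == 32);  /* calc_num_triangles(): 32 */
    return 0;
  }
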
diff --git a/intern/cycles/test/CMakeLists.txt b/intern/cycles/test/CMakeLists.txt
index 98fcc8cd15e..6dcc7f7b3dd 100644
--- a/intern/cycles/test/CMakeLists.txt
+++ b/intern/cycles/test/CMakeLists.txt
@@ -82,25 +82,33 @@ list(APPEND ALL_CYCLES_LIBRARIES
${TIFF_LIBRARY}
${OPENIMAGEIO_LIBRARIES}
${OPENEXR_LIBRARIES}
+ ${OPENVDB_LIBRARIES}
)
include_directories(${INC})
-link_directories(${OPENIMAGEIO_LIBPATH}
- ${BOOST_LIBPATH}
- ${PNG_LIBPATH}
- ${JPEG_LIBPATH}
- ${ZLIB_LIBPATH}
- ${TIFF_LIBPATH}
- ${OPENEXR_LIBPATH}
- ${OPENCOLORIO_LIBPATH})
+link_directories(
+ ${OPENIMAGEIO_LIBPATH}
+ ${BOOST_LIBPATH}
+ ${PNG_LIBPATH}
+ ${JPEG_LIBPATH}
+ ${ZLIB_LIBPATH}
+ ${TIFF_LIBPATH}
+ ${OPENEXR_LIBPATH}
+ ${OPENCOLORIO_LIBPATH}
+ ${OPENVDB_LIBPATH}
+)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${PLATFORM_LINKFLAGS}")
set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} ${PLATFORM_LINKFLAGS_DEBUG}")
CYCLES_TEST(render_graph_finalize "${ALL_CYCLES_LIBRARIES};bf_intern_numaapi")
CYCLES_TEST(util_aligned_malloc "cycles_util")
-CYCLES_TEST(util_path "cycles_util;${BOOST_LIBRARIES};${OPENIMAGEIO_LIBRARIES}")
-CYCLES_TEST(util_string "cycles_util;${BOOST_LIBRARIES};${OPENIMAGEIO_LIBRARIES}")
-CYCLES_TEST(util_task "cycles_util;${BOOST_LIBRARIES};${OPENIMAGEIO_LIBRARIES};bf_intern_numaapi")
-CYCLES_TEST(util_time "cycles_util;${BOOST_LIBRARIES};${OPENIMAGEIO_LIBRARIES}")
+CYCLES_TEST(util_path "cycles_util;${OPENIMAGEIO_LIBRARIES};${BOOST_LIBRARIES}")
+CYCLES_TEST(util_string "cycles_util;${OPENIMAGEIO_LIBRARIES};${BOOST_LIBRARIES}")
+CYCLES_TEST(util_task "cycles_util;${OPENIMAGEIO_LIBRARIES};${BOOST_LIBRARIES};bf_intern_numaapi")
+CYCLES_TEST(util_time "cycles_util;${OPENIMAGEIO_LIBRARIES};${BOOST_LIBRARIES}")
+set_source_files_properties(util_avxf_avx_test.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
+CYCLES_TEST(util_avxf_avx "cycles_util;bf_intern_numaapi;${OPENIMAGEIO_LIBRARIES};${BOOST_LIBRARIES}")
+set_source_files_properties(util_avxf_avx2_test.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
+CYCLES_TEST(util_avxf_avx2 "cycles_util;bf_intern_numaapi;${OPENIMAGEIO_LIBRARIES};${BOOST_LIBRARIES}")
diff --git a/intern/cycles/test/render_graph_finalize_test.cpp b/intern/cycles/test/render_graph_finalize_test.cpp
index 7fb92bfb862..4ea3470cda8 100644
--- a/intern/cycles/test/render_graph_finalize_test.cpp
+++ b/intern/cycles/test/render_graph_finalize_test.cpp
@@ -14,14 +14,18 @@
* limitations under the License.
*/
-#include "testing/testing.h"
#include "testing/mock_log.h"
+#include "testing/testing.h"
+
+#include "device/device.h"
#include "render/graph.h"
-#include "render/scene.h"
#include "render/nodes.h"
+#include "render/scene.h"
+
#include "util/util_array.h"
#include "util/util_logging.h"
+#include "util/util_stats.h"
#include "util/util_string.h"
#include "util/util_vector.h"
@@ -960,6 +964,13 @@ TEST_F(RenderGraph, constant_fold_blackbody)
graph.finalize(scene);
}
+/* A Note About The Math Node
+ *
+ * The clamp option is implemented using graph expansion, where a
+ * Clamp node named "clamp" is added and connected to the output.
+ * So the final result is actually from the node "clamp".
+ */
+
/*
* Tests: Math with all constant inputs (clamp false).
*/
@@ -985,7 +996,7 @@ TEST_F(RenderGraph, constant_fold_math)
TEST_F(RenderGraph, constant_fold_math_clamp)
{
EXPECT_ANY_MESSAGE(log);
- CORRECT_INFO_MESSAGE(log, "Folding Math::Value to constant (1).");
+ CORRECT_INFO_MESSAGE(log, "Folding clamp::Result to constant (1).");
builder
.add_node(ShaderNodeBuilder<MathNode>("Math")
@@ -1003,7 +1014,7 @@ TEST_F(RenderGraph, constant_fold_math_clamp)
* Includes 2 tests: constant on each side.
*/
static void build_math_partial_test_graph(ShaderGraphBuilder &builder,
- NodeMath type,
+ NodeMathType type,
float constval)
{
builder
@@ -1038,7 +1049,7 @@ TEST_F(RenderGraph, constant_fold_part_math_add_0)
/* X + 0 == 0 + X == X */
CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to socket Attribute::Fac.");
CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac.");
- INVALID_INFO_MESSAGE(log, "Folding Out::");
+ INVALID_INFO_MESSAGE(log, "Folding clamp::");
build_math_partial_test_graph(builder, NODE_MATH_ADD, 0.0f);
graph.finalize(scene);
@@ -1053,7 +1064,7 @@ TEST_F(RenderGraph, constant_fold_part_math_sub_0)
/* X - 0 == X */
INVALID_INFO_MESSAGE(log, "Folding Math_Cx::");
CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac.");
- INVALID_INFO_MESSAGE(log, "Folding Out::");
+ INVALID_INFO_MESSAGE(log, "Folding clamp::");
build_math_partial_test_graph(builder, NODE_MATH_SUBTRACT, 0.0f);
graph.finalize(scene);
@@ -1068,7 +1079,7 @@ TEST_F(RenderGraph, constant_fold_part_math_mul_1)
/* X * 1 == 1 * X == X */
CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to socket Attribute::Fac.");
CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac.");
- INVALID_INFO_MESSAGE(log, "Folding Out::");
+ INVALID_INFO_MESSAGE(log, "Folding clamp::");
build_math_partial_test_graph(builder, NODE_MATH_MULTIPLY, 1.0f);
graph.finalize(scene);
@@ -1083,7 +1094,7 @@ TEST_F(RenderGraph, constant_fold_part_math_div_1)
/* X / 1 == X */
INVALID_INFO_MESSAGE(log, "Folding Math_Cx::");
CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac.");
- INVALID_INFO_MESSAGE(log, "Folding Out::");
+ INVALID_INFO_MESSAGE(log, "Folding clamp::");
build_math_partial_test_graph(builder, NODE_MATH_DIVIDE, 1.0f);
graph.finalize(scene);
@@ -1098,7 +1109,7 @@ TEST_F(RenderGraph, constant_fold_part_math_mul_0)
/* X * 0 == 0 * X == 0 */
CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to constant (0).");
CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to constant (0).");
- CORRECT_INFO_MESSAGE(log, "Folding Out::Value to constant (0)");
+ CORRECT_INFO_MESSAGE(log, "Folding clamp::Result to constant (0)");
CORRECT_INFO_MESSAGE(log, "Discarding closure EmissionNode.");
build_math_partial_test_graph(builder, NODE_MATH_MULTIPLY, 0.0f);
@@ -1114,7 +1125,7 @@ TEST_F(RenderGraph, constant_fold_part_math_div_0)
/* 0 / X == 0 */
CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to constant (0).");
INVALID_INFO_MESSAGE(log, "Folding Math_xC::");
- INVALID_INFO_MESSAGE(log, "Folding Out::");
+ INVALID_INFO_MESSAGE(log, "Folding clamp::");
build_math_partial_test_graph(builder, NODE_MATH_DIVIDE, 0.0f);
graph.finalize(scene);
@@ -1129,7 +1140,7 @@ TEST_F(RenderGraph, constant_fold_part_math_pow_0)
/* X ^ 0 == 1 */
INVALID_INFO_MESSAGE(log, "Folding Math_Cx::");
CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to constant (1).");
- INVALID_INFO_MESSAGE(log, "Folding Out::");
+ INVALID_INFO_MESSAGE(log, "Folding clamp::");
build_math_partial_test_graph(builder, NODE_MATH_POWER, 0.0f);
graph.finalize(scene);
@@ -1144,7 +1155,7 @@ TEST_F(RenderGraph, constant_fold_part_math_pow_1)
/* 1 ^ X == 1; X ^ 1 == X */
CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to constant (1)");
CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac.");
- INVALID_INFO_MESSAGE(log, "Folding Out::");
+ INVALID_INFO_MESSAGE(log, "Folding clamp::");
build_math_partial_test_graph(builder, NODE_MATH_POWER, 1.0f);
graph.finalize(scene);
@@ -1156,21 +1167,14 @@ TEST_F(RenderGraph, constant_fold_part_math_pow_1)
TEST_F(RenderGraph, constant_fold_vector_math)
{
EXPECT_ANY_MESSAGE(log);
- CORRECT_INFO_MESSAGE(log, "Folding VectorMath::Value to constant (1).");
CORRECT_INFO_MESSAGE(log, "Folding VectorMath::Vector to constant (3, 0, 0).");
- CORRECT_INFO_MESSAGE(log, "Folding convert_vector_to_float::value_float to constant (1).");
- CORRECT_INFO_MESSAGE(log, "Folding Math::Value to constant (2).");
- CORRECT_INFO_MESSAGE(log, "Folding convert_float_to_color::value_color to constant (2, 2, 2).");
builder
.add_node(ShaderNodeBuilder<VectorMathNode>("VectorMath")
.set(&VectorMathNode::type, NODE_VECTOR_MATH_SUBTRACT)
.set("Vector1", make_float3(1.3f, 0.5f, 0.7f))
.set("Vector2", make_float3(-1.7f, 0.5f, 0.7f)))
- .add_node(ShaderNodeBuilder<MathNode>("Math").set(&MathNode::type, NODE_MATH_ADD))
- .add_connection("VectorMath::Vector", "Math::Value1")
- .add_connection("VectorMath::Value", "Math::Value2")
- .output_color("Math::Value");
+ .output_color("VectorMath::Vector");
graph.finalize(scene);
}
@@ -1180,7 +1184,7 @@ TEST_F(RenderGraph, constant_fold_vector_math)
* Includes 2 tests: constant on each side.
*/
static void build_vecmath_partial_test_graph(ShaderGraphBuilder &builder,
- NodeVectorMath type,
+ NodeVectorMathType type,
float3 constval)
{
builder
@@ -1234,22 +1238,6 @@ TEST_F(RenderGraph, constant_fold_part_vecmath_sub_0)
}
/*
- * Tests: partial folding for Vector Math Dot Product with known 0.
- */
-TEST_F(RenderGraph, constant_fold_part_vecmath_dot_0)
-{
- EXPECT_ANY_MESSAGE(log);
- /* X * 0 == 0 * X == X */
- CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Vector to constant (0, 0, 0).");
- CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Vector to constant (0, 0, 0).");
- CORRECT_INFO_MESSAGE(log, "Folding Out::Vector to constant (0, 0, 0).");
- CORRECT_INFO_MESSAGE(log, "Discarding closure EmissionNode.");
-
- build_vecmath_partial_test_graph(builder, NODE_VECTOR_MATH_DOT_PRODUCT, make_float3(0, 0, 0));
- graph.finalize(scene);
-}
-
-/*
* Tests: partial folding for Vector Math Cross Product with known 0.
*/
TEST_F(RenderGraph, constant_fold_part_vecmath_cross_0)
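
The renamed expectations in this file all follow from the note added above: with use_clamp enabled, the Math node is expanded at graph finalize time roughly as

  Math("Math") --> Clamp("clamp") --> output,

so constant folding now reports results on "clamp::Result" instead of "Math::Value", and "Folding Out::" becomes "Folding clamp::" in the partial-folding tests.
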
diff --git a/intern/cycles/test/util_avxf_avx2_test.cpp b/intern/cycles/test/util_avxf_avx2_test.cpp
new file mode 100644
index 00000000000..9b466ddd3a0
--- /dev/null
+++ b/intern/cycles/test/util_avxf_avx2_test.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#define __KERNEL_AVX2__
+#define __KERNEL_CPU__
+
+#if defined(i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+# include "util_avxf_test.h"
+#endif
diff --git a/intern/cycles/test/util_avxf_avx_test.cpp b/intern/cycles/test/util_avxf_avx_test.cpp
new file mode 100644
index 00000000000..cea67649b80
--- /dev/null
+++ b/intern/cycles/test/util_avxf_avx_test.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#define __KERNEL_AVX__
+#define __KERNEL_CPU__
+
+#if defined(i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+# include "util_avxf_test.h"
+#endif
diff --git a/intern/cycles/test/util_avxf_test.h b/intern/cycles/test/util_avxf_test.h
new file mode 100644
index 00000000000..d93563fdb3f
--- /dev/null
+++ b/intern/cycles/test/util_avxf_test.h
@@ -0,0 +1,222 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "testing/testing.h"
+#include "util/util_system.h"
+#include "util/util_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+bool validate_cpu_capabilities()
+{
+
+#ifdef __KERNEL_AVX2__
+ return system_cpu_support_avx2();
+#else
+# ifdef __KERNEL_AVX__
+ return system_cpu_support_avx();
+# endif
+#endif
+}
+
+#define VALIDATECPU \
+ if (!validate_cpu_capabilities()) \
+ return;
+
+#define compare_vector_scalar(a, b) \
+ for (size_t index = 0; index < a.size; index++) \
+ EXPECT_FLOAT_EQ(a[index], b);
+
+#define compare_vector_vector(a, b) \
+ for (size_t index = 0; index < a.size; index++) \
+ EXPECT_FLOAT_EQ(a[index], b[index]);
+
+#define compare_vector_vector_near(a, b, abserror) \
+ for (size_t index = 0; index < a.size; index++) \
+ EXPECT_NEAR(a[index], b[index], abserror);
+
+#define basic_test_vv(a, b, op) \
+ VALIDATECPU \
+ avxf c = a op b; \
+ for (size_t i = 0; i < a.size; i++) \
+ EXPECT_FLOAT_EQ(c[i], a[i] op b[i]);
+
+/* vector op float tests */
+#define basic_test_vf(a, b, op) \
+ VALIDATECPU \
+ avxf c = a op b; \
+ for (size_t i = 0; i < a.size; i++) \
+ EXPECT_FLOAT_EQ(c[i], a[i] op b);
+
+const avxf avxf_a(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
+const avxf avxf_b(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
+const avxf avxf_c(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f);
+const float float_b = 1.5f;
+
+TEST(util_avx, avxf_add_vv){basic_test_vv(avxf_a, avxf_b, +)} TEST(util_avx, avxf_sub_vv){
+ basic_test_vv(avxf_a, avxf_b, -)} TEST(util_avx, avxf_mul_vv){
+ basic_test_vv(avxf_a, avxf_b, *)} TEST(util_avx, avxf_div_vv){
+ basic_test_vv(avxf_a, avxf_b, /)} TEST(util_avx, avxf_add_vf){
+ basic_test_vf(avxf_a, float_b, +)} TEST(util_avx, avxf_sub_vf){
+ basic_test_vf(avxf_a, float_b, -)} TEST(util_avx, avxf_mul_vf){
+ basic_test_vf(avxf_a, float_b, *)} TEST(util_avx,
+ avxf_div_vf){basic_test_vf(avxf_a, float_b, /)}
+
+TEST(util_avx, avxf_ctor)
+{
+ VALIDATECPU
+ compare_vector_scalar(avxf(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f),
+ static_cast<float>(index));
+ compare_vector_scalar(avxf(1.0f), 1.0f);
+ compare_vector_vector(avxf(1.0f, 2.0f), avxf(1.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 2.0f));
+ compare_vector_vector(avxf(1.0f, 2.0f, 3.0f, 4.0f),
+ avxf(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f));
+ compare_vector_vector(avxf(make_float3(1.0f, 2.0f, 3.0f)),
+ avxf(0.0f, 3.0f, 2.0f, 1.0f, 0.0f, 3.0f, 2.0f, 1.0f));
+}
+
+TEST(util_avx, avxf_sqrt)
+{
+ VALIDATECPU
+ compare_vector_vector(mm256_sqrt(avxf(1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f)),
+ avxf(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f));
+}
+
+TEST(util_avx, avxf_min_max)
+{
+ VALIDATECPU
+ compare_vector_vector(min(avxf_a, avxf_b), avxf_a);
+ compare_vector_vector(max(avxf_a, avxf_b), avxf_b);
+}
+
+TEST(util_avx, avxf_set_sign)
+{
+ VALIDATECPU
+ avxf res = set_sign_bit<1, 0, 0, 0, 0, 0, 0, 0>(avxf_a);
+ compare_vector_vector(res, avxf(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, -0.8f));
+}
+
+TEST(util_avx, avxf_msub)
+{
+ VALIDATECPU
+ avxf res = msub(avxf_a, avxf_b, avxf_c);
+ avxf exp = avxf((avxf_a[7] * avxf_b[7]) - avxf_c[7],
+ (avxf_a[6] * avxf_b[6]) - avxf_c[6],
+ (avxf_a[5] * avxf_b[5]) - avxf_c[5],
+ (avxf_a[4] * avxf_b[4]) - avxf_c[4],
+ (avxf_a[3] * avxf_b[3]) - avxf_c[3],
+ (avxf_a[2] * avxf_b[2]) - avxf_c[2],
+ (avxf_a[1] * avxf_b[1]) - avxf_c[1],
+ (avxf_a[0] * avxf_b[0]) - avxf_c[0]);
+ compare_vector_vector(res, exp);
+}
+
+TEST(util_avx, avxf_madd)
+{
+ VALIDATECPU
+ avxf res = madd(avxf_a, avxf_b, avxf_c);
+ avxf exp = avxf((avxf_a[7] * avxf_b[7]) + avxf_c[7],
+ (avxf_a[6] * avxf_b[6]) + avxf_c[6],
+ (avxf_a[5] * avxf_b[5]) + avxf_c[5],
+ (avxf_a[4] * avxf_b[4]) + avxf_c[4],
+ (avxf_a[3] * avxf_b[3]) + avxf_c[3],
+ (avxf_a[2] * avxf_b[2]) + avxf_c[2],
+ (avxf_a[1] * avxf_b[1]) + avxf_c[1],
+ (avxf_a[0] * avxf_b[0]) + avxf_c[0]);
+ compare_vector_vector(res, exp);
+}
+
+TEST(util_avx, avxf_nmadd)
+{
+ VALIDATECPU
+ avxf res = nmadd(avxf_a, avxf_b, avxf_c);
+ avxf exp = avxf(avxf_c[7] - (avxf_a[7] * avxf_b[7]),
+ avxf_c[6] - (avxf_a[6] * avxf_b[6]),
+ avxf_c[5] - (avxf_a[5] * avxf_b[5]),
+ avxf_c[4] - (avxf_a[4] * avxf_b[4]),
+ avxf_c[3] - (avxf_a[3] * avxf_b[3]),
+ avxf_c[2] - (avxf_a[2] * avxf_b[2]),
+ avxf_c[1] - (avxf_a[1] * avxf_b[1]),
+ avxf_c[0] - (avxf_a[0] * avxf_b[0]));
+ compare_vector_vector(res, exp);
+}
+
+TEST(util_avx, avxf_compare)
+{
+ VALIDATECPU
+ avxf a(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f);
+ avxf b(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f);
+ avxb res = a <= b;
+ int exp[8] = {
+ a[0] <= b[0] ? -1 : 0,
+ a[1] <= b[1] ? -1 : 0,
+ a[2] <= b[2] ? -1 : 0,
+ a[3] <= b[3] ? -1 : 0,
+ a[4] <= b[4] ? -1 : 0,
+ a[5] <= b[5] ? -1 : 0,
+ a[6] <= b[6] ? -1 : 0,
+ a[7] <= b[7] ? -1 : 0,
+ };
+ compare_vector_vector(res, exp);
+}
+
+TEST(util_avx, avxf_permute)
+{
+ VALIDATECPU
+ avxf res = permute<3, 0, 1, 7, 6, 5, 2, 4>(avxf_b);
+ compare_vector_vector(res, avxf(4.0f, 6.0f, 3.0f, 2.0f, 1.0f, 7.0f, 8.0f, 5.0f));
+}
+
+TEST(util_avx, avxf_blend)
+{
+ VALIDATECPU
+ avxf res = blend<0, 0, 1, 0, 1, 0, 1, 0>(avxf_a, avxf_b);
+ compare_vector_vector(res, avxf(0.1f, 0.2f, 3.0f, 0.4f, 5.0f, 0.6f, 7.0f, 0.8f));
+}
+
+TEST(util_avx, avxf_shuffle)
+{
+ VALIDATECPU
+ avxf res = shuffle<0, 1, 2, 3, 1, 3, 2, 0>(avxf_a);
+ compare_vector_vector(res, avxf(0.4f, 0.2f, 0.1f, 0.3f, 0.5f, 0.6f, 0.7f, 0.8f));
+}
+
+TEST(util_avx, avxf_cross)
+{
+ VALIDATECPU
+ avxf res = cross(avxf_b, avxf_c);
+ compare_vector_vector_near(res,
+ avxf(0.0f,
+ -9.5367432e-07f,
+ 0.0f,
+ 4.7683716e-07f,
+ 0.0f,
+ -3.8146973e-06f,
+ 3.8146973e-06f,
+ 3.8146973e-06f),
+ 0.000002000f);
+}
+
+TEST(util_avx, avxf_dot3)
+{
+ VALIDATECPU
+ float den, den2;
+ dot3(avxf_a, avxf_b, den, den2);
+ EXPECT_FLOAT_EQ(den, 14.9f);
+ EXPECT_FLOAT_EQ(den2, 2.9f);
+}
+
+CCL_NAMESPACE_END
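
As a sanity check of the avxf_dot3 expectations above (reading avxf constructor arguments from the high lane down to lane 0, which is what the msub/madd expectation ordering implies), the two results appear to be 3-component dot products of the low and high 128-bit halves:

  const float den = 0.8f * 8.0f + 0.7f * 7.0f + 0.6f * 6.0f;   /* lanes 0..2 -> 14.9 */
  const float den2 = 0.4f * 4.0f + 0.3f * 3.0f + 0.2f * 2.0f;  /* lanes 4..6 -> 2.9  */
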
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
index 1c7a6549253..f5e488d1bd2 100644
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -25,14 +25,15 @@ set(SRC
util_thread.cpp
util_time.cpp
util_transform.cpp
+ util_windows.cpp
)
set(LIB
-
+ ${TBB_LIBRARIES}
)
if(WITH_CYCLES_STANDALONE)
- if (WITH_CYCLES_STANDALONE_GUI)
+ if(WITH_CYCLES_STANDALONE_GUI)
list(APPEND SRC
util_view.cpp
)
@@ -40,9 +41,13 @@ if(WITH_CYCLES_STANDALONE)
endif()
if(CYCLES_STANDALONE_REPOSITORY)
- list(APPEND INC_SYS ../../third_party/numaapi/include)
+ list(APPEND INC_SYS
+ ../../third_party/numaapi/include
+ )
else()
- list(APPEND INC_SYS ../../numaapi/include)
+ list(APPEND INC_SYS
+ ../../numaapi/include
+ )
endif()
set(SRC_HEADERS
@@ -54,6 +59,8 @@ set(SRC_HEADERS
util_boundbox.h
util_debug.h
util_defines.h
+ util_deque.h
+ util_disjoint_set.h
util_guarded_allocator.cpp
util_foreach.h
util_function.h
@@ -79,6 +86,7 @@ set(SRC_HEADERS
util_math_matrix.h
util_md5.h
util_murmurhash.h
+ util_openimagedenoise.h
util_opengl.h
util_optimization.h
util_param.h
@@ -90,11 +98,10 @@ set(SRC_HEADERS
util_rect.h
util_set.h
util_simd.h
- util_sky_model.cpp
- util_sky_model.h
- util_sky_model_data.h
util_avxf.h
util_avxb.h
+ util_avxi.h
+ util_semaphore.h
util_sseb.h
util_ssef.h
util_ssei.h
@@ -104,6 +111,7 @@ set(SRC_HEADERS
util_string.h
util_system.h
util_task.h
+ util_tbb.h
util_texture.h
util_thread.h
util_time.h
@@ -117,7 +125,7 @@ set(SRC_HEADERS
util_types_float4_impl.h
util_types_float8.h
util_types_float8_impl.h
- util_types_int2.h
+ util_types_int2.h
util_types_int2_impl.h
util_types_int3.h
util_types_int3_impl.h
@@ -138,6 +146,7 @@ set(SRC_HEADERS
util_types_ushort4.h
util_types_vector3.h
util_types_vector3_impl.h
+ util_unique_ptr.h
util_vector.h
util_version.h
util_view.h
diff --git a/intern/cycles/util/util_algorithm.h b/intern/cycles/util/util_algorithm.h
index 62093039625..63abd4e92a3 100644
--- a/intern/cycles/util/util_algorithm.h
+++ b/intern/cycles/util/util_algorithm.h
@@ -25,6 +25,7 @@ using std::max;
using std::min;
using std::remove;
using std::sort;
+using std::stable_sort;
using std::swap;
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_aligned_malloc.cpp b/intern/cycles/util/util_aligned_malloc.cpp
index 104e6c5e3f4..9b729cd4fc4 100644
--- a/intern/cycles/util/util_aligned_malloc.cpp
+++ b/intern/cycles/util/util_aligned_malloc.cpp
@@ -46,13 +46,7 @@ void *util_aligned_malloc(size_t size, int alignment)
return MEM_mallocN_aligned(size, alignment, "Cycles Aligned Alloc");
#elif defined(_WIN32)
return _aligned_malloc(size, alignment);
-#elif defined(__APPLE__)
- /* On Mac OS X, both the heap and the stack are guaranteed 16-byte aligned so
- * they work natively with SSE types with no further work.
- */
- assert(alignment == 16);
- return malloc(size);
-#elif defined(__FreeBSD__) || defined(__NetBSD__)
+#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__APPLE__)
void *result;
if (posix_memalign(&result, alignment, size)) {
/* Non-zero means allocation error
diff --git a/intern/cycles/util/util_aligned_malloc.h b/intern/cycles/util/util_aligned_malloc.h
index 0f006e95f6a..df7d93c056d 100644
--- a/intern/cycles/util/util_aligned_malloc.h
+++ b/intern/cycles/util/util_aligned_malloc.h
@@ -30,6 +30,21 @@ void *util_aligned_malloc(size_t size, int alignment);
/* Free memory allocated by util_aligned_malloc. */
void util_aligned_free(void *ptr);
+/* Aligned new operator. */
+template<typename T, typename... Args> T *util_aligned_new(Args... args)
+{
+ void *mem = util_aligned_malloc(sizeof(T), alignof(T));
+ return new (mem) T(args...);
+}
+
+template<typename T> void util_aligned_delete(T *t)
+{
+ if (t) {
+ t->~T();
+ util_aligned_free(t);
+ }
+}
+
CCL_NAMESPACE_END
#endif /* __UTIL_ALIGNED_MALLOC_H__ */
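
The util_aligned_new()/util_aligned_delete() pair added above combines the aligned allocator with placement new and an explicit destructor call. A minimal stand-alone sketch of the same pattern, assuming std::aligned_alloc/std::free as stand-ins for the Cycles allocator (MSVC would need _aligned_malloc/_aligned_free instead, as the surrounding code shows):

  #include <cstdlib>
  #include <new>

  /* Placement-new over aligned storage, mirroring util_aligned_new/_delete.
   * sizeof(T) is always a multiple of alignof(T), so the aligned_alloc size
   * requirement is met. */
  template<typename T, typename... Args> T *aligned_new_sketch(Args... args)
  {
    void *mem = std::aligned_alloc(alignof(T), sizeof(T));
    return mem ? new (mem) T(args...) : nullptr;
  }

  template<typename T> void aligned_delete_sketch(T *t)
  {
    if (t) {
      t->~T();      /* run the destructor explicitly, */
      std::free(t); /* then release the aligned storage. */
    }
  }
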
diff --git a/intern/cycles/util/util_array.h b/intern/cycles/util/util_array.h
index 1d7e39344f6..db80ab474e0 100644
--- a/intern/cycles/util/util_array.h
+++ b/intern/cycles/util/util_array.h
@@ -63,7 +63,9 @@ template<typename T, size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES> class arra
}
else {
data_ = mem_allocate(from.datasize_);
- memcpy(data_, from.data_, from.datasize_ * sizeof(T));
+ if (from.datasize_ > 0) {
+ memcpy(data_, from.data_, from.datasize_ * sizeof(T));
+ }
datasize_ = from.datasize_;
capacity_ = datasize_;
}
@@ -73,7 +75,9 @@ template<typename T, size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES> class arra
{
if (this != &from) {
resize(from.size());
- memcpy((void *)data_, from.data_, datasize_ * sizeof(T));
+ if (datasize_ > 0) {
+ memcpy((void *)data_, from.data_, datasize_ * sizeof(T));
+ }
}
return *this;
@@ -83,7 +87,7 @@ template<typename T, size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES> class arra
{
resize(from.size());
- if (from.size() > 0) {
+ if (from.size() > 0 && datasize_ > 0) {
memcpy(data_, &from[0], datasize_ * sizeof(T));
}
@@ -100,6 +104,9 @@ template<typename T, size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES> class arra
if (datasize_ != other.datasize_) {
return false;
}
+ if (datasize_ == 0) {
+ return true;
+ }
return memcmp(data_, other.data_, datasize_ * sizeof(T)) == 0;
}
diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h
index a8ea1dc925e..13d177d2b25 100644
--- a/intern/cycles/util/util_atomic.h
+++ b/intern/cycles/util/util_atomic.h
@@ -77,6 +77,7 @@ ccl_device_inline float atomic_compare_and_swap_float(volatile ccl_global float
# define atomic_fetch_and_add_uint32(p, x) atomic_add((p), (x))
# define atomic_fetch_and_inc_uint32(p) atomic_inc((p))
# define atomic_fetch_and_dec_uint32(p) atomic_dec((p))
+# define atomic_fetch_and_or_uint32(p, x) atomic_or((p), (x))
# define CCL_LOCAL_MEM_FENCE CLK_LOCAL_MEM_FENCE
# define ccl_barrier(flags) barrier(flags)
@@ -91,6 +92,7 @@ ccl_device_inline float atomic_compare_and_swap_float(volatile ccl_global float
# define atomic_fetch_and_sub_uint32(p, x) atomicSub((unsigned int *)(p), (unsigned int)(x))
# define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1)
# define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_sub_uint32((p), 1)
+# define atomic_fetch_and_or_uint32(p, x) atomicOr((unsigned int *)(p), (unsigned int)(x))
ccl_device_inline float atomic_compare_and_swap_float(volatile float *dest,
const float old_val,
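
The new atomic_fetch_and_or_uint32() macro maps to atomic_or() on OpenCL and atomicOr() on CUDA. A host-side analogue of the same semantics, using std::atomic purely for illustration (not part of the patch):

  #include <atomic>
  #include <cstdint>

  /* Atomically OR bits into a shared word and return the previous value,
   * matching the fetch-and-or behaviour of the device-side macros. */
  static uint32_t fetch_and_or_sketch(std::atomic<uint32_t> &flags, uint32_t bits)
  {
    return flags.fetch_or(bits);
  }
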
diff --git a/intern/cycles/util/util_avxb.h b/intern/cycles/util/util_avxb.h
index 615dceaf9d5..5c03b1d88d7 100644
--- a/intern/cycles/util/util_avxb.h
+++ b/intern/cycles/util/util_avxb.h
@@ -16,7 +16,7 @@
*/
#ifndef __UTIL_AVXB_H__
-# define __UTIL_AVXB_H__
+#define __UTIL_AVXB_H__
CCL_NAMESPACE_BEGIN
@@ -53,6 +53,10 @@ struct avxb {
__forceinline avxb(const __m256 input) : m256(input)
{
}
+ __forceinline avxb(const __m128 &a, const __m128 &b)
+ : m256(_mm256_insertf128_ps(_mm256_castps128_ps256(a), b, 1))
+ {
+ }
__forceinline operator const __m256 &(void) const
{
return m256;
@@ -146,9 +150,9 @@ __forceinline const avxb operator!=(const avxb &a, const avxb &b)
}
__forceinline const avxb operator==(const avxb &a, const avxb &b)
{
-# ifdef __KERNEL_AVX2__
+#ifdef __KERNEL_AVX2__
return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b));
-# else
+#else
__m128i a_lo = _mm_castps_si128(_mm256_extractf128_ps(a, 0));
__m128i a_hi = _mm_castps_si128(_mm256_extractf128_ps(a, 1));
__m128i b_lo = _mm_castps_si128(_mm256_extractf128_ps(b, 0));
@@ -157,16 +161,16 @@ __forceinline const avxb operator==(const avxb &a, const avxb &b)
__m128i c_hi = _mm_cmpeq_epi32(a_hi, b_hi);
__m256i result = _mm256_insertf128_si256(_mm256_castsi128_si256(c_lo), c_hi, 1);
return _mm256_castsi256_ps(result);
-# endif
+#endif
}
__forceinline const avxb select(const avxb &m, const avxb &t, const avxb &f)
{
-# if defined(__KERNEL_SSE41__)
+#if defined(__KERNEL_SSE41__)
return _mm256_blendv_ps(f, t, m);
-# else
+#else
return _mm256_or_ps(_mm256_and_ps(m, t), _mm256_andnot_ps(m, f));
-# endif
+#endif
}
////////////////////////////////////////////////////////////////////////////////
@@ -186,18 +190,18 @@ __forceinline const avxb unpackhi(const avxb &a, const avxb &b)
/// Reduction Operations
////////////////////////////////////////////////////////////////////////////////
-# if defined(__KERNEL_SSE41__)
+#if defined(__KERNEL_SSE41__)
__forceinline size_t popcnt(const avxb &a)
{
return __popcnt(_mm256_movemask_ps(a));
}
-# else
+#else
__forceinline size_t popcnt(const avxb &a)
{
return bool(a[0]) + bool(a[1]) + bool(a[2]) + bool(a[3]) + bool(a[4]) + bool(a[5]) + bool(a[6]) +
bool(a[7]);
}
-# endif
+#endif
__forceinline bool reduce_and(const avxb &a)
{
@@ -234,8 +238,6 @@ ccl_device_inline void print_avxb(const char *label, const avxb &a)
printf("%s: %d %d %d %d %d %d %d %d\n", label, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]);
}
-#endif
-
CCL_NAMESPACE_END
-//#endif
+#endif
diff --git a/intern/cycles/util/util_avxf.h b/intern/cycles/util/util_avxf.h
index 156607e65fb..1fb3ded422f 100644
--- a/intern/cycles/util/util_avxf.h
+++ b/intern/cycles/util/util_avxf.h
@@ -15,7 +15,7 @@
*/
#ifndef __UTIL_AVXF_H__
-# define __UTIL_AVXF_H__
+#define __UTIL_AVXF_H__
CCL_NAMESPACE_BEGIN
@@ -140,6 +140,11 @@ __forceinline void dot3(const avxf &a, const avxf &b, float &den, float &den2)
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
+__forceinline const avxf cast(const __m256i &a)
+{
+ return _mm256_castsi256_ps(a);
+}
+
__forceinline const avxf mm256_sqrt(const avxf &a)
{
return _mm256_sqrt_ps(a.m256);
@@ -259,16 +264,35 @@ template<size_t i0> __forceinline const avxf shuffle(const avxf &a)
return shuffle<i0>(a, a);
}
+template<size_t i> __forceinline float extract(const avxf &a)
+{
+ __m256 b = shuffle<i, i, i, i>(a).m256;
+ return _mm256_cvtss_f32(b);
+}
+template<> __forceinline float extract<0>(const avxf &a)
+{
+ return _mm256_cvtss_f32(a.m256);
+}
+
+__forceinline ssef low(const avxf &a)
+{
+ return _mm256_extractf128_ps(a.m256, 0);
+}
+__forceinline ssef high(const avxf &a)
+{
+ return _mm256_extractf128_ps(a.m256, 1);
+}
+
template<int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7>
__forceinline const avxf permute(const avxf &a)
{
-# ifdef __KERNEL_AVX2__
+#ifdef __KERNEL_AVX2__
return _mm256_permutevar8x32_ps(a, _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0));
-# else
+#else
float temp[8];
_mm256_storeu_ps((float *)&temp, a);
return avxf(temp[i7], temp[i6], temp[i5], temp[i4], temp[i3], temp[i2], temp[i1], temp[i0]);
-# endif
+#endif
}
template<int S0, int S1, int S2, int S3, int S4, int S5, int S6, int S7>
@@ -309,39 +333,51 @@ __forceinline avxf mini(const avxf &a, const avxf &b)
////////////////////////////////////////////////////////////////////////////////
__forceinline const avxf madd(const avxf &a, const avxf &b, const avxf &c)
{
-# ifdef __KERNEL_AVX2__
+#ifdef __KERNEL_AVX2__
return _mm256_fmadd_ps(a, b, c);
-# else
+#else
return c + (a * b);
-# endif
+#endif
}
__forceinline const avxf nmadd(const avxf &a, const avxf &b, const avxf &c)
{
-# ifdef __KERNEL_AVX2__
+#ifdef __KERNEL_AVX2__
return _mm256_fnmadd_ps(a, b, c);
-# else
+#else
return c - (a * b);
-# endif
+#endif
}
__forceinline const avxf msub(const avxf &a, const avxf &b, const avxf &c)
{
-# ifdef __KERNEL_AVX2__
+#ifdef __KERNEL_AVX2__
return _mm256_fmsub_ps(a, b, c);
-# else
+#else
return (a * b) - c;
-# endif
+#endif
}
////////////////////////////////////////////////////////////////////////////////
-/// Comparison Operators
+/// Comparison Operators + Select
////////////////////////////////////////////////////////////////////////////////
__forceinline const avxb operator<=(const avxf &a, const avxf &b)
{
return _mm256_cmp_ps(a.m256, b.m256, _CMP_LE_OS);
}
-#endif
+__forceinline const avxf select(const avxb &m, const avxf &t, const avxf &f)
+{
+ return _mm256_blendv_ps(f, t, m);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Common Functions
+////////////////////////////////////////////////////////////////////////////////
+
+__forceinline avxf mix(const avxf &a, const avxf &b, const avxf &t)
+{
+ return madd(t, b, (avxf(1.0f) - t) * a);
+}
#ifndef _mm256_set_m128
# define _mm256_set_m128(/* __m128 */ hi, /* __m128 */ lo) \
@@ -352,3 +388,5 @@ __forceinline const avxb operator<=(const avxf &a, const avxf &b)
_mm256_set_m128(_mm_loadu_ps(hiaddr), _mm_loadu_ps(loaddr))
CCL_NAMESPACE_END
+
+#endif
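
Of the avxf additions, select() and mix() are easiest to read as per-lane scalar operations. A scalar model of what each lane computes (illustrative only; the real code operates on eight lanes at once):

  /* select(m, t, f) takes t where the mask lane is set and f otherwise,
   * which is what _mm256_blendv_ps does; mix(a, b, t) is a linear
   * interpolation, t = 0 giving a and t = 1 giving b. */
  inline float select_lane(bool m, float t, float f)
  {
    return m ? t : f;
  }

  inline float mix_lane(float a, float b, float t)
  {
    return (1.0f - t) * a + t * b;
  }
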
diff --git a/intern/cycles/util/util_avxi.h b/intern/cycles/util/util_avxi.h
new file mode 100644
index 00000000000..e658a4f848f
--- /dev/null
+++ b/intern/cycles/util/util_avxi.h
@@ -0,0 +1,745 @@
+/*
+ * Copyright 2009-2013 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_AVXI_H__
+#define __UTIL_AVXI_H__
+
+CCL_NAMESPACE_BEGIN
+
+struct avxb;
+
+struct avxi {
+ typedef avxb Mask; // mask type for us
+ enum { size = 8 }; // number of SIMD elements
+ union { // data
+ __m256i m256;
+#if !defined(__KERNEL_AVX2__)
+ struct {
+ __m128i l, h;
+ };
+#endif
+ int32_t v[8];
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constructors, Assignment & Cast Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline avxi()
+ {
+ }
+ __forceinline avxi(const avxi &a)
+ {
+ m256 = a.m256;
+ }
+ __forceinline avxi &operator=(const avxi &a)
+ {
+ m256 = a.m256;
+ return *this;
+ }
+
+ __forceinline avxi(const __m256i a) : m256(a)
+ {
+ }
+ __forceinline operator const __m256i &(void)const
+ {
+ return m256;
+ }
+ __forceinline operator __m256i &(void)
+ {
+ return m256;
+ }
+
+ __forceinline explicit avxi(const ssei &a)
+ : m256(_mm256_insertf128_si256(_mm256_castsi128_si256(a), a, 1))
+ {
+ }
+ __forceinline avxi(const ssei &a, const ssei &b)
+ : m256(_mm256_insertf128_si256(_mm256_castsi128_si256(a), b, 1))
+ {
+ }
+#if defined(__KERNEL_AVX2__)
+ __forceinline avxi(const __m128i &a, const __m128i &b)
+ : m256(_mm256_insertf128_si256(_mm256_castsi128_si256(a), b, 1))
+ {
+ }
+#else
+ __forceinline avxi(const __m128i &a, const __m128i &b) : l(a), h(b)
+ {
+ }
+#endif
+ __forceinline explicit avxi(const int32_t *const a)
+ : m256(_mm256_castps_si256(_mm256_loadu_ps((const float *)a)))
+ {
+ }
+ __forceinline avxi(int32_t a) : m256(_mm256_set1_epi32(a))
+ {
+ }
+ __forceinline avxi(int32_t a, int32_t b) : m256(_mm256_set_epi32(b, a, b, a, b, a, b, a))
+ {
+ }
+ __forceinline avxi(int32_t a, int32_t b, int32_t c, int32_t d)
+ : m256(_mm256_set_epi32(d, c, b, a, d, c, b, a))
+ {
+ }
+ __forceinline avxi(
+ int32_t a, int32_t b, int32_t c, int32_t d, int32_t e, int32_t f, int32_t g, int32_t h)
+ : m256(_mm256_set_epi32(h, g, f, e, d, c, b, a))
+ {
+ }
+
+ __forceinline explicit avxi(const __m256 a) : m256(_mm256_cvtps_epi32(a))
+ {
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline avxi(ZeroTy) : m256(_mm256_setzero_si256())
+ {
+ }
+#if defined(__KERNEL_AVX2__)
+ __forceinline avxi(OneTy) : m256(_mm256_set1_epi32(1))
+ {
+ }
+ __forceinline avxi(PosInfTy) : m256(_mm256_set1_epi32(pos_inf))
+ {
+ }
+ __forceinline avxi(NegInfTy) : m256(_mm256_set1_epi32(neg_inf))
+ {
+ }
+#else
+ __forceinline avxi(OneTy) : m256(_mm256_set_epi32(1, 1, 1, 1, 1, 1, 1, 1))
+ {
+ }
+ __forceinline avxi(PosInfTy)
+ : m256(_mm256_set_epi32(
+ pos_inf, pos_inf, pos_inf, pos_inf, pos_inf, pos_inf, pos_inf, pos_inf))
+ {
+ }
+ __forceinline avxi(NegInfTy)
+ : m256(_mm256_set_epi32(
+ neg_inf, neg_inf, neg_inf, neg_inf, neg_inf, neg_inf, neg_inf, neg_inf))
+ {
+ }
+#endif
+ __forceinline avxi(StepTy) : m256(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0))
+ {
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Array Access
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline const int32_t &operator[](const size_t i) const
+ {
+ assert(i < 8);
+ return v[i];
+ }
+ __forceinline int32_t &operator[](const size_t i)
+ {
+ assert(i < 8);
+ return v[i];
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+/// Unary Operators
+////////////////////////////////////////////////////////////////////////////////
+
+__forceinline const avxi cast(const __m256 &a)
+{
+ return _mm256_castps_si256(a);
+}
+__forceinline const avxi operator+(const avxi &a)
+{
+ return a;
+}
+#if defined(__KERNEL_AVX2__)
+__forceinline const avxi operator-(const avxi &a)
+{
+ return _mm256_sub_epi32(_mm256_setzero_si256(), a.m256);
+}
+__forceinline const avxi abs(const avxi &a)
+{
+ return _mm256_abs_epi32(a.m256);
+}
+#else
+__forceinline const avxi operator-(const avxi &a)
+{
+ return avxi(_mm_sub_epi32(_mm_setzero_si128(), a.l), _mm_sub_epi32(_mm_setzero_si128(), a.h));
+}
+__forceinline const avxi abs(const avxi &a)
+{
+ return avxi(_mm_abs_epi32(a.l), _mm_abs_epi32(a.h));
+}
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+/// Binary Operators
+////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__KERNEL_AVX2__)
+__forceinline const avxi operator+(const avxi &a, const avxi &b)
+{
+ return _mm256_add_epi32(a.m256, b.m256);
+}
+#else
+__forceinline const avxi operator+(const avxi &a, const avxi &b)
+{
+ return avxi(_mm_add_epi32(a.l, b.l), _mm_add_epi32(a.h, b.h));
+}
+#endif
+__forceinline const avxi operator+(const avxi &a, const int32_t b)
+{
+ return a + avxi(b);
+}
+__forceinline const avxi operator+(const int32_t a, const avxi &b)
+{
+ return avxi(a) + b;
+}
+
+#if defined(__KERNEL_AVX2__)
+__forceinline const avxi operator-(const avxi &a, const avxi &b)
+{
+ return _mm256_sub_epi32(a.m256, b.m256);
+}
+#else
+__forceinline const avxi operator-(const avxi &a, const avxi &b)
+{
+ return avxi(_mm_sub_epi32(a.l, b.l), _mm_sub_epi32(a.h, b.h));
+}
+#endif
+__forceinline const avxi operator-(const avxi &a, const int32_t b)
+{
+ return a - avxi(b);
+}
+__forceinline const avxi operator-(const int32_t a, const avxi &b)
+{
+ return avxi(a) - b;
+}
+
+#if defined(__KERNEL_AVX2__)
+__forceinline const avxi operator*(const avxi &a, const avxi &b)
+{
+ return _mm256_mullo_epi32(a.m256, b.m256);
+}
+#else
+__forceinline const avxi operator*(const avxi &a, const avxi &b)
+{
+ return avxi(_mm_mullo_epi32(a.l, b.l), _mm_mullo_epi32(a.h, b.h));
+}
+#endif
+__forceinline const avxi operator*(const avxi &a, const int32_t b)
+{
+ return a * avxi(b);
+}
+__forceinline const avxi operator*(const int32_t a, const avxi &b)
+{
+ return avxi(a) * b;
+}
+
+#if defined(__KERNEL_AVX2__)
+__forceinline const avxi operator&(const avxi &a, const avxi &b)
+{
+ return _mm256_and_si256(a.m256, b.m256);
+}
+#else
+__forceinline const avxi operator&(const avxi &a, const avxi &b)
+{
+ return _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b)));
+}
+#endif
+__forceinline const avxi operator&(const avxi &a, const int32_t b)
+{
+ return a & avxi(b);
+}
+__forceinline const avxi operator&(const int32_t a, const avxi &b)
+{
+ return avxi(a) & b;
+}
+
+#if defined(__KERNEL_AVX2__)
+__forceinline const avxi operator|(const avxi &a, const avxi &b)
+{
+ return _mm256_or_si256(a.m256, b.m256);
+}
+#else
+__forceinline const avxi operator|(const avxi &a, const avxi &b)
+{
+ return _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b)));
+}
+#endif
+__forceinline const avxi operator|(const avxi &a, const int32_t b)
+{
+ return a | avxi(b);
+}
+__forceinline const avxi operator|(const int32_t a, const avxi &b)
+{
+ return avxi(a) | b;
+}
+
+#if defined(__KERNEL_AVX2__)
+__forceinline const avxi operator^(const avxi &a, const avxi &b)
+{
+ return _mm256_xor_si256(a.m256, b.m256);
+}
+#else
+__forceinline const avxi operator^(const avxi &a, const avxi &b)
+{
+ return _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b)));
+}
+#endif
+__forceinline const avxi operator^(const avxi &a, const int32_t b)
+{
+ return a ^ avxi(b);
+}
+__forceinline const avxi operator^(const int32_t a, const avxi &b)
+{
+ return avxi(a) ^ b;
+}
+
+#if defined(__KERNEL_AVX2__)
+__forceinline const avxi operator<<(const avxi &a, const int32_t n)
+{
+ return _mm256_slli_epi32(a.m256, n);
+}
+__forceinline const avxi operator>>(const avxi &a, const int32_t n)
+{
+ return _mm256_srai_epi32(a.m256, n);
+}
+
+__forceinline const avxi sra(const avxi &a, const int32_t b)
+{
+ return _mm256_srai_epi32(a.m256, b);
+}
+__forceinline const avxi srl(const avxi &a, const int32_t b)
+{
+ return _mm256_srli_epi32(a.m256, b);
+}
+#else
+__forceinline const avxi operator<<(const avxi &a, const int32_t n)
+{
+ return avxi(_mm_slli_epi32(a.l, n), _mm_slli_epi32(a.h, n));
+}
+__forceinline const avxi operator>>(const avxi &a, const int32_t n)
+{
+ return avxi(_mm_srai_epi32(a.l, n), _mm_srai_epi32(a.h, n));
+}
+
+__forceinline const avxi sra(const avxi &a, const int32_t b)
+{
+ return avxi(_mm_srai_epi32(a.l, b), _mm_srai_epi32(a.h, b));
+}
+__forceinline const avxi srl(const avxi &a, const int32_t b)
+{
+ return avxi(_mm_srli_epi32(a.l, b), _mm_srli_epi32(a.h, b));
+}
+#endif
+
+#if defined(__KERNEL_AVX2__)
+__forceinline const avxi min(const avxi &a, const avxi &b)
+{
+ return _mm256_min_epi32(a.m256, b.m256);
+}
+#else
+__forceinline const avxi min(const avxi &a, const avxi &b)
+{
+ return avxi(_mm_min_epi32(a.l, b.l), _mm_min_epi32(a.h, b.h));
+}
+#endif
+__forceinline const avxi min(const avxi &a, const int32_t b)
+{
+ return min(a, avxi(b));
+}
+__forceinline const avxi min(const int32_t a, const avxi &b)
+{
+ return min(avxi(a), b);
+}
+
+#if defined(__KERNEL_AVX2__)
+__forceinline const avxi max(const avxi &a, const avxi &b)
+{
+ return _mm256_max_epi32(a.m256, b.m256);
+}
+#else
+__forceinline const avxi max(const avxi &a, const avxi &b)
+{
+ return avxi(_mm_max_epi32(a.l, b.l), _mm_max_epi32(a.h, b.h));
+}
+#endif
+__forceinline const avxi max(const avxi &a, const int32_t b)
+{
+ return max(a, avxi(b));
+}
+__forceinline const avxi max(const int32_t a, const avxi &b)
+{
+ return max(avxi(a), b);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Assignment Operators
+////////////////////////////////////////////////////////////////////////////////
+
+__forceinline avxi &operator+=(avxi &a, const avxi &b)
+{
+ return a = a + b;
+}
+__forceinline avxi &operator+=(avxi &a, const int32_t b)
+{
+ return a = a + b;
+}
+
+__forceinline avxi &operator-=(avxi &a, const avxi &b)
+{
+ return a = a - b;
+}
+__forceinline avxi &operator-=(avxi &a, const int32_t b)
+{
+ return a = a - b;
+}
+
+__forceinline avxi &operator*=(avxi &a, const avxi &b)
+{
+ return a = a * b;
+}
+__forceinline avxi &operator*=(avxi &a, const int32_t b)
+{
+ return a = a * b;
+}
+
+__forceinline avxi &operator&=(avxi &a, const avxi &b)
+{
+ return a = a & b;
+}
+__forceinline avxi &operator&=(avxi &a, const int32_t b)
+{
+ return a = a & b;
+}
+
+__forceinline avxi &operator|=(avxi &a, const avxi &b)
+{
+ return a = a | b;
+}
+__forceinline avxi &operator|=(avxi &a, const int32_t b)
+{
+ return a = a | b;
+}
+
+__forceinline avxi &operator^=(avxi &a, const avxi &b)
+{
+ return a = a ^ b;
+}
+__forceinline avxi &operator^=(avxi &a, const int32_t b)
+{
+ return a = a ^ b;
+}
+
+__forceinline avxi &operator<<=(avxi &a, const int32_t b)
+{
+ return a = a << b;
+}
+__forceinline avxi &operator>>=(avxi &a, const int32_t b)
+{
+ return a = a >> b;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Comparison Operators + Select
+////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__KERNEL_AVX2__)
+__forceinline const avxb operator==(const avxi &a, const avxi &b)
+{
+ return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a.m256, b.m256));
+}
+#else
+__forceinline const avxb operator==(const avxi &a, const avxi &b)
+{
+ return avxb(_mm_castsi128_ps(_mm_cmpeq_epi32(a.l, b.l)),
+ _mm_castsi128_ps(_mm_cmpeq_epi32(a.h, b.h)));
+}
+#endif
+__forceinline const avxb operator==(const avxi &a, const int32_t b)
+{
+ return a == avxi(b);
+}
+__forceinline const avxb operator==(const int32_t a, const avxi &b)
+{
+ return avxi(a) == b;
+}
+
+__forceinline const avxb operator!=(const avxi &a, const avxi &b)
+{
+ return !(a == b);
+}
+__forceinline const avxb operator!=(const avxi &a, const int32_t b)
+{
+ return a != avxi(b);
+}
+__forceinline const avxb operator!=(const int32_t a, const avxi &b)
+{
+ return avxi(a) != b;
+}
+
+#if defined(__KERNEL_AVX2__)
+__forceinline const avxb operator<(const avxi &a, const avxi &b)
+{
+ return _mm256_castsi256_ps(_mm256_cmpgt_epi32(b.m256, a.m256));
+}
+#else
+__forceinline const avxb operator<(const avxi &a, const avxi &b)
+{
+ return avxb(_mm_castsi128_ps(_mm_cmplt_epi32(a.l, b.l)),
+ _mm_castsi128_ps(_mm_cmplt_epi32(a.h, b.h)));
+}
+#endif
+__forceinline const avxb operator<(const avxi &a, const int32_t b)
+{
+ return a < avxi(b);
+}
+__forceinline const avxb operator<(const int32_t a, const avxi &b)
+{
+ return avxi(a) < b;
+}
+
+__forceinline const avxb operator>=(const avxi &a, const avxi &b)
+{
+ return !(a < b);
+}
+__forceinline const avxb operator>=(const avxi &a, const int32_t b)
+{
+ return a >= avxi(b);
+}
+__forceinline const avxb operator>=(const int32_t a, const avxi &b)
+{
+ return avxi(a) >= b;
+}
+
+#if defined(__KERNEL_AVX2__)
+__forceinline const avxb operator>(const avxi &a, const avxi &b)
+{
+ return _mm256_castsi256_ps(_mm256_cmpgt_epi32(a.m256, b.m256));
+}
+#else
+__forceinline const avxb operator>(const avxi &a, const avxi &b)
+{
+ return avxb(_mm_castsi128_ps(_mm_cmpgt_epi32(a.l, b.l)),
+ _mm_castsi128_ps(_mm_cmpgt_epi32(a.h, b.h)));
+}
+#endif
+__forceinline const avxb operator>(const avxi &a, const int32_t b)
+{
+ return a > avxi(b);
+}
+__forceinline const avxb operator>(const int32_t a, const avxi &b)
+{
+ return avxi(a) > b;
+}
+
+__forceinline const avxb operator<=(const avxi &a, const avxi &b)
+{
+ return !(a > b);
+}
+__forceinline const avxb operator<=(const avxi &a, const int32_t b)
+{
+ return a <= avxi(b);
+}
+__forceinline const avxb operator<=(const int32_t a, const avxi &b)
+{
+ return avxi(a) <= b;
+}
+
+__forceinline const avxi select(const avxb &m, const avxi &t, const avxi &f)
+{
+ return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(f), _mm256_castsi256_ps(t), m));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Movement/Shifting/Shuffling Functions
+////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__KERNEL_AVX2__)
+__forceinline avxi unpacklo(const avxi &a, const avxi &b)
+{
+ return _mm256_unpacklo_epi32(a.m256, b.m256);
+}
+__forceinline avxi unpackhi(const avxi &a, const avxi &b)
+{
+ return _mm256_unpackhi_epi32(a.m256, b.m256);
+}
+#else
+__forceinline avxi unpacklo(const avxi &a, const avxi &b)
+{
+ return _mm256_castps_si256(_mm256_unpacklo_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b)));
+}
+__forceinline avxi unpackhi(const avxi &a, const avxi &b)
+{
+ return _mm256_castps_si256(_mm256_unpackhi_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b)));
+}
+#endif
+
+template<size_t i> __forceinline const avxi shuffle(const avxi &a)
+{
+ return _mm256_castps_si256(_mm256_permute_ps(_mm256_castsi256_ps(a), _MM_SHUFFLE(i, i, i, i)));
+}
+
+template<size_t i0, size_t i1> __forceinline const avxi shuffle(const avxi &a)
+{
+ return _mm256_permute2f128_si256(a, a, (i1 << 4) | (i0 << 0));
+}
+
+template<size_t i0, size_t i1> __forceinline const avxi shuffle(const avxi &a, const avxi &b)
+{
+ return _mm256_permute2f128_si256(a, b, (i1 << 4) | (i0 << 0));
+}
+
+template<size_t i0, size_t i1, size_t i2, size_t i3>
+__forceinline const avxi shuffle(const avxi &a)
+{
+ return _mm256_castps_si256(
+ _mm256_permute_ps(_mm256_castsi256_ps(a), _MM_SHUFFLE(i3, i2, i1, i0)));
+}
+
+template<size_t i0, size_t i1, size_t i2, size_t i3>
+__forceinline const avxi shuffle(const avxi &a, const avxi &b)
+{
+ return _mm256_castps_si256(_mm256_shuffle_ps(
+ _mm256_castsi256_ps(a), _mm256_castsi256_ps(b), _MM_SHUFFLE(i3, i2, i1, i0)));
+}
+
+template<> __forceinline const avxi shuffle<0, 0, 2, 2>(const avxi &b)
+{
+ return _mm256_castps_si256(_mm256_moveldup_ps(_mm256_castsi256_ps(b)));
+}
+template<> __forceinline const avxi shuffle<1, 1, 3, 3>(const avxi &b)
+{
+ return _mm256_castps_si256(_mm256_movehdup_ps(_mm256_castsi256_ps(b)));
+}
+template<> __forceinline const avxi shuffle<0, 1, 0, 1>(const avxi &b)
+{
+ return _mm256_castps_si256(
+ _mm256_castpd_ps(_mm256_movedup_pd(_mm256_castps_pd(_mm256_castsi256_ps(b)))));
+}
+
+__forceinline const avxi broadcast(const int *ptr)
+{
+ return _mm256_castps_si256(_mm256_broadcast_ss((const float *)ptr));
+}
+template<size_t i> __forceinline const avxi insert(const avxi &a, const ssei &b)
+{
+ return _mm256_insertf128_si256(a, b, i);
+}
+template<size_t i> __forceinline const ssei extract(const avxi &a)
+{
+ return _mm256_extractf128_si256(a, i);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Reductions
+////////////////////////////////////////////////////////////////////////////////
+
+__forceinline const avxi vreduce_min2(const avxi &v)
+{
+ return min(v, shuffle<1, 0, 3, 2>(v));
+}
+__forceinline const avxi vreduce_min4(const avxi &v)
+{
+ avxi v1 = vreduce_min2(v);
+ return min(v1, shuffle<2, 3, 0, 1>(v1));
+}
+__forceinline const avxi vreduce_min(const avxi &v)
+{
+ avxi v1 = vreduce_min4(v);
+ return min(v1, shuffle<1, 0>(v1));
+}
+
+__forceinline const avxi vreduce_max2(const avxi &v)
+{
+ return max(v, shuffle<1, 0, 3, 2>(v));
+}
+__forceinline const avxi vreduce_max4(const avxi &v)
+{
+ avxi v1 = vreduce_max2(v);
+ return max(v1, shuffle<2, 3, 0, 1>(v1));
+}
+__forceinline const avxi vreduce_max(const avxi &v)
+{
+ avxi v1 = vreduce_max4(v);
+ return max(v1, shuffle<1, 0>(v1));
+}
+
+__forceinline const avxi vreduce_add2(const avxi &v)
+{
+ return v + shuffle<1, 0, 3, 2>(v);
+}
+__forceinline const avxi vreduce_add4(const avxi &v)
+{
+ avxi v1 = vreduce_add2(v);
+ return v1 + shuffle<2, 3, 0, 1>(v1);
+}
+__forceinline const avxi vreduce_add(const avxi &v)
+{
+ avxi v1 = vreduce_add4(v);
+ return v1 + shuffle<1, 0>(v1);
+}
+
+__forceinline int reduce_min(const avxi &v)
+{
+ return extract<0>(extract<0>(vreduce_min(v)));
+}
+__forceinline int reduce_max(const avxi &v)
+{
+ return extract<0>(extract<0>(vreduce_max(v)));
+}
+__forceinline int reduce_add(const avxi &v)
+{
+ return extract<0>(extract<0>(vreduce_add(v)));
+}
+
+__forceinline size_t select_min(const avxi &v)
+{
+ return __bsf(movemask(v == vreduce_min(v)));
+}
+__forceinline size_t select_max(const avxi &v)
+{
+ return __bsf(movemask(v == vreduce_max(v)));
+}
+
+__forceinline size_t select_min(const avxb &valid, const avxi &v)
+{
+ const avxi a = select(valid, v, avxi(pos_inf));
+ return __bsf(movemask(valid & (a == vreduce_min(a))));
+}
+__forceinline size_t select_max(const avxb &valid, const avxi &v)
+{
+ const avxi a = select(valid, v, avxi(neg_inf));
+ return __bsf(movemask(valid & (a == vreduce_max(a))));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Output Operators
+////////////////////////////////////////////////////////////////////////////////
+
+ccl_device_inline void print_avxi(const char *label, const avxi &a)
+{
+ printf("%s: %d %d %d %d %d %d %d %d\n", label, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]);
+}
+
+CCL_NAMESPACE_END
+
+#endif
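
The reduction helpers at the end of this new header follow the usual logarithmic SIMD pattern: each vreduce_* round combines the vector with a shuffled copy of itself, so after three rounds every lane holds the result and reduce_* only has to read lane 0. A scalar model of reduce_min() (a sketch, not part of the patch):

  /* Three shuffle-and-min rounds spread the minimum to every lane. */
  inline int reduce_min_sketch(const int v[8])
  {
    int m[8];
    for (int i = 0; i < 8; i++) {
      m[i] = v[i];
    }
    /* Round 1: neighbour within each pair (shuffle<1, 0, 3, 2>). */
    for (int i = 0; i < 8; i += 2) {
      m[i] = m[i + 1] = (m[i] < m[i + 1]) ? m[i] : m[i + 1];
    }
    /* Round 2: across pairs within each 128-bit half (shuffle<2, 3, 0, 1>). */
    for (int i = 0; i < 8; i += 4) {
      int lo = (m[i] < m[i + 2]) ? m[i] : m[i + 2];
      m[i] = m[i + 1] = m[i + 2] = m[i + 3] = lo;
    }
    /* Round 3: across the two 128-bit halves (shuffle<1, 0>). */
    return (m[0] < m[4]) ? m[0] : m[4]; /* reduce_min() extracts lane 0 */
  }
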
diff --git a/intern/cycles/util/util_boundbox.h b/intern/cycles/util/util_boundbox.h
index b5c3f1a8954..7fab7bd5a15 100644
--- a/intern/cycles/util/util_boundbox.h
+++ b/intern/cycles/util/util_boundbox.h
@@ -17,8 +17,8 @@
#ifndef __UTIL_BOUNDBOX_H__
#define __UTIL_BOUNDBOX_H__
-#include <math.h>
#include <float.h>
+#include <math.h>
#include "util/util_math.h"
#include "util/util_string.h"
diff --git a/intern/cycles/util/util_color.h b/intern/cycles/util/util_color.h
index ca4c393f66e..c6937ca78fe 100644
--- a/intern/cycles/util/util_color.h
+++ b/intern/cycles/util/util_color.h
@@ -43,11 +43,29 @@ ccl_device uchar4 color_float_to_byte(float3 c)
return make_uchar4(r, g, b, 0);
}
+ccl_device uchar4 color_float4_to_uchar4(float4 c)
+{
+ uchar r, g, b, a;
+
+ r = float_to_byte(c.x);
+ g = float_to_byte(c.y);
+ b = float_to_byte(c.z);
+ a = float_to_byte(c.w);
+
+ return make_uchar4(r, g, b, a);
+}
+
ccl_device_inline float3 color_byte_to_float(uchar4 c)
{
return make_float3(c.x * (1.0f / 255.0f), c.y * (1.0f / 255.0f), c.z * (1.0f / 255.0f));
}
+ccl_device_inline float4 color_uchar4_to_float4(uchar4 c)
+{
+ return make_float4(
+ c.x * (1.0f / 255.0f), c.y * (1.0f / 255.0f), c.z * (1.0f / 255.0f), c.w * (1.0f / 255.0f));
+}
+
ccl_device float color_srgb_to_linear(float c)
{
if (c < 0.04045f)
@@ -167,7 +185,8 @@ ccl_device float3 xyY_to_xyz(float x, float y, float Y)
#ifdef __KERNEL_SSE2__
/*
* Calculate initial guess for arg^exp based on float representation
- * This method gives a constant bias, which can be easily compensated by multiplication with bias_coeff.
+ * This method gives a constant bias,
+ * which can be easily compensated by multiplication with bias_coeff.
* Gives better results for exponents near 1 (e. g. 4/5).
* exp = exponent, encoded as uint32_t
* e2coeff = 2^(127/exponent - 127) * bias_coeff^(1/exponent), encoded as uint32_t
@@ -235,6 +254,12 @@ ccl_device float3 color_linear_to_srgb_v3(float3 c)
color_linear_to_srgb(c.x), color_linear_to_srgb(c.y), color_linear_to_srgb(c.z));
}
+ccl_device float4 color_linear_to_srgb_v4(float4 c)
+{
+ return make_float4(
+ color_linear_to_srgb(c.x), color_linear_to_srgb(c.y), color_linear_to_srgb(c.z), c.w);
+}
+
ccl_device float4 color_srgb_to_linear_v4(float4 c)
{
#ifdef __KERNEL_SSE2__
@@ -250,6 +275,20 @@ ccl_device float4 color_srgb_to_linear_v4(float4 c)
#endif
}
+ccl_device float3 color_highlight_compress(float3 color, float3 *variance)
+{
+ color += make_float3(1.0f, 1.0f, 1.0f);
+ if (variance) {
+ *variance *= sqr3(make_float3(1.0f, 1.0f, 1.0f) / color);
+ }
+ return log3(color);
+}
+
+ccl_device float3 color_highlight_uncompress(float3 color)
+{
+ return exp3(color) - make_float3(1.0f, 1.0f, 1.0f);
+}
+
CCL_NAMESPACE_END
#endif /* __UTIL_COLOR_H__ */
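
color_highlight_compress() maps each channel through log(1 + x) and scales the variance by the squared derivative of that mapping, i.e. first-order error propagation; color_highlight_uncompress() inverts it with exp(x) - 1. A single-channel sketch of the same math (illustrative only):

  #include <cmath>

  inline float highlight_compress_sketch(float c, float *variance)
  {
    c += 1.0f;
    if (variance) {
      *variance *= 1.0f / (c * c); /* var(log(c)) ~= var(c) / c^2 */
    }
    return std::log(c);
  }

  inline float highlight_uncompress_sketch(float c)
  {
    return std::exp(c) - 1.0f;
  }
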
diff --git a/intern/cycles/util/util_debug.cpp b/intern/cycles/util/util_debug.cpp
index aabfea7fc49..74ecefa1917 100644
--- a/intern/cycles/util/util_debug.cpp
+++ b/intern/cycles/util/util_debug.cpp
@@ -31,7 +31,7 @@ DebugFlags::CPU::CPU()
sse41(true),
sse3(true),
sse2(true),
- bvh_layout(BVH_LAYOUT_DEFAULT),
+ bvh_layout(BVH_LAYOUT_AUTO),
split_kernel(false)
{
reset();
@@ -57,18 +57,7 @@ void DebugFlags::CPU::reset()
#undef STRINGIFY
#undef CHECK_CPU_FLAGS
- if (getenv("CYCLES_BVH2") != NULL) {
- bvh_layout = BVH_LAYOUT_BVH2;
- }
- else if (getenv("CYCLES_BVH4") != NULL) {
- bvh_layout = BVH_LAYOUT_BVH4;
- }
- else if (getenv("CYCLES_BVH8") != NULL) {
- bvh_layout = BVH_LAYOUT_BVH8;
- }
- else {
- bvh_layout = BVH_LAYOUT_DEFAULT;
- }
+ bvh_layout = BVH_LAYOUT_AUTO;
split_kernel = false;
}
@@ -86,6 +75,17 @@ void DebugFlags::CUDA::reset()
split_kernel = false;
}
+DebugFlags::OptiX::OptiX()
+{
+ reset();
+}
+
+void DebugFlags::OptiX::reset()
+{
+ cuda_streams = 1;
+ curves_api = false;
+}
+
DebugFlags::OpenCL::OpenCL() : device_type(DebugFlags::OpenCL::DEVICE_ALL), debug(false)
{
reset();
@@ -120,7 +120,7 @@ void DebugFlags::OpenCL::reset()
debug = (getenv("CYCLES_OPENCL_DEBUG") != NULL);
}
-DebugFlags::DebugFlags() : viewport_static_bvh(false)
+DebugFlags::DebugFlags() : viewport_static_bvh(false), running_inside_blender(false)
{
/* Nothing for now. */
}
@@ -130,6 +130,7 @@ void DebugFlags::reset()
viewport_static_bvh = false;
cpu.reset();
cuda.reset();
+ optix.reset();
opencl.reset();
}
@@ -145,7 +146,10 @@ std::ostream &operator<<(std::ostream &os, DebugFlagsConstRef debug_flags)
<< " Split : " << string_from_bool(debug_flags.cpu.split_kernel) << "\n";
os << "CUDA flags:\n"
- << " Adaptive Compile: " << string_from_bool(debug_flags.cuda.adaptive_compile) << "\n";
+ << " Adaptive Compile : " << string_from_bool(debug_flags.cuda.adaptive_compile) << "\n";
+
+ os << "OptiX flags:\n"
+ << " CUDA streams : " << debug_flags.optix.cuda_streams << "\n";
const char *opencl_device_type;
switch (debug_flags.opencl.device_type) {
diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h
index d668ddc6d6c..6ac4beb55b8 100644
--- a/intern/cycles/util/util_debug.h
+++ b/intern/cycles/util/util_debug.h
@@ -33,6 +33,8 @@ class DebugFlags {
/* Use static BVH in viewport, to match final render exactly. */
bool viewport_static_bvh;
+ bool running_inside_blender;
+
/* Descriptor of CPU feature-set to be used. */
struct CPU {
CPU();
@@ -71,10 +73,10 @@ class DebugFlags {
return sse2;
}
- /* Requested BVH size.
+ /* Requested BVH layout.
*
- * Rendering will use widest possible BVH which is below or equal
- * this one.
+ * By default the fastest will be used. For debugging, the BVH layout used by
+ * other CPUs and GPUs can be selected here instead.
*/
BVHLayout bvh_layout;
@@ -97,6 +99,20 @@ class DebugFlags {
bool split_kernel;
};
+ /* Descriptor of OptiX feature-set to be used. */
+ struct OptiX {
+ OptiX();
+
+ /* Reset flags to their defaults. */
+ void reset();
+
+ /* Number of CUDA streams to launch kernels concurrently from. */
+ int cuda_streams;
+
+ /* Use OptiX curves API for hair instead of custom implementation. */
+ bool curves_api;
+ };
+
/* Descriptor of OpenCL feature-set to be used. */
struct OpenCL {
OpenCL();
@@ -141,7 +157,8 @@ class DebugFlags {
/* Use debug version of the kernel. */
bool debug;
- /* TODO(mai): Currently this is only for OpenCL, but we should have it implemented for all devices. */
+ /* TODO(mai): Currently this is only for OpenCL, but we should have it implemented for all
+ * devices. */
/* Artificial memory limit in bytes (0 if disabled). */
size_t mem_limit;
};
@@ -162,6 +179,9 @@ class DebugFlags {
/* Requested CUDA flags. */
CUDA cuda;
+ /* Requested OptiX flags. */
+ OptiX optix;
+
/* Requested OpenCL flags. */
OpenCL opencl;
diff --git a/intern/cycles/util/util_defines.h b/intern/cycles/util/util_defines.h
index 391d20c04a4..e8e414587fb 100644
--- a/intern/cycles/util/util_defines.h
+++ b/intern/cycles/util/util_defines.h
@@ -15,6 +15,11 @@
* limitations under the License.
*/
+/* clang-format off */
+
+/* #define __forceinline triggers a bug in some clang-format versions, disable
+ * format for entire file to keep results consistent. */
+
#ifndef __UTIL_DEFINES_H__
#define __UTIL_DEFINES_H__
@@ -30,6 +35,7 @@
#ifndef __KERNEL_GPU__
# define ccl_device static inline
# define ccl_device_noinline static
+# define ccl_device_noinline_cpu ccl_device_noinline
# define ccl_global
# define ccl_static_constant static const
# define ccl_constant const
@@ -38,6 +44,8 @@
# define ccl_private
# define ccl_restrict __restrict
# define ccl_ref &
+# define ccl_optional_struct_init
+# define ccl_loop_no_unroll
# define __KERNEL_WITH_SSE_ALIGN__
# if defined(_WIN32) && !defined(FREE_WINDOWS)
diff --git a/intern/cycles/util/util_deque.h b/intern/cycles/util/util_deque.h
new file mode 100644
index 00000000000..ccac961aa7d
--- /dev/null
+++ b/intern/cycles/util/util_deque.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2011-2018 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_DEQUE_H__
+#define __UTIL_DEQUE_H__
+
+#include <deque>
+
+CCL_NAMESPACE_BEGIN
+
+using std::deque;
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_DEQUE_H__ */
diff --git a/intern/cycles/util/util_disjoint_set.h b/intern/cycles/util/util_disjoint_set.h
new file mode 100644
index 00000000000..946632371d2
--- /dev/null
+++ b/intern/cycles/util/util_disjoint_set.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_DISJOINT_SET_H__
+#define __UTIL_DISJOINT_SET_H__
+
+#include "util_array.h"
+#include <utility>
+
+CCL_NAMESPACE_BEGIN
+
+class DisjointSet {
+ private:
+ array<size_t> parents;
+ array<size_t> ranks;
+
+ public:
+ DisjointSet(size_t size) : parents(size), ranks(size)
+ {
+ for (size_t i = 0; i < size; i++) {
+ parents[i] = i;
+ ranks[i] = 0;
+ }
+ }
+
+ size_t find(size_t x)
+ {
+ size_t root = x;
+ while (parents[root] != root) {
+ root = parents[root];
+ }
+ while (parents[x] != root) {
+ size_t parent = parents[x];
+ parents[x] = root;
+ x = parent;
+ }
+ return root;
+ }
+
+ void join(size_t x, size_t y)
+ {
+ size_t x_root = find(x);
+ size_t y_root = find(y);
+
+ if (x_root == y_root) {
+ return;
+ }
+
+ if (ranks[x_root] < ranks[y_root]) {
+ std::swap(x_root, y_root);
+ }
+ parents[y_root] = x_root;
+
+ if (ranks[x_root] == ranks[y_root]) {
+ ranks[x_root]++;
+ }
+ }
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_DISJOINT_SET_H__ */
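
The new DisjointSet is a standard union-find: find() does path compression and join() unions by rank, keeping both operations near-constant amortized time. A small usage sketch against the class as defined above:

  /* Group six elements into {0, 1, 2}, {3, 4} and {5}, then query membership. */
  static bool disjoint_set_demo()
  {
    DisjointSet ds(6);
    ds.join(0, 1);
    ds.join(1, 2); /* {0, 1, 2} */
    ds.join(3, 4); /* {3, 4}; element 5 stays a singleton */

    return ds.find(0) == ds.find(2) && /* same set */
           ds.find(2) != ds.find(3);   /* different sets */
  }
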
diff --git a/intern/cycles/util/util_guarded_allocator.h b/intern/cycles/util/util_guarded_allocator.h
index 2d09326d2ca..f78cc5f5da9 100644
--- a/intern/cycles/util/util_guarded_allocator.h
+++ b/intern/cycles/util/util_guarded_allocator.h
@@ -18,6 +18,7 @@
#define __UTIL_GUARDED_ALLOCATOR_H__
#include <cstddef>
+#include <cstdlib>
#include <memory>
#ifdef WITH_BLENDER_GUARDEDALLOC
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index 9c40f5310c2..8de62893ba8 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -17,8 +17,8 @@
#ifndef __UTIL_HALF_H__
#define __UTIL_HALF_H__
-#include "util/util_types.h"
#include "util/util_math.h"
+#include "util/util_types.h"
#ifdef __KERNEL_SSE2__
# include "util/util_simd.h"
@@ -36,7 +36,8 @@ CCL_NAMESPACE_BEGIN
/* CUDA has its own half data type, no need to define then */
# ifndef __KERNEL_CUDA__
-/* Implementing this as a class rather than a typedef so that the compiler can tell it apart from unsigned shorts. */
+/* Implementing this as a class rather than a typedef so that the compiler can tell it apart from
+ * unsigned shorts. */
class half {
public:
half() : v(0)
diff --git a/intern/cycles/util/util_hash.h b/intern/cycles/util/util_hash.h
index 785482967db..0021eec169b 100644
--- a/intern/cycles/util/util_hash.h
+++ b/intern/cycles/util/util_hash.h
@@ -21,41 +21,357 @@
CCL_NAMESPACE_BEGIN
-ccl_device_inline uint hash_int_2d(uint kx, uint ky)
-{
+/* ***** Jenkins Lookup3 Hash Functions ***** */
+
+/* Source: http://burtleburtle.net/bob/c/lookup3.c */
+
#define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
+#define mix(a, b, c) \
+ { \
+ a -= c; \
+ a ^= rot(c, 4); \
+ c += b; \
+ b -= a; \
+ b ^= rot(a, 6); \
+ a += c; \
+ c -= b; \
+ c ^= rot(b, 8); \
+ b += a; \
+ a -= c; \
+ a ^= rot(c, 16); \
+ c += b; \
+ b -= a; \
+ b ^= rot(a, 19); \
+ a += c; \
+ c -= b; \
+ c ^= rot(b, 4); \
+ b += a; \
+ } \
+ ((void)0)
+
+#define final(a, b, c) \
+ { \
+ c ^= b; \
+ c -= rot(b, 14); \
+ a ^= c; \
+ a -= rot(c, 11); \
+ b ^= a; \
+ b -= rot(a, 25); \
+ c ^= b; \
+ c -= rot(b, 16); \
+ a ^= c; \
+ a -= rot(c, 4); \
+ b ^= a; \
+ b -= rot(a, 14); \
+ c ^= b; \
+ c -= rot(b, 24); \
+ } \
+ ((void)0)
+
+ccl_device_inline uint hash_uint(uint kx)
+{
uint a, b, c;
+ a = b = c = 0xdeadbeef + (1 << 2) + 13;
+
+ a += kx;
+ final(a, b, c);
+ return c;
+}
+
+ccl_device_inline uint hash_uint2(uint kx, uint ky)
+{
+ uint a, b, c;
a = b = c = 0xdeadbeef + (2 << 2) + 13;
+
+ b += ky;
+ a += kx;
+ final(a, b, c);
+
+ return c;
+}
+
+ccl_device_inline uint hash_uint3(uint kx, uint ky, uint kz)
+{
+ uint a, b, c;
+ a = b = c = 0xdeadbeef + (3 << 2) + 13;
+
+ c += kz;
+ b += ky;
+ a += kx;
+ final(a, b, c);
+
+ return c;
+}
+
+ccl_device_inline uint hash_uint4(uint kx, uint ky, uint kz, uint kw)
+{
+ uint a, b, c;
+ a = b = c = 0xdeadbeef + (4 << 2) + 13;
+
a += kx;
b += ky;
+ c += kz;
+ mix(a, b, c);
- c ^= b;
- c -= rot(b, 14);
- a ^= c;
- a -= rot(c, 11);
- b ^= a;
- b -= rot(a, 25);
- c ^= b;
- c -= rot(b, 16);
- a ^= c;
- a -= rot(c, 4);
- b ^= a;
- b -= rot(a, 14);
- c ^= b;
- c -= rot(b, 24);
+ a += kw;
+ final(a, b, c);
return c;
+}
#undef rot
+#undef final
+#undef mix
+
+/* Hashing uint or uint[234] into a float in the range [0, 1]. */
+
+ccl_device_inline float hash_uint_to_float(uint kx)
+{
+ return (float)hash_uint(kx) / (float)0xFFFFFFFFu;
+}
+
+ccl_device_inline float hash_uint2_to_float(uint kx, uint ky)
+{
+ return (float)hash_uint2(kx, ky) / (float)0xFFFFFFFFu;
+}
+
+ccl_device_inline float hash_uint3_to_float(uint kx, uint ky, uint kz)
+{
+ return (float)hash_uint3(kx, ky, kz) / (float)0xFFFFFFFFu;
+}
+
+ccl_device_inline float hash_uint4_to_float(uint kx, uint ky, uint kz, uint kw)
+{
+ return (float)hash_uint4(kx, ky, kz, kw) / (float)0xFFFFFFFFu;
+}
+
+/* Hashing float or float[234] into a float in the range [0, 1]. */
+
+ccl_device_inline float hash_float_to_float(float k)
+{
+ return hash_uint_to_float(__float_as_uint(k));
+}
+
+ccl_device_inline float hash_float2_to_float(float2 k)
+{
+ return hash_uint2_to_float(__float_as_uint(k.x), __float_as_uint(k.y));
+}
+
+ccl_device_inline float hash_float3_to_float(float3 k)
+{
+ return hash_uint3_to_float(__float_as_uint(k.x), __float_as_uint(k.y), __float_as_uint(k.z));
+}
+
+ccl_device_inline float hash_float4_to_float(float4 k)
+{
+ return hash_uint4_to_float(
+ __float_as_uint(k.x), __float_as_uint(k.y), __float_as_uint(k.z), __float_as_uint(k.w));
+}
+
+/* Hashing float[234] into float[234] of components in the range [0, 1]. */
+
+ccl_device_inline float2 hash_float2_to_float2(float2 k)
+{
+ return make_float2(hash_float2_to_float(k), hash_float3_to_float(make_float3(k.x, k.y, 1.0f)));
+}
+
+ccl_device_inline float3 hash_float3_to_float3(float3 k)
+{
+ return make_float3(hash_float3_to_float(k),
+ hash_float4_to_float(make_float4(k.x, k.y, k.z, 1.0f)),
+ hash_float4_to_float(make_float4(k.x, k.y, k.z, 2.0f)));
+}
+
+ccl_device_inline float4 hash_float4_to_float4(float4 k)
+{
+ return make_float4(hash_float4_to_float(k),
+ hash_float4_to_float(make_float4(k.w, k.x, k.y, k.z)),
+ hash_float4_to_float(make_float4(k.z, k.w, k.x, k.y)),
+ hash_float4_to_float(make_float4(k.y, k.z, k.w, k.x)));
+}
+
+/* Hashing float or float[234] into float3 of components in range [0, 1]. */
+
+ccl_device_inline float3 hash_float_to_float3(float k)
+{
+ return make_float3(hash_float_to_float(k),
+ hash_float2_to_float(make_float2(k, 1.0f)),
+ hash_float2_to_float(make_float2(k, 2.0f)));
+}
+
+ccl_device_inline float3 hash_float2_to_float3(float2 k)
+{
+ return make_float3(hash_float2_to_float(k),
+ hash_float3_to_float(make_float3(k.x, k.y, 1.0f)),
+ hash_float3_to_float(make_float3(k.x, k.y, 2.0f)));
}
-ccl_device_inline uint hash_int(uint k)
+ccl_device_inline float3 hash_float4_to_float3(float4 k)
{
- return hash_int_2d(k, 0);
+ return make_float3(hash_float4_to_float(k),
+ hash_float4_to_float(make_float4(k.z, k.x, k.w, k.y)),
+ hash_float4_to_float(make_float4(k.w, k.z, k.y, k.x)));
}
+/* SSE Versions Of Jenkins Lookup3 Hash Functions */
+
+#ifdef __KERNEL_SSE2__
+# define rot(x, k) (((x) << (k)) | (srl(x, 32 - (k))))
+
+# define mix(a, b, c) \
+ { \
+ a -= c; \
+ a ^= rot(c, 4); \
+ c += b; \
+ b -= a; \
+ b ^= rot(a, 6); \
+ a += c; \
+ c -= b; \
+ c ^= rot(b, 8); \
+ b += a; \
+ a -= c; \
+ a ^= rot(c, 16); \
+ c += b; \
+ b -= a; \
+ b ^= rot(a, 19); \
+ a += c; \
+ c -= b; \
+ c ^= rot(b, 4); \
+ b += a; \
+ }
+
+# define final(a, b, c) \
+ { \
+ c ^= b; \
+ c -= rot(b, 14); \
+ a ^= c; \
+ a -= rot(c, 11); \
+ b ^= a; \
+ b -= rot(a, 25); \
+ c ^= b; \
+ c -= rot(b, 16); \
+ a ^= c; \
+ a -= rot(c, 4); \
+ b ^= a; \
+ b -= rot(a, 14); \
+ c ^= b; \
+ c -= rot(b, 24); \
+ }
+
+ccl_device_inline ssei hash_ssei(ssei kx)
+{
+ ssei a, b, c;
+ a = b = c = ssei(0xdeadbeef + (1 << 2) + 13);
+
+ a += kx;
+ final(a, b, c);
+
+ return c;
+}
+
+ccl_device_inline ssei hash_ssei2(ssei kx, ssei ky)
+{
+ ssei a, b, c;
+ a = b = c = ssei(0xdeadbeef + (2 << 2) + 13);
+
+ b += ky;
+ a += kx;
+ final(a, b, c);
+
+ return c;
+}
+
+ccl_device_inline ssei hash_ssei3(ssei kx, ssei ky, ssei kz)
+{
+ ssei a, b, c;
+ a = b = c = ssei(0xdeadbeef + (3 << 2) + 13);
+
+ c += kz;
+ b += ky;
+ a += kx;
+ final(a, b, c);
+
+ return c;
+}
+
+ccl_device_inline ssei hash_ssei4(ssei kx, ssei ky, ssei kz, ssei kw)
+{
+ ssei a, b, c;
+ a = b = c = ssei(0xdeadbeef + (4 << 2) + 13);
+
+ a += kx;
+ b += ky;
+ c += kz;
+ mix(a, b, c);
+
+ a += kw;
+ final(a, b, c);
+
+ return c;
+}
+
+# if defined(__KERNEL_AVX__)
+ccl_device_inline avxi hash_avxi(avxi kx)
+{
+ avxi a, b, c;
+ a = b = c = avxi(0xdeadbeef + (1 << 2) + 13);
+
+ a += kx;
+ final(a, b, c);
+
+ return c;
+}
+
+ccl_device_inline avxi hash_avxi2(avxi kx, avxi ky)
+{
+ avxi a, b, c;
+ a = b = c = avxi(0xdeadbeef + (2 << 2) + 13);
+
+ b += ky;
+ a += kx;
+ final(a, b, c);
+
+ return c;
+}
+
+ccl_device_inline avxi hash_avxi3(avxi kx, avxi ky, avxi kz)
+{
+ avxi a, b, c;
+ a = b = c = avxi(0xdeadbeef + (3 << 2) + 13);
+
+ c += kz;
+ b += ky;
+ a += kx;
+ final(a, b, c);
+
+ return c;
+}
+
+ccl_device_inline avxi hash_avxi4(avxi kx, avxi ky, avxi kz, avxi kw)
+{
+ avxi a, b, c;
+ a = b = c = avxi(0xdeadbeef + (4 << 2) + 13);
+
+ a += kx;
+ b += ky;
+ c += kz;
+ mix(a, b, c);
+
+ a += kw;
+ final(a, b, c);
+
+ return c;
+}
+# endif
+
+# undef rot
+# undef final
+# undef mix
+
+#endif
+
#ifndef __KERNEL_GPU__
static inline uint hash_string(const char *str)
{
@@ -68,11 +384,6 @@ static inline uint hash_string(const char *str)
}
#endif
-ccl_device_inline float hash_int_01(uint k)
-{
- return (float)hash_int(k) * (1.0f / (float)0xFFFFFFFF);
-}
-
CCL_NAMESPACE_END
#endif /* __UTIL_HASH_H__ */
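
Two conventions in the new hashing API are worth spelling out: float inputs are hashed by reinterpreting their bits as uint32 (__float_as_uint), and the resulting 32-bit hash is normalized to [0, 1] by dividing by 0xFFFFFFFF. A host-side sketch of those two steps, with a trivial placeholder mixer standing in for the Jenkins lookup3 rounds (the mixing constants are arbitrary and not from the patch):

  #include <cstdint>
  #include <cstring>

  static inline uint32_t float_as_uint_sketch(float f)
  {
    uint32_t u;
    std::memcpy(&u, &f, sizeof(u)); /* bit-preserving reinterpretation */
    return u;
  }

  static inline float hash_float_to_float_sketch(float k)
  {
    uint32_t h = float_as_uint_sketch(k);
    /* Placeholder integer mixer; the real code runs the Jenkins final() macro. */
    h ^= h >> 16;
    h *= 0x7feb352du;
    h ^= h >> 15;
    h *= 0x846ca68bu;
    h ^= h >> 16;
    return (float)h / (float)0xFFFFFFFFu; /* normalize to [0, 1] */
  }
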
diff --git a/intern/cycles/util/util_ies.cpp b/intern/cycles/util/util_ies.cpp
index ff5c709b406..62d3d42186d 100644
--- a/intern/cycles/util/util_ies.cpp
+++ b/intern/cycles/util/util_ies.cpp
@@ -14,6 +14,8 @@
* limitations under the License.
*/
+#include <algorithm>
+
#include "util/util_foreach.h"
#include "util/util_ies.h"
#include "util/util_math.h"
@@ -28,7 +30,7 @@ CCL_NAMESPACE_BEGIN
// issue.
template class GuardedAllocator<char>;
-bool IESFile::load(ustring ies)
+bool IESFile::load(const string &ies)
{
clear();
if (!parse(ies) || !process()) {
@@ -76,7 +78,7 @@ class IESTextParser {
vector<char> text;
char *data;
- IESTextParser(ustring str) : text(str.begin(), str.end())
+ IESTextParser(const string &str) : text(str.begin(), str.end())
{
std::replace(text.begin(), text.end(), ',', ' ');
data = strstr(&text[0], "\nTILT=");
@@ -116,7 +118,7 @@ class IESTextParser {
}
};
-bool IESFile::parse(ustring ies)
+bool IESFile::parse(const string &ies)
{
if (ies.empty()) {
return false;
@@ -155,7 +157,8 @@ bool IESFile::parse(ustring ies)
type = (IESType)parser.get_long(); /* Photometric type */
/* TODO(lukas): Test whether the current type B processing can also deal with type A files.
- * In theory the only difference should be orientation which we ignore anyways, but with IES you never know...
+ * In theory the only difference should be orientation which we ignore anyways, but with IES you
+ * never know...
*/
if (type != TYPE_B && type != TYPE_C) {
return false;
@@ -173,12 +176,13 @@ bool IESFile::parse(ustring ies)
* Cycles expects radiometric quantities, though, which requires a conversion.
* However, the Luminous efficacy (ratio of lumens per Watt) depends on the spectral distribution
* of the light source since lumens take human perception into account.
- * Since this spectral distribution is not known from the IES file, a typical one must be assumed.
- * The D65 standard illuminant has a Luminous efficacy of 177.83, which is used here to convert to Watt/sr.
- * A more advanced approach would be to add a Blackbody Temperature input to the node and numerically
- * integrate the Luminous efficacy from the resulting spectral distribution.
- * Also, the Watt/sr value must be multiplied by 4*pi to get the Watt value that Cycles expects
- * for lamp strength. Therefore, the conversion here uses 4*pi/177.83 as a Candela to Watt factor.
+ * Since this spectral distribution is not known from the IES file, a typical one must be
+ * assumed. The D65 standard illuminant has a Luminous efficacy of 177.83, which is used here to
+ * convert to Watt/sr. A more advanced approach would be to add a Blackbody Temperature input to
+ * the node and numerically integrate the Luminous efficacy from the resulting spectral
+ * distribution. Also, the Watt/sr value must be multiplied by 4*pi to get the Watt value that
+ * Cycles expects for lamp strength. Therefore, the conversion here uses 4*pi/177.83 as a Candela
+ * to Watt factor.
*/
factor *= 0.0706650768394;
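
The literal in this hunk follows directly from the comment above it: 4π divided by the assumed D65 luminous efficacy of 177.83 lm/W. A quick numeric check (illustrative only):

  /* 4 * pi / 177.83 ~= 0.0706650768, matching the factor used above. */
  static double ies_candela_to_watt_factor()
  {
    const double pi = 3.14159265358979323846;
    return 4.0 * pi / 177.83;
  }
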
@@ -294,7 +298,8 @@ bool IESFile::process_type_b()
bool IESFile::process_type_c()
{
if (h_angles[0] == 90.0f) {
- /* Some files are stored from 90° to 270°, so we just rotate them to the regular 0°-180° range here. */
+ /* Some files are stored from 90° to 270°, so we just rotate them to the regular 0°-180° range
+ * here. */
for (int i = 0; i < h_angles.size(); i++) {
h_angles[i] -= 90.0f;
}
@@ -311,8 +316,9 @@ bool IESFile::process_type_c()
if (h_angles[h_angles.size() - 1] == 90.0f) {
/* Only one quadrant is defined, so we need to mirror twice (from one to two, then to four).
- * Since the two->four mirroring step might also be required if we get an input of two quadrants,
- * we only do the first mirror here and later do the second mirror in either case. */
+ * Since the two->four mirroring step might also be required if we get an input of two
+ * quadrants, we only do the first mirror here and later do the second mirror in either case.
+ */
int hnum = h_angles.size();
for (int i = hnum - 2; i >= 0; i--) {
h_angles.push_back(180.0f - h_angles[i]);
@@ -329,8 +335,8 @@ bool IESFile::process_type_c()
}
}
- /* Some files skip the 360° entry (contrary to standard) because it's supposed to be identical to the 0° entry.
- * If the file has a discernible order in its spacing, just fix this. */
+ /* Some files skip the 360° entry (contrary to standard) because it's supposed to be identical to
+ * the 0° entry. If the file has a discernible order in its spacing, just fix this. */
if (h_angles[h_angles.size() - 1] != 360.0f) {
int hnum = h_angles.size();
float last_step = h_angles[hnum - 1] - h_angles[hnum - 2];
diff --git a/intern/cycles/util/util_ies.h b/intern/cycles/util/util_ies.h
index ab1b9ea57cf..95473103614 100644
--- a/intern/cycles/util/util_ies.h
+++ b/intern/cycles/util/util_ies.h
@@ -17,7 +17,7 @@
#ifndef __UTIL_IES_H__
#define __UTIL_IES_H__
-#include "util/util_param.h"
+#include "util/util_string.h"
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
@@ -32,11 +32,11 @@ class IESFile {
int packed_size();
void pack(float *data);
- bool load(ustring ies);
+ bool load(const string &ies);
void clear();
protected:
- bool parse(ustring ies);
+ bool parse(const string &ies);
bool process();
bool process_type_b();
bool process_type_c();
diff --git a/intern/cycles/util/util_image.h b/intern/cycles/util/util_image.h
index 8962c09d098..27ec7ffb423 100644
--- a/intern/cycles/util/util_image.h
+++ b/intern/cycles/util/util_image.h
@@ -21,6 +21,7 @@
# include <OpenImageIO/imageio.h>
+# include "util/util_half.h"
# include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/util/util_logging.cpp b/intern/cycles/util/util_logging.cpp
index 4a5e7e6a9ea..e41250ab1b9 100644
--- a/intern/cycles/util/util_logging.cpp
+++ b/intern/cycles/util/util_logging.cpp
@@ -17,6 +17,7 @@
#include "util/util_logging.h"
#include "util/util_math.h"
+#include "util/util_string.h"
#include <stdio.h>
#ifdef _MSC_VER
@@ -25,6 +26,21 @@
CCL_NAMESPACE_BEGIN
+static bool is_verbosity_set()
+{
+#ifdef WITH_CYCLES_LOGGING
+ using CYCLES_GFLAGS_NAMESPACE::GetCommandLineOption;
+
+ std::string verbosity;
+ if (!GetCommandLineOption("v", &verbosity)) {
+ return false;
+ }
+ return verbosity != "0";
+#else
+ return false;
+#endif
+}
+
void util_logging_init(const char *argv0)
{
#ifdef WITH_CYCLES_LOGGING
@@ -36,7 +52,9 @@ void util_logging_init(const char *argv0)
google::InitGoogleLogging(argv0);
SetCommandLineOption("logtostderr", "1");
- SetCommandLineOption("v", "0");
+ if (!is_verbosity_set()) {
+ SetCommandLineOption("v", "0");
+ }
SetCommandLineOption("stderrthreshold", severity_fatal);
SetCommandLineOption("minloglevel", severity_fatal);
#else
@@ -49,7 +67,9 @@ void util_logging_start()
#ifdef WITH_CYCLES_LOGGING
using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption;
SetCommandLineOption("logtostderr", "1");
- SetCommandLineOption("v", "2");
+ if (!is_verbosity_set()) {
+ SetCommandLineOption("v", "2");
+ }
SetCommandLineOption("stderrthreshold", "1");
SetCommandLineOption("minloglevel", "0");
#endif
diff --git a/intern/cycles/util/util_logging.h b/intern/cycles/util/util_logging.h
index 1a5e6666b32..3e56f0a0193 100644
--- a/intern/cycles/util/util_logging.h
+++ b/intern/cycles/util/util_logging.h
@@ -48,6 +48,7 @@ class LogMessageVoidify {
# define LOG_SUPPRESS() (true) ? ((void)0) : LogMessageVoidify() & StubStream()
# define LOG(severity) LOG_SUPPRESS()
# define VLOG(severity) LOG_SUPPRESS()
+# define VLOG_IF(severity, condition) LOG_SUPPRESS()
#endif
#define VLOG_ONCE(level, flag) \
diff --git a/intern/cycles/util/util_map.h b/intern/cycles/util/util_map.h
index 3c9288417cf..f1b2522362f 100644
--- a/intern/cycles/util/util_map.h
+++ b/intern/cycles/util/util_map.h
@@ -25,6 +25,14 @@ CCL_NAMESPACE_BEGIN
using std::map;
using std::pair;
using std::unordered_map;
+using std::unordered_multimap;
+
+template<typename T> static void map_free_memory(T &data)
+{
+ /* Use swap() trick to actually free all internal memory. */
+ T empty_data;
+ data.swap(empty_data);
+}
CCL_NAMESPACE_END
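
map_free_memory() uses the swap-with-empty idiom: clear() may keep a container's internal storage allocated, while swapping with a default-constructed temporary releases it when the temporary is destroyed. A usage sketch with a standard unordered_map (names are illustrative):

  #include <string>
  #include <unordered_map>

  static void shrink_cache(std::unordered_map<int, std::string> &cache)
  {
    std::unordered_map<int, std::string> empty;
    cache.swap(empty);
  } /* "empty" now owns the old buckets and frees them here */
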
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 2c7f826db93..8caabf6eac3 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -294,6 +294,21 @@ ccl_device_inline float mix(float a, float b, float t)
{
return a + t * (b - a);
}
+
+ccl_device_inline float smoothstep(float edge0, float edge1, float x)
+{
+ float result;
+ if (x < edge0)
+ result = 0.0f;
+ else if (x >= edge1)
+ result = 1.0f;
+ else {
+ float t = (x - edge0) / (edge1 - edge0);
+ result = (3.0f - 2.0f * t) * (t * t);
+ }
+ return result;
+}
+
#endif /* __KERNEL_OPENCL__ */
#ifndef __KERNEL_CUDA__
@@ -318,11 +333,45 @@ ccl_device_inline int quick_floor_to_int(float x)
return float_to_int(x) - ((x < 0) ? 1 : 0);
}
+ccl_device_inline float floorfrac(float x, int *i)
+{
+ *i = quick_floor_to_int(x);
+ return x - *i;
+}
+
ccl_device_inline int ceil_to_int(float f)
{
return float_to_int(ceilf(f));
}
+ccl_device_inline float fractf(float x)
+{
+ return x - floorf(x);
+}
+
+/* Adapted from godotengine math_funcs.h. */
+ccl_device_inline float wrapf(float value, float max, float min)
+{
+ float range = max - min;
+ return (range != 0.0f) ? value - (range * floorf((value - min) / range)) : min;
+}
+
+ccl_device_inline float pingpongf(float a, float b)
+{
+ return (b != 0.0f) ? fabsf(fractf((a - b) / (b * 2.0f)) * b * 2.0f - b) : 0.0f;
+}
+
+ccl_device_inline float smoothminf(float a, float b, float k)
+{
+ if (k != 0.0f) {
+ float h = fmaxf(k - fabsf(a - b), 0.0f) / k;
+ return fminf(a, b) - h * h * h * k * (1.0f / 6.0f);
+ }
+ else {
+ return fminf(a, b);
+ }
+}
+
ccl_device_inline float signf(float f)
{
return (f < 0.0f) ? -1.0f : 1.0f;
@@ -336,6 +385,17 @@ ccl_device_inline float nonzerof(float f, float eps)
return f;
}
+/* Signum function testing for zero. Matches GLSL and OSL functions. */
+ccl_device_inline float compatible_signf(float f)
+{
+ if (f == 0.0f) {
+ return 0.0f;
+ }
+ else {
+ return signf(f);
+ }
+}
+
ccl_device_inline float smoothstepf(float f)
{
float ff = f * f;
@@ -417,12 +477,12 @@ ccl_device_inline float triangle_area(const float3 v1, const float3 v2, const fl
ccl_device_inline void make_orthonormals(const float3 N, float3 *a, float3 *b)
{
#if 0
- if(fabsf(N.y) >= 0.999f) {
+ if (fabsf(N.y) >= 0.999f) {
*a = make_float3(1, 0, 0);
*b = make_float3(0, 0, 1);
return;
}
- if(fabsf(N.z) >= 0.999f) {
+ if (fabsf(N.z) >= 0.999f) {
*a = make_float3(1, 0, 0);
*b = make_float3(0, 1, 0);
return;
@@ -528,6 +588,11 @@ ccl_device_inline float safe_sqrtf(float f)
return sqrtf(max(f, 0.0f));
}
+ccl_device_inline float inversesqrtf(float f)
+{
+ return (f > 0.0f) ? 1.0f / sqrtf(f) : 0.0f;
+}
+
ccl_device float safe_asinf(float a)
{
return asinf(clamp(a, -1.0f, 1.0f));
@@ -617,6 +682,57 @@ ccl_device float bits_to_01(uint bits)
return bits * (1.0f / (float)0xFFFFFFFF);
}
+ccl_device_inline uint count_leading_zeros(uint x)
+{
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
+ return __clz(x);
+#elif defined(__KERNEL_OPENCL__)
+ return clz(x);
+#else
+ assert(x != 0);
+# ifdef _MSC_VER
+ unsigned long leading_zero = 0;
+ _BitScanReverse(&leading_zero, x);
+ return (31 - leading_zero);
+# else
+ return __builtin_clz(x);
+# endif
+#endif
+}
+
+ccl_device_inline uint count_trailing_zeros(uint x)
+{
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
+ return (__ffs(x) - 1);
+#elif defined(__KERNEL_OPENCL__)
+ return (31 - count_leading_zeros(x & -x));
+#else
+ assert(x != 0);
+# ifdef _MSC_VER
+ unsigned long ctz = 0;
+ _BitScanForward(&ctz, x);
+ return ctz;
+# else
+ return __builtin_ctz(x);
+# endif
+#endif
+}
+
+ccl_device_inline uint find_first_set(uint x)
+{
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
+ return __ffs(x);
+#elif defined(__KERNEL_OPENCL__)
+ return (x != 0) ? (32 - count_leading_zeros(x & (-x))) : 0;
+#else
+# ifdef _MSC_VER
+ return (x != 0) ? (32 - count_leading_zeros(x & (-x))) : 0;
+# else
+ return __builtin_ffs(x);
+# endif
+#endif
+}
+
/* projections */
ccl_device_inline float2 map_to_tube(const float3 co)
{
@@ -651,6 +767,36 @@ ccl_device_inline float2 map_to_sphere(const float3 co)
return make_float2(u, v);
}
+/* Compares two floats.
+ * Returns true if their absolute difference is smaller than abs_diff (for numbers near zero)
+ * or their relative difference is less than ulp_diff ULPs.
+ * Based on
+ * https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
+ */
+
+ccl_device_inline float compare_floats(float a, float b, float abs_diff, int ulp_diff)
+{
+ if (fabsf(a - b) < abs_diff) {
+ return true;
+ }
+
+ if ((a < 0.0f) != (b < 0.0f)) {
+ return false;
+ }
+
+ return (abs(__float_as_int(a) - __float_as_int(b)) < ulp_diff);
+}
+
+/* Calculate the angle between the two vectors a and b.
+ * The usual approach acos(dot(a, b)) has severe precision issues for small angles,
+ * which are avoided by this method.
+ * Based on "Mangled Angles" from https://people.eecs.berkeley.edu/~wkahan/Mindless.pdf
+ */
+ccl_device_inline float precise_angle(float3 a, float3 b)
+{
+ return 2.0f * atan2f(len(a - b), len(a + b));
+}
+
CCL_NAMESPACE_END
#endif /* __UTIL_MATH_H__ */
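
To illustrate the combined test in compare_floats() added above (absolute tolerance for values near zero, ULP distance otherwise), here is a standalone sketch; the tolerances are made up for the example and std::memcpy stands in for the kernel's __float_as_int():

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>

static bool almost_equal(float a, float b, float abs_diff, int ulp_diff)
{
  /* The absolute test handles values straddling zero, where ULP distance explodes. */
  if (std::fabs(a - b) < abs_diff) {
    return true;
  }
  /* ULP distance is only meaningful for same-signed values. */
  if ((a < 0.0f) != (b < 0.0f)) {
    return false;
  }
  int32_t ia, ib;
  std::memcpy(&ia, &a, sizeof(float)); /* portable stand-in for __float_as_int */
  std::memcpy(&ib, &b, sizeof(float));
  return std::abs(ia - ib) < ulp_diff;
}

int main()
{
  assert(almost_equal(0.0f, 1e-9f, 1e-7f, 4)); /* caught by the absolute test */
  assert(almost_equal(1000.0f, std::nextafterf(1000.0f, 2000.0f), 1e-7f, 4)); /* 1 ULP apart */
  assert(!almost_equal(1000.0f, 1000.1f, 1e-7f, 4)); /* thousands of ULPs apart */
  return 0;
}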
diff --git a/intern/cycles/util/util_math_fast.h b/intern/cycles/util/util_math_fast.h
index 872271666aa..e979bd9e0c0 100644
--- a/intern/cycles/util/util_math_fast.h
+++ b/intern/cycles/util/util_math_fast.h
@@ -282,8 +282,10 @@ ccl_device float fast_acosf(float x)
const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f;
/* Based on http://www.pouet.net/topic.php?which=9132&page=2
* 85% accurate (ulp 0)
- * Examined 2130706434 values of acos: 15.2000597 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // without "denormal crush"
- * Examined 2130706434 values of acos: 15.2007108 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // with "denormal crush"
+ * Examined 2130706434 values of acos:
+ * 15.2000597 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // without "denormal crush"
+ * Examined 2130706434 values of acos:
+ * 15.2007108 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // with "denormal crush"
*/
const float a = sqrtf(1.0f - m) *
(1.5707963267f + m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f)));
@@ -312,8 +314,10 @@ ccl_device float fast_atanf(float x)
const float s = 1.0f - (1.0f - k); /* Crush denormals. */
const float t = s * s;
/* http://mathforum.org/library/drmath/view/62672.html
- * Examined 4278190080 values of atan: 2.36864877 avg ulp diff, 302 max ulp, 6.55651e-06 max error // (with denormals)
- * Examined 4278190080 values of atan: 171160502 avg ulp diff, 855638016 max ulp, 6.55651e-06 max error // (crush denormals)
+ * Examined 4278190080 values of atan:
+ * 2.36864877 avg ulp diff, 302 max ulp, 6.55651e-06 max error // (with denormals)
+ * Examined 4278190080 values of atan:
+ * 171160502 avg ulp diff, 855638016 max ulp, 6.55651e-06 max error // (crush denormals)
*/
float r = s * madd(0.43157974f, t, 1.0f) / madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f);
if (a > 1.0f) {
@@ -442,6 +446,11 @@ ccl_device_inline float fast_expf(float x)
}
#ifndef __KERNEL_GPU__
+/* MSVC seems to have a code-gen bug here in at least SSE41/AVX
+ * see T78047 for details. */
+# ifdef _MSC_VER
+# pragma optimize("", off)
+# endif
ccl_device float4 fast_exp2f4(float4 x)
{
const float4 one = make_float4(1.0f);
@@ -457,6 +466,9 @@ ccl_device float4 fast_exp2f4(float4 x)
r = madd4(x, r, make_float4(1.0f));
return __int4_as_float4(__float4_as_int4(r) + (m << 23));
}
+# ifdef _MSC_VER
+# pragma optimize("", on)
+# endif
ccl_device_inline float4 fast_expf4(float4 x)
{
diff --git a/intern/cycles/util/util_math_float2.h b/intern/cycles/util/util_math_float2.h
index 9feaf042d19..bf21430af3c 100644
--- a/intern/cycles/util/util_math_float2.h
+++ b/intern/cycles/util/util_math_float2.h
@@ -35,7 +35,9 @@ ccl_device_inline float2 operator*(float f, const float2 &a);
ccl_device_inline float2 operator/(float f, const float2 &a);
ccl_device_inline float2 operator/(const float2 &a, float f);
ccl_device_inline float2 operator/(const float2 &a, const float2 &b);
+ccl_device_inline float2 operator+(const float2 &a, const float f);
ccl_device_inline float2 operator+(const float2 &a, const float2 &b);
+ccl_device_inline float2 operator-(const float2 &a, const float f);
ccl_device_inline float2 operator-(const float2 &a, const float2 &b);
ccl_device_inline float2 operator+=(float2 &a, const float2 &b);
ccl_device_inline float2 operator*=(float2 &a, const float2 &b);
@@ -48,6 +50,7 @@ ccl_device_inline bool operator!=(const float2 &a, const float2 &b);
ccl_device_inline bool is_zero(const float2 &a);
ccl_device_inline float average(const float2 &a);
+ccl_device_inline float distance(const float2 &a, const float2 &b);
ccl_device_inline float dot(const float2 &a, const float2 &b);
ccl_device_inline float cross(const float2 &a, const float2 &b);
ccl_device_inline float len(const float2 &a);
@@ -60,8 +63,11 @@ ccl_device_inline float2 clamp(const float2 &a, const float2 &mn, const float2 &
ccl_device_inline float2 fabs(const float2 &a);
ccl_device_inline float2 as_float2(const float4 &a);
ccl_device_inline float2 interp(const float2 &a, const float2 &b, float t);
+ccl_device_inline float2 floor(const float2 &a);
#endif /* !__KERNEL_OPENCL__ */
+ccl_device_inline float2 safe_divide_float2_float(const float2 a, const float b);
+
/*******************************************************************************
* Definition.
*/
@@ -103,11 +109,21 @@ ccl_device_inline float2 operator/(const float2 &a, const float2 &b)
return make_float2(a.x / b.x, a.y / b.y);
}
+ccl_device_inline float2 operator+(const float2 &a, const float f)
+{
+ return a + make_float2(f, f);
+}
+
ccl_device_inline float2 operator+(const float2 &a, const float2 &b)
{
return make_float2(a.x + b.x, a.y + b.y);
}
+ccl_device_inline float2 operator-(const float2 &a, const float f)
+{
+ return a - make_float2(f, f);
+}
+
ccl_device_inline float2 operator-(const float2 &a, const float2 &b)
{
return make_float2(a.x - b.x, a.y - b.y);
@@ -159,6 +175,11 @@ ccl_device_inline float average(const float2 &a)
return (a.x + a.y) * (1.0f / 2.0f);
}
+ccl_device_inline float distance(const float2 &a, const float2 &b)
+{
+ return len(a - b);
+}
+
ccl_device_inline float dot(const float2 &a, const float2 &b)
{
return a.x * b.x + a.y * b.y;
@@ -226,8 +247,18 @@ ccl_device_inline float2 mix(const float2 &a, const float2 &b, float t)
return a + t * (b - a);
}
+ccl_device_inline float2 floor(const float2 &a)
+{
+ return make_float2(floorf(a.x), floorf(a.y));
+}
+
#endif /* !__KERNEL_OPENCL__ */
+ccl_device_inline float2 safe_divide_float2_float(const float2 a, const float b)
+{
+ return (b != 0.0f) ? a / b : make_float2(0.0f, 0.0f);
+}
+
CCL_NAMESPACE_END
#endif /* __UTIL_MATH_FLOAT2_H__ */
diff --git a/intern/cycles/util/util_math_float3.h b/intern/cycles/util/util_math_float3.h
index 85e9b8114ff..dd2010715ba 100644
--- a/intern/cycles/util/util_math_float3.h
+++ b/intern/cycles/util/util_math_float3.h
@@ -35,7 +35,9 @@ ccl_device_inline float3 operator*(const float f, const float3 &a);
ccl_device_inline float3 operator/(const float f, const float3 &a);
ccl_device_inline float3 operator/(const float3 &a, const float f);
ccl_device_inline float3 operator/(const float3 &a, const float3 &b);
+ccl_device_inline float3 operator+(const float3 &a, const float f);
ccl_device_inline float3 operator+(const float3 &a, const float3 &b);
+ccl_device_inline float3 operator-(const float3 &a, const float f);
ccl_device_inline float3 operator-(const float3 &a, const float3 &b);
ccl_device_inline float3 operator+=(float3 &a, const float3 &b);
ccl_device_inline float3 operator-=(float3 &a, const float3 &b);
@@ -47,6 +49,7 @@ ccl_device_inline float3 operator/=(float3 &a, float f);
ccl_device_inline bool operator==(const float3 &a, const float3 &b);
ccl_device_inline bool operator!=(const float3 &a, const float3 &b);
+ccl_device_inline float distance(const float3 &a, const float3 &b);
ccl_device_inline float dot(const float3 &a, const float3 &b);
ccl_device_inline float dot_xy(const float3 &a, const float3 &b);
ccl_device_inline float3 cross(const float3 &a, const float3 &b);
@@ -58,6 +61,8 @@ ccl_device_inline float3 fabs(const float3 &a);
ccl_device_inline float3 mix(const float3 &a, const float3 &b, float t);
ccl_device_inline float3 rcp(const float3 &a);
ccl_device_inline float3 sqrt(const float3 &a);
+ccl_device_inline float3 floor(const float3 &a);
+ccl_device_inline float3 ceil(const float3 &a);
#endif /* !__KERNEL_OPENCL__ */
ccl_device_inline float min3(float3 a);
@@ -65,11 +70,17 @@ ccl_device_inline float max3(float3 a);
ccl_device_inline float len(const float3 a);
ccl_device_inline float len_squared(const float3 a);
+ccl_device_inline float3 reflect(const float3 incident, const float3 normal);
+ccl_device_inline float3 project(const float3 v, const float3 v_proj);
+
ccl_device_inline float3 saturate3(float3 a);
ccl_device_inline float3 safe_normalize(const float3 a);
ccl_device_inline float3 normalize_len(const float3 a, float *t);
ccl_device_inline float3 safe_normalize_len(const float3 a, float *t);
+ccl_device_inline float3 safe_divide_float3_float3(const float3 a, const float3 b);
+ccl_device_inline float3 safe_divide_float3_float(const float3 a, const float b);
ccl_device_inline float3 interp(float3 a, float3 b, float t);
+ccl_device_inline float3 sqr3(float3 a);
ccl_device_inline bool is_zero(const float3 a);
ccl_device_inline float reduce_add(const float3 a);
@@ -141,6 +152,11 @@ ccl_device_inline float3 operator/(const float3 &a, const float3 &b)
# endif
}
+ccl_device_inline float3 operator+(const float3 &a, const float f)
+{
+ return a + make_float3(f, f, f);
+}
+
ccl_device_inline float3 operator+(const float3 &a, const float3 &b)
{
# ifdef __KERNEL_SSE__
@@ -150,6 +166,11 @@ ccl_device_inline float3 operator+(const float3 &a, const float3 &b)
# endif
}
+ccl_device_inline float3 operator-(const float3 &a, const float f)
+{
+ return a - make_float3(f, f, f);
+}
+
ccl_device_inline float3 operator-(const float3 &a, const float3 &b)
{
# ifdef __KERNEL_SSE__
@@ -204,6 +225,11 @@ ccl_device_inline bool operator!=(const float3 &a, const float3 &b)
return !(a == b);
}
+ccl_device_inline float distance(const float3 &a, const float3 &b)
+{
+ return len(a - b);
+}
+
ccl_device_inline float dot(const float3 &a, const float3 &b)
{
# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
@@ -280,6 +306,24 @@ ccl_device_inline float3 sqrt(const float3 &a)
# endif
}
+ccl_device_inline float3 floor(const float3 &a)
+{
+# ifdef __KERNEL_SSE__
+ return float3(_mm_floor_ps(a));
+# else
+ return make_float3(floorf(a.x), floorf(a.y), floorf(a.z));
+# endif
+}
+
+ccl_device_inline float3 ceil(const float3 &a)
+{
+# ifdef __KERNEL_SSE__
+ return float3(_mm_ceil_ps(a));
+# else
+ return make_float3(ceilf(a.x), ceilf(a.y), ceilf(a.z));
+# endif
+}
+
ccl_device_inline float3 mix(const float3 &a, const float3 &b, float t)
{
return a + t * (b - a);
@@ -320,6 +364,19 @@ ccl_device_inline float len_squared(const float3 a)
return dot(a, a);
}
+ccl_device_inline float3 reflect(const float3 incident, const float3 normal)
+{
+ float3 unit_normal = normalize(normal);
+ return incident - 2.0f * unit_normal * dot(incident, unit_normal);
+}
+
+ccl_device_inline float3 project(const float3 v, const float3 v_proj)
+{
+ float len_squared = dot(v_proj, v_proj);
+ return (len_squared != 0.0f) ? (dot(v, v_proj) / len_squared) * v_proj :
+ make_float3(0.0f, 0.0f, 0.0f);
+}
+
ccl_device_inline float3 saturate3(float3 a)
{
return make_float3(saturate(a.x), saturate(a.y), saturate(a.z));
@@ -344,11 +401,28 @@ ccl_device_inline float3 safe_normalize_len(const float3 a, float *t)
return (*t != 0.0f) ? a / (*t) : a;
}
+ccl_device_inline float3 safe_divide_float3_float3(const float3 a, const float3 b)
+{
+ return make_float3((b.x != 0.0f) ? a.x / b.x : 0.0f,
+ (b.y != 0.0f) ? a.y / b.y : 0.0f,
+ (b.z != 0.0f) ? a.z / b.z : 0.0f);
+}
+
+ccl_device_inline float3 safe_divide_float3_float(const float3 a, const float b)
+{
+ return (b != 0.0f) ? a / b : make_float3(0.0f, 0.0f, 0.0f);
+}
+
ccl_device_inline float3 interp(float3 a, float3 b, float t)
{
return a + t * (b - a);
}
+ccl_device_inline float3 sqr3(float3 a)
+{
+ return a * a;
+}
+
ccl_device_inline bool is_zero(const float3 a)
{
#ifdef __KERNEL_SSE__
diff --git a/intern/cycles/util/util_math_float4.h b/intern/cycles/util/util_math_float4.h
index 1fb886572e3..cd4b3e3b74c 100644
--- a/intern/cycles/util/util_math_float4.h
+++ b/intern/cycles/util/util_math_float4.h
@@ -34,7 +34,9 @@ ccl_device_inline float4 operator*(const float4 &a, float f);
ccl_device_inline float4 operator*(float f, const float4 &a);
ccl_device_inline float4 operator/(const float4 &a, float f);
ccl_device_inline float4 operator/(const float4 &a, const float4 &b);
+ccl_device_inline float4 operator+(const float4 &a, const float f);
ccl_device_inline float4 operator+(const float4 &a, const float4 &b);
+ccl_device_inline float4 operator-(const float4 &a, const float f);
ccl_device_inline float4 operator-(const float4 &a, const float4 &b);
ccl_device_inline float4 operator+=(float4 &a, const float4 &b);
ccl_device_inline float4 operator*=(float4 &a, const float4 &b);
@@ -46,6 +48,7 @@ ccl_device_inline int4 operator>=(const float4 &a, const float4 &b);
ccl_device_inline int4 operator<=(const float4 &a, const float4 &b);
ccl_device_inline bool operator==(const float4 &a, const float4 &b);
+ccl_device_inline float distance(const float4 &a, const float4 &b);
ccl_device_inline float dot(const float4 &a, const float4 &b);
ccl_device_inline float len_squared(const float4 &a);
ccl_device_inline float4 rcp(const float4 &a);
@@ -61,8 +64,12 @@ ccl_device_inline float4 min(const float4 &a, const float4 &b);
ccl_device_inline float4 max(const float4 &a, const float4 &b);
ccl_device_inline float4 clamp(const float4 &a, const float4 &mn, const float4 &mx);
ccl_device_inline float4 fabs(const float4 &a);
+ccl_device_inline float4 floor(const float4 &a);
+ccl_device_inline float4 mix(const float4 &a, const float4 &b, float t);
#endif /* !__KERNEL_OPENCL__*/
+ccl_device_inline float4 safe_divide_float4_float(const float4 a, const float b);
+
#ifdef __KERNEL_SSE__
template<size_t index_0, size_t index_1, size_t index_2, size_t index_3>
__forceinline const float4 shuffle(const float4 &b);
@@ -139,6 +146,11 @@ ccl_device_inline float4 operator/(const float4 &a, const float4 &b)
# endif
}
+ccl_device_inline float4 operator+(const float4 &a, const float f)
+{
+ return a + make_float4(f, f, f, f);
+}
+
ccl_device_inline float4 operator+(const float4 &a, const float4 &b)
{
# ifdef __KERNEL_SSE__
@@ -148,6 +160,11 @@ ccl_device_inline float4 operator+(const float4 &a, const float4 &b)
# endif
}
+ccl_device_inline float4 operator-(const float4 &a, const float f)
+{
+ return a - make_float4(f, f, f, f);
+}
+
ccl_device_inline float4 operator-(const float4 &a, const float4 &b)
{
# ifdef __KERNEL_SSE__
@@ -162,6 +179,11 @@ ccl_device_inline float4 operator+=(float4 &a, const float4 &b)
return a = a + b;
}
+ccl_device_inline float4 operator-=(float4 &a, const float4 &b)
+{
+ return a = a - b;
+}
+
ccl_device_inline float4 operator*=(float4 &a, const float4 &b)
{
return a = a * b;
@@ -213,6 +235,11 @@ ccl_device_inline bool operator==(const float4 &a, const float4 &b)
# endif
}
+ccl_device_inline float distance(const float4 &a, const float4 &b)
+{
+ return len(a - b);
+}
+
ccl_device_inline float dot(const float4 &a, const float4 &b)
{
# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
@@ -338,6 +365,21 @@ ccl_device_inline float4 fabs(const float4 &a)
return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
# endif
}
+
+ccl_device_inline float4 floor(const float4 &a)
+{
+# ifdef __KERNEL_SSE__
+ return float4(_mm_floor_ps(a));
+# else
+ return make_float4(floorf(a.x), floorf(a.y), floorf(a.z), floorf(a.w));
+# endif
+}
+
+ccl_device_inline float4 mix(const float4 &a, const float4 &b, float t)
+{
+ return a + t * (b - a);
+}
+
#endif /* !__KERNEL_OPENCL__*/
#ifdef __KERNEL_SSE__
@@ -430,6 +472,11 @@ ccl_device_inline float4 load_float4(const float *v)
#endif /* !__KERNEL_GPU__ */
+ccl_device_inline float4 safe_divide_float4_float(const float4 a, const float b)
+{
+ return (b != 0.0f) ? a / b : make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+}
+
CCL_NAMESPACE_END
#endif /* __UTIL_MATH_FLOAT4_H__ */
diff --git a/intern/cycles/util/util_math_intersect.h b/intern/cycles/util/util_math_intersect.h
index 95ac231c611..fa3a541eea9 100644
--- a/intern/cycles/util/util_math_intersect.h
+++ b/intern/cycles/util/util_math_intersect.h
@@ -163,7 +163,7 @@ ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P,
/* Calculate geometry normal and denominator. */
const float3 Ng1 = cross(e1, e0);
- //const Vec3vfM Ng1 = stable_triangle_normal(e2,e1,e0);
+ // const Vec3vfM Ng1 = stable_triangle_normal(e2,e1,e0);
const float3 Ng = Ng1 + Ng1;
const float den = dot3(Ng, dir);
/* Avoid division by 0. */
diff --git a/intern/cycles/util/util_math_matrix.h b/intern/cycles/util/util_math_matrix.h
index fe80fab6ebd..1dc661a7aa7 100644
--- a/intern/cycles/util/util_math_matrix.h
+++ b/intern/cycles/util/util_math_matrix.h
@@ -110,7 +110,8 @@ ccl_device_inline void math_vec3_add_strided(
}
/* Elementary matrix operations.
- * Note: TriMatrix refers to a square matrix that is symmetric, and therefore its upper-triangular part isn't stored. */
+ * Note: TriMatrix refers to a square matrix that is symmetric,
+ * and therefore its upper-triangular part isn't stored. */
ccl_device_inline void math_trimatrix_add_diagonal(ccl_global float *A,
int n,
@@ -196,7 +197,8 @@ ccl_device void math_trimatrix_cholesky(ccl_global float *A, int n, int stride)
}
}
-/* Solve A*S=y for S given A and y, where A is symmetrical positive-semidefinite and both inputs are destroyed in the process.
+/* Solve A*S=y for S given A and y,
+ * where A is symmetrical positive-semi-definite and both inputs are destroyed in the process.
*
* We can apply Cholesky decomposition to find a lower triangular L so that L*Lt = A.
* With that we get (L*Lt)*S = L*(Lt*S) = L*b = y, defining b as Lt*S.
@@ -204,15 +206,16 @@ ccl_device void math_trimatrix_cholesky(ccl_global float *A, int n, int stride)
* Then, the remaining problem is Lt*S = b, which again can be solved easily.
*
* This is useful for solving the normal equation S=inv(Xt*W*X)*Xt*W*y, since Xt*W*X is
- * symmetrical positive-semidefinite by construction, so we can just use this function with A=Xt*W*X and y=Xt*W*y. */
+ * symmetrical positive-semidefinite by construction,
+ * so we can just use this function with A=Xt*W*X and y=Xt*W*y. */
ccl_device_inline void math_trimatrix_vec3_solve(ccl_global float *A,
ccl_global float3 *y,
int n,
int stride)
{
/* Since the first entry of the design row is always 1, the upper-left element of XtWX is a good
- * heuristic for the amount of pixels considered (with weighting), therefore the amount of correction
- * is scaled based on it. */
+ * heuristic for the amount of pixels considered (with weighting),
+ * therefore the amount of correction is scaled based on it. */
math_trimatrix_add_diagonal(A, n, 3e-7f * A[0], stride); /* Improve the numerical stability. */
math_trimatrix_cholesky(A, n, stride); /* Replace A with L so that L*Lt = A. */
@@ -233,9 +236,9 @@ ccl_device_inline void math_trimatrix_vec3_solve(ccl_global float *A,
}
}
-/* Perform the Jacobi Eigenvalue Methon on matrix A.
- * A is assumed to be a symmetrical matrix, therefore only the lower-triangular part is ever accessed.
- * The algorithm overwrites the contents of A.
+/* Perform the Jacobi Eigenvalue Method on matrix A.
+ * A is assumed to be a symmetrical matrix, therefore only the lower-triangular part is ever
+ * accessed. The algorithm overwrites the contents of A.
*
* After returning, A will be overwritten with D, which is (almost) diagonal,
* and V will contain the eigenvectors of the original A in its rows (!),
@@ -263,7 +266,8 @@ ccl_device void math_matrix_jacobi_eigendecomposition(float *A,
}
if (off_diagonal < 1e-7f) {
/* The matrix has nearly reached diagonal form.
- * Since the eigenvalues are only used to determine truncation, their exact values aren't required - a relative error of a few ULPs won't matter at all. */
+ * Since the eigenvalues are only used to determine truncation, their exact values aren't
+ * required - a relative error of a few ULPs won't matter at all. */
break;
}
@@ -277,7 +281,8 @@ ccl_device void math_matrix_jacobi_eigendecomposition(float *A,
float element = MAT(A, n, row, col);
float abs_element = fabsf(element);
- /* If we're in a later sweep and the element already is very small, just set it to zero and skip the rotation. */
+ /* If we're in a later sweep and the element already is very small,
+ * just set it to zero and skip the rotation. */
if (sweep > 3 && abs_element <= singular_epsilon * fabsf(MAT(A, n, row, row)) &&
abs_element <= singular_epsilon * fabsf(MAT(A, n, col, col))) {
MAT(A, n, row, col) = 0.0f;
@@ -288,13 +293,16 @@ ccl_device void math_matrix_jacobi_eigendecomposition(float *A,
continue;
}
- /* If we're in one of the first sweeps and the element is smaller than the threshold, skip it. */
+ /* If we're in one of the first sweeps and the element is smaller than the threshold,
+ * skip it. */
if (sweep < 3 && (abs_element < threshold)) {
continue;
}
- /* Determine rotation: The rotation is characterized by its angle phi - or, in the actual implementation, sin(phi) and cos(phi).
- * To find those, we first compute their ratio - that might be unstable if the angle approaches 90°, so there's a fallback for that case.
+ /* Determine rotation: The rotation is characterized by its angle phi - or,
+ * in the actual implementation, sin(phi) and cos(phi).
+ * To find those, we first compute their ratio - that might be unstable if the angle
+ * approaches 90°, so there's a fallback for that case.
* Then, we compute sin(phi) and cos(phi) themselves. */
float singular_diff = MAT(A, n, row, row) - MAT(A, n, col, col);
float ratio;
@@ -310,7 +318,8 @@ ccl_device void math_matrix_jacobi_eigendecomposition(float *A,
float c = 1.0f / sqrtf(1.0f + ratio * ratio);
float s = ratio * c;
- /* To improve numerical stability by avoiding cancellation, the update equations are reformulized to use sin(phi) and tan(phi/2) instead. */
+ /* To improve numerical stability by avoiding cancellation, the update equations are
+ * reformulized to use sin(phi) and tan(phi/2) instead. */
float tan_phi_2 = s / (1.0f + c);
/* Update the singular values in the diagonal. */
@@ -330,7 +339,8 @@ ccl_device void math_matrix_jacobi_eigendecomposition(float *A,
MATS(M, n, r2, c2, stride) += s * (M1 - tan_phi_2 * M2); \
}
- /* Split into three parts to ensure correct accesses since we only store the lower-triangular part of A. */
+ /* Split into three parts to ensure correct accesses since we only store the
+ * lower-triangular part of A. */
for (int i = 0; i < col; i++)
ROT(A, col, i, row, i, 1);
for (int i = col + 1; i < row; i++)
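
The math_trimatrix_vec3_solve() comment above outlines the scheme: factor the symmetric positive-semidefinite A into L*Lt by Cholesky decomposition, then solve L*b = y by forward substitution and Lt*S = b by backward substitution. A tiny standalone 2x2 example with made-up numbers, using full matrices instead of the packed lower-triangular "TriMatrix" storage the kernels operate on:

#include <cmath>
#include <cstdio>

int main()
{
  /* A is symmetric positive-definite, y is the right-hand side. */
  const float A[2][2] = {{4.0f, 2.0f}, {2.0f, 3.0f}};
  const float y[2] = {10.0f, 8.0f};

  /* Cholesky: lower-triangular L with L*Lt = A. */
  float L[2][2] = {{0.0f, 0.0f}, {0.0f, 0.0f}};
  L[0][0] = sqrtf(A[0][0]);
  L[1][0] = A[1][0] / L[0][0];
  L[1][1] = sqrtf(A[1][1] - L[1][0] * L[1][0]);

  /* Forward substitution: L*b = y. */
  float b[2];
  b[0] = y[0] / L[0][0];
  b[1] = (y[1] - L[1][0] * b[0]) / L[1][1];

  /* Backward substitution: Lt*S = b. */
  float S[2];
  S[1] = b[1] / L[1][1];
  S[0] = (b[0] - L[1][0] * S[1]) / L[0][0];

  printf("S = (%.3f, %.3f)\n", S[0], S[1]); /* Expected (1.750, 1.500), since A*S = y. */
  return 0;
}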
diff --git a/intern/cycles/util/util_md5.cpp b/intern/cycles/util/util_md5.cpp
index c11f495f785..0df521c2b58 100644
--- a/intern/cycles/util/util_md5.cpp
+++ b/intern/cycles/util/util_md5.cpp
@@ -26,8 +26,8 @@
#include "util_md5.h"
#include "util_path.h"
-#include <string.h>
#include <stdio.h>
+#include <string.h>
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/util/util_openimagedenoise.h b/intern/cycles/util/util_openimagedenoise.h
new file mode 100644
index 00000000000..aafa69cb530
--- /dev/null
+++ b/intern/cycles/util/util_openimagedenoise.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_OPENIMAGEDENOISE_H__
+#define __UTIL_OPENIMAGEDENOISE_H__
+
+#ifdef WITH_OPENIMAGEDENOISE
+# include <OpenImageDenoise/oidn.hpp>
+#endif
+
+#include "util_system.h"
+
+CCL_NAMESPACE_BEGIN
+
+static inline bool openimagedenoise_supported()
+{
+#ifdef WITH_OPENIMAGEDENOISE
+ return system_cpu_support_sse41();
+#else
+ return false;
+#endif
+}
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_OPENIMAGEDENOISE_H__ */
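
openimagedenoise_supported() in the new header above only reports whether the OIDN CPU backend can run at all (it requires SSE 4.1). A hedged sketch of a hypothetical caller that gates denoising on this check, assuming the OpenImageDenoise 1.x C++ API and a caller-owned width*height RGB float buffer; maybe_denoise() is not part of Cycles:

#include "util/util_openimagedenoise.h"

/* Hypothetical helper: denoise 'color' in place when the runtime check passes,
 * otherwise leave it untouched (a caller could fall back to another denoiser). */
static void maybe_denoise(float *color, int width, int height)
{
#ifdef WITH_OPENIMAGEDENOISE
  if (!ccl::openimagedenoise_supported()) {
    return;
  }
  oidn::DeviceRef device = oidn::newDevice();
  device.commit();

  oidn::FilterRef filter = device.newFilter("RT"); /* generic ray-tracing denoiser */
  filter.setImage("color", color, oidn::Format::Float3, width, height);
  filter.setImage("output", color, oidn::Format::Float3, width, height);
  filter.set("hdr", true);
  filter.commit();
  filter.execute();
#else
  (void)color;
  (void)width;
  (void)height;
#endif
}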
diff --git a/intern/cycles/util/util_param.h b/intern/cycles/util/util_param.h
index cfbe416aba1..3f8e2d6d700 100644
--- a/intern/cycles/util/util_param.h
+++ b/intern/cycles/util/util_param.h
@@ -29,6 +29,11 @@ CCL_NAMESPACE_BEGIN
OIIO_NAMESPACE_USING
static constexpr TypeDesc TypeFloat2(TypeDesc::FLOAT, TypeDesc::VEC2);
+static constexpr TypeDesc TypeRGBA(TypeDesc::FLOAT, TypeDesc::VEC4, TypeDesc::COLOR);
+static constexpr TypeDesc TypeFloatArray4(TypeDesc::FLOAT,
+ TypeDesc::SCALAR,
+ TypeDesc::NOSEMANTICS,
+ 4);
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp
index 77293c45f6b..8905c8bc7f0 100644
--- a/intern/cycles/util/util_path.cpp
+++ b/intern/cycles/util/util_path.cpp
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "util/util_md5.h"
#include "util/util_path.h"
+#include "util/util_md5.h"
#include "util/util_string.h"
#include <OpenImageIO/filesystem.h>
@@ -36,8 +36,8 @@ OIIO_NAMESPACE_USING
# define DIR_SEP '/'
# include <dirent.h>
# include <pwd.h>
-# include <unistd.h>
# include <sys/types.h>
+# include <unistd.h>
#endif
#ifdef HAVE_SHLWAPI_H
diff --git a/intern/cycles/util/util_profiling.cpp b/intern/cycles/util/util_profiling.cpp
index e3edf219435..073b09f719f 100644
--- a/intern/cycles/util/util_profiling.cpp
+++ b/intern/cycles/util/util_profiling.cpp
@@ -14,8 +14,9 @@
* limitations under the License.
*/
-#include "util/util_algorithm.h"
#include "util/util_profiling.h"
+#include "util/util_algorithm.h"
+#include "util/util_foreach.h"
#include "util/util_set.h"
CCL_NAMESPACE_BEGIN
@@ -47,7 +48,8 @@ void Profiler::run()
}
if (cur_shader >= 0 && cur_shader < shader_samples.size()) {
- /* Only consider the active shader during events whose runtime significantly depends on it. */
+ /* Only consider the active shader during events whose runtime significantly depends on it.
+ */
if (((cur_event >= PROFILING_SHADER_EVAL) && (cur_event <= PROFILING_SUBSURFACE)) ||
((cur_event >= PROFILING_CLOSURE_EVAL) &&
(cur_event <= PROFILING_CLOSURE_VOLUME_SAMPLE))) {
diff --git a/intern/cycles/util/util_profiling.h b/intern/cycles/util/util_profiling.h
index f5f500239f2..ceec08ed894 100644
--- a/intern/cycles/util/util_profiling.h
+++ b/intern/cycles/util/util_profiling.h
@@ -19,7 +19,6 @@
#include <atomic>
-#include "util/util_foreach.h"
#include "util/util_map.h"
#include "util/util_thread.h"
#include "util/util_vector.h"
diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h
index f05e5b918f3..26534a29dfe 100644
--- a/intern/cycles/util/util_progress.h
+++ b/intern/cycles/util/util_progress.h
@@ -25,8 +25,8 @@
#include "util/util_function.h"
#include "util/util_string.h"
-#include "util/util_time.h"
#include "util/util_thread.h"
+#include "util/util_time.h"
CCL_NAMESPACE_BEGIN
@@ -204,6 +204,8 @@ class Progress {
float get_progress()
{
+ thread_scoped_lock lock(progress_mutex);
+
if (total_pixel_samples > 0) {
return ((float)pixel_samples) / total_pixel_samples;
}
@@ -362,7 +364,8 @@ class Progress {
* It's used to display the sample count if only one tile is active. */
int current_tile_sample;
/* Stores the number of tiles that's already finished.
- * Used to determine whether all but the last tile are finished rendering, in which case the current_tile_sample is displayed. */
+ * Used to determine whether all but the last tile are finished rendering,
+ * in which case the current_tile_sample is displayed. */
int rendered_tiles, denoised_tiles;
double start_time, render_start_time;
diff --git a/intern/cycles/util/util_semaphore.h b/intern/cycles/util/util_semaphore.h
new file mode 100644
index 00000000000..d995b0732b8
--- /dev/null
+++ b/intern/cycles/util/util_semaphore.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_SEMAPHORE_H__
+#define __UTIL_SEMAPHORE_H__
+
+#include "util/util_thread.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Counting Semaphore
+ *
+ * To restrict concurrent access to a resource to a specified number
+ * of threads. Similar to std::counting_semaphore from C++20. */
+
+class thread_counting_semaphore {
+ public:
+ explicit thread_counting_semaphore(const int count) : count(count)
+ {
+ }
+
+ thread_counting_semaphore(const thread_counting_semaphore &) = delete;
+
+ void acquire()
+ {
+ thread_scoped_lock lock(mutex);
+ while (count == 0) {
+ condition.wait(lock);
+ }
+ count--;
+ }
+
+ void release()
+ {
+ thread_scoped_lock lock(mutex);
+ count++;
+ condition.notify_one();
+ }
+
+ protected:
+ thread_mutex mutex;
+ thread_condition_variable condition;
+ int count;
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_SEMAPHORE_H__ */
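
As a usage illustration for the new thread_counting_semaphore (a sketch, not code from Cycles): the semaphore below lets at most two of eight workers into the guarded section at a time; plain std::thread is used only to keep the example self-contained.

#include <cstdio>
#include <thread>
#include <vector>

#include "util/util_semaphore.h"

int main()
{
  ccl::thread_counting_semaphore semaphore(2); /* at most 2 concurrent holders */

  std::vector<std::thread> workers;
  for (int i = 0; i < 8; i++) {
    workers.emplace_back([&semaphore, i] {
      semaphore.acquire(); /* blocks while two other workers hold the semaphore */
      printf("worker %d inside the limited section\n", i);
      semaphore.release(); /* wakes one waiting worker, if any */
    });
  }
  for (std::thread &worker : workers) {
    worker.join();
  }
  return 0;
}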
diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h
index 8fcaadc5f53..de0e3c39f30 100644
--- a/intern/cycles/util/util_simd.h
+++ b/intern/cycles/util/util_simd.h
@@ -45,7 +45,7 @@
# endif
-# if defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86)
+# if defined(__x86_64__) || defined(_M_X64)
# define SIMD_SET_FLUSH_TO_ZERO \
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); \
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
@@ -75,6 +75,28 @@ static struct FalseTy {
}
} False ccl_maybe_unused;
+static struct ZeroTy {
+ __forceinline operator float() const
+ {
+ return 0;
+ }
+ __forceinline operator int() const
+ {
+ return 0;
+ }
+} zero ccl_maybe_unused;
+
+static struct OneTy {
+ __forceinline operator float() const
+ {
+ return 1;
+ }
+ __forceinline operator int() const
+ {
+ return 1;
+ }
+} one ccl_maybe_unused;
+
static struct NegInfTy {
__forceinline operator float() const
{
@@ -97,6 +119,9 @@ static struct PosInfTy {
}
} inf ccl_maybe_unused, pos_inf ccl_maybe_unused;
+static struct StepTy {
+} step ccl_maybe_unused;
+
/* Intrinsics Functions */
# if defined(__BMI__) && defined(__GNUC__)
@@ -563,6 +588,13 @@ __forceinline __m128 _mm_round_ps_emu(__m128 value, const int flags)
# endif /* !(defined(__KERNEL_SSE41__) || defined(__SSE4_1__) || defined(__SSE4_2__)) */
+/* Older GCC versions do not have _mm256_cvtss_f32 yet, so define it ourselves.
+ * _mm256_castps256_ps128 generates no instructions so this is just as efficient. */
+# if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
+# undef _mm256_cvtss_f32
+# define _mm256_cvtss_f32(a) (_mm_cvtss_f32(_mm256_castps256_ps128(a)))
+# endif
+
# else /* __KERNEL_SSE2__ */
/* This section is for utility functions which operates on non-register data
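
The _mm256_cvtss_f32 fallback defined above relies on _mm256_castps256_ps128 being a pure reinterpretation of the register, so extracting the lowest lane costs no extra instructions. A tiny standalone check of the equivalent expression (compile with AVX enabled, e.g. -mavx):

#include <cstdio>
#include <immintrin.h>

int main()
{
  /* _mm256_set_ps takes elements from the highest lane down to the lowest. */
  const __m256 v = _mm256_set_ps(8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f);

  /* Same as the macro above: cast to the low 128 bits, then move the scalar
   * out; the cast itself generates no instructions. */
  const float lowest = _mm_cvtss_f32(_mm256_castps256_ps128(v));

  printf("%f\n", lowest); /* Prints 1.000000 */
  return 0;
}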
diff --git a/intern/cycles/util/util_sky_model.cpp b/intern/cycles/util/util_sky_model.cpp
deleted file mode 100644
index 4a6a9f32607..00000000000
--- a/intern/cycles/util/util_sky_model.cpp
+++ /dev/null
@@ -1,349 +0,0 @@
-/*
-This source is published under the following 3-clause BSD license.
-
-Copyright (c) 2012 - 2013, Lukas Hosek and Alexander Wilkie
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * None of the names of the contributors may be used to endorse or promote
- products derived from this software without specific prior written
- permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/* ============================================================================
-
-This file is part of a sample implementation of the analytical skylight and
-solar radiance models presented in the SIGGRAPH 2012 paper
-
-
- "An Analytic Model for Full Spectral Sky-Dome Radiance"
-
-and the 2013 IEEE CG&A paper
-
- "Adding a Solar Radiance Function to the Hosek Skylight Model"
-
- both by
-
- Lukas Hosek and Alexander Wilkie
- Charles University in Prague, Czech Republic
-
-
- Version: 1.4a, February 22nd, 2013
-
-Version history:
-
-1.4a February 22nd, 2013
- Removed unnecessary and counter-intuitive solar radius parameters
- from the interface of the colourspace sky dome initialisation functions.
-
-1.4 February 11th, 2013
- Fixed a bug which caused the relative brightness of the solar disc
- and the sky dome to be off by a factor of about 6. The sun was too
- bright: this affected both normal and alien sun scenarios. The
- coefficients of the solar radiance function were changed to fix this.
-
-1.3 January 21st, 2013 (not released to the public)
- Added support for solar discs that are not exactly the same size as
- the terrestrial sun. Also added support for suns with a different
- emission spectrum ("Alien World" functionality).
-
-1.2a December 18th, 2012
- Fixed a mistake and some inaccuracies in the solar radiance function
- explanations found in ArHosekSkyModel.h. The actual source code is
- unchanged compared to version 1.2.
-
-1.2 December 17th, 2012
- Native RGB data and a solar radiance function that matches the turbidity
- conditions were added.
-
-1.1 September 2012
- The coefficients of the spectral model are now scaled so that the output
- is given in physical units: W / (m^-2 * sr * nm). Also, the output of the
- XYZ model is now no longer scaled to the range [0...1]. Instead, it is
- the result of a simple conversion from spectral data via the CIE 2 degree
- standard observer matching functions. Therefore, after multiplication
- with 683 lm / W, the Y channel now corresponds to luminance in lm.
-
-1.0 May 11th, 2012
- Initial release.
-
-
-Please visit http://cgg.mff.cuni.cz/projects/SkylightModelling/ to check if
-an updated version of this code has been published!
-
-============================================================================ */
-
-/*
-
-All instructions on how to use this code are in the accompanying header file.
-
-*/
-
-#include "util/util_sky_model.h"
-#include "util/util_sky_model_data.h"
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-
-CCL_NAMESPACE_BEGIN
-
-// Some macro definitions that occur elsewhere in ART, and that have to be
-// replicated to make this a stand-alone module.
-
-#ifndef MATH_PI
-# define MATH_PI 3.141592653589793
-#endif
-
-#ifndef MATH_DEG_TO_RAD
-# define MATH_DEG_TO_RAD (MATH_PI / 180.0)
-#endif
-
-#ifndef DEGREES
-# define DEGREES *MATH_DEG_TO_RAD
-#endif
-
-#ifndef TERRESTRIAL_SOLAR_RADIUS
-# define TERRESTRIAL_SOLAR_RADIUS ((0.51 DEGREES) / 2.0)
-#endif
-
-#ifndef ALLOC
-# define ALLOC(_struct) ((_struct *)malloc(sizeof(_struct)))
-#endif
-
-// internal definitions
-
-typedef const double *ArHosekSkyModel_Dataset;
-typedef const double *ArHosekSkyModel_Radiance_Dataset;
-
-// internal functions
-
-static void ArHosekSkyModel_CookConfiguration(ArHosekSkyModel_Dataset dataset,
- ArHosekSkyModelConfiguration config,
- double turbidity,
- double albedo,
- double solar_elevation)
-{
- const double *elev_matrix;
-
- int int_turbidity = (int)turbidity;
- double turbidity_rem = turbidity - (double)int_turbidity;
-
- solar_elevation = pow(solar_elevation / (MATH_PI / 2.0), (1.0 / 3.0));
-
- // alb 0 low turb
-
- elev_matrix = dataset + (9 * 6 * (int_turbidity - 1));
-
- for (unsigned int i = 0; i < 9; ++i) {
- //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4;
- config[i] =
- (1.0 - albedo) * (1.0 - turbidity_rem) *
- (pow(1.0 - solar_elevation, 5.0) * elev_matrix[i] +
- 5.0 * pow(1.0 - solar_elevation, 4.0) * solar_elevation * elev_matrix[i + 9] +
- 10.0 * pow(1.0 - solar_elevation, 3.0) * pow(solar_elevation, 2.0) * elev_matrix[i + 18] +
- 10.0 * pow(1.0 - solar_elevation, 2.0) * pow(solar_elevation, 3.0) * elev_matrix[i + 27] +
- 5.0 * (1.0 - solar_elevation) * pow(solar_elevation, 4.0) * elev_matrix[i + 36] +
- pow(solar_elevation, 5.0) * elev_matrix[i + 45]);
- }
-
- // alb 1 low turb
- elev_matrix = dataset + (9 * 6 * 10 + 9 * 6 * (int_turbidity - 1));
- for (unsigned int i = 0; i < 9; ++i) {
- //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4;
- config[i] +=
- (albedo) * (1.0 - turbidity_rem) *
- (pow(1.0 - solar_elevation, 5.0) * elev_matrix[i] +
- 5.0 * pow(1.0 - solar_elevation, 4.0) * solar_elevation * elev_matrix[i + 9] +
- 10.0 * pow(1.0 - solar_elevation, 3.0) * pow(solar_elevation, 2.0) * elev_matrix[i + 18] +
- 10.0 * pow(1.0 - solar_elevation, 2.0) * pow(solar_elevation, 3.0) * elev_matrix[i + 27] +
- 5.0 * (1.0 - solar_elevation) * pow(solar_elevation, 4.0) * elev_matrix[i + 36] +
- pow(solar_elevation, 5.0) * elev_matrix[i + 45]);
- }
-
- if (int_turbidity == 10)
- return;
-
- // alb 0 high turb
- elev_matrix = dataset + (9 * 6 * (int_turbidity));
- for (unsigned int i = 0; i < 9; ++i) {
- //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4;
- config[i] +=
- (1.0 - albedo) * (turbidity_rem) *
- (pow(1.0 - solar_elevation, 5.0) * elev_matrix[i] +
- 5.0 * pow(1.0 - solar_elevation, 4.0) * solar_elevation * elev_matrix[i + 9] +
- 10.0 * pow(1.0 - solar_elevation, 3.0) * pow(solar_elevation, 2.0) * elev_matrix[i + 18] +
- 10.0 * pow(1.0 - solar_elevation, 2.0) * pow(solar_elevation, 3.0) * elev_matrix[i + 27] +
- 5.0 * (1.0 - solar_elevation) * pow(solar_elevation, 4.0) * elev_matrix[i + 36] +
- pow(solar_elevation, 5.0) * elev_matrix[i + 45]);
- }
-
- // alb 1 high turb
- elev_matrix = dataset + (9 * 6 * 10 + 9 * 6 * (int_turbidity));
- for (unsigned int i = 0; i < 9; ++i) {
- //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4;
- config[i] +=
- (albedo) * (turbidity_rem) *
- (pow(1.0 - solar_elevation, 5.0) * elev_matrix[i] +
- 5.0 * pow(1.0 - solar_elevation, 4.0) * solar_elevation * elev_matrix[i + 9] +
- 10.0 * pow(1.0 - solar_elevation, 3.0) * pow(solar_elevation, 2.0) * elev_matrix[i + 18] +
- 10.0 * pow(1.0 - solar_elevation, 2.0) * pow(solar_elevation, 3.0) * elev_matrix[i + 27] +
- 5.0 * (1.0 - solar_elevation) * pow(solar_elevation, 4.0) * elev_matrix[i + 36] +
- pow(solar_elevation, 5.0) * elev_matrix[i + 45]);
- }
-}
-
-static double ArHosekSkyModel_CookRadianceConfiguration(ArHosekSkyModel_Radiance_Dataset dataset,
- double turbidity,
- double albedo,
- double solar_elevation)
-{
- const double *elev_matrix;
-
- int int_turbidity = (int)turbidity;
- double turbidity_rem = turbidity - (double)int_turbidity;
- double res;
- solar_elevation = pow(solar_elevation / (MATH_PI / 2.0), (1.0 / 3.0));
-
- // alb 0 low turb
- elev_matrix = dataset + (6 * (int_turbidity - 1));
- //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4;
- res = (1.0 - albedo) * (1.0 - turbidity_rem) *
- (pow(1.0 - solar_elevation, 5.0) * elev_matrix[0] +
- 5.0 * pow(1.0 - solar_elevation, 4.0) * solar_elevation * elev_matrix[1] +
- 10.0 * pow(1.0 - solar_elevation, 3.0) * pow(solar_elevation, 2.0) * elev_matrix[2] +
- 10.0 * pow(1.0 - solar_elevation, 2.0) * pow(solar_elevation, 3.0) * elev_matrix[3] +
- 5.0 * (1.0 - solar_elevation) * pow(solar_elevation, 4.0) * elev_matrix[4] +
- pow(solar_elevation, 5.0) * elev_matrix[5]);
-
- // alb 1 low turb
- elev_matrix = dataset + (6 * 10 + 6 * (int_turbidity - 1));
- //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4;
- res += (albedo) * (1.0 - turbidity_rem) *
- (pow(1.0 - solar_elevation, 5.0) * elev_matrix[0] +
- 5.0 * pow(1.0 - solar_elevation, 4.0) * solar_elevation * elev_matrix[1] +
- 10.0 * pow(1.0 - solar_elevation, 3.0) * pow(solar_elevation, 2.0) * elev_matrix[2] +
- 10.0 * pow(1.0 - solar_elevation, 2.0) * pow(solar_elevation, 3.0) * elev_matrix[3] +
- 5.0 * (1.0 - solar_elevation) * pow(solar_elevation, 4.0) * elev_matrix[4] +
- pow(solar_elevation, 5.0) * elev_matrix[5]);
- if (int_turbidity == 10)
- return res;
-
- // alb 0 high turb
- elev_matrix = dataset + (6 * (int_turbidity));
- //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4;
- res += (1.0 - albedo) * (turbidity_rem) *
- (pow(1.0 - solar_elevation, 5.0) * elev_matrix[0] +
- 5.0 * pow(1.0 - solar_elevation, 4.0) * solar_elevation * elev_matrix[1] +
- 10.0 * pow(1.0 - solar_elevation, 3.0) * pow(solar_elevation, 2.0) * elev_matrix[2] +
- 10.0 * pow(1.0 - solar_elevation, 2.0) * pow(solar_elevation, 3.0) * elev_matrix[3] +
- 5.0 * (1.0 - solar_elevation) * pow(solar_elevation, 4.0) * elev_matrix[4] +
- pow(solar_elevation, 5.0) * elev_matrix[5]);
-
- // alb 1 high turb
- elev_matrix = dataset + (6 * 10 + 6 * (int_turbidity));
- //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4;
- res += (albedo) * (turbidity_rem) *
- (pow(1.0 - solar_elevation, 5.0) * elev_matrix[0] +
- 5.0 * pow(1.0 - solar_elevation, 4.0) * solar_elevation * elev_matrix[1] +
- 10.0 * pow(1.0 - solar_elevation, 3.0) * pow(solar_elevation, 2.0) * elev_matrix[2] +
- 10.0 * pow(1.0 - solar_elevation, 2.0) * pow(solar_elevation, 3.0) * elev_matrix[3] +
- 5.0 * (1.0 - solar_elevation) * pow(solar_elevation, 4.0) * elev_matrix[4] +
- pow(solar_elevation, 5.0) * elev_matrix[5]);
- return res;
-}
-
-static double ArHosekSkyModel_GetRadianceInternal(ArHosekSkyModelConfiguration configuration,
- double theta,
- double gamma)
-{
- const double expM = exp(configuration[4] * gamma);
- const double rayM = cos(gamma) * cos(gamma);
- const double mieM =
- (1.0 + cos(gamma) * cos(gamma)) /
- pow((1.0 + configuration[8] * configuration[8] - 2.0 * configuration[8] * cos(gamma)), 1.5);
- const double zenith = sqrt(cos(theta));
-
- return (1.0 + configuration[0] * exp(configuration[1] / (cos(theta) + 0.01))) *
- (configuration[2] + configuration[3] * expM + configuration[5] * rayM +
- configuration[6] * mieM + configuration[7] * zenith);
-}
-
-void arhosekskymodelstate_free(ArHosekSkyModelState *state)
-{
- free(state);
-}
-
-double arhosekskymodel_radiance(ArHosekSkyModelState *state,
- double theta,
- double gamma,
- double wavelength)
-{
- int low_wl = (int)((wavelength - 320.0) / 40.0);
-
- if (low_wl < 0 || low_wl >= 11)
- return 0.0;
-
- double interp = fmod((wavelength - 320.0) / 40.0, 1.0);
-
- double val_low = ArHosekSkyModel_GetRadianceInternal(state->configs[low_wl], theta, gamma) *
- state->radiances[low_wl] * state->emission_correction_factor_sky[low_wl];
-
- if (interp < 1e-6)
- return val_low;
-
- double result = (1.0 - interp) * val_low;
-
- if (low_wl + 1 < 11) {
- result += interp *
- ArHosekSkyModel_GetRadianceInternal(state->configs[low_wl + 1], theta, gamma) *
- state->radiances[low_wl + 1] * state->emission_correction_factor_sky[low_wl + 1];
- }
-
- return result;
-}
-
-// xyz and rgb versions
-
-ArHosekSkyModelState *arhosek_xyz_skymodelstate_alloc_init(const double turbidity,
- const double albedo,
- const double elevation)
-{
- ArHosekSkyModelState *state = ALLOC(ArHosekSkyModelState);
-
- state->solar_radius = TERRESTRIAL_SOLAR_RADIUS;
- state->turbidity = turbidity;
- state->albedo = albedo;
- state->elevation = elevation;
-
- for (unsigned int channel = 0; channel < 3; ++channel) {
- ArHosekSkyModel_CookConfiguration(
- datasetsXYZ[channel], state->configs[channel], turbidity, albedo, elevation);
-
- state->radiances[channel] = ArHosekSkyModel_CookRadianceConfiguration(
- datasetsXYZRad[channel], turbidity, albedo, elevation);
- }
-
- return state;
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_sky_model.h b/intern/cycles/util/util_sky_model.h
deleted file mode 100644
index 84340614b2c..00000000000
--- a/intern/cycles/util/util_sky_model.h
+++ /dev/null
@@ -1,429 +0,0 @@
-/*
-This source is published under the following 3-clause BSD license.
-
-Copyright (c) 2012 - 2013, Lukas Hosek and Alexander Wilkie
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * None of the names of the contributors may be used to endorse or promote
- products derived from this software without specific prior written
- permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/* ============================================================================
-
-This file is part of a sample implementation of the analytical skylight and
-solar radiance models presented in the SIGGRAPH 2012 paper
-
-
- "An Analytic Model for Full Spectral Sky-Dome Radiance"
-
-and the 2013 IEEE CG&A paper
-
- "Adding a Solar Radiance Function to the Hosek Skylight Model"
-
- both by
-
- Lukas Hosek and Alexander Wilkie
- Charles University in Prague, Czech Republic
-
-
- Version: 1.4a, February 22nd, 2013
-
-Version history:
-
-1.4a February 22nd, 2013
- Removed unnecessary and counter-intuitive solar radius parameters
- from the interface of the colourspace sky dome initialisation functions.
-
-1.4 February 11th, 2013
- Fixed a bug which caused the relative brightness of the solar disc
- and the sky dome to be off by a factor of about 6. The sun was too
- bright: this affected both normal and alien sun scenarios. The
- coefficients of the solar radiance function were changed to fix this.
-
-1.3 January 21st, 2013 (not released to the public)
- Added support for solar discs that are not exactly the same size as
- the terrestrial sun. Also added support for suns with a different
- emission spectrum ("Alien World" functionality).
-
-1.2a December 18th, 2012
- Fixed a mistake and some inaccuracies in the solar radiance function
- explanations found in ArHosekSkyModel.h. The actual source code is
- unchanged compared to version 1.2.
-
-1.2 December 17th, 2012
- Native RGB data and a solar radiance function that matches the turbidity
- conditions were added.
-
-1.1 September 2012
- The coefficients of the spectral model are now scaled so that the output
- is given in physical units: W / (m^-2 * sr * nm). Also, the output of the
- XYZ model is now no longer scaled to the range [0...1]. Instead, it is
- the result of a simple conversion from spectral data via the CIE 2 degree
- standard observer matching functions. Therefore, after multiplication
- with 683 lm / W, the Y channel now corresponds to luminance in lm.
-
-1.0 May 11th, 2012
- Initial release.
-
-
-Please visit http://cgg.mff.cuni.cz/projects/SkylightModelling/ to check if
-an updated version of this code has been published!
-
-============================================================================ */
-
-/*
-
-This code is taken from ART, a rendering research system written in a
-mix of C99 / Objective C. Since ART is not a small system and is intended to
-be inter-operable with other libraries, and since C does not have namespaces,
-the structures and functions in ART all have to have somewhat wordy
-canonical names that begin with Ar.../ar..., like those seen in this example.
-
-Usage information:
-==================
-
-
-Model initialisation
---------------------
-
-A separate ArHosekSkyModelState has to be maintained for each spectral
-band you want to use the model for. So in a renderer with 'num_channels'
-bands, you would need something like
-
- ArHosekSkyModelState * skymodel_state[num_channels];
-
-You then have to allocate and initialise these states. In the following code
-snippet, we assume that 'albedo' is defined as
-
- double albedo[num_channels];
-
-with a ground albedo value between [0,1] for each channel. The solar elevation
-is given in radians.
-
- for ( unsigned int i = 0; i < num_channels; i++ )
- skymodel_state[i] =
- arhosekskymodelstate_alloc_init(
- turbidity,
- albedo[i],
- solarElevation
- );
-
-Note that starting with version 1.3, there is also a second initialisation
-function which generates skydome states for different solar emission spectra
-and solar radii: 'arhosekskymodelstate_alienworld_alloc_init()'.
-
-See the notes about the "Alien World" functionality provided further down for a
-discussion of the usefulness and limits of that second initalisation function.
-Sky model states that have been initialized with either function behave in a
-completely identical fashion during use and cleanup.
-
-Using the model to generate skydome samples
--------------------------------------------
-
-Generating a skydome radiance spectrum "skydome_result" for a given location
-on the skydome determined via the angles theta and gamma works as follows:
-
- double skydome_result[num_channels];
-
- for ( unsigned int i = 0; i < num_channels; i++ )
- skydome_result[i] =
- arhosekskymodel_radiance(
- skymodel_state[i],
- theta,
- gamma,
- channel_center[i]
- );
-
-The variable "channel_center" is assumed to hold the channel center wavelengths
-for each of the num_channels samples of the spectrum we are building.
-
-
-Cleanup after use
------------------
-
-After rendering is complete, the content of the sky model states should be
-disposed of via
-
- for ( unsigned int i = 0; i < num_channels; i++ )
- arhosekskymodelstate_free( skymodel_state[i] );
-
-
-CIE XYZ Version of the Model
-----------------------------
-
-Usage of the CIE XYZ version of the model is exactly the same, except that
-num_channels is of course always 3, and that ArHosekTristimSkyModelState and
-arhosek_tristim_skymodel_radiance() have to be used instead of their spectral
-counterparts.
-
-RGB Version of the Model
-------------------------
-
-The RGB version uses sRGB primaries with a linear gamma ramp. The same set of
-functions as with the XYZ data is used, except the model is initialized
-by calling arhosek_rgb_skymodelstate_alloc_init.
-
-Solar Radiance Function
------------------------
-
-For each position on the solar disc, this function returns the entire radiance
-one sees - direct emission, as well as in-scattered light in the area of the
-solar disc. The latter is important for low solar elevations - nice images of
-the setting sun would not be possible without this. This is also the reason why
-this function, just like the regular sky dome model evaluation function, needs
-access to the sky dome data structures, as these provide information on
-in-scattered radiance.
-
-CAVEAT #1: in this release, this function is only provided in spectral form!
- RGB/XYZ versions to follow at a later date.
-
-CAVEAT #2: (fixed from release 1.3 onwards)
-
-CAVEAT #3: limb darkening renders the brightness of the solar disc
- inhomogeneous even for high solar elevations - only taking a single
- sample at the centre of the sun will yield an incorrect power
- estimate for the solar disc! Always take multiple random samples
- across the entire solar disc to estimate its power!
-
-CAVEAT #4: in this version, the limb darkening calculations still use a fairly
- computationally expensive 5th order polynomial that was directly
- taken from astronomical literature. For the purposes of Computer
- Graphics, this is needlessly accurate, though, and will be replaced
- by a cheaper approximation in a future release.
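-
-An illustrative sketch of a single spectral evaluation (see CAVEAT #1);
-'sun_result', 'disc_theta' and 'disc_gamma' are just example names, where the
-two angles describe one random sample direction towards the solar disc, and
-several such samples should be averaged as per CAVEAT #3:
-
-    double sun_result[num_channels];
-
-    for ( unsigned int i = 0; i < num_channels; i++ )
-        sun_result[i] =
-            arhosekskymodel_solar_radiance(
-                skymodel_state[i],
-                disc_theta,
-                disc_gamma,
-                channel_center[i]
-                );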
-
-"Alien World" functionality
----------------------------
-
-The Hosek sky model can be used to roughly (!) predict the appearance of
-outdoor scenes on earth-like planets, i.e. planets of a similar size and
-atmospheric make-up. Since the spectral version of our model predicts sky dome
-luminance patterns and solar radiance independently for each waveband, and
-since the intensity of each waveband is solely dependent on the input radiance
-from the star that the world in question is orbiting, it is trivial to re-scale
-the wavebands to match a different star radiance.
-
-At least in theory, the spectral version of the model has always been capable
-of this sort of thing, and the actual sky dome and solar radiance models were
-not altered at all in this release. All we did was add some support
-functionality for doing this more easily with the existing data and functions,
-and add some explanations.
-
-Just use 'arhosekskymodelstate_alienworld_alloc_init()' to initialise the sky
-model states (you will have to provide values for star temperature and solar
-intensity compared to the terrestrial sun), and do everything else as you
-did before.
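-
-A minimal sketch, assuming the star surface temperature in Kelvin is held in
-'star_temperature' and the relative brightness in 'solar_intensity' (both just
-example names), with all other inputs as in the terrestrial case:
-
-    for ( unsigned int i = 0; i < num_channels; i++ )
-        skymodel_state[i] =
-            arhosekskymodelstate_alienworld_alloc_init(
-                solarElevation,
-                solar_intensity,
-                star_temperature,
-                turbidity,
-                albedo[i]
-                );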
-
-CAVEAT #1: we assume the emission of the star that illuminates the alien world
- to be a perfect blackbody emission spectrum. This is never entirely
- realistic - real star emission spectra are considerably more complex
- than this, mainly due to absorption effects in the outer layers of
- stars. However, blackbody spectra are a reasonable first assumption
- in a usage scenario like this, where 100% accuracy is simply not
- necessary: for rendering purposes, there are likely no visible
- differences between a highly accurate solution based on a more
- involved simulation, and this approximation.
-
-CAVEAT #2: we always use limb darkening data from our own sun to provide this
- "appearance feature", even for suns of strongly different
- temperature. Which is presumably not very realistic, but (as with
- the unaltered blackbody spectrum from caveat #1) probably not a bad
- first guess, either. If you need more accuracy than we provide here,
-           please make inquiries with a friendly astrophysicist of your choice.
-
-CAVEAT #3: you have to provide a value for the solar intensity of the star
- which illuminates the alien world. For this, please bear in mind
- that there is very likely a comparatively tight range of absolute
- solar irradiance values for which an earth-like planet with an
- atmosphere like the one we assume in our model can exist in the
- first place!
-
- Too much irradiance, and the atmosphere probably boils off into
-           space; too little, and it freezes. Which means that stars of
- considerably different emission colour than our sun will have to be
- fairly different in size from it, to still provide a reasonable and
-           habitable amount of irradiance. Red stars will need to be much
- larger than our sun, while white or blue stars will have to be
- comparatively tiny. The initialisation function handles this and
- computes a plausible solar radius for a given emission spectrum. In
- terms of absolute radiometric values, you should probably not stray
- all too far from a solar intensity value of 1.0.
-
-CAVEAT #4: although we now support different solar radii for the actual solar
- disc, the sky dome luminance patterns are *not* parameterised by
- this value - i.e. the patterns stay exactly the same for different
- solar radii! Which is of course not correct. But in our experience,
- solar discs up to several degrees in diameter (! - our own sun is
- half a degree across) do not cause the luminance patterns on the sky
- to change perceptibly. The reason we know this is that we initially
- used unrealistically large suns in our brute force path tracer, in
- order to improve convergence speeds (which in the beginning were
- abysmal). Later, we managed to do the reference renderings much
- faster even with realistically small suns, and found that there was
- no real difference in skydome appearance anyway.
- Conclusion: changing the solar radius should not be over-done, so
- close orbits around red supergiants are a no-no. But for the
- purposes of getting a fairly credible first impression of what an
- alien world with a reasonably sized sun would look like, what we are
- doing here is probably still o.k.
-
-HINT #1: if you want to model the sky of an earth-like planet that orbits
- a binary star, just super-impose two of these models with solar
- intensity of ~0.5 each, and closely spaced solar positions. Light is
- additive, after all. Tattooine, here we come... :-)
-
- P.S. according to Star Wars canon, Tattooine orbits a binary
-         that is made up of a G class and a K class star. So ~5500K and
-         ~4200K, respectively, should be good first guesses for their
-         temperatures. Just in case you were wondering, after reading the
- previous paragraph.
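-
-          As a rough sketch of such a superposition - 'state_star_a' and
-          'state_star_b' are two state sets initialised with ~0.5 solar
-          intensity each, and 'gamma_to_sun_a'/'gamma_to_sun_b' are the angles
-          towards the two closely spaced suns (all example names):
-
-              value[i] =
-                  arhosekskymodel_radiance(
-                      state_star_a[i], theta, gamma_to_sun_a, channel_center[i] )
-                + arhosekskymodel_radiance(
-                      state_star_b[i], theta, gamma_to_sun_b, channel_center[i] );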
-*/
-
-CCL_NAMESPACE_BEGIN
-
-#ifndef _SKY_MODEL_H_
-# define _SKY_MODEL_H_
-
-typedef double ArHosekSkyModelConfiguration[9];
-
-// Spectral version of the model
-
-/* ----------------------------------------------------------------------------
-
- ArHosekSkyModelState struct
- ---------------------------
-
- This struct holds the pre-computation data for one particular albedo value.
- Most fields are self-explanatory, but users should never directly
- manipulate any of them anyway. The only consistent way to manipulate such
- structs is via the functions 'arhosekskymodelstate_alloc_init' and
- 'arhosekskymodelstate_free'.
-
- 'emission_correction_factor_sky'
- 'emission_correction_factor_sun'
-
- The original model coefficients were fitted against the emission of
- our local sun. If a different solar emission is desired (i.e. if the
- model is being used to predict skydome appearance for an earth-like
- planet that orbits a different star), these correction factors, which
- are determined during the alloc_init step, are applied to each waveband
- separately (they default to 1.0 in normal usage). This is the simplest
- way to retrofit this sort of capability to the existing model. The
- different factors for sky and sun are needed since the solar disc may
- be of a different size compared to the terrestrial sun.
-
----------------------------------------------------------------------------- */
-
-typedef struct ArHosekSkyModelState {
- ArHosekSkyModelConfiguration configs[11];
- double radiances[11];
- double turbidity;
- double solar_radius;
- double emission_correction_factor_sky[11];
- double emission_correction_factor_sun[11];
- double albedo;
- double elevation;
-} ArHosekSkyModelState;
-
-/* ----------------------------------------------------------------------------
-
- arhosekskymodelstate_alloc_init() function
- ------------------------------------------
-
- Initialises an ArHosekSkyModelState struct for a terrestrial setting.
-
----------------------------------------------------------------------------- */
-
-ArHosekSkyModelState *arhosekskymodelstate_alloc_init(const double solar_elevation,
- const double atmospheric_turbidity,
- const double ground_albedo);
-
-/* ----------------------------------------------------------------------------
-
- arhosekskymodelstate_alienworld_alloc_init() function
- -----------------------------------------------------
-
- Initialises an ArHosekSkyModelState struct for an "alien world" setting
- with a sun of a surface temperature given in 'kelvin'. The parameter
- 'solar_intensity' controls the overall brightness of the sky, relative
- to the solar irradiance on Earth. A value of 1.0 yields a sky dome that
-   is, on average over the wavelengths covered in the model (!), as bright
- as the terrestrial sky in radiometric terms.
-
- Which means that the solar radius has to be adjusted, since the
- emissivity of a solar surface with a given temperature is more or less
-   fixed. So hotter suns have to be smaller to be as bright as the
- terrestrial sun, while cooler suns have to be larger. Note that there are
- limits to the validity of the luminance patterns of the underlying model:
- see the discussion above for more on this. In particular, an alien sun with
- a surface temperature of only 2000 Kelvin has to be very large if it is
- to be as bright as the terrestrial sun - so large that the luminance
- patterns are no longer a really good fit in that case.
-
- If you need information about the solar radius that the model computes
- for a given temperature (say, for light source sampling purposes), you
- have to query the 'solar_radius' variable of the sky model state returned
- *after* running this function.
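-
-   Illustrative sketch, assuming 'state' was returned by this function:
-
-       double sun_radius = state->solar_radius;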
-
----------------------------------------------------------------------------- */
-
-ArHosekSkyModelState *arhosekskymodelstate_alienworld_alloc_init(
- const double solar_elevation,
- const double solar_intensity,
- const double solar_surface_temperature_kelvin,
- const double atmospheric_turbidity,
- const double ground_albedo);
-
-void arhosekskymodelstate_free(ArHosekSkyModelState *state);
-
-double arhosekskymodel_radiance(ArHosekSkyModelState *state,
- double theta,
- double gamma,
- double wavelength);
-
-// CIE XYZ and RGB versions
-
-ArHosekSkyModelState *arhosek_xyz_skymodelstate_alloc_init(const double turbidity,
- const double albedo,
- const double elevation);
-
-ArHosekSkyModelState *arhosek_rgb_skymodelstate_alloc_init(const double turbidity,
- const double albedo,
- const double elevation);
-
-double arhosek_tristim_skymodel_radiance(ArHosekSkyModelState *state,
- double theta,
- double gamma,
- int channel);
-
-// Delivers the complete function: sky + sun, including limb darkening.
-// Please read the above description before using this - there are several
-// caveats!
-
-double arhosekskymodel_solar_radiance(ArHosekSkyModelState *state,
- double theta,
- double gamma,
- double wavelength);
-
-#endif // _SKY_MODEL_H_
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_sky_model_data.h b/intern/cycles/util/util_sky_model_data.h
deleted file mode 100644
index a2a3935eb84..00000000000
--- a/intern/cycles/util/util_sky_model_data.h
+++ /dev/null
@@ -1,3847 +0,0 @@
-/*
-This source is published under the following 3-clause BSD license.
-
-Copyright (c) 2012 - 2013, Lukas Hosek and Alexander Wilkie
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * None of the names of the contributors may be used to endorse or promote
- products derived from this software without specific prior written
- permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/* ============================================================================
-
-This file is part of a sample implementation of the analytical skylight and
-solar radiance models presented in the SIGGRAPH 2012 paper
-
-
- "An Analytic Model for Full Spectral Sky-Dome Radiance"
-
-and the 2013 IEEE CG&A paper
-
- "Adding a Solar Radiance Function to the Hosek Skylight Model"
-
- both by
-
- Lukas Hosek and Alexander Wilkie
- Charles University in Prague, Czech Republic
-
-
- Version: 1.4a, February 22nd, 2013
-
-Version history:
-
-1.4a February 22nd, 2013
- Removed unnecessary and counter-intuitive solar radius parameters
- from the interface of the colourspace sky dome initialisation functions.
-
-1.4 February 11th, 2013
- Fixed a bug which caused the relative brightness of the solar disc
- and the sky dome to be off by a factor of about 6. The sun was too
- bright: this affected both normal and alien sun scenarios. The
- coefficients of the solar radiance function were changed to fix this.
-
-1.3 January 21st, 2013 (not released to the public)
- Added support for solar discs that are not exactly the same size as
- the terrestrial sun. Also added support for suns with a different
- emission spectrum ("Alien World" functionality).
-
-1.2a December 18th, 2012
- Fixed a mistake and some inaccuracies in the solar radiance function
- explanations found in ArHosekSkyModel.h. The actual source code is
- unchanged compared to version 1.2.
-
-1.2 December 17th, 2012
- Native RGB data and a solar radiance function that matches the turbidity
- conditions were added.
-
-1.1 September 2012
- The coefficients of the spectral model are now scaled so that the output
-    is given in physical units: W / (m^2 * sr * nm). Also, the output of the
- XYZ model is now no longer scaled to the range [0...1]. Instead, it is
- the result of a simple conversion from spectral data via the CIE 2 degree
- standard observer matching functions. Therefore, after multiplication
-    with 683 lm / W, the Y channel now corresponds to luminance in cd/m^2.
-
-1.0 May 11th, 2012
- Initial release.
-
-
-Please visit http://cgg.mff.cuni.cz/projects/SkylightModelling/ to check if
-an updated version of this code has been published!
-
-============================================================================ */
-
-CCL_NAMESPACE_BEGIN
-
-/*
-
-This file contains the coefficient data for the XYZ colour space version of
-the model.
-
-*/
-
-// Uses Sep 9 pattern / Aug 23 mean dataset
-
-static const double datasetXYZ1[] = {
- // albedo 0, turbidity 1
- -1.117001e+000,
- -1.867262e-001,
- -1.113505e+001,
- 1.259865e+001,
- -3.937339e-002,
- 1.167571e+000,
- 7.100686e-003,
- 3.592678e+000,
- 6.083296e-001,
- -1.152006e+000,
- -1.926669e-001,
- 6.152049e+000,
- -4.770802e+000,
- -8.704701e-002,
- 7.483626e-001,
- 3.372718e-002,
- 4.464592e+000,
- 4.036546e-001,
- -1.072371e+000,
- -2.696632e-001,
- 2.816168e-001,
- 1.820571e+000,
- -3.742666e-001,
- 2.080607e+000,
- -7.675295e-002,
- -2.835366e+000,
- 1.129329e+000,
- -1.109935e+000,
- -1.532764e-001,
- 1.198787e+000,
- -9.015183e-001,
- 5.173015e-003,
- 5.749178e-001,
- 1.075633e-001,
- 4.387949e+000,
- 2.650413e-001,
- -1.052297e+000,
- -2.229452e-001,
- 1.952347e+000,
- 5.727205e-001,
- -4.885070e+000,
- 1.984016e+000,
- -1.106197e-001,
- -4.898361e-001,
- 8.907873e-001,
- -1.070108e+000,
- -1.600465e-001,
- 1.593886e+000,
- -4.479251e-005,
- -3.306541e+000,
- 9.390193e-001,
- 9.513168e-002,
- 2.343583e+000,
- 5.335404e-001,
- // albedo 0, turbidity 2
- -1.113253e+000,
- -1.699600e-001,
- -1.038822e+001,
- 1.137513e+001,
- -4.040911e-002,
- 1.037455e+000,
- 4.991792e-002,
- 4.801919e+000,
- 6.302710e-001,
- -1.135747e+000,
- -1.678594e-001,
- 4.970755e+000,
- -4.430230e+000,
- -6.657408e-002,
- 3.636161e-001,
- 1.558009e-001,
- 6.013370e+000,
- 3.959601e-001,
- -1.095892e+000,
- -2.732595e-001,
- 7.666496e-001,
- 1.350731e+000,
- -4.401401e-001,
- 2.470135e+000,
- -1.707929e-001,
- -3.260793e+000,
- 1.170337e+000,
- -1.073668e+000,
- -2.603929e-002,
- -1.944589e-001,
- 4.575207e-001,
- 6.878164e-001,
- -1.390770e-001,
- 3.690299e-001,
- 7.885781e+000,
- 1.877694e-001,
- -1.070091e+000,
- -2.798957e-001,
- 2.338478e+000,
- -2.647221e+000,
- -7.387808e+000,
- 2.329210e+000,
- -1.644639e-001,
- -2.003710e+000,
- 9.874527e-001,
- -1.067120e+000,
- -1.418866e-001,
- 1.254090e+000,
- 6.053048e+000,
- -2.918892e+000,
- 5.322812e-001,
- 1.613053e-001,
- 3.018161e+000,
- 5.274090e-001,
- // albedo 0, turbidity 3
- -1.129483e+000,
- -1.890619e-001,
- -9.065101e+000,
- 9.659923e+000,
- -3.607819e-002,
- 8.314359e-001,
- 8.181661e-002,
- 4.768868e+000,
- 6.339777e-001,
- -1.146420e+000,
- -1.883579e-001,
- 3.309173e+000,
- -3.127882e+000,
- -6.938176e-002,
- 3.987113e-001,
- 1.400581e-001,
- 6.283042e+000,
- 5.267076e-001,
- -1.128348e+000,
- -2.641305e-001,
- 1.223176e+000,
- 5.514952e-002,
- -3.490649e-001,
- 1.997784e+000,
- -4.123709e-002,
- -2.251251e+000,
- 9.483466e-001,
- -1.025820e+000,
- 1.404690e-002,
- -1.187406e+000,
- 2.729900e+000,
- 5.877588e-001,
- -2.761140e-001,
- 4.602633e-001,
- 8.305125e+000,
- 3.945001e-001,
- -1.083957e+000,
- -2.606679e-001,
- 2.207108e+000,
- -7.202803e+000,
- -5.968103e+000,
- 2.129455e+000,
- -7.789512e-002,
- -1.137688e+000,
- 8.871769e-001,
- -1.062465e+000,
- -1.512189e-001,
- 1.042881e+000,
- 1.427839e+001,
- -4.242214e+000,
- 4.038100e-001,
- 1.997780e-001,
- 2.814449e+000,
- 5.803196e-001,
- // albedo 0, turbidity 4
- -1.175099e+000,
- -2.410789e-001,
- -1.108587e+001,
- 1.133404e+001,
- -1.819300e-002,
- 6.772942e-001,
- 9.605043e-002,
- 4.231166e+000,
- 6.239972e-001,
- -1.224207e+000,
- -2.883527e-001,
- 3.002206e+000,
- -2.649612e+000,
- -4.795418e-002,
- 4.984398e-001,
- 3.251434e-002,
- 4.851611e+000,
- 6.551019e-001,
- -1.136955e+000,
- -2.423048e-001,
- 1.058823e+000,
- -2.489236e-001,
- -2.462179e-001,
- 1.933140e+000,
- 9.106828e-002,
- -1.905869e-001,
- 8.171065e-001,
- -1.014535e+000,
- -8.262500e-003,
- -1.448017e+000,
- 2.295788e+000,
- 3.510334e-001,
- -1.477418e+000,
- 5.432449e-001,
- 5.762796e+000,
- 4.908751e-001,
- -1.070666e+000,
- -2.379780e-001,
- 1.844589e+000,
- -5.442448e+000,
- -4.012768e+000,
- 2.945275e+000,
- 9.854725e-003,
- 8.455959e-002,
- 8.145030e-001,
- -1.071525e+000,
- -1.777132e-001,
- 8.076590e-001,
- 9.925865e+000,
- -3.324623e+000,
- -6.367437e-001,
- 2.844581e-001,
- 2.248384e+000,
- 6.544022e-001,
- // albedo 0, turbidity 5
- -1.218818e+000,
- -2.952382e-001,
- -1.345975e+001,
- 1.347153e+001,
- -6.814585e-003,
- 5.079068e-001,
- 1.197230e-001,
- 3.776949e+000,
- 5.836961e-001,
- -1.409868e+000,
- -5.114330e-001,
- 2.776539e+000,
- -2.039001e+000,
- -2.673769e-002,
- 4.145288e-001,
- 7.829342e-004,
- 2.275883e+000,
- 6.629691e-001,
- -1.069151e+000,
- -9.434247e-002,
- 7.293972e-001,
- -1.222473e+000,
- -1.533461e-001,
- 2.160357e+000,
- 4.626837e-002,
- 3.852415e+000,
- 8.593570e-001,
- -1.021306e+000,
- -1.149551e-001,
- -1.108414e+000,
- 4.178343e+000,
- 4.013665e-001,
- -2.222814e+000,
- 6.929462e-001,
- 1.392652e+000,
- 4.401662e-001,
- -1.074251e+000,
- -2.224002e-001,
- 1.372356e+000,
- -8.858704e+000,
- -3.922660e+000,
- 3.020018e+000,
- -1.458724e-002,
- 1.511186e+000,
- 8.288064e-001,
- -1.062048e+000,
- -1.526582e-001,
- 4.921067e-001,
- 1.485522e+001,
- -3.229936e+000,
- -8.426604e-001,
- 3.916243e-001,
- 2.678994e+000,
- 6.689264e-001,
- // albedo 0, turbidity 6
- -1.257023e+000,
- -3.364700e-001,
- -1.527795e+001,
- 1.504223e+001,
- 2.717715e-003,
- 3.029910e-001,
- 1.636851e-001,
- 3.561663e+000,
- 5.283161e-001,
- -1.635124e+000,
- -7.329993e-001,
- 3.523939e+000,
- -2.566337e+000,
- -1.902543e-002,
- 5.505483e-001,
- -6.242176e-002,
- 1.065992e+000,
- 6.654236e-001,
- -9.295823e-001,
- 4.845834e-002,
- -2.992990e-001,
- -2.001327e-001,
- -8.019339e-002,
- 1.807806e+000,
- 9.020277e-002,
- 5.095372e+000,
- 8.639936e-001,
- -1.093740e+000,
- -2.148608e-001,
- -5.216240e-001,
- 2.119777e+000,
- 9.506454e-002,
- -1.831439e+000,
- 6.961204e-001,
- 1.102084e-001,
- 4.384319e-001,
- -1.044181e+000,
- -1.849257e-001,
- 9.071246e-001,
- -4.648901e+000,
- -2.279385e+000,
- 2.356502e+000,
- -4.169147e-002,
- 1.932557e+000,
- 8.296550e-001,
- -1.061451e+000,
- -1.458745e-001,
- 2.952267e-001,
- 8.967214e+000,
- -3.726228e+000,
- -5.022316e-001,
- 5.684877e-001,
- 3.102347e+000,
- 6.658443e-001,
- // albedo 0, turbidity 7
- -1.332391e+000,
- -4.127769e-001,
- -9.328643e+000,
- 9.046194e+000,
- 3.457775e-003,
- 3.377425e-001,
- 1.530909e-001,
- 3.301209e+000,
- 4.997917e-001,
- -1.932002e+000,
- -9.947777e-001,
- -2.042329e+000,
- 3.586940e+000,
- -5.642182e-002,
- 8.130478e-001,
- -8.195988e-002,
- 1.118294e-001,
- 5.617231e-001,
- -8.707374e-001,
- 1.286999e-001,
- 1.820054e+000,
- -4.674706e+000,
- 3.317471e-003,
- 5.919018e-001,
- 1.975278e-001,
- 6.686519e+000,
- 9.631727e-001,
- -1.070378e+000,
- -3.030579e-001,
- -9.041938e-001,
- 6.200201e+000,
- 1.232207e-001,
- -3.650628e-001,
- 5.029403e-001,
- -2.903162e+000,
- 3.811408e-001,
- -1.063035e+000,
- -1.637545e-001,
- 5.853072e-001,
- -7.889906e+000,
- -1.200641e+000,
- 1.035018e+000,
- 1.192093e-001,
- 3.267054e+000,
- 8.416151e-001,
- -1.053655e+000,
- -1.562286e-001,
- 2.423683e-001,
- 1.128575e+001,
- -4.363262e+000,
- -7.314160e-002,
- 5.642088e-001,
- 2.514023e+000,
- 6.670457e-001,
- // albedo 0, turbidity 8
- -1.366112e+000,
- -4.718287e-001,
- -7.876222e+000,
- 7.746900e+000,
- -9.182309e-003,
- 4.716076e-001,
- 8.320252e-002,
- 3.165603e+000,
- 5.392334e-001,
- -2.468204e+000,
- -1.336340e+000,
- -5.386723e+000,
- 7.072672e+000,
- -8.329266e-002,
- 8.636876e-001,
- -1.978177e-002,
- -1.326218e-001,
- 2.979222e-001,
- -9.653522e-001,
- -2.373416e-002,
- 1.810250e+000,
- -6.467262e+000,
- 1.410706e-001,
- -4.753717e-001,
- 3.003095e-001,
- 6.551163e+000,
- 1.151083e+000,
- -8.943186e-001,
- -2.487152e-001,
- -2.308960e-001,
- 8.512648e+000,
- 1.298402e-001,
- 1.034705e+000,
- 2.303509e-001,
- -3.924095e+000,
- 2.982717e-001,
- -1.146999e+000,
- -2.318784e-001,
- 8.992419e-002,
- -9.933614e+000,
- -8.860920e-001,
- -3.071656e-002,
- 2.852012e-001,
- 3.046199e+000,
- 8.599001e-001,
- -1.032399e+000,
- -1.645145e-001,
- 2.683599e-001,
- 1.327701e+001,
- -4.407670e+000,
- 7.709869e-002,
- 4.951727e-001,
- 1.957277e+000,
- 6.630943e-001,
- // albedo 0, turbidity 9
- -1.469070e+000,
- -6.135092e-001,
- -6.506263e+000,
- 6.661315e+000,
- -3.835383e-002,
- 7.150413e-001,
- 7.784318e-003,
- 2.820577e+000,
- 6.756784e-001,
- -2.501583e+000,
- -1.247404e+000,
- -1.523462e+001,
- 1.633191e+001,
- -1.204803e-002,
- 5.896471e-001,
- -2.002023e-002,
- 1.144647e+000,
- 6.177874e-002,
- -2.438672e+000,
- -1.127291e+000,
- 5.731172e+000,
- -1.021350e+001,
- 6.165610e-002,
- -7.752641e-001,
- 4.708254e-001,
- 4.176847e+000,
- 1.200881e+000,
- -1.513427e-001,
- 9.792731e-002,
- -1.612349e+000,
- 9.814289e+000,
- 5.188921e-002,
- 1.716403e+000,
- -7.039255e-002,
- -2.815115e+000,
- 3.291874e-001,
- -1.318511e+000,
- -3.650554e-001,
- 4.221268e-001,
- -9.294529e+000,
- -4.397520e-002,
- -8.100625e-001,
- 3.742719e-001,
- 1.834166e+000,
- 8.223450e-001,
- -1.016009e+000,
- -1.820264e-001,
- 1.278426e-001,
- 1.182696e+001,
- -4.801528e+000,
- 4.947899e-001,
- 4.660378e-001,
- 1.601254e+000,
- 6.702359e-001,
- // albedo 0, turbidity 10
- -1.841310e+000,
- -9.781779e-001,
- -4.610903e+000,
- 4.824662e+000,
- -5.100806e-002,
- 6.463776e-001,
- -6.377724e-006,
- 2.216875e+000,
- 8.618530e-001,
- -2.376373e+000,
- -1.108657e+000,
- -1.489799e+001,
- 1.546458e+001,
- 4.091025e-002,
- 9.761780e-002,
- -1.048958e-002,
- 2.165834e+000,
- -1.609171e-001,
- -4.710318e+000,
- -2.261963e+000,
- 6.947327e+000,
- -1.034828e+001,
- -1.325542e-001,
- 7.508674e-001,
- 2.247553e-001,
- 2.873142e+000,
- 1.297100e+000,
- 2.163750e-001,
- -1.944345e-001,
- -2.437860e+000,
- 1.011314e+001,
- 4.450500e-001,
- 3.111492e-001,
- 2.751323e-001,
- -1.627906e+000,
- 2.531213e-001,
- -1.258794e+000,
- -3.524641e-001,
- 8.425444e-001,
- -1.085313e+001,
- -1.154381e+000,
- -4.638014e-001,
- -2.781115e-003,
- 4.344498e-001,
- 8.507091e-001,
- -1.018938e+000,
- -1.804153e-001,
- -6.354054e-002,
- 1.573150e+001,
- -4.386999e+000,
- 6.211115e-001,
- 5.294648e-001,
- 1.580749e+000,
- 6.586655e-001,
- // albedo 1, turbidity 1
- -1.116416e+000,
- -1.917524e-001,
- -1.068233e+001,
- 1.222221e+001,
- -3.668978e-002,
- 1.054022e+000,
- 1.592132e-002,
- 3.180583e+000,
- 5.627370e-001,
- -1.132341e+000,
- -1.671286e-001,
- 5.976499e+000,
- -4.227366e+000,
- -9.542489e-002,
- 8.664938e-001,
- 8.351793e-003,
- 4.876068e+000,
- 4.492779e-001,
- -1.087635e+000,
- -3.173679e-001,
- 4.314407e-001,
- 1.100555e+000,
- -4.410057e-001,
- 1.677253e+000,
- -3.005925e-002,
- -4.201249e+000,
- 1.070902e+000,
- -1.083031e+000,
- -8.847705e-002,
- 1.291773e+000,
- 4.546776e-001,
- 3.091894e-001,
- 7.261760e-001,
- 4.203659e-002,
- 5.990615e+000,
- 3.704756e-001,
- -1.057899e+000,
- -2.246706e-001,
- 2.329563e+000,
- -1.219656e+000,
- -5.335260e+000,
- 8.545378e-001,
- -3.906209e-002,
- -9.025499e-001,
- 7.797348e-001,
- -1.073305e+000,
- -1.522553e-001,
- 1.767063e+000,
- 1.904280e+000,
- -3.101673e+000,
- 3.995856e-001,
- 2.905192e-002,
- 2.563977e+000,
- 5.753067e-001,
- // albedo 1, turbidity 2
- -1.113674e+000,
- -1.759694e-001,
- -9.754125e+000,
- 1.087391e+001,
- -3.841093e-002,
- 9.524272e-001,
- 5.680219e-002,
- 4.227034e+000,
- 6.029571e-001,
- -1.126496e+000,
- -1.680281e-001,
- 5.332352e+000,
- -4.575579e+000,
- -6.761755e-002,
- 3.295335e-001,
- 1.194896e-001,
- 5.570901e+000,
- 4.536185e-001,
- -1.103074e+000,
- -2.681801e-001,
- 6.571479e-002,
- 2.396522e+000,
- -4.551280e-001,
- 2.466331e+000,
- -1.232022e-001,
- -3.023201e+000,
- 1.086379e+000,
- -1.053299e+000,
- -2.697173e-002,
- 8.379121e-001,
- -9.681458e-001,
- 5.890692e-001,
- -4.872027e-001,
- 2.936929e-001,
- 7.510139e+000,
- 3.079122e-001,
- -1.079553e+000,
- -2.710448e-001,
- 2.462379e+000,
- -3.713554e-001,
- -8.534512e+000,
- 1.828242e+000,
- -1.686398e-001,
- -1.961340e+000,
- 8.941077e-001,
- -1.069741e+000,
- -1.396394e-001,
- 1.657868e+000,
- 3.236313e+000,
- -2.706344e+000,
- -2.948122e-001,
- 1.314816e-001,
- 2.868457e+000,
- 5.413403e-001,
- // albedo 1, turbidity 3
- -1.131649e+000,
- -1.954455e-001,
- -7.751595e+000,
- 8.685861e+000,
- -4.910871e-002,
- 8.992952e-001,
- 4.710143e-002,
- 4.254818e+000,
- 6.821116e-001,
- -1.156689e+000,
- -1.884324e-001,
- 3.163519e+000,
- -3.091522e+000,
- -6.613927e-002,
- -2.575883e-002,
- 1.640065e-001,
- 6.073643e+000,
- 4.453468e-001,
- -1.079224e+000,
- -2.621389e-001,
- 9.446437e-001,
- 1.448479e+000,
- -3.969384e-001,
- 2.626638e+000,
- -8.101186e-002,
- -3.016355e+000,
- 1.076295e+000,
- -1.080832e+000,
- 1.033057e-002,
- -3.500156e-001,
- -3.281419e-002,
- 5.655512e-001,
- -1.156742e+000,
- 4.534710e-001,
- 8.774122e+000,
- 2.772869e-001,
- -1.051202e+000,
- -2.679975e-001,
- 2.719109e+000,
- -2.190316e+000,
- -6.878798e+000,
- 2.250481e+000,
- -2.030252e-001,
- -2.026527e+000,
- 9.701096e-001,
- -1.089849e+000,
- -1.598589e-001,
- 1.564748e+000,
- 6.869187e+000,
- -3.053670e+000,
- -6.110435e-001,
- 1.644472e-001,
- 2.370452e+000,
- 5.511770e-001,
- // albedo 1, turbidity 4
- -1.171419e+000,
- -2.429746e-001,
- -8.991334e+000,
- 9.571216e+000,
- -2.772861e-002,
- 6.688262e-001,
- 7.683478e-002,
- 3.785611e+000,
- 6.347635e-001,
- -1.228554e+000,
- -2.917562e-001,
- 2.753986e+000,
- -2.491780e+000,
- -4.663434e-002,
- 3.118303e-001,
- 7.546506e-002,
- 4.463096e+000,
- 5.955071e-001,
- -1.093124e+000,
- -2.447767e-001,
- 9.097406e-001,
- 5.448296e-001,
- -2.957824e-001,
- 2.024167e+000,
- -5.152333e-004,
- -1.069081e+000,
- 9.369565e-001,
- -1.056994e+000,
- 1.569507e-002,
- -8.217491e-001,
- 1.870818e+000,
- 7.061930e-001,
- -1.483928e+000,
- 5.978206e-001,
- 6.864902e+000,
- 3.673332e-001,
- -1.054871e+000,
- -2.758129e-001,
- 2.712807e+000,
- -5.950110e+000,
- -6.554039e+000,
- 2.447523e+000,
- -1.895171e-001,
- -1.454292e+000,
- 9.131738e-001,
- -1.100218e+000,
- -1.746241e-001,
- 1.438505e+000,
- 1.115481e+001,
- -3.266076e+000,
- -8.837357e-001,
- 1.970100e-001,
- 1.991595e+000,
- 5.907821e-001,
- // albedo 1, turbidity 5
- -1.207267e+000,
- -2.913610e-001,
- -1.103767e+001,
- 1.140724e+001,
- -1.416800e-002,
- 5.564047e-001,
- 8.476262e-002,
- 3.371255e+000,
- 6.221335e-001,
- -1.429698e+000,
- -5.374218e-001,
- 2.837524e+000,
- -2.221936e+000,
- -2.422337e-002,
- 9.313758e-002,
- 7.190250e-002,
- 1.869022e+000,
- 5.609035e-001,
- -1.002274e+000,
- -6.972810e-002,
- 4.031308e-001,
- -3.932997e-001,
- -1.521923e-001,
- 2.390646e+000,
- -6.893990e-002,
- 2.999661e+000,
- 1.017843e+000,
- -1.081168e+000,
- -1.178666e-001,
- -4.968080e-001,
- 3.919299e+000,
- 6.046866e-001,
- -2.440615e+000,
- 7.891538e-001,
- 2.140835e+000,
- 2.740470e-001,
- -1.050727e+000,
- -2.307688e-001,
- 2.276396e+000,
- -9.454407e+000,
- -5.505176e+000,
- 2.992620e+000,
- -2.450942e-001,
- 6.078372e-001,
- 9.606765e-001,
- -1.103752e+000,
- -1.810202e-001,
- 1.375044e+000,
- 1.589095e+001,
- -3.438954e+000,
- -1.265669e+000,
- 2.475172e-001,
- 1.680768e+000,
- 5.978056e-001,
- // albedo 1, turbidity 6
- -1.244324e+000,
- -3.378542e-001,
- -1.111001e+001,
- 1.137784e+001,
- -7.896794e-003,
- 4.808023e-001,
- 9.249904e-002,
- 3.025816e+000,
- 5.880239e-001,
- -1.593165e+000,
- -7.027621e-001,
- 2.220896e+000,
- -1.437709e+000,
- -1.534738e-002,
- 6.286958e-002,
- 6.644555e-002,
- 1.091727e+000,
- 5.470080e-001,
- -9.136506e-001,
- 1.344874e-002,
- 7.772636e-001,
- -1.209396e+000,
- -1.408978e-001,
- 2.433718e+000,
- -1.041938e-001,
- 3.791244e+000,
- 1.037916e+000,
- -1.134968e+000,
- -1.803315e-001,
- -9.267335e-001,
- 4.576670e+000,
- 6.851928e-001,
- -2.805000e+000,
- 8.687208e-001,
- 1.161483e+000,
- 2.571688e-001,
- -1.017037e+000,
- -2.053943e-001,
- 2.361640e+000,
- -9.887818e+000,
- -5.122889e+000,
- 3.287088e+000,
- -2.594102e-001,
- 8.578927e-001,
- 9.592340e-001,
- -1.118723e+000,
- -1.934942e-001,
- 1.226023e+000,
- 1.674140e+001,
- -3.277335e+000,
- -1.629809e+000,
- 2.765232e-001,
- 1.637713e+000,
- 6.113963e-001,
- // albedo 1, turbidity 7
- -1.314779e+000,
- -4.119915e-001,
- -1.241150e+001,
- 1.241578e+001,
- 2.344284e-003,
- 2.980837e-001,
- 1.414613e-001,
- 2.781731e+000,
- 4.998556e-001,
- -1.926199e+000,
- -1.020038e+000,
- 2.569200e+000,
- -1.081159e+000,
- -2.266833e-002,
- 3.588668e-001,
- 8.750078e-003,
- -2.452171e-001,
- 4.796758e-001,
- -7.780002e-001,
- 1.850647e-001,
- 4.445456e-002,
- -2.409297e+000,
- -7.816346e-002,
- 1.546790e+000,
- -2.807227e-002,
- 5.998176e+000,
- 1.132396e+000,
- -1.179326e+000,
- -3.578330e-001,
- -2.392933e-001,
- 6.467883e+000,
- 5.904596e-001,
- -1.869975e+000,
- 8.045839e-001,
- -2.498121e+000,
- 1.610633e-001,
- -1.009956e+000,
- -1.311896e-001,
- 1.726577e+000,
- -1.219356e+001,
- -3.466239e+000,
- 2.343602e+000,
- -2.252205e-001,
- 2.573681e+000,
- 1.027109e+000,
- -1.112460e+000,
- -2.063093e-001,
- 1.233051e+000,
- 2.058946e+001,
- -4.578074e+000,
- -1.145643e+000,
- 3.160192e-001,
- 1.420159e+000,
- 5.860212e-001,
- // albedo 1, turbidity 8
- -1.371689e+000,
- -4.914196e-001,
- -1.076610e+001,
- 1.107405e+001,
- -1.485077e-002,
- 5.936218e-001,
- 3.685482e-002,
- 2.599968e+000,
- 6.002204e-001,
- -2.436997e+000,
- -1.377939e+000,
- 2.130141e-002,
- 1.079593e+000,
- -1.796232e-002,
- -3.933248e-002,
- 1.610711e-001,
- -6.901181e-001,
- 1.206416e-001,
- -8.743368e-001,
- 7.331370e-002,
- 8.734259e-001,
- -3.743126e+000,
- -3.151167e-002,
- 1.297596e+000,
- -7.634926e-002,
- 6.532873e+000,
- 1.435737e+000,
- -9.810197e-001,
- -3.521634e-001,
- -2.855205e-001,
- 7.134674e+000,
- 6.839748e-001,
- -1.394841e+000,
- 6.952036e-001,
- -4.633104e+000,
- -2.173401e-002,
- -1.122958e+000,
- -1.691536e-001,
- 1.382360e+000,
- -1.102913e+001,
- -2.608171e+000,
- 1.865111e+000,
- -1.345154e-001,
- 3.112342e+000,
- 1.094134e+000,
- -1.075586e+000,
- -2.077415e-001,
- 1.171477e+000,
- 1.793270e+001,
- -4.656858e+000,
- -1.036839e+000,
- 3.338295e-001,
- 1.042793e+000,
- 5.739374e-001,
- // albedo 1, turbidity 9
- -1.465871e+000,
- -6.364486e-001,
- -8.833718e+000,
- 9.343650e+000,
- -3.223600e-002,
- 7.552848e-001,
- -3.121341e-006,
- 2.249164e+000,
- 8.094662e-001,
- -2.448924e+000,
- -1.270878e+000,
- -4.823703e+000,
- 5.853058e+000,
- -2.149127e-002,
- 3.581132e-002,
- -1.230276e-003,
- 4.892553e-001,
- -1.597657e-001,
- -2.419809e+000,
- -1.071337e+000,
- 1.575648e+000,
- -4.983580e+000,
- 9.545185e-003,
- 5.032615e-001,
- 4.186266e-001,
- 4.634147e+000,
- 1.433517e+000,
- -1.383278e-001,
- -2.797095e-002,
- -1.943067e-001,
- 6.679623e+000,
- 4.118280e-001,
- -2.744289e-001,
- -2.118722e-002,
- -4.337025e+000,
- 1.505072e-001,
- -1.341872e+000,
- -2.518572e-001,
- 1.027009e+000,
- -6.527103e+000,
- -1.081271e+000,
- 1.015465e+000,
- 2.845789e-001,
- 2.470371e+000,
- 9.278120e-001,
- -1.040640e+000,
- -2.367454e-001,
- 1.100744e+000,
- 8.827253e+000,
- -4.560794e+000,
- -7.287017e-001,
- 2.842503e-001,
- 6.336593e-001,
- 6.327335e-001,
- // albedo 1, turbidity 10
- -1.877993e+000,
- -1.025135e+000,
- -4.311037e+000,
- 4.715016e+000,
- -4.711631e-002,
- 6.335844e-001,
- -7.665398e-006,
- 1.788017e+000,
- 9.001409e-001,
- -2.281540e+000,
- -1.137668e+000,
- -1.036869e+001,
- 1.136254e+001,
- 1.961739e-002,
- -9.836174e-002,
- -6.734567e-003,
- 1.320918e+000,
- -2.400807e-001,
- -4.904054e+000,
- -2.315781e+000,
- 5.735999e+000,
- -8.626257e+000,
- -1.255643e-001,
- 1.545446e+000,
- 1.396860e-001,
- 2.972897e+000,
- 1.429934e+000,
- 4.077067e-001,
- -1.833688e-001,
- -2.450939e+000,
- 9.119433e+000,
- 4.505361e-001,
- -1.340828e+000,
- 3.973690e-001,
- -1.785370e+000,
- 9.628711e-002,
- -1.296052e+000,
- -3.250526e-001,
- 1.813294e+000,
- -1.031485e+001,
- -1.388690e+000,
- 1.239733e+000,
- -8.989196e-002,
- -3.389637e-001,
- 9.639560e-001,
- -1.062181e+000,
- -2.423444e-001,
- 7.577592e-001,
- 1.566938e+001,
- -4.462264e+000,
- -5.742810e-001,
- 3.262259e-001,
- 9.461672e-001,
- 6.232887e-001,
-};
-
-static const double datasetXYZRad1[] = {
- // albedo 0, turbidity 1
- 1.560219e+000,
- 1.417388e+000,
- 1.206927e+000,
- 1.091949e+001,
- 5.931416e+000,
- 7.304788e+000,
- // albedo 0, turbidity 2
- 1.533049e+000,
- 1.560532e+000,
- 3.685059e-001,
- 1.355040e+001,
- 5.543711e+000,
- 7.792189e+000,
- // albedo 0, turbidity 3
- 1.471043e+000,
- 1.746088e+000,
- -9.299697e-001,
- 1.720362e+001,
- 5.473384e+000,
- 8.336416e+000,
- // albedo 0, turbidity 4
- 1.355991e+000,
- 2.109348e+000,
- -3.295855e+000,
- 2.264843e+001,
- 5.454607e+000,
- 9.304656e+000,
- // albedo 0, turbidity 5
- 1.244963e+000,
- 2.547533e+000,
- -5.841485e+000,
- 2.756879e+001,
- 5.576104e+000,
- 1.043287e+001,
- // albedo 0, turbidity 6
- 1.175532e+000,
- 2.784634e+000,
- -7.212225e+000,
- 2.975347e+001,
- 6.472980e+000,
- 1.092331e+001,
- // albedo 0, turbidity 7
- 1.082973e+000,
- 3.118094e+000,
- -8.934293e+000,
- 3.186879e+001,
- 8.473885e+000,
- 1.174019e+001,
- // albedo 0, turbidity 8
- 9.692500e-001,
- 3.349574e+000,
- -1.003810e+001,
- 3.147654e+001,
- 1.338931e+001,
- 1.272547e+001,
- // albedo 0, turbidity 9
- 8.547044e-001,
- 3.151538e+000,
- -9.095567e+000,
- 2.554995e+001,
- 2.273219e+001,
- 1.410398e+001,
- // albedo 0, turbidity 10
- 7.580340e-001,
- 2.311153e+000,
- -5.170814e+000,
- 1.229669e+001,
- 3.686529e+001,
- 1.598882e+001,
- // albedo 1, turbidity 1
- 1.664273e+000,
- 1.574468e+000,
- 1.422078e+000,
- 9.768247e+000,
- 1.447338e+001,
- 1.644988e+001,
- // albedo 1, turbidity 2
- 1.638295e+000,
- 1.719586e+000,
- 5.786675e-001,
- 1.239846e+001,
- 1.415419e+001,
- 1.728605e+001,
- // albedo 1, turbidity 3
- 1.572623e+000,
- 1.921559e+000,
- -7.714802e-001,
- 1.609246e+001,
- 1.420954e+001,
- 1.825908e+001,
- // albedo 1, turbidity 4
- 1.468395e+000,
- 2.211970e+000,
- -2.845869e+000,
- 2.075027e+001,
- 1.524822e+001,
- 1.937622e+001,
- // albedo 1, turbidity 5
- 1.355047e+000,
- 2.556469e+000,
- -4.960920e+000,
- 2.460237e+001,
- 1.648360e+001,
- 2.065648e+001,
- // albedo 1, turbidity 6
- 1.291642e+000,
- 2.742036e+000,
- -6.061967e+000,
- 2.602002e+001,
- 1.819144e+001,
- 2.116712e+001,
- // albedo 1, turbidity 7
- 1.194565e+000,
- 2.972120e+000,
- -7.295779e+000,
- 2.691805e+001,
- 2.124880e+001,
- 2.201819e+001,
- // albedo 1, turbidity 8
- 1.083631e+000,
- 3.047021e+000,
- -7.766096e+000,
- 2.496261e+001,
- 2.744264e+001,
- 2.291875e+001,
- // albedo 1, turbidity 9
- 9.707994e-001,
- 2.736459e+000,
- -6.308284e+000,
- 1.760860e+001,
- 3.776291e+001,
- 2.392150e+001,
- // albedo 1, turbidity 10
- 8.574294e-001,
- 1.865155e+000,
- -2.364707e+000,
- 4.337793e+000,
- 5.092831e+001,
- 2.523432e+001,
-};
-
-static const double datasetXYZ2[] = {
- // albedo 0, turbidity 1
- -1.127942e+000,
- -1.905548e-001,
- -1.252356e+001,
- 1.375799e+001,
- -3.624732e-002,
- 1.055453e+000,
- 1.385036e-002,
- 4.176970e+000,
- 5.928345e-001,
- -1.155260e+000,
- -1.778135e-001,
- 6.216056e+000,
- -5.254116e+000,
- -8.787445e-002,
- 8.434621e-001,
- 4.025734e-002,
- 6.195322e+000,
- 3.111856e-001,
- -1.125624e+000,
- -3.217593e-001,
- 5.043919e-001,
- 1.686284e+000,
- -3.536071e-001,
- 1.476321e+000,
- -7.899019e-002,
- -4.522531e+000,
- 1.271691e+000,
- -1.081801e+000,
- -1.033234e-001,
- 9.995550e-001,
- 7.482946e-003,
- -6.776018e-002,
- 1.463141e+000,
- 9.492021e-002,
- 5.612723e+000,
- 1.298846e-001,
- -1.075320e+000,
- -2.402711e-001,
- 2.141284e+000,
- -1.203359e+000,
- -4.945188e+000,
- 1.437221e+000,
- -8.096750e-002,
- -1.028378e+000,
- 1.004164e+000,
- -1.073337e+000,
- -1.516517e-001,
- 1.639379e+000,
- 2.304669e+000,
- -3.214244e+000,
- 1.286245e+000,
- 5.613957e-002,
- 2.480902e+000,
- 4.999363e-001,
- // albedo 0, turbidity 2
- -1.128399e+000,
- -1.857793e-001,
- -1.089863e+001,
- 1.172984e+001,
- -3.768099e-002,
- 9.439285e-001,
- 4.869335e-002,
- 4.845114e+000,
- 6.119211e-001,
- -1.114002e+000,
- -1.399280e-001,
- 4.963800e+000,
- -4.685500e+000,
- -7.780879e-002,
- 4.049736e-001,
- 1.586297e-001,
- 7.770264e+000,
- 3.449006e-001,
- -1.185472e+000,
- -3.403543e-001,
- 6.588322e-001,
- 1.133713e+000,
- -4.118674e-001,
- 2.061191e+000,
- -1.882768e-001,
- -4.372586e+000,
- 1.223530e+000,
- -1.002272e+000,
- 2.000703e-002,
- 7.073269e-002,
- 1.485075e+000,
- 5.005589e-001,
- 4.301494e-001,
- 3.626541e-001,
- 7.921098e+000,
- 1.574766e-001,
- -1.121006e+000,
- -3.007777e-001,
- 2.242051e+000,
- -4.571561e+000,
- -7.761071e+000,
- 2.053404e+000,
- -1.524018e-001,
- -1.886162e+000,
- 1.018208e+000,
- -1.058864e+000,
- -1.358673e-001,
- 1.389667e+000,
- 8.633409e+000,
- -3.437249e+000,
- 7.295429e-001,
- 1.514700e-001,
- 2.842513e+000,
- 5.014325e-001,
- // albedo 0, turbidity 3
- -1.144464e+000,
- -2.043799e-001,
- -1.020188e+001,
- 1.071247e+001,
- -3.256693e-002,
- 7.860205e-001,
- 6.872719e-002,
- 4.824771e+000,
- 6.259836e-001,
- -1.170104e+000,
- -2.118626e-001,
- 4.391405e+000,
- -4.198900e+000,
- -7.111559e-002,
- 3.890442e-001,
- 1.024831e-001,
- 6.282535e+000,
- 5.365688e-001,
- -1.129171e+000,
- -2.552880e-001,
- 2.238298e-001,
- 7.314295e-001,
- -3.562730e-001,
- 1.881931e+000,
- -3.078716e-002,
- -1.039120e+000,
- 9.096301e-001,
- -1.042294e+000,
- 4.450203e-003,
- -5.116033e-001,
- 2.627589e+000,
- 6.098996e-001,
- -1.264638e-001,
- 4.325281e-001,
- 7.080503e+000,
- 4.583646e-001,
- -1.082293e+000,
- -2.723056e-001,
- 2.065076e+000,
- -8.143133e+000,
- -7.892212e+000,
- 2.142231e+000,
- -7.106240e-002,
- -1.122398e+000,
- 8.338505e-001,
- -1.071715e+000,
- -1.426568e-001,
- 1.095351e+000,
- 1.729783e+001,
- -3.851931e+000,
- 4.360514e-001,
- 2.114440e-001,
- 2.970832e+000,
- 5.944389e-001,
- // albedo 0, turbidity 4
- -1.195909e+000,
- -2.590449e-001,
- -1.191037e+001,
- 1.207947e+001,
- -1.589842e-002,
- 6.297846e-001,
- 9.054772e-002,
- 4.285959e+000,
- 5.933752e-001,
- -1.245763e+000,
- -3.316637e-001,
- 4.293660e+000,
- -3.694011e+000,
- -4.699947e-002,
- 4.843684e-001,
- 2.130425e-002,
- 4.097549e+000,
- 6.530809e-001,
- -1.148742e+000,
- -1.902509e-001,
- -2.393233e-001,
- -2.441254e-001,
- -2.610918e-001,
- 1.846988e+000,
- 3.532866e-002,
- 2.660106e+000,
- 8.358294e-001,
- -1.016080e+000,
- -7.444960e-002,
- -5.053436e-001,
- 4.388855e+000,
- 6.054987e-001,
- -1.208300e+000,
- 5.817215e-001,
- 2.543570e+000,
- 4.726568e-001,
- -1.072027e+000,
- -2.101440e-001,
- 1.518378e+000,
- -1.060119e+001,
- -6.016546e+000,
- 2.649475e+000,
- -5.166992e-002,
- 1.571269e+000,
- 8.344622e-001,
- -1.072365e+000,
- -1.511201e-001,
- 7.478010e-001,
- 1.900732e+001,
- -3.950387e+000,
- -3.473907e-001,
- 3.797211e-001,
- 2.782949e+000,
- 6.296808e-001,
- // albedo 0, turbidity 5
- -1.239423e+000,
- -3.136289e-001,
- -1.351100e+001,
- 1.349468e+001,
- -7.070423e-003,
- 5.012315e-001,
- 1.106008e-001,
- 3.803619e+000,
- 5.577948e-001,
- -1.452524e+000,
- -5.676944e-001,
- 2.993153e+000,
- -2.277288e+000,
- -2.168954e-002,
- 3.056720e-001,
- 1.152338e-002,
- 1.852697e+000,
- 6.427228e-001,
- -1.061421e+000,
- -4.590521e-002,
- 6.057022e-001,
- -1.096835e+000,
- -1.504952e-001,
- 2.344921e+000,
- -5.491832e-002,
- 5.268322e+000,
- 9.082253e-001,
- -1.042373e+000,
- -1.769498e-001,
- -1.075388e+000,
- 3.831712e+000,
- 3.154140e-001,
- -2.416458e+000,
- 7.909032e-001,
- -1.492892e-002,
- 3.854049e-001,
- -1.064159e+000,
- -1.892684e-001,
- 1.438685e+000,
- -8.166362e+000,
- -3.616364e+000,
- 3.275206e+000,
- -1.203825e-001,
- 2.039491e+000,
- 8.688057e-001,
- -1.070120e+000,
- -1.569508e-001,
- 4.124760e-001,
- 1.399683e+001,
- -3.547085e+000,
- -1.046326e+000,
- 4.973825e-001,
- 2.791231e+000,
- 6.503286e-001,
- // albedo 0, turbidity 6
- -1.283579e+000,
- -3.609518e-001,
- -1.335397e+001,
- 1.315248e+001,
- -4.431938e-004,
- 3.769526e-001,
- 1.429824e-001,
- 3.573613e+000,
- 4.998696e-001,
- -1.657952e+000,
- -7.627948e-001,
- 1.958222e+000,
- -7.949816e-001,
- -2.882837e-002,
- 5.356149e-001,
- -5.191946e-002,
- 8.869955e-001,
- 6.263320e-001,
- -9.527600e-001,
- 6.494189e-002,
- 5.361303e-001,
- -2.129590e+000,
- -9.258630e-002,
- 1.604776e+000,
- 5.067770e-002,
- 6.376055e+000,
- 9.138052e-001,
- -1.080827e+000,
- -2.523120e-001,
- -7.154262e-001,
- 4.120085e+000,
- 1.878228e-001,
- -1.492158e+000,
- 6.881655e-001,
- -1.446611e+000,
- 4.040631e-001,
- -1.054075e+000,
- -1.665498e-001,
- 9.191052e-001,
- -6.636943e+000,
- -1.894826e+000,
- 2.107810e+000,
- -3.680499e-002,
- 2.655452e+000,
- 8.413840e-001,
- -1.061127e+000,
- -1.448849e-001,
- 2.667493e-001,
- 1.034103e+001,
- -4.285769e+000,
- -3.874504e-001,
- 5.998752e-001,
- 3.132426e+000,
- 6.652753e-001,
- // albedo 0, turbidity 7
- -1.347345e+000,
- -4.287832e-001,
- -9.305553e+000,
- 9.133813e+000,
- -3.173527e-003,
- 3.977564e-001,
- 1.151420e-001,
- 3.320564e+000,
- 4.998134e-001,
- -1.927296e+000,
- -9.901372e-001,
- -2.593499e+000,
- 4.087421e+000,
- -5.833993e-002,
- 8.158929e-001,
- -4.681279e-002,
- 2.423716e-001,
- 4.938052e-001,
- -9.470092e-001,
- 7.325237e-002,
- 2.064735e+000,
- -5.167540e+000,
- -1.313751e-002,
- 4.832169e-001,
- 1.126295e-001,
- 6.970522e+000,
- 1.035022e+000,
- -1.022557e+000,
- -2.762616e-001,
- -9.375748e-001,
- 6.696739e+000,
- 2.200765e-001,
- -1.133253e-001,
- 5.492505e-001,
- -3.109391e+000,
- 3.321914e-001,
- -1.087444e+000,
- -1.836263e-001,
- 6.225024e-001,
- -8.576765e+000,
- -1.107637e+000,
- 7.859427e-001,
- 9.910909e-002,
- 3.112938e+000,
- 8.596261e-001,
- -1.051544e+000,
- -1.546262e-001,
- 2.371731e-001,
- 1.200502e+001,
- -4.527291e+000,
- 7.268862e-002,
- 5.571478e-001,
- 2.532873e+000,
- 6.662000e-001,
- // albedo 0, turbidity 8
- -1.375576e+000,
- -4.840019e-001,
- -8.121290e+000,
- 8.058140e+000,
- -1.445661e-002,
- 5.123314e-001,
- 5.813321e-002,
- 3.203219e+000,
- 5.442318e-001,
- -2.325221e+000,
- -1.241463e+000,
- -7.063430e+000,
- 8.741369e+000,
- -7.829950e-002,
- 8.844273e-001,
- -3.471106e-002,
- 1.740583e-001,
- 2.814079e-001,
- -1.228700e+000,
- -2.013412e-001,
- 2.949042e+000,
- -7.371945e+000,
- 1.071753e-001,
- -2.491970e-001,
- 2.265223e-001,
- 6.391504e+000,
- 1.172389e+000,
- -7.601786e-001,
- -1.680631e-001,
- -7.584444e-001,
- 8.541356e+000,
- 8.222291e-002,
- 6.729633e-001,
- 3.206615e-001,
- -3.700940e+000,
- 2.710054e-001,
- -1.191166e+000,
- -2.672347e-001,
- 2.927498e-001,
- -9.713613e+000,
- -4.783721e-001,
- 2.352803e-001,
- 2.161949e-001,
- 2.691481e+000,
- 8.745447e-001,
- -1.030135e+000,
- -1.653301e-001,
- 2.263443e-001,
- 1.296157e+001,
- -4.650644e+000,
- 7.055709e-003,
- 5.091975e-001,
- 2.000370e+000,
- 6.603839e-001,
- // albedo 0, turbidity 9
- -1.508018e+000,
- -6.460933e-001,
- -6.402745e+000,
- 6.545995e+000,
- -3.750320e-002,
- 6.921803e-001,
- 3.309819e-003,
- 2.797527e+000,
- 6.978446e-001,
- -2.333308e+000,
- -1.167837e+000,
- -1.746787e+001,
- 1.868630e+001,
- -8.948229e-003,
- 5.621946e-001,
- -3.402626e-002,
- 1.217943e+000,
- 1.149865e-002,
- -2.665953e+000,
- -1.226307e+000,
- 7.169725e+000,
- -1.159434e+001,
- 3.583420e-002,
- -3.074378e-001,
- 3.412248e-001,
- 4.422122e+000,
- 1.283791e+000,
- -9.705116e-002,
- 8.312991e-002,
- -2.160462e+000,
- 1.028235e+001,
- 3.543357e-002,
- 1.032049e+000,
- 1.058310e-001,
- -2.972898e+000,
- 2.418628e-001,
- -1.329617e+000,
- -3.699557e-001,
- 5.560117e-001,
- -9.730113e+000,
- 9.938865e-002,
- -3.071488e-001,
- 2.510691e-001,
- 1.777111e+000,
- 8.705142e-001,
- -1.019387e+000,
- -1.893247e-001,
- 1.194079e-001,
- 1.239436e+001,
- -4.799224e+000,
- 2.940213e-001,
- 4.841268e-001,
- 1.529724e+000,
- 6.582615e-001,
- // albedo 0, turbidity 10
- -1.896737e+000,
- -1.005442e+000,
- -6.411032e+000,
- 6.548220e+000,
- -3.227596e-002,
- 5.717262e-001,
- -8.115192e-006,
- 2.296704e+000,
- 9.000749e-001,
- -2.411116e+000,
- -1.225587e+000,
- -1.753629e+001,
- 1.829393e+001,
- 1.247555e-002,
- 2.364616e-001,
- -5.114637e-003,
- 1.603778e+000,
- -2.224156e-001,
- -4.707121e+000,
- -2.074977e+000,
- 7.942300e+000,
- -1.132407e+001,
- -5.415654e-002,
- 5.446811e-001,
- 1.032493e-001,
- 4.010235e+000,
- 1.369802e+000,
- 1.010482e-001,
- -4.013305e-001,
- -2.674579e+000,
- 9.779409e+000,
- 1.782506e-001,
- 7.053045e-001,
- 4.200002e-001,
- -2.400671e+000,
- 1.953165e-001,
- -1.243526e+000,
- -3.391255e-001,
- 8.848882e-001,
- -9.789025e+000,
- -3.997324e-001,
- -9.546227e-001,
- -1.044017e-001,
- 6.010593e-001,
- 8.714462e-001,
- -1.014633e+000,
- -1.730009e-001,
- -7.738934e-002,
- 1.390903e+001,
- -4.847307e+000,
- 1.076059e+000,
- 5.685743e-001,
- 1.572992e+000,
- 6.561432e-001,
- // albedo 1, turbidity 1
- -1.122998e+000,
- -1.881183e-001,
- -1.030709e+001,
- 1.158932e+001,
- -4.079495e-002,
- 9.603774e-001,
- 3.079436e-002,
- 4.009235e+000,
- 5.060745e-001,
- -1.134790e+000,
- -1.539688e-001,
- 5.478405e+000,
- -4.217270e+000,
- -1.043858e-001,
- 7.165008e-001,
- 1.524765e-002,
- 6.473623e+000,
- 4.207882e-001,
- -1.134957e+000,
- -3.513318e-001,
- 7.393837e-001,
- 1.354415e+000,
- -4.764078e-001,
- 1.690441e+000,
- -5.492640e-002,
- -5.563523e+000,
- 1.145743e+000,
- -1.058344e+000,
- -5.758503e-002,
- 1.168230e+000,
- 3.269824e-001,
- 1.795193e-001,
- 7.849011e-001,
- 7.441853e-002,
- 6.904804e+000,
- 2.818790e-001,
- -1.075194e+000,
- -2.355813e-001,
- 2.463685e+000,
- -1.536505e+000,
- -7.505771e+000,
- 9.619712e-001,
- -6.465851e-002,
- -1.355492e+000,
- 8.489847e-001,
- -1.079030e+000,
- -1.465328e-001,
- 1.773838e+000,
- 2.310131e+000,
- -3.136065e+000,
- 3.507952e-001,
- 4.435014e-002,
- 2.819225e+000,
- 5.689008e-001,
- // albedo 1, turbidity 2
- -1.125833e+000,
- -1.870849e-001,
- -9.555833e+000,
- 1.059713e+001,
- -4.225402e-002,
- 9.164663e-001,
- 4.338796e-002,
- 4.400980e+000,
- 6.056119e-001,
- -1.127440e+000,
- -1.551891e-001,
- 4.755621e+000,
- -4.408806e+000,
- -7.851763e-002,
- 2.268284e-001,
- 1.460070e-001,
- 7.048003e+000,
- 3.525997e-001,
- -1.143788e+000,
- -3.170178e-001,
- 5.480669e-001,
- 2.041830e+000,
- -4.532139e-001,
- 2.302233e+000,
- -1.887419e-001,
- -4.489221e+000,
- 1.250967e+000,
- -1.032849e+000,
- 7.376031e-003,
- 5.666073e-001,
- -2.312203e-001,
- 4.862894e-001,
- -1.748294e-001,
- 3.572870e-001,
- 8.380522e+000,
- 1.302333e-001,
- -1.093728e+000,
- -2.786977e-001,
- 2.641272e+000,
- -1.507494e+000,
- -8.731243e+000,
- 1.684055e+000,
- -2.023377e-001,
- -2.176398e+000,
- 1.013249e+000,
- -1.076578e+000,
- -1.456205e-001,
- 1.693935e+000,
- 2.945003e+000,
- -2.822673e+000,
- -2.520033e-001,
- 1.517034e-001,
- 2.649109e+000,
- 5.179094e-001,
- // albedo 1, turbidity 3
- -1.146417e+000,
- -2.119353e-001,
- -7.187525e+000,
- 8.058599e+000,
- -5.256438e-002,
- 8.375733e-001,
- 3.887093e-002,
- 4.222111e+000,
- 6.695347e-001,
- -1.173674e+000,
- -2.067025e-001,
- 2.899359e+000,
- -2.804918e+000,
- -8.473899e-002,
- 3.944225e-003,
- 1.340641e-001,
- 6.160887e+000,
- 4.527141e-001,
- -1.090098e+000,
- -2.599633e-001,
- 9.180856e-001,
- 1.092710e+000,
- -4.215019e-001,
- 2.427660e+000,
- -9.277667e-002,
- -2.123523e+000,
- 1.058159e+000,
- -1.084460e+000,
- 8.056181e-003,
- -2.453510e-001,
- 6.619567e-001,
- 4.668118e-001,
- -9.526719e-001,
- 4.648454e-001,
- 8.001572e+000,
- 3.054194e-001,
- -1.053728e+000,
- -2.765784e-001,
- 2.792388e+000,
- -3.489517e+000,
- -8.150535e+000,
- 2.195757e+000,
- -2.017234e-001,
- -2.128017e+000,
- 9.326589e-001,
- -1.099348e+000,
- -1.593939e-001,
- 1.568292e+000,
- 7.247853e+000,
- -2.933000e+000,
- -5.890481e-001,
- 1.724440e-001,
- 2.433484e+000,
- 5.736558e-001,
- // albedo 1, turbidity 4
- -1.185983e+000,
- -2.581184e-001,
- -7.761056e+000,
- 8.317053e+000,
- -3.351773e-002,
- 6.676667e-001,
- 5.941733e-002,
- 3.820727e+000,
- 6.324032e-001,
- -1.268591e+000,
- -3.398067e-001,
- 2.348503e+000,
- -2.023779e+000,
- -5.368458e-002,
- 1.083282e-001,
- 8.402858e-002,
- 3.910254e+000,
- 5.577481e-001,
- -1.071353e+000,
- -1.992459e-001,
- 7.878387e-001,
- 1.974702e-001,
- -3.033058e-001,
- 2.335298e+000,
- -8.205259e-002,
- 7.954454e-001,
- 9.972312e-001,
- -1.089513e+000,
- -3.104364e-002,
- -5.995746e-001,
- 2.330281e+000,
- 6.581939e-001,
- -1.821467e+000,
- 6.679973e-001,
- 5.090195e+000,
- 3.125161e-001,
- -1.040214e+000,
- -2.570934e-001,
- 2.660489e+000,
- -6.506045e+000,
- -7.053586e+000,
- 2.763153e+000,
- -2.433632e-001,
- -7.648176e-001,
- 9.452937e-001,
- -1.116052e+000,
- -1.831993e-001,
- 1.457694e+000,
- 1.163608e+001,
- -3.216426e+000,
- -1.045594e+000,
- 2.285002e-001,
- 1.817407e+000,
- 5.810396e-001,
- // albedo 1, turbidity 5
- -1.230134e+000,
- -3.136264e-001,
- -8.909301e+000,
- 9.145006e+000,
- -1.055387e-002,
- 4.467317e-001,
- 1.016826e-001,
- 3.342964e+000,
- 5.633840e-001,
- -1.442907e+000,
- -5.593147e-001,
- 2.156447e+000,
- -1.241657e+000,
- -3.512130e-002,
- 3.050274e-001,
- 1.797175e-002,
- 1.742358e+000,
- 5.977153e-001,
- -1.027627e+000,
- -6.481539e-002,
- 4.351975e-001,
- -1.051677e+000,
- -2.030672e-001,
- 1.942684e+000,
- -3.615993e-002,
- 4.050266e+000,
- 9.801624e-001,
- -1.082110e+000,
- -1.578209e-001,
- -3.397511e-001,
- 4.163851e+000,
- 6.650368e-001,
- -1.841730e+000,
- 7.062544e-001,
- 6.789881e-001,
- 3.172623e-001,
- -1.047447e+000,
- -1.977560e-001,
- 2.183364e+000,
- -8.805249e+000,
- -5.483962e+000,
- 2.551309e+000,
- -1.779640e-001,
- 1.519501e+000,
- 9.212536e-001,
- -1.111853e+000,
- -1.935736e-001,
- 1.394408e+000,
- 1.392405e+001,
- -3.465430e+000,
- -1.068432e+000,
- 2.388671e-001,
- 1.455336e+000,
- 6.233425e-001,
- // albedo 1, turbidity 6
- -1.262238e+000,
- -3.546341e-001,
- -1.008703e+001,
- 1.020084e+001,
- -1.852187e-003,
- 3.537580e-001,
- 1.239199e-001,
- 3.056093e+000,
- 5.132052e-001,
- -1.613810e+000,
- -7.355585e-001,
- 2.760123e+000,
- -1.685253e+000,
- -2.517552e-002,
- 2.914258e-001,
- 4.743448e-003,
- 8.689596e-001,
- 5.674192e-001,
- -9.462336e-001,
- 2.950767e-002,
- -2.613816e-001,
- -7.398653e-001,
- -1.315558e-001,
- 1.901042e+000,
- -6.447844e-002,
- 4.969341e+000,
- 1.027342e+000,
- -1.111481e+000,
- -2.194054e-001,
- -9.004538e-002,
- 3.983442e+000,
- 4.871278e-001,
- -1.965315e+000,
- 7.956121e-001,
- -2.363225e-001,
- 2.718037e-001,
- -1.036397e+000,
- -1.827106e-001,
- 1.964747e+000,
- -8.870759e+000,
- -4.208011e+000,
- 2.461215e+000,
- -2.158905e-001,
- 1.561676e+000,
- 9.436866e-001,
- -1.113769e+000,
- -1.947819e-001,
- 1.300720e+000,
- 1.516476e+001,
- -4.088732e+000,
- -1.069384e+000,
- 2.836434e-001,
- 1.671451e+000,
- 6.229612e-001,
- // albedo 1, turbidity 7
- -1.328069e+000,
- -4.244047e-001,
- -8.417040e+000,
- 8.552244e+000,
- -6.813504e-003,
- 4.127422e-001,
- 9.619897e-002,
- 2.854227e+000,
- 5.059880e-001,
- -1.927552e+000,
- -1.025290e+000,
- 9.529576e-001,
- 4.255950e-001,
- -3.738779e-002,
- 2.584586e-001,
- 4.911004e-002,
- -2.640913e-001,
- 4.138626e-001,
- -8.488094e-001,
- 1.435988e-001,
- 6.356807e-001,
- -2.895732e+000,
- -8.473961e-002,
- 1.701305e+000,
- -1.323908e-001,
- 6.499338e+000,
- 1.210928e+000,
- -1.128313e+000,
- -3.397048e-001,
- -4.043140e-001,
- 6.265097e+000,
- 5.482395e-001,
- -2.057614e+000,
- 8.884087e-001,
- -2.943879e+000,
- 9.760301e-002,
- -1.039764e+000,
- -1.494772e-001,
- 1.781915e+000,
- -1.153012e+001,
- -3.379232e+000,
- 2.517231e+000,
- -2.764393e-001,
- 2.588849e+000,
- 1.052120e+000,
- -1.108447e+000,
- -2.012251e-001,
- 1.198640e+000,
- 1.925331e+001,
- -4.423892e+000,
- -1.257122e+000,
- 3.395690e-001,
- 1.481220e+000,
- 5.880175e-001,
- // albedo 1, turbidity 8
- -1.374185e+000,
- -4.967434e-001,
- -7.401318e+000,
- 7.724021e+000,
- -2.345723e-002,
- 5.979653e-001,
- 2.436346e-002,
- 2.658970e+000,
- 6.014891e-001,
- -2.310933e+000,
- -1.290290e+000,
- -1.301909e+000,
- 2.557806e+000,
- -3.744449e-002,
- 8.982861e-002,
- 1.090613e-001,
- -4.398363e-001,
- 1.184329e-001,
- -1.124730e+000,
- -9.921830e-002,
- 1.366902e+000,
- -4.172489e+000,
- -5.078016e-002,
- 1.393597e+000,
- -9.323843e-002,
- 6.452721e+000,
- 1.435913e+000,
- -8.468477e-001,
- -2.744819e-001,
- -4.347200e-001,
- 6.713362e+000,
- 6.127133e-001,
- -1.685634e+000,
- 7.360941e-001,
- -4.535502e+000,
- -2.920866e-002,
- -1.165242e+000,
- -2.008697e-001,
- 1.438778e+000,
- -1.008936e+001,
- -2.214771e+000,
- 2.102909e+000,
- -1.763085e-001,
- 2.859075e+000,
- 1.093470e+000,
- -1.074614e+000,
- -2.066374e-001,
- 1.131891e+000,
- 1.630063e+001,
- -4.801441e+000,
- -1.112590e+000,
- 3.595785e-001,
- 1.122227e+000,
- 5.794610e-001,
- // albedo 1, turbidity 9
- -1.521515e+000,
- -6.835604e-001,
- -5.571044e+000,
- 6.028774e+000,
- -4.253715e-002,
- 6.875746e-001,
- -5.279456e-006,
- 2.180150e+000,
- 8.487705e-001,
- -2.240415e+000,
- -1.171166e+000,
- -7.182771e+000,
- 8.417068e+000,
- -1.932866e-002,
- 1.101887e-001,
- -1.098862e-002,
- 6.242195e-001,
- -2.393875e-001,
- -2.712354e+000,
- -1.198830e+000,
- 3.180200e+000,
- -6.768130e+000,
- -2.563386e-003,
- 7.984607e-001,
- 2.764376e-001,
- 4.695358e+000,
- 1.557045e+000,
- -3.655172e-002,
- -2.142321e-002,
- -9.138120e-001,
- 7.932786e+000,
- 3.516542e-001,
- -7.994343e-001,
- 1.786761e-001,
- -4.208399e+000,
- 1.820576e-002,
- -1.368610e+000,
- -2.656212e-001,
- 1.249397e+000,
- -8.317818e+000,
- -8.962772e-001,
- 1.423249e+000,
- 1.478381e-001,
- 2.191660e+000,
- 1.007748e+000,
- -1.041753e+000,
- -2.453366e-001,
- 1.061102e+000,
- 1.130172e+001,
- -4.739312e+000,
- -9.223334e-001,
- 2.982776e-001,
- 6.162931e-001,
- 6.080302e-001,
- // albedo 1, turbidity 10
- -1.989159e+000,
- -1.095160e+000,
- -2.915550e+000,
- 3.275339e+000,
- -5.735765e-002,
- 5.742174e-001,
- -7.683288e-006,
- 1.763400e+000,
- 9.001342e-001,
- -2.070020e+000,
- -1.086338e+000,
- -1.095898e+001,
- 1.206960e+001,
- 3.780123e-002,
- -1.774699e-002,
- -5.881348e-004,
- 1.333819e+000,
- -2.605423e-001,
- -5.249653e+000,
- -2.383040e+000,
- 6.160406e+000,
- -9.097138e+000,
- -1.955319e-001,
- 1.651785e+000,
- 6.016463e-004,
- 3.021824e+000,
- 1.493574e+000,
- 4.685432e-001,
- -2.358662e-001,
- -2.666433e+000,
- 9.685763e+000,
- 5.804928e-001,
- -1.521875e+000,
- 5.668989e-001,
- -1.548136e+000,
- 1.688642e-002,
- -1.296891e+000,
- -3.449031e-001,
- 1.928548e+000,
- -1.167560e+001,
- -1.627615e+000,
- 1.355603e+000,
- -1.929074e-001,
- -6.568952e-001,
- 1.009774e+000,
- -1.067288e+000,
- -2.410392e-001,
- 7.147961e-001,
- 1.783840e+001,
- -4.374399e+000,
- -6.588777e-001,
- 3.329831e-001,
- 1.012066e+000,
- 6.118645e-001,
-};
-
-static const double datasetXYZRad2[] = {
- // albedo 0, turbidity 1
- 1.632341e+000,
- 1.395230e+000,
- 1.375634e+000,
- 1.238193e+001,
- 5.921102e+000,
- 7.766508e+000,
- // albedo 0, turbidity 2
- 1.597115e+000,
- 1.554617e+000,
- 3.932382e-001,
- 1.505284e+001,
- 5.725234e+000,
- 8.158155e+000,
- // albedo 0, turbidity 3
- 1.522034e+000,
- 1.844545e+000,
- -1.322862e+000,
- 1.918382e+001,
- 5.440769e+000,
- 8.837119e+000,
- // albedo 0, turbidity 4
- 1.403048e+000,
- 2.290852e+000,
- -4.013792e+000,
- 2.485100e+001,
- 5.521888e+000,
- 9.845547e+000,
- // albedo 0, turbidity 5
- 1.286364e+000,
- 2.774498e+000,
- -6.648221e+000,
- 2.964151e+001,
- 5.923777e+000,
- 1.097075e+001,
- // albedo 0, turbidity 6
- 1.213544e+000,
- 3.040195e+000,
- -8.092676e+000,
- 3.186082e+001,
- 6.789782e+000,
- 1.158899e+001,
- // albedo 0, turbidity 7
- 1.122622e+000,
- 3.347465e+000,
- -9.649016e+000,
- 3.343824e+001,
- 9.347715e+000,
- 1.231374e+001,
- // albedo 0, turbidity 8
- 1.007356e+000,
- 3.543858e+000,
- -1.053520e+001,
- 3.239842e+001,
- 1.483962e+001,
- 1.331718e+001,
- // albedo 0, turbidity 9
- 8.956642e-001,
- 3.278700e+000,
- -9.254933e+000,
- 2.557923e+001,
- 2.489677e+001,
- 1.476166e+001,
- // albedo 0, turbidity 10
- 7.985143e-001,
- 2.340404e+000,
- -4.928274e+000,
- 1.141787e+001,
- 3.961501e+001,
- 1.682448e+001,
- // albedo 1, turbidity 1
- 1.745162e+000,
- 1.639467e+000,
- 1.342721e+000,
- 1.166033e+001,
- 1.490124e+001,
- 1.774031e+001,
- // albedo 1, turbidity 2
- 1.708439e+000,
- 1.819144e+000,
- 2.834399e-001,
- 1.448066e+001,
- 1.459214e+001,
- 1.858679e+001,
- // albedo 1, turbidity 3
- 1.631720e+000,
- 2.094799e+000,
- -1.378825e+000,
- 1.843198e+001,
- 1.463173e+001,
- 1.962881e+001,
- // albedo 1, turbidity 4
- 1.516536e+000,
- 2.438729e+000,
- -3.624121e+000,
- 2.298621e+001,
- 1.599782e+001,
- 2.070027e+001,
- // albedo 1, turbidity 5
- 1.405863e+000,
- 2.785191e+000,
- -5.705236e+000,
- 2.645121e+001,
- 1.768330e+001,
- 2.191903e+001,
- // albedo 1, turbidity 6
- 1.344052e+000,
- 2.951807e+000,
- -6.683851e+000,
- 2.744271e+001,
- 1.985706e+001,
- 2.229452e+001,
- // albedo 1, turbidity 7
- 1.245827e+000,
- 3.182923e+000,
- -7.822960e+000,
- 2.791395e+001,
- 2.327254e+001,
- 2.315910e+001,
- // albedo 1, turbidity 8
- 1.132305e+000,
- 3.202593e+000,
- -8.008429e+000,
- 2.521093e+001,
- 3.000014e+001,
- 2.405306e+001,
- // albedo 1, turbidity 9
- 1.020330e+000,
- 2.820556e+000,
- -6.238704e+000,
- 1.709276e+001,
- 4.077916e+001,
- 2.509949e+001,
- // albedo 1, turbidity 10
- 9.031570e-001,
- 1.863917e+000,
- -1.955738e+000,
- 3.032665e+000,
- 5.434290e+001,
- 2.641780e+001,
-};
-
-static const double datasetXYZ3[] = {
- // albedo 0, turbidity 1
- -1.310023e+000,
- -4.407658e-001,
- -3.640340e+001,
- 3.683292e+001,
- -8.124762e-003,
- 5.297961e-001,
- 1.188633e-002,
- 3.138320e+000,
- 5.134778e-001,
- -1.424100e+000,
- -5.501606e-001,
- -1.753510e+001,
- 1.822769e+001,
- -1.539272e-002,
- 6.366826e-001,
- 2.661996e-003,
- 2.659915e+000,
- 4.071138e-001,
- -1.103436e+000,
- -1.884105e-001,
- 6.425322e+000,
- -6.910579e+000,
- -2.019861e-002,
- 3.553271e-001,
- -1.589061e-002,
- 5.345985e+000,
- 8.790218e-001,
- -1.186200e+000,
- -4.307514e-001,
- -3.957947e+000,
- 5.979352e+000,
- -5.348869e-002,
- 1.736117e+000,
- 3.491346e-002,
- -2.692261e+000,
- 5.610506e-001,
- -1.006038e+000,
- -1.305995e-001,
- 4.473513e+000,
- -3.806719e+000,
- 1.419407e-001,
- -2.148238e-002,
- -5.081185e-002,
- 3.735362e+000,
- 5.358280e-001,
- -1.078507e+000,
- -1.633754e-001,
- -3.812368e+000,
- 4.381700e+000,
- 2.988122e-002,
- 1.754224e+000,
- 1.472376e-001,
- 3.722798e+000,
- 4.999157e-001,
- // albedo 0, turbidity 2
- -1.333582e+000,
- -4.649908e-001,
- -3.359528e+001,
- 3.404375e+001,
- -9.384242e-003,
- 5.587511e-001,
- 5.726310e-003,
- 3.073145e+000,
- 5.425529e-001,
- -1.562624e+000,
- -7.107068e-001,
- -1.478170e+001,
- 1.559839e+001,
- -1.462375e-002,
- 5.050133e-001,
- 2.516017e-002,
- 1.604696e+000,
- 2.902403e-001,
- -8.930158e-001,
- 4.068077e-002,
- 1.373481e+000,
- -2.342752e+000,
- -2.098058e-002,
- 6.248686e-001,
- -5.258363e-002,
- 7.058214e+000,
- 1.150373e+000,
- -1.262823e+000,
- -4.818353e-001,
- 8.892610e-004,
- 1.923120e+000,
- -4.979718e-002,
- 1.040693e+000,
- 1.558103e-001,
- -2.852480e+000,
- 2.420691e-001,
- -9.968383e-001,
- -1.200648e-001,
- 1.324342e+000,
- -9.430889e-001,
- 1.931098e-001,
- 4.436916e-001,
- -7.320456e-002,
- 4.215931e+000,
- 7.898019e-001,
- -1.078185e+000,
- -1.718192e-001,
- -1.720191e+000,
- 2.358918e+000,
- 2.765637e-002,
- 1.260245e+000,
- 2.021941e-001,
- 3.395483e+000,
- 5.173628e-001,
- // albedo 0, turbidity 3
- -1.353023e+000,
- -4.813523e-001,
- -3.104920e+001,
- 3.140156e+001,
- -9.510741e-003,
- 5.542030e-001,
- 8.135471e-003,
- 3.136646e+000,
- 5.215989e-001,
- -1.624704e+000,
- -7.990201e-001,
- -2.167125e+001,
- 2.246341e+001,
- -1.163533e-002,
- 5.415746e-001,
- 2.618378e-002,
- 1.139214e+000,
- 3.444357e-001,
- -7.983610e-001,
- 1.417476e-001,
- 9.914841e+000,
- -1.081503e+001,
- -1.218845e-002,
- 3.411392e-001,
- -6.137698e-002,
- 7.445848e+000,
- 1.180080e+000,
- -1.266679e+000,
- -4.288977e-001,
- -5.818701e+000,
- 6.986437e+000,
- -8.180711e-002,
- 1.397403e+000,
- 2.016916e-001,
- -1.275731e+000,
- 2.592773e-001,
- -1.009707e+000,
- -1.537754e-001,
- 3.496378e+000,
- -3.013726e+000,
- 2.421150e-001,
- -2.831925e-001,
- 3.003395e-002,
- 3.702862e+000,
- 7.746320e-001,
- -1.075646e+000,
- -1.768747e-001,
- -1.347762e+000,
- 1.989004e+000,
- 1.375836e-002,
- 1.764810e+000,
- 1.330018e-001,
- 3.230864e+000,
- 6.626210e-001,
- // albedo 0, turbidity 4
- -1.375269e+000,
- -5.103569e-001,
- -3.442661e+001,
- 3.478703e+001,
- -8.460009e-003,
- 5.408643e-001,
- 4.813323e-003,
- 3.016078e+000,
- 5.062069e-001,
- -1.821679e+000,
- -9.766461e-001,
- -1.926488e+001,
- 1.997912e+001,
- -9.822567e-003,
- 3.649556e-001,
- 4.316092e-002,
- 8.930190e-001,
- 4.166527e-001,
- -6.633542e-001,
- 1.997841e-001,
- 2.395592e+000,
- -3.117175e+000,
- -1.080884e-002,
- 8.983814e-001,
- -1.375825e-001,
- 6.673463e+000,
- 1.115663e+000,
- -1.303240e+000,
- -3.612712e-001,
- 8.292959e-002,
- 3.381364e-001,
- -6.078648e-002,
- 3.229247e-001,
- 3.680987e-001,
- 7.046755e-001,
- 3.144924e-001,
- -9.952598e-001,
- -2.039076e-001,
- 4.026851e-001,
- 2.686684e-001,
- 1.640712e-001,
- 5.186341e-001,
- -1.205520e-002,
- 2.659613e+000,
- 8.030394e-001,
- -1.098579e+000,
- -2.151992e-001,
- 6.558198e-001,
- -7.436900e-004,
- -1.421817e-003,
- 1.073701e+000,
- 1.886875e-001,
- 2.536857e+000,
- 6.673923e-001,
- // albedo 0, turbidity 5
- -1.457986e+000,
- -5.906842e-001,
- -3.812464e+001,
- 3.838539e+001,
- -6.024357e-003,
- 4.741484e-001,
- 1.209223e-002,
- 2.818432e+000,
- 5.012433e-001,
- -1.835728e+000,
- -1.003405e+000,
- -6.848129e+000,
- 7.601943e+000,
- -1.277375e-002,
- 4.785598e-001,
- 3.366853e-002,
- 1.097701e+000,
- 4.636635e-001,
- -8.491348e-001,
- 9.466365e-003,
- -2.685226e+000,
- 2.004060e+000,
- -1.168708e-002,
- 6.752316e-001,
- -1.543371e-001,
- 5.674759e+000,
- 1.039534e+000,
- -1.083379e+000,
- -1.506790e-001,
- 7.328236e-001,
- -5.095568e-001,
- -8.609153e-002,
- 4.448820e-001,
- 4.174662e-001,
- 1.481556e+000,
- 3.942551e-001,
- -1.117089e+000,
- -3.337605e-001,
- 2.502281e-001,
- 4.036323e-001,
- 2.673899e-001,
- 2.829817e-001,
- 2.242450e-002,
- 2.043207e+000,
- 7.706902e-001,
- -1.071648e+000,
- -2.126200e-001,
- 6.069466e-001,
- -1.456290e-003,
- -5.515960e-001,
- 1.046755e+000,
- 1.985021e-001,
- 2.290245e+000,
- 6.876058e-001,
- // albedo 0, turbidity 6
- -1.483903e+000,
- -6.309647e-001,
- -4.380213e+001,
- 4.410537e+001,
- -5.712161e-003,
- 5.195992e-001,
- 2.028428e-003,
- 2.687114e+000,
- 5.098321e-001,
- -2.053976e+000,
- -1.141473e+000,
- 5.109183e-001,
- 8.060391e-002,
- -1.033983e-002,
- 4.066532e-001,
- 4.869627e-002,
- 1.161722e+000,
- 4.039525e-001,
- -6.348185e-001,
- 7.651292e-002,
- -1.031327e+001,
- 1.007598e+001,
- -2.083688e-002,
- 7.359516e-001,
- -2.029459e-001,
- 5.013257e+000,
- 1.077649e+000,
- -1.228630e+000,
- -1.650496e-001,
- 4.077157e-002,
- -7.189167e-001,
- -5.092220e-002,
- 2.959814e-001,
- 5.111496e-001,
- 2.540433e+000,
- 3.615330e-001,
- -1.041883e+000,
- -3.278413e-001,
- -6.691911e-002,
- 1.307364e+000,
- 2.166663e-001,
- 3.000595e-001,
- -3.157136e-003,
- 1.389208e+000,
- 7.999026e-001,
- -1.103556e+000,
- -2.443602e-001,
- 4.705347e-001,
- -9.296482e-004,
- -5.309920e-001,
- 9.654511e-001,
- 2.142587e-001,
- 2.244723e+000,
- 6.839976e-001,
- // albedo 0, turbidity 7
- -1.555684e+000,
- -6.962113e-001,
- -4.647983e+001,
- 4.674270e+001,
- -5.034895e-003,
- 4.755090e-001,
- -9.502561e-007,
- 2.626569e+000,
- 5.056194e-001,
- -1.998288e+000,
- -1.124720e+000,
- -1.629586e+000,
- 2.187993e+000,
- -8.284384e-003,
- 3.845258e-001,
- 5.726240e-002,
- 1.185644e+000,
- 4.255812e-001,
- -1.032570e+000,
- -2.513850e-001,
- -3.721112e+000,
- 3.506967e+000,
- -2.186561e-002,
- 9.436049e-001,
- -2.451412e-001,
- 4.725724e+000,
- 1.039256e+000,
- -8.597532e-001,
- 9.073332e-002,
- -2.553741e+000,
- 1.993237e+000,
- -4.390891e-002,
- -2.046928e-001,
- 5.515623e-001,
- 1.909127e+000,
- 3.948212e-001,
- -1.210482e+000,
- -4.477622e-001,
- -2.267805e-001,
- 1.219488e+000,
- 1.336186e-001,
- 6.866897e-001,
- 2.808997e-002,
- 1.600403e+000,
- 7.816409e-001,
- -1.078168e+000,
- -2.699261e-001,
- 2.537282e-001,
- 3.820684e-001,
- -4.425103e-001,
- 5.298235e-001,
- 2.185217e-001,
- 1.728679e+000,
- 6.882743e-001,
- // albedo 0, turbidity 8
- -1.697968e+000,
- -8.391488e-001,
- -5.790105e+001,
- 5.814120e+001,
- -3.404760e-003,
- 4.265140e-001,
- -1.796301e-006,
- 2.368442e+000,
- 5.324429e-001,
- -2.141552e+000,
- -1.172230e+000,
- 1.677872e+001,
- -1.641470e+001,
- -5.732425e-003,
- 2.002199e-001,
- 6.841834e-002,
- 1.485338e+000,
- 3.215763e-001,
- -1.442946e+000,
- -7.264245e-001,
- -9.503706e+000,
- 9.650462e+000,
- -2.120995e-002,
- 1.419263e+000,
- -2.893098e-001,
- 3.860731e+000,
- 1.120857e+000,
- -5.696752e-001,
- 3.411279e-001,
- -2.931035e-001,
- -6.512552e-001,
- -1.068437e-001,
- -1.085661e+000,
- 6.107549e-001,
- 1.459503e+000,
- 3.210336e-001,
- -1.313839e+000,
- -5.921371e-001,
- -2.332222e-001,
- 1.648196e+000,
- 2.492787e-001,
- 1.381033e+000,
- -1.993392e-002,
- 9.812560e-001,
- 8.316329e-001,
- -1.087464e+000,
- -3.195534e-001,
- 2.902095e-001,
- 3.383709e-001,
- -8.798482e-001,
- 1.494668e-002,
- 2.529703e-001,
- 1.452644e+000,
- 6.693870e-001,
- // albedo 0, turbidity 9
- -2.068582e+000,
- -1.118605e+000,
- -5.081598e+001,
- 5.097486e+001,
- -3.280669e-003,
- 4.067371e-001,
- -2.544951e-006,
- 2.179497e+000,
- 5.778017e-001,
- -1.744693e+000,
- -8.537207e-001,
- 2.234361e+001,
- -2.208318e+001,
- -5.932616e-003,
- 1.035049e-001,
- 5.742772e-002,
- 1.977880e+000,
- 2.124846e-001,
- -3.287515e+000,
- -2.140268e+000,
- -1.249566e+001,
- 1.240091e+001,
- -2.409349e-002,
- 1.397821e+000,
- -2.371627e-001,
- 2.771192e+000,
- 1.170496e+000,
- 5.502311e-001,
- 1.046630e+000,
- 2.193517e+000,
- -2.220400e+000,
- -1.064394e-001,
- -1.017926e+000,
- 4.795457e-001,
- 1.030644e+000,
- 3.177516e-001,
- -1.719734e+000,
- -9.536198e-001,
- -6.586821e-001,
- 1.386361e+000,
- -2.513065e-002,
- 1.187011e+000,
- 6.542539e-002,
- 5.296055e-001,
- 8.082660e-001,
- -1.005700e+000,
- -3.028096e-001,
- 4.470957e-002,
- 1.007760e+000,
- -8.119016e-001,
- 3.153338e-002,
- 2.311321e-001,
- 1.182208e+000,
- 6.824758e-001,
- // albedo 0, turbidity 10
- -2.728867e+000,
- -1.580388e+000,
- -3.079627e+001,
- 3.092586e+001,
- -4.197673e-003,
- 3.154759e-001,
- -3.897675e-006,
- 1.920567e+000,
- 6.664791e-001,
- -1.322495e+000,
- -7.249275e-001,
- 1.477660e+001,
- -1.468154e+001,
- -9.044857e-003,
- 5.624314e-002,
- 6.498392e-002,
- 2.047389e+000,
- 6.367540e-002,
- -6.102376e+000,
- -3.473018e+000,
- -9.926071e+000,
- 9.637797e+000,
- -1.097909e-002,
- 1.103498e+000,
- -2.424521e-001,
- 2.520748e+000,
- 1.240260e+000,
- 1.351796e+000,
- 1.018588e+000,
- 2.009081e+000,
- -1.333394e+000,
- -1.979125e-001,
- -3.318292e-001,
- 4.476624e-001,
- 9.095235e-001,
- 2.955611e-001,
- -1.774467e+000,
- -1.079880e+000,
- -8.084680e-002,
- 2.577697e-001,
- -1.149295e-001,
- 4.975303e-001,
- 2.931611e-003,
- -3.803171e-001,
- 8.002794e-001,
- -9.898401e-001,
- -2.542513e-001,
- -7.530911e-002,
- 1.870355e+000,
- -1.521918e+000,
- 2.405164e-001,
- 2.964615e-001,
- 1.334800e+000,
- 6.789053e-001,
- // albedo 1, turbidity 1
- -1.279730e+000,
- -4.290674e-001,
- -4.277972e+001,
- 4.343305e+001,
- -6.541826e-003,
- 4.945086e-001,
- 1.425338e-002,
- 2.685244e+000,
- 5.011313e-001,
- -1.449506e+000,
- -5.766374e-001,
- -1.688496e+001,
- 1.781118e+001,
- -1.121649e-002,
- 3.545020e-001,
- 2.287338e-002,
- 1.904281e+000,
- 4.936998e-001,
- -1.021980e+000,
- -1.897574e-001,
- 2.482462e+000,
- -2.941725e+000,
- -1.570448e-002,
- 7.532578e-001,
- -4.256800e-002,
- 5.239660e+000,
- 4.983116e-001,
- -1.162608e+000,
- -3.428049e-001,
- 3.974358e+000,
- -1.527935e+000,
- -3.919201e-002,
- 8.758593e-001,
- 7.291363e-002,
- -3.455257e+000,
- 8.007426e-001,
- -9.929985e-001,
- -8.712006e-002,
- -7.397313e-001,
- 1.348372e+000,
- 9.511685e-002,
- 3.233584e-001,
- -7.549148e-002,
- 5.806452e+000,
- 4.990042e-001,
- -1.084996e+000,
- -1.739767e-001,
- 1.580475e-001,
- 9.088180e-001,
- 6.871433e-002,
- 5.933079e-001,
- 1.188921e-001,
- 3.074079e+000,
- 4.999327e-001,
- // albedo 1, turbidity 2
- -1.317009e+000,
- -4.661946e-001,
- -4.255347e+001,
- 4.312782e+001,
- -5.727235e-003,
- 4.285447e-001,
- 2.189854e-002,
- 2.608310e+000,
- 5.190700e-001,
- -1.469236e+000,
- -6.282139e-001,
- -1.241404e+001,
- 1.348765e+001,
- -1.204770e-002,
- 5.070285e-001,
- -7.280216e-004,
- 1.491533e+000,
- 3.635064e-001,
- -9.713808e-001,
- -8.138038e-002,
- 3.709854e-001,
- -1.041174e+000,
- -1.814075e-002,
- 5.060860e-001,
- -2.053756e-002,
- 6.161431e+000,
- 1.093736e+000,
- -1.159057e+000,
- -3.698074e-001,
- 2.711209e+000,
- -6.006479e-001,
- -4.896926e-002,
- 9.273957e-001,
- 1.137712e-001,
- -3.496828e+000,
- 2.867109e-001,
- -1.011601e+000,
- -8.201890e-002,
- 2.105725e-001,
- 4.597520e-001,
- 1.478925e-001,
- 2.138940e-001,
- -5.660670e-002,
- 6.057755e+000,
- 7.859121e-001,
- -1.078020e+000,
- -1.811580e-001,
- 1.646622e-001,
- 8.348426e-001,
- 1.149064e-001,
- 4.985738e-001,
- 1.376605e-001,
- 2.746607e+000,
- 4.999626e-001,
- // albedo 1, turbidity 3
- -1.325672e+000,
- -4.769313e-001,
- -4.111215e+001,
- 4.168293e+001,
- -6.274997e-003,
- 4.649469e-001,
- 1.119411e-002,
- 2.631267e+000,
- 5.234546e-001,
- -1.619391e+000,
- -8.000253e-001,
- -1.534098e+001,
- 1.632706e+001,
- -1.012023e-002,
- 4.242255e-001,
- 2.931597e-002,
- 8.925807e-001,
- 3.314765e-001,
- -7.356979e-001,
- 1.368406e-001,
- 2.972579e+000,
- -3.535359e+000,
- -1.318948e-002,
- 4.607620e-001,
- -7.182778e-002,
- 6.254100e+000,
- 1.236299e+000,
- -1.316217e+000,
- -4.194427e-001,
- 3.489902e-002,
- 1.289849e+000,
- -4.755960e-002,
- 1.138222e+000,
- 1.975992e-001,
- -8.991542e-001,
- 2.290572e-001,
- -9.502188e-001,
- -1.172703e-001,
- 1.405202e+000,
- -3.061919e-001,
- 1.058772e-001,
- -3.760592e-001,
- -1.983179e-002,
- 3.562353e+000,
- 7.895959e-001,
- -1.100117e+000,
- -1.900567e-001,
- 4.925030e-001,
- 5.250225e-001,
- 1.576804e-001,
- 1.042701e+000,
- 7.330743e-002,
- 2.796064e+000,
- 6.749783e-001,
- // albedo 1, turbidity 4
- -1.354183e+000,
- -5.130625e-001,
- -4.219268e+001,
- 4.271772e+001,
- -5.365373e-003,
- 4.136743e-001,
- 1.235172e-002,
- 2.520122e+000,
- 5.187269e-001,
- -1.741434e+000,
- -9.589761e-001,
- -8.230339e+000,
- 9.296799e+000,
- -9.600162e-003,
- 4.994969e-001,
- 2.955452e-002,
- 3.667099e-001,
- 3.526999e-001,
- -6.917347e-001,
- 2.154887e-001,
- -8.760264e-001,
- 2.334121e-001,
- -1.909621e-002,
- 4.748033e-001,
- -1.138514e-001,
- 6.515360e+000,
- 1.225097e+000,
- -1.293189e+000,
- -4.218700e-001,
- 1.620952e+000,
- -7.858597e-001,
- -3.769410e-002,
- 6.636786e-001,
- 3.364945e-001,
- -5.341017e-001,
- 2.128347e-001,
- -9.735521e-001,
- -1.325495e-001,
- 1.007517e+000,
- 2.598258e-001,
- 6.762169e-002,
- 1.421018e-003,
- -6.915987e-002,
- 3.185897e+000,
- 8.641956e-001,
- -1.094800e+000,
- -1.962062e-001,
- 5.755591e-001,
- 2.906259e-001,
- 2.625748e-001,
- 7.644049e-001,
- 1.347492e-001,
- 2.677126e+000,
- 6.465460e-001,
- // albedo 1, turbidity 5
- -1.393063e+000,
- -5.578338e-001,
- -4.185249e+001,
- 4.233504e+001,
- -5.435640e-003,
- 4.743765e-001,
- 7.422477e-003,
- 2.442801e+000,
- 5.211707e-001,
- -1.939487e+000,
- -1.128509e+000,
- -8.974257e+000,
- 9.978383e+000,
- -7.965597e-003,
- 2.948830e-001,
- 4.436763e-002,
- 2.839868e-001,
- 3.440424e-001,
- -6.011562e-001,
- 2.354877e-001,
- -3.079820e+000,
- 2.585094e+000,
- -2.002701e-002,
- 7.793909e-001,
- -1.598414e-001,
- 5.834678e+000,
- 1.202856e+000,
- -1.315676e+000,
- -3.903446e-001,
- 1.701900e+000,
- -1.304609e+000,
- -1.045121e-002,
- 2.747707e-001,
- 4.143967e-001,
- 3.197102e-001,
- 2.637580e-001,
- -9.618628e-001,
- -1.625841e-001,
- 1.187138e+000,
- 1.497802e-001,
- -5.590954e-006,
- 3.178475e-002,
- -4.153145e-002,
- 2.496096e+000,
- 8.195082e-001,
- -1.111554e+000,
- -2.365546e-001,
- 7.831875e-001,
- 2.018684e-001,
- 2.074369e-001,
- 7.395978e-001,
- 1.225730e-001,
- 1.876478e+000,
- 6.821167e-001,
- // albedo 1, turbidity 6
- -1.427879e+000,
- -5.994879e-001,
- -3.531016e+001,
- 3.581581e+001,
- -6.431497e-003,
- 4.554192e-001,
- 7.348731e-004,
- 2.334619e+000,
- 5.233377e-001,
- -1.998177e+000,
- -1.206633e+000,
- -2.146510e+001,
- 2.242237e+001,
- -5.857596e-003,
- 2.755663e-001,
- 6.384795e-002,
- 1.358244e-001,
- 3.328437e-001,
- -6.440630e-001,
- 2.058571e-001,
- 2.155499e+000,
- -2.587968e+000,
- -1.840023e-002,
- 8.826555e-001,
- -2.222452e-001,
- 5.847073e+000,
- 1.228387e+000,
- -1.229071e+000,
- -3.360441e-001,
- -3.429599e-001,
- 6.179469e-001,
- 2.029610e-003,
- 8.899319e-002,
- 5.041624e-001,
- 1.882964e-001,
- 2.252040e-001,
- -1.022905e+000,
- -2.101621e-001,
- 1.915689e+000,
- -6.498794e-001,
- -3.463651e-002,
- 8.954605e-002,
- -6.797854e-002,
- 2.417705e+000,
- 8.568618e-001,
- -1.082538e+000,
- -2.007723e-001,
- 4.731009e-001,
- 4.077267e-001,
- 1.324289e-001,
- 6.514880e-001,
- 1.702912e-001,
- 2.309383e+000,
- 6.600895e-001,
- // albedo 1, turbidity 7
- -1.472139e+000,
- -6.499815e-001,
- -3.428465e+001,
- 3.469659e+001,
- -5.747023e-003,
- 4.174167e-001,
- 1.688597e-003,
- 2.323046e+000,
- 5.395191e-001,
- -2.161176e+000,
- -1.353089e+000,
- -2.226827e+001,
- 2.329138e+001,
- -5.583808e-003,
- 2.364793e-001,
- 6.096656e-002,
- 1.944666e-003,
- 2.861624e-001,
- -6.593044e-001,
- 1.393558e-001,
- 4.698373e+000,
- -5.193883e+000,
- -1.998390e-002,
- 1.095635e+000,
- -2.391254e-001,
- 5.598103e+000,
- 1.236193e+000,
- -1.195717e+000,
- -2.972715e-001,
- 4.648953e-002,
- 3.024588e-001,
- 5.003313e-003,
- -3.754741e-001,
- 5.247265e-001,
- -1.381312e-001,
- 2.493896e-001,
- -1.020139e+000,
- -2.253524e-001,
- 3.548437e-001,
- 7.030485e-001,
- -2.107076e-002,
- 4.581395e-001,
- -3.243757e-002,
- 2.453259e+000,
- 8.323623e-001,
- -1.098770e+000,
- -2.435780e-001,
- 8.761614e-001,
- 1.941613e-001,
- -1.990692e-001,
- 3.761139e-001,
- 1.657412e-001,
- 1.590503e+000,
- 6.741417e-001,
- // albedo 1, turbidity 8
- -1.648007e+000,
- -8.205121e-001,
- -4.435106e+001,
- 4.479801e+001,
- -4.181353e-003,
- 3.854830e-001,
- -1.842385e-006,
- 2.000281e+000,
- 5.518363e-001,
- -2.140986e+000,
- -1.282239e+000,
- -3.979213e+000,
- 4.672459e+000,
- -5.008582e-003,
- 2.421920e-001,
- 6.253602e-002,
- 6.612713e-001,
- 2.555851e-001,
- -1.300502e+000,
- -5.137898e-001,
- 5.179821e-001,
- -4.032341e-001,
- -2.066785e-002,
- 1.087929e+000,
- -2.615309e-001,
- 4.225887e+000,
- 1.229237e+000,
- -6.963340e-001,
- 9.241060e-002,
- 6.936356e-002,
- -3.588571e-001,
- -5.461843e-002,
- -5.616643e-001,
- 5.484166e-001,
- -4.776267e-002,
- 2.414935e-001,
- -1.233179e+000,
- -4.325498e-001,
- 6.479813e-001,
- 8.368356e-001,
- 2.458875e-001,
- 6.464752e-001,
- -2.897097e-002,
- 1.561773e+000,
- 8.518598e-001,
- -1.051023e+000,
- -2.533690e-001,
- 1.004294e+000,
- 3.028083e-001,
- -1.520108e+000,
- 1.607013e-001,
- 1.619975e-001,
- 1.131094e+000,
- 6.706655e-001,
- // albedo 1, turbidity 9
- -1.948249e+000,
- -1.097383e+000,
- -4.453697e+001,
- 4.494902e+001,
- -3.579939e-003,
- 3.491605e-001,
- -2.500253e-006,
- 1.740442e+000,
- 6.188022e-001,
- -2.154253e+000,
- -1.209559e+000,
- 4.144894e+000,
- -3.562411e+000,
- -5.638843e-003,
- 1.067169e-001,
- 7.594858e-002,
- 1.005280e+000,
- 1.072543e-001,
- -2.513259e+000,
- -1.507208e+000,
- -1.602979e+000,
- 1.404154e+000,
- -5.560750e-003,
- 1.240490e+000,
- -2.852117e-001,
- 3.485252e+000,
- 1.349321e+000,
- -7.832214e-002,
- 3.655626e-001,
- 3.856288e-001,
- 6.867894e-001,
- -1.609523e-001,
- -6.704306e-001,
- 5.357301e-001,
- -6.457935e-001,
- 1.479503e-001,
- -1.354784e+000,
- -5.454375e-001,
- 8.797469e-001,
- -1.466514e+000,
- 7.134420e-001,
- 5.934903e-001,
- -2.911178e-002,
- 8.643737e-001,
- 9.030724e-001,
- -1.048324e+000,
- -2.738736e-001,
- 8.783074e-001,
- 3.246188e+000,
- -4.435369e+000,
- 1.251791e-001,
- 1.783486e-001,
- 1.064657e+000,
- 6.522878e-001,
- // albedo 1, turbidity 10
- -2.770408e+000,
- -1.618911e+000,
- -2.504031e+001,
- 2.531674e+001,
- -4.239279e-003,
- 3.241013e-001,
- -3.764484e-006,
- 1.586843e+000,
- 7.035906e-001,
- -1.913500e+000,
- -1.144014e+000,
- -1.080587e+001,
- 1.153677e+001,
- -1.003197e-002,
- 1.577515e-001,
- 5.217789e-002,
- 1.225278e+000,
- 5.172771e-003,
- -5.293208e+000,
- -2.876463e+000,
- 2.087053e+000,
- -3.201552e+000,
- 3.892964e-003,
- 5.323930e-001,
- -2.034512e-001,
- 2.617760e+000,
- 1.273597e+000,
- 9.060340e-001,
- 3.773409e-001,
- -6.399945e-001,
- 3.213979e+000,
- -9.112172e-002,
- 6.494055e-001,
- 3.953280e-001,
- 5.047796e-001,
- 2.998695e-001,
- -1.482179e+000,
- -6.778310e-001,
- 1.161775e+000,
- -3.004872e+000,
- 4.774797e-001,
- -4.969248e-001,
- -3.512074e-003,
- -1.307190e+000,
- 7.927378e-001,
- -9.863181e-001,
- -1.803364e-001,
- 5.810824e-001,
- 4.580570e+000,
- -3.863454e+000,
- 5.328174e-001,
- 2.272821e-001,
- 1.771114e+000,
- 6.791814e-001,
-};
-
-static const double datasetXYZRad3[] = {
- // albedo 0, turbidity 1
- 1.168084e+000,
- 2.156455e+000,
- -3.980314e+000,
- 1.989302e+001,
- 1.328335e+001,
- 1.435621e+001,
- // albedo 0, turbidity 2
- 1.135488e+000,
- 2.294701e+000,
- -4.585886e+000,
- 2.090208e+001,
- 1.347840e+001,
- 1.467658e+001,
- // albedo 0, turbidity 3
- 1.107408e+000,
- 2.382765e+000,
- -5.112357e+000,
- 2.147823e+001,
- 1.493128e+001,
- 1.460882e+001,
- // albedo 0, turbidity 4
- 1.054193e+000,
- 2.592891e+000,
- -6.115000e+000,
- 2.268967e+001,
- 1.635672e+001,
- 1.518999e+001,
- // albedo 0, turbidity 5
- 1.006946e+000,
- 2.705420e+000,
- -6.698930e+000,
- 2.291830e+001,
- 1.834324e+001,
- 1.570651e+001,
- // albedo 0, turbidity 6
- 9.794044e-001,
- 2.742440e+000,
- -6.805283e+000,
- 2.225271e+001,
- 2.050797e+001,
- 1.563130e+001,
- // albedo 0, turbidity 7
- 9.413577e-001,
- 2.722009e+000,
- -6.760707e+000,
- 2.098242e+001,
- 2.342588e+001,
- 1.605011e+001,
- // albedo 0, turbidity 8
- 8.917923e-001,
- 2.592780e+000,
- -6.152635e+000,
- 1.774141e+001,
- 2.858324e+001,
- 1.657910e+001,
- // albedo 0, turbidity 9
- 8.288391e-001,
- 2.153434e+000,
- -4.118327e+000,
- 1.078118e+001,
- 3.681710e+001,
- 1.738139e+001,
- // albedo 0, turbidity 10
- 7.623528e-001,
- 1.418187e+000,
- -8.845235e-001,
- 7.590129e-001,
- 4.629859e+001,
- 1.921657e+001,
- // albedo 1, turbidity 1
- 1.352858e+000,
- 2.048862e+000,
- -2.053393e+000,
- 1.405874e+001,
- 3.045344e+001,
- 3.044430e+001,
- // albedo 1, turbidity 2
- 1.330497e+000,
- 2.126497e+000,
- -2.466296e+000,
- 1.467559e+001,
- 3.090738e+001,
- 3.069707e+001,
- // albedo 1, turbidity 3
- 1.286344e+000,
- 2.200436e+000,
- -2.877228e+000,
- 1.492701e+001,
- 3.236288e+001,
- 3.077223e+001,
- // albedo 1, turbidity 4
- 1.234428e+000,
- 2.289628e+000,
- -3.404699e+000,
- 1.499436e+001,
- 3.468390e+001,
- 3.084842e+001,
- // albedo 1, turbidity 5
- 1.178660e+000,
- 2.306071e+000,
- -3.549159e+000,
- 1.411006e+001,
- 3.754188e+001,
- 3.079730e+001,
- // albedo 1, turbidity 6
- 1.151366e+000,
- 2.333005e+000,
- -3.728627e+000,
- 1.363374e+001,
- 3.905894e+001,
- 3.092599e+001,
- // albedo 1, turbidity 7
- 1.101593e+000,
- 2.299422e+000,
- -3.565787e+000,
- 1.196745e+001,
- 4.188472e+001,
- 3.102755e+001,
- // albedo 1, turbidity 8
- 1.038322e+000,
- 2.083539e+000,
- -2.649585e+000,
- 8.037389e+000,
- 4.700869e+001,
- 3.065948e+001,
- // albedo 1, turbidity 9
- 9.596146e-001,
- 1.671470e+000,
- -8.751538e-001,
- 1.679772e+000,
- 5.345784e+001,
- 3.054520e+001,
- // albedo 1, turbidity 10
- 8.640731e-001,
- 9.858301e-001,
- 1.854956e+000,
- -6.798097e+000,
- 5.936468e+001,
- 3.110255e+001,
-};
-
-static const double *datasetsXYZ[] = {datasetXYZ1, datasetXYZ2, datasetXYZ3};
-
-static const double *datasetsXYZRad[] = {datasetXYZRad1, datasetXYZRad2, datasetXYZRad3};
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_ssef.h b/intern/cycles/util/util_ssef.h
index e6610dbb197..e9f0efb4efb 100644
--- a/intern/cycles/util/util_ssef.h
+++ b/intern/cycles/util/util_ssef.h
@@ -18,6 +18,8 @@
#ifndef __UTIL_SSEF_H__
#define __UTIL_SSEF_H__
+#include "util_ssei.h"
+
CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_SSE2__
@@ -523,13 +525,29 @@ __forceinline ssei truncatei(const ssef &a)
return _mm_cvttps_epi32(a.m128);
}
+/* This is about 25% faster than straightforward floor to integer conversion
+ * due to better pipelining.
+ *
+ * Unsaturated add of the all-ones mask 0xffffffff from (a < 0) is the same as subtracting 1.
+ */
__forceinline ssei floori(const ssef &a)
{
-# if defined(__KERNEL_SSE41__)
- return ssei(floor(a));
-# else
- return ssei(a - ssef(0.5f));
-# endif
+ return truncatei(a) + cast((a < 0.0f).m128);
+}
+
+__forceinline ssef floorfrac(const ssef &x, ssei *i)
+{
+ *i = floori(x);
+ return x - ssef(*i);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Common Functions
+////////////////////////////////////////////////////////////////////////////////
+
+__forceinline ssef mix(const ssef &a, const ssef &b, const ssef &t)
+{
+ return madd(t, b, (ssef(1.0f) - t) * a);
}
////////////////////////////////////////////////////////////////////////////////
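A minimal scalar sketch of the floori() trick above (illustrative only, not part of the patch): the SSE comparison (a < 0.0f) yields an all-ones mask, which reads as the integer -1, so adding the cast mask to the truncated value subtracts 1 for negative inputs and turns round-toward-zero into floor for non-integral values. Like the vector version, exact negative integers land one below (e.g. -2.0 maps to -3), a quirk callers are assumed to tolerate.

#include <cstdio>

/* Hypothetical scalar equivalent of the SSE floori() above. */
static inline int floori_scalar(float a)
{
  int t = (int)a;                 /* truncate toward zero */
  return t + (a < 0.0f ? -1 : 0); /* all-ones mask acts as -1 when a < 0 */
}

int main()
{
  /* prints: 1 -2 -1 */
  printf("%d %d %d\n", floori_scalar(1.7f), floori_scalar(-1.7f), floori_scalar(-0.2f));
  return 0;
}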
diff --git a/intern/cycles/util/util_ssei.h b/intern/cycles/util/util_ssei.h
index bfab2ee00f9..a4db9193206 100644
--- a/intern/cycles/util/util_ssei.h
+++ b/intern/cycles/util/util_ssei.h
@@ -310,6 +310,15 @@ __forceinline ssei &operator|=(ssei &a, const int32_t &b)
return a = a | b;
}
+__forceinline ssei &operator^=(ssei &a, const ssei &b)
+{
+ return a = a ^ b;
+}
+__forceinline ssei &operator^=(ssei &a, const int32_t &b)
+{
+ return a = a ^ b;
+}
+
__forceinline ssei &operator<<=(ssei &a, const int32_t &b)
{
return a = a << b;
diff --git a/intern/cycles/util/util_static_assert.h b/intern/cycles/util/util_static_assert.h
index ceb52830319..d809f2e06d7 100644
--- a/intern/cycles/util/util_static_assert.h
+++ b/intern/cycles/util/util_static_assert.h
@@ -14,34 +14,20 @@
* limitations under the License.
*/
+/* clang-format off */
+
+/* #define static_assert triggers a bug in some clang-format versions, so disable
+ * formatting for the entire file to keep results consistent. */
+
#ifndef __UTIL_STATIC_ASSERT_H__
#define __UTIL_STATIC_ASSERT_H__
CCL_NAMESPACE_BEGIN
-/* TODO(sergey): In theory CUDA might work with own static assert
- * implementation since it's just pure C++.
- */
-#ifdef __KERNEL_GPU__
-# ifndef static_assert
-# define static_assert(statement, message)
-# endif
-#endif /* __KERNEL_GPU__ */
-
-/* TODO(sergey): For until C++11 is a bare minimum for us,
- * we do a bit of a trickery to show meaningful message so
- * it's more or less clear what's wrong when building without
- * C++11.
- *
- * The thing here is: our non-C++11 implementation doesn't
- * have a way to print any message after preprocessor
- * substitution so we rely on the message which is passed to
- * static_assert() since that's the only message visible when
- * compilation fails.
- *
- * After C++11 bump it should be possible to glue structure
- * name to the error message,
- */
+#if defined(__KERNEL_OPENCL__) || defined(CYCLES_CUBIN_CC)
+# define static_assert(statement, message)
+#endif /* __KERNEL_OPENCL__ || CYCLES_CUBIN_CC */
+
#define static_assert_align(st, align) \
static_assert((sizeof(st) % (align) == 0), "Structure must be strictly aligned") // NOLINT
diff --git a/intern/cycles/util/util_string.h b/intern/cycles/util/util_string.h
index f71145741c9..ce2d4acdde4 100644
--- a/intern/cycles/util/util_string.h
+++ b/intern/cycles/util/util_string.h
@@ -17,9 +17,9 @@
#ifndef __UTIL_STRING_H__
#define __UTIL_STRING_H__
+#include <sstream>
#include <string.h>
#include <string>
-#include <sstream>
#include "util/util_vector.h"
diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp
index 0cd991c6231..6d32153209a 100644
--- a/intern/cycles/util/util_system.cpp
+++ b/intern/cycles/util/util_system.cpp
@@ -17,8 +17,8 @@
#include "util/util_system.h"
#include "util/util_logging.h"
-#include "util/util_types.h"
#include "util/util_string.h"
+#include "util/util_types.h"
#include <numaapi.h>
@@ -35,8 +35,8 @@ OIIO_NAMESPACE_USING
# include <sys/sysctl.h>
# include <sys/types.h>
#else
-# include <unistd.h>
# include <sys/ioctl.h>
+# include <unistd.h>
#endif
CCL_NAMESPACE_BEGIN
@@ -357,7 +357,7 @@ size_t system_physical_ram()
MEMORYSTATUSEX ram;
ram.dwLength = sizeof(ram);
GlobalMemoryStatusEx(&ram);
- return ram.ullTotalPhys * 1024;
+ return ram.ullTotalPhys;
#elif defined(__APPLE__)
uint64_t ram = 0;
size_t len = sizeof(ram);
diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp
index 4b11ce73ea9..4fb61392e92 100644
--- a/intern/cycles/util/util_task.cpp
+++ b/intern/cycles/util/util_task.cpp
@@ -14,106 +14,34 @@
* limitations under the License.
*/
+#include "util/util_task.h"
#include "util/util_foreach.h"
#include "util/util_logging.h"
#include "util/util_system.h"
-#include "util/util_task.h"
#include "util/util_time.h"
-//#define THREADING_DEBUG_ENABLED
-
-#ifdef THREADING_DEBUG_ENABLED
-# include <stdio.h>
-# define THREADING_DEBUG(...) \
- do { \
- printf(__VA_ARGS__); \
- fflush(stdout); \
- } while (0)
-#else
-# define THREADING_DEBUG(...)
-#endif
-
CCL_NAMESPACE_BEGIN
/* Task Pool */
-TaskPool::TaskPool()
+TaskPool::TaskPool() : start_time(time_dt()), num_tasks_handled(0)
{
- num_tasks_handled = 0;
- num = 0;
- do_cancel = false;
}
TaskPool::~TaskPool()
{
- stop();
+ cancel();
}
-void TaskPool::push(Task *task, bool front)
+void TaskPool::push(TaskRunFunction &&task)
{
- TaskScheduler::Entry entry;
-
- entry.task = task;
- entry.pool = this;
-
- TaskScheduler::push(entry, front);
-}
-
-void TaskPool::push(const TaskRunFunction &run, bool front)
-{
- push(new Task(run), front);
+ tbb_group.run(std::move(task));
+ num_tasks_handled++;
}
void TaskPool::wait_work(Summary *stats)
{
- thread_scoped_lock num_lock(num_mutex);
-
- while (num != 0) {
- num_lock.unlock();
-
- thread_scoped_lock queue_lock(TaskScheduler::queue_mutex);
-
- /* find task from this pool. if we get a task from another pool,
- * we can get into deadlock */
- TaskScheduler::Entry work_entry;
- bool found_entry = false;
- list<TaskScheduler::Entry>::iterator it;
-
- for (it = TaskScheduler::queue.begin(); it != TaskScheduler::queue.end(); it++) {
- TaskScheduler::Entry &entry = *it;
-
- if (entry.pool == this) {
- work_entry = entry;
- found_entry = true;
- TaskScheduler::queue.erase(it);
- break;
- }
- }
-
- queue_lock.unlock();
-
- /* if found task, do it, otherwise wait until other tasks are done */
- if (found_entry) {
- /* run task */
- work_entry.task->run(0);
-
- /* delete task */
- delete work_entry.task;
-
- /* notify pool task was done */
- num_decrease(1);
- }
-
- num_lock.lock();
- if (num == 0)
- break;
-
- if (!found_entry) {
- THREADING_DEBUG("num==%d, Waiting for condition in TaskPool::wait_work !found_entry\n", num);
- num_cond.wait(num_lock);
- THREADING_DEBUG("num==%d, condition wait done in TaskPool::wait_work !found_entry\n", num);
- }
- }
+ tbb_group.wait();
if (stats != NULL) {
stats->time_total = time_dt() - start_time;
@@ -123,193 +51,21 @@ void TaskPool::wait_work(Summary *stats)
void TaskPool::cancel()
{
- do_cancel = true;
-
- TaskScheduler::clear(this);
-
- {
- thread_scoped_lock num_lock(num_mutex);
-
- while (num) {
- THREADING_DEBUG("num==%d, Waiting for condition in TaskPool::cancel\n", num);
- num_cond.wait(num_lock);
- THREADING_DEBUG("num==%d condition wait done in TaskPool::cancel\n", num);
- }
- }
-
- do_cancel = false;
-}
-
-void TaskPool::stop()
-{
- TaskScheduler::clear(this);
-
- assert(num == 0);
+ tbb_group.cancel();
+ tbb_group.wait();
}
bool TaskPool::canceled()
{
- return do_cancel;
-}
-
-bool TaskPool::finished()
-{
- thread_scoped_lock num_lock(num_mutex);
- return num == 0;
-}
-
-void TaskPool::num_decrease(int done)
-{
- num_mutex.lock();
- num -= done;
-
- assert(num >= 0);
- if (num == 0) {
- THREADING_DEBUG("num==%d, notifying all in TaskPool::num_decrease\n", num);
- num_cond.notify_all();
- }
-
- num_mutex.unlock();
-}
-
-void TaskPool::num_increase()
-{
- thread_scoped_lock num_lock(num_mutex);
- if (num_tasks_handled == 0) {
- start_time = time_dt();
- }
- num++;
- num_tasks_handled++;
- THREADING_DEBUG("num==%d, notifying all in TaskPool::num_increase\n", num);
- num_cond.notify_all();
+ return tbb_group.is_canceling();
}
/* Task Scheduler */
thread_mutex TaskScheduler::mutex;
int TaskScheduler::users = 0;
-vector<thread *> TaskScheduler::threads;
-bool TaskScheduler::do_exit = false;
-
-list<TaskScheduler::Entry> TaskScheduler::queue;
-thread_mutex TaskScheduler::queue_mutex;
-thread_condition_variable TaskScheduler::queue_cond;
-
-namespace {
-
-/* Get number of processors on each of the available nodes. The result is sized
- * by the highest node index, and element corresponds to number of processors on
- * that node.
- * If node is not available, then the corresponding number of processors is
- * zero. */
-void get_per_node_num_processors(vector<int> *num_per_node_processors)
-{
- const int num_nodes = system_cpu_num_numa_nodes();
- if (num_nodes == 0) {
- LOG(ERROR) << "Zero available NUMA nodes, is not supposed to happen.";
- return;
- }
- num_per_node_processors->resize(num_nodes);
- for (int node = 0; node < num_nodes; ++node) {
- if (!system_cpu_is_numa_node_available(node)) {
- (*num_per_node_processors)[node] = 0;
- continue;
- }
- (*num_per_node_processors)[node] = system_cpu_num_numa_node_processors(node);
- }
-}
-
-/* Calculate total number of processors on all available nodes.
- * This is similar to system_cpu_thread_count(), but uses pre-calculated number
- * of processors on each of the node, avoiding extra system calls and checks for
- * the node availability. */
-int get_num_total_processors(const vector<int> &num_per_node_processors)
-{
- int num_total_processors = 0;
- foreach (int num_node_processors, num_per_node_processors) {
- num_total_processors += num_node_processors;
- }
- return num_total_processors;
-}
-
-/* Compute NUMA node for every thread to run on, for the best performance. */
-vector<int> distribute_threads_on_nodes(const int num_threads)
-{
- /* Start with all threads unassigned to any specific NUMA node. */
- vector<int> thread_nodes(num_threads, -1);
- const int num_active_group_processors = system_cpu_num_active_group_processors();
- VLOG(1) << "Detected " << num_active_group_processors << " processors "
- << "in active group.";
- if (num_active_group_processors >= num_threads) {
- /* If the current thread is set up in a way that its affinity allows to
- * use at least requested number of threads we do not explicitly set
- * affinity to the worker therads.
- * This way we allow users to manually edit affinity of the parent
- * thread, and here we follow that affinity. This way it's possible to
- * have two Cycles/Blender instances running manually set to a different
- * dies on a CPU. */
- VLOG(1) << "Not setting thread group affinity.";
- return thread_nodes;
- }
- vector<int> num_per_node_processors;
- get_per_node_num_processors(&num_per_node_processors);
- if (num_per_node_processors.size() == 0) {
- /* Error was already repported, here we can't do anything, so we simply
- * leave default affinity to all the worker threads. */
- return thread_nodes;
- }
- const int num_nodes = num_per_node_processors.size();
- int thread_index = 0;
- /* First pass: fill in all the nodes to their maximum.
- *
- * If there is less threads than the overall nodes capacity, some of the
- * nodes or parts of them will idle.
- *
- * TODO(sergey): Consider picking up fastest nodes if number of threads
- * fits on them. For example, on Threadripper2 we might consider using nodes
- * 0 and 2 if user requested 32 render threads. */
- const int num_total_node_processors = get_num_total_processors(num_per_node_processors);
- int current_node_index = 0;
- while (thread_index < num_total_node_processors && thread_index < num_threads) {
- const int num_node_processors = num_per_node_processors[current_node_index];
- for (int processor_index = 0; processor_index < num_node_processors; ++processor_index) {
- VLOG(1) << "Scheduling thread " << thread_index << " to node " << current_node_index << ".";
- thread_nodes[thread_index] = current_node_index;
- ++thread_index;
- if (thread_index == num_threads) {
- /* All threads are scheduled on their nodes. */
- return thread_nodes;
- }
- }
- ++current_node_index;
- }
- /* Second pass: keep scheduling threads to each node one by one, uniformly
- * fillign them in.
- * This is where things becomes tricky to predict for the maximum
- * performance: on the one hand this avoids too much threading overhead on
- * few nodes, but for the final performance having all the overhead on one
- * node might be better idea (since other nodes will have better chance of
- * rendering faster).
- * But more tricky is that nodes might have difference capacity, so we might
- * want to do some weighted scheduling. For example, if node 0 has 16
- * processors and node 1 has 32 processors, we'd better schedule 1 extra
- * thread on node 0 and 2 extra threads on node 1. */
- current_node_index = 0;
- while (thread_index < num_threads) {
- /* Skip unavailable nodes. */
- /* TODO(sergey): Add sanity check against deadlock. */
- while (num_per_node_processors[current_node_index] == 0) {
- current_node_index = (current_node_index + 1) % num_nodes;
- }
- VLOG(1) << "Scheduling thread " << thread_index << " to node " << current_node_index << ".";
- ++thread_index;
- current_node_index = (current_node_index + 1) % num_nodes;
- }
-
- return thread_nodes;
-}
-
-} // namespace
+int TaskScheduler::active_num_threads = 0;
+tbb::global_control *TaskScheduler::global_control = nullptr;
void TaskScheduler::init(int num_threads)
{
@@ -320,22 +76,15 @@ void TaskScheduler::init(int num_threads)
if (users != 1) {
return;
}
- do_exit = false;
- const bool use_auto_threads = (num_threads == 0);
- if (use_auto_threads) {
+ if (num_threads > 0) {
/* Automatic number of threads. */
- num_threads = system_cpu_thread_count();
+ VLOG(1) << "Overriding number of TBB threads to " << num_threads << ".";
+ global_control = new tbb::global_control(tbb::global_control::max_allowed_parallelism,
+ num_threads);
+ active_num_threads = num_threads;
}
- VLOG(1) << "Creating pool of " << num_threads << " threads.";
-
- /* Compute distribution on NUMA nodes. */
- vector<int> thread_nodes = distribute_threads_on_nodes(num_threads);
-
- /* Launch threads that will be waiting for work. */
- threads.resize(num_threads);
- for (int thread_index = 0; thread_index < num_threads; ++thread_index) {
- threads[thread_index] = new thread(function_bind(&TaskScheduler::thread_run, thread_index + 1),
- thread_nodes[thread_index]);
+ else {
+ active_num_threads = system_cpu_thread_count();
}
}
@@ -344,105 +93,20 @@ void TaskScheduler::exit()
thread_scoped_lock lock(mutex);
users--;
if (users == 0) {
- VLOG(1) << "De-initializing thread pool of task scheduler.";
- /* stop all waiting threads */
- TaskScheduler::queue_mutex.lock();
- do_exit = true;
- TaskScheduler::queue_cond.notify_all();
- TaskScheduler::queue_mutex.unlock();
-
- /* delete threads */
- foreach (thread *t, threads) {
- t->join();
- delete t;
- }
- threads.clear();
+ delete global_control;
+ global_control = nullptr;
+ active_num_threads = 0;
}
}
void TaskScheduler::free_memory()
{
assert(users == 0);
- threads.free_memory();
}
-bool TaskScheduler::thread_wait_pop(Entry &entry)
+int TaskScheduler::num_threads()
{
- thread_scoped_lock queue_lock(queue_mutex);
-
- while (queue.empty() && !do_exit)
- queue_cond.wait(queue_lock);
-
- if (queue.empty()) {
- assert(do_exit);
- return false;
- }
-
- entry = queue.front();
- queue.pop_front();
-
- return true;
-}
-
-void TaskScheduler::thread_run(int thread_id)
-{
- Entry entry;
-
- /* todo: test affinity/denormal mask */
-
- /* keep popping off tasks */
- while (thread_wait_pop(entry)) {
- /* run task */
- entry.task->run(thread_id);
-
- /* delete task */
- delete entry.task;
-
- /* notify pool task was done */
- entry.pool->num_decrease(1);
- }
-}
-
-void TaskScheduler::push(Entry &entry, bool front)
-{
- entry.pool->num_increase();
-
- /* add entry to queue */
- TaskScheduler::queue_mutex.lock();
- if (front)
- TaskScheduler::queue.push_front(entry);
- else
- TaskScheduler::queue.push_back(entry);
-
- TaskScheduler::queue_cond.notify_one();
- TaskScheduler::queue_mutex.unlock();
-}
-
-void TaskScheduler::clear(TaskPool *pool)
-{
- thread_scoped_lock queue_lock(TaskScheduler::queue_mutex);
-
- /* erase all tasks from this pool from the queue */
- list<Entry>::iterator it = queue.begin();
- int done = 0;
-
- while (it != queue.end()) {
- Entry &entry = *it;
-
- if (entry.pool == pool) {
- done++;
- delete entry.task;
-
- it = queue.erase(it);
- }
- else
- it++;
- }
-
- queue_lock.unlock();
-
- /* notify done */
- pool->num_decrease(done);
+ return active_num_threads;
}
/* Dedicated Task Pool */
@@ -458,31 +122,30 @@ DedicatedTaskPool::DedicatedTaskPool()
DedicatedTaskPool::~DedicatedTaskPool()
{
- stop();
+ wait();
+
+ do_exit = true;
+ queue_cond.notify_all();
+
worker_thread->join();
delete worker_thread;
}
-void DedicatedTaskPool::push(Task *task, bool front)
+void DedicatedTaskPool::push(TaskRunFunction &&task, bool front)
{
num_increase();
/* add task to queue */
queue_mutex.lock();
if (front)
- queue.push_front(task);
+ queue.emplace_front(std::move(task));
else
- queue.push_back(task);
+ queue.emplace_back(std::move(task));
queue_cond.notify_one();
queue_mutex.unlock();
}
-void DedicatedTaskPool::push(const TaskRunFunction &run, bool front)
-{
- push(new Task(run), front);
-}
-
void DedicatedTaskPool::wait()
{
thread_scoped_lock num_lock(num_mutex);
@@ -501,18 +164,6 @@ void DedicatedTaskPool::cancel()
do_cancel = false;
}
-void DedicatedTaskPool::stop()
-{
- clear();
-
- do_exit = true;
- queue_cond.notify_all();
-
- wait();
-
- assert(num == 0);
-}
-
bool DedicatedTaskPool::canceled()
{
return do_cancel;
@@ -535,7 +186,7 @@ void DedicatedTaskPool::num_increase()
num_cond.notify_all();
}
-bool DedicatedTaskPool::thread_wait_pop(Task *&task)
+bool DedicatedTaskPool::thread_wait_pop(TaskRunFunction &task)
{
thread_scoped_lock queue_lock(queue_mutex);
@@ -555,15 +206,15 @@ bool DedicatedTaskPool::thread_wait_pop(Task *&task)
void DedicatedTaskPool::thread_run()
{
- Task *task;
+ TaskRunFunction task;
/* keep popping off tasks */
while (thread_wait_pop(task)) {
/* run task */
- task->run(0);
+ task();
/* delete task */
- delete task;
+ task = nullptr;
/* notify task was done */
num_decrease(1);
@@ -575,15 +226,8 @@ void DedicatedTaskPool::clear()
thread_scoped_lock queue_lock(queue_mutex);
/* erase all tasks from the queue */
- list<Task *>::iterator it = queue.begin();
- int done = 0;
-
- while (it != queue.end()) {
- done++;
- delete *it;
-
- it = queue.erase(it);
- }
+ int done = queue.size();
+ queue.clear();
queue_lock.unlock();
diff --git a/intern/cycles/util/util_task.h b/intern/cycles/util/util_task.h
index fd30a33d8ef..a56ca62f62c 100644
--- a/intern/cycles/util/util_task.h
+++ b/intern/cycles/util/util_task.h
@@ -19,48 +19,16 @@
#include "util/util_list.h"
#include "util/util_string.h"
+#include "util/util_tbb.h"
#include "util/util_thread.h"
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
-class Task;
class TaskPool;
class TaskScheduler;
-/* Notes on Thread ID
- *
- * Thread ID argument reports the 0-based ID of a working thread from which
- * the run() callback is being invoked. Thread ID of 0 denotes the thread from
- * which wait_work() was called.
- *
- * DO NOT use this ID to control execution flaw, use it only for things like
- * emulating TLS which does not affect on scheduling. Don't use this ID to make
- * any decisions.
- *
- * It is to be noted here that dedicated task pool will always report thread ID
- * of 0.
- */
-
-typedef function<void(int thread_id)> TaskRunFunction;
-
-/* Task
- *
- * Base class for tasks to be executed in threads. */
-
-class Task {
- public:
- Task(){};
- explicit Task(const TaskRunFunction &run_) : run(run_)
- {
- }
-
- virtual ~Task()
- {
- }
-
- TaskRunFunction run;
-};
+typedef function<void(void)> TaskRunFunction;
/* Task Pool
*
@@ -68,8 +36,7 @@ class Task {
* pool, we can wait for all tasks to be done, or cancel them before they are
* done.
*
- * The run callback that actually executes the task may be created like this:
- * function_bind(&MyClass::task_execute, this, _1, _2) */
+ * TaskRunFunction may be created with std::bind or lambda expressions. */
class TaskPool {
public:
@@ -89,27 +56,15 @@ class TaskPool {
TaskPool();
~TaskPool();
- void push(Task *task, bool front = false);
- void push(const TaskRunFunction &run, bool front = false);
+ void push(TaskRunFunction &&task);
void wait_work(Summary *stats = NULL); /* work and wait until all tasks are done */
- void cancel(); /* cancel all tasks, keep worker threads running */
- void stop(); /* stop all worker threads */
- bool finished(); /* check if all work has been completed */
+ void cancel(); /* cancel all tasks and wait until they are no longer executing */
bool canceled(); /* for worker threads, test if canceled */
protected:
- friend class TaskScheduler;
-
- void num_decrease(int done);
- void num_increase();
-
- thread_mutex num_mutex;
- thread_condition_variable num_cond;
-
- int num;
- bool do_cancel;
+ tbb::task_group tbb_group;
/* ** Statistics ** */
@@ -131,40 +86,19 @@ class TaskScheduler {
static void exit();
static void free_memory();
- /* number of threads that can work on task */
- static int num_threads()
- {
- return threads.size();
- }
-
- /* test if any session is using the scheduler */
- static bool active()
- {
- return users != 0;
- }
+ /* Approximate number of threads that will work on tasks, which may be lower
+ * or higher than the actual number of threads. Use this as little as possible and
+ * leave splitting up tasks to the scheduler. */
+ static int num_threads();
protected:
- friend class TaskPool;
-
- struct Entry {
- Task *task;
- TaskPool *pool;
- };
-
static thread_mutex mutex;
static int users;
- static vector<thread *> threads;
- static bool do_exit;
+ static int active_num_threads;
- static list<Entry> queue;
- static thread_mutex queue_mutex;
- static thread_condition_variable queue_cond;
-
- static void thread_run(int thread_id);
- static bool thread_wait_pop(Entry &entry);
-
- static void push(Entry &entry, bool front);
- static void clear(TaskPool *pool);
+#ifdef WITH_TBB_GLOBAL_CONTROL
+ static tbb::global_control *global_control;
+#endif
};
/* Dedicated Task Pool
@@ -179,12 +113,10 @@ class DedicatedTaskPool {
DedicatedTaskPool();
~DedicatedTaskPool();
- void push(Task *task, bool front = false);
- void push(const TaskRunFunction &run, bool front = false);
+ void push(TaskRunFunction &&run, bool front = false);
void wait(); /* wait until all tasks are done */
void cancel(); /* cancel all tasks, keep worker thread running */
- void stop(); /* stop worker thread */
bool canceled(); /* for worker thread, test if canceled */
@@ -193,14 +125,14 @@ class DedicatedTaskPool {
void num_increase();
void thread_run();
- bool thread_wait_pop(Task *&entry);
+ bool thread_wait_pop(TaskRunFunction &task);
void clear();
thread_mutex num_mutex;
thread_condition_variable num_cond;
- list<Task *> queue;
+ list<TaskRunFunction> queue;
thread_mutex queue_mutex;
thread_condition_variable queue_cond;
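A hypothetical usage sketch of the reworked TaskPool declared above (render_tile and num_tiles are placeholder names, not taken from the patch): tasks are now plain void() callbacks, so lambdas capture whatever state they need instead of receiving a thread id, and splitting the work further is left to TBB.

#include "util/util_task.h"

CCL_NAMESPACE_BEGIN

/* Illustrative only: pushes one task per tile and waits for completion. */
static void example_push_and_wait(int num_tiles)
{
  TaskPool pool;
  for (int i = 0; i < num_tiles; i++) {
    /* push() takes a TaskRunFunction rvalue; the lambda captures the tile index. */
    pool.push([i] { /* render_tile(i); */ });
  }
  /* Work on and wait until all pushed tasks are done; cancel() would instead
   * stop the remaining tasks and wait for running ones to finish. */
  pool.wait_work();
}

CCL_NAMESPACE_END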
diff --git a/intern/cycles/util/util_tbb.h b/intern/cycles/util/util_tbb.h
new file mode 100644
index 00000000000..206ba106ca6
--- /dev/null
+++ b/intern/cycles/util/util_tbb.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TBB_H__
+#define __UTIL_TBB_H__
+
+/* TBB includes <windows.h>; include it ourselves first so we are sure
+ * WIN32_LEAN_AND_MEAN and similar are defined beforehand. */
+#include "util_windows.h"
+
+#define TBB_SUPPRESS_DEPRECATED_MESSAGES 1
+#include <tbb/tbb.h>
+
+#if TBB_INTERFACE_VERSION_MAJOR >= 10
+# define WITH_TBB_GLOBAL_CONTROL
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+using tbb::blocked_range;
+using tbb::enumerable_thread_specific;
+using tbb::parallel_for;
+
+static inline void parallel_for_cancel()
+{
+ tbb::task::self().cancel_group_execution();
+}
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_TBB_H__ */
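Because the new header re-exports tbb::parallel_for and tbb::blocked_range into the Cycles namespace, callers can parallelize loops directly; a minimal sketch (assuming an arbitrary float array, not taken from the patch):

#include "util/util_tbb.h"

CCL_NAMESPACE_BEGIN

/* Hypothetical example: scale an array in parallel using the re-exported names. */
static void scale_array(float *data, size_t size, float factor)
{
  parallel_for(blocked_range<size_t>(0, size), [&](const blocked_range<size_t> &r) {
    for (size_t i = r.begin(); i != r.end(); i++) {
      data[i] *= factor;
    }
  });
}

CCL_NAMESPACE_END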
diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h
index 5ce16e0095a..863c2ea3124 100644
--- a/intern/cycles/util/util_texture.h
+++ b/intern/cycles/util/util_texture.h
@@ -17,6 +17,8 @@
#ifndef __UTIL_TEXTURE_H__
#define __UTIL_TEXTURE_H__
+#include "util_transform.h"
+
CCL_NAMESPACE_BEGIN
/* Texture limits on devices. */
@@ -59,6 +61,18 @@ typedef enum ImageDataType {
IMAGE_DATA_NUM_TYPES
} ImageDataType;
+/* Alpha types
+ * How to treat alpha in images. */
+typedef enum ImageAlphaType {
+ IMAGE_ALPHA_UNASSOCIATED = 0,
+ IMAGE_ALPHA_ASSOCIATED = 1,
+ IMAGE_ALPHA_CHANNEL_PACKED = 2,
+ IMAGE_ALPHA_IGNORE = 3,
+ IMAGE_ALPHA_AUTO = 4,
+
+ IMAGE_ALPHA_NUM_TYPES,
+} ImageAlphaType;
+
#define IMAGE_DATA_TYPE_SHIFT 3
#define IMAGE_DATA_TYPE_MASK 0x7
@@ -79,12 +93,17 @@ typedef enum ExtensionType {
typedef struct TextureInfo {
/* Pointer, offset or texture depending on device. */
uint64_t data;
+ /* Data Type */
+ uint data_type;
/* Buffer number for OpenCL. */
uint cl_buffer;
/* Interpolation and extension type. */
uint interpolation, extension;
/* Dimensions. */
uint width, height, depth;
+ /* Transform for 3D textures. */
+ uint use_transform_3d;
+ Transform transform_3d;
} TextureInfo;
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_thread.h b/intern/cycles/util/util_thread.h
index 18ec5b32144..29f9becbefe 100644
--- a/intern/cycles/util/util_thread.h
+++ b/intern/cycles/util/util_thread.h
@@ -17,11 +17,11 @@
#ifndef __UTIL_THREAD_H__
#define __UTIL_THREAD_H__
-#include <thread>
-#include <mutex>
#include <condition_variable>
#include <functional>
+#include <mutex>
#include <queue>
+#include <thread>
#ifdef _WIN32
# include "util_windows.h"
@@ -29,9 +29,9 @@
# include <pthread.h>
#endif
-#ifdef __APPLE__
-# include <libkern/OSAtomic.h>
-#endif
+/* NOTE: Use tbb/spin_mutex.h instead of util_tbb.h because some of the TBB
+ * functionality requires RTTI, which is disabled for the OSL kernel. */
+#include <tbb/spin_mutex.h>
#include "util/util_function.h"
@@ -65,76 +65,7 @@ class thread {
int node_;
};
-/* Own wrapper around pthread's spin lock to make it's use easier. */
-
-class thread_spin_lock {
- public:
-#ifdef __APPLE__
- inline thread_spin_lock()
- {
- spin_ = OS_SPINLOCK_INIT;
- }
-
- inline void lock()
- {
- OSSpinLockLock(&spin_);
- }
-
- inline void unlock()
- {
- OSSpinLockUnlock(&spin_);
- }
-#elif defined(_WIN32)
- inline thread_spin_lock()
- {
- const DWORD SPIN_COUNT = 50000;
- InitializeCriticalSectionAndSpinCount(&cs_, SPIN_COUNT);
- }
-
- inline ~thread_spin_lock()
- {
- DeleteCriticalSection(&cs_);
- }
-
- inline void lock()
- {
- EnterCriticalSection(&cs_);
- }
-
- inline void unlock()
- {
- LeaveCriticalSection(&cs_);
- }
-#else
- inline thread_spin_lock()
- {
- pthread_spin_init(&spin_, 0);
- }
-
- inline ~thread_spin_lock()
- {
- pthread_spin_destroy(&spin_);
- }
-
- inline void lock()
- {
- pthread_spin_lock(&spin_);
- }
-
- inline void unlock()
- {
- pthread_spin_unlock(&spin_);
- }
-#endif
- protected:
-#ifdef __APPLE__
- OSSpinLock spin_;
-#elif defined(_WIN32)
- CRITICAL_SECTION cs_;
-#else
- pthread_spinlock_t spin_;
-#endif
-};
+using thread_spin_lock = tbb::spin_mutex;
class thread_scoped_spin_lock {
public:
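The deleted platform-specific wrapper is replaced by tbb::spin_mutex, which already exposes the same lock()/unlock() pair; a minimal sketch of guarding a shared counter with the new alias (illustrative, not from the patch):

#include <tbb/spin_mutex.h>

/* Same alias as introduced above. */
using thread_spin_lock = tbb::spin_mutex;

static thread_spin_lock counter_lock;
static int shared_counter = 0;

/* Hypothetical helper: spin locks suit very short critical sections like this. */
static void increment_counter()
{
  counter_lock.lock();
  shared_counter++;
  counter_lock.unlock();
}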
diff --git a/intern/cycles/util/util_transform.cpp b/intern/cycles/util/util_transform.cpp
index 6a9bfbea4ca..101122740d7 100644
--- a/intern/cycles/util/util_transform.cpp
+++ b/intern/cycles/util/util_transform.cpp
@@ -46,8 +46,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include "util/util_projection.h"
#include "util/util_transform.h"
+#include "util/util_projection.h"
#include "util/util_boundbox.h"
#include "util/util_math.h"
@@ -227,6 +227,7 @@ static void transform_decompose(DecomposedTransform *decomp, const Transform *tf
M.y.w = 0.0f;
M.z.w = 0.0f;
+#if 0
Transform R = M;
float norm;
int iteration = 0;
@@ -260,6 +261,41 @@ static void transform_decompose(DecomposedTransform *decomp, const Transform *tf
decomp->y.w = scale.x.x;
decomp->z = make_float4(scale.x.y, scale.x.z, scale.y.x, scale.y.y);
decomp->w = make_float4(scale.y.z, scale.z.x, scale.z.y, scale.z.z);
+#else
+ float3 colx = transform_get_column(&M, 0);
+ float3 coly = transform_get_column(&M, 1);
+ float3 colz = transform_get_column(&M, 2);
+
+ /* extract scale and shear first */
+ float3 scale, shear;
+ scale.x = len(colx);
+ colx /= scale.x;
+ shear.z = dot(colx, coly);
+ coly -= shear.z * colx;
+ scale.y = len(coly);
+ coly /= scale.y;
+ shear.y = dot(colx, colz);
+ colz -= shear.y * colx;
+ shear.x = dot(coly, colz);
+ colz -= shear.x * coly;
+ scale.z = len(colz);
+ colz /= scale.z;
+
+ transform_set_column(&M, 0, colx);
+ transform_set_column(&M, 1, coly);
+ transform_set_column(&M, 2, colz);
+
+ if (transform_negative_scale(M)) {
+ scale *= -1.0f;
+ M = M * transform_scale(-1.0f, -1.0f, -1.0f);
+ }
+
+ decomp->x = transform_to_quat(M);
+
+ decomp->y.w = scale.x;
+ decomp->z = make_float4(shear.z, shear.y, 0.0f, scale.y);
+ decomp->w = make_float4(shear.x, 0.0f, 0.0f, scale.z);
+#endif
}
void transform_motion_decompose(DecomposedTransform *decomp, const Transform *motion, size_t size)
@@ -271,7 +307,7 @@ void transform_motion_decompose(DecomposedTransform *decomp, const Transform *mo
/* Ensure rotation around shortest angle, negated quaternions are the same
* but this means we don't have to do the check in quat_interpolate */
if (dot(decomp[i - 1].x, decomp[i].x) < 0.0f)
- decomp[i - 1].x = -decomp[i - 1].x;
+ decomp[i].x = -decomp[i].x;
}
}
}
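The replacement path in transform_decompose() is a classical Gram-Schmidt (QR-style) factorization of the upper-left 3x3 block: with $q_x, q_y, q_z$ the orthonormalized columns, the matrix satisfies, in the notation of the code above,

$$M = Q\,U, \qquad U = \begin{pmatrix} s_x & h_z & h_y \\ 0 & s_y & h_x \\ 0 & 0 & s_z \end{pmatrix},$$

where $s$ = (scale.x, scale.y, scale.z), $h$ = (shear.x, shear.y, shear.z) and $Q$ has columns $q_x, q_y, q_z$. A negative determinant is handled by negating the columns and scales before the quaternion conversion, and the results are packed into decomp->x (rotation), decomp->y.w, decomp->z and decomp->w (scale and shear) exactly as written above.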
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
index 13ca27c2fed..d0a6264d5cf 100644
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -148,6 +148,32 @@ ccl_device_inline Transform make_transform(float a,
return t;
}
+ccl_device_inline Transform euler_to_transform(const float3 euler)
+{
+ float cx = cosf(euler.x);
+ float cy = cosf(euler.y);
+ float cz = cosf(euler.z);
+ float sx = sinf(euler.x);
+ float sy = sinf(euler.y);
+ float sz = sinf(euler.z);
+
+ Transform t;
+ t.x.x = cy * cz;
+ t.y.x = cy * sz;
+ t.z.x = -sy;
+
+ t.x.y = sy * sx * cz - cx * sz;
+ t.y.y = sy * sx * sz + cx * cz;
+ t.z.y = cy * sx;
+
+ t.x.z = sy * cx * cz + sx * sz;
+ t.y.z = sy * cx * sz - sx * cz;
+ t.z.z = cy * cx;
+
+ t.x.w = t.y.w = t.z.w = 0.0f;
+ return t;
+}
+
/* Constructs a coordinate frame from a normalized normal. */
ccl_device_inline Transform make_transform_frame(float3 N)
{
@@ -318,28 +344,28 @@ ccl_device_inline Transform transform_empty()
ccl_device_inline float4 quat_interpolate(float4 q1, float4 q2, float t)
{
- /* use simpe nlerp instead of slerp. it's faster and almost the same */
+ /* OptiX uses lerp to interpolate motion transformations. */
+#ifdef __KERNEL_OPTIX__
return normalize((1.0f - t) * q1 + t * q2);
-
-#if 0
+#else /* __KERNEL_OPTIX__ */
/* note: this does not ensure rotation around shortest angle, q1 and q2
* are assumed to be matched already in transform_motion_decompose */
float costheta = dot(q1, q2);
/* possible optimization: it might be possible to precompute theta/qperp */
- if(costheta > 0.9995f) {
+ if (costheta > 0.9995f) {
/* linear interpolation in degenerate case */
- return normalize((1.0f - t)*q1 + t*q2);
+ return normalize((1.0f - t) * q1 + t * q2);
}
- else {
+ else {
/* slerp */
float theta = acosf(clamp(costheta, -1.0f, 1.0f));
float4 qperp = normalize(q2 - q1 * costheta);
float thetap = theta * t;
return q1 * cosf(thetap) + qperp * sinf(thetap);
}
-#endif
+#endif /* __KERNEL_OPTIX__ */
}
ccl_device_inline Transform transform_quick_inverse(Transform M)
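
The reworked quat_interpolate() now branches on __KERNEL_OPTIX__: the OptiX kernel keeps the cheap normalized lerp so Cycles' result matches how OptiX itself blends motion transforms, while all other devices re-enable the previously "#if 0"'d slerp path, falling back to nlerp only when the quaternions are nearly parallel (costheta > 0.9995f). The following standalone illustration of the two modes uses plain structs rather than the kernel's float4.

// Standalone illustration of the two interpolation modes (not Cycles types).
#include <algorithm>
#include <cmath>

struct Quat {
  float x, y, z, w;
};

static float qdot(const Quat &a, const Quat &b)
{
  return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
}
static Quat qscale_add(const Quat &a, float sa, const Quat &b, float sb) /* a*sa + b*sb */
{
  return {a.x * sa + b.x * sb, a.y * sa + b.y * sb, a.z * sa + b.z * sb, a.w * sa + b.w * sb};
}
static Quat qnormalize(const Quat &q)
{
  const float inv = 1.0f / std::sqrt(qdot(q, q));
  return {q.x * inv, q.y * inv, q.z * inv, q.w * inv};
}

/* nlerp: what the OptiX branch keeps. */
static Quat quat_nlerp(const Quat &q1, const Quat &q2, float t)
{
  return qnormalize(qscale_add(q1, 1.0f - t, q2, t));
}

/* slerp with a linear fallback near costheta == 1, like the non-OptiX branch.
 * Assumes q1 and q2 were already sign-matched (shortest angle). */
static Quat quat_slerp(const Quat &q1, const Quat &q2, float t)
{
  const float costheta = qdot(q1, q2);
  if (costheta > 0.9995f)
    return quat_nlerp(q1, q2, t);
  const float theta = std::acos(std::min(std::max(costheta, -1.0f), 1.0f));
  const Quat qperp = qnormalize(qscale_add(q2, 1.0f, q1, -costheta));
  const float thetap = theta * t;
  return qscale_add(q1, std::cos(thetap), qperp, std::sin(thetap));
}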
@@ -442,29 +468,6 @@ ccl_device void transform_motion_array_interpolate(Transform *tfm,
#ifndef __KERNEL_GPU__
-# ifdef WITH_EMBREE
-ccl_device void transform_motion_array_interpolate_straight(
- Transform *tfm, const ccl_global DecomposedTransform *motion, uint numsteps, float time)
-{
- /* Figure out which steps we need to interpolate. */
- int maxstep = numsteps - 1;
- int step = min((int)(time * maxstep), maxstep - 1);
- float t = time * maxstep - step;
-
- const ccl_global DecomposedTransform *a = motion + step;
- const ccl_global DecomposedTransform *b = motion + step + 1;
- Transform step1, step2;
-
- transform_compose(&step1, a);
- transform_compose(&step2, b);
-
- /* matrix lerp */
- tfm->x = (1.0f - t) * step1.x + t * step2.x;
- tfm->y = (1.0f - t) * step1.y + t * step2.y;
- tfm->z = (1.0f - t) * step1.z + t * step2.z;
-}
-# endif
-
class BoundBox2D;
ccl_device_inline bool operator==(const DecomposedTransform &A, const DecomposedTransform &B)
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index 48e9983ac8f..a721595667d 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -101,6 +101,11 @@ ccl_device_inline size_t round_down(size_t x, size_t multiple)
return (x / multiple) * multiple;
}
+ccl_device_inline bool is_power_of_two(size_t x)
+{
+ return (x & (x - 1)) == 0;
+}
+
CCL_NAMESPACE_END
/* Vectorized types declaration. */
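
The is_power_of_two() helper added above relies on the standard bit trick: x & (x - 1) clears the lowest set bit, so the expression is zero exactly when at most one bit is set. Note that x == 0 also passes the test, so callers are expected to hand in non-zero sizes. A minimal standalone check:

// Standalone check of the bit trick used by is_power_of_two().
#include <cassert>
#include <cstddef>

static bool is_power_of_two_ref(size_t x)
{
  return (x & (x - 1)) == 0;
}

int main()
{
  assert(is_power_of_two_ref(1) && is_power_of_two_ref(64) && is_power_of_two_ref(4096));
  assert(!is_power_of_two_ref(3) && !is_power_of_two_ref(48));
  assert(is_power_of_two_ref(0)); /* 0 also passes; treat non-zero input as a caller precondition. */
  return 0;
}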
@@ -148,11 +153,12 @@ CCL_NAMESPACE_END
/* SSE types. */
#ifndef __KERNEL_GPU__
# include "util/util_sseb.h"
-# include "util/util_ssei.h"
# include "util/util_ssef.h"
+# include "util/util_ssei.h"
# if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
# include "util/util_avxb.h"
# include "util/util_avxf.h"
+# include "util/util_avxi.h"
# endif
#endif
diff --git a/intern/cycles/util/util_types_float8.h b/intern/cycles/util/util_types_float8.h
index 7289e3298c3..27da120a4ba 100644
--- a/intern/cycles/util/util_types_float8.h
+++ b/intern/cycles/util/util_types_float8.h
@@ -1,30 +1,30 @@
/*
-* Original code Copyright 2017, Intel Corporation
-* Modifications Copyright 2018, Blender Foundation.
-*
-* Redistribution and use in source and binary forms, with or without
-* modification, are permitted provided that the following conditions are met:
-*
-* * Redistributions of source code must retain the above copyright notice,
-* this list of conditions and the following disclaimer.
-* * Redistributions in binary form must reproduce the above copyright
-* notice, this list of conditions and the following disclaimer in the
-* documentation and/or other materials provided with the distribution.
-* * Neither the name of Intel Corporation nor the names of its contributors
-* may be used to endorse or promote products derived from this software
-* without specific prior written permission.
-*
-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+ * Original code Copyright 2017, Intel Corporation
+ * Modifications Copyright 2018, Blender Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
#ifndef __UTIL_TYPES_FLOAT8_H__
#define __UTIL_TYPES_FLOAT8_H__
diff --git a/intern/cycles/util/util_types_float8_impl.h b/intern/cycles/util/util_types_float8_impl.h
index 8ce3d81b1bb..4e4ea28c6a4 100644
--- a/intern/cycles/util/util_types_float8_impl.h
+++ b/intern/cycles/util/util_types_float8_impl.h
@@ -1,30 +1,30 @@
/*
-* Original code Copyright 2017, Intel Corporation
-* Modifications Copyright 2018, Blender Foundation.
-*
-* Redistribution and use in source and binary forms, with or without
-* modification, are permitted provided that the following conditions are met:
-*
-* * Redistributions of source code must retain the above copyright notice,
-* this list of conditions and the following disclaimer.
-* * Redistributions in binary form must reproduce the above copyright
-* notice, this list of conditions and the following disclaimer in the
-* documentation and/or other materials provided with the distribution.
-* * Neither the name of Intel Corporation nor the names of its contributors
-* may be used to endorse or promote products derived from this software
-* without specific prior written permission.
-*
-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+ * Original code Copyright 2017, Intel Corporation
+ * Modifications Copyright 2018, Blender Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
#ifndef __UTIL_TYPES_FLOAT8_IMPL_H__
#define __UTIL_TYPES_FLOAT8_IMPL_H__
diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h
index 437478d64d3..04fb33368d9 100644
--- a/intern/cycles/util/util_vector.h
+++ b/intern/cycles/util/util_vector.h
@@ -27,7 +27,7 @@
CCL_NAMESPACE_BEGIN
-/* Own subclass-ed vestion of std::vector. Subclass is needed because:
+/* Own subclass-ed version of std::vector. Subclass is needed because:
*
* - Use own allocator which keeps track of used/peak memory.
* - Have method to ensure capacity is re-set to 0.
diff --git a/intern/cycles/util/util_version.h b/intern/cycles/util/util_version.h
index 38829d3a29c..8bce5ff85aa 100644
--- a/intern/cycles/util/util_version.h
+++ b/intern/cycles/util/util_version.h
@@ -22,7 +22,7 @@
CCL_NAMESPACE_BEGIN
#define CYCLES_VERSION_MAJOR 1
-#define CYCLES_VERSION_MINOR 9
+#define CYCLES_VERSION_MINOR 13
#define CYCLES_VERSION_PATCH 0
#define CYCLES_MAKE_VERSION_STRING2(a, b, c) #a "." #b "." #c
diff --git a/intern/cycles/util/util_view.cpp b/intern/cycles/util/util_view.cpp
index f23174fd6dc..9d9ff451b3b 100644
--- a/intern/cycles/util/util_view.cpp
+++ b/intern/cycles/util/util_view.cpp
@@ -134,7 +134,7 @@ static void view_display()
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
- gluOrtho2D(0, V.width, 0, V.height);
+ glOrtho(0, V.width, 0, V.height, -1, 1);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
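
The substitution above is behavior-preserving: per the GLU specification, gluOrtho2D(l, r, b, t) is equivalent to glOrtho(l, r, b, t, -1, 1), so the projection is unchanged and the standalone viewer simply stops depending on GLU for this call. Shown side by side as a fragment (not a full program):

/* Equivalent calls; gluOrtho2D is defined as glOrtho with near = -1, far = 1. */
gluOrtho2D(0, V.width, 0, V.height);         /* old: requires GLU */
glOrtho(0, V.width, 0, V.height, -1, 1);     /* new: core GL only, same matrix */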
diff --git a/intern/cycles/util/util_windows.cpp b/intern/cycles/util/util_windows.cpp
new file mode 100644
index 00000000000..807a5adc84a
--- /dev/null
+++ b/intern/cycles/util/util_windows.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2019-2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef _WIN32
+# include <windows.h>
+#endif
+
+#include "util_windows.h"
+
+CCL_NAMESPACE_BEGIN
+
+bool system_windows_version_at_least(int major, int build)
+{
+#ifdef _WIN32
+ HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll");
+ if (hMod == 0) {
+ return false;
+ }
+
+ typedef NTSTATUS(WINAPI * RtlGetVersionPtr)(PRTL_OSVERSIONINFOW);
+ RtlGetVersionPtr rtl_get_version = (RtlGetVersionPtr)::GetProcAddress(hMod, "RtlGetVersion");
+ if (rtl_get_version == NULL) {
+ return false;
+ }
+
+ RTL_OSVERSIONINFOW rovi = {0};
+ rovi.dwOSVersionInfoSize = sizeof(rovi);
+ if (rtl_get_version(&rovi) != 0) {
+ return false;
+ }
+
+ return (rovi.dwMajorVersion > major ||
+ (rovi.dwMajorVersion == major && rovi.dwBuildNumber >= build));
+#else
+ (void)major;
+ (void)build;
+ return false;
+#endif
+}
+
+CCL_NAMESPACE_END
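
The new system_windows_version_at_least() resolves RtlGetVersion from ntdll.dll at runtime rather than calling GetVersionEx(), because GetVersionEx() reports a compatibility-capped version on newer Windows releases unless the executable carries a matching manifest, whereas RtlGetVersion() returns the real major version and build number. On non-Windows platforms the helper always returns false, so call sites need no #ifdef of their own. A hypothetical call site follows; the version and build numbers are only illustrative, not taken from the Cycles sources.

/* Hypothetical usage sketch: gate a Windows-only code path on
 * Windows 10 builds >= 15063 (numbers chosen for illustration). */
if (system_windows_version_at_least(10, 15063)) {
  /* Newer OS: take the optimized path. */
}
else {
  /* Older OS, or non-Windows where the helper returns false: fall back. */
}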
diff --git a/intern/cycles/util/util_windows.h b/intern/cycles/util/util_windows.h
index 0d85c5437f6..9cbf91a23a7 100644
--- a/intern/cycles/util/util_windows.h
+++ b/intern/cycles/util/util_windows.h
@@ -33,4 +33,10 @@
#endif /* _WIN32 */
+CCL_NAMESPACE_BEGIN
+
+bool system_windows_version_at_least(int major, int build);
+
+CCL_NAMESPACE_END
+
#endif /* __UTIL_WINDOWS_H__ */