git.blender.org/blender.git
Diffstat (limited to 'intern/cycles')
-rw-r--r--intern/cycles/CMakeLists.txt35
-rw-r--r--intern/cycles/app/CMakeLists.txt19
-rw-r--r--intern/cycles/app/cycles_cubin_cc.cpp33
-rw-r--r--intern/cycles/app/cycles_server.cpp2
-rw-r--r--intern/cycles/app/cycles_standalone.cpp4
-rw-r--r--intern/cycles/app/cycles_xml.cpp14
-rw-r--r--intern/cycles/blender/CMakeLists.txt20
-rw-r--r--intern/cycles/blender/addon/__init__.py2
-rw-r--r--intern/cycles/blender/addon/engine.py9
-rw-r--r--intern/cycles/blender/addon/operators.py4
-rw-r--r--intern/cycles/blender/addon/properties.py131
-rw-r--r--intern/cycles/blender/addon/ui.py112
-rw-r--r--intern/cycles/blender/addon/version_update.py5
-rw-r--r--intern/cycles/blender/blender_camera.cpp19
-rw-r--r--intern/cycles/blender/blender_curves.cpp527
-rw-r--r--intern/cycles/blender/blender_device.cpp56
-rw-r--r--intern/cycles/blender/blender_device.h2
-rw-r--r--intern/cycles/blender/blender_geometry.cpp192
-rw-r--r--intern/cycles/blender/blender_id_map.h299
-rw-r--r--intern/cycles/blender/blender_image.cpp220
-rw-r--r--intern/cycles/blender/blender_image.h61
-rw-r--r--intern/cycles/blender/blender_light.cpp212
-rw-r--r--intern/cycles/blender/blender_mesh.cpp319
-rw-r--r--intern/cycles/blender/blender_object.cpp295
-rw-r--r--intern/cycles/blender/blender_object_cull.cpp1
-rw-r--r--intern/cycles/blender/blender_particles.cpp6
-rw-r--r--intern/cycles/blender/blender_python.cpp5
-rw-r--r--intern/cycles/blender/blender_session.cpp397
-rw-r--r--intern/cycles/blender/blender_session.h22
-rw-r--r--intern/cycles/blender/blender_shader.cpp142
-rw-r--r--intern/cycles/blender/blender_sync.cpp107
-rw-r--r--intern/cycles/blender/blender_sync.h88
-rw-r--r--intern/cycles/blender/blender_texture.h2
-rw-r--r--intern/cycles/blender/blender_util.h245
-rw-r--r--intern/cycles/blender/blender_viewport.cpp16
-rw-r--r--intern/cycles/blender/blender_viewport.h6
-rw-r--r--intern/cycles/blender/blender_volume.cpp379
-rw-r--r--intern/cycles/bvh/bvh.cpp92
-rw-r--r--intern/cycles/bvh/bvh.h10
-rw-r--r--intern/cycles/bvh/bvh2.cpp4
-rw-r--r--intern/cycles/bvh/bvh2.h4
-rw-r--r--intern/cycles/bvh/bvh4.cpp4
-rw-r--r--intern/cycles/bvh/bvh4.h4
-rw-r--r--intern/cycles/bvh/bvh8.cpp29
-rw-r--r--intern/cycles/bvh/bvh8.h4
-rw-r--r--intern/cycles/bvh/bvh_build.cpp85
-rw-r--r--intern/cycles/bvh/bvh_build.h6
-rw-r--r--intern/cycles/bvh/bvh_embree.cpp264
-rw-r--r--intern/cycles/bvh/bvh_embree.h10
-rw-r--r--intern/cycles/bvh/bvh_optix.cpp130
-rw-r--r--intern/cycles/bvh/bvh_optix.h7
-rw-r--r--intern/cycles/bvh/bvh_params.h5
-rw-r--r--intern/cycles/bvh/bvh_split.cpp41
-rw-r--r--intern/cycles/bvh/bvh_split.h6
-rw-r--r--intern/cycles/bvh/bvh_unaligned.cpp14
-rw-r--r--intern/cycles/cmake/external_libs.cmake2
-rw-r--r--intern/cycles/device/CMakeLists.txt14
-rw-r--r--intern/cycles/device/cuda/device_cuda.h269
-rw-r--r--intern/cycles/device/cuda/device_cuda_impl.cpp2620
-rw-r--r--intern/cycles/device/device.cpp28
-rw-r--r--intern/cycles/device/device.h3
-rw-r--r--intern/cycles/device/device_cpu.cpp193
-rw-r--r--intern/cycles/device/device_cuda.cpp2585
-rw-r--r--intern/cycles/device/device_intern.h6
-rw-r--r--intern/cycles/device/device_memory.cpp97
-rw-r--r--intern/cycles/device/device_memory.h46
-rw-r--r--intern/cycles/device/device_multi.cpp161
-rw-r--r--intern/cycles/device/device_network.cpp2
-rw-r--r--intern/cycles/device/device_network.h6
-rw-r--r--intern/cycles/device/device_opencl.cpp4
-rw-r--r--intern/cycles/device/device_optix.cpp1855
-rw-r--r--intern/cycles/device/device_split_kernel.cpp67
-rw-r--r--intern/cycles/device/device_split_kernel.h4
-rw-r--r--intern/cycles/device/device_task.cpp57
-rw-r--r--intern/cycles/device/device_task.h26
-rw-r--r--intern/cycles/device/opencl/device_opencl.h (renamed from intern/cycles/device/opencl/opencl.h)14
-rw-r--r--intern/cycles/device/opencl/device_opencl_impl.cpp (renamed from intern/cycles/device/opencl/opencl_split.cpp)141
-rw-r--r--intern/cycles/device/opencl/memory_manager.cpp2
-rw-r--r--intern/cycles/device/opencl/memory_manager.h2
-rw-r--r--intern/cycles/device/opencl/opencl_util.cpp67
-rw-r--r--intern/cycles/graph/node.cpp10
-rw-r--r--intern/cycles/graph/node.h3
-rw-r--r--intern/cycles/graph/node_type.cpp11
-rw-r--r--intern/cycles/graph/node_type.h16
-rw-r--r--intern/cycles/graph/node_xml.cpp4
-rw-r--r--intern/cycles/kernel/CMakeLists.txt85
-rw-r--r--intern/cycles/kernel/bvh/bvh.h10
-rw-r--r--intern/cycles/kernel/bvh/bvh_embree.h3
-rw-r--r--intern/cycles/kernel/closure/bsdf.h2
-rw-r--r--intern/cycles/kernel/closure/bsdf_hair_principled.h30
-rw-r--r--intern/cycles/kernel/filter/filter_features_sse.h1
-rw-r--r--intern/cycles/kernel/geom/geom.h2
-rw-r--r--intern/cycles/kernel/geom/geom_attribute.h10
-rw-r--r--intern/cycles/kernel/geom/geom_curve.h30
-rw-r--r--intern/cycles/kernel/geom/geom_motion_curve.h2
-rw-r--r--intern/cycles/kernel/geom/geom_motion_triangle_intersect.h23
-rw-r--r--intern/cycles/kernel/geom/geom_object.h28
-rw-r--r--intern/cycles/kernel/geom/geom_subd_triangle.h32
-rw-r--r--intern/cycles/kernel/geom/geom_triangle.h32
-rw-r--r--intern/cycles/kernel/geom/geom_triangle_intersect.h21
-rw-r--r--intern/cycles/kernel/geom/geom_volume.h13
-rw-r--r--intern/cycles/kernel/kernel.h4
-rw-r--r--intern/cycles/kernel/kernel_accumulate.h78
-rw-r--r--intern/cycles/kernel/kernel_adaptive_sampling.h230
-rw-r--r--intern/cycles/kernel/kernel_bake.h20
-rw-r--r--intern/cycles/kernel/kernel_compat_cpu.h4
-rw-r--r--intern/cycles/kernel/kernel_compat_cuda.h7
-rw-r--r--intern/cycles/kernel/kernel_compat_optix.h8
-rw-r--r--intern/cycles/kernel/kernel_emission.h6
-rw-r--r--intern/cycles/kernel/kernel_film.h12
-rw-r--r--intern/cycles/kernel/kernel_globals.h2
-rw-r--r--intern/cycles/kernel/kernel_jitter.h32
-rw-r--r--intern/cycles/kernel/kernel_passes.h99
-rw-r--r--intern/cycles/kernel/kernel_path.h19
-rw-r--r--intern/cycles/kernel/kernel_path_branched.h14
-rw-r--r--intern/cycles/kernel/kernel_path_state.h2
-rw-r--r--intern/cycles/kernel/kernel_random.h38
-rw-r--r--intern/cycles/kernel/kernel_shader.h19
-rw-r--r--intern/cycles/kernel/kernel_subsurface.h5
-rw-r--r--intern/cycles/kernel/kernel_textures.h3
-rw-r--r--intern/cycles/kernel/kernel_types.h79
-rw-r--r--intern/cycles/kernel/kernel_volume.h59
-rw-r--r--intern/cycles/kernel/kernel_work_stealing.h84
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel.cpp2
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu.h4
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h30
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h10
-rw-r--r--intern/cycles/kernel/kernels/cuda/filter.cu6
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel.cu70
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h19
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel_split.cu8
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl23
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl23
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl23
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl23
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h14
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl4
-rw-r--r--intern/cycles/kernel/osl/CMakeLists.txt3
-rw-r--r--intern/cycles/kernel/osl/background.cpp2
-rw-r--r--intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp2
-rw-r--r--intern/cycles/kernel/osl/bsdf_phong_ramp.cpp2
-rw-r--r--intern/cycles/kernel/osl/emissive.cpp2
-rw-r--r--intern/cycles/kernel/osl/osl_bssrdf.cpp2
-rw-r--r--intern/cycles/kernel/osl/osl_closures.cpp2
-rw-r--r--intern/cycles/kernel/osl/osl_closures.h4
-rw-r--r--intern/cycles/kernel/osl/osl_globals.h2
-rw-r--r--intern/cycles/kernel/osl/osl_services.cpp6
-rw-r--r--intern/cycles/kernel/osl/osl_services.h2
-rw-r--r--intern/cycles/kernel/osl/osl_shader.cpp6
-rw-r--r--intern/cycles/kernel/shaders/CMakeLists.txt14
-rw-r--r--intern/cycles/kernel/shaders/node_absorption_volume.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_add_closure.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_ambient_occlusion.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl3
-rw-r--r--intern/cycles/kernel/shaders/node_attribute.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_background.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_bevel.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_blackbody.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_brick_texture.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_brightness.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_bump.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_camera.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_checker_texture.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_clamp.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_combine_hsv.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_combine_rgb.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_combine_xyz.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_convert_from_color.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_convert_from_float.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_convert_from_int.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_convert_from_normal.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_convert_from_point.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_convert_from_string.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_convert_from_vector.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_diffuse_bsdf.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_displacement.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_emission.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_environment_texture.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_fresnel.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_gamma.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_geometry.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_glass_bsdf.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_glossy_bsdf.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_gradient_texture.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_hair_bsdf.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_hair_info.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_hash.h2
-rw-r--r--intern/cycles/kernel/shaders/node_holdout.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_hsv.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_ies_light.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_image_texture.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_invert.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_layer_weight.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_light_falloff.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_light_path.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_magic_texture.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_map_range.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_mapping.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_math.h110
-rw-r--r--intern/cycles/kernel/shaders/node_math.osl52
-rw-r--r--intern/cycles/kernel/shaders/node_mix.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_mix_closure.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_musgrave_texture.osl4
-rw-r--r--intern/cycles/kernel/shaders/node_noise_texture.osl4
-rw-r--r--intern/cycles/kernel/shaders/node_normal.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_normal_map.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_object_info.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_output_displacement.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_output_surface.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_output_volume.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_particle_info.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_principled_bsdf.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_principled_volume.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_refraction_bsdf.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_rgb_curves.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_rgb_ramp.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_rgb_to_bw.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_scatter_volume.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_separate_hsv.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_separate_rgb.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_separate_xyz.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_set_normal.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_sky_texture.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_subsurface_scattering.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_tangent.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_texture_coordinate.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_toon_bsdf.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_translucent_bsdf.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_transparent_bsdf.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_uv_map.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_value.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_vector_curves.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_vector_displacement.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_vector_math.osl39
-rw-r--r--intern/cycles/kernel/shaders/node_vector_rotate.osl49
-rw-r--r--intern/cycles/kernel/shaders/node_vector_transform.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_velvet_bsdf.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_vertex_color.osl13
-rw-r--r--intern/cycles/kernel/shaders/node_voronoi_texture.osl4
-rw-r--r--intern/cycles/kernel/shaders/node_voxel_texture.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_wave_texture.osl69
-rw-r--r--intern/cycles/kernel/shaders/node_wavelength.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_white_noise_texture.osl4
-rw-r--r--intern/cycles/kernel/shaders/node_wireframe.osl2
-rw-r--r--intern/cycles/kernel/shaders/oslutil.h101
-rw-r--r--intern/cycles/kernel/shaders/stdcycles.h150
-rw-r--r--intern/cycles/kernel/shaders/stdosl.h880
-rw-r--r--intern/cycles/kernel/shaders/vector2.h291
-rw-r--r--intern/cycles/kernel/shaders/vector4.h327
-rw-r--r--intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h44
-rw-r--r--intern/cycles/kernel/split/kernel_adaptive_filter_x.h30
-rw-r--r--intern/cycles/kernel/split/kernel_adaptive_filter_y.h29
-rw-r--r--intern/cycles/kernel/split/kernel_adaptive_stopping.h37
-rw-r--r--intern/cycles/kernel/split/kernel_do_volume.h8
-rw-r--r--intern/cycles/kernel/split/kernel_split_common.h2
-rw-r--r--intern/cycles/kernel/split/kernel_split_data.h1
-rw-r--r--intern/cycles/kernel/svm/svm.h111
-rw-r--r--intern/cycles/kernel/svm/svm_closure.h27
-rw-r--r--intern/cycles/kernel/svm/svm_image.h8
-rw-r--r--intern/cycles/kernel/svm/svm_math.h10
-rw-r--r--intern/cycles/kernel/svm/svm_math_util.h21
-rw-r--r--intern/cycles/kernel/svm/svm_types.h159
-rw-r--r--intern/cycles/kernel/svm/svm_vector_rotate.h78
-rw-r--r--intern/cycles/kernel/svm/svm_voxel.h2
-rw-r--r--intern/cycles/kernel/svm/svm_wave.h91
-rw-r--r--intern/cycles/render/CMakeLists.txt33
-rw-r--r--intern/cycles/render/attribute.cpp243
-rw-r--r--intern/cycles/render/attribute.h64
-rw-r--r--intern/cycles/render/background.cpp7
-rw-r--r--intern/cycles/render/background.h2
-rw-r--r--intern/cycles/render/bake.cpp8
-rw-r--r--intern/cycles/render/buffers.cpp27
-rw-r--r--intern/cycles/render/buffers.h2
-rw-r--r--intern/cycles/render/camera.cpp4
-rw-r--r--intern/cycles/render/colorspace.cpp81
-rw-r--r--intern/cycles/render/colorspace.h4
-rw-r--r--intern/cycles/render/constant_fold.h2
-rw-r--r--intern/cycles/render/coverage.cpp7
-rw-r--r--intern/cycles/render/coverage.h15
-rw-r--r--intern/cycles/render/curves.cpp2
-rw-r--r--intern/cycles/render/denoising.h2
-rw-r--r--intern/cycles/render/film.cpp52
-rw-r--r--intern/cycles/render/film.h2
-rw-r--r--intern/cycles/render/geometry.cpp1470
-rw-r--r--intern/cycles/render/geometry.h205
-rw-r--r--intern/cycles/render/graph.cpp4
-rw-r--r--intern/cycles/render/hair.cpp487
-rw-r--r--intern/cycles/render/hair.h151
-rw-r--r--intern/cycles/render/image.cpp940
-rw-r--r--intern/cycles/render/image.h240
-rw-r--r--intern/cycles/render/image_oiio.cpp236
-rw-r--r--intern/cycles/render/image_oiio.h48
-rw-r--r--intern/cycles/render/image_vdb.cpp188
-rw-r--r--intern/cycles/render/image_vdb.h56
-rw-r--r--intern/cycles/render/integrator.cpp54
-rw-r--r--intern/cycles/render/integrator.h5
-rw-r--r--intern/cycles/render/jitter.cpp287
-rw-r--r--intern/cycles/render/jitter.h29
-rw-r--r--intern/cycles/render/light.cpp28
-rw-r--r--intern/cycles/render/merge.cpp2
-rw-r--r--intern/cycles/render/mesh.cpp1905
-rw-r--r--intern/cycles/render/mesh.h243
-rw-r--r--intern/cycles/render/mesh_displace.cpp6
-rw-r--r--intern/cycles/render/mesh_subdivision.cpp12
-rw-r--r--intern/cycles/render/mesh_volume.cpp35
-rw-r--r--intern/cycles/render/nodes.cpp600
-rw-r--r--intern/cycles/render/nodes.h83
-rw-r--r--intern/cycles/render/object.cpp345
-rw-r--r--intern/cycles/render/object.h12
-rw-r--r--intern/cycles/render/osl.cpp37
-rw-r--r--intern/cycles/render/osl.h7
-rw-r--r--intern/cycles/render/particles.cpp2
-rw-r--r--intern/cycles/render/scene.cpp119
-rw-r--r--intern/cycles/render/scene.h12
-rw-r--r--intern/cycles/render/session.cpp291
-rw-r--r--intern/cycles/render/session.h27
-rw-r--r--intern/cycles/render/shader.cpp79
-rw-r--r--intern/cycles/render/shader.h12
-rw-r--r--intern/cycles/render/svm.cpp13
-rw-r--r--intern/cycles/render/tables.cpp2
-rw-r--r--intern/cycles/render/tables.h1
-rw-r--r--intern/cycles/render/tile.cpp140
-rw-r--r--intern/cycles/render/tile.h10
-rw-r--r--intern/cycles/test/CMakeLists.txt32
-rw-r--r--intern/cycles/test/render_graph_finalize_test.cpp4
-rw-r--r--intern/cycles/test/util_avxf_avx2_test.cpp21
-rw-r--r--intern/cycles/test/util_avxf_avx_test.cpp21
-rw-r--r--intern/cycles/test/util_avxf_test.h222
-rw-r--r--intern/cycles/util/CMakeLists.txt1
-rw-r--r--intern/cycles/util/util_algorithm.h1
-rw-r--r--intern/cycles/util/util_atomic.h2
-rw-r--r--intern/cycles/util/util_boundbox.h2
-rw-r--r--intern/cycles/util/util_defines.h5
-rw-r--r--intern/cycles/util/util_disjoint_set.h2
-rw-r--r--intern/cycles/util/util_half.h2
-rw-r--r--intern/cycles/util/util_md5.cpp2
-rw-r--r--intern/cycles/util/util_path.cpp4
-rw-r--r--intern/cycles/util/util_profiling.cpp3
-rw-r--r--intern/cycles/util/util_profiling.h1
-rw-r--r--intern/cycles/util/util_progress.h4
-rw-r--r--intern/cycles/util/util_semaphore.h61
-rw-r--r--intern/cycles/util/util_sky_model.cpp2
-rw-r--r--intern/cycles/util/util_ssef.h2
-rw-r--r--intern/cycles/util/util_static_assert.h32
-rw-r--r--intern/cycles/util/util_string.h2
-rw-r--r--intern/cycles/util/util_system.cpp4
-rw-r--r--intern/cycles/util/util_task.cpp2
-rw-r--r--intern/cycles/util/util_texture.h7
-rw-r--r--intern/cycles/util/util_thread.h4
-rw-r--r--intern/cycles/util/util_transform.cpp2
-rw-r--r--intern/cycles/util/util_transform.h31
-rw-r--r--intern/cycles/util/util_types.h7
-rw-r--r--intern/cycles/util/util_version.h2
-rw-r--r--intern/cycles/util/util_view.cpp2
355 files changed, 14704 insertions, 12189 deletions
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
index 6f6bd7ec2cc..121c8bdad6e 100644
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -177,14 +177,11 @@ if(CXX_HAS_AVX2)
add_definitions(-DWITH_KERNEL_AVX2)
endif()
-if(WITH_CYCLES_OSL)
- # LLVM and OSL need to build without RTTI
- if(WIN32 AND MSVC)
- set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
- elseif(CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang"))
- set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
- endif()
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${RTTI_DISABLE_FLAGS}")
+# LLVM and OSL need to build without RTTI
+if(WIN32 AND MSVC)
+ set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
+elseif(CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang"))
+ set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
endif()
# Definitions and Includes
@@ -228,11 +225,8 @@ if(WITH_CYCLES_DEVICE_OPTIX)
SYSTEM
${OPTIX_INCLUDE_DIR}
)
-
- # Need pre-compiled CUDA binaries in the OptiX device
- set(WITH_CYCLES_CUDA_BINARIES ON)
else()
- message(STATUS "Optix not found, disabling it from Cycles")
+ message(STATUS "OptiX not found, disabling it from Cycles")
set(WITH_CYCLES_DEVICE_OPTIX OFF)
endif()
endif()
@@ -319,9 +313,7 @@ if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER))
set(MAX_MSVC 1910)
elseif(${CUDA_VERSION} EQUAL "9.1")
set(MAX_MSVC 1911)
- elseif(${CUDA_VERSION} EQUAL "10.0")
- set(MAX_MSVC 1999)
- elseif(${CUDA_VERSION} EQUAL "10.1")
+ elseif(${CUDA_VERSION} LESS "11.0")
set(MAX_MSVC 1999)
endif()
if(NOT MSVC_VERSION LESS ${MAX_MSVC} OR CMAKE_C_COMPILER_ID MATCHES "Clang")
@@ -338,7 +330,7 @@ if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER))
endif()
# NVRTC gives wrong rendering result in CUDA 10.0, so we must use NVCC.
-if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_CUBIN_COMPILER)
+if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_CUBIN_COMPILER AND NOT WITH_CYCLES_CUBIN_COMPILER_OVERRRIDE)
if(NOT (${CUDA_VERSION} VERSION_LESS 10.0))
message(STATUS "cycles_cubin_cc not supported for CUDA 10.0+, using nvcc instead.")
set(WITH_CYCLES_CUBIN_COMPILER OFF)
@@ -356,17 +348,6 @@ if(WITH_CYCLES_NETWORK)
add_definitions(-DWITH_NETWORK)
endif()
-if(WITH_OPENCOLORIO)
- add_definitions(-DWITH_OCIO)
- include_directories(
- SYSTEM
- ${OPENCOLORIO_INCLUDE_DIRS}
- )
- if(WIN32)
- add_definitions(-DOpenColorIO_STATIC)
- endif()
-endif()
-
if(WITH_CYCLES_STANDALONE OR WITH_CYCLES_NETWORK OR WITH_CYCLES_CUBIN_COMPILER)
add_subdirectory(app)
endif()
diff --git a/intern/cycles/app/CMakeLists.txt b/intern/cycles/app/CMakeLists.txt
index d67a72ab7db..ef374f91a65 100644
--- a/intern/cycles/app/CMakeLists.txt
+++ b/intern/cycles/app/CMakeLists.txt
@@ -51,14 +51,17 @@ endif()
# Common configuration.
-link_directories(${OPENIMAGEIO_LIBPATH}
- ${BOOST_LIBPATH}
- ${PNG_LIBPATH}
- ${JPEG_LIBPATH}
- ${ZLIB_LIBPATH}
- ${TIFF_LIBPATH}
- ${OPENEXR_LIBPATH}
- ${OPENJPEG_LIBPATH})
+link_directories(
+ ${OPENIMAGEIO_LIBPATH}
+ ${BOOST_LIBPATH}
+ ${PNG_LIBPATH}
+ ${JPEG_LIBPATH}
+ ${ZLIB_LIBPATH}
+ ${TIFF_LIBPATH}
+ ${OPENEXR_LIBPATH}
+ ${OPENJPEG_LIBPATH}
+ ${OPENVDB_LIBPATH}
+)
if(WITH_OPENCOLORIO)
link_directories(${OPENCOLORIO_LIBPATH})
diff --git a/intern/cycles/app/cycles_cubin_cc.cpp b/intern/cycles/app/cycles_cubin_cc.cpp
index 774c18f4219..7631cb9bed5 100644
--- a/intern/cycles/app/cycles_cubin_cc.cpp
+++ b/intern/cycles/app/cycles_cubin_cc.cpp
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include <stdio.h>
#include <stdint.h>
+#include <stdio.h>
#include <string>
#include <vector>
@@ -43,7 +43,8 @@ template<typename T> std::string to_string(const T &n)
class CompilationSettings {
public:
- CompilationSettings() : target_arch(0), bits(64), verbose(false), fast_math(false)
+ CompilationSettings()
+ : target_arch(0), bits(64), verbose(false), fast_math(false), ptx_only(false)
{
}
@@ -57,12 +58,13 @@ class CompilationSettings {
int bits;
bool verbose;
bool fast_math;
+ bool ptx_only;
};
static bool compile_cuda(CompilationSettings &settings)
{
- const char *headers[] = {"stdlib.h", "float.h", "math.h", "stdio.h"};
- const char *header_content[] = {"\n", "\n", "\n", "\n"};
+ const char *headers[] = {"stdlib.h", "float.h", "math.h", "stdio.h", "stddef.h"};
+ const char *header_content[] = {"\n", "\n", "\n", "\n", "\n"};
printf("Building %s\n", settings.input_file.c_str());
@@ -83,6 +85,8 @@ static bool compile_cuda(CompilationSettings &settings)
options.push_back("-D__KERNEL_CUDA_VERSION__=" + std::to_string(cuewNvrtcVersion()));
options.push_back("-arch=compute_" + std::to_string(settings.target_arch));
options.push_back("--device-as-default-execution-space");
+ options.push_back("-DCYCLES_CUBIN_CC");
+ options.push_back("--std=c++11");
if (settings.fast_math)
options.push_back("--use_fast_math");
@@ -134,10 +138,14 @@ static bool compile_cuda(CompilationSettings &settings)
fprintf(stderr, "Error: nvrtcGetPTX failed (%d)\n\n", (int)result);
return false;
}
-
- /* Write a file in the temp folder with the ptx code. */
- settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" +
- OIIO::Filesystem::unique_path();
+ if (settings.ptx_only) {
+ settings.ptx_file = settings.output_file;
+ }
+ else {
+ /* Write a file in the temp folder with the ptx code. */
+ settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" +
+ OIIO::Filesystem::unique_path();
+ }
FILE *f = fopen(settings.ptx_file.c_str(), "wb");
fwrite(&ptx_code[0], 1, ptx_size, f);
fclose(f);
@@ -249,6 +257,9 @@ static bool parse_parameters(int argc, const char **argv, CompilationSettings &s
"-D %L",
&settings.defines,
"Add additional defines",
+ "-ptx",
+ &settings.ptx_only,
+ "emit PTX code",
"-v",
&settings.verbose,
"Use verbose logging",
@@ -303,8 +314,10 @@ int main(int argc, const char **argv)
exit(EXIT_FAILURE);
}
- if (!link_ptxas(settings)) {
- exit(EXIT_FAILURE);
+ if (!settings.ptx_only) {
+ if (!link_ptxas(settings)) {
+ exit(EXIT_FAILURE);
+ }
}
return 0;
diff --git a/intern/cycles/app/cycles_server.cpp b/intern/cycles/app/cycles_server.cpp
index c5a4c9b375b..1ad70a376ed 100644
--- a/intern/cycles/app/cycles_server.cpp
+++ b/intern/cycles/app/cycles_server.cpp
@@ -20,11 +20,11 @@
#include "util/util_args.h"
#include "util/util_foreach.h"
+#include "util/util_logging.h"
#include "util/util_path.h"
#include "util/util_stats.h"
#include "util/util_string.h"
#include "util/util_task.h"
-#include "util/util_logging.h"
using namespace ccl;
diff --git a/intern/cycles/app/cycles_standalone.cpp b/intern/cycles/app/cycles_standalone.cpp
index d2d112e8d7e..e45c37be494 100644
--- a/intern/cycles/app/cycles_standalone.cpp
+++ b/intern/cycles/app/cycles_standalone.cpp
@@ -16,12 +16,12 @@
#include <stdio.h>
+#include "device/device.h"
#include "render/buffers.h"
#include "render/camera.h"
-#include "device/device.h"
+#include "render/integrator.h"
#include "render/scene.h"
#include "render/session.h"
-#include "render/integrator.h"
#include "util/util_args.h"
#include "util/util_foreach.h"
diff --git a/intern/cycles/app/cycles_xml.cpp b/intern/cycles/app/cycles_xml.cpp
index 1dbe8a30ff2..aec00f845f3 100644
--- a/intern/cycles/app/cycles_xml.cpp
+++ b/intern/cycles/app/cycles_xml.cpp
@@ -16,9 +16,9 @@
#include <stdio.h>
-#include <sstream>
#include <algorithm>
#include <iterator>
+#include <sstream>
#include "graph/node_xml.h"
@@ -32,8 +32,8 @@
#include "render/nodes.h"
#include "render/object.h"
#include "render/osl.h"
-#include "render/shader.h"
#include "render/scene.h"
+#include "render/shader.h"
#include "subd/subd_patch.h"
#include "subd/subd_split.h"
@@ -292,7 +292,7 @@ static void xml_read_shader_graph(XMLReadState &state, Shader *shader, xml_node
filepath = path_join(state.base, filepath);
}
- snode = ((OSLShaderManager *)manager)->osl_node(filepath);
+ snode = OSLShaderManager::osl_node(manager, filepath);
if (!snode) {
fprintf(stderr, "Failed to create OSL node from \"%s\".\n", filepath.c_str());
@@ -326,6 +326,10 @@ static void xml_read_shader_graph(XMLReadState &state, Shader *shader, xml_node
fprintf(stderr, "Node type \"%s\" is not a shader node.\n", node_type->name.c_str());
continue;
}
+ else if (node_type->create == NULL) {
+ fprintf(stderr, "Can't create abstract node type \"%s\".\n", node_type->name.c_str());
+ continue;
+ }
snode = (ShaderNode *)node_type->create(node_type);
}
@@ -376,11 +380,11 @@ static Mesh *xml_add_mesh(Scene *scene, const Transform &tfm)
{
/* create mesh */
Mesh *mesh = new Mesh();
- scene->meshes.push_back(mesh);
+ scene->geometry.push_back(mesh);
/* create object*/
Object *object = new Object();
- object->mesh = mesh;
+ object->geometry = mesh;
object->tfm = tfm;
scene->objects.push_back(object);
diff --git a/intern/cycles/blender/CMakeLists.txt b/intern/cycles/blender/CMakeLists.txt
index 0888eeb78bb..496e8e9310b 100644
--- a/intern/cycles/blender/CMakeLists.txt
+++ b/intern/cycles/blender/CMakeLists.txt
@@ -18,6 +18,9 @@ set(INC_SYS
set(SRC
blender_camera.cpp
blender_device.cpp
+ blender_image.cpp
+ blender_geometry.cpp
+ blender_light.cpp
blender_mesh.cpp
blender_object.cpp
blender_object_cull.cpp
@@ -30,9 +33,12 @@ set(SRC
blender_sync.cpp
blender_texture.cpp
blender_viewport.cpp
+ blender_volume.cpp
CCL_api.h
blender_device.h
+ blender_id_map.h
+ blender_image.h
blender_object_cull.h
blender_sync.h
blender_session.h
@@ -86,6 +92,20 @@ if(WITH_MOD_FLUID)
add_definitions(-DWITH_FLUID)
endif()
+if(WITH_NEW_OBJECT_TYPES)
+ add_definitions(-DWITH_NEW_OBJECT_TYPES)
+endif()
+
+if(WITH_OPENVDB)
+ add_definitions(-DWITH_OPENVDB ${OPENVDB_DEFINITIONS})
+ list(APPEND INC_SYS
+ ${OPENVDB_INCLUDE_DIRS}
+ )
+ list(APPEND LIB
+ ${OPENVDB_LIBRARIES}
+ )
+endif()
+
blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
# avoid link failure with clang 3.4 debug
diff --git a/intern/cycles/blender/addon/__init__.py b/intern/cycles/blender/addon/__init__.py
index 6d6f89603fe..3d2a52d0cf6 100644
--- a/intern/cycles/blender/addon/__init__.py
+++ b/intern/cycles/blender/addon/__init__.py
@@ -22,7 +22,7 @@ bl_info = {
"blender": (2, 80, 0),
"description": "Cycles renderer integration",
"warning": "",
- "wiki_url": "https://docs.blender.org/manual/en/latest/render/cycles/",
+ "doc_url": "https://docs.blender.org/manual/en/latest/render/cycles/",
"tracker_url": "",
"support": 'OFFICIAL',
"category": "Render"}
diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py
index ee7ac7737c0..2b872bb5c39 100644
--- a/intern/cycles/blender/addon/engine.py
+++ b/intern/cycles/blender/addon/engine.py
@@ -33,7 +33,7 @@ def _is_using_buggy_driver():
# in the version string, but those cards do not quite work and
# causing crashes.
return True
- regex = re.compile(".*Compatibility Profile Context ([0-9]+(\.[0-9]+)+)$")
+ regex = re.compile(".*Compatibility Profile Context ([0-9]+(\\.[0-9]+)+)$")
if not regex.match(version):
# Skip cards like FireGL
return False
@@ -245,9 +245,6 @@ def list_render_passes(srl):
if srl.use_pass_transmission_direct: yield ("TransDir", "RGB", 'COLOR')
if srl.use_pass_transmission_indirect: yield ("TransInd", "RGB", 'COLOR')
if srl.use_pass_transmission_color: yield ("TransCol", "RGB", 'COLOR')
- if srl.use_pass_subsurface_direct: yield ("SubsurfaceDir", "RGB", 'COLOR')
- if srl.use_pass_subsurface_indirect: yield ("SubsurfaceInd", "RGB", 'COLOR')
- if srl.use_pass_subsurface_color: yield ("SubsurfaceCol", "RGB", 'COLOR')
if srl.use_pass_emit: yield ("Emit", "RGB", 'COLOR')
if srl.use_pass_environment: yield ("Env", "RGB", 'COLOR')
@@ -258,6 +255,7 @@ def list_render_passes(srl):
if crl.pass_debug_bvh_traversed_instances: yield ("Debug BVH Traversed Instances", "X", 'VALUE')
if crl.pass_debug_bvh_intersections: yield ("Debug BVH Intersections", "X", 'VALUE')
if crl.pass_debug_ray_bounces: yield ("Debug Ray Bounces", "X", 'VALUE')
+ if crl.pass_debug_sample_count: yield ("Debug Sample Count", "X", 'VALUE')
if crl.use_pass_volume_direct: yield ("VolumeDir", "RGB", 'COLOR')
if crl.use_pass_volume_indirect: yield ("VolumeInd", "RGB", 'COLOR')
@@ -284,8 +282,7 @@ def list_render_passes(srl):
yield ("Denoising Intensity", "X", 'VALUE')
clean_options = ("denoising_diffuse_direct", "denoising_diffuse_indirect",
"denoising_glossy_direct", "denoising_glossy_indirect",
- "denoising_transmission_direct", "denoising_transmission_indirect",
- "denoising_subsurface_direct", "denoising_subsurface_indirect")
+ "denoising_transmission_direct", "denoising_transmission_indirect")
if any(getattr(crl, option) for option in clean_options):
yield ("Denoising Clean", "RGB", 'COLOR')
diff --git a/intern/cycles/blender/addon/operators.py b/intern/cycles/blender/addon/operators.py
index 80bb663330b..3c8e79eaba5 100644
--- a/intern/cycles/blender/addon/operators.py
+++ b/intern/cycles/blender/addon/operators.py
@@ -153,12 +153,12 @@ class CYCLES_OT_denoise_animation(Operator):
self.report({'ERROR'}, str(e))
return {'FINISHED'}
- self.report({'INFO'}, "Denoising completed.")
+ self.report({'INFO'}, "Denoising completed")
return {'FINISHED'}
class CYCLES_OT_merge_images(Operator):
- "Combine OpenEXR multilayer images rendered with different sample" \
+ "Combine OpenEXR multilayer images rendered with different sample " \
"ranges into one image with reduced noise"
bl_idname = "cycles.merge_images"
bl_label = "Merge Images"
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 5f163c2510b..c91e210bbd8 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -112,6 +112,7 @@ enum_use_layer_samples = (
enum_sampling_pattern = (
('SOBOL', "Sobol", "Use Sobol random sampling pattern"),
('CORRELATED_MUTI_JITTER', "Correlated Multi-Jitter", "Use Correlated Multi-Jitter random sampling pattern"),
+ ('PROGRESSIVE_MUTI_JITTER', "Progressive Multi-Jitter", "Use Progressive Multi-Jitter random sampling pattern"),
)
enum_integrator = (
@@ -178,10 +179,6 @@ enum_view3d_shading_render_pass= (
('TRANSMISSION_INDIRECT', "Transmission Indirect", "Show the Transmission Indirect render pass", 45),
('TRANSMISSION_COLOR', "Transmission Color", "Show the Transmission Color render pass", 46),
- ('SUBSURFACE_DIRECT', "Subsurface Direct", "Show the Subsurface Direct render pass", 47),
- ('SUBSURFACE_INDIRECT', "Subsurface Indirect", "Show the Subsurface Indirect render pass", 48),
- ('SUBSURFACE_COLOR', "Subsurface Color", "Show the Subsurface Color render pass", 49),
-
('VOLUME_DIRECT', "Volume Direct", "Show the Volume Direct render pass", 50),
('VOLUME_INDIRECT', "Volume Indirect", "Show the Volume Indirect render pass", 51),
@@ -197,7 +194,12 @@ enum_aov_types = (
('COLOR', "Color", "Write a Color pass", 1),
)
-enum_denoising_optix_input_passes= (
+enum_viewport_denoising = (
+ ('NONE', "None", "Disable viewport denoising", 0),
+ ('OPTIX', "OptiX AI-Accelerated", "Use the OptiX denoiser running on the GPU (requires at least one compatible OptiX device)", 1),
+)
+
+enum_denoising_optix_input_passes = (
('RGB', "Color", "Use only color as input", 1),
('RGB_ALBEDO', "Color + Albedo", "Use color and albedo data as input", 2),
('RGB_ALBEDO_NORMAL', "Color + Albedo + Normal", "Use color, albedo and normal data as input", 3),
@@ -229,6 +231,18 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default='PATH',
)
+ preview_pause: BoolProperty(
+ name="Pause Preview",
+ description="Pause all viewport preview renders",
+ default=False,
+ )
+ preview_denoising: EnumProperty(
+ name="Viewport Denoising",
+ description="Denoise the image after each preview update with the selected denoiser engine",
+ items=enum_viewport_denoising,
+ default='NONE',
+ )
+
use_square_samples: BoolProperty(
name="Square Samples",
description="Square sampling values for easier artist control",
@@ -247,11 +261,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
min=0, max=(1 << 24),
default=32,
)
- preview_pause: BoolProperty(
- name="Pause Preview",
- description="Pause all viewport preview renders",
- default=False,
- )
aa_samples: IntProperty(
name="AA Samples",
description="Number of antialiasing samples to render for each pixel",
@@ -264,6 +273,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
min=0, max=2097151,
default=32,
)
+
diffuse_samples: IntProperty(
name="Diffuse Samples",
description="Number of diffuse bounce samples to render for each AA sample",
@@ -294,14 +304,12 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
min=1, max=1024,
default=1,
)
-
subsurface_samples: IntProperty(
name="Subsurface Samples",
description="Number of subsurface scattering samples to render for each AA sample",
min=1, max=1024,
default=1,
)
-
volume_samples: IntProperty(
name="Volume Samples",
description="Number of volume scattering samples to render for each AA sample",
@@ -342,6 +350,26 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default=0.01,
)
+ use_adaptive_sampling: BoolProperty(
+ name="Use Adaptive Sampling",
+ description="Automatically reduce the number of samples per pixel based on estimated noise level",
+ default=False,
+ )
+
+ adaptive_threshold: FloatProperty(
+ name="Adaptive Sampling Threshold",
+ description="Noise level step to stop sampling at, lower values reduce noise the cost of render time. Zero for automatic setting based on number of AA samples",
+ min=0.0, max=1.0,
+ default=0.0,
+ precision=4,
+ )
+ adaptive_min_samples: IntProperty(
+ name="Adaptive Min Samples",
+ description="Minimum AA samples for adaptive sampling, to discover noisy features before stopping sampling. Zero for automatic setting based on number of AA samples",
+ min=0, max=4096,
+ default=0,
+ )
+
min_light_bounces: IntProperty(
name="Min Light Bounces",
description="Minimum number of light bounces. Setting this higher reduces noise in the first bounces, "
@@ -416,13 +444,20 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default=8,
)
- volume_step_size: FloatProperty(
- name="Step Size",
- description="Distance between volume shader samples when rendering the volume "
- "(lower values give more accurate and detailed results, but also increased render time)",
- default=0.1,
- min=0.0000001, max=100000.0, soft_min=0.01, soft_max=1.0, precision=4,
- unit='LENGTH'
+ volume_step_rate: FloatProperty(
+ name="Step Rate",
+ description="Globally adjust detail for volume rendering, on top of automatically estimated step size. "
+ "Higher values reduce render time, lower values render with more detail",
+ default=1.0,
+ min=0.01, max=100.0, soft_min=0.1, soft_max=10.0, precision=2
+ )
+
+ volume_preview_step_rate: FloatProperty(
+ name="Step Rate",
+ description="Globally adjust detail for volume rendering, on top of automatically estimated step size. "
+ "Higher values reduce render time, lower values render with more detail",
+ default=1.0,
+ min=0.01, max=100.0, soft_min=0.1, soft_max=10.0, precision=2
)
volume_max_steps: IntProperty(
@@ -562,6 +597,12 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default=64,
subtype='PIXEL'
)
+ preview_denoising_start_sample: IntProperty(
+ name="Start Denoising",
+ description="Sample to start denoising the preview at",
+ min=0, max=(1 << 24),
+ default=1,
+ )
debug_reset_timeout: FloatProperty(
name="Reset timeout",
@@ -641,7 +682,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
('DIFFUSE', "Diffuse", ""),
('GLOSSY', "Glossy", ""),
('TRANSMISSION', "Transmission", ""),
- ('SUBSURFACE', "Subsurface", ""),
),
)
@@ -901,6 +941,14 @@ class CyclesMaterialSettings(bpy.types.PropertyGroup):
default='LINEAR',
)
+ volume_step_rate: FloatProperty(
+ name="Step Rate",
+ description="Scale the distance between volume shader samples when rendering the volume "
+ "(lower values give more accurate and detailed results, but also increased render time)",
+ default=1.0,
+ min=0.001, max=1000.0, soft_min=0.1, soft_max=10.0, precision=4
+ )
+
displacement_method: EnumProperty(
name="Displacement Method",
description="Method to use for the displacement",
@@ -1011,6 +1059,13 @@ class CyclesWorldSettings(bpy.types.PropertyGroup):
items=enum_volume_interpolation,
default='LINEAR',
)
+ volume_step_size: FloatProperty(
+ name="Step Size",
+ description="Distance between volume shader samples when rendering the volume "
+ "(lower values give more accurate and detailed results, but also increased render time)",
+ default=1.0,
+ min=0.0000001, max=100000.0, soft_min=0.1, soft_max=100.0, precision=4
+ )
@classmethod
def register(cls):
@@ -1121,7 +1176,7 @@ class CyclesObjectSettings(bpy.types.PropertyGroup):
motion_steps: IntProperty(
name="Motion Steps",
description="Control accuracy of motion blur, more steps gives more memory usage (actual number of steps is 2^(steps - 1))",
- min=1, soft_max=8,
+ min=1, max=7,
default=1,
)
@@ -1285,7 +1340,12 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
default=False,
update=update_render_passes,
)
-
+ pass_debug_sample_count: BoolProperty(
+ name="Debug Sample Count",
+ description="Number of samples/camera rays per pixel",
+ default=False,
+ update=update_render_passes,
+ )
use_pass_volume_direct: BoolProperty(
name="Volume Direct",
description="Deliver direct volumetric scattering pass",
@@ -1305,12 +1365,6 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
default=False,
update=update_render_passes,
)
- use_optix_denoising: BoolProperty(
- name="Use OptiX AI Denoising",
- description="Denoise the rendered image with the OptiX AI denoiser",
- default=False,
- update=update_render_passes,
- )
denoising_diffuse_direct: BoolProperty(
name="Diffuse Direct",
description="Denoise the direct diffuse lighting",
@@ -1341,16 +1395,6 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
description="Denoise the indirect transmission lighting",
default=True,
)
- denoising_subsurface_direct: BoolProperty(
- name="Subsurface Direct",
- description="Denoise the direct subsurface lighting",
- default=True,
- )
- denoising_subsurface_indirect: BoolProperty(
- name="Subsurface Indirect",
- description="Denoise the indirect subsurface lighting",
- default=True,
- )
denoising_strength: FloatProperty(
name="Denoising Strength",
description="Controls neighbor pixel weighting for the denoising filter (lower values preserve more detail, but aren't as smooth)",
@@ -1387,11 +1431,18 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
min=0, max=7,
default=0,
)
+
+ use_optix_denoising: BoolProperty(
+ name="OptiX AI-Accelerated",
+ description="Use the OptiX denoiser to denoise the rendered image",
+ default=False,
+ update=update_render_passes,
+ )
denoising_optix_input_passes: EnumProperty(
name="Input Passes",
- description="Controls which passes the OptiX AI denoiser should use as input, which can have different effects on the denoised image",
+ description="Passes handed over to the OptiX denoiser (this can have different effects on the denoised image)",
items=enum_denoising_optix_input_passes,
- default='RGB',
+ default='RGB_ALBEDO',
)
use_pass_crypto_object: BoolProperty(
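For reference, a short Python sketch of driving a few of the properties added above from Blender's Python console; it assumes an open scene that uses the Cycles engine, and the values are arbitrary examples:

import bpy

scene = bpy.context.scene
cscene = scene.cycles  # CyclesRenderSettings, as accessed in ui.py above

# Adaptive sampling: zero threshold/min samples mean "choose automatically".
cscene.use_adaptive_sampling = True
cscene.adaptive_threshold = 0.01   # arbitrary example value
cscene.adaptive_min_samples = 0    # 0 = automatic, per the description above

# Viewport denoising with the OptiX denoiser (needs a compatible OptiX device).
cscene.preview_denoising = 'OPTIX'

# Global volume step rates for final and viewport renders.
cscene.volume_step_rate = 1.0
cscene.volume_preview_step_rate = 2.0

# New per-view-layer debug pass toggle.
bpy.context.view_layer.cycles.pass_debug_sample_count = True
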
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 35d5d3801d2..37675c5699d 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -112,6 +112,10 @@ def show_device_active(context):
return True
return context.preferences.addons[__package__].preferences.has_active_device()
+def show_optix_denoising(context):
+ # OptiX AI denoiser can be used when at least one device supports OptiX
+ return bool(context.preferences.addons[__package__].preferences.get_devices_for_type('OPTIX'))
+
def draw_samples_info(layout, context):
cscene = context.scene.cycles
@@ -177,17 +181,23 @@ class CYCLES_RENDER_PT_sampling(CyclesButtonsPanel, Panel):
if not use_optix(context):
layout.prop(cscene, "progressive")
- if cscene.progressive == 'PATH' or use_branched_path(context) is False:
+ if not use_branched_path(context):
col = layout.column(align=True)
col.prop(cscene, "samples", text="Render")
col.prop(cscene, "preview_samples", text="Viewport")
-
- draw_samples_info(layout, context)
else:
col = layout.column(align=True)
col.prop(cscene, "aa_samples", text="Render")
col.prop(cscene, "preview_aa_samples", text="Viewport")
+ # Viewport denoising is currently only supported with OptiX
+ if show_optix_denoising(context):
+ col = layout.column()
+ col.prop(cscene, "preview_denoising")
+
+ if not use_branched_path(context):
+ draw_samples_info(layout, context)
+
class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel):
bl_label = "Sub Samples"
@@ -195,9 +205,7 @@ class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel):
@classmethod
def poll(cls, context):
- scene = context.scene
- cscene = scene.cycles
- return cscene.progressive != 'PATH' and use_branched_path(context)
+ return use_branched_path(context)
def draw(self, context):
layout = self.layout
@@ -222,6 +230,32 @@ class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel):
draw_samples_info(layout, context)
+class CYCLES_RENDER_PT_sampling_adaptive(CyclesButtonsPanel, Panel):
+ bl_label = "Adaptive Sampling"
+ bl_parent_id = "CYCLES_RENDER_PT_sampling"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw_header(self, context):
+ layout = self.layout
+ scene = context.scene
+ cscene = scene.cycles
+
+ layout.prop(cscene, "use_adaptive_sampling", text="")
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ layout.active = cscene.use_adaptive_sampling
+
+ col = layout.column(align=True)
+ col.prop(cscene, "adaptive_min_samples", text="Min Samples")
+ col.prop(cscene, "adaptive_threshold", text="Noise Threshold")
+
class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
bl_label = "Advanced"
bl_parent_id = "CYCLES_RENDER_PT_sampling"
@@ -239,7 +273,9 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
row.prop(cscene, "seed")
row.prop(cscene, "use_animated_seed", text="", icon='TIME')
- layout.prop(cscene, "sampling_pattern", text="Pattern")
+ col = layout.column(align=True)
+ col.active = not(cscene.use_adaptive_sampling)
+ col.prop(cscene, "sampling_pattern", text="Pattern")
layout.prop(cscene, "use_square_samples")
@@ -337,7 +373,7 @@ class CYCLES_RENDER_PT_subdivision(CyclesButtonsPanel, Panel):
col = layout.column()
sub = col.column(align=True)
sub.prop(cscene, "dicing_rate", text="Dicing Rate Render")
- sub.prop(cscene, "preview_dicing_rate", text="Preview")
+ sub.prop(cscene, "preview_dicing_rate", text="Viewport")
col.separator()
@@ -392,9 +428,11 @@ class CYCLES_RENDER_PT_volumes(CyclesButtonsPanel, Panel):
scene = context.scene
cscene = scene.cycles
- col = layout.column()
- col.prop(cscene, "volume_step_size", text="Step Size")
- col.prop(cscene, "volume_max_steps", text="Max Steps")
+ col = layout.column(align=True)
+ col.prop(cscene, "volume_step_rate", text="Step Rate Render")
+ col.prop(cscene, "volume_preview_step_rate", text="Viewport")
+
+ layout.prop(cscene, "volume_max_steps", text="Max Steps")
class CYCLES_RENDER_PT_light_paths(CyclesButtonsPanel, Panel):
@@ -635,9 +673,6 @@ class CYCLES_RENDER_PT_performance_tiles(CyclesButtonsPanel, Panel):
sub = col.column()
sub.active = not rd.use_save_buffers
- for view_layer in scene.view_layers:
- if view_layer.cycles.use_denoising:
- sub.active = False
sub.prop(cscene, "use_progressive_refine")
@@ -705,6 +740,11 @@ class CYCLES_RENDER_PT_performance_viewport(CyclesButtonsPanel, Panel):
col.prop(rd, "preview_pixel_size", text="Pixel Size")
col.prop(cscene, "preview_start_resolution", text="Start Pixels")
+ if show_optix_denoising(context):
+ sub = col.row(align=True)
+ sub.active = cscene.preview_denoising != 'NONE'
+ sub.prop(cscene, "preview_denoising_start_sample", text="Denoising Start Sample")
+
class CYCLES_RENDER_PT_filter(CyclesButtonsPanel, Panel):
bl_label = "Filter"
@@ -732,6 +772,8 @@ class CYCLES_RENDER_PT_filter(CyclesButtonsPanel, Panel):
col.prop(view_layer, "use_solid", text="Surfaces")
col = flow.column()
col.prop(view_layer, "use_strand", text="Hair")
+ col = flow.column()
+ col.prop(view_layer, "use_volumes", text="Volumes")
if with_freestyle:
col = flow.column()
col.prop(view_layer, "use_freestyle", text="Freestyle")
@@ -803,6 +845,8 @@ class CYCLES_RENDER_PT_passes_data(CyclesButtonsPanel, Panel):
col.prop(cycles_view_layer, "denoising_store_passes", text="Denoising Data")
col = flow.column()
col.prop(cycles_view_layer, "pass_debug_render_time", text="Render Time")
+ col = flow.column()
+ col.prop(cycles_view_layer, "pass_debug_sample_count", text="Sample Count")
layout.separator()
@@ -848,14 +892,6 @@ class CYCLES_RENDER_PT_passes_light(CyclesButtonsPanel, Panel):
split = layout.split(factor=0.35)
split.use_property_split = False
- split.label(text="Subsurface")
- row = split.row(align=True)
- row.prop(view_layer, "use_pass_subsurface_direct", text="Direct", toggle=True)
- row.prop(view_layer, "use_pass_subsurface_indirect", text="Indirect", toggle=True)
- row.prop(view_layer, "use_pass_subsurface_color", text="Color", toggle=True)
-
- split = layout.split(factor=0.35)
- split.use_property_split = False
split.label(text="Volume")
row = split.row(align=True)
row.prop(cycles_view_layer, "use_pass_volume_direct", text="Direct", toggle=True)
@@ -981,15 +1017,14 @@ class CYCLES_RENDER_PT_denoising(CyclesButtonsPanel, Panel):
col = split.column(align=True)
- if use_optix(context):
- col.prop(cycles_view_layer, "use_optix_denoising", text="OptiX AI Denoising")
+ if show_optix_denoising(context):
+ col.prop(cycles_view_layer, "use_optix_denoising")
+ col.separator(factor=2.0)
if cycles_view_layer.use_optix_denoising:
col.prop(cycles_view_layer, "denoising_optix_input_passes")
return
- col.separator(factor=2.0)
-
col.prop(cycles_view_layer, "denoising_radius", text="Radius")
col.prop(cycles_view_layer, "denoising_strength", slider=True, text="Strength")
col.prop(cycles_view_layer, "denoising_feature_strength", slider=True, text="Feature Strength")
@@ -1036,15 +1071,6 @@ class CYCLES_RENDER_PT_denoising(CyclesButtonsPanel, Panel):
split = layout.split(factor=0.5)
split.active = cycles_view_layer.use_denoising or cycles_view_layer.denoising_store_passes
- col = split.column()
- col.alignment = 'RIGHT'
- col.label(text="Subsurface")
-
- row = split.row(align=True)
- row.use_property_split = False
- row.prop(cycles_view_layer, "denoising_subsurface_direct", text="Direct", toggle=True)
- row.prop(cycles_view_layer, "denoising_subsurface_indirect", text="Indirect", toggle=True)
-
class CYCLES_PT_post_processing(CyclesButtonsPanel, Panel):
bl_label = "Post Processing"
@@ -1391,8 +1417,6 @@ class CYCLES_LIGHT_PT_light(CyclesButtonsPanel, Panel):
light = context.light
clamp = light.cycles
- layout.use_property_decorate = False
-
if self.bl_space_type == 'PROPERTIES':
layout.row().prop(light, "type", expand=True)
layout.use_property_split = True
@@ -1674,6 +1698,9 @@ class CYCLES_WORLD_PT_settings_volume(CyclesButtonsPanel, Panel):
sub.prop(cworld, "volume_sampling", text="Sampling")
col.prop(cworld, "volume_interpolation", text="Interpolation")
col.prop(cworld, "homogeneous_volume", text="Homogeneous")
+ sub = col.column()
+ sub.active = not cworld.homogeneous_volume
+ sub.prop(cworld, "volume_step_size")
class CYCLES_MATERIAL_PT_preview(CyclesButtonsPanel, Panel):
@@ -1805,6 +1832,9 @@ class CYCLES_MATERIAL_PT_settings_volume(CyclesButtonsPanel, Panel):
sub.prop(cmat, "volume_sampling", text="Sampling")
col.prop(cmat, "volume_interpolation", text="Interpolation")
col.prop(cmat, "homogeneous_volume", text="Homogeneous")
+ sub = col.column()
+ sub.active = not cmat.homogeneous_volume
+ sub.prop(cmat, "volume_step_rate")
def draw(self, context):
self.draw_shared(self, context, context.material)
@@ -1852,7 +1882,7 @@ class CYCLES_RENDER_PT_bake_influence(CyclesButtonsPanel, Panel):
cscene = scene.cycles
rd = scene.render
if rd.use_bake_multires == False and cscene.bake_type in {
- 'NORMAL', 'COMBINED', 'DIFFUSE', 'GLOSSY', 'TRANSMISSION', 'SUBSURFACE'}:
+ 'NORMAL', 'COMBINED', 'DIFFUSE', 'GLOSSY', 'TRANSMISSION'}:
return True
def draw(self, context):
@@ -1887,11 +1917,10 @@ class CYCLES_RENDER_PT_bake_influence(CyclesButtonsPanel, Panel):
flow.prop(cbk, "use_pass_diffuse")
flow.prop(cbk, "use_pass_glossy")
flow.prop(cbk, "use_pass_transmission")
- flow.prop(cbk, "use_pass_subsurface")
flow.prop(cbk, "use_pass_ambient_occlusion")
flow.prop(cbk, "use_pass_emit")
- elif cscene.bake_type in {'DIFFUSE', 'GLOSSY', 'TRANSMISSION', 'SUBSURFACE'}:
+ elif cscene.bake_type in {'DIFFUSE', 'GLOSSY', 'TRANSMISSION'}:
row = col.row(align=True)
row.use_property_split = False
row.prop(cbk, "use_pass_direct", toggle=True)
@@ -2192,8 +2221,6 @@ def draw_device(self, context):
col = layout.column()
col.prop(cscene, "feature_set")
- scene = context.scene
-
col = layout.column()
col.active = show_device_active(context)
col.prop(cscene, "device")
@@ -2248,6 +2275,7 @@ classes = (
CYCLES_PT_integrator_presets,
CYCLES_RENDER_PT_sampling,
CYCLES_RENDER_PT_sampling_sub_samples,
+ CYCLES_RENDER_PT_sampling_adaptive,
CYCLES_RENDER_PT_sampling_advanced,
CYCLES_RENDER_PT_light_paths,
CYCLES_RENDER_PT_light_paths_max_bounces,
diff --git a/intern/cycles/blender/addon/version_update.py b/intern/cycles/blender/addon/version_update.py
index 899245db03e..49f23f4ba30 100644
--- a/intern/cycles/blender/addon/version_update.py
+++ b/intern/cycles/blender/addon/version_update.py
@@ -42,10 +42,7 @@ def custom_bake_remap(scene):
'GLOSSY_COLOR',
'TRANSMISSION_DIRECT',
'TRANSMISSION_INDIRECT',
- 'TRANSMISSION_COLOR',
- 'SUBSURFACE_DIRECT',
- 'SUBSURFACE_INDIRECT',
- 'SUBSURFACE_COLOR')
+ 'TRANSMISSION_COLOR')
diffuse_direct_idx = bake_lookup.index('DIFFUSE_DIRECT')
diff --git a/intern/cycles/blender/blender_camera.cpp b/intern/cycles/blender/blender_camera.cpp
index c84d6e1572b..40a1a2c2edc 100644
--- a/intern/cycles/blender/blender_camera.cpp
+++ b/intern/cycles/blender/blender_camera.cpp
@@ -725,22 +725,26 @@ static void blender_camera_view_subset(BL::RenderEngine &b_engine,
BoundBox2D cam, view;
float view_aspect, cam_aspect, sensor_size;
- /* get viewport viewplane */
+ /* Get viewport viewplane. */
BlenderCamera view_bcam;
blender_camera_init(&view_bcam, b_render);
blender_camera_from_view(&view_bcam, b_engine, b_scene, b_v3d, b_rv3d, width, height, true);
blender_camera_viewplane(&view_bcam, width, height, &view, &view_aspect, &sensor_size);
- /* get camera viewplane */
+ /* Get camera viewplane. */
BlenderCamera cam_bcam;
blender_camera_init(&cam_bcam, b_render);
blender_camera_from_object(&cam_bcam, b_engine, b_ob, true);
+ /* Camera border is affected by aspect, viewport is not. */
+ cam_bcam.pixelaspect.x = b_render.pixel_aspect_x();
+ cam_bcam.pixelaspect.y = b_render.pixel_aspect_y();
+
blender_camera_viewplane(
&cam_bcam, cam_bcam.full_width, cam_bcam.full_height, &cam, &cam_aspect, &sensor_size);
- /* return */
+ /* Return */
*view_box = view * (1.0f / view_aspect);
*cam_box = cam * (1.0f / cam_aspect);
}
@@ -863,7 +867,8 @@ void BlenderSync::sync_view(BL::SpaceView3D &b_v3d,
}
}
-BufferParams BlenderSync::get_buffer_params(BL::RenderSettings &b_render,
+BufferParams BlenderSync::get_buffer_params(BL::Scene &b_scene,
+ BL::RenderSettings &b_render,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
Camera *cam,
@@ -899,7 +904,11 @@ BufferParams BlenderSync::get_buffer_params(BL::RenderSettings &b_render,
params.height = height;
}
- update_viewport_display_passes(b_v3d, params.passes);
+ PassType display_pass = update_viewport_display_passes(b_v3d, params.passes);
+
+ /* Can only denoise the combined image pass */
+ params.denoising_data_pass = display_pass == PASS_COMBINED &&
+ update_viewport_display_denoising(b_v3d, b_scene);
return params;
}
diff --git a/intern/cycles/blender/blender_curves.cpp b/intern/cycles/blender/blender_curves.cpp
index 4dba8ffbe0e..0c87808d880 100644
--- a/intern/cycles/blender/blender_curves.cpp
+++ b/intern/cycles/blender/blender_curves.cpp
@@ -17,6 +17,7 @@
#include "render/attribute.h"
#include "render/camera.h"
#include "render/curves.h"
+#include "render/hair.h"
#include "render/mesh.h"
#include "render/object.h"
#include "render/scene.h"
@@ -107,12 +108,12 @@ static void InterpolateKeySegments(
}
static bool ObtainCacheParticleData(
- Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background)
+ Geometry *geom, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background)
{
int curvenum = 0;
int keyno = 0;
- if (!(mesh && b_mesh && b_ob && CData))
+ if (!(geom && b_mesh && b_ob && CData))
return false;
Transform tfm = get_transform(b_ob->matrix_world());
@@ -128,7 +129,7 @@ static bool ObtainCacheParticleData(
if ((b_part.render_type() == BL::ParticleSettings::render_type_PATH) &&
(b_part.type() == BL::ParticleSettings::type_HAIR)) {
- int shader = clamp(b_part.material() - 1, 0, mesh->used_shaders.size() - 1);
+ int shader = clamp(b_part.material() - 1, 0, geom->used_shaders.size() - 1);
int display_step = background ? b_part.render_step() : b_part.display_step();
int totparts = b_psys.particles.length();
int totchild = background ? b_psys.child_particles.length() :
@@ -173,19 +174,20 @@ static bool ObtainCacheParticleData(
CData->curve_firstkey.push_back_slow(keyno);
float curve_length = 0.0f;
- float3 pcKey;
+ float3 prev_co_world = make_float3(0.0f, 0.0f, 0.0f);
+ float3 prev_co_object = make_float3(0.0f, 0.0f, 0.0f);
for (int step_no = 0; step_no < ren_step; step_no++) {
- float nco[3];
- b_psys.co_hair(*b_ob, pa_no, step_no, nco);
- float3 cKey = make_float3(nco[0], nco[1], nco[2]);
- cKey = transform_point(&itfm, cKey);
+ float3 co_world = prev_co_world;
+ b_psys.co_hair(*b_ob, pa_no, step_no, &co_world.x);
+ float3 co_object = transform_point(&itfm, co_world);
if (step_no > 0) {
- const float step_length = len(cKey - pcKey);
+ const float step_length = len(co_object - prev_co_object);
curve_length += step_length;
}
- CData->curvekey_co.push_back_slow(cKey);
+ CData->curvekey_co.push_back_slow(co_object);
CData->curvekey_time.push_back_slow(curve_length);
- pcKey = cKey;
+ prev_co_object = co_object;
+ prev_co_world = co_world;
keynum++;
}
keyno += keynum;
@@ -201,14 +203,14 @@ static bool ObtainCacheParticleData(
return true;
}
-static bool ObtainCacheParticleUV(Mesh *mesh,
+static bool ObtainCacheParticleUV(Geometry *geom,
BL::Mesh *b_mesh,
BL::Object *b_ob,
ParticleCurveData *CData,
bool background,
int uv_num)
{
- if (!(mesh && b_mesh && b_ob && CData))
+ if (!(geom && b_mesh && b_ob && CData))
return false;
CData->curve_uv.clear();
@@ -264,14 +266,14 @@ static bool ObtainCacheParticleUV(Mesh *mesh,
return true;
}
-static bool ObtainCacheParticleVcol(Mesh *mesh,
+static bool ObtainCacheParticleVcol(Geometry *geom,
BL::Mesh *b_mesh,
BL::Object *b_ob,
ParticleCurveData *CData,
bool background,
int vcol_num)
{
- if (!(mesh && b_mesh && b_ob && CData))
+ if (!(geom && b_mesh && b_ob && CData))
return false;
CData->curve_vcol.clear();
@@ -593,21 +595,21 @@ static void ExportCurveTriangleGeometry(Mesh *mesh, ParticleCurveData *CData, in
/* texture coords still needed */
}
-static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData)
+static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CData)
{
int num_keys = 0;
int num_curves = 0;
- if (mesh->num_curves())
+ if (hair->num_curves())
return;
Attribute *attr_intercept = NULL;
Attribute *attr_random = NULL;
- if (mesh->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT))
- attr_intercept = mesh->curve_attributes.add(ATTR_STD_CURVE_INTERCEPT);
- if (mesh->need_attribute(scene, ATTR_STD_CURVE_RANDOM))
- attr_random = mesh->curve_attributes.add(ATTR_STD_CURVE_RANDOM);
+ if (hair->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT))
+ attr_intercept = hair->attributes.add(ATTR_STD_CURVE_INTERCEPT);
+ if (hair->need_attribute(scene, ATTR_STD_CURVE_RANDOM))
+ attr_random = hair->attributes.add(ATTR_STD_CURVE_RANDOM);
/* compute and reserve size of arrays */
for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
@@ -620,10 +622,10 @@ static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CDa
}
if (num_curves > 0) {
- VLOG(1) << "Exporting curve segments for mesh " << mesh->name;
+ VLOG(1) << "Exporting curve segments for hair " << hair->name;
}
- mesh->reserve_curves(mesh->num_curves() + num_curves, mesh->curve_keys.size() + num_keys);
+ hair->reserve_curves(hair->num_curves() + num_curves, hair->curve_keys.size() + num_keys);
num_keys = 0;
num_curves = 0;
@@ -648,7 +650,7 @@ static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CDa
(curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)) {
radius = 0.0f;
}
- mesh->add_curve_key(ickey_loc, radius);
+ hair->add_curve_key(ickey_loc, radius);
if (attr_intercept)
attr_intercept->add(time);
@@ -659,16 +661,16 @@ static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CDa
attr_random->add(hash_uint2_to_float(num_curves, 0));
}
- mesh->add_curve(num_keys, CData->psys_shader[sys]);
+ hair->add_curve(num_keys, CData->psys_shader[sys]);
num_keys += num_curve_keys;
num_curves++;
}
}
/* check allocation */
- if ((mesh->curve_keys.size() != num_keys) || (mesh->num_curves() != num_curves)) {
+ if ((hair->curve_keys.size() != num_keys) || (hair->num_curves() != num_curves)) {
VLOG(1) << "Allocation failed, clearing data";
- mesh->clear();
+ hair->clear();
}
}
@@ -712,24 +714,58 @@ static float4 LerpCurveSegmentMotionCV(ParticleCurveData *CData, int sys, int cu
return lerp(mP, mP2, remainder);
}
-static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int motion_step)
+static void export_hair_motion_validate_attribute(Hair *hair,
+ int motion_step,
+ int num_motion_keys,
+ bool have_motion)
{
- VLOG(1) << "Exporting curve motion segments for mesh " << mesh->name << ", motion step "
+ Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ const int num_keys = hair->curve_keys.size();
+
+ if (num_motion_keys != num_keys || !have_motion) {
+ /* No motion or hair "topology" changed, remove attributes again. */
+ if (num_motion_keys != num_keys) {
+ VLOG(1) << "Hair topology changed, removing attribute.";
+ }
+ else {
+ VLOG(1) << "No motion, removing attribute.";
+ }
+ hair->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
+ }
+ else if (motion_step > 0) {
+ VLOG(1) << "Filling in new motion vertex position for motion_step " << motion_step;
+
+ /* Motion, fill up previous steps that we might have skipped because
+ * they had no motion, but we need them anyway now. */
+ for (int step = 0; step < motion_step; step++) {
+ float4 *mP = attr_mP->data_float4() + step * num_keys;
+
+ for (int key = 0; key < num_keys; key++) {
+ mP[key] = float3_to_float4(hair->curve_keys[key]);
+ mP[key].w = hair->curve_radius[key];
+ }
+ }
+ }
+}
+
+static void ExportCurveSegmentsMotion(Hair *hair, ParticleCurveData *CData, int motion_step)
+{
+ VLOG(1) << "Exporting curve motion segments for hair " << hair->name << ", motion step "
<< motion_step;
/* find attribute */
- Attribute *attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
bool new_attribute = false;
/* add new attribute if it doesn't exist already */
if (!attr_mP) {
VLOG(1) << "Creating new motion vertex position attribute";
- attr_mP = mesh->curve_attributes.add(ATTR_STD_MOTION_VERTEX_POSITION);
+ attr_mP = hair->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION);
new_attribute = true;
}
/* export motion vectors for curve keys */
- size_t numkeys = mesh->curve_keys.size();
+ size_t numkeys = hair->curve_keys.size();
float4 *mP = attr_mP->data_float4() + motion_step * numkeys;
bool have_motion = false;
int i = 0;
@@ -740,24 +776,24 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
curve++) {
/* Curve lengths may not match! Curves can be clipped. */
- int curve_key_end = (num_curves + 1 < (int)mesh->curve_first_key.size() ?
- mesh->curve_first_key[num_curves + 1] :
- (int)mesh->curve_keys.size());
- const int num_center_curve_keys = curve_key_end - mesh->curve_first_key[num_curves];
+ int curve_key_end = (num_curves + 1 < (int)hair->curve_first_key.size() ?
+ hair->curve_first_key[num_curves + 1] :
+ (int)hair->curve_keys.size());
+ const int num_center_curve_keys = curve_key_end - hair->curve_first_key[num_curves];
const int is_num_keys_different = CData->curve_keynum[curve] - num_center_curve_keys;
if (!is_num_keys_different) {
for (int curvekey = CData->curve_firstkey[curve];
curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve];
curvekey++) {
- if (i < mesh->curve_keys.size()) {
+ if (i < hair->curve_keys.size()) {
mP[i] = CurveSegmentMotionCV(CData, sys, curve, curvekey);
if (!have_motion) {
/* unlike mesh coordinates, these tend to be slightly different
* between frames due to particle transforms into/out of object
* space, so we use an epsilon to detect actual changes */
- float4 curve_key = float3_to_float4(mesh->curve_keys[i]);
- curve_key.w = mesh->curve_radius[i];
+ float4 curve_key = float3_to_float4(hair->curve_keys[i]);
+ curve_key.w = hair->curve_radius[i];
if (len_squared(mP[i] - curve_key) > 1e-5f * 1e-5f)
have_motion = true;
}
@@ -781,42 +817,17 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
}
}
- /* in case of new attribute, we verify if there really was any motion */
+ /* In case of new attribute, we verify if there really was any motion. */
if (new_attribute) {
- if (i != numkeys || !have_motion) {
- /* No motion or hair "topology" changed, remove attributes again. */
- if (i != numkeys) {
- VLOG(1) << "Hair topology changed, removing attribute.";
- }
- else {
- VLOG(1) << "No motion, removing attribute.";
- }
- mesh->curve_attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
- }
- else if (motion_step > 0) {
- VLOG(1) << "Filling in new motion vertex position for motion_step " << motion_step;
- /* motion, fill up previous steps that we might have skipped because
- * they had no motion, but we need them anyway now */
- for (int step = 0; step < motion_step; step++) {
- float4 *mP = attr_mP->data_float4() + step * numkeys;
-
- for (int key = 0; key < numkeys; key++) {
- mP[key] = float3_to_float4(mesh->curve_keys[key]);
- mP[key].w = mesh->curve_radius[key];
- }
- }
- }
+ export_hair_motion_validate_attribute(hair, motion_step, i, have_motion);
}
}
-static void ExportCurveTriangleUV(ParticleCurveData *CData,
- int vert_offset,
- int resol,
- float2 *uvdata)
+static void ExportCurveTriangleUV(ParticleCurveData *CData, int resol, float2 *uvdata)
{
if (uvdata == NULL)
return;
- int vertexindex = vert_offset;
+ int vertexindex = 0;
for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
for (int curve = CData->psys_firstcurve[sys];
@@ -844,15 +855,12 @@ static void ExportCurveTriangleUV(ParticleCurveData *CData,
}
}
-static void ExportCurveTriangleVcol(ParticleCurveData *CData,
- int vert_offset,
- int resol,
- uchar4 *cdata)
+static void ExportCurveTriangleVcol(ParticleCurveData *CData, int resol, uchar4 *cdata)
{
if (cdata == NULL)
return;
- int vertexindex = vert_offset;
+ int vertexindex = 0;
for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
for (int curve = CData->psys_firstcurve[sys];
@@ -951,7 +959,7 @@ void BlenderSync::sync_curve_settings()
if ((b_psys->settings().render_type() == BL::ParticleSettings::render_type_PATH) &&
(b_psys->settings().type() == BL::ParticleSettings::type_HAIR)) {
BL::ID key = BKE_object_is_modified(*b_ob) ? *b_ob : b_ob->data();
- mesh_map.set_recalc(key);
+ geometry_map.set_recalc(key);
object_map.set_recalc(*b_ob);
}
}
@@ -963,42 +971,51 @@ void BlenderSync::sync_curve_settings()
curve_system_manager->tag_update(scene);
}
-void BlenderSync::sync_curves(
- Mesh *mesh, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step)
+bool BlenderSync::object_has_particle_hair(BL::Object b_ob)
{
- if (!motion) {
- /* Clear stored curve data */
- mesh->curve_keys.clear();
- mesh->curve_radius.clear();
- mesh->curve_first_key.clear();
- mesh->curve_shader.clear();
- mesh->curve_attributes.clear();
+ /* Test if the object has a particle modifier with hair. */
+ BL::Object::modifiers_iterator b_mod;
+ for (b_ob.modifiers.begin(b_mod); b_mod != b_ob.modifiers.end(); ++b_mod) {
+ if ((b_mod->type() == b_mod->type_PARTICLE_SYSTEM) &&
+ (preview ? b_mod->show_viewport() : b_mod->show_render())) {
+ BL::ParticleSystemModifier psmd((const PointerRNA)b_mod->ptr);
+ BL::ParticleSystem b_psys((const PointerRNA)psmd.particle_system().ptr);
+ BL::ParticleSettings b_part((const PointerRNA)b_psys.settings().ptr);
+
+ if ((b_part.render_type() == BL::ParticleSettings::render_type_PATH) &&
+ (b_part.type() == BL::ParticleSettings::type_HAIR)) {
+ return true;
+ }
+ }
}
- /* obtain general settings */
- const bool use_curves = scene->curve_system_manager->use_curves;
+ return false;
+}
- if (!(use_curves && b_ob.mode() != b_ob.mode_PARTICLE_EDIT && b_ob.mode() != b_ob.mode_EDIT)) {
- if (!motion)
- mesh->compute_bounds();
+/* Old particle hair. */
+void BlenderSync::sync_particle_hair(
+ Geometry *geom, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step)
+{
+ Hair *hair = (geom->type == Geometry::HAIR) ? static_cast<Hair *>(geom) : NULL;
+ Mesh *mesh = (geom->type == Geometry::MESH) ? static_cast<Mesh *>(geom) : NULL;
+
+ /* obtain general settings */
+ if (b_ob.mode() == b_ob.mode_PARTICLE_EDIT || b_ob.mode() == b_ob.mode_EDIT) {
return;
}
- const int primitive = scene->curve_system_manager->primitive;
const int triangle_method = scene->curve_system_manager->triangle_method;
const int resolution = scene->curve_system_manager->resolution;
- const size_t vert_num = mesh->verts.size();
- const size_t tri_num = mesh->num_triangles();
int used_res = 1;
/* extract particle hair data - should be combined with connecting to mesh later */
ParticleCurveData CData;
- ObtainCacheParticleData(mesh, &b_mesh, &b_ob, &CData, !preview);
+ ObtainCacheParticleData(geom, &b_mesh, &b_ob, &CData, !preview);
/* add hair geometry to mesh */
- if (primitive == CURVE_TRIANGLES) {
+ if (mesh) {
if (triangle_method == CURVE_CAMERA_TRIANGLES) {
/* obtain camera parameters */
float3 RotCam;
@@ -1022,31 +1039,31 @@ void BlenderSync::sync_curves(
}
else {
if (motion)
- ExportCurveSegmentsMotion(mesh, &CData, motion_step);
+ ExportCurveSegmentsMotion(hair, &CData, motion_step);
else
- ExportCurveSegments(scene, mesh, &CData);
+ ExportCurveSegments(scene, hair, &CData);
}
/* generated coordinates from first key. we should ideally get this from
* blender to handle deforming objects */
if (!motion) {
- if (mesh->need_attribute(scene, ATTR_STD_GENERATED)) {
+ if (geom->need_attribute(scene, ATTR_STD_GENERATED)) {
float3 loc, size;
mesh_texture_space(b_mesh, loc, size);
- if (primitive == CURVE_TRIANGLES) {
+ if (mesh) {
Attribute *attr_generated = mesh->attributes.add(ATTR_STD_GENERATED);
float3 *generated = attr_generated->data_float3();
- for (size_t i = vert_num; i < mesh->verts.size(); i++)
+ for (size_t i = 0; i < mesh->verts.size(); i++)
generated[i] = mesh->verts[i] * size - loc;
}
else {
- Attribute *attr_generated = mesh->curve_attributes.add(ATTR_STD_GENERATED);
+ Attribute *attr_generated = hair->attributes.add(ATTR_STD_GENERATED);
float3 *generated = attr_generated->data_float3();
- for (size_t i = 0; i < mesh->num_curves(); i++) {
- float3 co = mesh->curve_keys[mesh->get_curve(i).first_key];
+ for (size_t i = 0; i < hair->num_curves(); i++) {
+ float3 co = hair->curve_keys[hair->get_curve(i).first_key];
generated[i] = co * size - loc;
}
}
@@ -1059,21 +1076,21 @@ void BlenderSync::sync_curves(
int vcol_num = 0;
for (b_mesh.vertex_colors.begin(l); l != b_mesh.vertex_colors.end(); ++l, vcol_num++) {
- if (!mesh->need_attribute(scene, ustring(l->name().c_str())))
+ if (!geom->need_attribute(scene, ustring(l->name().c_str())))
continue;
- ObtainCacheParticleVcol(mesh, &b_mesh, &b_ob, &CData, !preview, vcol_num);
+ ObtainCacheParticleVcol(geom, &b_mesh, &b_ob, &CData, !preview, vcol_num);
- if (primitive == CURVE_TRIANGLES) {
+ if (mesh) {
Attribute *attr_vcol = mesh->attributes.add(
ustring(l->name().c_str()), TypeDesc::TypeColor, ATTR_ELEMENT_CORNER_BYTE);
uchar4 *cdata = attr_vcol->data_uchar4();
- ExportCurveTriangleVcol(&CData, tri_num * 3, used_res, cdata);
+ ExportCurveTriangleVcol(&CData, used_res, cdata);
}
else {
- Attribute *attr_vcol = mesh->curve_attributes.add(
+ Attribute *attr_vcol = hair->attributes.add(
ustring(l->name().c_str()), TypeDesc::TypeColor, ATTR_ELEMENT_CURVE);
float3 *fdata = attr_vcol->data_float3();
@@ -1101,12 +1118,12 @@ void BlenderSync::sync_curves(
ustring name = ustring(l->name().c_str());
/* UV map */
- if (mesh->need_attribute(scene, name) || mesh->need_attribute(scene, std)) {
+ if (geom->need_attribute(scene, name) || geom->need_attribute(scene, std)) {
Attribute *attr_uv;
- ObtainCacheParticleUV(mesh, &b_mesh, &b_ob, &CData, !preview, uv_num);
+ ObtainCacheParticleUV(geom, &b_mesh, &b_ob, &CData, !preview, uv_num);
- if (primitive == CURVE_TRIANGLES) {
+ if (mesh) {
if (active_render)
attr_uv = mesh->attributes.add(std, name);
else
@@ -1114,13 +1131,13 @@ void BlenderSync::sync_curves(
float2 *uv = attr_uv->data_float2();
- ExportCurveTriangleUV(&CData, tri_num * 3, used_res, uv);
+ ExportCurveTriangleUV(&CData, used_res, uv);
}
else {
if (active_render)
- attr_uv = mesh->curve_attributes.add(std, name);
+ attr_uv = hair->attributes.add(std, name);
else
- attr_uv = mesh->curve_attributes.add(name, TypeFloat2, ATTR_ELEMENT_CURVE);
+ attr_uv = hair->attributes.add(name, TypeFloat2, ATTR_ELEMENT_CURVE);
float2 *uv = attr_uv->data_float2();
@@ -1135,8 +1152,292 @@ void BlenderSync::sync_curves(
}
}
}
+}
+
+#ifdef WITH_NEW_OBJECT_TYPES
+static float4 hair_point_as_float4(BL::HairPoint b_point)
+{
+ float4 mP = float3_to_float4(get_float3(b_point.co()));
+ mP.w = b_point.radius();
+ return mP;
+}
+
+static float4 interpolate_hair_points(BL::Hair b_hair,
+ const int first_point_index,
+ const int num_points,
+ const float step)
+{
+ const float curve_t = step * (num_points - 1);
+ const int point_a = clamp((int)curve_t, 0, num_points - 1);
+ const int point_b = min(point_a + 1, num_points - 1);
+ const float t = curve_t - (float)point_a;
+ return lerp(hair_point_as_float4(b_hair.points[first_point_index + point_a]),
+ hair_point_as_float4(b_hair.points[first_point_index + point_b]),
+ t);
+}
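
A quick check of the resampling arithmetic above: with num_points = 4 and step = 0.5, curve_t = 0.5 * 3 = 1.5, so the lerp blends points 1 and 2 with t = 0.5. Below is a minimal standalone sketch of that index/weight selection (illustrative only; plain C++ with made-up names rather than the BL:: API from the patch):

#include <algorithm>
#include <cstdio>

/* Sketch of the index/weight selection in interpolate_hair_points(): map a
 * normalized step in [0, 1] onto two neighbouring points and a blend factor. */
static void hair_sample_indices(int num_points, float step, int *point_a, int *point_b, float *t)
{
  const float curve_t = step * (num_points - 1);
  *point_a = std::max(0, std::min((int)curve_t, num_points - 1));
  *point_b = std::min(*point_a + 1, num_points - 1);
  *t = curve_t - (float)(*point_a);
}

int main()
{
  int a, b;
  float t;
  hair_sample_indices(4, 0.5f, &a, &b, &t);
  printf("blend point %d with point %d, t = %.2f\n", a, b, t); /* 1, 2, 0.50 */
  return 0;
}
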
+
+static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair)
+{
+ /* TODO: optimize so we can straight memcpy arrays from Blender? */
+
+ /* Add requested attributes. */
+ Attribute *attr_intercept = NULL;
+ Attribute *attr_random = NULL;
+
+ if (hair->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT)) {
+ attr_intercept = hair->attributes.add(ATTR_STD_CURVE_INTERCEPT);
+ }
+ if (hair->need_attribute(scene, ATTR_STD_CURVE_RANDOM)) {
+ attr_random = hair->attributes.add(ATTR_STD_CURVE_RANDOM);
+ }
+
+ /* Reserve memory. */
+ const int num_keys = b_hair.points.length();
+ const int num_curves = b_hair.curves.length();
+
+ if (num_curves > 0) {
+ VLOG(1) << "Exporting curve segments for hair " << hair->name;
+ }
+
+ hair->reserve_curves(num_curves, num_keys);
+
+ /* Export curves and points. */
+ vector<float> points_length;
+
+ BL::Hair::curves_iterator b_curve_iter;
+ for (b_hair.curves.begin(b_curve_iter); b_curve_iter != b_hair.curves.end(); ++b_curve_iter) {
+ BL::HairCurve b_curve = *b_curve_iter;
+ const int first_point_index = b_curve.first_point_index();
+ const int num_points = b_curve.num_points();
+
+ float3 prev_co = make_float3(0.0f, 0.0f, 0.0f);
+ float length = 0.0f;
+ if (attr_intercept) {
+ points_length.clear();
+ points_length.reserve(num_points);
+ }
+
+ /* Position and radius. */
+ for (int i = 0; i < num_points; i++) {
+ BL::HairPoint b_point = b_hair.points[first_point_index + i];
+
+ const float3 co = get_float3(b_point.co());
+ const float radius = b_point.radius();
+ hair->add_curve_key(co, radius);
+
+ if (attr_intercept) {
+ if (i > 0) {
+ length += len(co - prev_co);
+ }
+ points_length.push_back(length);
+ prev_co = co;
+ }
+ }
+
+ /* Normalized 0..1 attribute along curve. */
+ if (attr_intercept) {
+ for (int i = 0; i < num_points; i++) {
+ attr_intercept->add((length == 0.0f) ? 0.0f : points_length[i] / length);
+ }
+ }
+
+ /* Random number per curve. */
+ if (attr_random != NULL) {
+ attr_random->add(hash_uint2_to_float(b_curve.index(), 0));
+ }
+
+ /* Curve. */
+ const int shader_index = 0;
+ hair->add_curve(first_point_index, shader_index);
+ }
+}
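
Worked numbers for the intercept attribute filled in above (a sketch of the intended values, where the root key maps to 0 and the tip to 1): for a three-key curve whose two segments have lengths 1 and 3, the cumulative lengths per key are 0, 1 and 4, so the normalized intercepts come out as 0.0, 0.25 and 1.0. A standalone illustration:

#include <cstdio>
#include <vector>

/* Illustrative only: intercept = cumulative length / total curve length. */
int main()
{
  const float segment_lengths[] = {1.0f, 3.0f};
  std::vector<float> points_length = {0.0f};
  float length = 0.0f;
  for (float segment : segment_lengths) {
    length += segment;
    points_length.push_back(length);
  }
  for (float l : points_length) {
    printf("%.2f ", (length == 0.0f) ? 0.0f : l / length); /* 0.00 0.25 1.00 */
  }
  printf("\n");
  return 0;
}
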
+
+static void export_hair_curves_motion(Hair *hair, BL::Hair b_hair, int motion_step)
+{
+ VLOG(1) << "Exporting curve motion segments for hair " << hair->name << ", motion step "
+ << motion_step;
+
+ /* Find or add attribute. */
+ Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ bool new_attribute = false;
+
+ if (!attr_mP) {
+ VLOG(1) << "Creating new motion vertex position attribute";
+ attr_mP = hair->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION);
+ new_attribute = true;
+ }
+
+ /* Export motion keys. */
+ const int num_keys = hair->curve_keys.size();
+ float4 *mP = attr_mP->data_float4() + motion_step * num_keys;
+ bool have_motion = false;
+ int num_motion_keys = 0;
+ int curve_index = 0;
+
+ BL::Hair::curves_iterator b_curve_iter;
+ for (b_hair.curves.begin(b_curve_iter); b_curve_iter != b_hair.curves.end(); ++b_curve_iter) {
+ BL::HairCurve b_curve = *b_curve_iter;
+ const int first_point_index = b_curve.first_point_index();
+ const int num_points = b_curve.num_points();
+
+ Hair::Curve curve = hair->get_curve(curve_index);
+ curve_index++;
+
+ if (num_points == curve.num_keys) {
+ /* Number of keys matches. */
+ for (int i = 0; i < num_points; i++) {
+ int point_index = first_point_index + i;
+
+ if (point_index < num_keys) {
+ mP[num_motion_keys] = hair_point_as_float4(b_hair.points[point_index]);
+ num_motion_keys++;
+
+ if (!have_motion) {
+ /* TODO: use epsilon for comparison? Was needed for particles due to
+ * transform, but ideally should not happen anymore. */
+ float4 curve_key = float3_to_float4(hair->curve_keys[i]);
+ curve_key.w = hair->curve_radius[i];
+ have_motion = !(mP[i] == curve_key);
+ }
+ }
+ }
+ }
+ else {
+ /* Number of keys has changed. Generate an interpolated version
+ * to preserve motion blur. */
+ const float step_size = curve.num_keys > 1 ? 1.0f / (curve.num_keys - 1) : 0.0f;
+ for (int i = 0; i < curve.num_keys; i++) {
+ const float step = i * step_size;
+ mP[num_motion_keys] = interpolate_hair_points(b_hair, first_point_index, num_points, step);
+ num_motion_keys++;
+ }
+ have_motion = true;
+ }
+ }
+
+ /* In case of new attribute, we verify if there really was any motion. */
+ if (new_attribute) {
+ export_hair_motion_validate_attribute(hair, motion_step, num_motion_keys, have_motion);
+ }
+}
+#endif /* WITH_NEW_OBJECT_TYPES */
- mesh->compute_bounds();
+/* Hair object. */
+void BlenderSync::sync_hair(Hair *hair, BL::Object &b_ob, bool motion, int motion_step)
+{
+#ifdef WITH_NEW_OBJECT_TYPES
+ /* Convert Blender hair to Cycles curves. */
+ BL::Hair b_hair(b_ob.data());
+ if (motion) {
+ export_hair_curves_motion(hair, b_hair, motion_step);
+ }
+ else {
+ export_hair_curves(scene, hair, b_hair);
+ }
+#else
+ (void)hair;
+ (void)b_ob;
+ (void)motion;
+ (void)motion_step;
+#endif /* WITH_NEW_OBJECT_TYPES */
+}
+
+void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph,
+ BL::Object b_ob,
+ Geometry *geom,
+ const vector<Shader *> &used_shaders)
+{
+ Hair *hair = (geom->type == Geometry::HAIR) ? static_cast<Hair *>(geom) : NULL;
+ Mesh *mesh = (geom->type == Geometry::MESH) ? static_cast<Mesh *>(geom) : NULL;
+
+ /* Compares curve_keys rather than strands in order to handle quick hair
+ * adjustments in dynamic BVH - other methods could probably do this better. */
+ array<float3> oldcurve_keys;
+ array<float> oldcurve_radius;
+ array<int> oldtriangles;
+ if (hair) {
+ oldcurve_keys.steal_data(hair->curve_keys);
+ oldcurve_radius.steal_data(hair->curve_radius);
+ }
+ else {
+ oldtriangles.steal_data(mesh->triangles);
+ }
+
+ geom->clear();
+ geom->used_shaders = used_shaders;
+
+ if (view_layer.use_hair && scene->curve_system_manager->use_curves) {
+#ifdef WITH_NEW_OBJECT_TYPES
+ if (b_ob.type() == BL::Object::type_HAIR) {
+ /* Hair object. */
+ sync_hair(hair, b_ob, false);
+ assert(mesh == NULL);
+ }
+ else
+#endif
+ {
+ /* Particle hair. */
+ bool need_undeformed = geom->need_attribute(scene, ATTR_STD_GENERATED);
+ BL::Mesh b_mesh = object_to_mesh(
+ b_data, b_ob, b_depsgraph, need_undeformed, Mesh::SUBDIVISION_NONE);
+
+ if (b_mesh) {
+ sync_particle_hair(geom, b_mesh, b_ob, false);
+ free_object_to_mesh(b_data, b_ob, b_mesh);
+ }
+ }
+ }
+
+ /* tag update */
+ const bool rebuild = (hair && ((oldcurve_keys != hair->curve_keys) ||
+ (oldcurve_radius != hair->curve_radius))) ||
+ (mesh && (oldtriangles != mesh->triangles));
+
+ geom->tag_update(scene, rebuild);
+}
+
+void BlenderSync::sync_hair_motion(BL::Depsgraph b_depsgraph,
+ BL::Object b_ob,
+ Geometry *geom,
+ int motion_step)
+{
+ Hair *hair = (geom->type == Geometry::HAIR) ? static_cast<Hair *>(geom) : NULL;
+ Mesh *mesh = (geom->type == Geometry::MESH) ? static_cast<Mesh *>(geom) : NULL;
+
+ /* Skip if nothing exported. */
+ if ((hair && hair->num_keys() == 0) || (mesh && mesh->verts.size() == 0)) {
+ return;
+ }
+
+ /* Export deformed coordinates. */
+ if (ccl::BKE_object_is_deform_modified(b_ob, b_scene, preview)) {
+#ifdef WITH_NEW_OBJECT_TYPES
+ if (b_ob.type() == BL::Object::type_HAIR) {
+ /* Hair object. */
+ sync_hair(hair, b_ob, true, motion_step);
+ assert(mesh == NULL);
+ return;
+ }
+ else
+#endif
+ {
+ /* Particle hair. */
+ BL::Mesh b_mesh = object_to_mesh(b_data, b_ob, b_depsgraph, false, Mesh::SUBDIVISION_NONE);
+ if (b_mesh) {
+ sync_particle_hair(geom, b_mesh, b_ob, true, motion_step);
+ free_object_to_mesh(b_data, b_ob, b_mesh);
+ return;
+ }
+ }
+ }
+
+ /* No deformation on this frame, copy coordinates if other frames did have it. */
+ if (hair) {
+ hair->copy_center_to_motion_step(motion_step);
+ }
+ else {
+ mesh->copy_center_to_motion_step(motion_step);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/blender/blender_device.cpp b/intern/cycles/blender/blender_device.cpp
index 111fc8d5192..5140f190f36 100644
--- a/intern/cycles/blender/blender_device.cpp
+++ b/intern/cycles/blender/blender_device.cpp
@@ -17,8 +17,26 @@
#include "blender/blender_device.h"
#include "blender/blender_util.h"
+#include "util/util_foreach.h"
+
CCL_NAMESPACE_BEGIN
+enum DenoiserType {
+ DENOISER_NONE = 0,
+ DENOISER_OPTIX = 1,
+
+ DENOISER_NUM
+};
+
+enum ComputeDevice {
+ COMPUTE_DEVICE_CPU = 0,
+ COMPUTE_DEVICE_CUDA = 1,
+ COMPUTE_DEVICE_OPENCL = 2,
+ COMPUTE_DEVICE_OPTIX = 3,
+
+ COMPUTE_DEVICE_NUM
+};
+
int blender_device_threads(BL::Scene &b_scene)
{
BL::RenderSettings b_r = b_scene.render();
@@ -40,7 +58,7 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
/* Find network device. */
vector<DeviceInfo> devices = Device::available_devices(DEVICE_MASK_NETWORK);
if (!devices.empty()) {
- device = devices.front();
+ return devices.front();
}
}
else if (get_enum(cscene, "device") == 1) {
@@ -57,14 +75,6 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
}
/* Test if we are using GPU devices. */
- enum ComputeDevice {
- COMPUTE_DEVICE_CPU = 0,
- COMPUTE_DEVICE_CUDA = 1,
- COMPUTE_DEVICE_OPENCL = 2,
- COMPUTE_DEVICE_OPTIX = 3,
- COMPUTE_DEVICE_NUM = 4,
- };
-
ComputeDevice compute_device = (ComputeDevice)get_enum(
cpreferences, "compute_device_type", COMPUTE_DEVICE_NUM, COMPUTE_DEVICE_CPU);
@@ -106,6 +116,34 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
}
}
+ /* Ensure there is an OptiX device when using the OptiX denoiser. */
+ bool use_optix_denoising = get_enum(cscene, "preview_denoising", DENOISER_NUM, DENOISER_NONE) ==
+ DENOISER_OPTIX &&
+ !background;
+ BL::Scene::view_layers_iterator b_view_layer;
+ for (b_scene.view_layers.begin(b_view_layer); b_view_layer != b_scene.view_layers.end();
+ ++b_view_layer) {
+ PointerRNA crl = RNA_pointer_get(&b_view_layer->ptr, "cycles");
+ if (get_boolean(crl, "use_optix_denoising")) {
+ use_optix_denoising = true;
+ }
+ }
+
+ if (use_optix_denoising && device.type != DEVICE_OPTIX) {
+ vector<DeviceInfo> optix_devices = Device::available_devices(DEVICE_MASK_OPTIX);
+ if (!optix_devices.empty()) {
+ /* Convert to a special multi device with separate denoising devices. */
+ if (device.multi_devices.empty()) {
+ device.multi_devices.push_back(device);
+ }
+
+ /* Simply use the first available OptiX device. */
+ const DeviceInfo optix_device = optix_devices.front();
+ device.id += optix_device.id; /* Uniquely identify this special multi device. */
+ device.denoising_devices.push_back(optix_device);
+ }
+ }
+
return device;
}
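
To illustrate the shape of the device info produced when the OptiX denoiser is requested on a non-OptiX render device, here is a sketch with a mocked-up struct and made-up device ids (the real DeviceInfo lives in device/device.h and carries many more fields):

#include <cstdio>
#include <string>
#include <vector>

/* Mock of the few DeviceInfo fields touched by the denoising-device logic
 * above (illustrative only). */
struct MockDeviceInfo {
  std::string id;
  std::vector<MockDeviceInfo> multi_devices;
  std::vector<MockDeviceInfo> denoising_devices;
};

int main()
{
  MockDeviceInfo device = {"CUDA_GeForce_0000"};
  const MockDeviceInfo optix_device = {"OPTIX_GeForce_0000"};

  /* Same steps as in blender_device_info(): wrap the render device in a
   * multi device, then attach the denoising device. */
  if (device.multi_devices.empty()) {
    device.multi_devices.push_back(device);
  }
  device.id += optix_device.id; /* Uniquely identifies this combination. */
  device.denoising_devices.push_back(optix_device);

  printf("id: %s, render devices: %zu, denoising devices: %zu\n",
         device.id.c_str(),
         device.multi_devices.size(),
         device.denoising_devices.size());
  return 0;
}
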
diff --git a/intern/cycles/blender/blender_device.h b/intern/cycles/blender/blender_device.h
index fd6c045c966..8d2ecac7483 100644
--- a/intern/cycles/blender/blender_device.h
+++ b/intern/cycles/blender/blender_device.h
@@ -18,9 +18,9 @@
#define __BLENDER_DEVICE_H__
#include "MEM_guardedalloc.h"
-#include "RNA_types.h"
#include "RNA_access.h"
#include "RNA_blender_cpp.h"
+#include "RNA_types.h"
#include "device/device.h"
diff --git a/intern/cycles/blender/blender_geometry.cpp b/intern/cycles/blender/blender_geometry.cpp
new file mode 100644
index 00000000000..7ca35cff961
--- /dev/null
+++ b/intern/cycles/blender/blender_geometry.cpp
@@ -0,0 +1,192 @@
+
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "render/curves.h"
+#include "render/hair.h"
+#include "render/mesh.h"
+#include "render/object.h"
+
+#include "blender/blender_sync.h"
+#include "blender/blender_util.h"
+
+#include "util/util_foreach.h"
+
+CCL_NAMESPACE_BEGIN
+
+Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph,
+ BL::Object &b_ob,
+ BL::Object &b_ob_instance,
+ bool object_updated,
+ bool use_particle_hair)
+{
+ /* Test if we can instance or if the object is modified. */
+ BL::ID b_ob_data = b_ob.data();
+ BL::ID b_key_id = (BKE_object_is_modified(b_ob)) ? b_ob_instance : b_ob_data;
+ GeometryKey key(b_key_id.ptr.data, use_particle_hair);
+ BL::Material material_override = view_layer.material_override;
+ Shader *default_shader = (b_ob.type() == BL::Object::type_VOLUME) ? scene->default_volume :
+ scene->default_surface;
+#ifdef WITH_NEW_OBJECT_TYPES
+ Geometry::Type geom_type = ((b_ob.type() == BL::Object::type_HAIR || use_particle_hair) &&
+ (scene->curve_system_manager->primitive != CURVE_TRIANGLES)) ?
+ Geometry::HAIR :
+ Geometry::MESH;
+#else
+ Geometry::Type geom_type = ((use_particle_hair) &&
+ (scene->curve_system_manager->primitive != CURVE_TRIANGLES)) ?
+ Geometry::HAIR :
+ Geometry::MESH;
+#endif
+
+ /* Find shader indices. */
+ vector<Shader *> used_shaders;
+
+ BL::Object::material_slots_iterator slot;
+ for (b_ob.material_slots.begin(slot); slot != b_ob.material_slots.end(); ++slot) {
+ if (material_override) {
+ find_shader(material_override, used_shaders, default_shader);
+ }
+ else {
+ BL::ID b_material(slot->material());
+ find_shader(b_material, used_shaders, default_shader);
+ }
+ }
+
+ if (used_shaders.size() == 0) {
+ if (material_override)
+ find_shader(material_override, used_shaders, default_shader);
+ else
+ used_shaders.push_back(default_shader);
+ }
+
+ /* Test if we need to sync. */
+ Geometry *geom = geometry_map.find(key);
+ bool sync = true;
+ if (geom == NULL) {
+ /* Add new geometry if it did not exist yet. */
+ if (geom_type == Geometry::HAIR) {
+ geom = new Hair();
+ }
+ else {
+ geom = new Mesh();
+ }
+ geometry_map.add(key, geom);
+ }
+ else {
+ /* Test if we need to update existing geometry. */
+ sync = geometry_map.update(geom, b_key_id);
+ }
+
+ if (!sync) {
+ /* If transform was applied to geometry, need full update. */
+ if (object_updated && geom->transform_applied) {
+ ;
+ }
+ /* Test if shaders changed, these can be object level so geometry
+ * does not get tagged for recalc. */
+ else if (geom->used_shaders != used_shaders) {
+ ;
+ }
+ else {
+ /* Even if not tagged for recalc, we may need to sync anyway
+ * because the shader needs different geometry attributes. */
+ bool attribute_recalc = false;
+
+ foreach (Shader *shader, geom->used_shaders) {
+ if (shader->need_update_geometry) {
+ attribute_recalc = true;
+ }
+ }
+
+ if (!attribute_recalc) {
+ return geom;
+ }
+ }
+ }
+
+ /* Ensure we only sync instanced geometry once. */
+ if (geometry_synced.find(geom) != geometry_synced.end()) {
+ return geom;
+ }
+
+ progress.set_sync_status("Synchronizing object", b_ob.name());
+
+ geometry_synced.insert(geom);
+
+ geom->name = ustring(b_ob_data.name().c_str());
+
+#ifdef WITH_NEW_OBJECT_TYPES
+ if (b_ob.type() == BL::Object::type_HAIR || use_particle_hair) {
+#else
+ if (use_particle_hair) {
+#endif
+ sync_hair(b_depsgraph, b_ob, geom, used_shaders);
+ }
+ else if (b_ob.type() == BL::Object::type_VOLUME || object_fluid_gas_domain_find(b_ob)) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ sync_volume(b_ob, mesh, used_shaders);
+ }
+ else {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ sync_mesh(b_depsgraph, b_ob, mesh, used_shaders);
+ }
+
+ return geom;
+}
+
+void BlenderSync::sync_geometry_motion(BL::Depsgraph &b_depsgraph,
+ BL::Object &b_ob,
+ Object *object,
+ float motion_time,
+ bool use_particle_hair)
+{
+ /* Ensure we only sync instanced geometry once. */
+ Geometry *geom = object->geometry;
+
+ if (geometry_motion_synced.find(geom) != geometry_motion_synced.end())
+ return;
+
+ geometry_motion_synced.insert(geom);
+
+ /* Ensure we only motion sync geometry that also had geometry synced, to avoid
+ * unnecessary work and to ensure that its attributes were cleared. */
+ if (geometry_synced.find(geom) == geometry_synced.end())
+ return;
+
+ /* Find time matching motion step required by geometry. */
+ int motion_step = geom->motion_step(motion_time);
+ if (motion_step < 0) {
+ return;
+ }
+
+#ifdef WITH_NEW_OBJECT_TYPES
+ if (b_ob.type() == BL::Object::type_HAIR || use_particle_hair) {
+#else
+ if (use_particle_hair) {
+#endif
+ sync_hair_motion(b_depsgraph, b_ob, geom, motion_step);
+ }
+ else if (b_ob.type() == BL::Object::type_VOLUME || object_fluid_gas_domain_find(b_ob)) {
+ /* No volume motion blur support yet. */
+ }
+ else {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ sync_mesh_motion(b_depsgraph, b_ob, mesh, motion_step);
+ }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/blender/blender_id_map.h b/intern/cycles/blender/blender_id_map.h
new file mode 100644
index 00000000000..3bc42e349ae
--- /dev/null
+++ b/intern/cycles/blender/blender_id_map.h
@@ -0,0 +1,299 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BLENDER_ID_MAP_H__
+#define __BLENDER_ID_MAP_H__
+
+#include <string.h>
+
+#include "util/util_map.h"
+#include "util/util_set.h"
+#include "util/util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* ID Map
+ *
+ * Utility class to map between Blender datablocks and Cycles data structures,
+ * and keep track of recalc tags from the dependency graph. */
+
+template<typename K, typename T> class id_map {
+ public:
+ id_map(vector<T *> *scene_data_)
+ {
+ scene_data = scene_data_;
+ }
+
+ T *find(const BL::ID &id)
+ {
+ return find(id.ptr.owner_id);
+ }
+
+ T *find(const K &key)
+ {
+ if (b_map.find(key) != b_map.end()) {
+ T *data = b_map[key];
+ return data;
+ }
+
+ return NULL;
+ }
+
+ void set_recalc(const BL::ID &id)
+ {
+ b_recalc.insert(id.ptr.data);
+ }
+
+ void set_recalc(void *id_ptr)
+ {
+ b_recalc.insert(id_ptr);
+ }
+
+ bool has_recalc()
+ {
+ return !(b_recalc.empty());
+ }
+
+ void pre_sync()
+ {
+ used_set.clear();
+ }
+
+ /* Add new data. */
+ void add(const K &key, T *data)
+ {
+ assert(find(key) == NULL);
+ scene_data->push_back(data);
+ b_map[key] = data;
+ used(data);
+ }
+
+ /* Update existing data. */
+ bool update(T *data, const BL::ID &id)
+ {
+ return update(data, id, id);
+ }
+ bool update(T *data, const BL::ID &id, const BL::ID &parent)
+ {
+ bool recalc = (b_recalc.find(id.ptr.data) != b_recalc.end());
+ if (parent.ptr.data && parent.ptr.data != id.ptr.data) {
+ recalc = recalc || (b_recalc.find(parent.ptr.data) != b_recalc.end());
+ }
+ used(data);
+ return recalc;
+ }
+
+ /* Combined add and update as needed. */
+ bool add_or_update(T **r_data, const BL::ID &id)
+ {
+ return add_or_update(r_data, id, id, id.ptr.owner_id);
+ }
+ bool add_or_update(T **r_data, const BL::ID &id, const K &key)
+ {
+ return add_or_update(r_data, id, id, key);
+ }
+ bool add_or_update(T **r_data, const BL::ID &id, const BL::ID &parent, const K &key)
+ {
+ T *data = find(key);
+ bool recalc;
+
+ if (!data) {
+ /* Add data if it didn't exist yet. */
+ data = new T();
+ add(key, data);
+ recalc = true;
+ }
+ else {
+ /* Check if an update is needed. */
+ recalc = update(data, id, parent);
+ }
+
+ *r_data = data;
+ return recalc;
+ }
+
+ /* Usage tracking, so unused data can be deleted after sync. */
+
+ bool is_used(const K &key)
+ {
+ T *data = find(key);
+ return (data) ? used_set.find(data) != used_set.end() : false;
+ }
+
+ void used(T *data)
+ {
+ /* tag data as still in use */
+ used_set.insert(data);
+ }
+
+ void set_default(T *data)
+ {
+ b_map[NULL] = data;
+ }
+
+ bool post_sync(bool do_delete = true)
+ {
+ /* remove unused data */
+ vector<T *> new_scene_data;
+ typename vector<T *>::iterator it;
+ bool deleted = false;
+
+ for (it = scene_data->begin(); it != scene_data->end(); it++) {
+ T *data = *it;
+
+ if (do_delete && used_set.find(data) == used_set.end()) {
+ delete data;
+ deleted = true;
+ }
+ else
+ new_scene_data.push_back(data);
+ }
+
+ *scene_data = new_scene_data;
+
+ /* update mapping */
+ map<K, T *> new_map;
+ typedef pair<const K, T *> TMapPair;
+ typename map<K, T *>::iterator jt;
+
+ for (jt = b_map.begin(); jt != b_map.end(); jt++) {
+ TMapPair &pair = *jt;
+
+ if (used_set.find(pair.second) != used_set.end())
+ new_map[pair.first] = pair.second;
+ }
+
+ used_set.clear();
+ b_recalc.clear();
+ b_map = new_map;
+
+ return deleted;
+ }
+
+ const map<K, T *> &key_to_scene_data()
+ {
+ return b_map;
+ }
+
+ protected:
+ vector<T *> *scene_data;
+ map<K, T *> b_map;
+ set<T *> used_set;
+ set<void *> b_recalc;
+};
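
The sync lifecycle this class supports is: look data up by key, create it on first use, mark it as used, and let post_sync() delete whatever was not marked. A reduced standalone mock of that lifecycle (illustrative only; it leaves out the recalc tagging and uses plain int keys instead of Blender IDs):

#include <cstdio>
#include <map>
#include <set>
#include <vector>

struct Thing {
  int value = 0;
};

int main()
{
  std::vector<Thing *> scene_data; /* Plays the role of the scene vector. */
  std::map<int, Thing *> key_map;  /* Plays the role of b_map. */
  std::set<Thing *> used_set;

  /* One sync pass: keys 1 and 2 are encountered this frame. */
  const int keys[] = {1, 2};
  for (int key : keys) {
    Thing *data = key_map.count(key) ? key_map[key] : NULL;
    if (!data) {
      /* Equivalent of add(): create and register new data. */
      data = new Thing();
      scene_data.push_back(data);
      key_map[key] = data;
    }
    used_set.insert(data); /* Equivalent of used(). */
  }

  /* Equivalent of post_sync(): anything not in used_set would be deleted. */
  printf("%zu entries, %zu still used\n", scene_data.size(), used_set.size());

  for (Thing *data : scene_data)
    delete data;
  return 0;
}
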
+
+/* Object Key
+ *
+ * To uniquely identify instances, we use the parent, object and persistent instance ID.
+ * We also export a separate object for a mesh and its particle hair. */
+
+enum { OBJECT_PERSISTENT_ID_SIZE = 16 };
+
+struct ObjectKey {
+ void *parent;
+ int id[OBJECT_PERSISTENT_ID_SIZE];
+ void *ob;
+ bool use_particle_hair;
+
+ ObjectKey(void *parent_, int id_[OBJECT_PERSISTENT_ID_SIZE], void *ob_, bool use_particle_hair_)
+ : parent(parent_), ob(ob_), use_particle_hair(use_particle_hair_)
+ {
+ if (id_)
+ memcpy(id, id_, sizeof(id));
+ else
+ memset(id, 0, sizeof(id));
+ }
+
+ bool operator<(const ObjectKey &k) const
+ {
+ if (ob < k.ob) {
+ return true;
+ }
+ else if (ob == k.ob) {
+ if (parent < k.parent) {
+ return true;
+ }
+ else if (parent == k.parent) {
+ if (use_particle_hair < k.use_particle_hair) {
+ return true;
+ }
+ else if (use_particle_hair == k.use_particle_hair) {
+ return memcmp(id, k.id, sizeof(id)) < 0;
+ }
+ }
+ }
+
+ return false;
+ }
+};
+
+/* Geometry Key
+ *
+ * We export separate geometry for a mesh and its particle hair, so the key needs to
+ * distinguish between them. */
+
+struct GeometryKey {
+ void *id;
+ bool use_particle_hair;
+
+ GeometryKey(void *id, bool use_particle_hair) : id(id), use_particle_hair(use_particle_hair)
+ {
+ }
+
+ bool operator<(const GeometryKey &k) const
+ {
+ if (id < k.id) {
+ return true;
+ }
+ else if (id == k.id) {
+ if (use_particle_hair < k.use_particle_hair) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+};
+
+/* Particle System Key */
+
+struct ParticleSystemKey {
+ void *ob;
+ int id[OBJECT_PERSISTENT_ID_SIZE];
+
+ ParticleSystemKey(void *ob_, int id_[OBJECT_PERSISTENT_ID_SIZE]) : ob(ob_)
+ {
+ if (id_)
+ memcpy(id, id_, sizeof(id));
+ else
+ memset(id, 0, sizeof(id));
+ }
+
+ bool operator<(const ParticleSystemKey &k) const
+ {
+ /* first id is particle index, we don't compare that */
+ if (ob < k.ob)
+ return true;
+ else if (ob == k.ob)
+ return memcmp(id + 1, k.id + 1, sizeof(int) * (OBJECT_PERSISTENT_ID_SIZE - 1)) < 0;
+
+ return false;
+ }
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __BLENDER_ID_MAP_H__ */
diff --git a/intern/cycles/blender/blender_image.cpp b/intern/cycles/blender/blender_image.cpp
new file mode 100644
index 00000000000..459dc1779fb
--- /dev/null
+++ b/intern/cycles/blender/blender_image.cpp
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MEM_guardedalloc.h"
+
+#include "blender/blender_image.h"
+#include "blender/blender_session.h"
+#include "blender/blender_util.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Packed Images */
+
+BlenderImageLoader::BlenderImageLoader(BL::Image b_image, int frame)
+ : b_image(b_image), frame(frame), free_cache(!b_image.has_data())
+{
+}
+
+bool BlenderImageLoader::load_metadata(ImageMetaData &metadata)
+{
+ metadata.width = b_image.size()[0];
+ metadata.height = b_image.size()[1];
+ metadata.depth = 1;
+ metadata.channels = b_image.channels();
+
+ if (b_image.is_float()) {
+ if (metadata.channels == 1) {
+ metadata.type = IMAGE_DATA_TYPE_FLOAT;
+ }
+ else if (metadata.channels == 4) {
+ metadata.type = IMAGE_DATA_TYPE_FLOAT4;
+ }
+ else {
+ return false;
+ }
+
+ /* Float images are already converted on the Blender side,
+ * no need to do anything in Cycles. */
+ metadata.colorspace = u_colorspace_raw;
+ }
+ else {
+ if (metadata.channels == 1) {
+ metadata.type = IMAGE_DATA_TYPE_BYTE;
+ }
+ else if (metadata.channels == 4) {
+ metadata.type = IMAGE_DATA_TYPE_BYTE4;
+ }
+ else {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool BlenderImageLoader::load_pixels(const ImageMetaData &metadata,
+ void *pixels,
+ const size_t pixels_size,
+ const bool associate_alpha)
+{
+ const size_t num_pixels = ((size_t)metadata.width) * metadata.height;
+ const int channels = metadata.channels;
+ const int tile = 0; /* TODO(lukas): Support tiles here? */
+
+ if (b_image.is_float()) {
+ /* image data */
+ float *image_pixels;
+ image_pixels = image_get_float_pixels_for_frame(b_image, frame, tile);
+
+ if (image_pixels && num_pixels * channels == pixels_size) {
+ memcpy(pixels, image_pixels, pixels_size * sizeof(float));
+ }
+ else {
+ if (channels == 1) {
+ memset(pixels, 0, num_pixels * sizeof(float));
+ }
+ else {
+ const size_t num_pixels_safe = pixels_size / channels;
+ float *fp = (float *)pixels;
+ for (int i = 0; i < num_pixels_safe; i++, fp += channels) {
+ fp[0] = 1.0f;
+ fp[1] = 0.0f;
+ fp[2] = 1.0f;
+ if (channels == 4) {
+ fp[3] = 1.0f;
+ }
+ }
+ }
+ }
+
+ if (image_pixels) {
+ MEM_freeN(image_pixels);
+ }
+ }
+ else {
+ unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame, tile);
+
+ if (image_pixels && num_pixels * channels == pixels_size) {
+ memcpy(pixels, image_pixels, pixels_size * sizeof(unsigned char));
+ }
+ else {
+ if (channels == 1) {
+ memset(pixels, 0, pixels_size * sizeof(unsigned char));
+ }
+ else {
+ const size_t num_pixels_safe = pixels_size / channels;
+ unsigned char *cp = (unsigned char *)pixels;
+ for (size_t i = 0; i < num_pixels_safe; i++, cp += channels) {
+ cp[0] = 255;
+ cp[1] = 0;
+ cp[2] = 255;
+ if (channels == 4) {
+ cp[3] = 255;
+ }
+ }
+ }
+ }
+
+ if (image_pixels) {
+ MEM_freeN(image_pixels);
+ }
+
+ if (associate_alpha) {
+ /* Premultiply, byte images are always straight alpha for Blender. */
+ unsigned char *cp = (unsigned char *)pixels;
+ for (size_t i = 0; i < num_pixels; i++, cp += channels) {
+ cp[0] = (cp[0] * cp[3]) >> 8;
+ cp[1] = (cp[1] * cp[3]) >> 8;
+ cp[2] = (cp[2] * cp[3]) >> 8;
+ }
+ }
+ }
+
+ /* Free image buffers to save memory during render. */
+ if (free_cache) {
+ b_image.buffers_free();
+ }
+
+ return true;
+}
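
The shift in the premultiply above is an integer approximation of color * alpha / 255 (it divides by 256, so the result can be one step darker than the exact value). For example, a straight-alpha byte pixel (255, 0, 255, 128) becomes (127, 0, 127, 128). A standalone check of that arithmetic:

#include <cstdio>

/* Worked example of the byte premultiply used in load_pixels():
 * color = (color * alpha) >> 8. */
int main()
{
  unsigned char cp[4] = {255, 0, 255, 128}; /* straight-alpha magenta at 50% */
  for (int c = 0; c < 3; c++) {
    cp[c] = (cp[c] * cp[3]) >> 8;
  }
  printf("%d %d %d %d\n", cp[0], cp[1], cp[2], cp[3]); /* 127 0 127 128 */
  return 0;
}
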
+
+string BlenderImageLoader::name() const
+{
+ return BL::Image(b_image).name();
+}
+
+bool BlenderImageLoader::equals(const ImageLoader &other) const
+{
+ const BlenderImageLoader &other_loader = (const BlenderImageLoader &)other;
+ return b_image == other_loader.b_image && frame == other_loader.frame;
+}
+
+/* Point Density */
+
+BlenderPointDensityLoader::BlenderPointDensityLoader(BL::Depsgraph b_depsgraph,
+ BL::ShaderNodeTexPointDensity b_node)
+ : b_depsgraph(b_depsgraph), b_node(b_node)
+{
+}
+
+bool BlenderPointDensityLoader::load_metadata(ImageMetaData &metadata)
+{
+ metadata.channels = 4;
+ metadata.width = b_node.resolution();
+ metadata.height = metadata.width;
+ metadata.depth = metadata.width;
+ metadata.type = IMAGE_DATA_TYPE_FLOAT4;
+ return true;
+}
+
+bool BlenderPointDensityLoader::load_pixels(const ImageMetaData &,
+ void *pixels,
+ const size_t,
+ const bool)
+{
+ int length;
+ b_node.calc_point_density(b_depsgraph, &length, (float **)&pixels);
+ return true;
+}
+
+void BlenderSession::builtin_images_load()
+{
+ /* Force builtin images to be loaded along with Blender data sync. This
+ * is needed because we may be reading from depsgraph evaluated data which
+ * can be freed by Blender before Cycles reads it.
+ *
+ * TODO: the assumption that no further access to builtin image data will
+ * happen is really weak, and likely to break in the future. We should find
+ * a better solution to hand over the data directly to the image manager
+ * instead of through callbacks whose timing is difficult to control. */
+ ImageManager *manager = session->scene->image_manager;
+ Device *device = session->device;
+ manager->device_load_builtin(device, session->scene, session->progress);
+}
+
+string BlenderPointDensityLoader::name() const
+{
+ return BL::ShaderNodeTexPointDensity(b_node).name();
+}
+
+bool BlenderPointDensityLoader::equals(const ImageLoader &other) const
+{
+ const BlenderPointDensityLoader &other_loader = (const BlenderPointDensityLoader &)other;
+ return b_node == other_loader.b_node && b_depsgraph == other_loader.b_depsgraph;
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/blender/blender_image.h b/intern/cycles/blender/blender_image.h
new file mode 100644
index 00000000000..b58a159a6ba
--- /dev/null
+++ b/intern/cycles/blender/blender_image.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BLENDER_IMAGE_H__
+#define __BLENDER_IMAGE_H__
+
+#include "RNA_blender_cpp.h"
+
+#include "render/image.h"
+
+CCL_NAMESPACE_BEGIN
+
+class BlenderImageLoader : public ImageLoader {
+ public:
+ BlenderImageLoader(BL::Image b_image, int frame);
+
+ bool load_metadata(ImageMetaData &metadata) override;
+ bool load_pixels(const ImageMetaData &metadata,
+ void *pixels,
+ const size_t pixels_size,
+ const bool associate_alpha) override;
+ string name() const override;
+ bool equals(const ImageLoader &other) const override;
+
+ BL::Image b_image;
+ int frame;
+ bool free_cache;
+};
+
+class BlenderPointDensityLoader : public ImageLoader {
+ public:
+ BlenderPointDensityLoader(BL::Depsgraph depsgraph, BL::ShaderNodeTexPointDensity b_node);
+
+ bool load_metadata(ImageMetaData &metadata) override;
+ bool load_pixels(const ImageMetaData &metadata,
+ void *pixels,
+ const size_t pixels_size,
+ const bool associate_alpha) override;
+ string name() const override;
+ bool equals(const ImageLoader &other) const override;
+
+ BL::Depsgraph b_depsgraph;
+ BL::ShaderNodeTexPointDensity b_node;
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __BLENDER_IMAGE_H__ */
diff --git a/intern/cycles/blender/blender_light.cpp b/intern/cycles/blender/blender_light.cpp
new file mode 100644
index 00000000000..6f95821e31e
--- /dev/null
+++ b/intern/cycles/blender/blender_light.cpp
@@ -0,0 +1,212 @@
+
+
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "render/light.h"
+
+#include "blender/blender_sync.h"
+#include "blender/blender_util.h"
+
+#include "util/util_hash.h"
+
+CCL_NAMESPACE_BEGIN
+
+void BlenderSync::sync_light(BL::Object &b_parent,
+ int persistent_id[OBJECT_PERSISTENT_ID_SIZE],
+ BL::Object &b_ob,
+ BL::Object &b_ob_instance,
+ int random_id,
+ Transform &tfm,
+ bool *use_portal)
+{
+ /* test if we need to sync */
+ Light *light;
+ ObjectKey key(b_parent, persistent_id, b_ob_instance, false);
+ BL::Light b_light(b_ob.data());
+
+ /* Update if either object or light data changed. */
+ if (!light_map.add_or_update(&light, b_ob, b_parent, key)) {
+ Shader *shader;
+ if (!shader_map.add_or_update(&shader, b_light)) {
+ if (light->is_portal)
+ *use_portal = true;
+ return;
+ }
+ }
+
+ /* type */
+ switch (b_light.type()) {
+ case BL::Light::type_POINT: {
+ BL::PointLight b_point_light(b_light);
+ light->size = b_point_light.shadow_soft_size();
+ light->type = LIGHT_POINT;
+ break;
+ }
+ case BL::Light::type_SPOT: {
+ BL::SpotLight b_spot_light(b_light);
+ light->size = b_spot_light.shadow_soft_size();
+ light->type = LIGHT_SPOT;
+ light->spot_angle = b_spot_light.spot_size();
+ light->spot_smooth = b_spot_light.spot_blend();
+ break;
+ }
+ /* Hemi lights were removed in 2.8. */
+ // case BL::Light::type_HEMI: {
+ // light->type = LIGHT_DISTANT;
+ // light->size = 0.0f;
+ // break;
+ // }
+ case BL::Light::type_SUN: {
+ BL::SunLight b_sun_light(b_light);
+ light->angle = b_sun_light.angle();
+ light->type = LIGHT_DISTANT;
+ break;
+ }
+ case BL::Light::type_AREA: {
+ BL::AreaLight b_area_light(b_light);
+ light->size = 1.0f;
+ light->axisu = transform_get_column(&tfm, 0);
+ light->axisv = transform_get_column(&tfm, 1);
+ light->sizeu = b_area_light.size();
+ switch (b_area_light.shape()) {
+ case BL::AreaLight::shape_SQUARE:
+ light->sizev = light->sizeu;
+ light->round = false;
+ break;
+ case BL::AreaLight::shape_RECTANGLE:
+ light->sizev = b_area_light.size_y();
+ light->round = false;
+ break;
+ case BL::AreaLight::shape_DISK:
+ light->sizev = light->sizeu;
+ light->round = true;
+ break;
+ case BL::AreaLight::shape_ELLIPSE:
+ light->sizev = b_area_light.size_y();
+ light->round = true;
+ break;
+ }
+ light->type = LIGHT_AREA;
+ break;
+ }
+ }
+
+ /* strength */
+ light->strength = get_float3(b_light.color());
+ light->strength *= BL::PointLight(b_light).energy();
+
+ /* location and (inverted!) direction */
+ light->co = transform_get_column(&tfm, 3);
+ light->dir = -transform_get_column(&tfm, 2);
+ light->tfm = tfm;
+
+ /* shader */
+ vector<Shader *> used_shaders;
+ find_shader(b_light, used_shaders, scene->default_light);
+ light->shader = used_shaders[0];
+
+ /* shadow */
+ PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
+ PointerRNA clight = RNA_pointer_get(&b_light.ptr, "cycles");
+ light->cast_shadow = get_boolean(clight, "cast_shadow");
+ light->use_mis = get_boolean(clight, "use_multiple_importance_sampling");
+
+ int samples = get_int(clight, "samples");
+ if (get_boolean(cscene, "use_square_samples"))
+ light->samples = samples * samples;
+ else
+ light->samples = samples;
+
+ light->max_bounces = get_int(clight, "max_bounces");
+
+ if (b_ob != b_ob_instance) {
+ light->random_id = random_id;
+ }
+ else {
+ light->random_id = hash_uint2(hash_string(b_ob.name().c_str()), 0);
+ }
+
+ if (light->type == LIGHT_AREA)
+ light->is_portal = get_boolean(clight, "is_portal");
+ else
+ light->is_portal = false;
+
+ if (light->is_portal)
+ *use_portal = true;
+
+ /* visibility */
+ uint visibility = object_ray_visibility(b_ob);
+ light->use_diffuse = (visibility & PATH_RAY_DIFFUSE) != 0;
+ light->use_glossy = (visibility & PATH_RAY_GLOSSY) != 0;
+ light->use_transmission = (visibility & PATH_RAY_TRANSMIT) != 0;
+ light->use_scatter = (visibility & PATH_RAY_VOLUME_SCATTER) != 0;
+
+ /* tag */
+ light->tag_update(scene);
+}
+
+void BlenderSync::sync_background_light(BL::SpaceView3D &b_v3d, bool use_portal)
+{
+ BL::World b_world = b_scene.world();
+
+ if (b_world) {
+ PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
+ PointerRNA cworld = RNA_pointer_get(&b_world.ptr, "cycles");
+
+ enum SamplingMethod { SAMPLING_NONE = 0, SAMPLING_AUTOMATIC, SAMPLING_MANUAL, SAMPLING_NUM };
+ int sampling_method = get_enum(cworld, "sampling_method", SAMPLING_NUM, SAMPLING_AUTOMATIC);
+ bool sample_as_light = (sampling_method != SAMPLING_NONE);
+
+ if (sample_as_light || use_portal) {
+ /* test if we need to sync */
+ Light *light;
+ ObjectKey key(b_world, 0, b_world, false);
+
+ if (light_map.add_or_update(&light, b_world, b_world, key) || world_recalc ||
+ b_world.ptr.data != world_map) {
+ light->type = LIGHT_BACKGROUND;
+ if (sampling_method == SAMPLING_MANUAL) {
+ light->map_resolution = get_int(cworld, "sample_map_resolution");
+ }
+ else {
+ light->map_resolution = 0;
+ }
+ light->shader = scene->default_background;
+ light->use_mis = sample_as_light;
+ light->max_bounces = get_int(cworld, "max_bounces");
+
+ /* force enable light again when world is resynced */
+ light->is_enabled = true;
+
+ int samples = get_int(cworld, "samples");
+ if (get_boolean(cscene, "use_square_samples"))
+ light->samples = samples * samples;
+ else
+ light->samples = samples;
+
+ light->tag_update(scene);
+ light_map.set_recalc(b_world);
+ }
+ }
+ }
+
+ world_map = b_world.ptr.data;
+ world_recalc = false;
+ viewport_parameters = BlenderViewportParameters(b_v3d);
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp
index b18f9a37948..a6f380a9ae7 100644
--- a/intern/cycles/blender/blender_mesh.cpp
+++ b/intern/cycles/blender/blender_mesh.cpp
@@ -14,25 +14,25 @@
* limitations under the License.
*/
+#include "render/camera.h"
#include "render/colorspace.h"
#include "render/mesh.h"
#include "render/object.h"
#include "render/scene.h"
-#include "render/camera.h"
-#include "blender/blender_sync.h"
#include "blender/blender_session.h"
+#include "blender/blender_sync.h"
#include "blender/blender_util.h"
#include "subd/subd_patch.h"
#include "subd/subd_split.h"
#include "util/util_algorithm.h"
+#include "util/util_disjoint_set.h"
#include "util/util_foreach.h"
#include "util/util_hash.h"
#include "util/util_logging.h"
#include "util/util_math.h"
-#include "util/util_disjoint_set.h"
#include "mikktspace.h"
@@ -278,54 +278,6 @@ static void mikk_compute_tangents(
genTangSpaceDefault(&context);
}
-/* Create Volume Attribute */
-
-static void create_mesh_volume_attribute(
- BL::Object &b_ob, Mesh *mesh, ImageManager *image_manager, AttributeStandard std, float frame)
-{
- BL::FluidDomainSettings b_domain = object_fluid_domain_find(b_ob);
-
- if (!b_domain)
- return;
-
- mesh->volume_isovalue = b_domain.clipping();
-
- Attribute *attr = mesh->attributes.add(std);
- VoxelAttribute *volume_data = attr->data_voxel();
- ImageMetaData metadata;
- bool animated = false;
-
- volume_data->manager = image_manager;
- volume_data->slot = image_manager->add_image(Attribute::standard_name(std),
- b_ob.ptr.data,
- animated,
- frame,
- INTERPOLATION_LINEAR,
- EXTENSION_CLIP,
- IMAGE_ALPHA_AUTO,
- u_colorspace_raw,
- metadata);
-}
-
-static void create_mesh_volume_attributes(Scene *scene, BL::Object &b_ob, Mesh *mesh, float frame)
-{
- /* for smoke volume rendering */
- if (mesh->need_attribute(scene, ATTR_STD_VOLUME_DENSITY))
- create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_DENSITY, frame);
- if (mesh->need_attribute(scene, ATTR_STD_VOLUME_COLOR))
- create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_COLOR, frame);
- if (mesh->need_attribute(scene, ATTR_STD_VOLUME_FLAME))
- create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_FLAME, frame);
- if (mesh->need_attribute(scene, ATTR_STD_VOLUME_HEAT))
- create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_HEAT, frame);
- if (mesh->need_attribute(scene, ATTR_STD_VOLUME_TEMPERATURE))
- create_mesh_volume_attribute(
- b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_TEMPERATURE, frame);
- if (mesh->need_attribute(scene, ATTR_STD_VOLUME_VELOCITY))
- create_mesh_volume_attribute(
- b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_VELOCITY, frame);
-}
-
/* Create vertex color attributes. */
static void attr_create_vertex_color(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, bool subdivision)
{
@@ -333,14 +285,27 @@ static void attr_create_vertex_color(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh,
BL::Mesh::vertex_colors_iterator l;
for (b_mesh.vertex_colors.begin(l); l != b_mesh.vertex_colors.end(); ++l) {
- if (!mesh->need_attribute(scene, ustring(l->name().c_str())))
+ const bool active_render = l->active_render();
+ AttributeStandard vcol_std = (active_render) ? ATTR_STD_VERTEX_COLOR : ATTR_STD_NONE;
+ ustring vcol_name = ustring(l->name().c_str());
+
+ const bool need_vcol = mesh->need_attribute(scene, vcol_name) ||
+ mesh->need_attribute(scene, vcol_std);
+
+ if (!need_vcol) {
continue;
+ }
- Attribute *attr = mesh->subd_attributes.add(
- ustring(l->name().c_str()), TypeRGBA, ATTR_ELEMENT_CORNER_BYTE);
+ Attribute *vcol_attr = NULL;
+ if (active_render) {
+ vcol_attr = mesh->subd_attributes.add(vcol_std, vcol_name);
+ }
+ else {
+ vcol_attr = mesh->subd_attributes.add(vcol_name, TypeRGBA, ATTR_ELEMENT_CORNER_BYTE);
+ }
BL::Mesh::polygons_iterator p;
- uchar4 *cdata = attr->data_uchar4();
+ uchar4 *cdata = vcol_attr->data_uchar4();
for (b_mesh.polygons.begin(p); p != b_mesh.polygons.end(); ++p) {
int n = p->loop_total();
@@ -355,14 +320,27 @@ static void attr_create_vertex_color(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh,
else {
BL::Mesh::vertex_colors_iterator l;
for (b_mesh.vertex_colors.begin(l); l != b_mesh.vertex_colors.end(); ++l) {
- if (!mesh->need_attribute(scene, ustring(l->name().c_str())))
+ const bool active_render = l->active_render();
+ AttributeStandard vcol_std = (active_render) ? ATTR_STD_VERTEX_COLOR : ATTR_STD_NONE;
+ ustring vcol_name = ustring(l->name().c_str());
+
+ const bool need_vcol = mesh->need_attribute(scene, vcol_name) ||
+ mesh->need_attribute(scene, vcol_std);
+
+ if (!need_vcol) {
continue;
+ }
- Attribute *attr = mesh->attributes.add(
- ustring(l->name().c_str()), TypeRGBA, ATTR_ELEMENT_CORNER_BYTE);
+ Attribute *vcol_attr = NULL;
+ if (active_render) {
+ vcol_attr = mesh->attributes.add(vcol_std, vcol_name);
+ }
+ else {
+ vcol_attr = mesh->attributes.add(vcol_name, TypeRGBA, ATTR_ELEMENT_CORNER_BYTE);
+ }
BL::Mesh::loop_triangles_iterator t;
- uchar4 *cdata = attr->data_uchar4();
+ uchar4 *cdata = vcol_attr->data_uchar4();
for (b_mesh.loop_triangles.begin(t); t != b_mesh.loop_triangles.end(); ++t) {
int3 li = get_int3(t->loops());
@@ -859,9 +837,9 @@ static void create_mesh(Scene *scene,
attr_create_uv_map(scene, mesh, b_mesh);
}
- /* for volume objects, create a matrix to transform from object space to
+ /* For volume objects, create a matrix to transform from object space to
* mesh texture space. this does not work with deformations but that can
- * probably only be done well with a volume grid mapping of coordinates */
+ * probably only be done well with a volume grid mapping of coordinates. */
if (mesh->need_attribute(scene, ATTR_STD_GENERATED_TRANSFORM)) {
Attribute *attr = mesh->attributes.add(ATTR_STD_GENERATED_TRANSFORM);
Transform *tfm = attr->data_transform();
@@ -930,7 +908,7 @@ static void sync_mesh_fluid_motion(BL::Object &b_ob, Scene *scene, Mesh *mesh)
if (scene->need_motion() == Scene::MOTION_NONE)
return;
- BL::FluidDomainSettings b_fluid_domain = object_fluid_domain_find(b_ob);
+ BL::FluidDomainSettings b_fluid_domain = object_fluid_liquid_domain_find(b_ob);
if (!b_fluid_domain)
return;
@@ -963,82 +941,11 @@ static void sync_mesh_fluid_motion(BL::Object &b_ob, Scene *scene, Mesh *mesh)
}
}
-Mesh *BlenderSync::sync_mesh(BL::Depsgraph &b_depsgraph,
- BL::Object &b_ob,
- BL::Object &b_ob_instance,
- bool object_updated,
- bool show_self,
- bool show_particles)
+void BlenderSync::sync_mesh(BL::Depsgraph b_depsgraph,
+ BL::Object b_ob,
+ Mesh *mesh,
+ const vector<Shader *> &used_shaders)
{
- /* test if we can instance or if the object is modified */
- BL::ID b_ob_data = b_ob.data();
- BL::ID key = (BKE_object_is_modified(b_ob)) ? b_ob_instance : b_ob_data;
- BL::Material material_override = view_layer.material_override;
-
- /* find shader indices */
- vector<Shader *> used_shaders;
-
- BL::Object::material_slots_iterator slot;
- for (b_ob.material_slots.begin(slot); slot != b_ob.material_slots.end(); ++slot) {
- if (material_override) {
- find_shader(material_override, used_shaders, scene->default_surface);
- }
- else {
- BL::ID b_material(slot->material());
- find_shader(b_material, used_shaders, scene->default_surface);
- }
- }
-
- if (used_shaders.size() == 0) {
- if (material_override)
- find_shader(material_override, used_shaders, scene->default_surface);
- else
- used_shaders.push_back(scene->default_surface);
- }
-
- /* test if we need to sync */
- int requested_geometry_flags = Mesh::GEOMETRY_NONE;
- if (view_layer.use_surfaces) {
- requested_geometry_flags |= Mesh::GEOMETRY_TRIANGLES;
- }
- if (view_layer.use_hair) {
- requested_geometry_flags |= Mesh::GEOMETRY_CURVES;
- }
- Mesh *mesh;
-
- if (!mesh_map.sync(&mesh, key)) {
- /* if transform was applied to mesh, need full update */
- if (object_updated && mesh->transform_applied)
- ;
- /* test if shaders changed, these can be object level so mesh
- * does not get tagged for recalc */
- else if (mesh->used_shaders != used_shaders)
- ;
- else if (requested_geometry_flags != mesh->geometry_flags)
- ;
- else {
- /* even if not tagged for recalc, we may need to sync anyway
- * because the shader needs different mesh attributes */
- bool attribute_recalc = false;
-
- foreach (Shader *shader, mesh->used_shaders)
- if (shader->need_update_mesh)
- attribute_recalc = true;
-
- if (!attribute_recalc)
- return mesh;
- }
- }
-
- /* ensure we only sync instanced meshes once */
- if (mesh_synced.find(mesh) != mesh_synced.end())
- return mesh;
-
- progress.set_sync_status("Synchronizing object", b_ob.name());
-
- mesh_synced.insert(mesh);
-
- /* create derived mesh */
array<int> oldtriangles;
array<Mesh::SubdFace> oldsubd_faces;
array<int> oldsubd_face_corners;
@@ -1046,150 +953,73 @@ Mesh *BlenderSync::sync_mesh(BL::Depsgraph &b_depsgraph,
oldsubd_faces.steal_data(mesh->subd_faces);
oldsubd_face_corners.steal_data(mesh->subd_face_corners);
- /* compares curve_keys rather than strands in order to handle quick hair
- * adjustments in dynamic BVH - other methods could probably do this better*/
- array<float3> oldcurve_keys;
- array<float> oldcurve_radius;
- oldcurve_keys.steal_data(mesh->curve_keys);
- oldcurve_radius.steal_data(mesh->curve_radius);
-
- /* ensure bvh rebuild (instead of refit) if has_voxel_attributes() changed */
- bool oldhas_voxel_attributes = mesh->has_voxel_attributes();
-
mesh->clear();
mesh->used_shaders = used_shaders;
- mesh->name = ustring(b_ob_data.name().c_str());
- if (requested_geometry_flags != Mesh::GEOMETRY_NONE) {
+ mesh->subdivision_type = Mesh::SUBDIVISION_NONE;
+
+ if (view_layer.use_surfaces) {
/* Adaptive subdivision setup. Not for baking since that requires
* exact mapping to the Blender mesh. */
- if (scene->bake_manager->get_baking()) {
- mesh->subdivision_type = Mesh::SUBDIVISION_NONE;
- }
- else {
+ if (!scene->bake_manager->get_baking()) {
mesh->subdivision_type = object_subdivision_type(b_ob, preview, experimental);
}
/* For some reason, meshes do not need this... */
bool need_undeformed = mesh->need_attribute(scene, ATTR_STD_GENERATED);
-
BL::Mesh b_mesh = object_to_mesh(
b_data, b_ob, b_depsgraph, need_undeformed, mesh->subdivision_type);
if (b_mesh) {
/* Sync mesh itself. */
- if (view_layer.use_surfaces && show_self) {
- if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE)
- create_subd_mesh(scene, mesh, b_ob, b_mesh, used_shaders, dicing_rate, max_subdivisions);
- else
- create_mesh(scene, mesh, b_mesh, used_shaders, false);
-
- create_mesh_volume_attributes(scene, b_ob, mesh, b_scene.frame_current());
- }
-
- /* Sync hair curves. */
- if (view_layer.use_hair && show_particles &&
- mesh->subdivision_type == Mesh::SUBDIVISION_NONE) {
- sync_curves(mesh, b_mesh, b_ob, false);
- }
+ if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE)
+ create_subd_mesh(
+ scene, mesh, b_ob, b_mesh, mesh->used_shaders, dicing_rate, max_subdivisions);
+ else
+ create_mesh(scene, mesh, b_mesh, mesh->used_shaders, false);
free_object_to_mesh(b_data, b_ob, b_mesh);
}
}
- mesh->geometry_flags = requested_geometry_flags;
/* mesh fluid motion mantaflow */
sync_mesh_fluid_motion(b_ob, scene, mesh);
/* tag update */
bool rebuild = (oldtriangles != mesh->triangles) || (oldsubd_faces != mesh->subd_faces) ||
- (oldsubd_face_corners != mesh->subd_face_corners) ||
- (oldcurve_keys != mesh->curve_keys) || (oldcurve_radius != mesh->curve_radius) ||
- (oldhas_voxel_attributes != mesh->has_voxel_attributes());
+ (oldsubd_face_corners != mesh->subd_face_corners);
mesh->tag_update(scene, rebuild);
-
- return mesh;
}
-void BlenderSync::sync_mesh_motion(BL::Depsgraph &b_depsgraph,
- BL::Object &b_ob,
- Object *object,
- float motion_time)
+void BlenderSync::sync_mesh_motion(BL::Depsgraph b_depsgraph,
+ BL::Object b_ob,
+ Mesh *mesh,
+ int motion_step)
{
- /* ensure we only sync instanced meshes once */
- Mesh *mesh = object->mesh;
-
- if (mesh_motion_synced.find(mesh) != mesh_motion_synced.end())
- return;
-
- mesh_motion_synced.insert(mesh);
-
- /* ensure we only motion sync meshes that also had mesh synced, to avoid
- * unnecessary work and to ensure that its attributes were clear */
- if (mesh_synced.find(mesh) == mesh_synced.end())
- return;
-
- /* Find time matching motion step required by mesh. */
- int motion_step = mesh->motion_step(motion_time);
- if (motion_step < 0) {
+ /* Fluid motion blur already exported. */
+ BL::FluidDomainSettings b_fluid_domain = object_fluid_liquid_domain_find(b_ob);
+ if (b_fluid_domain) {
return;
}
- /* skip empty meshes */
- const size_t numverts = mesh->verts.size();
- const size_t numkeys = mesh->curve_keys.size();
-
- if (!numverts && !numkeys)
+ /* Skip if no vertices were exported. */
+ size_t numverts = mesh->verts.size();
+ if (numverts == 0) {
return;
+ }
- /* skip objects without deforming modifiers. this is not totally reliable,
- * would need a more extensive check to see which objects are animated */
+ /* Skip objects without deforming modifiers. This is not totally reliable;
+ * it would need a more extensive check to see which objects are animated. */
BL::Mesh b_mesh(PointerRNA_NULL);
-
- /* manta motion is exported immediate with mesh, skip here */
- BL::FluidDomainSettings b_fluid_domain = object_fluid_domain_find(b_ob);
- if (b_fluid_domain)
- return;
-
if (ccl::BKE_object_is_deform_modified(b_ob, b_scene, preview)) {
/* get derived mesh */
b_mesh = object_to_mesh(b_data, b_ob, b_depsgraph, false, Mesh::SUBDIVISION_NONE);
}
- if (!b_mesh) {
- /* if we have no motion blur on this frame, but on other frames, copy */
- if (numverts) {
- /* triangles */
- Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-
- if (attr_mP) {
- Attribute *attr_mN = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL);
- Attribute *attr_N = mesh->attributes.find(ATTR_STD_VERTEX_NORMAL);
- float3 *P = &mesh->verts[0];
- float3 *N = (attr_N) ? attr_N->data_float3() : NULL;
-
- memcpy(attr_mP->data_float3() + motion_step * numverts, P, sizeof(float3) * numverts);
- if (attr_mN)
- memcpy(attr_mN->data_float3() + motion_step * numverts, N, sizeof(float3) * numverts);
- }
- }
-
- if (numkeys) {
- /* curves */
- Attribute *attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-
- if (attr_mP) {
- float3 *keys = &mesh->curve_keys[0];
- memcpy(attr_mP->data_float3() + motion_step * numkeys, keys, sizeof(float3) * numkeys);
- }
- }
-
- return;
- }
-
/* TODO(sergey): Perform preliminary check for number of vertices. */
- if (numverts) {
+ if (b_mesh) {
+ /* Export deformed coordinates. */
/* Find attributes. */
Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
Attribute *attr_mN = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL);
@@ -1254,14 +1084,13 @@ void BlenderSync::sync_mesh_motion(BL::Depsgraph &b_depsgraph,
}
}
}
- }
- /* hair motion */
- if (numkeys)
- sync_curves(mesh, b_mesh, b_ob, true, motion_step);
+ free_object_to_mesh(b_data, b_ob, b_mesh);
+ return;
+ }
- /* free derived mesh */
- free_object_to_mesh(b_data, b_ob, b_mesh);
+ /* No deformation on this frame, copy coordinates if other frames did have it. */
+ mesh->copy_center_to_motion_step(motion_step);
}
CCL_NAMESPACE_END
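
The rewritten sync_mesh above steals the old triangle and subdivision arrays before clearing the mesh, re-exports, and then compares old against new to decide between a BVH rebuild and a refit; curve keys and voxel attributes no longer take part here, since hair and volumes get their own sync paths elsewhere in this patch. A simplified, self-contained sketch of that steal-and-compare pattern (SimpleMesh and resync_topology are illustrative names, not Cycles API):

#include <cstdio>
#include <utility>
#include <vector>

struct SimpleMesh {
  std::vector<int> triangles; /* flattened vertex indices */
  std::vector<int> subd_face_corners;
};

/* Returns true when the BVH must be rebuilt (topology changed), false when a refit suffices. */
static bool resync_topology(SimpleMesh &mesh,
                            const std::vector<int> &new_triangles,
                            const std::vector<int> &new_corners)
{
  /* "Steal" the old data so the comparison does not need an extra copy. */
  std::vector<int> old_triangles, old_corners;
  old_triangles.swap(mesh.triangles);
  old_corners.swap(mesh.subd_face_corners);

  /* Re-export, standing in for create_mesh()/create_subd_mesh() in the patch. */
  mesh.triangles = new_triangles;
  mesh.subd_face_corners = new_corners;

  return old_triangles != mesh.triangles || old_corners != mesh.subd_face_corners;
}

int main()
{
  SimpleMesh mesh{{0, 1, 2}, {}};
  printf("rebuild=%d\n", resync_topology(mesh, {0, 1, 2}, {})); /* 0: refit */
  printf("rebuild=%d\n", resync_topology(mesh, {0, 2, 1}, {})); /* 1: rebuild */
  return 0;
}
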
diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp
index 6981412bb88..4b29c28913b 100644
--- a/intern/cycles/blender/blender_object.cpp
+++ b/intern/cycles/blender/blender_object.cpp
@@ -15,14 +15,14 @@
*/
#include "render/camera.h"
-#include "render/integrator.h"
#include "render/graph.h"
+#include "render/integrator.h"
#include "render/light.h"
#include "render/mesh.h"
-#include "render/object.h"
-#include "render/scene.h"
#include "render/nodes.h"
+#include "render/object.h"
#include "render/particles.h"
+#include "render/scene.h"
#include "render/shader.h"
#include "blender/blender_object_cull.h"
@@ -67,10 +67,20 @@ bool BlenderSync::object_is_mesh(BL::Object &b_ob)
return false;
}
- if (b_ob.type() == BL::Object::type_CURVE) {
+ BL::Object::type_enum type = b_ob.type();
+
+#ifdef WITH_NEW_OBJECT_TYPES
+ if (type == BL::Object::type_VOLUME || type == BL::Object::type_HAIR) {
+#else
+ if (type == BL::Object::type_VOLUME) {
+#endif
+ /* Will be exported attached to mesh. */
+ return true;
+ }
+ else if (type == BL::Object::type_CURVE) {
/* Skip exporting curves without faces, overhead can be
* significant if there are many for path animation. */
- BL::Curve b_curve(b_ob.data());
+ BL::Curve b_curve(b_ob_data);
return (b_curve.bevel_object() || b_curve.extrude() != 0.0f || b_curve.bevel_depth() != 0.0f ||
b_curve.dimensions() == BL::Curve::dimensions_2D || b_ob.modifiers.length());
@@ -88,215 +98,13 @@ bool BlenderSync::object_is_light(BL::Object &b_ob)
return (b_ob_data && b_ob_data.is_a(&RNA_Light));
}
-static uint object_ray_visibility(BL::Object &b_ob)
-{
- PointerRNA cvisibility = RNA_pointer_get(&b_ob.ptr, "cycles_visibility");
- uint flag = 0;
-
- flag |= get_boolean(cvisibility, "camera") ? PATH_RAY_CAMERA : 0;
- flag |= get_boolean(cvisibility, "diffuse") ? PATH_RAY_DIFFUSE : 0;
- flag |= get_boolean(cvisibility, "glossy") ? PATH_RAY_GLOSSY : 0;
- flag |= get_boolean(cvisibility, "transmission") ? PATH_RAY_TRANSMIT : 0;
- flag |= get_boolean(cvisibility, "shadow") ? PATH_RAY_SHADOW : 0;
- flag |= get_boolean(cvisibility, "scatter") ? PATH_RAY_VOLUME_SCATTER : 0;
-
- return flag;
-}
-
-/* Light */
-
-void BlenderSync::sync_light(BL::Object &b_parent,
- int persistent_id[OBJECT_PERSISTENT_ID_SIZE],
- BL::Object &b_ob,
- BL::Object &b_ob_instance,
- int random_id,
- Transform &tfm,
- bool *use_portal)
-{
- /* test if we need to sync */
- Light *light;
- ObjectKey key(b_parent, persistent_id, b_ob_instance);
- BL::Light b_light(b_ob.data());
-
- /* Update if either object or light data changed. */
- if (!light_map.sync(&light, b_ob, b_parent, key)) {
- Shader *shader;
- if (!shader_map.sync(&shader, b_light)) {
- if (light->is_portal)
- *use_portal = true;
- return;
- }
- }
-
- /* type */
- switch (b_light.type()) {
- case BL::Light::type_POINT: {
- BL::PointLight b_point_light(b_light);
- light->size = b_point_light.shadow_soft_size();
- light->type = LIGHT_POINT;
- break;
- }
- case BL::Light::type_SPOT: {
- BL::SpotLight b_spot_light(b_light);
- light->size = b_spot_light.shadow_soft_size();
- light->type = LIGHT_SPOT;
- light->spot_angle = b_spot_light.spot_size();
- light->spot_smooth = b_spot_light.spot_blend();
- break;
- }
- /* Hemi were removed from 2.8 */
- // case BL::Light::type_HEMI: {
- // light->type = LIGHT_DISTANT;
- // light->size = 0.0f;
- // break;
- // }
- case BL::Light::type_SUN: {
- BL::SunLight b_sun_light(b_light);
- light->angle = b_sun_light.angle();
- light->type = LIGHT_DISTANT;
- break;
- }
- case BL::Light::type_AREA: {
- BL::AreaLight b_area_light(b_light);
- light->size = 1.0f;
- light->axisu = transform_get_column(&tfm, 0);
- light->axisv = transform_get_column(&tfm, 1);
- light->sizeu = b_area_light.size();
- switch (b_area_light.shape()) {
- case BL::AreaLight::shape_SQUARE:
- light->sizev = light->sizeu;
- light->round = false;
- break;
- case BL::AreaLight::shape_RECTANGLE:
- light->sizev = b_area_light.size_y();
- light->round = false;
- break;
- case BL::AreaLight::shape_DISK:
- light->sizev = light->sizeu;
- light->round = true;
- break;
- case BL::AreaLight::shape_ELLIPSE:
- light->sizev = b_area_light.size_y();
- light->round = true;
- break;
- }
- light->type = LIGHT_AREA;
- break;
- }
- }
-
- /* strength */
- light->strength = get_float3(b_light.color());
- light->strength *= BL::PointLight(b_light).energy();
-
- /* location and (inverted!) direction */
- light->co = transform_get_column(&tfm, 3);
- light->dir = -transform_get_column(&tfm, 2);
- light->tfm = tfm;
-
- /* shader */
- vector<Shader *> used_shaders;
- find_shader(b_light, used_shaders, scene->default_light);
- light->shader = used_shaders[0];
-
- /* shadow */
- PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
- PointerRNA clight = RNA_pointer_get(&b_light.ptr, "cycles");
- light->cast_shadow = get_boolean(clight, "cast_shadow");
- light->use_mis = get_boolean(clight, "use_multiple_importance_sampling");
-
- int samples = get_int(clight, "samples");
- if (get_boolean(cscene, "use_square_samples"))
- light->samples = samples * samples;
- else
- light->samples = samples;
-
- light->max_bounces = get_int(clight, "max_bounces");
-
- if (b_ob != b_ob_instance) {
- light->random_id = random_id;
- }
- else {
- light->random_id = hash_uint2(hash_string(b_ob.name().c_str()), 0);
- }
-
- if (light->type == LIGHT_AREA)
- light->is_portal = get_boolean(clight, "is_portal");
- else
- light->is_portal = false;
-
- if (light->is_portal)
- *use_portal = true;
-
- /* visibility */
- uint visibility = object_ray_visibility(b_ob);
- light->use_diffuse = (visibility & PATH_RAY_DIFFUSE) != 0;
- light->use_glossy = (visibility & PATH_RAY_GLOSSY) != 0;
- light->use_transmission = (visibility & PATH_RAY_TRANSMIT) != 0;
- light->use_scatter = (visibility & PATH_RAY_VOLUME_SCATTER) != 0;
-
- /* tag */
- light->tag_update(scene);
-}
-
-void BlenderSync::sync_background_light(BL::SpaceView3D &b_v3d, bool use_portal)
-{
- BL::World b_world = b_scene.world();
-
- if (b_world) {
- PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
- PointerRNA cworld = RNA_pointer_get(&b_world.ptr, "cycles");
-
- enum SamplingMethod { SAMPLING_NONE = 0, SAMPLING_AUTOMATIC, SAMPLING_MANUAL, SAMPLING_NUM };
- int sampling_method = get_enum(cworld, "sampling_method", SAMPLING_NUM, SAMPLING_AUTOMATIC);
- bool sample_as_light = (sampling_method != SAMPLING_NONE);
-
- if (sample_as_light || use_portal) {
- /* test if we need to sync */
- Light *light;
- ObjectKey key(b_world, 0, b_world);
-
- if (light_map.sync(&light, b_world, b_world, key) || world_recalc ||
- b_world.ptr.data != world_map) {
- light->type = LIGHT_BACKGROUND;
- if (sampling_method == SAMPLING_MANUAL) {
- light->map_resolution = get_int(cworld, "sample_map_resolution");
- }
- else {
- light->map_resolution = 0;
- }
- light->shader = scene->default_background;
- light->use_mis = sample_as_light;
- light->max_bounces = get_int(cworld, "max_bounces");
-
- /* force enable light again when world is resynced */
- light->is_enabled = true;
-
- int samples = get_int(cworld, "samples");
- if (get_boolean(cscene, "use_square_samples"))
- light->samples = samples * samples;
- else
- light->samples = samples;
-
- light->tag_update(scene);
- light_map.set_recalc(b_world);
- }
- }
- }
-
- world_map = b_world.ptr.data;
- world_recalc = false;
- viewport_parameters = BlenderViewportParameters(b_v3d);
-}
-
/* Object */
Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
BL::ViewLayer &b_view_layer,
BL::DepsgraphObjectInstance &b_instance,
float motion_time,
- bool show_self,
- bool show_particles,
+ bool use_particle_hair,
bool show_lights,
BlenderObjectCulling &culling,
bool *use_portal)
@@ -378,7 +186,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
}
/* key to lookup object */
- ObjectKey key(b_parent, persistent_id, b_ob_instance);
+ ObjectKey key(b_parent, persistent_id, b_ob_instance, use_particle_hair);
Object *object;
/* motion vector case */
@@ -393,8 +201,8 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
}
/* mesh deformation */
- if (object->mesh)
- sync_mesh_motion(b_depsgraph, b_ob, object, motion_time);
+ if (object->geometry)
+ sync_geometry_motion(b_depsgraph, b_ob, object, motion_time, use_particle_hair);
}
return object;
@@ -403,12 +211,12 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
/* test if we need to sync */
bool object_updated = false;
- if (object_map.sync(&object, b_ob, b_parent, key))
+ if (object_map.add_or_update(&object, b_ob, b_parent, key))
object_updated = true;
/* mesh sync */
- object->mesh = sync_mesh(
- b_depsgraph, b_ob, b_ob_instance, object_updated, show_self, show_particles);
+ object->geometry = sync_geometry(
+ b_depsgraph, b_ob, b_ob_instance, object_updated, use_particle_hair);
/* special case not tracked by object update flags */
@@ -450,7 +258,8 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
/* object sync
* transform comparison should not be needed, but duplis don't work perfect
* in the depsgraph and may not signal changes, so this is a workaround */
- if (object_updated || (object->mesh && object->mesh->need_update) || tfm != object->tfm) {
+ if (object_updated || (object->geometry && object->geometry->need_update) ||
+ tfm != object->tfm) {
object->name = b_ob.name().c_str();
object->pass_id = b_ob.pass_index();
object->color = get_float3(b_ob.color());
@@ -459,23 +268,23 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
/* motion blur */
Scene::MotionType need_motion = scene->need_motion();
- if (need_motion != Scene::MOTION_NONE && object->mesh) {
- Mesh *mesh = object->mesh;
- mesh->use_motion_blur = false;
- mesh->motion_steps = 0;
+ if (need_motion != Scene::MOTION_NONE && object->geometry) {
+ Geometry *geom = object->geometry;
+ geom->use_motion_blur = false;
+ geom->motion_steps = 0;
uint motion_steps;
if (need_motion == Scene::MOTION_BLUR) {
- motion_steps = object_motion_steps(b_parent, b_ob);
- mesh->motion_steps = motion_steps;
+ motion_steps = object_motion_steps(b_parent, b_ob, Object::MAX_MOTION_STEPS);
+ geom->motion_steps = motion_steps;
if (motion_steps && object_use_deform_motion(b_parent, b_ob)) {
- mesh->use_motion_blur = true;
+ geom->use_motion_blur = true;
}
}
else {
motion_steps = 3;
- mesh->motion_steps = motion_steps;
+ geom->motion_steps = motion_steps;
}
object->motion.clear();
@@ -526,13 +335,13 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph,
if (!motion) {
/* prepare for sync */
light_map.pre_sync();
- mesh_map.pre_sync();
+ geometry_map.pre_sync();
object_map.pre_sync();
particle_system_map.pre_sync();
motion_times.clear();
}
else {
- mesh_motion_synced.clear();
+ geometry_motion_synced.clear();
}
/* initialize culling */
@@ -552,22 +361,34 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph,
BL::DepsgraphObjectInstance b_instance = *b_instance_iter;
BL::Object b_ob = b_instance.object();
- /* load per-object culling data */
+ /* Viewport visibility. */
+ const bool show_in_viewport = !b_v3d || b_ob.visible_in_viewport_get(b_v3d);
+ if (show_in_viewport == false) {
+ continue;
+ }
+
+ /* Load per-object culling data. */
culling.init_object(scene, b_ob);
- /* test if object needs to be hidden */
- const bool show_self = b_instance.show_self();
- const bool show_particles = b_instance.show_particles();
- const bool show_in_viewport = !b_v3d || b_ob.visible_in_viewport_get(b_v3d);
+ /* Object itself. */
+ if (b_instance.show_self()) {
+ sync_object(b_depsgraph,
+ b_view_layer,
+ b_instance,
+ motion_time,
+ false,
+ show_lights,
+ culling,
+ &use_portal);
+ }
- if (show_in_viewport && (show_self || show_particles)) {
- /* object itself */
+ /* Particle hair as separate object. */
+ if (b_instance.show_particles() && object_has_particle_hair(b_ob)) {
sync_object(b_depsgraph,
b_view_layer,
b_instance,
motion_time,
- show_self,
- show_particles,
+ true,
show_lights,
culling,
&use_portal);
@@ -584,8 +405,8 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph,
/* handle removed data and modified pointers */
if (light_map.post_sync())
scene->light_manager->tag_update(scene);
- if (mesh_map.post_sync())
- scene->mesh_manager->tag_update(scene);
+ if (geometry_map.post_sync())
+ scene->geometry_manager->tag_update(scene);
if (object_map.post_sync())
scene->object_manager->tag_update(scene);
if (particle_system_map.post_sync())
@@ -593,7 +414,7 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph,
}
if (motion)
- mesh_motion_synced.clear();
+ geometry_motion_synced.clear();
}
void BlenderSync::sync_motion(BL::RenderSettings &b_render,
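
With the changes above, one Blender object instance can now produce two Cycles objects: the surface geometry when show_self() is set, and particle hair as a separate object when show_particles() is set, distinguished by the use_particle_hair flag that also feeds into ObjectKey. A minimal sketch of how such a key keeps the two apart (InstanceKey is an illustrative stand-in, not the real ObjectKey):

#include <cstdio>
#include <map>
#include <string>

struct InstanceKey {
  std::string parent_and_id; /* stands in for parent + persistent_id + instance */
  bool use_particle_hair;    /* mirrors the extra ObjectKey member in the patch */

  bool operator<(const InstanceKey &other) const
  {
    if (parent_and_id != other.parent_and_id)
      return parent_and_id < other.parent_and_id;
    return use_particle_hair < other.use_particle_hair;
  }
};

int main()
{
  std::map<InstanceKey, std::string> object_map;

  /* The mesh itself and its particle hair are synced as separate objects. */
  object_map[{"Suzanne", false}] = "mesh geometry";
  object_map[{"Suzanne", true}] = "hair geometry";

  printf("objects for Suzanne: %zu\n", object_map.size()); /* prints 2 */
  return 0;
}
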
diff --git a/intern/cycles/blender/blender_object_cull.cpp b/intern/cycles/blender/blender_object_cull.cpp
index 74f8fb1dc53..bebecb364eb 100644
--- a/intern/cycles/blender/blender_object_cull.cpp
+++ b/intern/cycles/blender/blender_object_cull.cpp
@@ -19,6 +19,7 @@
#include "render/camera.h"
#include "blender/blender_object_cull.h"
+#include "blender/blender_util.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/blender/blender_particles.cpp b/intern/cycles/blender/blender_particles.cpp
index d74f132ed60..e5eab1ae62b 100644
--- a/intern/cycles/blender/blender_particles.cpp
+++ b/intern/cycles/blender/blender_particles.cpp
@@ -39,7 +39,7 @@ bool BlenderSync::sync_dupli_particle(BL::Object &b_ob,
object->hide_on_missing_motion = true;
/* test if we need particle data */
- if (!object->mesh->need_attribute(scene, ATTR_STD_PARTICLE))
+ if (!object->geometry->need_attribute(scene, ATTR_STD_PARTICLE))
return false;
/* don't handle child particles yet */
@@ -53,10 +53,10 @@ bool BlenderSync::sync_dupli_particle(BL::Object &b_ob,
ParticleSystem *psys;
bool first_use = !particle_system_map.is_used(key);
- bool need_update = particle_system_map.sync(&psys, b_ob, b_instance.object(), key);
+ bool need_update = particle_system_map.add_or_update(&psys, b_ob, b_instance.object(), key);
/* no update needed? */
- if (!need_update && !object->mesh->need_update && !scene->object_manager->need_update)
+ if (!need_update && !object->geometry->need_update && !scene->object_manager->need_update)
return true;
/* first time used in this sync loop? clear and tag update */
diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp
index 335d4daf09c..89bcebda193 100644
--- a/intern/cycles/blender/blender_python.cpp
+++ b/intern/cycles/blender/blender_python.cpp
@@ -19,8 +19,9 @@
#include "blender/CCL_api.h"
#include "blender/blender_device.h"
-#include "blender/blender_sync.h"
#include "blender/blender_session.h"
+#include "blender/blender_sync.h"
+#include "blender/blender_util.h"
#include "render/denoising.h"
#include "render/merge.h"
@@ -37,8 +38,8 @@
#ifdef WITH_OSL
# include "render/osl.h"
-# include <OSL/oslquery.h>
# include <OSL/oslconfig.h>
+# include <OSL/oslquery.h>
#endif
#ifdef WITH_OPENCL
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index e2dea24fdd1..5ea96d6bdfd 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -41,8 +41,8 @@
#include "util/util_progress.h"
#include "util/util_time.h"
-#include "blender/blender_sync.h"
#include "blender/blender_session.h"
+#include "blender/blender_sync.h"
#include "blender/blender_util.h"
CCL_NAMESPACE_BEGIN
@@ -138,14 +138,6 @@ void BlenderSession::create_session()
scene = new Scene(scene_params, session->device);
scene->name = b_scene.name();
- /* setup callbacks for builtin image support */
- scene->image_manager->builtin_image_info_cb = function_bind(
- &BlenderSession::builtin_image_info, this, _1, _2, _3);
- scene->image_manager->builtin_image_pixels_cb = function_bind(
- &BlenderSession::builtin_image_pixels, this, _1, _2, _3, _4, _5, _6, _7);
- scene->image_manager->builtin_image_float_pixels_cb = function_bind(
- &BlenderSession::builtin_image_float_pixels, this, _1, _2, _3, _4, _5, _6, _7);
-
session->scene = scene;
/* There is no single depsgraph to use for the entire render.
@@ -166,7 +158,7 @@ void BlenderSession::create_session()
/* set buffer parameters */
BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
session->reset(buffer_params, session_params.samples);
b_engine.use_highlight_tiles(session_params.progressive_refine == false);
@@ -244,7 +236,7 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg
BL::SpaceView3D b_null_space_view3d(PointerRNA_NULL);
BL::RegionView3D b_null_region_view3d(PointerRNA_NULL);
BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_render, b_null_space_view3d, b_null_region_view3d, scene->camera, width, height);
+ b_scene, b_render, b_null_space_view3d, b_null_region_view3d, scene->camera, width, height);
session->reset(buffer_params, session_params.samples);
b_engine.use_highlight_tiles(session_params.progressive_refine == false);
@@ -278,8 +270,6 @@ static ShaderEvalType get_shader_type(const string &pass_type)
return SHADER_EVAL_GLOSSY_COLOR;
else if (strcmp(shader_type, "TRANSMISSION_COLOR") == 0)
return SHADER_EVAL_TRANSMISSION_COLOR;
- else if (strcmp(shader_type, "SUBSURFACE_COLOR") == 0)
- return SHADER_EVAL_SUBSURFACE_COLOR;
else if (strcmp(shader_type, "EMIT") == 0)
return SHADER_EVAL_EMISSION;
@@ -296,8 +286,6 @@ static ShaderEvalType get_shader_type(const string &pass_type)
return SHADER_EVAL_GLOSSY;
else if (strcmp(shader_type, "TRANSMISSION") == 0)
return SHADER_EVAL_TRANSMISSION;
- else if (strcmp(shader_type, "SUBSURFACE") == 0)
- return SHADER_EVAL_SUBSURFACE;
/* extra */
else if (strcmp(shader_type, "ENVIRONMENT") == 0)
@@ -460,7 +448,7 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
/* render each layer */
BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval();
@@ -474,7 +462,8 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
b_rlay_name = b_view_layer.name();
/* add passes */
- vector<Pass> passes = sync->sync_render_passes(b_rlay, b_view_layer);
+ vector<Pass> passes = sync->sync_render_passes(
+ b_rlay, b_view_layer, session_params.adaptive_sampling);
buffer_params.passes = passes;
PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles");
@@ -640,8 +629,6 @@ static int bake_pass_filter_get(const int pass_filter)
flag |= BAKE_FILTER_GLOSSY;
if ((pass_filter & BL::BakeSettings::pass_filter_TRANSMISSION) != 0)
flag |= BAKE_FILTER_TRANSMISSION;
- if ((pass_filter & BL::BakeSettings::pass_filter_SUBSURFACE) != 0)
- flag |= BAKE_FILTER_SUBSURFACE;
if ((pass_filter & BL::BakeSettings::pass_filter_EMIT) != 0)
flag |= BAKE_FILTER_EMISSION;
@@ -706,7 +693,7 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
scene->bake_manager->set_shader_limit((size_t)b_engine.tile_x(), (size_t)b_engine.tile_y());
@@ -720,9 +707,12 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
int tri_offset = 0;
for (size_t i = 0; i < scene->objects.size(); i++) {
- if (strcmp(scene->objects[i]->name.c_str(), b_object.name().c_str()) == 0) {
+ const Object *object = scene->objects[i];
+ const Geometry *geom = object->geometry;
+ if (object->name == b_object.name() && geom->type == Geometry::MESH) {
+ const Mesh *mesh = static_cast<const Mesh *>(geom);
object_index = i;
- tri_offset = scene->objects[i]->mesh->tri_offset;
+ tri_offset = mesh->prim_offset;
break;
}
}
@@ -848,11 +838,11 @@ void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
if (session->params.modified(session_params) || scene->params.modified(scene_params)) {
free_session();
create_session();
- return;
}
/* increase samples, but never decrease */
session->set_samples(session_params.samples);
+ session->set_denoising_start_sample(session_params.denoising_start_sample);
session->set_pause(session_pause);
/* copy recalc flags, outside of mutex so we can decide to do the real
@@ -883,10 +873,28 @@ void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
else
sync->sync_camera(b_render, b_camera_override, width, height, "");
+ /* get buffer parameters */
+ BufferParams buffer_params = BlenderSync::get_buffer_params(
+ b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
+
+ if (session_params.device.type != DEVICE_OPTIX &&
+ session_params.device.denoising_devices.empty()) {
+ /* Cannot use OptiX denoising when it is not supported by the device. */

+ buffer_params.denoising_data_pass = false;
+ }
+ else {
+ session->set_denoising(buffer_params.denoising_data_pass, true);
+ }
+
+ if (scene->film->denoising_data_pass != buffer_params.denoising_data_pass) {
+ scene->film->denoising_data_pass = buffer_params.denoising_data_pass;
+
+ /* Force a scene and session reset below. */
+ scene->film->tag_update(scene);
+ }
+
/* reset if needed */
if (scene->need_reset()) {
- BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_render, b_v3d, b_rv3d, scene->camera, width, height);
session->reset(buffer_params, session_params.samples);
/* After session reset, so device is not accessing image data anymore. */
@@ -953,7 +961,7 @@ bool BlenderSession::draw(int w, int h)
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
bool session_pause = BlenderSync::get_session_pause(b_scene, background);
if (session_pause == false) {
@@ -971,7 +979,7 @@ bool BlenderSession::draw(int w, int h)
/* draw */
BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
DeviceDrawParams draw_params;
if (session->params.display_buffer_linear) {
@@ -1112,341 +1120,6 @@ void BlenderSession::test_cancel()
session->progress.set_cancel("Cancelled");
}
-/* builtin image file name is actually an image datablock name with
- * absolute sequence frame number concatenated via '@' character
- *
- * this function splits frame from builtin name
- */
-int BlenderSession::builtin_image_frame(const string &builtin_name)
-{
- int last = builtin_name.find_last_of('@');
- return atoi(builtin_name.substr(last + 1, builtin_name.size() - last - 1).c_str());
-}
-
-void BlenderSession::builtin_image_info(const string &builtin_name,
- void *builtin_data,
- ImageMetaData &metadata)
-{
- /* empty image */
- metadata.width = 1;
- metadata.height = 1;
-
- if (!builtin_data)
- return;
-
- /* recover ID pointer */
- PointerRNA ptr;
- RNA_id_pointer_create((ID *)builtin_data, &ptr);
- BL::ID b_id(ptr);
-
- if (b_id.is_a(&RNA_Image)) {
- /* image data */
- BL::Image b_image(b_id);
-
- metadata.builtin_free_cache = !b_image.has_data();
- metadata.is_float = b_image.is_float();
- metadata.width = b_image.size()[0];
- metadata.height = b_image.size()[1];
- metadata.depth = 1;
- metadata.channels = b_image.channels();
-
- if (metadata.is_float) {
- /* Float images are already converted on the Blender side,
- * no need to do anything in Cycles. */
- metadata.colorspace = u_colorspace_raw;
- }
- }
- else if (b_id.is_a(&RNA_Object)) {
- /* smoke volume data */
- BL::Object b_ob(b_id);
- BL::FluidDomainSettings b_domain = object_fluid_domain_find(b_ob);
-
- metadata.is_float = true;
- metadata.depth = 1;
- metadata.channels = 1;
-
- if (!b_domain)
- return;
-
- if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY) ||
- builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_FLAME) ||
- builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT) ||
- builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_TEMPERATURE))
- metadata.channels = 1;
- else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_COLOR))
- metadata.channels = 4;
- else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY))
- metadata.channels = 3;
- else
- return;
-
- int3 resolution = get_int3(b_domain.domain_resolution());
- int amplify = (b_domain.use_noise()) ? b_domain.noise_scale() : 1;
-
- /* Velocity and heat data is always low-resolution. */
- if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY) ||
- builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) {
- amplify = 1;
- }
-
- metadata.width = resolution.x * amplify;
- metadata.height = resolution.y * amplify;
- metadata.depth = resolution.z * amplify;
- }
- else {
- /* TODO(sergey): Check we're indeed in shader node tree. */
- PointerRNA ptr;
- RNA_pointer_create(NULL, &RNA_Node, builtin_data, &ptr);
- BL::Node b_node(ptr);
- if (b_node.is_a(&RNA_ShaderNodeTexPointDensity)) {
- BL::ShaderNodeTexPointDensity b_point_density_node(b_node);
- metadata.channels = 4;
- metadata.width = b_point_density_node.resolution();
- metadata.height = metadata.width;
- metadata.depth = metadata.width;
- metadata.is_float = true;
- }
- }
-}
-
-bool BlenderSession::builtin_image_pixels(const string &builtin_name,
- void *builtin_data,
- int tile,
- unsigned char *pixels,
- const size_t pixels_size,
- const bool associate_alpha,
- const bool free_cache)
-{
- if (!builtin_data) {
- return false;
- }
-
- const int frame = builtin_image_frame(builtin_name);
-
- PointerRNA ptr;
- RNA_id_pointer_create((ID *)builtin_data, &ptr);
- BL::Image b_image(ptr);
-
- const int width = b_image.size()[0];
- const int height = b_image.size()[1];
- const int channels = b_image.channels();
-
- unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame, tile);
- const size_t num_pixels = ((size_t)width) * height;
-
- if (image_pixels && num_pixels * channels == pixels_size) {
- memcpy(pixels, image_pixels, pixels_size * sizeof(unsigned char));
- }
- else {
- if (channels == 1) {
- memset(pixels, 0, pixels_size * sizeof(unsigned char));
- }
- else {
- const size_t num_pixels_safe = pixels_size / channels;
- unsigned char *cp = pixels;
- for (size_t i = 0; i < num_pixels_safe; i++, cp += channels) {
- cp[0] = 255;
- cp[1] = 0;
- cp[2] = 255;
- if (channels == 4) {
- cp[3] = 255;
- }
- }
- }
- }
-
- if (image_pixels) {
- MEM_freeN(image_pixels);
- }
-
- /* Free image buffers to save memory during render. */
- if (free_cache) {
- b_image.buffers_free();
- }
-
- if (associate_alpha) {
- /* Premultiply, byte images are always straight for Blender. */
- unsigned char *cp = pixels;
- for (size_t i = 0; i < num_pixels; i++, cp += channels) {
- cp[0] = (cp[0] * cp[3]) >> 8;
- cp[1] = (cp[1] * cp[3]) >> 8;
- cp[2] = (cp[2] * cp[3]) >> 8;
- }
- }
- return true;
-}
-
-bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
- void *builtin_data,
- int tile,
- float *pixels,
- const size_t pixels_size,
- const bool,
- const bool free_cache)
-{
- if (!builtin_data) {
- return false;
- }
-
- PointerRNA ptr;
- RNA_id_pointer_create((ID *)builtin_data, &ptr);
- BL::ID b_id(ptr);
-
- if (b_id.is_a(&RNA_Image)) {
- /* image data */
- BL::Image b_image(b_id);
- int frame = builtin_image_frame(builtin_name);
-
- const int width = b_image.size()[0];
- const int height = b_image.size()[1];
- const int channels = b_image.channels();
-
- float *image_pixels;
- image_pixels = image_get_float_pixels_for_frame(b_image, frame, tile);
- const size_t num_pixels = ((size_t)width) * height;
-
- if (image_pixels && num_pixels * channels == pixels_size) {
- memcpy(pixels, image_pixels, pixels_size * sizeof(float));
- }
- else {
- if (channels == 1) {
- memset(pixels, 0, num_pixels * sizeof(float));
- }
- else {
- const size_t num_pixels_safe = pixels_size / channels;
- float *fp = pixels;
- for (int i = 0; i < num_pixels_safe; i++, fp += channels) {
- fp[0] = 1.0f;
- fp[1] = 0.0f;
- fp[2] = 1.0f;
- if (channels == 4) {
- fp[3] = 1.0f;
- }
- }
- }
- }
-
- if (image_pixels) {
- MEM_freeN(image_pixels);
- }
-
- /* Free image buffers to save memory during render. */
- if (free_cache) {
- b_image.buffers_free();
- }
-
- return true;
- }
- else if (b_id.is_a(&RNA_Object)) {
- /* smoke volume data */
- BL::Object b_ob(b_id);
- BL::FluidDomainSettings b_domain = object_fluid_domain_find(b_ob);
-
- if (!b_domain) {
- return false;
- }
-#if WITH_FLUID
- int3 resolution = get_int3(b_domain.domain_resolution());
- int length, amplify = (b_domain.use_noise()) ? b_domain.noise_scale() : 1;
-
- /* Velocity and heat data is always low-resolution. */
- if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY) ||
- builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) {
- amplify = 1;
- }
-
- const int width = resolution.x * amplify;
- const int height = resolution.y * amplify;
- const int depth = resolution.z * amplify;
- const size_t num_pixels = ((size_t)width) * height * depth;
-
- if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY)) {
- FluidDomainSettings_density_grid_get_length(&b_domain.ptr, &length);
- if (length == num_pixels) {
- FluidDomainSettings_density_grid_get(&b_domain.ptr, pixels);
- return true;
- }
- }
- else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_FLAME)) {
- /* this is in range 0..1, and interpreted by the OpenGL smoke viewer
- * as 1500..3000 K with the first part faded to zero density */
- FluidDomainSettings_flame_grid_get_length(&b_domain.ptr, &length);
- if (length == num_pixels) {
- FluidDomainSettings_flame_grid_get(&b_domain.ptr, pixels);
- return true;
- }
- }
- else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_COLOR)) {
- /* the RGB is "premultiplied" by density for better interpolation results */
- FluidDomainSettings_color_grid_get_length(&b_domain.ptr, &length);
- if (length == num_pixels * 4) {
- FluidDomainSettings_color_grid_get(&b_domain.ptr, pixels);
- return true;
- }
- }
- else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY)) {
- FluidDomainSettings_velocity_grid_get_length(&b_domain.ptr, &length);
- if (length == num_pixels * 3) {
- FluidDomainSettings_velocity_grid_get(&b_domain.ptr, pixels);
- return true;
- }
- }
- else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) {
- FluidDomainSettings_heat_grid_get_length(&b_domain.ptr, &length);
- if (length == num_pixels) {
- FluidDomainSettings_heat_grid_get(&b_domain.ptr, pixels);
- return true;
- }
- }
- else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_TEMPERATURE)) {
- FluidDomainSettings_temperature_grid_get_length(&b_domain.ptr, &length);
- if (length == num_pixels) {
- FluidDomainSettings_temperature_grid_get(&b_domain.ptr, pixels);
- return true;
- }
- }
- else {
- fprintf(
- stderr, "Cycles error: unknown volume attribute %s, skipping\n", builtin_name.c_str());
- pixels[0] = 0.0f;
- return false;
- }
-#endif
- fprintf(stderr, "Cycles error: unexpected smoke volume resolution, skipping\n");
- }
- else {
- /* We originally were passing view_layer here but in reality we need a
- * a depsgraph to pass to the RE_point_density_minmax() function.
- */
- /* TODO(sergey): Check we're indeed in shader node tree. */
- PointerRNA ptr;
- RNA_pointer_create(NULL, &RNA_Node, builtin_data, &ptr);
- BL::Node b_node(ptr);
- if (b_node.is_a(&RNA_ShaderNodeTexPointDensity)) {
- BL::ShaderNodeTexPointDensity b_point_density_node(b_node);
- int length;
- b_point_density_node.calc_point_density(b_depsgraph, &length, &pixels);
- }
- }
-
- return false;
-}
-
-void BlenderSession::builtin_images_load()
-{
- /* Force builtin images to be loaded along with Blender data sync. This
- * is needed because we may be reading from depsgraph evaluated data which
- * can be freed by Blender before Cycles reads it.
- *
- * TODO: the assumption that no further access to builtin image data will
- * happen is really weak, and likely to break in the future. We should find
- * a better solution to hand over the data directly to the image manager
- * instead of through callbacks whose timing is difficult to control. */
- ImageManager *manager = session->scene->image_manager;
- Device *device = session->device;
- manager->device_load_builtin(device, session->scene, session->progress);
-}
-
void BlenderSession::update_resumable_tile_manager(int num_samples)
{
const int num_resumable_chunks = BlenderSession::num_resumable_chunks,
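
Among the session changes above, the bake path no longer assumes every scene object owns a Mesh; it matches the baked object by name, checks that its geometry is a mesh, and reads prim_offset instead of the old tri_offset. A simplified, self-contained sketch of that lookup (BakeTarget and find_bake_object are illustrative names):

#include <cstdio>
#include <string>
#include <vector>

enum GeometryType { GEOM_MESH, GEOM_HAIR, GEOM_VOLUME };

struct BakeTarget {
  std::string name;
  GeometryType type;
  int prim_offset; /* replaces the old mesh->tri_offset in the patch */
};

/* Returns the index of the first mesh object with a matching name, or -1 if none exists. */
static int find_bake_object(const std::vector<BakeTarget> &objects, const std::string &name)
{
  for (size_t i = 0; i < objects.size(); i++) {
    if (objects[i].name == name && objects[i].type == GEOM_MESH)
      return (int)i;
  }
  return -1;
}

int main()
{
  std::vector<BakeTarget> objects = {{"Fur", GEOM_HAIR, 0}, {"Plane", GEOM_MESH, 128}};
  int index = find_bake_object(objects, "Plane");
  if (index >= 0)
    printf("bake object %d, prim_offset %d\n", index, objects[index].prim_offset);
  return 0;
}
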
diff --git a/intern/cycles/blender/blender_session.h b/intern/cycles/blender/blender_session.h
index 2f25ec740f9..3e6498bb655 100644
--- a/intern/cycles/blender/blender_session.h
+++ b/intern/cycles/blender/blender_session.h
@@ -17,15 +17,19 @@
#ifndef __BLENDER_SESSION_H__
#define __BLENDER_SESSION_H__
+#include "RNA_blender_cpp.h"
+
#include "device/device.h"
+
+#include "render/bake.h"
#include "render/scene.h"
#include "render/session.h"
-#include "render/bake.h"
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
+class BlenderSync;
class ImageMetaData;
class Scene;
class Session;
@@ -153,22 +157,6 @@ class BlenderSession {
bool do_update_only);
void do_write_update_render_tile(RenderTile &rtile, bool do_update_only, bool highlight);
- int builtin_image_frame(const string &builtin_name);
- void builtin_image_info(const string &builtin_name, void *builtin_data, ImageMetaData &metadata);
- bool builtin_image_pixels(const string &builtin_name,
- void *builtin_data,
- int tile,
- unsigned char *pixels,
- const size_t pixels_size,
- const bool associate_alpha,
- const bool free_cache);
- bool builtin_image_float_pixels(const string &builtin_name,
- void *builtin_data,
- int tile,
- float *pixels,
- const size_t pixels_size,
- const bool associate_alpha,
- const bool free_cache);
void builtin_images_load();
/* Update tile manager to reflect resumable render settings. */
diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp
index 206058259af..edde1fd243e 100644
--- a/intern/cycles/blender/blender_shader.cpp
+++ b/intern/cycles/blender/blender_shader.cpp
@@ -23,14 +23,15 @@
#include "render/scene.h"
#include "render/shader.h"
-#include "blender/blender_texture.h"
+#include "blender/blender_image.h"
#include "blender/blender_sync.h"
+#include "blender/blender_texture.h"
#include "blender/blender_util.h"
#include "util/util_debug.h"
#include "util/util_foreach.h"
-#include "util/util_string.h"
#include "util/util_set.h"
+#include "util/util_string.h"
#include "util/util_task.h"
CCL_NAMESPACE_BEGIN
@@ -323,6 +324,13 @@ static ShaderNode *add_node(Scene *scene,
vector_math_node->type = (NodeVectorMathType)b_vector_math_node.operation();
node = vector_math_node;
}
+ else if (b_node.is_a(&RNA_ShaderNodeVectorRotate)) {
+ BL::ShaderNodeVectorRotate b_vector_rotate_node(b_node);
+ VectorRotateNode *vector_rotate_node = new VectorRotateNode();
+ vector_rotate_node->type = (NodeVectorRotateType)b_vector_rotate_node.rotation_type();
+ vector_rotate_node->invert = b_vector_rotate_node.invert();
+ node = vector_rotate_node;
+ }
else if (b_node.is_a(&RNA_ShaderNodeVectorTransform)) {
BL::ShaderNodeVectorTransform b_vector_transform_node(b_node);
VectorTransformNode *vtransform = new VectorTransformNode();
@@ -612,16 +620,16 @@ static ShaderNode *add_node(Scene *scene,
/* create script node */
BL::ShaderNodeScript b_script_node(b_node);
- OSLShaderManager *manager = (OSLShaderManager *)scene->shader_manager;
+ ShaderManager *manager = scene->shader_manager;
string bytecode_hash = b_script_node.bytecode_hash();
if (!bytecode_hash.empty()) {
- node = manager->osl_node("", bytecode_hash, b_script_node.bytecode());
+ node = OSLShaderManager::osl_node(manager, "", bytecode_hash, b_script_node.bytecode());
}
else {
string absolute_filepath = blender_absolute_path(
b_data, b_ntree, b_script_node.filepath());
- node = manager->osl_node(absolute_filepath, "");
+ node = OSLShaderManager::osl_node(manager, absolute_filepath, "");
}
}
#else
@@ -634,7 +642,27 @@ static ShaderNode *add_node(Scene *scene,
BL::Image b_image(b_image_node.image());
BL::ImageUser b_image_user(b_image_node.image_user());
ImageTextureNode *image = new ImageTextureNode();
+
+ image->interpolation = get_image_interpolation(b_image_node);
+ image->extension = get_image_extension(b_image_node);
+ image->projection = (NodeImageProjection)b_image_node.projection();
+ image->projection_blend = b_image_node.projection_blend();
+ BL::TexMapping b_texture_mapping(b_image_node.texture_mapping());
+ get_tex_mapping(&image->tex_mapping, b_texture_mapping);
+
if (b_image) {
+ PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr;
+ image->colorspace = get_enum_identifier(colorspace_ptr, "name");
+
+ image->animated = b_image_node.image_user().use_auto_refresh();
+ image->alpha_type = get_image_alpha_type(b_image);
+
+ image->tiles.clear();
+ BL::Image::tiles_iterator b_iter;
+ for (b_image.tiles.begin(b_iter); b_iter != b_image.tiles.end(); ++b_iter) {
+ image->tiles.push_back(b_iter->number());
+ }
+
/* builtin images will use callback-based reading because
* they could only be loaded correct from blender side
*/
@@ -651,46 +679,14 @@ static ShaderNode *add_node(Scene *scene,
*/
int scene_frame = b_scene.frame_current();
int image_frame = image_user_frame_number(b_image_user, scene_frame);
- image->filename = b_image.name() + "@" + string_printf("%d", image_frame);
- image->builtin_data = b_image.ptr.data;
+ image->handle = scene->image_manager->add_image(
+ new BlenderImageLoader(b_image, image_frame), image->image_params());
}
else {
image->filename = image_user_file_path(
b_image_user, b_image, b_scene.frame_current(), true);
- image->builtin_data = NULL;
- }
-
- PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr;
- image->colorspace = get_enum_identifier(colorspace_ptr, "name");
-
- image->animated = b_image_node.image_user().use_auto_refresh();
- image->alpha_type = get_image_alpha_type(b_image);
-
- image->tiles.clear();
- BL::Image::tiles_iterator b_iter;
- for (b_image.tiles.begin(b_iter); b_iter != b_image.tiles.end(); ++b_iter) {
- image->tiles.push_back(b_iter->number());
}
-
- /* TODO: restore */
- /* TODO(sergey): Does not work properly when we change builtin type. */
-#if 0
- if (b_image.is_updated()) {
- scene->image_manager->tag_reload_image(image->filename.string(),
- image->builtin_data,
- get_image_interpolation(b_image_node),
- get_image_extension(b_image_node),
- image->use_alpha,
- image->colorspace);
- }
-#endif
}
- image->projection = (NodeImageProjection)b_image_node.projection();
- image->interpolation = get_image_interpolation(b_image_node);
- image->extension = get_image_extension(b_image_node);
- image->projection_blend = b_image_node.projection_blend();
- BL::TexMapping b_texture_mapping(b_image_node.texture_mapping());
- get_tex_mapping(&image->tex_mapping, b_texture_mapping);
node = image;
}
else if (b_node.is_a(&RNA_ShaderNodeTexEnvironment)) {
@@ -698,7 +694,19 @@ static ShaderNode *add_node(Scene *scene,
BL::Image b_image(b_env_node.image());
BL::ImageUser b_image_user(b_env_node.image_user());
EnvironmentTextureNode *env = new EnvironmentTextureNode();
+
+ env->interpolation = get_image_interpolation(b_env_node);
+ env->projection = (NodeEnvironmentProjection)b_env_node.projection();
+ BL::TexMapping b_texture_mapping(b_env_node.texture_mapping());
+ get_tex_mapping(&env->tex_mapping, b_texture_mapping);
+
if (b_image) {
+ PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr;
+ env->colorspace = get_enum_identifier(colorspace_ptr, "name");
+
+ env->animated = b_env_node.image_user().use_auto_refresh();
+ env->alpha_type = get_image_alpha_type(b_image);
+
bool is_builtin = b_image.packed_file() || b_image.source() == BL::Image::source_GENERATED ||
b_image.source() == BL::Image::source_MOVIE ||
(b_engine.is_preview() && b_image.source() != BL::Image::source_SEQUENCE);
@@ -706,38 +714,14 @@ static ShaderNode *add_node(Scene *scene,
if (is_builtin) {
int scene_frame = b_scene.frame_current();
int image_frame = image_user_frame_number(b_image_user, scene_frame);
- env->filename = b_image.name() + "@" + string_printf("%d", image_frame);
- env->builtin_data = b_image.ptr.data;
+ env->handle = scene->image_manager->add_image(new BlenderImageLoader(b_image, image_frame),
+ env->image_params());
}
else {
env->filename = image_user_file_path(
b_image_user, b_image, b_scene.frame_current(), false);
- env->builtin_data = NULL;
- }
-
- PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr;
- env->colorspace = get_enum_identifier(colorspace_ptr, "name");
-
- env->animated = b_env_node.image_user().use_auto_refresh();
- env->alpha_type = get_image_alpha_type(b_image);
-
- /* TODO: restore */
- /* TODO(sergey): Does not work properly when we change builtin type. */
-#if 0
- if (b_image.is_updated()) {
- scene->image_manager->tag_reload_image(env->filename.string(),
- env->builtin_data,
- get_image_interpolation(b_env_node),
- EXTENSION_REPEAT,
- env->use_alpha,
- env->colorspace);
}
-#endif
}
- env->interpolation = get_image_interpolation(b_env_node);
- env->projection = (NodeEnvironmentProjection)b_env_node.projection();
- BL::TexMapping b_texture_mapping(b_env_node.texture_mapping());
- get_tex_mapping(&env->tex_mapping, b_texture_mapping);
node = env;
}
else if (b_node.is_a(&RNA_ShaderNodeTexGradient)) {
@@ -770,6 +754,8 @@ static ShaderNode *add_node(Scene *scene,
BL::ShaderNodeTexWave b_wave_node(b_node);
WaveTextureNode *wave = new WaveTextureNode();
wave->type = (NodeWaveType)b_wave_node.wave_type();
+ wave->bands_direction = (NodeWaveBandsDirection)b_wave_node.bands_direction();
+ wave->rings_direction = (NodeWaveRingsDirection)b_wave_node.rings_direction();
wave->profile = (NodeWaveProfile)b_wave_node.wave_profile();
BL::TexMapping b_texture_mapping(b_wave_node.texture_mapping());
get_tex_mapping(&wave->tex_mapping, b_texture_mapping);
@@ -878,23 +864,13 @@ static ShaderNode *add_node(Scene *scene,
else if (b_node.is_a(&RNA_ShaderNodeTexPointDensity)) {
BL::ShaderNodeTexPointDensity b_point_density_node(b_node);
PointDensityTextureNode *point_density = new PointDensityTextureNode();
- point_density->filename = b_point_density_node.name();
point_density->space = (NodeTexVoxelSpace)b_point_density_node.space();
point_density->interpolation = get_image_interpolation(b_point_density_node);
- point_density->builtin_data = b_point_density_node.ptr.data;
- point_density->image_manager = scene->image_manager;
-
- /* TODO(sergey): Use more proper update flag. */
- if (true) {
- point_density->add_image();
- b_point_density_node.cache_point_density(b_depsgraph);
- scene->image_manager->tag_reload_image(point_density->filename.string(),
- point_density->builtin_data,
- point_density->interpolation,
- EXTENSION_CLIP,
- IMAGE_ALPHA_AUTO,
- u_colorspace_raw);
- }
+ point_density->handle = scene->image_manager->add_image(
+ new BlenderPointDensityLoader(b_depsgraph, b_point_density_node),
+ point_density->image_params());
+
+ b_point_density_node.cache_point_density(b_depsgraph);
node = point_density;
/* Transformation form world space to texture space.
@@ -1255,7 +1231,7 @@ void BlenderSync::sync_materials(BL::Depsgraph &b_depsgraph, bool update_all)
Shader *shader;
/* test if we need to sync */
- if (shader_map.sync(&shader, b_mat) || shader->need_sync_object || update_all) {
+ if (shader_map.add_or_update(&shader, b_mat) || shader->need_sync_object || update_all) {
ShaderGraph *graph = new ShaderGraph();
shader->name = b_mat.name().c_str();
@@ -1284,6 +1260,7 @@ void BlenderSync::sync_materials(BL::Depsgraph &b_depsgraph, bool update_all)
shader->heterogeneous_volume = !get_boolean(cmat, "homogeneous_volume");
shader->volume_sampling_method = get_volume_sampling(cmat);
shader->volume_interpolation_method = get_volume_interpolation(cmat);
+ shader->volume_step_rate = get_float(cmat, "volume_step_rate");
shader->displacement_method = get_displacement_method(cmat);
shader->set_graph(graph);
@@ -1348,6 +1325,7 @@ void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d,
shader->heterogeneous_volume = !get_boolean(cworld, "homogeneous_volume");
shader->volume_sampling_method = get_volume_sampling(cworld);
shader->volume_interpolation_method = get_volume_interpolation(cworld);
+ shader->volume_step_rate = get_float(cworld, "volume_step_size");
}
else if (new_viewport_parameters.use_scene_world && b_world) {
BackgroundNode *background = new BackgroundNode();
@@ -1480,7 +1458,7 @@ void BlenderSync::sync_lights(BL::Depsgraph &b_depsgraph, bool update_all)
Shader *shader;
/* test if we need to sync */
- if (shader_map.sync(&shader, b_light) || update_all) {
+ if (shader_map.add_or_update(&shader, b_light) || update_all) {
ShaderGraph *graph = new ShaderGraph();
/* create nodes */
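
The shader and light hunks above only rename id_map::sync() to add_or_update(); the create-or-report-recalc behaviour is unchanged (the old implementation is still visible in the blender_util.h hunk further down, before it moves to blender_id_map.h). A minimal self-contained sketch of that pattern, with simplified stand-in types rather than the real BL::ID and Shader classes:

#include <map>
#include <set>
#include <vector>

/* Toy version of id_map::add_or_update(): create the scene-side datablock the
 * first time a key is seen, otherwise report whether it was tagged for recalc.
 * Simplified stand-ins, not the real Cycles classes. */
template<typename K, typename T> struct toy_id_map {
  std::vector<T *> *scene_data;
  std::map<K, T *> b_map;
  std::set<K> b_recalc;

  bool add_or_update(T **r_data, const K &key)
  {
    bool recalc;
    T *data;
    typename std::map<K, T *>::iterator it = b_map.find(key);
    if (it == b_map.end()) {
      data = new T(); /* new datablock: always needs a full sync */
      scene_data->push_back(data);
      b_map[key] = data;
      recalc = true;
    }
    else {
      data = it->second; /* existing datablock: resync only if tagged */
      recalc = (b_recalc.count(key) != 0);
    }
    *r_data = data;
    return recalc;
  }
};

So shader_map.add_or_update(&shader, b_mat) returns true exactly when the shader graph has to be rebuilt, which is why the call sites above keep the same "|| update_all" pattern as before.
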
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index 20dbe23cdb7..28a737c3341 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -16,6 +16,7 @@
#include "render/background.h"
#include "render/camera.h"
+#include "render/curves.h"
#include "render/film.h"
#include "render/graph.h"
#include "render/integrator.h"
@@ -25,19 +26,18 @@
#include "render/object.h"
#include "render/scene.h"
#include "render/shader.h"
-#include "render/curves.h"
#include "device/device.h"
#include "blender/blender_device.h"
-#include "blender/blender_sync.h"
#include "blender/blender_session.h"
+#include "blender/blender_sync.h"
#include "blender/blender_util.h"
#include "util/util_debug.h"
#include "util/util_foreach.h"
-#include "util/util_opengl.h"
#include "util/util_hash.h"
+#include "util/util_opengl.h"
CCL_NAMESPACE_BEGIN
@@ -56,7 +56,7 @@ BlenderSync::BlenderSync(BL::RenderEngine &b_engine,
b_scene(b_scene),
shader_map(&scene->shaders),
object_map(&scene->objects),
- mesh_map(&scene->meshes),
+ geometry_map(&scene->geometry),
light_map(&scene->lights),
particle_system_map(&scene->particle_systems),
world_map(NULL),
@@ -108,10 +108,15 @@ void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d
}
if (dicing_prop_changed) {
- for (const pair<void *, Mesh *> &iter : mesh_map.key_to_scene_data()) {
- Mesh *mesh = iter.second;
- if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE) {
- mesh_map.set_recalc(iter.first);
+ for (const pair<GeometryKey, Geometry *> &iter : geometry_map.key_to_scene_data()) {
+ Geometry *geom = iter.second;
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE) {
+ PointerRNA id_ptr;
+ RNA_id_pointer_create((::ID *)iter.first.id, &id_ptr);
+ geometry_map.set_recalc(BL::ID(id_ptr));
+ }
}
}
}
@@ -146,7 +151,7 @@ void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d
if (updated_geometry ||
(object_subdivision_type(b_ob, preview, experimental) != Mesh::SUBDIVISION_NONE)) {
BL::ID key = BKE_object_is_modified(b_ob) ? b_ob : b_ob.data();
- mesh_map.set_recalc(key);
+ geometry_map.set_recalc(key);
}
}
else if (object_is_light(b_ob)) {
@@ -164,7 +169,7 @@ void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d
/* Mesh */
else if (b_id.is_a(&RNA_Mesh)) {
BL::Mesh b_mesh(b_id);
- mesh_map.set_recalc(b_mesh);
+ geometry_map.set_recalc(b_mesh);
}
/* World */
else if (b_id.is_a(&RNA_World)) {
@@ -173,6 +178,11 @@ void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d
world_recalc = true;
}
}
+ /* Volume */
+ else if (b_id.is_a(&RNA_Volume)) {
+ BL::Volume b_volume(b_id);
+ geometry_map.set_recalc(b_volume);
+ }
}
BlenderViewportParameters new_viewport_parameters(b_v3d);
@@ -211,7 +221,7 @@ void BlenderSync::sync_data(BL::RenderSettings &b_render,
sync_images();
sync_curve_settings();
- mesh_synced.clear(); /* use for objects and motion sync */
+ geometry_synced.clear(); /* used for objects and motion sync */
if (scene->need_motion() == Scene::MOTION_PASS || scene->need_motion() == Scene::MOTION_NONE ||
scene->camera->motion_position == Camera::MOTION_POSITION_CENTER) {
@@ -219,7 +229,7 @@ void BlenderSync::sync_data(BL::RenderSettings &b_render,
}
sync_motion(b_render, b_depsgraph, b_v3d, b_override, width, height, python_thread_state);
- mesh_synced.clear();
+ geometry_synced.clear();
/* Shader sync done at the end, since object sync uses it.
* false = don't delete unused shaders, not supported. */
@@ -252,7 +262,8 @@ void BlenderSync::sync_integrator()
integrator->transparent_max_bounce = get_int(cscene, "transparent_max_bounces");
integrator->volume_max_steps = get_int(cscene, "volume_max_steps");
- integrator->volume_step_size = get_float(cscene, "volume_step_size");
+ integrator->volume_step_rate = (preview) ? get_float(cscene, "volume_preview_step_rate") :
+ get_float(cscene, "volume_step_rate");
integrator->caustics_reflective = get_boolean(cscene, "caustics_reflective");
integrator->caustics_refractive = get_boolean(cscene, "caustics_refractive");
@@ -291,6 +302,16 @@ void BlenderSync::sync_integrator()
integrator->sample_all_lights_indirect = get_boolean(cscene, "sample_all_lights_indirect");
integrator->light_sampling_threshold = get_float(cscene, "light_sampling_threshold");
+ if (RNA_boolean_get(&cscene, "use_adaptive_sampling")) {
+ integrator->sampling_pattern = SAMPLING_PATTERN_PMJ;
+ integrator->adaptive_min_samples = get_int(cscene, "adaptive_min_samples");
+ integrator->adaptive_threshold = get_float(cscene, "adaptive_threshold");
+ }
+ else {
+ integrator->adaptive_min_samples = INT_MAX;
+ integrator->adaptive_threshold = 0.0f;
+ }
+
int diffuse_samples = get_int(cscene, "diffuse_samples");
int glossy_samples = get_int(cscene, "glossy_samples");
int transmission_samples = get_int(cscene, "transmission_samples");
@@ -307,6 +328,8 @@ void BlenderSync::sync_integrator()
integrator->mesh_light_samples = mesh_light_samples * mesh_light_samples;
integrator->subsurface_samples = subsurface_samples * subsurface_samples;
integrator->volume_samples = volume_samples * volume_samples;
+ integrator->adaptive_min_samples = min(
+ integrator->adaptive_min_samples * integrator->adaptive_min_samples, INT_MAX);
}
else {
integrator->diffuse_samples = diffuse_samples;
@@ -388,6 +411,7 @@ void BlenderSync::sync_view_layer(BL::SpaceView3D & /*b_v3d*/, BL::ViewLayer &b_
view_layer.use_background_ao = b_view_layer.use_ao();
view_layer.use_surfaces = b_view_layer.use_solid();
view_layer.use_hair = b_view_layer.use_strand();
+ view_layer.use_volumes = b_view_layer.use_volumes();
/* Material override. */
view_layer.material_override = b_view_layer.material_override();
@@ -456,19 +480,16 @@ PassType BlenderSync::get_pass_type(BL::RenderPass &b_pass)
MAP_PASS("DiffDir", PASS_DIFFUSE_DIRECT);
MAP_PASS("GlossDir", PASS_GLOSSY_DIRECT);
MAP_PASS("TransDir", PASS_TRANSMISSION_DIRECT);
- MAP_PASS("SubsurfaceDir", PASS_SUBSURFACE_DIRECT);
MAP_PASS("VolumeDir", PASS_VOLUME_DIRECT);
MAP_PASS("DiffInd", PASS_DIFFUSE_INDIRECT);
MAP_PASS("GlossInd", PASS_GLOSSY_INDIRECT);
MAP_PASS("TransInd", PASS_TRANSMISSION_INDIRECT);
- MAP_PASS("SubsurfaceInd", PASS_SUBSURFACE_INDIRECT);
MAP_PASS("VolumeInd", PASS_VOLUME_INDIRECT);
MAP_PASS("DiffCol", PASS_DIFFUSE_COLOR);
MAP_PASS("GlossCol", PASS_GLOSSY_COLOR);
MAP_PASS("TransCol", PASS_TRANSMISSION_COLOR);
- MAP_PASS("SubsurfaceCol", PASS_SUBSURFACE_COLOR);
MAP_PASS("Emit", PASS_EMISSION);
MAP_PASS("Env", PASS_BACKGROUND);
@@ -482,6 +503,8 @@ PassType BlenderSync::get_pass_type(BL::RenderPass &b_pass)
MAP_PASS("Debug Ray Bounces", PASS_RAY_BOUNCES);
#endif
MAP_PASS("Debug Render Time", PASS_RENDER_TIME);
+ MAP_PASS("AdaptiveAuxBuffer", PASS_ADAPTIVE_AUX_BUFFER);
+ MAP_PASS("Debug Sample Count", PASS_SAMPLE_COUNT);
if (string_startswith(name, cryptomatte_prefix)) {
return PASS_CRYPTOMATTE;
}
@@ -517,7 +540,9 @@ int BlenderSync::get_denoising_pass(BL::RenderPass &b_pass)
return -1;
}
-vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_view_layer)
+vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay,
+ BL::ViewLayer &b_view_layer,
+ bool adaptive_sampling)
{
vector<Pass> passes;
@@ -551,8 +576,6 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLa
MAP_OPTION("denoising_glossy_indirect", DENOISING_CLEAN_GLOSSY_IND);
MAP_OPTION("denoising_transmission_direct", DENOISING_CLEAN_TRANSMISSION_DIR);
MAP_OPTION("denoising_transmission_indirect", DENOISING_CLEAN_TRANSMISSION_IND);
- MAP_OPTION("denoising_subsurface_direct", DENOISING_CLEAN_SUBSURFACE_DIR);
- MAP_OPTION("denoising_subsurface_indirect", DENOISING_CLEAN_SUBSURFACE_IND);
#undef MAP_OPTION
}
b_engine.add_pass("Noisy Image", 4, "RGBA", b_view_layer.name().c_str());
@@ -595,6 +618,10 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLa
b_engine.add_pass("Debug Render Time", 1, "X", b_view_layer.name().c_str());
Pass::add(PASS_RENDER_TIME, passes, "Debug Render Time");
}
+ if (get_boolean(crp, "pass_debug_sample_count")) {
+ b_engine.add_pass("Debug Sample Count", 1, "X", b_view_layer.name().c_str());
+ Pass::add(PASS_SAMPLE_COUNT, passes, "Debug Sample Count");
+ }
if (get_boolean(crp, "use_pass_volume_direct")) {
b_engine.add_pass("VolumeDir", 3, "RGB", b_view_layer.name().c_str());
Pass::add(PASS_VOLUME_DIRECT, passes, "VolumeDir");
@@ -606,12 +633,12 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLa
/* Cryptomatte stores two ID/weight pairs per RGBA layer.
* User facing parameter is the number of pairs. */
- int crypto_depth = min(16, get_int(crp, "pass_crypto_depth")) / 2;
+ int crypto_depth = min(16, get_int(crp, "pass_crypto_depth"));
scene->film->cryptomatte_depth = crypto_depth;
scene->film->cryptomatte_passes = CRYPT_NONE;
if (get_boolean(crp, "use_pass_crypto_object")) {
- for (int i = 0; i < crypto_depth; ++i) {
- string passname = cryptomatte_prefix + string_printf("Object%02d", i);
+ for (int i = 0; i < crypto_depth; i += 2) {
+ string passname = cryptomatte_prefix + string_printf("Object%02d", i / 2);
b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str());
Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str());
}
@@ -619,8 +646,8 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLa
CRYPT_OBJECT);
}
if (get_boolean(crp, "use_pass_crypto_material")) {
- for (int i = 0; i < crypto_depth; ++i) {
- string passname = cryptomatte_prefix + string_printf("Material%02d", i);
+ for (int i = 0; i < crypto_depth; i += 2) {
+ string passname = cryptomatte_prefix + string_printf("Material%02d", i / 2);
b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str());
Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str());
}
@@ -628,8 +655,8 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLa
CRYPT_MATERIAL);
}
if (get_boolean(crp, "use_pass_crypto_asset")) {
- for (int i = 0; i < crypto_depth; ++i) {
- string passname = cryptomatte_prefix + string_printf("Asset%02d", i);
+ for (int i = 0; i < crypto_depth; i += 2) {
+ string passname = cryptomatte_prefix + string_printf("Asset%02d", i / 2);
b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str());
Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str());
}
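
The three cryptomatte hunks above change the bookkeeping (scene->film->cryptomatte_depth now stores the raw user depth and the loops advance by two) but produce exactly the same pass names as the old "crypto_depth / 2" version. A standalone check, assuming the usual "Crypto" value for cryptomatte_prefix:

#include <algorithm>
#include <cstdio>

int main()
{
  const char *cryptomatte_prefix = "Crypto"; /* assumed value, for illustration */
  const int pass_crypto_depth = 6;           /* user asks for 6 ID/weight pairs */

  /* Same loop shape as the patch: two ID/weight pairs fit in one RGBA pass. */
  const int crypto_depth = std::min(16, pass_crypto_depth);
  for (int i = 0; i < crypto_depth; i += 2) {
    std::printf("%sObject%02d\n", cryptomatte_prefix, i / 2);
  }
  /* Prints CryptoObject00, CryptoObject01 and CryptoObject02 -- the same
   * three passes the previous "++i over crypto_depth / 2" loop generated. */
  return 0;
}
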
@@ -641,6 +668,13 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLa
CRYPT_ACCURATE);
}
+ if (adaptive_sampling) {
+ Pass::add(PASS_ADAPTIVE_AUX_BUFFER, passes);
+ if (!get_boolean(crp, "pass_debug_sample_count")) {
+ Pass::add(PASS_SAMPLE_COUNT, passes);
+ }
+ }
+
RNA_BEGIN (&crp, b_aov, "aovs") {
bool is_color = (get_enum(b_aov, "type") == 1);
string name = get_string(b_aov, "name");
@@ -832,6 +866,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
/* other parameters */
params.start_resolution = get_int(cscene, "preview_start_resolution");
+ params.denoising_start_sample = get_int(cscene, "preview_denoising_start_sample");
params.pixel_size = b_engine.get_preview_pixel_size(b_scene);
/* other parameters */
@@ -841,20 +876,10 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
/* progressive refine */
BL::RenderSettings b_r = b_scene.render();
- params.progressive_refine = (b_engine.is_preview() ||
- get_boolean(cscene, "use_progressive_refine")) &&
- !b_r.use_save_buffers();
-
- if (params.progressive_refine) {
- BL::Scene::view_layers_iterator b_view_layer;
- for (b_scene.view_layers.begin(b_view_layer); b_view_layer != b_scene.view_layers.end();
- ++b_view_layer) {
- PointerRNA crl = RNA_pointer_get(&b_view_layer->ptr, "cycles");
- if (get_boolean(crl, "use_denoising")) {
- params.progressive_refine = false;
- }
- }
- }
+ params.progressive_refine = b_engine.is_preview() ||
+ get_boolean(cscene, "use_progressive_refine");
+ if (b_r.use_save_buffers())
+ params.progressive_refine = false;
if (background) {
if (params.progressive_refine)
@@ -889,6 +914,8 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
params.use_profiling = params.device.has_profiling && !b_engine.is_preview() && background &&
BlenderSession::print_render_stats;
+ params.adaptive_sampling = RNA_boolean_get(&cscene, "use_adaptive_sampling");
+
return params;
}
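
The adaptive-sampling additions to sync_integrator() amount to: switch to the PMJ sampling pattern and read the minimum sample count and noise threshold when the feature is enabled, and effectively disable adaptivity (min samples = INT_MAX, threshold = 0) when it is not; with square-samples on, the minimum is squared like every other sample count. A self-contained restatement of that decision, using a 64-bit intermediate so the INT_MAX clamp stays well-defined even for large inputs (the patch squares in int, which is fine for the small values used in practice):

#include <algorithm>
#include <climits>
#include <cstdint>

struct AdaptiveParams {
  int min_samples;
  float threshold;
};

/* Mirrors the logic above with plain inputs instead of RNA lookups. */
AdaptiveParams adaptive_params(bool use_adaptive,
                               bool square_samples,
                               int adaptive_min_samples,
                               float adaptive_threshold)
{
  AdaptiveParams p;
  if (!use_adaptive) {
    p.min_samples = INT_MAX; /* adaptivity never kicks in */
    p.threshold = 0.0f;
    return p;
  }
  int64_t min_samples = adaptive_min_samples;
  if (square_samples) {
    min_samples *= min_samples;
  }
  p.min_samples = (int)std::min<int64_t>(min_samples, INT_MAX);
  p.threshold = adaptive_threshold;
  return p;
}
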
diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h
index a80f484fb92..650b4f5bb4e 100644
--- a/intern/cycles/blender/blender_sync.h
+++ b/intern/cycles/blender/blender_sync.h
@@ -18,11 +18,11 @@
#define __BLENDER_SYNC_H__
#include "MEM_guardedalloc.h"
-#include "RNA_types.h"
#include "RNA_access.h"
#include "RNA_blender_cpp.h"
+#include "RNA_types.h"
-#include "blender/blender_util.h"
+#include "blender/blender_id_map.h"
#include "blender/blender_viewport.h"
#include "render/scene.h"
@@ -40,6 +40,7 @@ class BlenderObjectCulling;
class BlenderViewportParameters;
class Camera;
class Film;
+class Hair;
class Light;
class Mesh;
class Object;
@@ -70,7 +71,9 @@ class BlenderSync {
int height,
void **python_thread_state);
void sync_view_layer(BL::SpaceView3D &b_v3d, BL::ViewLayer &b_view_layer);
- vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer, BL::ViewLayer &b_view_layer);
+ vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer,
+ BL::ViewLayer &b_view_layer,
+ bool adaptive_sampling);
void sync_integrator();
void sync_camera(BL::RenderSettings &b_render,
BL::Object &b_override,
@@ -94,7 +97,8 @@ class BlenderSync {
BL::Scene &b_scene,
bool background);
static bool get_session_pause(BL::Scene &b_scene, bool background);
- static BufferParams get_buffer_params(BL::RenderSettings &b_render,
+ static BufferParams get_buffer_params(BL::Scene &b_scene,
+ BL::RenderSettings &b_render,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
Camera *cam,
@@ -118,28 +122,64 @@ class BlenderSync {
void **python_thread_state);
void sync_film(BL::SpaceView3D &b_v3d);
void sync_view();
+
+ /* Shader */
void sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, bool update_all);
void sync_shaders(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d);
- void sync_curve_settings();
-
void sync_nodes(Shader *shader, BL::ShaderNodeTree &b_ntree);
- Mesh *sync_mesh(BL::Depsgraph &b_depsgrpah,
- BL::Object &b_ob,
- BL::Object &b_ob_instance,
- bool object_updated,
- bool show_self,
- bool show_particles);
- void sync_curves(
- Mesh *mesh, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step = 0);
+
+ /* Object */
Object *sync_object(BL::Depsgraph &b_depsgraph,
BL::ViewLayer &b_view_layer,
BL::DepsgraphObjectInstance &b_instance,
float motion_time,
- bool show_self,
- bool show_particles,
+ bool use_particle_hair,
bool show_lights,
BlenderObjectCulling &culling,
bool *use_portal);
+
+ /* Volume */
+ void sync_volume(BL::Object &b_ob, Mesh *mesh, const vector<Shader *> &used_shaders);
+
+ /* Mesh */
+ void sync_mesh(BL::Depsgraph b_depsgraph,
+ BL::Object b_ob,
+ Mesh *mesh,
+ const vector<Shader *> &used_shaders);
+ void sync_mesh_motion(BL::Depsgraph b_depsgraph, BL::Object b_ob, Mesh *mesh, int motion_step);
+
+ /* Hair */
+ void sync_hair(BL::Depsgraph b_depsgraph,
+ BL::Object b_ob,
+ Geometry *geom,
+ const vector<Shader *> &used_shaders);
+ void sync_hair_motion(BL::Depsgraph b_depsgraph,
+ BL::Object b_ob,
+ Geometry *geom,
+ int motion_step);
+ void sync_hair(Hair *hair, BL::Object &b_ob, bool motion, int motion_step = 0);
+ void sync_particle_hair(
+ Geometry *geom, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step = 0);
+ void sync_curve_settings();
+ bool object_has_particle_hair(BL::Object b_ob);
+
+ /* Camera */
+ void sync_camera_motion(
+ BL::RenderSettings &b_render, BL::Object &b_ob, int width, int height, float motion_time);
+
+ /* Geometry */
+ Geometry *sync_geometry(BL::Depsgraph &b_depsgrpah,
+ BL::Object &b_ob,
+ BL::Object &b_ob_instance,
+ bool object_updated,
+ bool use_particle_hair);
+ void sync_geometry_motion(BL::Depsgraph &b_depsgraph,
+ BL::Object &b_ob,
+ Object *object,
+ float motion_time,
+ bool use_particle_hair);
+
+ /* Light */
void sync_light(BL::Object &b_parent,
int persistent_id[OBJECT_PERSISTENT_ID_SIZE],
BL::Object &b_ob,
@@ -148,14 +188,8 @@ class BlenderSync {
Transform &tfm,
bool *use_portal);
void sync_background_light(BL::SpaceView3D &b_v3d, bool use_portal);
- void sync_mesh_motion(BL::Depsgraph &b_depsgraph,
- BL::Object &b_ob,
- Object *object,
- float motion_time);
- void sync_camera_motion(
- BL::RenderSettings &b_render, BL::Object &b_ob, int width, int height, float motion_time);
- /* particles */
+ /* Particles */
bool sync_dupli_particle(BL::Object &b_ob,
BL::DepsgraphObjectInstance &b_instance,
Object *object);
@@ -179,11 +213,11 @@ class BlenderSync {
id_map<void *, Shader> shader_map;
id_map<ObjectKey, Object> object_map;
- id_map<void *, Mesh> mesh_map;
+ id_map<GeometryKey, Geometry> geometry_map;
id_map<ObjectKey, Light> light_map;
id_map<ParticleSystemKey, ParticleSystem> particle_system_map;
- set<Mesh *> mesh_synced;
- set<Mesh *> mesh_motion_synced;
+ set<Geometry *> geometry_synced;
+ set<Geometry *> geometry_motion_synced;
set<float> motion_times;
void *world_map;
bool world_recalc;
@@ -203,6 +237,7 @@ class BlenderSync {
use_background_ao(true),
use_surfaces(true),
use_hair(true),
+ use_volumes(true),
samples(0),
bound_samples(false)
{
@@ -214,6 +249,7 @@ class BlenderSync {
bool use_background_ao;
bool use_surfaces;
bool use_hair;
+ bool use_volumes;
int samples;
bool bound_samples;
} view_layer;
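
The header reorganization above replaces the single sync_mesh() entry point with per-type routines (sync_mesh, sync_hair, sync_volume) behind a generic sync_geometry(). The actual routing lives in the new blender_geometry.cpp, which is not part of this excerpt, so the following dispatch is only a plausible sketch of what the declarations imply, not the real implementation:

#include <vector>

struct Shader;

struct Geometry {
  enum Type { MESH, HAIR } type;
};
struct Mesh : Geometry {};
struct Hair : Geometry {};

/* Declarations only; stand-ins for the member functions declared above. */
void sync_mesh(Mesh *mesh, const std::vector<Shader *> &used_shaders);
void sync_hair(Hair *hair, const std::vector<Shader *> &used_shaders);
void sync_volume(Mesh *mesh, const std::vector<Shader *> &used_shaders);

void sync_geometry_sketch(Geometry *geom,
                          const std::vector<Shader *> &used_shaders,
                          bool is_volume_object)
{
  if (geom->type == Geometry::HAIR) {
    sync_hair(static_cast<Hair *>(geom), used_shaders);
  }
  else if (is_volume_object) {
    /* Volume objects and smoke domains only get voxel attributes;
     * see blender_volume.cpp further down. */
    sync_volume(static_cast<Mesh *>(geom), used_shaders);
  }
  else {
    sync_mesh(static_cast<Mesh *>(geom), used_shaders);
  }
}
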
diff --git a/intern/cycles/blender/blender_texture.h b/intern/cycles/blender/blender_texture.h
index 896bf62da70..8ab061aaed9 100644
--- a/intern/cycles/blender/blender_texture.h
+++ b/intern/cycles/blender/blender_texture.h
@@ -17,8 +17,8 @@
#ifndef __BLENDER_TEXTURE_H__
#define __BLENDER_TEXTURE_H__
-#include <stdlib.h>
#include "blender/blender_sync.h"
+#include <stdlib.h>
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/blender/blender_util.h b/intern/cycles/blender/blender_util.h
index bea30a20b8c..ad90a5f8d52 100644
--- a/intern/cycles/blender/blender_util.h
+++ b/intern/cycles/blender/blender_util.h
@@ -483,7 +483,9 @@ static inline void mesh_texture_space(BL::Mesh &b_mesh, float3 &loc, float3 &siz
}
/* Object motion steps, returns 0 if no motion blur needed. */
-static inline uint object_motion_steps(BL::Object &b_parent, BL::Object &b_ob)
+static inline uint object_motion_steps(BL::Object &b_parent,
+ BL::Object &b_ob,
+ const int max_steps = INT_MAX)
{
/* Get motion enabled and steps from object itself. */
PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles");
@@ -492,7 +494,7 @@ static inline uint object_motion_steps(BL::Object &b_parent, BL::Object &b_ob)
return 0;
}
- uint steps = max(1, get_int(cobject, "motion_steps"));
+ int steps = max(1, get_int(cobject, "motion_steps"));
/* Also check parent object, so motion blur and steps can be
* controlled by dupligroup duplicator for linked groups. */
@@ -510,7 +512,7 @@ static inline uint object_motion_steps(BL::Object &b_parent, BL::Object &b_ob)
/* Use uneven number of steps so we get one keyframe at the current frame,
* and use 2^(steps - 1) so objects with more/fewer steps still have samples
* at the same times, to avoid sampling at many different times. */
- return (2 << (steps - 1)) + 1;
+ return min((2 << (steps - 1)) + 1, max_steps);
}
/* object uses deformation motion blur */
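
The only functional change to object_motion_steps() is the new max_steps clamp; the step-count formula itself is untouched. A quick worked example of what it yields:

#include <algorithm>
#include <cassert>
#include <climits>

/* Same formula as above: an odd sample count with one sample exactly on the
 * current frame, doubling the subdivisions for each extra step setting. */
static int motion_blur_samples(int steps, int max_steps)
{
  return std::min((2 << (steps - 1)) + 1, max_steps);
}

int main()
{
  assert(motion_blur_samples(1, INT_MAX) == 3); /* start, center, end */
  assert(motion_blur_samples(2, INT_MAX) == 5);
  assert(motion_blur_samples(3, INT_MAX) == 9);
  assert(motion_blur_samples(3, 7) == 7);       /* clamped by the new parameter */
  return 0;
}
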
@@ -531,7 +533,7 @@ static inline bool object_use_deform_motion(BL::Object &b_parent, BL::Object &b_
return use_deform_motion;
}
-static inline BL::FluidDomainSettings object_fluid_domain_find(BL::Object &b_ob)
+static inline BL::FluidDomainSettings object_fluid_liquid_domain_find(BL::Object &b_ob)
{
BL::Object::modifiers_iterator b_mod;
@@ -539,8 +541,28 @@ static inline BL::FluidDomainSettings object_fluid_domain_find(BL::Object &b_ob)
if (b_mod->is_a(&RNA_FluidModifier)) {
BL::FluidModifier b_mmd(*b_mod);
- if (b_mmd.fluid_type() == BL::FluidModifier::fluid_type_DOMAIN)
+ if (b_mmd.fluid_type() == BL::FluidModifier::fluid_type_DOMAIN &&
+ b_mmd.domain_settings().domain_type() == BL::FluidDomainSettings::domain_type_LIQUID) {
return b_mmd.domain_settings();
+ }
+ }
+ }
+
+ return BL::FluidDomainSettings(PointerRNA_NULL);
+}
+
+static inline BL::FluidDomainSettings object_fluid_gas_domain_find(BL::Object &b_ob)
+{
+ BL::Object::modifiers_iterator b_mod;
+
+ for (b_ob.modifiers.begin(b_mod); b_mod != b_ob.modifiers.end(); ++b_mod) {
+ if (b_mod->is_a(&RNA_FluidModifier)) {
+ BL::FluidModifier b_mmd(*b_mod);
+
+ if (b_mmd.fluid_type() == BL::FluidModifier::fluid_type_DOMAIN &&
+ b_mmd.domain_settings().domain_type() == BL::FluidDomainSettings::domain_type_GAS) {
+ return b_mmd.domain_settings();
+ }
}
}
@@ -573,209 +595,20 @@ static inline Mesh::SubdivisionType object_subdivision_type(BL::Object &b_ob,
return Mesh::SUBDIVISION_NONE;
}
-/* ID Map
- *
- * Utility class to keep in sync with blender data.
- * Used for objects, meshes, lights and shaders. */
-
-template<typename K, typename T> class id_map {
- public:
- id_map(vector<T *> *scene_data_)
- {
- scene_data = scene_data_;
- }
-
- T *find(const BL::ID &id)
- {
- return find(id.ptr.owner_id);
- }
-
- T *find(const K &key)
- {
- if (b_map.find(key) != b_map.end()) {
- T *data = b_map[key];
- return data;
- }
-
- return NULL;
- }
-
- void set_recalc(const BL::ID &id)
- {
- b_recalc.insert(id.ptr.data);
- }
-
- void set_recalc(void *id_ptr)
- {
- b_recalc.insert(id_ptr);
- }
-
- bool has_recalc()
- {
- return !(b_recalc.empty());
- }
-
- void pre_sync()
- {
- used_set.clear();
- }
-
- bool sync(T **r_data, const BL::ID &id)
- {
- return sync(r_data, id, id, id.ptr.owner_id);
- }
-
- bool sync(T **r_data, const BL::ID &id, const BL::ID &parent, const K &key)
- {
- T *data = find(key);
- bool recalc;
-
- if (!data) {
- /* add data if it didn't exist yet */
- data = new T();
- scene_data->push_back(data);
- b_map[key] = data;
- recalc = true;
- }
- else {
- recalc = (b_recalc.find(id.ptr.data) != b_recalc.end());
- if (parent.ptr.data)
- recalc = recalc || (b_recalc.find(parent.ptr.data) != b_recalc.end());
- }
-
- used(data);
-
- *r_data = data;
- return recalc;
- }
-
- bool is_used(const K &key)
- {
- T *data = find(key);
- return (data) ? used_set.find(data) != used_set.end() : false;
- }
-
- void used(T *data)
- {
- /* tag data as still in use */
- used_set.insert(data);
- }
-
- void set_default(T *data)
- {
- b_map[NULL] = data;
- }
-
- bool post_sync(bool do_delete = true)
- {
- /* remove unused data */
- vector<T *> new_scene_data;
- typename vector<T *>::iterator it;
- bool deleted = false;
-
- for (it = scene_data->begin(); it != scene_data->end(); it++) {
- T *data = *it;
-
- if (do_delete && used_set.find(data) == used_set.end()) {
- delete data;
- deleted = true;
- }
- else
- new_scene_data.push_back(data);
- }
-
- *scene_data = new_scene_data;
-
- /* update mapping */
- map<K, T *> new_map;
- typedef pair<const K, T *> TMapPair;
- typename map<K, T *>::iterator jt;
-
- for (jt = b_map.begin(); jt != b_map.end(); jt++) {
- TMapPair &pair = *jt;
-
- if (used_set.find(pair.second) != used_set.end())
- new_map[pair.first] = pair.second;
- }
-
- used_set.clear();
- b_recalc.clear();
- b_map = new_map;
-
- return deleted;
- }
-
- const map<K, T *> &key_to_scene_data()
- {
- return b_map;
- }
-
- protected:
- vector<T *> *scene_data;
- map<K, T *> b_map;
- set<T *> used_set;
- set<void *> b_recalc;
-};
-
-/* Object Key */
-
-enum { OBJECT_PERSISTENT_ID_SIZE = 16 };
-
-struct ObjectKey {
- void *parent;
- int id[OBJECT_PERSISTENT_ID_SIZE];
- void *ob;
-
- ObjectKey(void *parent_, int id_[OBJECT_PERSISTENT_ID_SIZE], void *ob_)
- : parent(parent_), ob(ob_)
- {
- if (id_)
- memcpy(id, id_, sizeof(id));
- else
- memset(id, 0, sizeof(id));
- }
-
- bool operator<(const ObjectKey &k) const
- {
- if (ob < k.ob) {
- return true;
- }
- else if (ob == k.ob) {
- if (parent < k.parent)
- return true;
- else if (parent == k.parent)
- return memcmp(id, k.id, sizeof(id)) < 0;
- }
-
- return false;
- }
-};
-
-/* Particle System Key */
-
-struct ParticleSystemKey {
- void *ob;
- int id[OBJECT_PERSISTENT_ID_SIZE];
-
- ParticleSystemKey(void *ob_, int id_[OBJECT_PERSISTENT_ID_SIZE]) : ob(ob_)
- {
- if (id_)
- memcpy(id, id_, sizeof(id));
- else
- memset(id, 0, sizeof(id));
- }
+static inline uint object_ray_visibility(BL::Object &b_ob)
+{
+ PointerRNA cvisibility = RNA_pointer_get(&b_ob.ptr, "cycles_visibility");
+ uint flag = 0;
- bool operator<(const ParticleSystemKey &k) const
- {
- /* first id is particle index, we don't compare that */
- if (ob < k.ob)
- return true;
- else if (ob == k.ob)
- return memcmp(id + 1, k.id + 1, sizeof(int) * (OBJECT_PERSISTENT_ID_SIZE - 1)) < 0;
+ flag |= get_boolean(cvisibility, "camera") ? PATH_RAY_CAMERA : 0;
+ flag |= get_boolean(cvisibility, "diffuse") ? PATH_RAY_DIFFUSE : 0;
+ flag |= get_boolean(cvisibility, "glossy") ? PATH_RAY_GLOSSY : 0;
+ flag |= get_boolean(cvisibility, "transmission") ? PATH_RAY_TRANSMIT : 0;
+ flag |= get_boolean(cvisibility, "shadow") ? PATH_RAY_SHADOW : 0;
+ flag |= get_boolean(cvisibility, "scatter") ? PATH_RAY_VOLUME_SCATTER : 0;
- return false;
- }
-};
+ return flag;
+}
class EdgeMap {
public:
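
The new object_ray_visibility() helper simply packs the six per-object visibility checkboxes into a bitmask of PATH_RAY_* flags. A self-contained illustration with stand-in flag values (the real PATH_RAY_* bits are defined in the Cycles kernel headers and differ from these):

#include <cstdint>
#include <cstdio>

enum : uint32_t {
  RAY_CAMERA = 1 << 0,
  RAY_DIFFUSE = 1 << 1,
  RAY_GLOSSY = 1 << 2,
  RAY_TRANSMIT = 1 << 3,
  RAY_SHADOW = 1 << 4,
  RAY_VOLUME_SCATTER = 1 << 5,
};

static uint32_t ray_visibility(
    bool camera, bool diffuse, bool glossy, bool transmission, bool shadow, bool scatter)
{
  uint32_t flag = 0;
  flag |= camera ? RAY_CAMERA : 0;
  flag |= diffuse ? RAY_DIFFUSE : 0;
  flag |= glossy ? RAY_GLOSSY : 0;
  flag |= transmission ? RAY_TRANSMIT : 0;
  flag |= shadow ? RAY_SHADOW : 0;
  flag |= scatter ? RAY_VOLUME_SCATTER : 0;
  return flag;
}

int main()
{
  /* An object hidden from the camera but visible to everything else. */
  const uint32_t vis = ray_visibility(false, true, true, true, true, true);
  std::printf("0x%02x\n", (unsigned)vis); /* 0x3e: every bit except RAY_CAMERA */
  return 0;
}
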
diff --git a/intern/cycles/blender/blender_viewport.cpp b/intern/cycles/blender/blender_viewport.cpp
index 73ef5f94720..93e84e28032 100644
--- a/intern/cycles/blender/blender_viewport.cpp
+++ b/intern/cycles/blender/blender_viewport.cpp
@@ -61,6 +61,17 @@ const bool BlenderViewportParameters::custom_viewport_parameters() const
return !(use_scene_world && use_scene_lights);
}
+bool BlenderViewportParameters::get_viewport_display_denoising(BL::SpaceView3D &b_v3d,
+ BL::Scene &b_scene)
+{
+ bool use_denoising = false;
+ if (b_v3d) {
+ PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
+ use_denoising = get_enum(cscene, "preview_denoising") != 0;
+ }
+ return use_denoising;
+}
+
PassType BlenderViewportParameters::get_viewport_display_render_pass(BL::SpaceView3D &b_v3d)
{
PassType display_pass = PASS_NONE;
@@ -72,6 +83,11 @@ PassType BlenderViewportParameters::get_viewport_display_render_pass(BL::SpaceVi
return display_pass;
}
+bool update_viewport_display_denoising(BL::SpaceView3D &b_v3d, BL::Scene &b_scene)
+{
+ return BlenderViewportParameters::get_viewport_display_denoising(b_v3d, b_scene);
+}
+
PassType update_viewport_display_passes(BL::SpaceView3D &b_v3d, vector<Pass> &passes)
{
if (b_v3d) {
diff --git a/intern/cycles/blender/blender_viewport.h b/intern/cycles/blender/blender_viewport.h
index f26d0d38115..3e44e552f1d 100644
--- a/intern/cycles/blender/blender_viewport.h
+++ b/intern/cycles/blender/blender_viewport.h
@@ -18,9 +18,9 @@
#define __BLENDER_VIEWPORT_H__
#include "MEM_guardedalloc.h"
-#include "RNA_types.h"
#include "RNA_access.h"
#include "RNA_blender_cpp.h"
+#include "RNA_types.h"
#include "render/film.h"
#include "util/util_param.h"
@@ -44,11 +44,15 @@ class BlenderViewportParameters {
friend class BlenderSync;
public:
+ /* Get whether to enable denoising data pass in viewport. */
+ static bool get_viewport_display_denoising(BL::SpaceView3D &b_v3d, BL::Scene &b_scene);
  /* Retrieve the render pass that needs to be displayed on the given `SpaceView3D`.
   * When the `b_v3d` parameter is not given, `PASS_NONE` will be returned. */
static PassType get_viewport_display_render_pass(BL::SpaceView3D &b_v3d);
};
+bool update_viewport_display_denoising(BL::SpaceView3D &b_v3d, BL::Scene &b_scene);
+
PassType update_viewport_display_passes(BL::SpaceView3D &b_v3d, vector<Pass> &passes);
CCL_NAMESPACE_END
diff --git a/intern/cycles/blender/blender_volume.cpp b/intern/cycles/blender/blender_volume.cpp
new file mode 100644
index 00000000000..6254a1a1b24
--- /dev/null
+++ b/intern/cycles/blender/blender_volume.cpp
@@ -0,0 +1,379 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "render/colorspace.h"
+#include "render/image.h"
+#include "render/image_vdb.h"
+#include "render/mesh.h"
+#include "render/object.h"
+
+#include "blender/blender_sync.h"
+#include "blender/blender_util.h"
+
+#ifdef WITH_OPENVDB
+# include <openvdb/openvdb.h>
+openvdb::GridBase::ConstPtr BKE_volume_grid_openvdb_for_read(const struct Volume *volume,
+ struct VolumeGrid *grid);
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+/* TODO: verify this is not loading unnecessary attributes. */
+class BlenderSmokeLoader : public ImageLoader {
+ public:
+ BlenderSmokeLoader(const BL::Object &b_ob, AttributeStandard attribute)
+ : b_ob(b_ob), attribute(attribute)
+ {
+ }
+
+ bool load_metadata(ImageMetaData &metadata) override
+ {
+ BL::FluidDomainSettings b_domain = object_fluid_gas_domain_find(b_ob);
+
+ if (!b_domain) {
+ return false;
+ }
+
+ if (attribute == ATTR_STD_VOLUME_DENSITY || attribute == ATTR_STD_VOLUME_FLAME ||
+ attribute == ATTR_STD_VOLUME_HEAT || attribute == ATTR_STD_VOLUME_TEMPERATURE) {
+ metadata.type = IMAGE_DATA_TYPE_FLOAT;
+ metadata.channels = 1;
+ }
+ else if (attribute == ATTR_STD_VOLUME_COLOR) {
+ metadata.type = IMAGE_DATA_TYPE_FLOAT4;
+ metadata.channels = 4;
+ }
+ else if (attribute == ATTR_STD_VOLUME_VELOCITY) {
+ metadata.type = IMAGE_DATA_TYPE_FLOAT4;
+ metadata.channels = 3;
+ }
+ else {
+ return false;
+ }
+
+ int3 resolution = get_int3(b_domain.domain_resolution());
+ int amplify = (b_domain.use_noise()) ? b_domain.noise_scale() : 1;
+
+ /* Velocity and heat data is always low-resolution. */
+ if (attribute == ATTR_STD_VOLUME_VELOCITY || attribute == ATTR_STD_VOLUME_HEAT) {
+ amplify = 1;
+ }
+
+ metadata.width = resolution.x * amplify;
+ metadata.height = resolution.y * amplify;
+ metadata.depth = resolution.z * amplify;
+
+ /* Create a matrix to transform from object space to mesh texture space.
+ * This does not work with deformations but that can probably only be done
+ * well with a volume grid mapping of coordinates. */
+ BL::Mesh b_mesh(b_ob.data());
+ float3 loc, size;
+ mesh_texture_space(b_mesh, loc, size);
+ metadata.transform_3d = transform_translate(-loc) * transform_scale(size);
+ metadata.use_transform_3d = true;
+
+ return true;
+ }
+
+ bool load_pixels(const ImageMetaData &, void *pixels, const size_t, const bool) override
+ {
+ /* smoke volume data */
+ BL::FluidDomainSettings b_domain = object_fluid_gas_domain_find(b_ob);
+
+ if (!b_domain) {
+ return false;
+ }
+#ifdef WITH_FLUID
+ int3 resolution = get_int3(b_domain.domain_resolution());
+ int length, amplify = (b_domain.use_noise()) ? b_domain.noise_scale() : 1;
+
+ /* Velocity and heat data is always low-resolution. */
+ if (attribute == ATTR_STD_VOLUME_VELOCITY || attribute == ATTR_STD_VOLUME_HEAT) {
+ amplify = 1;
+ }
+
+ const int width = resolution.x * amplify;
+ const int height = resolution.y * amplify;
+ const int depth = resolution.z * amplify;
+ const size_t num_pixels = ((size_t)width) * height * depth;
+
+ float *fpixels = (float *)pixels;
+
+ if (attribute == ATTR_STD_VOLUME_DENSITY) {
+ FluidDomainSettings_density_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels) {
+ FluidDomainSettings_density_grid_get(&b_domain.ptr, fpixels);
+ return true;
+ }
+ }
+ else if (attribute == ATTR_STD_VOLUME_FLAME) {
+ /* this is in range 0..1, and interpreted by the OpenGL smoke viewer
+ * as 1500..3000 K with the first part faded to zero density */
+ FluidDomainSettings_flame_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels) {
+ FluidDomainSettings_flame_grid_get(&b_domain.ptr, fpixels);
+ return true;
+ }
+ }
+ else if (attribute == ATTR_STD_VOLUME_COLOR) {
+ /* the RGB is "premultiplied" by density for better interpolation results */
+ FluidDomainSettings_color_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels * 4) {
+ FluidDomainSettings_color_grid_get(&b_domain.ptr, fpixels);
+ return true;
+ }
+ }
+ else if (attribute == ATTR_STD_VOLUME_VELOCITY) {
+ FluidDomainSettings_velocity_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels * 3) {
+ FluidDomainSettings_velocity_grid_get(&b_domain.ptr, fpixels);
+ return true;
+ }
+ }
+ else if (attribute == ATTR_STD_VOLUME_HEAT) {
+ FluidDomainSettings_heat_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels) {
+ FluidDomainSettings_heat_grid_get(&b_domain.ptr, fpixels);
+ return true;
+ }
+ }
+ else if (attribute == ATTR_STD_VOLUME_TEMPERATURE) {
+ FluidDomainSettings_temperature_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels) {
+ FluidDomainSettings_temperature_grid_get(&b_domain.ptr, fpixels);
+ return true;
+ }
+ }
+ else {
+ fprintf(stderr,
+ "Cycles error: unknown volume attribute %s, skipping\n",
+ Attribute::standard_name(attribute));
+ fpixels[0] = 0.0f;
+ return false;
+ }
+#else
+ (void)pixels;
+#endif
+ fprintf(stderr, "Cycles error: unexpected smoke volume resolution, skipping\n");
+ return false;
+ }
+
+ string name() const override
+ {
+ return Attribute::standard_name(attribute);
+ }
+
+ bool equals(const ImageLoader &other) const override
+ {
+ const BlenderSmokeLoader &other_loader = (const BlenderSmokeLoader &)other;
+ return b_ob == other_loader.b_ob && attribute == other_loader.attribute;
+ }
+
+ BL::Object b_ob;
+ AttributeStandard attribute;
+};
+
+static void sync_smoke_volume(Scene *scene, BL::Object &b_ob, Mesh *mesh, float frame)
+{
+ BL::FluidDomainSettings b_domain = object_fluid_gas_domain_find(b_ob);
+ if (!b_domain) {
+ return;
+ }
+
+ AttributeStandard attributes[] = {ATTR_STD_VOLUME_DENSITY,
+ ATTR_STD_VOLUME_COLOR,
+ ATTR_STD_VOLUME_FLAME,
+ ATTR_STD_VOLUME_HEAT,
+ ATTR_STD_VOLUME_TEMPERATURE,
+ ATTR_STD_VOLUME_VELOCITY,
+ ATTR_STD_NONE};
+
+ for (int i = 0; attributes[i] != ATTR_STD_NONE; i++) {
+ AttributeStandard std = attributes[i];
+ if (!mesh->need_attribute(scene, std)) {
+ continue;
+ }
+
+ mesh->volume_clipping = b_domain.clipping();
+
+ Attribute *attr = mesh->attributes.add(std);
+
+ ImageLoader *loader = new BlenderSmokeLoader(b_ob, std);
+ ImageParams params;
+ params.frame = frame;
+
+ attr->data_voxel() = scene->image_manager->add_image(loader, params);
+ }
+}
+
+class BlenderVolumeLoader : public VDBImageLoader {
+ public:
+ BlenderVolumeLoader(BL::Volume b_volume, const string &grid_name)
+ : VDBImageLoader(grid_name),
+ b_volume(b_volume),
+ b_volume_grid(PointerRNA_NULL),
+ unload(false)
+ {
+#ifdef WITH_OPENVDB
+ /* Find grid with matching name. */
+ BL::Volume::grids_iterator b_grid_iter;
+ for (b_volume.grids.begin(b_grid_iter); b_grid_iter != b_volume.grids.end(); ++b_grid_iter) {
+ if (b_grid_iter->name() == grid_name) {
+ b_volume_grid = *b_grid_iter;
+ }
+ }
+#endif
+ }
+
+ bool load_metadata(ImageMetaData &metadata) override
+ {
+ if (!b_volume_grid) {
+ return false;
+ }
+
+ unload = !b_volume_grid.is_loaded();
+
+#ifdef WITH_OPENVDB
+ Volume *volume = (Volume *)b_volume.ptr.data;
+ VolumeGrid *volume_grid = (VolumeGrid *)b_volume_grid.ptr.data;
+ grid = BKE_volume_grid_openvdb_for_read(volume, volume_grid);
+#endif
+
+ return VDBImageLoader::load_metadata(metadata);
+ }
+
+ bool load_pixels(const ImageMetaData &metadata,
+ void *pixels,
+ const size_t pixel_size,
+ const bool associate_alpha) override
+ {
+ if (!b_volume_grid) {
+ return false;
+ }
+
+ return VDBImageLoader::load_pixels(metadata, pixels, pixel_size, associate_alpha);
+ }
+
+ bool equals(const ImageLoader &other) const override
+ {
+ /* TODO: detect multiple volume datablocks with the same filepath. */
+ const BlenderVolumeLoader &other_loader = (const BlenderVolumeLoader &)other;
+ return b_volume == other_loader.b_volume && b_volume_grid == other_loader.b_volume_grid;
+ }
+
+ void cleanup() override
+ {
+ VDBImageLoader::cleanup();
+ if (b_volume_grid && unload) {
+ b_volume_grid.unload();
+ }
+ }
+
+ BL::Volume b_volume;
+ BL::VolumeGrid b_volume_grid;
+ bool unload;
+};
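+
The equals() overrides in BlenderSmokeLoader and BlenderVolumeLoader exist so the image manager can recognize that two add_image() calls refer to the same underlying data and hand back the same handle; that deduplication is an assumption about ImageManager internals consistent with how handles are requested here, not something shown in this diff. Conceptually:

#include <memory>
#include <utility>
#include <vector>

/* Hedged sketch: a manager that reuses an existing slot whenever an equal
 * loader has already been registered. */
struct ToyLoader {
  virtual ~ToyLoader() = default;
  virtual bool equals(const ToyLoader &other) const = 0;
};

struct ToyImageManager {
  std::vector<std::unique_ptr<ToyLoader>> loaders;

  int add_image(std::unique_ptr<ToyLoader> loader)
  {
    for (size_t i = 0; i < loaders.size(); i++) {
      if (loaders[i]->equals(*loader)) {
        return (int)i; /* same grid requested twice: reuse the existing slot */
      }
    }
    loaders.push_back(std::move(loader));
    return (int)loaders.size() - 1;
  }
};
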
+
+static void sync_volume_object(BL::BlendData &b_data, BL::Object &b_ob, Scene *scene, Mesh *mesh)
+{
+ BL::Volume b_volume(b_ob.data());
+ b_volume.grids.load(b_data.ptr.data);
+
+ BL::VolumeRender b_render(b_volume.render());
+
+ mesh->volume_clipping = b_render.clipping();
+ mesh->volume_step_size = b_render.step_size();
+ mesh->volume_object_space = (b_render.space() == BL::VolumeRender::space_OBJECT);
+
+ /* Add a voxel attribute for each grid that the shaders request. */
+ BL::Volume::grids_iterator b_grid_iter;
+ for (b_volume.grids.begin(b_grid_iter); b_grid_iter != b_volume.grids.end(); ++b_grid_iter) {
+ BL::VolumeGrid b_grid = *b_grid_iter;
+ ustring name = ustring(b_grid.name());
+ AttributeStandard std = ATTR_STD_NONE;
+
+ if (name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY)) {
+ std = ATTR_STD_VOLUME_DENSITY;
+ }
+ else if (name == Attribute::standard_name(ATTR_STD_VOLUME_COLOR)) {
+ std = ATTR_STD_VOLUME_COLOR;
+ }
+ else if (name == Attribute::standard_name(ATTR_STD_VOLUME_FLAME)) {
+ std = ATTR_STD_VOLUME_FLAME;
+ }
+ else if (name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) {
+ std = ATTR_STD_VOLUME_HEAT;
+ }
+ else if (name == Attribute::standard_name(ATTR_STD_VOLUME_TEMPERATURE)) {
+ std = ATTR_STD_VOLUME_TEMPERATURE;
+ }
+ else if (name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY)) {
+ std = ATTR_STD_VOLUME_VELOCITY;
+ }
+
+ if ((std != ATTR_STD_NONE && mesh->need_attribute(scene, std)) ||
+ mesh->need_attribute(scene, name)) {
+ Attribute *attr = (std != ATTR_STD_NONE) ?
+ mesh->attributes.add(std) :
+ mesh->attributes.add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_VOXEL);
+
+ ImageLoader *loader = new BlenderVolumeLoader(b_volume, name.string());
+ ImageParams params;
+ params.frame = b_volume.grids.frame();
+
+ attr->data_voxel() = scene->image_manager->add_image(loader, params);
+ }
+ }
+}
+
+/* If the voxel attributes change, we need to rebuild the bounding mesh. */
+static vector<int> get_voxel_image_slots(Mesh *mesh)
+{
+ vector<int> slots;
+ for (const Attribute &attr : mesh->attributes.attributes) {
+ if (attr.element == ATTR_ELEMENT_VOXEL) {
+ slots.push_back(attr.data_voxel().svm_slot());
+ }
+ }
+
+ return slots;
+}
+
+void BlenderSync::sync_volume(BL::Object &b_ob, Mesh *mesh, const vector<Shader *> &used_shaders)
+{
+ vector<int> old_voxel_slots = get_voxel_image_slots(mesh);
+
+ mesh->clear();
+ mesh->used_shaders = used_shaders;
+
+ if (view_layer.use_volumes) {
+ if (b_ob.type() == BL::Object::type_VOLUME) {
+ /* Volume object. Create only attributes; the bounding mesh will be
+ * generated automatically later. */
+ sync_volume_object(b_data, b_ob, scene, mesh);
+ }
+ else {
+ /* Smoke domain. */
+ sync_smoke_volume(scene, b_ob, mesh, b_scene.frame_current());
+ }
+ }
+
+ /* Tag update. */
+ bool rebuild = (old_voxel_slots != get_voxel_image_slots(mesh));
+ mesh->tag_update(scene, rebuild);
+}
+
+CCL_NAMESPACE_END
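
sync_volume_object() above maps OpenVDB grid names onto Cycles' standard volume attributes and falls back to a named float voxel attribute for anything it does not recognize, and only if a shader actually requests the attribute. Assuming the usual lowercase standard names, the classification behaves like this:

#include <cstdio>
#include <string>

/* Illustrative only: the real lookup goes through Attribute::standard_name()
 * and the ATTR_STD_VOLUME_* enum; these string literals are assumptions. */
static const char *classify_grid(const std::string &name)
{
  if (name == "density" || name == "color" || name == "flame" || name == "heat" ||
      name == "temperature" || name == "velocity") {
    return "standard volume attribute";
  }
  return "custom float voxel attribute, added by name";
}

int main()
{
  std::printf("density -> %s\n", classify_grid("density"));
  std::printf("my_grid -> %s\n", classify_grid("my_grid"));
  return 0;
}
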
diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp
index 16c721da06a..e6502a40313 100644
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -17,6 +17,7 @@
#include "bvh/bvh.h"
+#include "render/hair.h"
#include "render/mesh.h"
#include "render/object.h"
@@ -99,31 +100,33 @@ int BVHStackEntry::encodeIdx() const
/* BVH */
-BVH::BVH(const BVHParams &params_, const vector<Mesh *> &meshes_, const vector<Object *> &objects_)
- : params(params_), meshes(meshes_), objects(objects_)
+BVH::BVH(const BVHParams &params_,
+ const vector<Geometry *> &geometry_,
+ const vector<Object *> &objects_)
+ : params(params_), geometry(geometry_), objects(objects_)
{
}
BVH *BVH::create(const BVHParams &params,
- const vector<Mesh *> &meshes,
+ const vector<Geometry *> &geometry,
const vector<Object *> &objects)
{
switch (params.bvh_layout) {
case BVH_LAYOUT_BVH2:
- return new BVH2(params, meshes, objects);
+ return new BVH2(params, geometry, objects);
case BVH_LAYOUT_BVH4:
- return new BVH4(params, meshes, objects);
+ return new BVH4(params, geometry, objects);
case BVH_LAYOUT_BVH8:
- return new BVH8(params, meshes, objects);
+ return new BVH8(params, geometry, objects);
case BVH_LAYOUT_EMBREE:
#ifdef WITH_EMBREE
- return new BVHEmbree(params, meshes, objects);
+ return new BVHEmbree(params, geometry, objects);
#else
break;
#endif
case BVH_LAYOUT_OPTIX:
#ifdef WITH_OPTIX
- return new BVHOptiX(params, meshes, objects);
+ return new BVHOptiX(params, geometry, objects);
#else
break;
#endif
@@ -217,36 +220,36 @@ void BVH::refit_primitives(int start, int end, BoundBox &bbox, uint &visibility)
}
else {
/* Primitives. */
- const Mesh *mesh = ob->mesh;
-
if (pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
/* Curves. */
- int str_offset = (params.top_level) ? mesh->curve_offset : 0;
- Mesh::Curve curve = mesh->get_curve(pidx - str_offset);
+ const Hair *hair = static_cast<const Hair *>(ob->geometry);
+ int prim_offset = (params.top_level) ? hair->prim_offset : 0;
+ Hair::Curve curve = hair->get_curve(pidx - prim_offset);
int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);
- curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox);
+ curve.bounds_grow(k, &hair->curve_keys[0], &hair->curve_radius[0], bbox);
visibility |= PATH_RAY_CURVE;
/* Motion curves. */
- if (mesh->use_motion_blur) {
- Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if (hair->use_motion_blur) {
+ Attribute *attr = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if (attr) {
- size_t mesh_size = mesh->curve_keys.size();
- size_t steps = mesh->motion_steps - 1;
+ size_t hair_size = hair->curve_keys.size();
+ size_t steps = hair->motion_steps - 1;
float3 *key_steps = attr->data_float3();
for (size_t i = 0; i < steps; i++)
- curve.bounds_grow(k, key_steps + i * mesh_size, &mesh->curve_radius[0], bbox);
+ curve.bounds_grow(k, key_steps + i * hair_size, &hair->curve_radius[0], bbox);
}
}
}
else {
/* Triangles. */
- int tri_offset = (params.top_level) ? mesh->tri_offset : 0;
- Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset);
+ const Mesh *mesh = static_cast<const Mesh *>(ob->geometry);
+ int prim_offset = (params.top_level) ? mesh->prim_offset : 0;
+ Mesh::Triangle triangle = mesh->get_triangle(pidx - prim_offset);
const float3 *vpos = &mesh->verts[0];
triangle.bounds_grow(vpos, bbox);
@@ -276,7 +279,7 @@ void BVH::pack_triangle(int idx, float4 tri_verts[3])
{
int tob = pack.prim_object[idx];
assert(tob >= 0 && tob < objects.size());
- const Mesh *mesh = objects[tob]->mesh;
+ const Mesh *mesh = static_cast<const Mesh *>(objects[tob]->geometry);
int tidx = pack.prim_index[idx];
Mesh::Triangle t = mesh->get_triangle(tidx);
@@ -347,15 +350,13 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
const bool use_obvh = (params.bvh_layout == BVH_LAYOUT_BVH8);
/* Adjust primitive index to point to the triangle in the global array, for
- * meshes with transform applied and already in the top level BVH.
+ * geometry with transform applied and already in the top level BVH.
*/
- for (size_t i = 0; i < pack.prim_index.size(); i++)
+ for (size_t i = 0; i < pack.prim_index.size(); i++) {
if (pack.prim_index[i] != -1) {
- if (pack.prim_type[i] & PRIMITIVE_ALL_CURVE)
- pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->curve_offset;
- else
- pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->tri_offset;
+ pack.prim_index[i] += objects[pack.prim_object[i]]->geometry->prim_offset;
}
+ }
/* track offsets of instanced BVH data in global array */
size_t prim_offset = pack.prim_index.size();
@@ -375,10 +376,10 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
size_t pack_leaf_nodes_offset = leaf_nodes_size;
size_t object_offset = 0;
- foreach (Mesh *mesh, meshes) {
- BVH *bvh = mesh->bvh;
+ foreach (Geometry *geom, geometry) {
+ BVH *bvh = geom->bvh;
- if (mesh->need_build_bvh(params.bvh_layout)) {
+ if (geom->need_build_bvh(params.bvh_layout)) {
prim_index_size += bvh->pack.prim_index.size();
prim_tri_verts_size += bvh->pack.prim_tri_verts.size();
nodes_size += bvh->pack.nodes.size();
@@ -410,36 +411,35 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
int4 *pack_leaf_nodes = (pack.leaf_nodes.size()) ? &pack.leaf_nodes[0] : NULL;
float2 *pack_prim_time = (pack.prim_time.size()) ? &pack.prim_time[0] : NULL;
- map<Mesh *, int> mesh_map;
+ map<Geometry *, int> geometry_map;
/* merge */
foreach (Object *ob, objects) {
- Mesh *mesh = ob->mesh;
+ Geometry *geom = ob->geometry;
/* We assume that if the mesh doesn't need its own BVH, it was already included
* in the top-level BVH and no packing is needed here.
*/
- if (!mesh->need_build_bvh(params.bvh_layout)) {
+ if (!geom->need_build_bvh(params.bvh_layout)) {
pack.object_node[object_offset++] = 0;
continue;
}
/* if mesh already added once, don't add it again, but use the
* node offset that was already set for this object */
- map<Mesh *, int>::iterator it = mesh_map.find(mesh);
+ map<Geometry *, int>::iterator it = geometry_map.find(geom);
- if (mesh_map.find(mesh) != mesh_map.end()) {
+ if (geometry_map.find(geom) != geometry_map.end()) {
int noffset = it->second;
pack.object_node[object_offset++] = noffset;
continue;
}
- BVH *bvh = mesh->bvh;
+ BVH *bvh = geom->bvh;
int noffset = nodes_offset;
int noffset_leaf = nodes_leaf_offset;
- int mesh_tri_offset = mesh->tri_offset;
- int mesh_curve_offset = mesh->curve_offset;
+ int geom_prim_offset = geom->prim_offset;
/* fill in node indexes for instances */
if (bvh->pack.root_index == -1)
@@ -447,7 +447,7 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
else
pack.object_node[object_offset++] = noffset;
- mesh_map[mesh] = pack.object_node[object_offset - 1];
+ geometry_map[geom] = pack.object_node[object_offset - 1];
/* merge primitive, object and triangle indexes */
if (bvh->pack.prim_index.size()) {
@@ -460,11 +460,11 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
for (size_t i = 0; i < bvh_prim_index_size; i++) {
if (bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
- pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_curve_offset;
+ pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset;
pack_prim_tri_index[pack_prim_index_offset] = -1;
}
else {
- pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_tri_offset;
+ pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset;
pack_prim_tri_index[pack_prim_index_offset] = bvh_prim_tri_index[i] +
pack_prim_tri_verts_offset;
}
@@ -535,8 +535,9 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
/* Modify offsets into arrays */
int4 data = bvh_nodes[i + nsize_bbox];
- int4 data1 = bvh_nodes[i + nsize_bbox - 1];
+
if (use_obvh) {
+ int4 data1 = bvh_nodes[i + nsize_bbox - 1];
data.z += (data.z < 0) ? -noffset_leaf : noffset;
data.w += (data.w < 0) ? -noffset_leaf : noffset;
data.x += (data.x < 0) ? -noffset_leaf : noffset;
@@ -545,6 +546,8 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
data1.w += (data1.w < 0) ? -noffset_leaf : noffset;
data1.x += (data1.x < 0) ? -noffset_leaf : noffset;
data1.y += (data1.y < 0) ? -noffset_leaf : noffset;
+ pack_nodes[pack_nodes_offset + nsize_bbox] = data;
+ pack_nodes[pack_nodes_offset + nsize_bbox - 1] = data1;
}
else {
data.z += (data.z < 0) ? -noffset_leaf : noffset;
@@ -553,10 +556,7 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
data.x += (data.x < 0) ? -noffset_leaf : noffset;
data.y += (data.y < 0) ? -noffset_leaf : noffset;
}
- }
- pack_nodes[pack_nodes_offset + nsize_bbox] = data;
- if (use_obvh) {
- pack_nodes[pack_nodes_offset + nsize_bbox - 1] = data1;
+ pack_nodes[pack_nodes_offset + nsize_bbox] = data;
}
/* Usually this copies nothing, but we better
diff --git a/intern/cycles/bvh/bvh.h b/intern/cycles/bvh/bvh.h
index 92082e4de86..bdde38640c9 100644
--- a/intern/cycles/bvh/bvh.h
+++ b/intern/cycles/bvh/bvh.h
@@ -33,7 +33,7 @@ struct BVHStackEntry;
class BVHParams;
class BoundBox;
class LeafNode;
-class Mesh;
+class Geometry;
class Object;
class Progress;
@@ -84,11 +84,11 @@ class BVH {
public:
PackedBVH pack;
BVHParams params;
- vector<Mesh *> meshes;
+ vector<Geometry *> geometry;
vector<Object *> objects;
static BVH *create(const BVHParams &params,
- const vector<Mesh *> &meshes,
+ const vector<Geometry *> &geometry,
const vector<Object *> &objects);
virtual ~BVH()
{
@@ -102,7 +102,9 @@ class BVH {
void refit(Progress &progress);
protected:
- BVH(const BVHParams &params, const vector<Mesh *> &meshes, const vector<Object *> &objects);
+ BVH(const BVHParams &params,
+ const vector<Geometry *> &geometry,
+ const vector<Object *> &objects);
/* Refit range of primitives. */
void refit_primitives(int start, int end, BoundBox &bbox, uint &visibility);
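
The bvh.cpp and bvh.h changes replace the separate tri_offset/curve_offset pair with a single Geometry::prim_offset, so rebasing a sub-BVH's primitive indices into the top-level arrays no longer branches on the primitive type (see the pack_instances() hunk above). A toy version of that rebasing step:

#include <cassert>

struct ToyGeometry {
  int prim_offset; /* where this geometry's primitives start in the global arrays */
};

/* Local index inside an instanced sub-BVH -> global index, identical for
 * triangles and curve segments now that there is one offset per geometry. */
static int to_global_index(const ToyGeometry &geom, int local_prim_index)
{
  return local_prim_index + geom.prim_offset;
}

int main()
{
  const ToyGeometry mesh = {100};
  const ToyGeometry hair = {250};
  assert(to_global_index(mesh, 7) == 107);
  assert(to_global_index(hair, 3) == 253);
  return 0;
}
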
diff --git a/intern/cycles/bvh/bvh2.cpp b/intern/cycles/bvh/bvh2.cpp
index b1a9148c297..c903070429e 100644
--- a/intern/cycles/bvh/bvh2.cpp
+++ b/intern/cycles/bvh/bvh2.cpp
@@ -26,9 +26,9 @@
CCL_NAMESPACE_BEGIN
BVH2::BVH2(const BVHParams &params_,
- const vector<Mesh *> &meshes_,
+ const vector<Geometry *> &geometry_,
const vector<Object *> &objects_)
- : BVH(params_, meshes_, objects_)
+ : BVH(params_, geometry_, objects_)
{
}
diff --git a/intern/cycles/bvh/bvh2.h b/intern/cycles/bvh/bvh2.h
index a3eaff9cf65..fa3e45b72d2 100644
--- a/intern/cycles/bvh/bvh2.h
+++ b/intern/cycles/bvh/bvh2.h
@@ -46,7 +46,9 @@ class BVH2 : public BVH {
protected:
/* constructor */
friend class BVH;
- BVH2(const BVHParams &params, const vector<Mesh *> &meshes, const vector<Object *> &objects);
+ BVH2(const BVHParams &params,
+ const vector<Geometry *> &geometry,
+ const vector<Object *> &objects);
/* Building process. */
virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
diff --git a/intern/cycles/bvh/bvh4.cpp b/intern/cycles/bvh/bvh4.cpp
index 89b42ee1d21..143c3e54f94 100644
--- a/intern/cycles/bvh/bvh4.cpp
+++ b/intern/cycles/bvh/bvh4.cpp
@@ -32,9 +32,9 @@ CCL_NAMESPACE_BEGIN
*/
BVH4::BVH4(const BVHParams &params_,
- const vector<Mesh *> &meshes_,
+ const vector<Geometry *> &geometry_,
const vector<Object *> &objects_)
- : BVH(params_, meshes_, objects_)
+ : BVH(params_, geometry_, objects_)
{
params.bvh_layout = BVH_LAYOUT_BVH4;
}
diff --git a/intern/cycles/bvh/bvh4.h b/intern/cycles/bvh/bvh4.h
index c44f2833c84..afbb9007afb 100644
--- a/intern/cycles/bvh/bvh4.h
+++ b/intern/cycles/bvh/bvh4.h
@@ -46,7 +46,9 @@ class BVH4 : public BVH {
protected:
/* constructor */
friend class BVH;
- BVH4(const BVHParams &params, const vector<Mesh *> &meshes, const vector<Object *> &objects);
+ BVH4(const BVHParams &params,
+ const vector<Geometry *> &geometry,
+ const vector<Object *> &objects);
/* Building process. */
virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
diff --git a/intern/cycles/bvh/bvh8.cpp b/intern/cycles/bvh/bvh8.cpp
index d3516525f78..342dd9e85a5 100644
--- a/intern/cycles/bvh/bvh8.cpp
+++ b/intern/cycles/bvh/bvh8.cpp
@@ -28,6 +28,7 @@
#include "bvh/bvh8.h"
+#include "render/hair.h"
#include "render/mesh.h"
#include "render/object.h"
@@ -37,9 +38,9 @@
CCL_NAMESPACE_BEGIN
BVH8::BVH8(const BVHParams &params_,
- const vector<Mesh *> &meshes_,
+ const vector<Geometry *> &geometry_,
const vector<Object *> &objects_)
- : BVH(params_, meshes_, objects_)
+ : BVH(params_, geometry_, objects_)
{
}
@@ -429,37 +430,37 @@ void BVH8::refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility)
}
else {
/* Primitives. */
- const Mesh *mesh = ob->mesh;
-
if (pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
/* Curves. */
- int str_offset = (params.top_level) ? mesh->curve_offset : 0;
- Mesh::Curve curve = mesh->get_curve(pidx - str_offset);
+ const Hair *hair = static_cast<const Hair *>(ob->geometry);
+ int prim_offset = (params.top_level) ? hair->prim_offset : 0;
+ Hair::Curve curve = hair->get_curve(pidx - prim_offset);
int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);
- curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox);
+ curve.bounds_grow(k, &hair->curve_keys[0], &hair->curve_radius[0], bbox);
visibility |= PATH_RAY_CURVE;
/* Motion curves. */
- if (mesh->use_motion_blur) {
- Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if (hair->use_motion_blur) {
+ Attribute *attr = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if (attr) {
- size_t mesh_size = mesh->curve_keys.size();
- size_t steps = mesh->motion_steps - 1;
+ size_t hair_size = hair->curve_keys.size();
+ size_t steps = hair->motion_steps - 1;
float3 *key_steps = attr->data_float3();
for (size_t i = 0; i < steps; i++) {
- curve.bounds_grow(k, key_steps + i * mesh_size, &mesh->curve_radius[0], bbox);
+ curve.bounds_grow(k, key_steps + i * hair_size, &hair->curve_radius[0], bbox);
}
}
}
}
else {
/* Triangles. */
- int tri_offset = (params.top_level) ? mesh->tri_offset : 0;
- Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset);
+ const Mesh *mesh = static_cast<const Mesh *>(ob->geometry);
+ int prim_offset = (params.top_level) ? mesh->prim_offset : 0;
+ Mesh::Triangle triangle = mesh->get_triangle(pidx - prim_offset);
const float3 *vpos = &mesh->verts[0];
triangle.bounds_grow(vpos, bbox);
diff --git a/intern/cycles/bvh/bvh8.h b/intern/cycles/bvh/bvh8.h
index 5f26fd423e1..d23fa528e3e 100644
--- a/intern/cycles/bvh/bvh8.h
+++ b/intern/cycles/bvh/bvh8.h
@@ -57,7 +57,9 @@ class BVH8 : public BVH {
protected:
/* constructor */
friend class BVH;
- BVH8(const BVHParams &params, const vector<Mesh *> &meshes, const vector<Object *> &objects);
+ BVH8(const BVHParams &params,
+ const vector<Geometry *> &geometry,
+ const vector<Object *> &objects);
/* Building process. */
virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
diff --git a/intern/cycles/bvh/bvh_build.cpp b/intern/cycles/bvh/bvh_build.cpp
index 1d9b006e8cb..db156219f09 100644
--- a/intern/cycles/bvh/bvh_build.cpp
+++ b/intern/cycles/bvh/bvh_build.cpp
@@ -22,19 +22,20 @@
#include "bvh/bvh_params.h"
#include "bvh_split.h"
+#include "render/curves.h"
+#include "render/hair.h"
#include "render/mesh.h"
#include "render/object.h"
#include "render/scene.h"
-#include "render/curves.h"
#include "util/util_algorithm.h"
#include "util/util_foreach.h"
#include "util/util_logging.h"
#include "util/util_progress.h"
-#include "util/util_stack_allocator.h"
+#include "util/util_queue.h"
#include "util/util_simd.h"
+#include "util/util_stack_allocator.h"
#include "util/util_time.h"
-#include "util/util_queue.h"
CCL_NAMESPACE_BEGIN
@@ -194,21 +195,21 @@ void BVHBuild::add_reference_triangles(BoundBox &root, BoundBox &center, Mesh *m
}
}
-void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Mesh *mesh, int i)
+void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Hair *hair, int i)
{
const Attribute *curve_attr_mP = NULL;
- if (mesh->has_motion_blur()) {
- curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if (hair->has_motion_blur()) {
+ curve_attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
}
- const size_t num_curves = mesh->num_curves();
+ const size_t num_curves = hair->num_curves();
for (uint j = 0; j < num_curves; j++) {
- const Mesh::Curve curve = mesh->get_curve(j);
- const float *curve_radius = &mesh->curve_radius[0];
+ const Hair::Curve curve = hair->get_curve(j);
+ const float *curve_radius = &hair->curve_radius[0];
for (int k = 0; k < curve.num_keys - 1; k++) {
if (curve_attr_mP == NULL) {
/* Really simple logic for static hair. */
BoundBox bounds = BoundBox::empty;
- curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
+ curve.bounds_grow(k, &hair->curve_keys[0], curve_radius, bounds);
if (bounds.valid()) {
int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_CURVE, k);
references.push_back(BVHReference(bounds, j, i, packed_type));
@@ -223,9 +224,9 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Mesh *mesh
*/
/* TODO(sergey): Support motion steps for spatially split BVH. */
BoundBox bounds = BoundBox::empty;
- curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
- const size_t num_keys = mesh->curve_keys.size();
- const size_t num_steps = mesh->motion_steps;
+ curve.bounds_grow(k, &hair->curve_keys[0], curve_radius, bounds);
+ const size_t num_keys = hair->curve_keys.size();
+ const size_t num_steps = hair->motion_steps;
const float3 *key_steps = curve_attr_mP->data_float3();
for (size_t step = 0; step < num_steps - 1; step++) {
curve.bounds_grow(k, key_steps + step * num_keys, curve_radius, bounds);
@@ -244,10 +245,10 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Mesh *mesh
*/
const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
- const size_t num_steps = mesh->motion_steps;
- const float3 *curve_keys = &mesh->curve_keys[0];
+ const size_t num_steps = hair->motion_steps;
+ const float3 *curve_keys = &hair->curve_keys[0];
const float3 *key_steps = curve_attr_mP->data_float3();
- const size_t num_keys = mesh->curve_keys.size();
+ const size_t num_keys = hair->curve_keys.size();
/* Calculate bounding box of the previous time step.
* Will be reused later to avoid duplicated work on
* calculating BVH time step boundbox.
@@ -302,13 +303,15 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Mesh *mesh
}
}
-void BVHBuild::add_reference_mesh(BoundBox &root, BoundBox &center, Mesh *mesh, int i)
+void BVHBuild::add_reference_geometry(BoundBox &root, BoundBox &center, Geometry *geom, int i)
{
- if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
add_reference_triangles(root, center, mesh, i);
}
- if (params.primitive_mask & PRIMITIVE_ALL_CURVE) {
- add_reference_curves(root, center, mesh, i);
+ else if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ add_reference_curves(root, center, hair, i);
}
}
@@ -319,16 +322,30 @@ void BVHBuild::add_reference_object(BoundBox &root, BoundBox &center, Object *ob
center.grow(ob->bounds.center2());
}
-static size_t count_curve_segments(Mesh *mesh)
+static size_t count_curve_segments(Hair *hair)
{
- size_t num = 0, num_curves = mesh->num_curves();
+ size_t num = 0, num_curves = hair->num_curves();
for (size_t i = 0; i < num_curves; i++)
- num += mesh->get_curve(i).num_keys - 1;
+ num += hair->get_curve(i).num_keys - 1;
return num;
}
+static size_t count_primitives(Geometry *geom)
+{
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ return mesh->num_triangles();
+ }
+ else if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ return count_curve_segments(hair);
+ }
+
+ return 0;
+}
+
void BVHBuild::add_references(BVHRange &root)
{
/* reserve space for references */
@@ -339,24 +356,14 @@ void BVHBuild::add_references(BVHRange &root)
if (!ob->is_traceable()) {
continue;
}
- if (!ob->mesh->is_instanced()) {
- if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
- num_alloc_references += ob->mesh->num_triangles();
- }
- if (params.primitive_mask & PRIMITIVE_ALL_CURVE) {
- num_alloc_references += count_curve_segments(ob->mesh);
- }
+ if (!ob->geometry->is_instanced()) {
+ num_alloc_references += count_primitives(ob->geometry);
}
else
num_alloc_references++;
}
else {
- if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
- num_alloc_references += ob->mesh->num_triangles();
- }
- if (params.primitive_mask & PRIMITIVE_ALL_CURVE) {
- num_alloc_references += count_curve_segments(ob->mesh);
- }
+ num_alloc_references += count_primitives(ob->geometry);
}
}
@@ -372,13 +379,13 @@ void BVHBuild::add_references(BVHRange &root)
++i;
continue;
}
- if (!ob->mesh->is_instanced())
- add_reference_mesh(bounds, center, ob->mesh, i);
+ if (!ob->geometry->is_instanced())
+ add_reference_geometry(bounds, center, ob->geometry, i);
else
add_reference_object(bounds, center, ob, i);
}
else
- add_reference_mesh(bounds, center, ob->mesh, i);
+ add_reference_geometry(bounds, center, ob->geometry, i);
i++;
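
With the primitive mask gone, the add_references() changes above reserve space with one rule: an instanced object in a top-level BVH contributes a single reference, anything else contributes one reference per triangle or per curve segment. A hedged sketch of that counting, with hypothetical simplified Object/Geometry structs standing in for the real classes:

#include <cstddef>
#include <vector>

/* Simplified stand-ins, for illustration only. */
struct Geometry { std::size_t num_primitives; bool instanced; };
struct Object { Geometry *geometry; bool traceable; };

static std::size_t count_references(const std::vector<Object> &objects, bool top_level)
{
  std::size_t num = 0;
  for (const Object &ob : objects) {
    if (top_level) {
      if (!ob.traceable)
        continue;
      /* Instanced geometry contributes a single object reference; its own
       * primitives live in the bottom-level BVH built for that geometry. */
      num += ob.geometry->instanced ? 1 : ob.geometry->num_primitives;
    }
    else {
      num += ob.geometry->num_primitives;
    }
  }
  return num;
}
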
diff --git a/intern/cycles/bvh/bvh_build.h b/intern/cycles/bvh/bvh_build.h
index 9685e26cfac..3fe4c3799e2 100644
--- a/intern/cycles/bvh/bvh_build.h
+++ b/intern/cycles/bvh/bvh_build.h
@@ -35,6 +35,8 @@ class BVHNode;
class BVHSpatialSplitBuildTask;
class BVHParams;
class InnerNode;
+class Geometry;
+class Hair;
class Mesh;
class Object;
class Progress;
@@ -65,8 +67,8 @@ class BVHBuild {
/* Adding references. */
void add_reference_triangles(BoundBox &root, BoundBox &center, Mesh *mesh, int i);
- void add_reference_curves(BoundBox &root, BoundBox &center, Mesh *mesh, int i);
- void add_reference_mesh(BoundBox &root, BoundBox &center, Mesh *mesh, int i);
+ void add_reference_curves(BoundBox &root, BoundBox &center, Hair *hair, int i);
+ void add_reference_geometry(BoundBox &root, BoundBox &center, Geometry *geom, int i);
void add_reference_object(BoundBox &root, BoundBox &center, Object *ob, int i);
void add_references(BVHRange &root);
diff --git a/intern/cycles/bvh/bvh_embree.cpp b/intern/cycles/bvh/bvh_embree.cpp
index 3e4978a2c0a..9356adf3ea5 100644
--- a/intern/cycles/bvh/bvh_embree.cpp
+++ b/intern/cycles/bvh/bvh_embree.cpp
@@ -35,9 +35,9 @@
#ifdef WITH_EMBREE
+# include <embree3/rtcore_geometry.h>
# include <pmmintrin.h>
# include <xmmintrin.h>
-# include <embree3/rtcore_geometry.h>
# include "bvh/bvh_embree.h"
@@ -45,10 +45,11 @@
*/
# include "kernel/bvh/bvh_embree.h"
# include "kernel/kernel_compat_cpu.h"
-# include "kernel/split/kernel_split_data_types.h"
# include "kernel/kernel_globals.h"
# include "kernel/kernel_random.h"
+# include "kernel/split/kernel_split_data_types.h"
+# include "render/hair.h"
# include "render/mesh.h"
# include "render/object.h"
# include "util/util_foreach.h"
@@ -57,6 +58,11 @@
CCL_NAMESPACE_BEGIN
+static_assert(Object::MAX_MOTION_STEPS <= RTC_MAX_TIME_STEP_COUNT,
+ "Object and Embree max motion steps inconsistent");
+static_assert(Object::MAX_MOTION_STEPS == Geometry::MAX_MOTION_STEPS,
+ "Object and Geometry max motion steps inconsistent");
+
# define IS_HAIR(x) (x & 1)
/* This gets called by Embree at every valid ray/object intersection.
@@ -301,10 +307,24 @@ RTCDevice BVHEmbree::rtc_shared_device = NULL;
int BVHEmbree::rtc_shared_users = 0;
thread_mutex BVHEmbree::rtc_shared_mutex;
+static size_t count_primitives(Geometry *geom)
+{
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ return mesh->num_triangles();
+ }
+ else if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ return hair->num_segments();
+ }
+
+ return 0;
+}
+
BVHEmbree::BVHEmbree(const BVHParams &params_,
- const vector<Mesh *> &meshes_,
+ const vector<Geometry *> &geometry_,
const vector<Object *> &objects_)
- : BVH(params_, meshes_, objects_),
+ : BVH(params_, geometry_, objects_),
scene(NULL),
mem_used(0),
top_level(NULL),
@@ -325,7 +345,7 @@ BVHEmbree::BVHEmbree(const BVHParams &params_,
if (ret != 1) {
assert(0);
VLOG(1) << "Embree is compiled without the RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED flag."
- "Ray visiblity will not work.";
+ "Ray visibility will not work.";
}
ret = rtcGetDeviceProperty(rtc_shared_device, RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED);
if (ret != 1) {
@@ -436,29 +456,15 @@ void BVHEmbree::build(Progress &progress, Stats *stats_)
if (!ob->is_traceable()) {
continue;
}
- if (!ob->mesh->is_instanced()) {
- if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
- prim_count += ob->mesh->num_triangles();
- }
- if (params.primitive_mask & PRIMITIVE_ALL_CURVE) {
- for (size_t j = 0; j < ob->mesh->num_curves(); ++j) {
- prim_count += ob->mesh->get_curve(j).num_segments();
- }
- }
+ if (!ob->geometry->is_instanced()) {
+ prim_count += count_primitives(ob->geometry);
}
else {
++prim_count;
}
}
else {
- if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE && ob->mesh->num_triangles() > 0) {
- prim_count += ob->mesh->num_triangles();
- }
- if (params.primitive_mask & PRIMITIVE_ALL_CURVE) {
- for (size_t j = 0; j < ob->mesh->num_curves(); ++j) {
- prim_count += ob->mesh->get_curve(j).num_segments();
- }
- }
+ prim_count += count_primitives(ob->geometry);
}
}
@@ -477,7 +483,7 @@ void BVHEmbree::build(Progress &progress, Stats *stats_)
++i;
continue;
}
- if (!ob->mesh->is_instanced()) {
+ if (!ob->geometry->is_instanced()) {
add_object(ob, i);
}
else {
@@ -528,36 +534,57 @@ BVHNode *BVHEmbree::widen_children_nodes(const BVHNode * /*root*/)
void BVHEmbree::add_object(Object *ob, int i)
{
- Mesh *mesh = ob->mesh;
- if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE && mesh->num_triangles() > 0) {
- add_triangles(ob, i);
+ Geometry *geom = ob->geometry;
+
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (mesh->num_triangles() > 0) {
+ add_triangles(ob, mesh, i);
+ }
}
- if (params.primitive_mask & PRIMITIVE_ALL_CURVE && mesh->num_curves() > 0) {
- add_curves(ob, i);
+ else if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ if (hair->num_curves() > 0) {
+ add_curves(ob, hair, i);
+ }
}
}
void BVHEmbree::add_instance(Object *ob, int i)
{
- if (!ob || !ob->mesh) {
+ if (!ob || !ob->geometry) {
assert(0);
return;
}
- BVHEmbree *instance_bvh = (BVHEmbree *)(ob->mesh->bvh);
+ BVHEmbree *instance_bvh = (BVHEmbree *)(ob->geometry->bvh);
if (instance_bvh->top_level != this) {
instance_bvh->top_level = this;
}
- const size_t num_motion_steps = ob->use_motion() ? ob->motion.size() : 1;
+ const size_t num_object_motion_steps = ob->use_motion() ? ob->motion.size() : 1;
+ const size_t num_motion_steps = min(num_object_motion_steps, RTC_MAX_TIME_STEP_COUNT);
+ assert(num_object_motion_steps <= RTC_MAX_TIME_STEP_COUNT);
+
RTCGeometry geom_id = rtcNewGeometry(rtc_shared_device, RTC_GEOMETRY_TYPE_INSTANCE);
rtcSetGeometryInstancedScene(geom_id, instance_bvh->scene);
rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
if (ob->use_motion()) {
+ array<DecomposedTransform> decomp(ob->motion.size());
+ transform_motion_decompose(decomp.data(), ob->motion.data(), ob->motion.size());
for (size_t step = 0; step < num_motion_steps; ++step) {
- rtcSetGeometryTransform(
- geom_id, step, RTC_FORMAT_FLOAT3X4_ROW_MAJOR, (const float *)&ob->motion[step]);
+ RTCQuaternionDecomposition rtc_decomp;
+ rtcInitQuaternionDecomposition(&rtc_decomp);
+ rtcQuaternionDecompositionSetQuaternion(
+ &rtc_decomp, decomp[step].x.w, decomp[step].x.x, decomp[step].x.y, decomp[step].x.z);
+ rtcQuaternionDecompositionSetScale(
+ &rtc_decomp, decomp[step].y.w, decomp[step].z.w, decomp[step].w.w);
+ rtcQuaternionDecompositionSetTranslation(
+ &rtc_decomp, decomp[step].y.x, decomp[step].y.y, decomp[step].y.z);
+ rtcQuaternionDecompositionSetSkew(
+ &rtc_decomp, decomp[step].z.x, decomp[step].z.y, decomp[step].w.x);
+ rtcSetGeometryTransformQuaternion(geom_id, step, &rtc_decomp);
}
}
else {
@@ -570,30 +597,28 @@ void BVHEmbree::add_instance(Object *ob, int i)
pack.prim_tri_index.push_back_slow(-1);
rtcSetGeometryUserData(geom_id, (void *)instance_bvh->scene);
- rtcSetGeometryMask(geom_id, ob->visibility);
+ rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
rtcCommitGeometry(geom_id);
rtcAttachGeometryByID(scene, geom_id, i * 2);
rtcReleaseGeometry(geom_id);
}
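
The add_instance() path just above now clamps the step count to RTC_MAX_TIME_STEP_COUNT and feeds Embree decomposed transforms (from transform_motion_decompose) through its quaternion motion-blur interface instead of per-step affine matrices, which gives properly interpolated rotations. A compressed sketch of the per-step setup is below; the helper name is made up, the packing of quaternion, scale, translation and skew into plain arrays is an assumption for readability, and rtcCommitGeometry() still has to follow, exactly as the hunk above does.

#include <embree3/rtcore.h>

/* Illustrative helper, not part of the Cycles sources. */
static void set_instance_motion_step(RTCGeometry geom,
                                     unsigned step,
                                     const float quat[4], /* (w, x, y, z) */
                                     const float scale[3],
                                     const float translation[3],
                                     const float skew[3])
{
  RTCQuaternionDecomposition qd;
  rtcInitQuaternionDecomposition(&qd);
  rtcQuaternionDecompositionSetQuaternion(&qd, quat[0], quat[1], quat[2], quat[3]);
  rtcQuaternionDecompositionSetScale(&qd, scale[0], scale[1], scale[2]);
  rtcQuaternionDecompositionSetTranslation(&qd, translation[0], translation[1], translation[2]);
  rtcQuaternionDecompositionSetSkew(&qd, skew[0], skew[1], skew[2]);
  rtcSetGeometryTransformQuaternion(geom, step, &qd);
}
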
-void BVHEmbree::add_triangles(Object *ob, int i)
+void BVHEmbree::add_triangles(const Object *ob, const Mesh *mesh, int i)
{
size_t prim_offset = pack.prim_index.size();
- Mesh *mesh = ob->mesh;
const Attribute *attr_mP = NULL;
- size_t num_motion_steps = 1;
+ size_t num_geometry_motion_steps = 1;
if (mesh->has_motion_blur()) {
attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if (attr_mP) {
- num_motion_steps = mesh->motion_steps;
- if (num_motion_steps > RTC_MAX_TIME_STEP_COUNT) {
- assert(0);
- num_motion_steps = RTC_MAX_TIME_STEP_COUNT;
- }
+ num_geometry_motion_steps = mesh->motion_steps;
}
}
+ const size_t num_motion_steps = min(num_geometry_motion_steps, RTC_MAX_TIME_STEP_COUNT);
+ assert(num_geometry_motion_steps <= RTC_MAX_TIME_STEP_COUNT);
+
const size_t num_triangles = mesh->num_triangles();
RTCGeometry geom_id = rtcNewGeometry(rtc_shared_device, RTC_GEOMETRY_TYPE_TRIANGLE);
rtcSetGeometryBuildQuality(geom_id, build_quality);
@@ -635,7 +660,7 @@ void BVHEmbree::add_triangles(Object *ob, int i)
rtcSetGeometryUserData(geom_id, (void *)prim_offset);
rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func);
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
- rtcSetGeometryMask(geom_id, ob->visibility);
+ rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
rtcCommitGeometry(geom_id);
rtcAttachGeometryByID(scene, geom_id, i * 2);
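
Note the attachment ID above: triangle geometries (and instances) are attached at slot i * 2, while add_curves() further down attaches hair at i * 2 + 1, so every Cycles object owns a fixed pair of Embree geometry slots and refit_nodes() can walk the scene in steps of two. A tiny sketch of that convention; the helper names are invented for illustration:

/* Hypothetical helpers spelling out the slot convention used by the
 * rtcAttachGeometryByID() calls in this file. */
static inline unsigned embree_triangle_slot(unsigned object_index)
{
  return object_index * 2; /* even slot: the object's mesh or instance geometry */
}
static inline unsigned embree_curve_slot(unsigned object_index)
{
  return object_index * 2 + 1; /* odd slot: the object's hair geometry */
}
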
@@ -684,31 +709,37 @@ void BVHEmbree::update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh)
}
}
-void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh)
+void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair)
{
const Attribute *attr_mP = NULL;
size_t num_motion_steps = 1;
- if (mesh->has_motion_blur()) {
- attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if (hair->has_motion_blur()) {
+ attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if (attr_mP) {
- num_motion_steps = mesh->motion_steps;
+ num_motion_steps = hair->motion_steps;
}
}
- const size_t num_curves = mesh->num_curves();
+ const size_t num_curves = hair->num_curves();
size_t num_keys = 0;
for (size_t j = 0; j < num_curves; ++j) {
- const Mesh::Curve c = mesh->get_curve(j);
+ const Hair::Curve c = hair->get_curve(j);
num_keys += c.num_keys;
}
+ /* Catmull-Rom splines need extra CVs at the beginning and end of each curve. */
+ size_t num_keys_embree = num_keys;
+ if (use_curves) {
+ num_keys_embree += num_curves * 2;
+ }
+
/* Copy the CV data to Embree */
const int t_mid = (num_motion_steps - 1) / 2;
- const float *curve_radius = &mesh->curve_radius[0];
+ const float *curve_radius = &hair->curve_radius[0];
for (int t = 0; t < num_motion_steps; ++t) {
const float3 *verts;
if (t == t_mid || attr_mP == NULL) {
- verts = &mesh->curve_keys[0];
+ verts = &hair->curve_keys[0];
}
else {
int t_ = (t > t_mid) ? (t - 1) : t;
@@ -716,42 +747,28 @@ void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh
}
float4 *rtc_verts = (float4 *)rtcSetNewGeometryBuffer(
- geom_id, RTC_BUFFER_TYPE_VERTEX, t, RTC_FORMAT_FLOAT4, sizeof(float) * 4, num_keys);
- float4 *rtc_tangents = NULL;
- if (use_curves) {
- rtc_tangents = (float4 *)rtcSetNewGeometryBuffer(
- geom_id, RTC_BUFFER_TYPE_TANGENT, t, RTC_FORMAT_FLOAT4, sizeof(float) * 4, num_keys);
- assert(rtc_tangents);
- }
+ geom_id, RTC_BUFFER_TYPE_VERTEX, t, RTC_FORMAT_FLOAT4, sizeof(float) * 4, num_keys_embree);
+
assert(rtc_verts);
if (rtc_verts) {
- if (use_curves && rtc_tangents) {
- const size_t num_curves = mesh->num_curves();
+ if (use_curves) {
+ const size_t num_curves = hair->num_curves();
for (size_t j = 0; j < num_curves; ++j) {
- Mesh::Curve c = mesh->get_curve(j);
+ Hair::Curve c = hair->get_curve(j);
int fk = c.first_key;
- rtc_verts[0] = float3_to_float4(verts[fk]);
- rtc_verts[0].w = curve_radius[fk];
- rtc_tangents[0] = float3_to_float4(verts[fk + 1] - verts[fk]);
- rtc_tangents[0].w = curve_radius[fk + 1] - curve_radius[fk];
- ++fk;
int k = 1;
- for (; k < c.num_segments(); ++k, ++fk) {
+ for (; k < c.num_keys + 1; ++k, ++fk) {
rtc_verts[k] = float3_to_float4(verts[fk]);
rtc_verts[k].w = curve_radius[fk];
- rtc_tangents[k] = float3_to_float4((verts[fk + 1] - verts[fk - 1]) * 0.5f);
- rtc_tangents[k].w = (curve_radius[fk + 1] - curve_radius[fk - 1]) * 0.5f;
}
- rtc_verts[k] = float3_to_float4(verts[fk]);
- rtc_verts[k].w = curve_radius[fk];
- rtc_tangents[k] = float3_to_float4(verts[fk] - verts[fk - 1]);
- rtc_tangents[k].w = curve_radius[fk] - curve_radius[fk - 1];
- rtc_verts += c.num_keys;
- rtc_tangents += c.num_keys;
+ /* Duplicate Embree's Catmull-Rom spline CVs at the start and end of each curve. */
+ rtc_verts[0] = rtc_verts[1];
+ rtc_verts[k] = rtc_verts[k - 1];
+ rtc_verts += c.num_keys + 2;
}
}
else {
- for (size_t j = 0; j < num_keys; ++j) {
+ for (size_t j = 0; j < num_keys_embree; ++j) {
rtc_verts[j] = float3_to_float4(verts[j]);
rtc_verts[j].w = curve_radius[j];
}
@@ -760,23 +777,25 @@ void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh
}
}
-void BVHEmbree::add_curves(Object *ob, int i)
+void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i)
{
size_t prim_offset = pack.prim_index.size();
- const Mesh *mesh = ob->mesh;
const Attribute *attr_mP = NULL;
- size_t num_motion_steps = 1;
- if (mesh->has_motion_blur()) {
- attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ size_t num_geometry_motion_steps = 1;
+ if (hair->has_motion_blur()) {
+ attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if (attr_mP) {
- num_motion_steps = mesh->motion_steps;
+ num_geometry_motion_steps = hair->motion_steps;
}
}
- const size_t num_curves = mesh->num_curves();
+ const size_t num_motion_steps = min(num_geometry_motion_steps, RTC_MAX_TIME_STEP_COUNT);
+ assert(num_geometry_motion_steps <= RTC_MAX_TIME_STEP_COUNT);
+
+ const size_t num_curves = hair->num_curves();
size_t num_segments = 0;
for (size_t j = 0; j < num_curves; ++j) {
- Mesh::Curve c = mesh->get_curve(j);
+ Hair::Curve c = hair->get_curve(j);
assert(c.num_segments() > 0);
num_segments += c.num_segments();
}
@@ -793,8 +812,8 @@ void BVHEmbree::add_curves(Object *ob, int i)
enum RTCGeometryType type = (!use_curves) ?
RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE :
- (use_ribbons ? RTC_GEOMETRY_TYPE_FLAT_HERMITE_CURVE :
- RTC_GEOMETRY_TYPE_ROUND_HERMITE_CURVE);
+ (use_ribbons ? RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE :
+ RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE);
RTCGeometry geom_id = rtcNewGeometry(rtc_shared_device, type);
rtcSetGeometryTessellationRate(geom_id, curve_subdivisions);
@@ -802,9 +821,13 @@ void BVHEmbree::add_curves(Object *ob, int i)
geom_id, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT, sizeof(int), num_segments);
size_t rtc_index = 0;
for (size_t j = 0; j < num_curves; ++j) {
- Mesh::Curve c = mesh->get_curve(j);
+ Hair::Curve c = hair->get_curve(j);
for (size_t k = 0; k < c.num_segments(); ++k) {
rtc_indices[rtc_index] = c.first_key + k;
+ if (use_curves) {
+ /* Room for extra CVs at Catmull-Rom splines. */
+ rtc_indices[rtc_index] += j * 2;
+ }
/* Cycles specific data. */
pack.prim_object[prim_object_size + rtc_index] = i;
pack.prim_type[prim_type_size + rtc_index] = (PRIMITIVE_PACK_SEGMENT(
@@ -819,12 +842,12 @@ void BVHEmbree::add_curves(Object *ob, int i)
rtcSetGeometryBuildQuality(geom_id, build_quality);
rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
- update_curve_vertex_buffer(geom_id, mesh);
+ update_curve_vertex_buffer(geom_id, hair);
rtcSetGeometryUserData(geom_id, (void *)prim_offset);
rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func);
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
- rtcSetGeometryMask(geom_id, ob->visibility);
+ rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
rtcCommitGeometry(geom_id);
rtcAttachGeometryByID(scene, geom_id, i * 2 + 1);
@@ -840,10 +863,7 @@ void BVHEmbree::pack_nodes(const BVHNode *)
for (size_t i = 0; i < pack.prim_index.size(); ++i) {
if (pack.prim_index[i] != -1) {
- if (pack.prim_type[i] & PRIMITIVE_ALL_CURVE)
- pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->curve_offset;
- else
- pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->tri_offset;
+ pack.prim_index[i] += objects[pack.prim_object[i]]->geometry->prim_offset;
}
}
@@ -857,22 +877,22 @@ void BVHEmbree::pack_nodes(const BVHNode *)
size_t pack_prim_tri_verts_offset = prim_tri_verts_size;
size_t object_offset = 0;
- map<Mesh *, int> mesh_map;
+ map<Geometry *, int> geometry_map;
foreach (Object *ob, objects) {
- Mesh *mesh = ob->mesh;
- BVH *bvh = mesh->bvh;
+ Geometry *geom = ob->geometry;
+ BVH *bvh = geom->bvh;
- if (mesh->need_build_bvh(BVH_LAYOUT_EMBREE)) {
- if (mesh_map.find(mesh) == mesh_map.end()) {
+ if (geom->need_build_bvh(BVH_LAYOUT_EMBREE)) {
+ if (geometry_map.find(geom) == geometry_map.end()) {
prim_index_size += bvh->pack.prim_index.size();
prim_tri_verts_size += bvh->pack.prim_tri_verts.size();
- mesh_map[mesh] = 1;
+ geometry_map[geom] = 1;
}
}
}
- mesh_map.clear();
+ geometry_map.clear();
pack.prim_index.resize(prim_index_size);
pack.prim_type.resize(prim_index_size);
@@ -890,38 +910,37 @@ void BVHEmbree::pack_nodes(const BVHNode *)
/* merge */
foreach (Object *ob, objects) {
- Mesh *mesh = ob->mesh;
+ Geometry *geom = ob->geometry;
/* We assume that if mesh doesn't need own BVH it was already included
* into a top-level BVH and no packing here is needed.
*/
- if (!mesh->need_build_bvh(BVH_LAYOUT_EMBREE)) {
+ if (!geom->need_build_bvh(BVH_LAYOUT_EMBREE)) {
pack.object_node[object_offset++] = prim_offset;
continue;
}
- /* if mesh already added once, don't add it again, but used set
+ /* if geom already added once, don't add it again, but use the set
* node offset for this object */
- map<Mesh *, int>::iterator it = mesh_map.find(mesh);
+ map<Geometry *, int>::iterator it = geometry_map.find(geom);
- if (mesh_map.find(mesh) != mesh_map.end()) {
+ if (geometry_map.find(geom) != geometry_map.end()) {
int noffset = it->second;
pack.object_node[object_offset++] = noffset;
continue;
}
- BVHEmbree *bvh = (BVHEmbree *)mesh->bvh;
+ BVHEmbree *bvh = (BVHEmbree *)geom->bvh;
rtc_memory_monitor_func(stats, unaccounted_mem, true);
unaccounted_mem = 0;
- int mesh_tri_offset = mesh->tri_offset;
- int mesh_curve_offset = mesh->curve_offset;
+ int geom_prim_offset = geom->prim_offset;
/* fill in node indexes for instances */
pack.object_node[object_offset++] = prim_offset;
- mesh_map[mesh] = pack.object_node[object_offset - 1];
+ geometry_map[geom] = pack.object_node[object_offset - 1];
/* merge primitive, object and triangle indexes */
if (bvh->pack.prim_index.size()) {
@@ -932,11 +951,11 @@ void BVHEmbree::pack_nodes(const BVHNode *)
for (size_t i = 0; i < bvh_prim_index_size; ++i) {
if (bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
- pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_curve_offset;
+ pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset;
pack_prim_tri_index[pack_prim_index_offset] = -1;
}
else {
- pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_tri_offset;
+ pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset;
pack_prim_tri_index[pack_prim_index_offset] = bvh_prim_tri_index[i] +
pack_prim_tri_verts_offset;
}
@@ -966,15 +985,22 @@ void BVHEmbree::refit_nodes()
/* Update all vertex buffers, then tell Embree to rebuild/-fit the BVHs. */
unsigned geom_id = 0;
foreach (Object *ob, objects) {
- if (!params.top_level || (ob->is_traceable() && !ob->mesh->is_instanced())) {
- if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE && ob->mesh->num_triangles() > 0) {
- update_tri_vertex_buffer(rtcGetGeometry(scene, geom_id), ob->mesh);
- rtcCommitGeometry(rtcGetGeometry(scene, geom_id));
+ if (!params.top_level || (ob->is_traceable() && !ob->geometry->is_instanced())) {
+ Geometry *geom = ob->geometry;
+
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (mesh->num_triangles() > 0) {
+ update_tri_vertex_buffer(rtcGetGeometry(scene, geom_id), mesh);
+ rtcCommitGeometry(rtcGetGeometry(scene, geom_id));
+ }
}
-
- if (params.primitive_mask & PRIMITIVE_ALL_CURVE && ob->mesh->num_curves() > 0) {
- update_curve_vertex_buffer(rtcGetGeometry(scene, geom_id + 1), ob->mesh);
- rtcCommitGeometry(rtcGetGeometry(scene, geom_id + 1));
+ else if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ if (hair->num_curves() > 0) {
+ update_curve_vertex_buffer(rtcGetGeometry(scene, geom_id + 1), hair);
+ rtcCommitGeometry(rtcGetGeometry(scene, geom_id + 1));
+ }
}
}
geom_id += 2;
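
The switch to Catmull-Rom curves above changes the vertex indexing: each curve gets one duplicated CV at either end, so curve j's keys are shifted by 2 * j in the Embree vertex buffer, which is exactly why add_curves() adds j * 2 to each segment index. A small sketch of that arithmetic (the function name is illustrative only):

#include <cstddef>

/* Embree's Catmull-Rom curves read four CVs per segment, starting at the
 * returned index. With one padding CV prepended and appended per curve,
 * key m of curve j lands at padded position first_key + 2 * j + m + 1, so the
 * segment between keys k and k + 1 starts its four-CV window at: */
static std::size_t embree_segment_index(std::size_t first_key,
                                        std::size_t curve_index,
                                        std::size_t segment)
{
  return first_key + 2 * curve_index + segment;
}
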
diff --git a/intern/cycles/bvh/bvh_embree.h b/intern/cycles/bvh/bvh_embree.h
index 123e87dd9b0..eb121d060b7 100644
--- a/intern/cycles/bvh/bvh_embree.h
+++ b/intern/cycles/bvh/bvh_embree.h
@@ -31,6 +31,8 @@
CCL_NAMESPACE_BEGIN
+class Geometry;
+class Hair;
class Mesh;
class BVHEmbree : public BVH {
@@ -47,7 +49,7 @@ class BVHEmbree : public BVH {
protected:
friend class BVH;
BVHEmbree(const BVHParams &params,
- const vector<Mesh *> &meshes,
+ const vector<Geometry *> &geometry,
const vector<Object *> &objects);
virtual void pack_nodes(const BVHNode *) override;
@@ -55,8 +57,8 @@ class BVHEmbree : public BVH {
void add_object(Object *ob, int i);
void add_instance(Object *ob, int i);
- void add_curves(Object *ob, int i);
- void add_triangles(Object *ob, int i);
+ void add_curves(const Object *ob, const Hair *hair, int i);
+ void add_triangles(const Object *ob, const Mesh *mesh, int i);
ssize_t mem_used;
@@ -69,7 +71,7 @@ class BVHEmbree : public BVH {
private:
void delete_rtcScene();
void update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh);
- void update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh);
+ void update_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair);
static RTCDevice rtc_shared_device;
static int rtc_shared_users;
diff --git a/intern/cycles/bvh/bvh_optix.cpp b/intern/cycles/bvh/bvh_optix.cpp
index 86d755ab06a..26b64c24db5 100644
--- a/intern/cycles/bvh/bvh_optix.cpp
+++ b/intern/cycles/bvh/bvh_optix.cpp
@@ -18,17 +18,20 @@
#ifdef WITH_OPTIX
# include "bvh/bvh_optix.h"
+# include "render/geometry.h"
+# include "render/hair.h"
# include "render/mesh.h"
# include "render/object.h"
+# include "util/util_foreach.h"
# include "util/util_logging.h"
# include "util/util_progress.h"
CCL_NAMESPACE_BEGIN
BVHOptiX::BVHOptiX(const BVHParams &params_,
- const vector<Mesh *> &meshes_,
+ const vector<Geometry *> &geometry_,
const vector<Object *> &objects_)
- : BVH(params_, meshes_, objects_)
+ : BVH(params_, geometry_, objects_)
{
}
@@ -56,47 +59,52 @@ void BVHOptiX::copy_to_device(Progress &progress, DeviceScene *dscene)
void BVHOptiX::pack_blas()
{
// Bottom-level BVH can contain multiple primitive types, so merge them:
- assert(meshes.size() == 1 && objects.size() == 1); // These are build per-mesh
- Mesh *const mesh = meshes[0];
-
- if (params.primitive_mask & PRIMITIVE_ALL_CURVE && mesh->num_curves() > 0) {
- const size_t num_curves = mesh->num_curves();
- const size_t num_segments = mesh->num_segments();
- pack.prim_type.reserve(pack.prim_type.size() + num_segments);
- pack.prim_index.reserve(pack.prim_index.size() + num_segments);
- pack.prim_object.reserve(pack.prim_object.size() + num_segments);
- // 'pack.prim_time' is only used in geom_curve_intersect.h
- // It is not needed because of OPTIX_MOTION_FLAG_[START|END]_VANISH
-
- uint type = PRIMITIVE_CURVE;
- if (mesh->use_motion_blur && mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION))
- type = PRIMITIVE_MOTION_CURVE;
-
- for (size_t j = 0; j < num_curves; ++j) {
- const Mesh::Curve curve = mesh->get_curve(j);
- for (size_t k = 0; k < curve.num_segments(); ++k) {
- pack.prim_type.push_back_reserved(PRIMITIVE_PACK_SEGMENT(type, k));
- // Each curve segment points back to its curve index
- pack.prim_index.push_back_reserved(j);
- pack.prim_object.push_back_reserved(0);
+ assert(geometry.size() == 1 && objects.size() == 1); // These are built per-mesh
+ Geometry *const geom = geometry[0];
+
+ if (geom->type == Geometry::HAIR) {
+ Hair *const hair = static_cast<Hair *const>(geom);
+ if (hair->num_curves() > 0) {
+ const size_t num_curves = hair->num_curves();
+ const size_t num_segments = hair->num_segments();
+ pack.prim_type.reserve(pack.prim_type.size() + num_segments);
+ pack.prim_index.reserve(pack.prim_index.size() + num_segments);
+ pack.prim_object.reserve(pack.prim_object.size() + num_segments);
+ // 'pack.prim_time' is only used in geom_curve_intersect.h
+ // It is not needed because of OPTIX_MOTION_FLAG_[START|END]_VANISH
+
+ uint type = PRIMITIVE_CURVE;
+ if (hair->use_motion_blur && hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION))
+ type = PRIMITIVE_MOTION_CURVE;
+
+ for (size_t j = 0; j < num_curves; ++j) {
+ const Hair::Curve curve = hair->get_curve(j);
+ for (size_t k = 0; k < curve.num_segments(); ++k) {
+ pack.prim_type.push_back_reserved(PRIMITIVE_PACK_SEGMENT(type, k));
+ // Each curve segment points back to its curve index
+ pack.prim_index.push_back_reserved(j);
+ pack.prim_object.push_back_reserved(0);
+ }
}
}
}
-
- if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE && mesh->num_triangles() > 0) {
- const size_t num_triangles = mesh->num_triangles();
- pack.prim_type.reserve(pack.prim_type.size() + num_triangles);
- pack.prim_index.reserve(pack.prim_index.size() + num_triangles);
- pack.prim_object.reserve(pack.prim_object.size() + num_triangles);
-
- uint type = PRIMITIVE_TRIANGLE;
- if (mesh->use_motion_blur && mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION))
- type = PRIMITIVE_MOTION_TRIANGLE;
-
- for (size_t k = 0; k < num_triangles; ++k) {
- pack.prim_type.push_back_reserved(type);
- pack.prim_index.push_back_reserved(k);
- pack.prim_object.push_back_reserved(0);
+ else if (geom->type == Geometry::MESH) {
+ Mesh *const mesh = static_cast<Mesh *const>(geom);
+ if (mesh->num_triangles() > 0) {
+ const size_t num_triangles = mesh->num_triangles();
+ pack.prim_type.reserve(pack.prim_type.size() + num_triangles);
+ pack.prim_index.reserve(pack.prim_index.size() + num_triangles);
+ pack.prim_object.reserve(pack.prim_object.size() + num_triangles);
+
+ uint type = PRIMITIVE_TRIANGLE;
+ if (mesh->use_motion_blur && mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION))
+ type = PRIMITIVE_MOTION_TRIANGLE;
+
+ for (size_t k = 0; k < num_triangles; ++k) {
+ pack.prim_type.push_back_reserved(type);
+ pack.prim_index.push_back_reserved(k);
+ pack.prim_object.push_back_reserved(0);
+ }
}
}
@@ -116,8 +124,8 @@ void BVHOptiX::pack_tlas()
// Calculate total packed size
size_t prim_index_size = 0;
size_t prim_tri_verts_size = 0;
- foreach (Mesh *mesh, meshes) {
- BVH *const bvh = mesh->bvh;
+ foreach (Geometry *geom, geometry) {
+ BVH *const bvh = geom->bvh;
prim_index_size += bvh->pack.prim_index.size();
prim_tri_verts_size += bvh->pack.prim_tri_verts.size();
}
@@ -141,13 +149,12 @@ void BVHOptiX::pack_tlas()
pack.prim_tri_verts.resize(prim_tri_verts_size);
float4 *pack_prim_tri_verts = pack.prim_tri_verts.data();
- // Top-level BVH should only contain instances, see 'Mesh::need_build_bvh'
+ // Top-level BVH should only contain instances, see 'Geometry::need_build_bvh'
// Iterate over scene mesh list instead of objects, since the 'prim_offset' is calculated based
// on that list, which may be ordered differently from the object list.
- foreach (Mesh *mesh, meshes) {
- PackedBVH &bvh_pack = mesh->bvh->pack;
- int mesh_tri_offset = mesh->tri_offset;
- int mesh_curve_offset = mesh->curve_offset;
+ foreach (Geometry *geom, geometry) {
+ PackedBVH &bvh_pack = geom->bvh->pack;
+ int geom_prim_offset = geom->prim_offset;
// Merge primitive, object and triangle indexes
if (!bvh_pack.prim_index.empty()) {
@@ -158,16 +165,16 @@ void BVHOptiX::pack_tlas()
for (size_t i = 0; i < bvh_pack.prim_index.size(); i++, pack_offset++) {
if (bvh_pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
- pack_prim_index[pack_offset] = bvh_prim_index[i] + mesh_curve_offset;
+ pack_prim_index[pack_offset] = bvh_prim_index[i] + geom_prim_offset;
pack_prim_tri_index[pack_offset] = -1;
}
else {
- pack_prim_index[pack_offset] = bvh_prim_index[i] + mesh_tri_offset;
+ pack_prim_index[pack_offset] = bvh_prim_index[i] + geom_prim_offset;
pack_prim_tri_index[pack_offset] = bvh_prim_tri_index[i] + pack_verts_offset;
}
pack_prim_type[pack_offset] = bvh_prim_type[i];
- pack_prim_object[pack_offset] = 0; // Unused for instanced meshes
+ pack_prim_object[pack_offset] = 0; // Unused for instanced geometry
pack_prim_visibility[pack_offset] = bvh_prim_visibility[i];
}
}
@@ -182,15 +189,24 @@ void BVHOptiX::pack_tlas()
}
}
- // Merge visibility flags of all objects and fix object indices for non-instanced meshes
+ // Merge visibility flags of all objects and fix object indices for non-instanced geometry
foreach (Object *ob, objects) {
- Mesh *const mesh = ob->mesh;
- for (size_t i = 0; i < mesh->num_primitives(); ++i) {
- if (!ob->mesh->is_instanced()) {
- assert(pack.prim_object[mesh->prim_offset + i] == 0);
- pack.prim_object[mesh->prim_offset + i] = ob->get_device_index();
+ Geometry *const geom = ob->geometry;
+ size_t num_primitives = 0;
+
+ if (geom->type == Geometry::MESH) {
+ num_primitives = static_cast<Mesh *const>(geom)->num_triangles();
+ }
+ else if (geom->type == Geometry::HAIR) {
+ num_primitives = static_cast<Hair *const>(geom)->num_segments();
+ }
+
+ for (size_t i = 0; i < num_primitives; ++i) {
+ if (!geom->is_instanced()) {
+ assert(pack.prim_object[geom->optix_prim_offset + i] == 0);
+ pack.prim_object[geom->optix_prim_offset + i] = ob->get_device_index();
}
- pack.prim_visibility[mesh->prim_offset + i] |= ob->visibility_for_tracing();
+ pack.prim_visibility[geom->optix_prim_offset + i] |= ob->visibility_for_tracing();
}
}
}
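
pack_tlas() above now sizes its per-object loop from the geometry type instead of a Mesh-only primitive count, then ORs the object's tracing visibility into every primitive slot starting at the geometry's OptiX primitive offset. A hedged sketch of that merge with simplified stand-in types (the real code writes into Cycles' packed arrays and uses geom->optix_prim_offset):

#include <cstddef>
#include <cstdint>
#include <vector>

/* Simplified stand-ins, for illustration only. */
struct Geometry {
  enum Type { MESH, HAIR } type;
  std::size_t num_triangles; /* valid for MESH */
  std::size_t num_segments;  /* valid for HAIR */
  std::size_t prim_offset;   /* where this geometry starts in the packed arrays */
};

static void merge_visibility(const Geometry &geom,
                             std::uint32_t object_visibility,
                             std::vector<std::uint32_t> &prim_visibility)
{
  const std::size_t num_primitives = (geom.type == Geometry::MESH) ? geom.num_triangles :
                                                                     geom.num_segments;
  for (std::size_t i = 0; i < num_primitives; i++) {
    prim_visibility[geom.prim_offset + i] |= object_visibility;
  }
}
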
diff --git a/intern/cycles/bvh/bvh_optix.h b/intern/cycles/bvh/bvh_optix.h
index 35033fe635f..e4745b093b5 100644
--- a/intern/cycles/bvh/bvh_optix.h
+++ b/intern/cycles/bvh/bvh_optix.h
@@ -26,11 +26,16 @@
CCL_NAMESPACE_BEGIN
+class Geometry;
+class Optix;
+
class BVHOptiX : public BVH {
friend class BVH;
public:
- BVHOptiX(const BVHParams &params, const vector<Mesh *> &meshes, const vector<Object *> &objects);
+ BVHOptiX(const BVHParams &params,
+ const vector<Geometry *> &geometry,
+ const vector<Object *> &objects);
virtual ~BVHOptiX();
virtual void build(Progress &progress, Stats *) override;
diff --git a/intern/cycles/bvh/bvh_params.h b/intern/cycles/bvh/bvh_params.h
index 2731662a39d..5e2c4b63f1b 100644
--- a/intern/cycles/bvh/bvh_params.h
+++ b/intern/cycles/bvh/bvh_params.h
@@ -69,9 +69,6 @@ class BVHParams {
/* BVH layout to be built. */
BVHLayout bvh_layout;
- /* Mask of primitives to be included into the BVH. */
- int primitive_mask;
-
/* Use unaligned bounding boxes.
* Only used for curves BVH.
*/
@@ -120,8 +117,6 @@ class BVHParams {
bvh_layout = BVH_LAYOUT_BVH2;
use_unaligned_nodes = false;
- primitive_mask = PRIMITIVE_ALL;
-
num_motion_curve_steps = 0;
num_motion_triangle_steps = 0;
diff --git a/intern/cycles/bvh/bvh_split.cpp b/intern/cycles/bvh/bvh_split.cpp
index bd261c10d55..acdca0f13ad 100644
--- a/intern/cycles/bvh/bvh_split.cpp
+++ b/intern/cycles/bvh/bvh_split.cpp
@@ -20,6 +20,7 @@
#include "bvh/bvh_build.h"
#include "bvh/bvh_sort.h"
+#include "render/hair.h"
#include "render/mesh.h"
#include "render/object.h"
@@ -378,7 +379,7 @@ void BVHSpatialSplit::split_triangle_primitive(const Mesh *mesh,
}
}
-void BVHSpatialSplit::split_curve_primitive(const Mesh *mesh,
+void BVHSpatialSplit::split_curve_primitive(const Hair *hair,
const Transform *tfm,
int prim_index,
int segment_index,
@@ -388,11 +389,11 @@ void BVHSpatialSplit::split_curve_primitive(const Mesh *mesh,
BoundBox &right_bounds)
{
/* curve split: NOTE - Currently ignores curve width and needs to be fixed.*/
- Mesh::Curve curve = mesh->get_curve(prim_index);
+ Hair::Curve curve = hair->get_curve(prim_index);
const int k0 = curve.first_key + segment_index;
const int k1 = k0 + 1;
- float3 v0 = mesh->curve_keys[k0];
- float3 v1 = mesh->curve_keys[k1];
+ float3 v0 = hair->curve_keys[k0];
+ float3 v1 = hair->curve_keys[k1];
if (tfm != NULL) {
v0 = transform_point(tfm, v0);
@@ -436,13 +437,13 @@ void BVHSpatialSplit::split_triangle_reference(const BVHReference &ref,
}
void BVHSpatialSplit::split_curve_reference(const BVHReference &ref,
- const Mesh *mesh,
+ const Hair *hair,
int dim,
float pos,
BoundBox &left_bounds,
BoundBox &right_bounds)
{
- split_curve_primitive(mesh,
+ split_curve_primitive(hair,
NULL,
ref.prim_index(),
PRIMITIVE_UNPACK_SEGMENT(ref.prim_type()),
@@ -455,15 +456,22 @@ void BVHSpatialSplit::split_curve_reference(const BVHReference &ref,
void BVHSpatialSplit::split_object_reference(
const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds)
{
- Mesh *mesh = object->mesh;
- for (int tri_idx = 0; tri_idx < mesh->num_triangles(); ++tri_idx) {
- split_triangle_primitive(mesh, &object->tfm, tri_idx, dim, pos, left_bounds, right_bounds);
+ Geometry *geom = object->geometry;
+
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ for (int tri_idx = 0; tri_idx < mesh->num_triangles(); ++tri_idx) {
+ split_triangle_primitive(mesh, &object->tfm, tri_idx, dim, pos, left_bounds, right_bounds);
+ }
}
- for (int curve_idx = 0; curve_idx < mesh->num_curves(); ++curve_idx) {
- Mesh::Curve curve = mesh->get_curve(curve_idx);
- for (int segment_idx = 0; segment_idx < curve.num_keys - 1; ++segment_idx) {
- split_curve_primitive(
- mesh, &object->tfm, curve_idx, segment_idx, dim, pos, left_bounds, right_bounds);
+ else if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ for (int curve_idx = 0; curve_idx < hair->num_curves(); ++curve_idx) {
+ Hair::Curve curve = hair->get_curve(curve_idx);
+ for (int segment_idx = 0; segment_idx < curve.num_keys - 1; ++segment_idx) {
+ split_curve_primitive(
+ hair, &object->tfm, curve_idx, segment_idx, dim, pos, left_bounds, right_bounds);
+ }
}
}
}
@@ -481,13 +489,14 @@ void BVHSpatialSplit::split_reference(const BVHBuild &builder,
/* loop over vertices/edges. */
const Object *ob = builder.objects[ref.prim_object()];
- const Mesh *mesh = ob->mesh;
if (ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) {
+ Mesh *mesh = static_cast<Mesh *>(ob->geometry);
split_triangle_reference(ref, mesh, dim, pos, left_bounds, right_bounds);
}
else if (ref.prim_type() & PRIMITIVE_ALL_CURVE) {
- split_curve_reference(ref, mesh, dim, pos, left_bounds, right_bounds);
+ Hair *hair = static_cast<Hair *>(ob->geometry);
+ split_curve_reference(ref, hair, dim, pos, left_bounds, right_bounds);
}
else {
split_object_reference(ob, dim, pos, left_bounds, right_bounds);
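
split_curve_primitive() above, like its triangle counterpart, clips each segment against the splitting plane and grows both child bounds with the crossing point. A self-contained sketch of that clipping for a single segment follows; Vec3, Bounds and split_segment are minimal stand-ins, and the real code additionally applies the object-space transform and, per the NOTE above, still ignores curve width:

#include <algorithm>

struct Vec3 { float v[3]; };
struct Bounds {
  Vec3 lo{{1e30f, 1e30f, 1e30f}}, hi{{-1e30f, -1e30f, -1e30f}};
  void grow(const Vec3 &p)
  {
    for (int i = 0; i < 3; i++) {
      lo.v[i] = std::min(lo.v[i], p.v[i]);
      hi.v[i] = std::max(hi.v[i], p.v[i]);
    }
  }
};

static Vec3 lerp3(const Vec3 &a, const Vec3 &b, float t)
{
  Vec3 r;
  for (int i = 0; i < 3; i++)
    r.v[i] = a.v[i] + t * (b.v[i] - a.v[i]);
  return r;
}

/* Split the segment (v0, v1) at plane 'pos' along axis 'dim'. */
static void split_segment(const Vec3 &v0, const Vec3 &v1, int dim, float pos,
                          Bounds &left, Bounds &right)
{
  const float a = v0.v[dim], b = v1.v[dim];
  if (a <= pos) left.grow(v0);
  if (a >= pos) right.grow(v0);
  if (b <= pos) left.grow(v1);
  if (b >= pos) right.grow(v1);
  if ((a < pos && b > pos) || (a > pos && b < pos)) {
    const float t = (pos - a) / (b - a); /* segment crosses the plane */
    const Vec3 p = lerp3(v0, v1, t);
    left.grow(p);
    right.grow(p);
  }
}
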
diff --git a/intern/cycles/bvh/bvh_split.h b/intern/cycles/bvh/bvh_split.h
index eddd1c27f49..5f2e41cf343 100644
--- a/intern/cycles/bvh/bvh_split.h
+++ b/intern/cycles/bvh/bvh_split.h
@@ -24,6 +24,8 @@
CCL_NAMESPACE_BEGIN
class BVHBuild;
+class Hair;
+class Mesh;
struct Transform;
/* Object Split */
@@ -113,7 +115,7 @@ class BVHSpatialSplit {
float pos,
BoundBox &left_bounds,
BoundBox &right_bounds);
- void split_curve_primitive(const Mesh *mesh,
+ void split_curve_primitive(const Hair *hair,
const Transform *tfm,
int prim_index,
int segment_index,
@@ -134,7 +136,7 @@ class BVHSpatialSplit {
BoundBox &left_bounds,
BoundBox &right_bounds);
void split_curve_reference(const BVHReference &ref,
- const Mesh *mesh,
+ const Hair *hair,
int dim,
float pos,
BoundBox &left_bounds,
diff --git a/intern/cycles/bvh/bvh_unaligned.cpp b/intern/cycles/bvh/bvh_unaligned.cpp
index 1843ca403a5..f0995f343fe 100644
--- a/intern/cycles/bvh/bvh_unaligned.cpp
+++ b/intern/cycles/bvh/bvh_unaligned.cpp
@@ -16,7 +16,7 @@
#include "bvh/bvh_unaligned.h"
-#include "render/mesh.h"
+#include "render/hair.h"
#include "render/object.h"
#include "bvh/bvh_binning.h"
@@ -71,10 +71,10 @@ bool BVHUnaligned::compute_aligned_space(const BVHReference &ref, Transform *ali
if (type & PRIMITIVE_CURVE) {
const int curve_index = ref.prim_index();
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
- const Mesh *mesh = object->mesh;
- const Mesh::Curve &curve = mesh->get_curve(curve_index);
+ const Hair *hair = static_cast<const Hair *>(object->geometry);
+ const Hair::Curve &curve = hair->get_curve(curve_index);
const int key = curve.first_key + segment;
- const float3 v1 = mesh->curve_keys[key], v2 = mesh->curve_keys[key + 1];
+ const float3 v1 = hair->curve_keys[key], v2 = hair->curve_keys[key + 1];
float length;
const float3 axis = normalize_len(v2 - v1, &length);
if (length > 1e-6f) {
@@ -96,10 +96,10 @@ BoundBox BVHUnaligned::compute_aligned_prim_boundbox(const BVHReference &prim,
if (type & PRIMITIVE_CURVE) {
const int curve_index = prim.prim_index();
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
- const Mesh *mesh = object->mesh;
- const Mesh::Curve &curve = mesh->get_curve(curve_index);
+ const Hair *hair = static_cast<const Hair *>(object->geometry);
+ const Hair::Curve &curve = hair->get_curve(curve_index);
curve.bounds_grow(
- segment, &mesh->curve_keys[0], &mesh->curve_radius[0], aligned_space, bounds);
+ segment, &hair->curve_keys[0], &hair->curve_radius[0], aligned_space, bounds);
}
else {
bounds = prim.bounds().transformed(&aligned_space);
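
compute_aligned_space() above derives the unaligned node's space from the hair segment direction: normalize v2 - v1 and build an orthonormal frame around that axis, falling back to the default space when the segment is degenerate. A rough sketch of such a frame construction with minimal stand-in vector math, not the exact Cycles routine:

#include <cmath>

struct Vec3 { float x, y, z; };
static Vec3 cross(const Vec3 &a, const Vec3 &b)
{
  return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x};
}
static Vec3 normalize(const Vec3 &a)
{
  const float len = std::sqrt(a.x * a.x + a.y * a.y + a.z * a.z);
  return {a.x / len, a.y / len, a.z / len};
}

/* Build an orthonormal frame whose Z axis follows the hair segment, so the
 * segment's bound box can be computed in a space aligned with the curve. */
static void frame_from_segment(const Vec3 &v1, const Vec3 &v2, Vec3 axes[3])
{
  const Vec3 z = normalize({v2.x - v1.x, v2.y - v1.y, v2.z - v1.z});
  const Vec3 helper = std::fabs(z.x) < 0.9f ? Vec3{1.0f, 0.0f, 0.0f} : Vec3{0.0f, 1.0f, 0.0f};
  const Vec3 x = normalize(cross(helper, z));
  const Vec3 y = cross(z, x);
  axes[0] = x;
  axes[1] = y;
  axes[2] = z;
}
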
diff --git a/intern/cycles/cmake/external_libs.cmake b/intern/cycles/cmake/external_libs.cmake
index 5bf681792ca..0b082b11cf7 100644
--- a/intern/cycles/cmake/external_libs.cmake
+++ b/intern/cycles/cmake/external_libs.cmake
@@ -135,7 +135,7 @@ if(CYCLES_STANDALONE_REPOSITORY)
####
# embree
if(WITH_CYCLES_EMBREE)
- find_package(embree 3.2.4 REQUIRED)
+ find_package(embree 3.8.0 REQUIRED)
endif()
####
diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt
index 35a79356957..aa5b65a2b73 100644
--- a/intern/cycles/device/CMakeLists.txt
+++ b/intern/cycles/device/CMakeLists.txt
@@ -34,13 +34,17 @@ set(SRC
device_task.cpp
)
+set(SRC_CUDA
+ cuda/device_cuda.h
+ cuda/device_cuda_impl.cpp
+)
+
set(SRC_OPENCL
- opencl/opencl.h
+ opencl/device_opencl.h
+ opencl/device_opencl_impl.cpp
opencl/memory_manager.h
-
- opencl/opencl_split.cpp
- opencl/opencl_util.cpp
opencl/memory_manager.cpp
+ opencl/opencl_util.cpp
)
if(WITH_CYCLES_NETWORK)
@@ -98,4 +102,4 @@ endif()
include_directories(${INC})
include_directories(SYSTEM ${INC_SYS})
-cycles_add_library(cycles_device "${LIB}" ${SRC} ${SRC_OPENCL} ${SRC_HEADERS})
+cycles_add_library(cycles_device "${LIB}" ${SRC} ${SRC_CUDA} ${SRC_OPENCL} ${SRC_HEADERS})
diff --git a/intern/cycles/device/cuda/device_cuda.h b/intern/cycles/device/cuda/device_cuda.h
new file mode 100644
index 00000000000..3e397da895b
--- /dev/null
+++ b/intern/cycles/device/cuda/device_cuda.h
@@ -0,0 +1,269 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef WITH_CUDA
+
+# include "device/device.h"
+# include "device/device_denoising.h"
+# include "device/device_split_kernel.h"
+
+# include "util/util_map.h"
+
+# ifdef WITH_CUDA_DYNLOAD
+# include "cuew.h"
+# else
+# include "util/util_opengl.h"
+# include <cuda.h>
+# include <cudaGL.h>
+# endif
+
+CCL_NAMESPACE_BEGIN
+
+class CUDASplitKernel;
+
+class CUDADevice : public Device {
+
+ friend class CUDASplitKernelFunction;
+ friend class CUDASplitKernel;
+ friend class CUDAContextScope;
+
+ public:
+ DedicatedTaskPool task_pool;
+ CUdevice cuDevice;
+ CUcontext cuContext;
+ CUmodule cuModule, cuFilterModule;
+ size_t device_texture_headroom;
+ size_t device_working_headroom;
+ bool move_texture_to_host;
+ size_t map_host_used;
+ size_t map_host_limit;
+ int can_map_host;
+ int cuDevId;
+ int cuDevArchitecture;
+ bool first_error;
+ CUDASplitKernel *split_kernel;
+
+ struct CUDAMem {
+ CUDAMem() : texobject(0), array(0), use_mapped_host(false)
+ {
+ }
+
+ CUtexObject texobject;
+ CUarray array;
+
+ /* If true, a mapped host memory in shared_pointer is being used. */
+ bool use_mapped_host;
+ };
+ typedef map<device_memory *, CUDAMem> CUDAMemMap;
+ CUDAMemMap cuda_mem_map;
+
+ struct PixelMem {
+ GLuint cuPBO;
+ CUgraphicsResource cuPBOresource;
+ GLuint cuTexId;
+ int w, h;
+ };
+ map<device_ptr, PixelMem> pixel_mem_map;
+
+ /* Bindless Textures */
+ device_vector<TextureInfo> texture_info;
+ bool need_texture_info;
+
+ /* Kernels */
+ struct {
+ bool loaded;
+
+ CUfunction adaptive_stopping;
+ CUfunction adaptive_filter_x;
+ CUfunction adaptive_filter_y;
+ CUfunction adaptive_scale_samples;
+ int adaptive_num_threads_per_block;
+ } functions;
+
+ static bool have_precompiled_kernels();
+
+ virtual bool show_samples() const;
+
+ virtual BVHLayoutMask get_bvh_layout_mask() const;
+
+ void cuda_error_documentation();
+
+ bool cuda_error_(CUresult result, const string &stmt);
+
+ void cuda_error_message(const string &message);
+
+ CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_);
+
+ virtual ~CUDADevice();
+
+ bool support_device(const DeviceRequestedFeatures & /*requested_features*/);
+
+ bool use_adaptive_compilation();
+
+ bool use_split_kernel();
+
+ virtual string compile_kernel_get_common_cflags(
+ const DeviceRequestedFeatures &requested_features, bool filter = false, bool split = false);
+
+ string compile_kernel(const DeviceRequestedFeatures &requested_features,
+ const char *name,
+ const char *base = "cuda",
+ bool force_ptx = false);
+
+ virtual bool load_kernels(const DeviceRequestedFeatures &requested_features);
+
+ void load_functions();
+
+ void reserve_local_memory(const DeviceRequestedFeatures &requested_features);
+
+ void init_host_memory();
+
+ void load_texture_info();
+
+ void move_textures_to_host(size_t size, bool for_texture);
+
+ CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
+
+ void generic_copy_to(device_memory &mem);
+
+ void generic_free(device_memory &mem);
+
+ void mem_alloc(device_memory &mem);
+
+ void mem_copy_to(device_memory &mem);
+
+ void mem_copy_from(device_memory &mem, int y, int w, int h, int elem);
+
+ void mem_zero(device_memory &mem);
+
+ void mem_free(device_memory &mem);
+
+ device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/);
+
+ virtual void const_copy_to(const char *name, void *host, size_t size);
+
+ void global_alloc(device_memory &mem);
+
+ void global_free(device_memory &mem);
+
+ void tex_alloc(device_texture &mem);
+
+ void tex_free(device_texture &mem);
+
+ bool denoising_non_local_means(device_ptr image_ptr,
+ device_ptr guide_ptr,
+ device_ptr variance_ptr,
+ device_ptr out_ptr,
+ DenoisingTask *task);
+
+ bool denoising_construct_transform(DenoisingTask *task);
+
+ bool denoising_accumulate(device_ptr color_ptr,
+ device_ptr color_variance_ptr,
+ device_ptr scale_ptr,
+ int frame,
+ DenoisingTask *task);
+
+ bool denoising_solve(device_ptr output_ptr, DenoisingTask *task);
+
+ bool denoising_combine_halves(device_ptr a_ptr,
+ device_ptr b_ptr,
+ device_ptr mean_ptr,
+ device_ptr variance_ptr,
+ int r,
+ int4 rect,
+ DenoisingTask *task);
+
+ bool denoising_divide_shadow(device_ptr a_ptr,
+ device_ptr b_ptr,
+ device_ptr sample_variance_ptr,
+ device_ptr sv_variance_ptr,
+ device_ptr buffer_variance_ptr,
+ DenoisingTask *task);
+
+ bool denoising_get_feature(int mean_offset,
+ int variance_offset,
+ device_ptr mean_ptr,
+ device_ptr variance_ptr,
+ float scale,
+ DenoisingTask *task);
+
+ bool denoising_write_feature(int out_offset,
+ device_ptr from_ptr,
+ device_ptr buffer_ptr,
+ DenoisingTask *task);
+
+ bool denoising_detect_outliers(device_ptr image_ptr,
+ device_ptr variance_ptr,
+ device_ptr depth_ptr,
+ device_ptr output_ptr,
+ DenoisingTask *task);
+
+ void denoise(RenderTile &rtile, DenoisingTask &denoising);
+
+ void adaptive_sampling_filter(uint filter_sample,
+ WorkTile *wtile,
+ CUdeviceptr d_wtile,
+ CUstream stream = 0);
+ void adaptive_sampling_post(RenderTile &rtile,
+ WorkTile *wtile,
+ CUdeviceptr d_wtile,
+ CUstream stream = 0);
+
+ void path_trace(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles);
+
+ void film_convert(DeviceTask &task,
+ device_ptr buffer,
+ device_ptr rgba_byte,
+ device_ptr rgba_half);
+
+ void shader(DeviceTask &task);
+
+ CUdeviceptr map_pixels(device_ptr mem);
+
+ void unmap_pixels(device_ptr mem);
+
+ void pixels_alloc(device_memory &mem);
+
+ void pixels_copy_from(device_memory &mem, int y, int w, int h);
+
+ void pixels_free(device_memory &mem);
+
+ void draw_pixels(device_memory &mem,
+ int y,
+ int w,
+ int h,
+ int width,
+ int height,
+ int dx,
+ int dy,
+ int dw,
+ int dh,
+ bool transparent,
+ const DeviceDrawParams &draw_params);
+
+ void thread_run(DeviceTask *task);
+
+ virtual void task_add(DeviceTask &task);
+
+ virtual void task_wait();
+
+ virtual void task_cancel();
+};
+
+CCL_NAMESPACE_END
+
+#endif
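
device_cuda.h above only declares the device class; the CUDAContextScope helper it befriends is defined in device_cuda_impl.cpp below, and its job is the usual RAII push/pop of the device's CUcontext around driver calls. A minimal sketch of that pattern against the raw CUDA driver API; the class name here is illustrative and error handling is omitted, so treat it as a shape, not the verbatim implementation:

#include <cuda.h>

class ScopedCudaContext {
 public:
  explicit ScopedCudaContext(CUcontext ctx)
  {
    cuCtxPushCurrent(ctx); /* make the device's context current on this thread */
  }
  ~ScopedCudaContext()
  {
    CUcontext popped;
    cuCtxPopCurrent(&popped); /* restore whatever context was current before */
  }
};
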
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp
new file mode 100644
index 00000000000..0f261ef2f70
--- /dev/null
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -0,0 +1,2620 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef WITH_CUDA
+
+# include <climits>
+# include <limits.h>
+# include <stdio.h>
+# include <stdlib.h>
+# include <string.h>
+
+# include "device/cuda/device_cuda.h"
+# include "device/device_intern.h"
+# include "device/device_split_kernel.h"
+
+# include "render/buffers.h"
+
+# include "kernel/filter/filter_defines.h"
+
+# include "util/util_debug.h"
+# include "util/util_foreach.h"
+# include "util/util_logging.h"
+# include "util/util_map.h"
+# include "util/util_md5.h"
+# include "util/util_opengl.h"
+# include "util/util_path.h"
+# include "util/util_string.h"
+# include "util/util_system.h"
+# include "util/util_time.h"
+# include "util/util_types.h"
+# include "util/util_windows.h"
+
+# include "kernel/split/kernel_split_data_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+# ifndef WITH_CUDA_DYNLOAD
+
+/* Transparently implement some functions, so majority of the file does not need
+ * to worry about difference between dynamically loaded and linked CUDA at all.
+ */
+
+namespace {
+
+const char *cuewErrorString(CUresult result)
+{
+ /* We can only give the error code here without major code duplication; that
+ * should be enough since dynamic loading is only being disabled by folks
+ * who know what they're doing anyway.
+ *
+ * NOTE: Avoid call from several threads.
+ */
+ static string error;
+ error = string_printf("%d", result);
+ return error.c_str();
+}
+
+const char *cuewCompilerPath()
+{
+ return CYCLES_CUDA_NVCC_EXECUTABLE;
+}
+
+int cuewCompilerVersion()
+{
+ return (CUDA_VERSION / 100) + (CUDA_VERSION % 100 / 10);
+}
+
+} /* namespace */
+# endif /* WITH_CUDA_DYNLOAD */
+
+class CUDADevice;
+
+class CUDASplitKernel : public DeviceSplitKernel {
+ CUDADevice *device;
+
+ public:
+ explicit CUDASplitKernel(CUDADevice *device);
+
+ virtual uint64_t state_buffer_size(device_memory &kg, device_memory &data, size_t num_threads);
+
+ virtual bool enqueue_split_kernel_data_init(const KernelDimensions &dim,
+ RenderTile &rtile,
+ int num_global_elements,
+ device_memory &kernel_globals,
+ device_memory &kernel_data_,
+ device_memory &split_data,
+ device_memory &ray_state,
+ device_memory &queue_index,
+ device_memory &use_queues_flag,
+ device_memory &work_pool_wgs);
+
+ virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name,
+ const DeviceRequestedFeatures &);
+ virtual int2 split_kernel_local_size();
+ virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask *task);
+};
+
+/* Utility to push/pop CUDA context. */
+class CUDAContextScope {
+ public:
+ CUDAContextScope(CUDADevice *device);
+ ~CUDAContextScope();
+
+ private:
+ CUDADevice *device;
+};
+
+bool CUDADevice::have_precompiled_kernels()
+{
+ string cubins_path = path_get("lib");
+ return path_exists(cubins_path);
+}
+
+bool CUDADevice::show_samples() const
+{
+ /* The CUDADevice only processes one tile at a time, so showing samples is fine. */
+ return true;
+}
+
+BVHLayoutMask CUDADevice::get_bvh_layout_mask() const
+{
+ return BVH_LAYOUT_BVH2;
+}
+
+void CUDADevice::cuda_error_documentation()
+{
+ if (first_error) {
+ fprintf(stderr, "\nRefer to the Cycles GPU rendering documentation for possible solutions:\n");
+ fprintf(stderr,
+ "https://docs.blender.org/manual/en/latest/render/cycles/gpu_rendering.html\n\n");
+ first_error = false;
+ }
+}
+
+# define cuda_assert(stmt) \
+ { \
+ CUresult result = stmt; \
+\
+ if (result != CUDA_SUCCESS) { \
+ string message = string_printf( \
+ "CUDA error: %s in %s, line %d", cuewErrorString(result), #stmt, __LINE__); \
+ if (error_msg == "") \
+ error_msg = message; \
+ fprintf(stderr, "%s\n", message.c_str()); \
+ /*cuda_abort();*/ \
+ cuda_error_documentation(); \
+ } \
+ } \
+ (void)0
+
+bool CUDADevice::cuda_error_(CUresult result, const string &stmt)
+{
+ if (result == CUDA_SUCCESS)
+ return false;
+
+ string message = string_printf("CUDA error at %s: %s", stmt.c_str(), cuewErrorString(result));
+ if (error_msg == "")
+ error_msg = message;
+ fprintf(stderr, "%s\n", message.c_str());
+ cuda_error_documentation();
+ return true;
+}
+
+# define cuda_error(stmt) cuda_error_(stmt, # stmt)
+
+void CUDADevice::cuda_error_message(const string &message)
+{
+ if (error_msg == "")
+ error_msg = message;
+ fprintf(stderr, "%s\n", message.c_str());
+ cuda_error_documentation();
+}
+
+CUDADevice::CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_)
+ : Device(info, stats, profiler, background_), texture_info(this, "__texture_info", MEM_GLOBAL)
+{
+ first_error = true;
+ background = background_;
+
+ cuDevId = info.num;
+ cuDevice = 0;
+ cuContext = 0;
+
+ cuModule = 0;
+ cuFilterModule = 0;
+
+ split_kernel = NULL;
+
+ need_texture_info = false;
+
+ device_texture_headroom = 0;
+ device_working_headroom = 0;
+ move_texture_to_host = false;
+ map_host_limit = 0;
+ map_host_used = 0;
+ can_map_host = 0;
+
+ functions.loaded = false;
+
+ /* Initialize CUDA. */
+ if (cuda_error(cuInit(0)))
+ return;
+
+ /* Setup device and context. */
+ if (cuda_error(cuDeviceGet(&cuDevice, cuDevId)))
+ return;
+
+ /* CU_CTX_MAP_HOST for mapping host memory when out of device memory.
+ * CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render,
+ * so we can predict which memory to map to host. */
+ cuda_assert(
+ cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
+
+ unsigned int ctx_flags = CU_CTX_LMEM_RESIZE_TO_MAX;
+ if (can_map_host) {
+ ctx_flags |= CU_CTX_MAP_HOST;
+ init_host_memory();
+ }
+
+ /* Create context. */
+ CUresult result;
+
+ if (background) {
+ result = cuCtxCreate(&cuContext, ctx_flags, cuDevice);
+ }
+ else {
+ result = cuGLCtxCreate(&cuContext, ctx_flags, cuDevice);
+
+ if (result != CUDA_SUCCESS) {
+ result = cuCtxCreate(&cuContext, ctx_flags, cuDevice);
+ background = true;
+ }
+ }
+
+ if (cuda_error_(result, "cuCtxCreate"))
+ return;
+
+ int major, minor;
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
+ cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
+ cuDevArchitecture = major * 100 + minor * 10;
+
+ /* Pop context set by cuCtxCreate. */
+ cuCtxPopCurrent(NULL);
+}
+
+CUDADevice::~CUDADevice()
+{
+ task_pool.stop();
+
+ delete split_kernel;
+
+ texture_info.free();
+
+ cuda_assert(cuCtxDestroy(cuContext));
+}
+
+bool CUDADevice::support_device(const DeviceRequestedFeatures & /*requested_features*/)
+{
+ int major, minor;
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
+ cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
+
+ /* We only support sm_30 and above */
+ if (major < 3) {
+ cuda_error_message(
+ string_printf("CUDA device supported only with compute capability 3.0 or up, found %d.%d.",
+ major,
+ minor));
+ return false;
+ }
+
+ return true;
+}
+
+bool CUDADevice::use_adaptive_compilation()
+{
+ return DebugFlags().cuda.adaptive_compile;
+}
+
+bool CUDADevice::use_split_kernel()
+{
+ return DebugFlags().cuda.split_kernel;
+}
+
+/* Common NVCC flags which stay the same regardless of shading model and
+ * kernel sources MD5, and depend only on the compiler or compilation settings.
+ */
+string CUDADevice::compile_kernel_get_common_cflags(
+ const DeviceRequestedFeatures &requested_features, bool filter, bool split)
+{
+ const int machine = system_cpu_bits();
+ const string source_path = path_get("source");
+ const string include_path = source_path;
+ string cflags = string_printf(
+ "-m%d "
+ "--ptxas-options=\"-v\" "
+ "--use_fast_math "
+ "-DNVCC "
+ "-I\"%s\"",
+ machine,
+ include_path.c_str());
+ if (!filter && use_adaptive_compilation()) {
+ cflags += " " + requested_features.get_build_options();
+ }
+ const char *extra_cflags = getenv("CYCLES_CUDA_EXTRA_CFLAGS");
+ if (extra_cflags) {
+ cflags += string(" ") + string(extra_cflags);
+ }
+# ifdef WITH_CYCLES_DEBUG
+ cflags += " -D__KERNEL_DEBUG__";
+# endif
+
+ if (split) {
+ cflags += " -D__SPLIT__";
+ }
+
+ return cflags;
+}
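+
+/* For illustration only (the include path is hypothetical), the resulting
+ * flags on a 64-bit build look roughly like:
+ *   -m64 --ptxas-options="-v" --use_fast_math -DNVCC -I"/path/to/cycles/source"
+ * with feature defines appended for adaptive compilation, any
+ * CYCLES_CUDA_EXTRA_CFLAGS, -D__KERNEL_DEBUG__ for debug builds and
+ * -D__SPLIT__ for split kernel builds. */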
+
+string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_features,
+ const char *name,
+ const char *base,
+ bool force_ptx)
+{
+ /* Compute kernel name. */
+ int major, minor;
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
+ cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
+
+ /* Attempt to use kernel provided with Blender. */
+ if (!use_adaptive_compilation()) {
+ if (!force_ptx) {
+ const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
+ VLOG(1) << "Testing for pre-compiled kernel " << cubin << ".";
+ if (path_exists(cubin)) {
+ VLOG(1) << "Using precompiled kernel.";
+ return cubin;
+ }
+ }
+
+ const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor));
+ VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
+ if (path_exists(ptx)) {
+ VLOG(1) << "Using precompiled kernel.";
+ return ptx;
+ }
+ }
+
+ /* Try to use locally compiled kernel. */
+ string source_path = path_get("source");
+ const string source_md5 = path_files_md5_hash(source_path);
+
+ /* We include cflags in the md5, so that changing the CUDA toolkit or other
+ * compiler command line arguments makes sure the cubin gets re-built.
+ */
+ string common_cflags = compile_kernel_get_common_cflags(
+ requested_features, strstr(name, "filter") != NULL, strstr(name, "split") != NULL);
+ const string kernel_md5 = util_md5_string(source_md5 + common_cflags);
+
+ const char *const kernel_ext = force_ptx ? "ptx" : "cubin";
+ const char *const kernel_arch = force_ptx ? "compute" : "sm";
+ const string cubin_file = string_printf(
+ "cycles_%s_%s_%d%d_%s.%s", name, kernel_arch, major, minor, kernel_md5.c_str(), kernel_ext);
+ const string cubin = path_cache_get(path_join("kernels", cubin_file));
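+ /* For example (illustrative MD5), this resolves to a cache path like
+ * "kernels/cycles_kernel_sm_75_<md5>.cubin", so changing compiler flags or
+ * kernel sources produces a new file. */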
+ VLOG(1) << "Testing for locally compiled kernel " << cubin << ".";
+ if (path_exists(cubin)) {
+ VLOG(1) << "Using locally compiled kernel.";
+ return cubin;
+ }
+
+# ifdef _WIN32
+ if (!use_adaptive_compilation() && have_precompiled_kernels()) {
+ if (major < 3) {
+ cuda_error_message(
+ string_printf("CUDA device requires compute capability 3.0 or up, "
+ "found %d.%d. Your GPU is not supported.",
+ major,
+ minor));
+ }
+ else {
+ cuda_error_message(
+ string_printf("CUDA binary kernel for this graphics card compute "
+ "capability (%d.%d) not found.",
+ major,
+ minor));
+ }
+ return string();
+ }
+# endif
+
+ /* Compile. */
+ const char *const nvcc = cuewCompilerPath();
+ if (nvcc == NULL) {
+ cuda_error_message(
+ "CUDA nvcc compiler not found. "
+ "Install CUDA toolkit in default location.");
+ return string();
+ }
+
+ const int nvcc_cuda_version = cuewCompilerVersion();
+ VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << nvcc_cuda_version << ".";
+ if (nvcc_cuda_version < 80) {
+ printf(
+ "Unsupported CUDA version %d.%d detected, "
+ "you need CUDA 8.0 or newer.\n",
+ nvcc_cuda_version / 10,
+ nvcc_cuda_version % 10);
+ return string();
+ }
+ else if (nvcc_cuda_version != 101) {
+ printf(
+ "CUDA version %d.%d detected, build may succeed but only "
+ "CUDA 10.1 is officially supported.\n",
+ nvcc_cuda_version / 10,
+ nvcc_cuda_version % 10);
+ }
+
+ double starttime = time_dt();
+
+ path_create_directories(cubin);
+
+ source_path = path_join(path_join(source_path, "kernel"),
+ path_join("kernels", path_join(base, string_printf("%s.cu", name))));
+
+ string command = string_printf(
+ "\"%s\" "
+ "-arch=%s_%d%d "
+ "--%s \"%s\" "
+ "-o \"%s\" "
+ "%s",
+ nvcc,
+ kernel_arch,
+ major,
+ minor,
+ kernel_ext,
+ source_path.c_str(),
+ cubin.c_str(),
+ common_cflags.c_str());
+
+ printf("Compiling CUDA kernel ...\n%s\n", command.c_str());
+
+# ifdef _WIN32
+ command = "call " + command;
+# endif
+ if (system(command.c_str()) != 0) {
+ cuda_error_message(
+ "Failed to execute compilation command, "
+ "see console for details.");
+ return string();
+ }
+
+ /* Verify if compilation succeeded */
+ if (!path_exists(cubin)) {
+ cuda_error_message(
+ "CUDA kernel compilation failed, "
+ "see console for details.");
+ return string();
+ }
+
+ printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);
+
+ return cubin;
+}
+
+bool CUDADevice::load_kernels(const DeviceRequestedFeatures &requested_features)
+{
+ /* TODO(sergey): Support kernel re-loading for CUDA devices.
+ *
+ * Currently re-loading the kernel will invalidate memory pointers,
+ * causing problems in cuCtxSynchronize.
+ */
+ if (cuFilterModule && cuModule) {
+ VLOG(1) << "Skipping kernel reload, not currently supported.";
+ return true;
+ }
+
+ /* check if cuda init succeeded */
+ if (cuContext == 0)
+ return false;
+
+ /* check if GPU is supported */
+ if (!support_device(requested_features))
+ return false;
+
+ /* get kernel */
+ const char *kernel_name = use_split_kernel() ? "kernel_split" : "kernel";
+ string cubin = compile_kernel(requested_features, kernel_name);
+ if (cubin.empty())
+ return false;
+
+ const char *filter_name = "filter";
+ string filter_cubin = compile_kernel(requested_features, filter_name);
+ if (filter_cubin.empty())
+ return false;
+
+ /* open module */
+ CUDAContextScope scope(this);
+
+ string cubin_data;
+ CUresult result;
+
+ if (path_read_text(cubin, cubin_data))
+ result = cuModuleLoadData(&cuModule, cubin_data.c_str());
+ else
+ result = CUDA_ERROR_FILE_NOT_FOUND;
+
+ if (cuda_error_(result, "cuModuleLoad"))
+ cuda_error_message(string_printf("Failed loading CUDA kernel %s.", cubin.c_str()));
+
+ if (path_read_text(filter_cubin, cubin_data))
+ result = cuModuleLoadData(&cuFilterModule, cubin_data.c_str());
+ else
+ result = CUDA_ERROR_FILE_NOT_FOUND;
+
+ if (cuda_error_(result, "cuModuleLoad"))
+ cuda_error_message(string_printf("Failed loading CUDA kernel %s.", filter_cubin.c_str()));
+
+ if (result == CUDA_SUCCESS) {
+ reserve_local_memory(requested_features);
+ }
+
+ load_functions();
+
+ return (result == CUDA_SUCCESS);
+}
+
+void CUDADevice::load_functions()
+{
+ /* TODO: load all functions here. */
+ if (functions.loaded) {
+ return;
+ }
+ functions.loaded = true;
+
+ cuda_assert(cuModuleGetFunction(
+ &functions.adaptive_stopping, cuModule, "kernel_cuda_adaptive_stopping"));
+ cuda_assert(cuModuleGetFunction(
+ &functions.adaptive_filter_x, cuModule, "kernel_cuda_adaptive_filter_x"));
+ cuda_assert(cuModuleGetFunction(
+ &functions.adaptive_filter_y, cuModule, "kernel_cuda_adaptive_filter_y"));
+ cuda_assert(cuModuleGetFunction(
+ &functions.adaptive_scale_samples, cuModule, "kernel_cuda_adaptive_scale_samples"));
+
+ cuda_assert(cuFuncSetCacheConfig(functions.adaptive_stopping, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(functions.adaptive_filter_x, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(functions.adaptive_filter_y, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(functions.adaptive_scale_samples, CU_FUNC_CACHE_PREFER_L1));
+
+ int unused_min_blocks;
+ cuda_assert(cuOccupancyMaxPotentialBlockSize(&unused_min_blocks,
+ &functions.adaptive_num_threads_per_block,
+ functions.adaptive_scale_samples,
+ NULL,
+ 0,
+ 0));
+}
+
+void CUDADevice::reserve_local_memory(const DeviceRequestedFeatures &requested_features)
+{
+ if (use_split_kernel()) {
+ /* The split kernel mostly uses global memory and adaptive compilation,
+ * so it is currently difficult to predict how much local memory is needed. */
+ return;
+ }
+
+ /* Together with CU_CTX_LMEM_RESIZE_TO_MAX, this reserves local memory
+ * needed for kernel launches, so that we can reliably figure out when
+ * to allocate scene data in mapped host memory. */
+ CUDAContextScope scope(this);
+
+ size_t total = 0, free_before = 0, free_after = 0;
+ cuMemGetInfo(&free_before, &total);
+
+ /* Get kernel function. */
+ CUfunction cuPathTrace;
+
+ if (requested_features.use_integrator_branched) {
+ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
+ }
+ else {
+ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
+ }
+
+ cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
+
+ int min_blocks, num_threads_per_block;
+ cuda_assert(cuOccupancyMaxPotentialBlockSize(
+ &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
+
+ /* Launch the kernel; using just 1 block appears sufficient to reserve
+ * memory for all multiprocessors. It would still be good to do this in
+ * parallel for the multi-GPU case to make it faster. */
+ CUdeviceptr d_work_tiles = 0;
+ uint total_work_size = 0;
+
+ void *args[] = {&d_work_tiles, &total_work_size};
+
+ cuda_assert(cuLaunchKernel(cuPathTrace, 1, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
+
+ cuda_assert(cuCtxSynchronize());
+
+ cuMemGetInfo(&free_after, &total);
+ VLOG(1) << "Local memory reserved " << string_human_readable_number(free_before - free_after)
+ << " bytes. (" << string_human_readable_size(free_before - free_after) << ")";
+
+# if 0
+ /* For testing mapped host memory, fill up device memory. */
+ const size_t keep_mb = 1024;
+
+ while (free_after > keep_mb * 1024 * 1024LL) {
+ CUdeviceptr tmp;
+ cuda_assert(cuMemAlloc(&tmp, 10 * 1024 * 1024LL));
+ cuMemGetInfo(&free_after, &total);
+ }
+# endif
+}
+
+void CUDADevice::init_host_memory()
+{
+ /* Limit amount of host mapped memory, because allocating too much can
+ * cause system instability. Leave at least half or 4 GB of system
+ * memory free, whichever is smaller. */
+ size_t default_limit = 4 * 1024 * 1024 * 1024LL;
+ size_t system_ram = system_physical_ram();
+
+ if (system_ram > 0) {
+ if (system_ram / 2 > default_limit) {
+ map_host_limit = system_ram - default_limit;
+ }
+ else {
+ map_host_limit = system_ram / 2;
+ }
+ }
+ else {
+ VLOG(1) << "Mapped host memory disabled, failed to get system RAM";
+ map_host_limit = 0;
+ }
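+
+ /* Worked example (illustrative): with 32 GB of system RAM, half (16 GB)
+ * exceeds the 4 GB default, so the limit is 32 GB - 4 GB = 28 GB; with
+ * 6 GB of RAM, half (3 GB) is smaller, so the limit is 3 GB. */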
+
+ /* Amount of device memory to keep free after texture memory
+ * and working memory allocations, respectively. We set the working
+ * memory limit headroom lower so that some space is left after all
+ * texture memory allocations. */
+ device_working_headroom = 32 * 1024 * 1024LL; // 32MB
+ device_texture_headroom = 128 * 1024 * 1024LL; // 128MB
+
+ VLOG(1) << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
+ << " bytes. (" << string_human_readable_size(map_host_limit) << ")";
+}
+
+void CUDADevice::load_texture_info()
+{
+ if (need_texture_info) {
+ texture_info.copy_to_device();
+ need_texture_info = false;
+ }
+}
+
+void CUDADevice::move_textures_to_host(size_t size, bool for_texture)
+{
+ /* Signal to reallocate textures in host memory only. */
+ move_texture_to_host = true;
+
+ while (size > 0) {
+ /* Find suitable memory allocation to move. */
+ device_memory *max_mem = NULL;
+ size_t max_size = 0;
+ bool max_is_image = false;
+
+ foreach (CUDAMemMap::value_type &pair, cuda_mem_map) {
+ device_memory &mem = *pair.first;
+ CUDAMem *cmem = &pair.second;
+
+ bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
+ (&mem != &texture_info);
+ bool is_image = is_texture && (mem.data_height > 1);
+
+ /* Can't move this type of memory. */
+ if (!is_texture || cmem->array) {
+ continue;
+ }
+
+ /* Already in host memory. */
+ if (cmem->use_mapped_host) {
+ continue;
+ }
+
+ /* When making room for another texture allocation, only move image textures. */
+ if (for_texture && !is_image) {
+ continue;
+ }
+
+ /* Try to move largest allocation, prefer moving images. */
+ if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
+ max_is_image = is_image;
+ max_size = mem.device_size;
+ max_mem = &mem;
+ }
+ }
+
+ /* Move to host memory. This part is mutex protected since
+ * multiple CUDA devices could be moving the memory. The
+ * first one will do it, and the rest will adopt the pointer. */
+ if (max_mem) {
+ VLOG(1) << "Move memory from device to host: " << max_mem->name;
+
+ static thread_mutex move_mutex;
+ thread_scoped_lock lock(move_mutex);
+
+ /* Preserve the original device pointer; in the multi-device case
+ * we can't change it because the pointer mapping would break. */
+ device_ptr prev_pointer = max_mem->device_pointer;
+ size_t prev_size = max_mem->device_size;
+
+ mem_copy_to(*max_mem);
+ size = (max_size >= size) ? 0 : size - max_size;
+
+ max_mem->device_pointer = prev_pointer;
+ max_mem->device_size = prev_size;
+ }
+ else {
+ break;
+ }
+ }
+
+ /* Update texture info array with new pointers. */
+ load_texture_info();
+
+ move_texture_to_host = false;
+}
+
+CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_padding)
+{
+ CUDAContextScope scope(this);
+
+ CUdeviceptr device_pointer = 0;
+ size_t size = mem.memory_size() + pitch_padding;
+
+ CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY;
+ const char *status = "";
+
+ /* First try allocating in device memory, respecting headroom. We make
+ * an exception for texture info. It is small and frequently accessed,
+ * so treat it as working memory.
+ *
+ * If there is not enough room for working memory, we will try to move
+ * textures to host memory, assuming the performance impact would have
+ * been worse for working memory. */
+ bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
+ bool is_image = is_texture && (mem.data_height > 1);
+
+ size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
+
+ size_t total = 0, free = 0;
+ cuMemGetInfo(&free, &total);
+
+ /* Move textures to host memory if needed. */
+ if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
+ move_textures_to_host(size + headroom - free, is_texture);
+ cuMemGetInfo(&free, &total);
+ }
+
+ /* Allocate in device memory. */
+ if (!move_texture_to_host && (size + headroom) < free) {
+ mem_alloc_result = cuMemAlloc(&device_pointer, size);
+ if (mem_alloc_result == CUDA_SUCCESS) {
+ status = " in device memory";
+ }
+ }
+
+ /* Fall back to mapped host memory if needed and possible. */
+
+ void *shared_pointer = 0;
+
+ if (mem_alloc_result != CUDA_SUCCESS && can_map_host) {
+ if (mem.shared_pointer) {
+ /* Another device already allocated host memory. */
+ mem_alloc_result = CUDA_SUCCESS;
+ shared_pointer = mem.shared_pointer;
+ }
+ else if (map_host_used + size < map_host_limit) {
+ /* Allocate host memory ourselves. */
+ mem_alloc_result = cuMemHostAlloc(
+ &shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
+
+ assert((mem_alloc_result == CUDA_SUCCESS && shared_pointer != 0) ||
+ (mem_alloc_result != CUDA_SUCCESS && shared_pointer == 0));
+ }
+
+ if (mem_alloc_result == CUDA_SUCCESS) {
+ cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, shared_pointer, 0));
+ map_host_used += size;
+ status = " in host memory";
+ }
+ else {
+ status = " failed, out of host memory";
+ }
+ }
+
+ if (mem_alloc_result != CUDA_SUCCESS) {
+ status = " failed, out of device and host memory";
+ cuda_assert(mem_alloc_result);
+ }
+
+ if (mem.name) {
+ VLOG(1) << "Buffer allocate: " << mem.name << ", "
+ << string_human_readable_number(mem.memory_size()) << " bytes. ("
+ << string_human_readable_size(mem.memory_size()) << ")" << status;
+ }
+
+ mem.device_pointer = (device_ptr)device_pointer;
+ mem.device_size = size;
+ stats.mem_alloc(size);
+
+ if (!mem.device_pointer) {
+ return NULL;
+ }
+
+ /* Insert into map of allocations. */
+ CUDAMem *cmem = &cuda_mem_map[&mem];
+ if (shared_pointer != 0) {
+ /* Replace host pointer with our host allocation. Only works if
+ * CUDA memory layout is the same and has no pitch padding. Also
+ * does not work if we move textures to host during a render,
+ * since other devices might be using the memory. */
+
+ if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
+ mem.host_pointer != shared_pointer) {
+ memcpy(shared_pointer, mem.host_pointer, size);
+
+ /* A call to device_memory::host_free() should be preceded by
+ * a call to device_memory::device_free() for host memory
+ * allocated by a device to be handled properly. Two exceptions
+ * are here and a call in OptiXDevice::generic_alloc(), where
+ * the current host memory can be assumed to be allocated by
+ * device_memory::host_alloc(), not by a device. */
+
+ mem.host_free();
+ mem.host_pointer = shared_pointer;
+ }
+ mem.shared_pointer = shared_pointer;
+ mem.shared_counter++;
+ cmem->use_mapped_host = true;
+ }
+ else {
+ cmem->use_mapped_host = false;
+ }
+
+ return cmem;
+}
+
+void CUDADevice::generic_copy_to(device_memory &mem)
+{
+ if (!mem.host_pointer || !mem.device_pointer) {
+ return;
+ }
+
+ /* If use_mapped_host of mem is false, the current device only uses device memory allocated by
+ * cuMemAlloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
+ * mem.host_pointer. */
+ if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
+ const CUDAContextScope scope(this);
+ cuda_assert(
+ cuMemcpyHtoD((CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size()));
+ }
+}
+
+void CUDADevice::generic_free(device_memory &mem)
+{
+ if (mem.device_pointer) {
+ CUDAContextScope scope(this);
+ const CUDAMem &cmem = cuda_mem_map[&mem];
+
+ /* If cmem.use_mapped_host is true, reference counting is used
+ * to safely free mapped host memory. */
+
+ if (cmem.use_mapped_host) {
+ assert(mem.shared_pointer);
+ if (mem.shared_pointer) {
+ assert(mem.shared_counter > 0);
+ if (--mem.shared_counter == 0) {
+ if (mem.host_pointer == mem.shared_pointer) {
+ mem.host_pointer = 0;
+ }
+ cuMemFreeHost(mem.shared_pointer);
+ mem.shared_pointer = 0;
+ }
+ }
+ map_host_used -= mem.device_size;
+ }
+ else {
+ /* Free device memory. */
+ cuMemFree(mem.device_pointer);
+ }
+
+ stats.mem_free(mem.device_size);
+ mem.device_pointer = 0;
+ mem.device_size = 0;
+
+ cuda_mem_map.erase(cuda_mem_map.find(&mem));
+ }
+}
+
+void CUDADevice::mem_alloc(device_memory &mem)
+{
+ if (mem.type == MEM_PIXELS && !background) {
+ pixels_alloc(mem);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ assert(!"mem_alloc not supported for textures.");
+ }
+ else if (mem.type == MEM_GLOBAL) {
+ assert(!"mem_alloc not supported for global memory.");
+ }
+ else {
+ generic_alloc(mem);
+ }
+}
+
+void CUDADevice::mem_copy_to(device_memory &mem)
+{
+ if (mem.type == MEM_PIXELS) {
+ assert(!"mem_copy_to not supported for pixels.");
+ }
+ else if (mem.type == MEM_GLOBAL) {
+ global_free(mem);
+ global_alloc(mem);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ tex_free((device_texture &)mem);
+ tex_alloc((device_texture &)mem);
+ }
+ else {
+ if (!mem.device_pointer) {
+ generic_alloc(mem);
+ }
+
+ generic_copy_to(mem);
+ }
+}
+
+void CUDADevice::mem_copy_from(device_memory &mem, int y, int w, int h, int elem)
+{
+ if (mem.type == MEM_PIXELS && !background) {
+ pixels_copy_from(mem, y, w, h);
+ }
+ else if (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) {
+ assert(!"mem_copy_from not supported for textures.");
+ }
+ else if (mem.host_pointer) {
+ const size_t size = elem * w * h;
+ const size_t offset = elem * y * w;
+
+ if (mem.device_pointer) {
+ const CUDAContextScope scope(this);
+ cuda_assert(cuMemcpyDtoH(
+ (char *)mem.host_pointer + offset, (CUdeviceptr)mem.device_pointer + offset, size));
+ }
+ else {
+ memset((char *)mem.host_pointer + offset, 0, size);
+ }
+ }
+}
+
+void CUDADevice::mem_zero(device_memory &mem)
+{
+ if (!mem.device_pointer) {
+ mem_alloc(mem);
+ }
+ if (!mem.device_pointer) {
+ return;
+ }
+
+ /* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
+ * regardless of mem.host_pointer and mem.shared_pointer. */
+ if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
+ const CUDAContextScope scope(this);
+ cuda_assert(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size()));
+ }
+ else if (mem.host_pointer) {
+ memset(mem.host_pointer, 0, mem.memory_size());
+ }
+}
+
+void CUDADevice::mem_free(device_memory &mem)
+{
+ if (mem.type == MEM_PIXELS && !background) {
+ pixels_free(mem);
+ }
+ else if (mem.type == MEM_GLOBAL) {
+ global_free(mem);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ tex_free((device_texture &)mem);
+ }
+ else {
+ generic_free(mem);
+ }
+}
+
+device_ptr CUDADevice::mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/)
+{
+ return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
+}
+
+void CUDADevice::const_copy_to(const char *name, void *host, size_t size)
+{
+ CUDAContextScope scope(this);
+ CUdeviceptr mem;
+ size_t bytes;
+
+ cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name));
+ // assert(bytes == size);
+ cuda_assert(cuMemcpyHtoD(mem, host, size));
+}
+
+void CUDADevice::global_alloc(device_memory &mem)
+{
+ CUDAContextScope scope(this);
+
+ generic_alloc(mem);
+ generic_copy_to(mem);
+
+ const_copy_to(mem.name, &mem.device_pointer, sizeof(mem.device_pointer));
+}
+
+void CUDADevice::global_free(device_memory &mem)
+{
+ if (mem.device_pointer) {
+ CUDAContextScope scope(this);
+ generic_free(mem);
+ }
+}
+
+void CUDADevice::tex_alloc(device_texture &mem)
+{
+ CUDAContextScope scope(this);
+
+ /* General variables for both architectures */
+ string bind_name = mem.name;
+ size_t dsize = datatype_size(mem.data_type);
+ size_t size = mem.memory_size();
+
+ CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP;
+ switch (mem.info.extension) {
+ case EXTENSION_REPEAT:
+ address_mode = CU_TR_ADDRESS_MODE_WRAP;
+ break;
+ case EXTENSION_EXTEND:
+ address_mode = CU_TR_ADDRESS_MODE_CLAMP;
+ break;
+ case EXTENSION_CLIP:
+ address_mode = CU_TR_ADDRESS_MODE_BORDER;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ CUfilter_mode filter_mode;
+ if (mem.info.interpolation == INTERPOLATION_CLOSEST) {
+ filter_mode = CU_TR_FILTER_MODE_POINT;
+ }
+ else {
+ filter_mode = CU_TR_FILTER_MODE_LINEAR;
+ }
+
+ /* Image Texture Storage */
+ CUarray_format_enum format;
+ switch (mem.data_type) {
+ case TYPE_UCHAR:
+ format = CU_AD_FORMAT_UNSIGNED_INT8;
+ break;
+ case TYPE_UINT16:
+ format = CU_AD_FORMAT_UNSIGNED_INT16;
+ break;
+ case TYPE_UINT:
+ format = CU_AD_FORMAT_UNSIGNED_INT32;
+ break;
+ case TYPE_INT:
+ format = CU_AD_FORMAT_SIGNED_INT32;
+ break;
+ case TYPE_FLOAT:
+ format = CU_AD_FORMAT_FLOAT;
+ break;
+ case TYPE_HALF:
+ format = CU_AD_FORMAT_HALF;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ CUDAMem *cmem = NULL;
+ CUarray array_3d = NULL;
+ size_t src_pitch = mem.data_width * dsize * mem.data_elements;
+ size_t dst_pitch = src_pitch;
+
+ if (mem.data_depth > 1) {
+ /* 3D texture using an array, as there is no API for 3D textures in linear memory. */
+ CUDA_ARRAY3D_DESCRIPTOR desc;
+
+ desc.Width = mem.data_width;
+ desc.Height = mem.data_height;
+ desc.Depth = mem.data_depth;
+ desc.Format = format;
+ desc.NumChannels = mem.data_elements;
+ desc.Flags = 0;
+
+ VLOG(1) << "Array 3D allocate: " << mem.name << ", "
+ << string_human_readable_number(mem.memory_size()) << " bytes. ("
+ << string_human_readable_size(mem.memory_size()) << ")";
+
+ cuda_assert(cuArray3DCreate(&array_3d, &desc));
+
+ if (!array_3d) {
+ return;
+ }
+
+ CUDA_MEMCPY3D param;
+ memset(&param, 0, sizeof(param));
+ param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
+ param.dstArray = array_3d;
+ param.srcMemoryType = CU_MEMORYTYPE_HOST;
+ param.srcHost = mem.host_pointer;
+ param.srcPitch = src_pitch;
+ param.WidthInBytes = param.srcPitch;
+ param.Height = mem.data_height;
+ param.Depth = mem.data_depth;
+
+ cuda_assert(cuMemcpy3D(&param));
+
+ mem.device_pointer = (device_ptr)array_3d;
+ mem.device_size = size;
+ stats.mem_alloc(size);
+
+ cmem = &cuda_mem_map[&mem];
+ cmem->texobject = 0;
+ cmem->array = array_3d;
+ }
+ else if (mem.data_height > 0) {
+ /* 2D texture, using pitch aligned linear memory. */
+ int alignment = 0;
+ cuda_assert(
+ cuDeviceGetAttribute(&alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuDevice));
+ dst_pitch = align_up(src_pitch, alignment);
+ size_t dst_size = dst_pitch * mem.data_height;
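+ /* Example (illustrative): a 1000-pixel-wide float4 row has src_pitch =
+ * 16000 bytes; with a 512-byte pitch alignment this rounds up to
+ * dst_pitch = 16384 bytes. */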
+
+ cmem = generic_alloc(mem, dst_size - mem.memory_size());
+ if (!cmem) {
+ return;
+ }
+
+ CUDA_MEMCPY2D param;
+ memset(&param, 0, sizeof(param));
+ param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
+ param.dstDevice = mem.device_pointer;
+ param.dstPitch = dst_pitch;
+ param.srcMemoryType = CU_MEMORYTYPE_HOST;
+ param.srcHost = mem.host_pointer;
+ param.srcPitch = src_pitch;
+ param.WidthInBytes = param.srcPitch;
+ param.Height = mem.data_height;
+
+ cuda_assert(cuMemcpy2DUnaligned(&param));
+ }
+ else {
+ /* 1D texture, using linear memory. */
+ cmem = generic_alloc(mem);
+ if (!cmem) {
+ return;
+ }
+
+ cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size));
+ }
+
+ /* Kepler+, bindless textures. */
+ CUDA_RESOURCE_DESC resDesc;
+ memset(&resDesc, 0, sizeof(resDesc));
+
+ if (array_3d) {
+ resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
+ resDesc.res.array.hArray = array_3d;
+ resDesc.flags = 0;
+ }
+ else if (mem.data_height > 0) {
+ resDesc.resType = CU_RESOURCE_TYPE_PITCH2D;
+ resDesc.res.pitch2D.devPtr = mem.device_pointer;
+ resDesc.res.pitch2D.format = format;
+ resDesc.res.pitch2D.numChannels = mem.data_elements;
+ resDesc.res.pitch2D.height = mem.data_height;
+ resDesc.res.pitch2D.width = mem.data_width;
+ resDesc.res.pitch2D.pitchInBytes = dst_pitch;
+ }
+ else {
+ resDesc.resType = CU_RESOURCE_TYPE_LINEAR;
+ resDesc.res.linear.devPtr = mem.device_pointer;
+ resDesc.res.linear.format = format;
+ resDesc.res.linear.numChannels = mem.data_elements;
+ resDesc.res.linear.sizeInBytes = mem.device_size;
+ }
+
+ CUDA_TEXTURE_DESC texDesc;
+ memset(&texDesc, 0, sizeof(texDesc));
+ texDesc.addressMode[0] = address_mode;
+ texDesc.addressMode[1] = address_mode;
+ texDesc.addressMode[2] = address_mode;
+ texDesc.filterMode = filter_mode;
+ texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
+
+ cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
+
+ /* Resize once */
+ const uint slot = mem.slot;
+ if (slot >= texture_info.size()) {
+ /* Allocate some slots in advance, to reduce the number
+ * of re-allocations. */
+ texture_info.resize(slot + 128);
+ }
+
+ /* Set the mapping and tag that we need to (re-)upload to the device. */
+ texture_info[slot] = mem.info;
+ texture_info[slot].data = (uint64_t)cmem->texobject;
+ need_texture_info = true;
+}
+
+void CUDADevice::tex_free(device_texture &mem)
+{
+ if (mem.device_pointer) {
+ CUDAContextScope scope(this);
+ const CUDAMem &cmem = cuda_mem_map[&mem];
+
+ if (cmem.texobject) {
+ /* Free bindless texture. */
+ cuTexObjectDestroy(cmem.texobject);
+ }
+
+ if (cmem.array) {
+ /* Free array. */
+ cuArrayDestroy(cmem.array);
+ stats.mem_free(mem.device_size);
+ mem.device_pointer = 0;
+ mem.device_size = 0;
+
+ cuda_mem_map.erase(cuda_mem_map.find(&mem));
+ }
+ else {
+ generic_free(mem);
+ }
+ }
+}
+
+# define CUDA_GET_BLOCKSIZE(func, w, h) \
+ int threads_per_block; \
+ cuda_assert( \
+ cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
+ int threads = (int)sqrt((float)threads_per_block); \
+ int xblocks = ((w) + threads - 1) / threads; \
+ int yblocks = ((h) + threads - 1) / threads;
+
+# define CUDA_LAUNCH_KERNEL(func, args) \
+ cuda_assert(cuLaunchKernel(func, xblocks, yblocks, 1, threads, threads, 1, 0, 0, args, 0));
+
+/* Similar to the above, but for 1-dimensional blocks. */
+# define CUDA_GET_BLOCKSIZE_1D(func, w, h) \
+ int threads_per_block; \
+ cuda_assert( \
+ cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
+ int xblocks = ((w) + threads_per_block - 1) / threads_per_block; \
+ int yblocks = h;
+
+# define CUDA_LAUNCH_KERNEL_1D(func, args) \
+ cuda_assert(cuLaunchKernel(func, xblocks, yblocks, 1, threads_per_block, 1, 1, 0, 0, args, 0));
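+
+/* Illustrative expansion of the helpers above: for a kernel limited to 1024
+ * threads per block, CUDA_GET_BLOCKSIZE uses threads = sqrt(1024) = 32 and
+ * launches ceil(w / 32) x ceil(h / 32) blocks, while the 1D variant launches
+ * ceil(w / 1024) x h blocks of 1024 threads each. */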
+
+bool CUDADevice::denoising_non_local_means(device_ptr image_ptr,
+ device_ptr guide_ptr,
+ device_ptr variance_ptr,
+ device_ptr out_ptr,
+ DenoisingTask *task)
+{
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ int stride = task->buffer.stride;
+ int w = task->buffer.width;
+ int h = task->buffer.h;
+ int r = task->nlm_state.r;
+ int f = task->nlm_state.f;
+ float a = task->nlm_state.a;
+ float k_2 = task->nlm_state.k_2;
+
+ int pass_stride = task->buffer.pass_stride;
+ int num_shifts = (2 * r + 1) * (2 * r + 1);
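+ /* Illustrative: a half-window of r = 8 gives num_shifts = 17 * 17 = 289
+ * candidate offsets per pixel. */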
+ int channel_offset = task->nlm_state.is_color ? task->buffer.pass_stride : 0;
+ int frame_offset = 0;
+
+ if (have_error())
+ return false;
+
+ CUdeviceptr difference = (CUdeviceptr)task->buffer.temporary_mem.device_pointer;
+ CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts;
+ CUdeviceptr weightAccum = difference + 2 * sizeof(float) * pass_stride * num_shifts;
+ CUdeviceptr scale_ptr = 0;
+
+ cuda_assert(cuMemsetD8(weightAccum, 0, sizeof(float) * pass_stride));
+ cuda_assert(cuMemsetD8(out_ptr, 0, sizeof(float) * pass_stride));
+
+ {
+ CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMUpdateOutput;
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMCalcDifference, cuFilterModule, "kernel_cuda_filter_nlm_calc_difference"));
+ cuda_assert(cuModuleGetFunction(&cuNLMBlur, cuFilterModule, "kernel_cuda_filter_nlm_blur"));
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMCalcWeight, cuFilterModule, "kernel_cuda_filter_nlm_calc_weight"));
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMUpdateOutput, cuFilterModule, "kernel_cuda_filter_nlm_update_output"));
+
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMUpdateOutput, CU_FUNC_CACHE_PREFER_L1));
+
+ CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference, w * h, num_shifts);
+
+ void *calc_difference_args[] = {&guide_ptr,
+ &variance_ptr,
+ &scale_ptr,
+ &difference,
+ &w,
+ &h,
+ &stride,
+ &pass_stride,
+ &r,
+ &channel_offset,
+ &frame_offset,
+ &a,
+ &k_2};
+ void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
+ void *calc_weight_args[] = {
+ &blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
+ void *update_output_args[] = {&blurDifference,
+ &image_ptr,
+ &out_ptr,
+ &weightAccum,
+ &w,
+ &h,
+ &stride,
+ &pass_stride,
+ &channel_offset,
+ &r,
+ &f};
+
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMUpdateOutput, update_output_args);
+ }
+
+ {
+ CUfunction cuNLMNormalize;
+ cuda_assert(
+ cuModuleGetFunction(&cuNLMNormalize, cuFilterModule, "kernel_cuda_filter_nlm_normalize"));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMNormalize, CU_FUNC_CACHE_PREFER_L1));
+ void *normalize_args[] = {&out_ptr, &weightAccum, &w, &h, &stride};
+ CUDA_GET_BLOCKSIZE(cuNLMNormalize, w, h);
+ CUDA_LAUNCH_KERNEL(cuNLMNormalize, normalize_args);
+ cuda_assert(cuCtxSynchronize());
+ }
+
+ return !have_error();
+}
+
+bool CUDADevice::denoising_construct_transform(DenoisingTask *task)
+{
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterConstructTransform;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterConstructTransform, cuFilterModule, "kernel_cuda_filter_construct_transform"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterConstructTransform, CU_FUNC_CACHE_PREFER_SHARED));
+ CUDA_GET_BLOCKSIZE(cuFilterConstructTransform, task->storage.w, task->storage.h);
+
+ void *args[] = {&task->buffer.mem.device_pointer,
+ &task->tile_info_mem.device_pointer,
+ &task->storage.transform.device_pointer,
+ &task->storage.rank.device_pointer,
+ &task->filter_area,
+ &task->rect,
+ &task->radius,
+ &task->pca_threshold,
+ &task->buffer.pass_stride,
+ &task->buffer.frame_stride,
+ &task->buffer.use_time};
+ CUDA_LAUNCH_KERNEL(cuFilterConstructTransform, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+}
+
+bool CUDADevice::denoising_accumulate(device_ptr color_ptr,
+ device_ptr color_variance_ptr,
+ device_ptr scale_ptr,
+ int frame,
+ DenoisingTask *task)
+{
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ int r = task->radius;
+ int f = 4;
+ float a = 1.0f;
+ float k_2 = task->nlm_k_2;
+
+ int w = task->reconstruction_state.source_w;
+ int h = task->reconstruction_state.source_h;
+ int stride = task->buffer.stride;
+ int frame_offset = frame * task->buffer.frame_stride;
+ int t = task->tile_info->frames[frame];
+
+ int pass_stride = task->buffer.pass_stride;
+ int num_shifts = (2 * r + 1) * (2 * r + 1);
+
+ if (have_error())
+ return false;
+
+ CUdeviceptr difference = (CUdeviceptr)task->buffer.temporary_mem.device_pointer;
+ CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts;
+
+ CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian;
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMCalcDifference, cuFilterModule, "kernel_cuda_filter_nlm_calc_difference"));
+ cuda_assert(cuModuleGetFunction(&cuNLMBlur, cuFilterModule, "kernel_cuda_filter_nlm_blur"));
+ cuda_assert(
+ cuModuleGetFunction(&cuNLMCalcWeight, cuFilterModule, "kernel_cuda_filter_nlm_calc_weight"));
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMConstructGramian, cuFilterModule, "kernel_cuda_filter_nlm_construct_gramian"));
+
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMConstructGramian, CU_FUNC_CACHE_PREFER_SHARED));
+
+ CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference,
+ task->reconstruction_state.source_w * task->reconstruction_state.source_h,
+ num_shifts);
+
+ void *calc_difference_args[] = {&color_ptr,
+ &color_variance_ptr,
+ &scale_ptr,
+ &difference,
+ &w,
+ &h,
+ &stride,
+ &pass_stride,
+ &r,
+ &pass_stride,
+ &frame_offset,
+ &a,
+ &k_2};
+ void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
+ void *calc_weight_args[] = {&blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
+ void *construct_gramian_args[] = {&t,
+ &blurDifference,
+ &task->buffer.mem.device_pointer,
+ &task->storage.transform.device_pointer,
+ &task->storage.rank.device_pointer,
+ &task->storage.XtWX.device_pointer,
+ &task->storage.XtWY.device_pointer,
+ &task->reconstruction_state.filter_window,
+ &w,
+ &h,
+ &stride,
+ &pass_stride,
+ &r,
+ &f,
+ &frame_offset,
+ &task->buffer.use_time};
+
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+}
+
+bool CUDADevice::denoising_solve(device_ptr output_ptr, DenoisingTask *task)
+{
+ CUfunction cuFinalize;
+ cuda_assert(cuModuleGetFunction(&cuFinalize, cuFilterModule, "kernel_cuda_filter_finalize"));
+ cuda_assert(cuFuncSetCacheConfig(cuFinalize, CU_FUNC_CACHE_PREFER_L1));
+ void *finalize_args[] = {&output_ptr,
+ &task->storage.rank.device_pointer,
+ &task->storage.XtWX.device_pointer,
+ &task->storage.XtWY.device_pointer,
+ &task->filter_area,
+ &task->reconstruction_state.buffer_params.x,
+ &task->render_buffer.samples};
+ CUDA_GET_BLOCKSIZE(
+ cuFinalize, task->reconstruction_state.source_w, task->reconstruction_state.source_h);
+ CUDA_LAUNCH_KERNEL(cuFinalize, finalize_args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+}
+
+bool CUDADevice::denoising_combine_halves(device_ptr a_ptr,
+ device_ptr b_ptr,
+ device_ptr mean_ptr,
+ device_ptr variance_ptr,
+ int r,
+ int4 rect,
+ DenoisingTask *task)
+{
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterCombineHalves;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterCombineHalves, cuFilterModule, "kernel_cuda_filter_combine_halves"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterCombineHalves, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(
+ cuFilterCombineHalves, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
+
+ void *args[] = {&mean_ptr, &variance_ptr, &a_ptr, &b_ptr, &rect, &r};
+ CUDA_LAUNCH_KERNEL(cuFilterCombineHalves, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+}
+
+bool CUDADevice::denoising_divide_shadow(device_ptr a_ptr,
+ device_ptr b_ptr,
+ device_ptr sample_variance_ptr,
+ device_ptr sv_variance_ptr,
+ device_ptr buffer_variance_ptr,
+ DenoisingTask *task)
+{
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterDivideShadow;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterDivideShadow, cuFilterModule, "kernel_cuda_filter_divide_shadow"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterDivideShadow, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(
+ cuFilterDivideShadow, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
+
+ void *args[] = {&task->render_buffer.samples,
+ &task->tile_info_mem.device_pointer,
+ &a_ptr,
+ &b_ptr,
+ &sample_variance_ptr,
+ &sv_variance_ptr,
+ &buffer_variance_ptr,
+ &task->rect,
+ &task->render_buffer.pass_stride,
+ &task->render_buffer.offset};
+ CUDA_LAUNCH_KERNEL(cuFilterDivideShadow, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+}
+
+bool CUDADevice::denoising_get_feature(int mean_offset,
+ int variance_offset,
+ device_ptr mean_ptr,
+ device_ptr variance_ptr,
+ float scale,
+ DenoisingTask *task)
+{
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterGetFeature;
+ cuda_assert(
+ cuModuleGetFunction(&cuFilterGetFeature, cuFilterModule, "kernel_cuda_filter_get_feature"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterGetFeature, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(cuFilterGetFeature, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
+
+ void *args[] = {&task->render_buffer.samples,
+ &task->tile_info_mem.device_pointer,
+ &mean_offset,
+ &variance_offset,
+ &mean_ptr,
+ &variance_ptr,
+ &scale,
+ &task->rect,
+ &task->render_buffer.pass_stride,
+ &task->render_buffer.offset};
+ CUDA_LAUNCH_KERNEL(cuFilterGetFeature, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+}
+
+bool CUDADevice::denoising_write_feature(int out_offset,
+ device_ptr from_ptr,
+ device_ptr buffer_ptr,
+ DenoisingTask *task)
+{
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterWriteFeature;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterWriteFeature, cuFilterModule, "kernel_cuda_filter_write_feature"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterWriteFeature, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(cuFilterWriteFeature, task->filter_area.z, task->filter_area.w);
+
+ void *args[] = {&task->render_buffer.samples,
+ &task->reconstruction_state.buffer_params,
+ &task->filter_area,
+ &from_ptr,
+ &buffer_ptr,
+ &out_offset,
+ &task->rect};
+ CUDA_LAUNCH_KERNEL(cuFilterWriteFeature, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+}
+
+bool CUDADevice::denoising_detect_outliers(device_ptr image_ptr,
+ device_ptr variance_ptr,
+ device_ptr depth_ptr,
+ device_ptr output_ptr,
+ DenoisingTask *task)
+{
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterDetectOutliers;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterDetectOutliers, cuFilterModule, "kernel_cuda_filter_detect_outliers"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterDetectOutliers, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(
+ cuFilterDetectOutliers, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
+
+ void *args[] = {
+ &image_ptr, &variance_ptr, &depth_ptr, &output_ptr, &task->rect, &task->buffer.pass_stride};
+
+ CUDA_LAUNCH_KERNEL(cuFilterDetectOutliers, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+}
+
+void CUDADevice::denoise(RenderTile &rtile, DenoisingTask &denoising)
+{
+ denoising.functions.construct_transform = function_bind(
+ &CUDADevice::denoising_construct_transform, this, &denoising);
+ denoising.functions.accumulate = function_bind(
+ &CUDADevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising);
+ denoising.functions.solve = function_bind(&CUDADevice::denoising_solve, this, _1, &denoising);
+ denoising.functions.divide_shadow = function_bind(
+ &CUDADevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
+ denoising.functions.non_local_means = function_bind(
+ &CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
+ denoising.functions.combine_halves = function_bind(
+ &CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
+ denoising.functions.get_feature = function_bind(
+ &CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, _5, &denoising);
+ denoising.functions.write_feature = function_bind(
+ &CUDADevice::denoising_write_feature, this, _1, _2, _3, &denoising);
+ denoising.functions.detect_outliers = function_bind(
+ &CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
+
+ denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
+ denoising.render_buffer.samples = rtile.sample;
+ denoising.buffer.gpu_temporary_mem = true;
+
+ denoising.run_denoising(&rtile);
+}
+
+void CUDADevice::adaptive_sampling_filter(uint filter_sample,
+ WorkTile *wtile,
+ CUdeviceptr d_wtile,
+ CUstream stream)
+{
+ const int num_threads_per_block = functions.adaptive_num_threads_per_block;
+
+ /* These are a series of tiny kernels because there is no grid synchronization
+ * from within a kernel, so multiple kernel launches are used instead. */
+ uint total_work_size = wtile->h * wtile->w;
+ void *args2[] = {&d_wtile, &filter_sample, &total_work_size};
+ uint num_blocks = divide_up(total_work_size, num_threads_per_block);
+ cuda_assert(cuLaunchKernel(functions.adaptive_stopping,
+ num_blocks,
+ 1,
+ 1,
+ num_threads_per_block,
+ 1,
+ 1,
+ 0,
+ stream,
+ args2,
+ 0));
+ total_work_size = wtile->h;
+ num_blocks = divide_up(total_work_size, num_threads_per_block);
+ cuda_assert(cuLaunchKernel(functions.adaptive_filter_x,
+ num_blocks,
+ 1,
+ 1,
+ num_threads_per_block,
+ 1,
+ 1,
+ 0,
+ stream,
+ args2,
+ 0));
+ total_work_size = wtile->w;
+ num_blocks = divide_up(total_work_size, num_threads_per_block);
+ cuda_assert(cuLaunchKernel(functions.adaptive_filter_y,
+ num_blocks,
+ 1,
+ 1,
+ num_threads_per_block,
+ 1,
+ 1,
+ 0,
+ stream,
+ args2,
+ 0));
+}
+
+void CUDADevice::adaptive_sampling_post(RenderTile &rtile,
+ WorkTile *wtile,
+ CUdeviceptr d_wtile,
+ CUstream stream)
+{
+ const int num_threads_per_block = functions.adaptive_num_threads_per_block;
+ uint total_work_size = wtile->h * wtile->w;
+
+ void *args[] = {&d_wtile, &rtile.start_sample, &rtile.sample, &total_work_size};
+ uint num_blocks = divide_up(total_work_size, num_threads_per_block);
+ cuda_assert(cuLaunchKernel(functions.adaptive_scale_samples,
+ num_blocks,
+ 1,
+ 1,
+ num_threads_per_block,
+ 1,
+ 1,
+ 0,
+ stream,
+ args,
+ 0));
+}
+
+void CUDADevice::path_trace(DeviceTask &task,
+ RenderTile &rtile,
+ device_vector<WorkTile> &work_tiles)
+{
+ scoped_timer timer(&rtile.buffers->render_time);
+
+ if (have_error())
+ return;
+
+ CUDAContextScope scope(this);
+ CUfunction cuPathTrace;
+
+ /* Get kernel function. */
+ if (task.integrator_branched) {
+ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
+ }
+ else {
+ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
+ }
+
+ if (have_error()) {
+ return;
+ }
+
+ cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
+
+ /* Allocate work tile. */
+ work_tiles.alloc(1);
+
+ WorkTile *wtile = work_tiles.data();
+ wtile->x = rtile.x;
+ wtile->y = rtile.y;
+ wtile->w = rtile.w;
+ wtile->h = rtile.h;
+ wtile->offset = rtile.offset;
+ wtile->stride = rtile.stride;
+ wtile->buffer = (float *)(CUdeviceptr)rtile.buffer;
+
+ /* Prepare work size. More step samples render faster, but for now we
+ * remain conservative for GPUs connected to a display to avoid driver
+ * timeouts and display freezing. */
+ int min_blocks, num_threads_per_block;
+ cuda_assert(cuOccupancyMaxPotentialBlockSize(
+ &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
+ if (!info.display_device) {
+ min_blocks *= 8;
+ }
+
+ uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h);
+ if (task.adaptive_sampling.use) {
+ step_samples = task.adaptive_sampling.align_static_samples(step_samples);
+ }
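+
+ /* Illustrative example: with min_blocks = 160 scaled by 8 for a non-display
+ * device, 256 threads per block and a 256 x 256 tile, this gives
+ * step_samples = ceil(1280 * 256 / 65536) = 5. */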
+
+ /* Render all samples. */
+ int start_sample = rtile.start_sample;
+ int end_sample = rtile.start_sample + rtile.num_samples;
+
+ for (int sample = start_sample; sample < end_sample; sample += step_samples) {
+ /* Setup and copy work tile to device. */
+ wtile->start_sample = sample;
+ wtile->num_samples = min(step_samples, end_sample - sample);
+ work_tiles.copy_to_device();
+
+ CUdeviceptr d_work_tiles = (CUdeviceptr)work_tiles.device_pointer;
+ uint total_work_size = wtile->w * wtile->h * wtile->num_samples;
+ uint num_blocks = divide_up(total_work_size, num_threads_per_block);
+
+ /* Launch kernel. */
+ void *args[] = {&d_work_tiles, &total_work_size};
+
+ cuda_assert(
+ cuLaunchKernel(cuPathTrace, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
+
+ /* Run the adaptive sampling kernels at selected samples aligned to step samples. */
+ uint filter_sample = sample + wtile->num_samples - 1;
+ if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(filter_sample)) {
+ adaptive_sampling_filter(filter_sample, wtile, d_work_tiles);
+ }
+
+ cuda_assert(cuCtxSynchronize());
+
+ /* Update progress. */
+ rtile.sample = sample + wtile->num_samples;
+ task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples);
+
+ if (task.get_cancel()) {
+ if (task.need_finish_queue == false)
+ break;
+ }
+ }
+
+ /* Finalize adaptive sampling. */
+ if (task.adaptive_sampling.use) {
+ CUdeviceptr d_work_tiles = (CUdeviceptr)work_tiles.device_pointer;
+ adaptive_sampling_post(rtile, wtile, d_work_tiles);
+ cuda_assert(cuCtxSynchronize());
+ task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples);
+ }
+}
+
+void CUDADevice::film_convert(DeviceTask &task,
+ device_ptr buffer,
+ device_ptr rgba_byte,
+ device_ptr rgba_half)
+{
+ if (have_error())
+ return;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilmConvert;
+ CUdeviceptr d_rgba = map_pixels((rgba_byte) ? rgba_byte : rgba_half);
+ CUdeviceptr d_buffer = (CUdeviceptr)buffer;
+
+ /* get kernel function */
+ if (rgba_half) {
+ cuda_assert(
+ cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_half_float"));
+ }
+ else {
+ cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte"));
+ }
+
+ float sample_scale = 1.0f / (task.sample + 1);
+
+ /* pass in parameters */
+ void *args[] = {&d_rgba,
+ &d_buffer,
+ &sample_scale,
+ &task.x,
+ &task.y,
+ &task.w,
+ &task.h,
+ &task.offset,
+ &task.stride};
+
+ /* launch kernel */
+ int threads_per_block;
+ cuda_assert(cuFuncGetAttribute(
+ &threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuFilmConvert));
+
+ int xthreads = (int)sqrt(threads_per_block);
+ int ythreads = (int)sqrt(threads_per_block);
+ int xblocks = (task.w + xthreads - 1) / xthreads;
+ int yblocks = (task.h + ythreads - 1) / ythreads;
+
+ cuda_assert(cuFuncSetCacheConfig(cuFilmConvert, CU_FUNC_CACHE_PREFER_L1));
+
+ cuda_assert(cuLaunchKernel(cuFilmConvert,
+ xblocks,
+ yblocks,
+ 1, /* blocks */
+ xthreads,
+ ythreads,
+ 1, /* threads */
+ 0,
+ 0,
+ args,
+ 0));
+
+ unmap_pixels((rgba_byte) ? rgba_byte : rgba_half);
+
+ cuda_assert(cuCtxSynchronize());
+}
+
+void CUDADevice::shader(DeviceTask &task)
+{
+ if (have_error())
+ return;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuShader;
+ CUdeviceptr d_input = (CUdeviceptr)task.shader_input;
+ CUdeviceptr d_output = (CUdeviceptr)task.shader_output;
+
+ /* get kernel function */
+ if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
+ cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_bake"));
+ }
+ else if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
+ cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_displace"));
+ }
+ else {
+ cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_background"));
+ }
+
+ /* do tasks in smaller chunks, so we can cancel them */
+ const int shader_chunk_size = 65536;
+ const int start = task.shader_x;
+ const int end = task.shader_x + task.shader_w;
+ int offset = task.offset;
+
+ bool canceled = false;
+ for (int sample = 0; sample < task.num_samples && !canceled; sample++) {
+ for (int shader_x = start; shader_x < end; shader_x += shader_chunk_size) {
+ int shader_w = min(shader_chunk_size, end - shader_x);
+
+ /* pass in parameters */
+ void *args[8];
+ int arg = 0;
+ args[arg++] = &d_input;
+ args[arg++] = &d_output;
+ args[arg++] = &task.shader_eval_type;
+ if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
+ args[arg++] = &task.shader_filter;
+ }
+ args[arg++] = &shader_x;
+ args[arg++] = &shader_w;
+ args[arg++] = &offset;
+ args[arg++] = &sample;
+
+ /* launch kernel */
+ int threads_per_block;
+ cuda_assert(cuFuncGetAttribute(
+ &threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
+
+ int xblocks = (shader_w + threads_per_block - 1) / threads_per_block;
+
+ cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuLaunchKernel(cuShader,
+ xblocks,
+ 1,
+ 1, /* blocks */
+ threads_per_block,
+ 1,
+ 1, /* threads */
+ 0,
+ 0,
+ args,
+ 0));
+
+ cuda_assert(cuCtxSynchronize());
+
+ if (task.get_cancel()) {
+ canceled = true;
+ break;
+ }
+ }
+
+ task.update_progress(NULL);
+ }
+}
+
+CUdeviceptr CUDADevice::map_pixels(device_ptr mem)
+{
+ if (!background) {
+ PixelMem pmem = pixel_mem_map[mem];
+ CUdeviceptr buffer;
+
+ size_t bytes;
+ cuda_assert(cuGraphicsMapResources(1, &pmem.cuPBOresource, 0));
+ cuda_assert(cuGraphicsResourceGetMappedPointer(&buffer, &bytes, pmem.cuPBOresource));
+
+ return buffer;
+ }
+
+ return (CUdeviceptr)mem;
+}
+
+void CUDADevice::unmap_pixels(device_ptr mem)
+{
+ if (!background) {
+ PixelMem pmem = pixel_mem_map[mem];
+
+ cuda_assert(cuGraphicsUnmapResources(1, &pmem.cuPBOresource, 0));
+ }
+}
+
+void CUDADevice::pixels_alloc(device_memory &mem)
+{
+ PixelMem pmem;
+
+ pmem.w = mem.data_width;
+ pmem.h = mem.data_height;
+
+ CUDAContextScope scope(this);
+
+ glGenBuffers(1, &pmem.cuPBO);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
+ if (mem.data_type == TYPE_HALF)
+ glBufferData(
+ GL_PIXEL_UNPACK_BUFFER, pmem.w * pmem.h * sizeof(GLhalf) * 4, NULL, GL_DYNAMIC_DRAW);
+ else
+ glBufferData(
+ GL_PIXEL_UNPACK_BUFFER, pmem.w * pmem.h * sizeof(uint8_t) * 4, NULL, GL_DYNAMIC_DRAW);
+
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+ glActiveTexture(GL_TEXTURE0);
+ glGenTextures(1, &pmem.cuTexId);
+ glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
+ if (mem.data_type == TYPE_HALF)
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, pmem.w, pmem.h, 0, GL_RGBA, GL_HALF_FLOAT, NULL);
+ else
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ glBindTexture(GL_TEXTURE_2D, 0);
+
+ CUresult result = cuGraphicsGLRegisterBuffer(
+ &pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
+
+ if (result == CUDA_SUCCESS) {
+ mem.device_pointer = pmem.cuTexId;
+ pixel_mem_map[mem.device_pointer] = pmem;
+
+ mem.device_size = mem.memory_size();
+ stats.mem_alloc(mem.device_size);
+
+ return;
+ }
+ else {
+ /* failed to register buffer, fallback to no interop */
+ glDeleteBuffers(1, &pmem.cuPBO);
+ glDeleteTextures(1, &pmem.cuTexId);
+
+ background = true;
+ }
+}
+
+void CUDADevice::pixels_copy_from(device_memory &mem, int y, int w, int h)
+{
+ PixelMem pmem = pixel_mem_map[mem.device_pointer];
+
+ CUDAContextScope scope(this);
+
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
+ uchar *pixels = (uchar *)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY);
+ size_t offset = sizeof(uchar) * 4 * y * w;
+ memcpy((uchar *)mem.host_pointer + offset, pixels + offset, sizeof(uchar) * 4 * w * h);
+ glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+}
+
+void CUDADevice::pixels_free(device_memory &mem)
+{
+ if (mem.device_pointer) {
+ PixelMem pmem = pixel_mem_map[mem.device_pointer];
+
+ CUDAContextScope scope(this);
+
+ cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource));
+ glDeleteBuffers(1, &pmem.cuPBO);
+ glDeleteTextures(1, &pmem.cuTexId);
+
+ pixel_mem_map.erase(pixel_mem_map.find(mem.device_pointer));
+ mem.device_pointer = 0;
+
+ stats.mem_free(mem.device_size);
+ mem.device_size = 0;
+ }
+}
+
+void CUDADevice::draw_pixels(device_memory &mem,
+ int y,
+ int w,
+ int h,
+ int width,
+ int height,
+ int dx,
+ int dy,
+ int dw,
+ int dh,
+ bool transparent,
+ const DeviceDrawParams &draw_params)
+{
+ assert(mem.type == MEM_PIXELS);
+
+ if (!background) {
+ const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL);
+ PixelMem pmem = pixel_mem_map[mem.device_pointer];
+ float *vpointer;
+
+ CUDAContextScope scope(this);
+
+ /* for multiple devices, this assumes the inefficient method of allocating
+ * all pixels on the device even though we only render to a subset */
+ size_t offset = 4 * y * w;
+
+ if (mem.data_type == TYPE_HALF)
+ offset *= sizeof(GLhalf);
+ else
+ offset *= sizeof(uint8_t);
+
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
+ glActiveTexture(GL_TEXTURE0);
+ glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
+ if (mem.data_type == TYPE_HALF) {
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_HALF_FLOAT, (void *)offset);
+ }
+ else {
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, (void *)offset);
+ }
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+ if (transparent) {
+ glEnable(GL_BLEND);
+ glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
+ }
+
+ GLint shader_program;
+ if (use_fallback_shader) {
+ if (!bind_fallback_display_space_shader(dw, dh)) {
+ return;
+ }
+ shader_program = fallback_shader_program;
+ }
+ else {
+ draw_params.bind_display_space_shader_cb();
+ glGetIntegerv(GL_CURRENT_PROGRAM, &shader_program);
+ }
+
+ if (!vertex_buffer) {
+ glGenBuffers(1, &vertex_buffer);
+ }
+
+ glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
+ /* invalidate old contents -
+ * avoids stalling if the buffer is still waiting in the queue to be rendered */
+ glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
+
+ vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
+
+ if (vpointer) {
+ /* texture coordinate - vertex pair */
+ vpointer[0] = 0.0f;
+ vpointer[1] = 0.0f;
+ vpointer[2] = dx;
+ vpointer[3] = dy;
+
+ vpointer[4] = (float)w / (float)pmem.w;
+ vpointer[5] = 0.0f;
+ vpointer[6] = (float)width + dx;
+ vpointer[7] = dy;
+
+ vpointer[8] = (float)w / (float)pmem.w;
+ vpointer[9] = (float)h / (float)pmem.h;
+ vpointer[10] = (float)width + dx;
+ vpointer[11] = (float)height + dy;
+
+ vpointer[12] = 0.0f;
+ vpointer[13] = (float)h / (float)pmem.h;
+ vpointer[14] = dx;
+ vpointer[15] = (float)height + dy;
+
+ glUnmapBuffer(GL_ARRAY_BUFFER);
+ }
+
+ GLuint vertex_array_object;
+ GLuint position_attribute, texcoord_attribute;
+
+ glGenVertexArrays(1, &vertex_array_object);
+ glBindVertexArray(vertex_array_object);
+
+ texcoord_attribute = glGetAttribLocation(shader_program, "texCoord");
+ position_attribute = glGetAttribLocation(shader_program, "pos");
+
+ glEnableVertexAttribArray(texcoord_attribute);
+ glEnableVertexAttribArray(position_attribute);
+
+ glVertexAttribPointer(
+ texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
+ glVertexAttribPointer(position_attribute,
+ 2,
+ GL_FLOAT,
+ GL_FALSE,
+ 4 * sizeof(float),
+ (const GLvoid *)(sizeof(float) * 2));
+
+ glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+ if (use_fallback_shader) {
+ glUseProgram(0);
+ }
+ else {
+ draw_params.unbind_display_space_shader_cb();
+ }
+
+ if (transparent) {
+ glDisable(GL_BLEND);
+ }
+
+ glBindTexture(GL_TEXTURE_2D, 0);
+
+ return;
+ }
+
+ Device::draw_pixels(mem, y, w, h, width, height, dx, dy, dw, dh, transparent, draw_params);
+}
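+
+/* The 16 floats written into the vertex buffer in draw_pixels() above describe
+ * one textured quad as four interleaved (u, v, x, y) vertices, drawn as a
+ * GL_TRIANGLE_FAN. A minimal sketch of that layout (names are illustrative,
+ * not part of the code):
+ *
+ *   struct TexturedVertex {
+ *     float u, v;  // texture coordinate, 0..w/pmem.w and 0..h/pmem.h
+ *     float x, y;  // screen position, dx..dx+width and dy..dy+height
+ *   };
+ *   const TexturedVertex quad[4] = {
+ *     {0.0f, 0.0f, dx, dy},                     // bottom left
+ *     {u_max, 0.0f, dx + width, dy},            // bottom right
+ *     {u_max, v_max, dx + width, dy + height},  // top right
+ *     {0.0f, v_max, dx, dy + height},           // top left
+ *   };
+ *
+ * where u_max = (float)w / pmem.w and v_max = (float)h / pmem.h, matching the
+ * two vertex attributes (stride 4 floats, position offset 2 floats) set up
+ * before the draw call. */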
+
+void CUDADevice::thread_run(DeviceTask *task)
+{
+ CUDAContextScope scope(this);
+
+ if (task->type == DeviceTask::RENDER) {
+ DeviceRequestedFeatures requested_features;
+ if (use_split_kernel()) {
+ if (split_kernel == NULL) {
+ split_kernel = new CUDASplitKernel(this);
+ split_kernel->load_kernels(requested_features);
+ }
+ }
+
+ device_vector<WorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY);
+
+ /* keep rendering tiles until done */
+ RenderTile tile;
+ DenoisingTask denoising(this, *task);
+
+ while (task->acquire_tile(this, tile, task->tile_types)) {
+ if (tile.task == RenderTile::PATH_TRACE) {
+ if (use_split_kernel()) {
+ device_only_memory<uchar> void_buffer(this, "void_buffer");
+ split_kernel->path_trace(task, tile, void_buffer, void_buffer);
+ }
+ else {
+ path_trace(*task, tile, work_tiles);
+ }
+ }
+ else if (tile.task == RenderTile::DENOISE) {
+ tile.sample = tile.start_sample + tile.num_samples;
+
+ denoise(tile, denoising);
+
+ task->update_progress(&tile, tile.w * tile.h);
+ }
+
+ task->release_tile(tile);
+
+ if (task->get_cancel()) {
+ if (task->need_finish_queue == false)
+ break;
+ }
+ }
+
+ work_tiles.free();
+ }
+ else if (task->type == DeviceTask::SHADER) {
+ shader(*task);
+
+ cuda_assert(cuCtxSynchronize());
+ }
+ else if (task->type == DeviceTask::DENOISE_BUFFER) {
+ RenderTile tile;
+ tile.x = task->x;
+ tile.y = task->y;
+ tile.w = task->w;
+ tile.h = task->h;
+ tile.buffer = task->buffer;
+ tile.sample = task->sample + task->num_samples;
+ tile.num_samples = task->num_samples;
+ tile.start_sample = task->sample;
+ tile.offset = task->offset;
+ tile.stride = task->stride;
+ tile.buffers = task->buffers;
+
+ DenoisingTask denoising(this, *task);
+ denoise(tile, denoising);
+ task->update_progress(&tile, tile.w * tile.h);
+ }
+}
+
+class CUDADeviceTask : public DeviceTask {
+ public:
+ CUDADeviceTask(CUDADevice *device, DeviceTask &task) : DeviceTask(task)
+ {
+ run = function_bind(&CUDADevice::thread_run, device, this);
+ }
+};
+
+void CUDADevice::task_add(DeviceTask &task)
+{
+ CUDAContextScope scope(this);
+
+ /* Load texture info. */
+ load_texture_info();
+
+ /* Synchronize all memory copies before executing task. */
+ cuda_assert(cuCtxSynchronize());
+
+ if (task.type == DeviceTask::FILM_CONVERT) {
+ /* Must be done in the main thread due to OpenGL access. */
+ film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
+ }
+ else {
+ task_pool.push(new CUDADeviceTask(this, task));
+ }
+}
+
+void CUDADevice::task_wait()
+{
+ task_pool.wait();
+}
+
+void CUDADevice::task_cancel()
+{
+ task_pool.cancel();
+}
+
+/* Redefine the cuda_assert macro so it can be used outside of the CUDADevice class,
+ * now that the definition of that class is complete.
+ */
+# undef cuda_assert
+# define cuda_assert(stmt) \
+ { \
+ CUresult result = stmt; \
+\
+ if (result != CUDA_SUCCESS) { \
+ string message = string_printf("CUDA error: %s in %s", cuewErrorString(result), #stmt); \
+ if (device->error_msg == "") \
+ device->error_msg = message; \
+ fprintf(stderr, "%s\n", message.c_str()); \
+ /*cuda_abort();*/ \
+ device->cuda_error_documentation(); \
+ } \
+ } \
+ (void)0
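+
+/* Unlike the in-class version, this variant reaches the device through an
+ * explicit `device` pointer, so it is usable from the helper classes below
+ * (CUDAContextScope, CUDASplitKernelFunction, CUDASplitKernel). As a rough
+ * illustration (simplified, not the literal preprocessor output), a call such
+ * as
+ *
+ *   cuda_assert(cuCtxPushCurrent(device->cuContext));
+ *
+ * behaves like:
+ *
+ *   CUresult result = cuCtxPushCurrent(device->cuContext);
+ *   if (result != CUDA_SUCCESS) {
+ *     string message = string_printf(
+ *         "CUDA error: %s in %s", cuewErrorString(result), "cuCtxPushCurrent(device->cuContext)");
+ *     if (device->error_msg == "")
+ *       device->error_msg = message;
+ *     fprintf(stderr, "%s\n", message.c_str());
+ *     device->cuda_error_documentation();
+ *   }
+ */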
+
+/* CUDA context scope. */
+
+CUDAContextScope::CUDAContextScope(CUDADevice *device) : device(device)
+{
+ cuda_assert(cuCtxPushCurrent(device->cuContext));
+}
+
+CUDAContextScope::~CUDAContextScope()
+{
+ cuda_assert(cuCtxPopCurrent(NULL));
+}
+
+/* split kernel */
+
+class CUDASplitKernelFunction : public SplitKernelFunction {
+ CUDADevice *device;
+ CUfunction func;
+
+ public:
+ CUDASplitKernelFunction(CUDADevice *device, CUfunction func) : device(device), func(func)
+ {
+ }
+
+ /* enqueue the kernel, returns false if there is an error */
+ bool enqueue(const KernelDimensions &dim, device_memory & /*kg*/, device_memory & /*data*/)
+ {
+ return enqueue(dim, NULL);
+ }
+
+ /* enqueue the kernel, returns false if there is an error */
+ bool enqueue(const KernelDimensions &dim, void *args[])
+ {
+ if (device->have_error())
+ return false;
+
+ CUDAContextScope scope(device);
+
+ /* we ignore dim.local_size for now, as this is faster */
+ int threads_per_block;
+ cuda_assert(
+ cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func));
+
+ int xblocks = (dim.global_size[0] * dim.global_size[1] + threads_per_block - 1) /
+ threads_per_block;
+
+ cuda_assert(cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1));
+
+ cuda_assert(cuLaunchKernel(func,
+ xblocks,
+ 1,
+ 1, /* blocks */
+ threads_per_block,
+ 1,
+ 1, /* threads */
+ 0,
+ 0,
+ args,
+ 0));
+
+ return !device->have_error();
+ }
+};
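+
+/* CUDASplitKernelFunction::enqueue() above flattens the 2D global size into a
+ * 1D grid of full blocks, rounding up so every work item is covered. For
+ * example, with dim.global_size = (256, 256) and threads_per_block = 1024:
+ *
+ *   xblocks = (256 * 256 + 1024 - 1) / 1024 = 64
+ *
+ * i.e. 64 blocks of 1024 threads cover all 65536 items exactly; a size that is
+ * not a multiple of the block size would get one extra, partially used block. */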
+
+CUDASplitKernel::CUDASplitKernel(CUDADevice *device) : DeviceSplitKernel(device), device(device)
+{
+}
+
+uint64_t CUDASplitKernel::state_buffer_size(device_memory & /*kg*/,
+ device_memory & /*data*/,
+ size_t num_threads)
+{
+ CUDAContextScope scope(device);
+
+ device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE);
+ size_buffer.alloc(1);
+ size_buffer.zero_to_device();
+
+ uint threads = num_threads;
+ CUdeviceptr d_size = (CUdeviceptr)size_buffer.device_pointer;
+
+ struct args_t {
+ uint *num_threads;
+ CUdeviceptr *size;
+ };
+
+ args_t args = {&threads, &d_size};
+
+ CUfunction state_buffer_size;
+ cuda_assert(
+ cuModuleGetFunction(&state_buffer_size, device->cuModule, "kernel_cuda_state_buffer_size"));
+
+ cuda_assert(cuLaunchKernel(state_buffer_size, 1, 1, 1, 1, 1, 1, 0, 0, (void **)&args, 0));
+
+ size_buffer.copy_from_device(0, 1, 1);
+ size_t size = size_buffer[0];
+ size_buffer.free();
+
+ return size;
+}
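+
+/* Note on the argument passing above: cuLaunchKernel() expects an array of
+ * pointers, one per kernel parameter. A struct whose members are exactly those
+ * pointers has the same layout in practice, so (void **)&args is equivalent to:
+ *
+ *   void *kernel_args[] = {&threads, &d_size};
+ *   cuda_assert(cuLaunchKernel(state_buffer_size, 1, 1, 1, 1, 1, 1, 0, 0, kernel_args, 0));
+ *
+ * The same pattern is used by enqueue_split_kernel_data_init() below. */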
+
+bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions &dim,
+ RenderTile &rtile,
+ int num_global_elements,
+ device_memory & /*kernel_globals*/,
+ device_memory & /*kernel_data*/,
+ device_memory &split_data,
+ device_memory &ray_state,
+ device_memory &queue_index,
+ device_memory &use_queues_flag,
+ device_memory &work_pool_wgs)
+{
+ CUDAContextScope scope(device);
+
+ CUdeviceptr d_split_data = (CUdeviceptr)split_data.device_pointer;
+ CUdeviceptr d_ray_state = (CUdeviceptr)ray_state.device_pointer;
+ CUdeviceptr d_queue_index = (CUdeviceptr)queue_index.device_pointer;
+ CUdeviceptr d_use_queues_flag = (CUdeviceptr)use_queues_flag.device_pointer;
+ CUdeviceptr d_work_pool_wgs = (CUdeviceptr)work_pool_wgs.device_pointer;
+
+ CUdeviceptr d_buffer = (CUdeviceptr)rtile.buffer;
+
+ int end_sample = rtile.start_sample + rtile.num_samples;
+ int queue_size = dim.global_size[0] * dim.global_size[1];
+
+ struct args_t {
+ CUdeviceptr *split_data_buffer;
+ int *num_elements;
+ CUdeviceptr *ray_state;
+ int *start_sample;
+ int *end_sample;
+ int *sx;
+ int *sy;
+ int *sw;
+ int *sh;
+ int *offset;
+ int *stride;
+ CUdeviceptr *queue_index;
+ int *queuesize;
+ CUdeviceptr *use_queues_flag;
+ CUdeviceptr *work_pool_wgs;
+ int *num_samples;
+ CUdeviceptr *buffer;
+ };
+
+ args_t args = {&d_split_data,
+ &num_global_elements,
+ &d_ray_state,
+ &rtile.start_sample,
+ &end_sample,
+ &rtile.x,
+ &rtile.y,
+ &rtile.w,
+ &rtile.h,
+ &rtile.offset,
+ &rtile.stride,
+ &d_queue_index,
+ &queue_size,
+ &d_use_queues_flag,
+ &d_work_pool_wgs,
+ &rtile.num_samples,
+ &d_buffer};
+
+ CUfunction data_init;
+ cuda_assert(
+ cuModuleGetFunction(&data_init, device->cuModule, "kernel_cuda_path_trace_data_init"));
+ if (device->have_error()) {
+ return false;
+ }
+
+ CUDASplitKernelFunction(device, data_init).enqueue(dim, (void **)&args);
+
+ return !device->have_error();
+}
+
+SplitKernelFunction *CUDASplitKernel::get_split_kernel_function(const string &kernel_name,
+ const DeviceRequestedFeatures &)
+{
+ CUDAContextScope scope(device);
+ CUfunction func;
+
+ cuda_assert(
+ cuModuleGetFunction(&func, device->cuModule, (string("kernel_cuda_") + kernel_name).data()));
+ if (device->have_error()) {
+ device->cuda_error_message(
+ string_printf("kernel \"kernel_cuda_%s\" not found in module", kernel_name.data()));
+ return NULL;
+ }
+
+ return new CUDASplitKernelFunction(device, func);
+}
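+
+/* For example (illustrative), requesting kernel_name "path_init" here looks up
+ * the CUDA symbol "kernel_cuda_path_init" in device->cuModule. */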
+
+int2 CUDASplitKernel::split_kernel_local_size()
+{
+ return make_int2(32, 1);
+}
+
+int2 CUDASplitKernel::split_kernel_global_size(device_memory &kg,
+ device_memory &data,
+ DeviceTask * /*task*/)
+{
+ CUDAContextScope scope(device);
+ size_t free;
+ size_t total;
+
+ cuda_assert(cuMemGetInfo(&free, &total));
+
+ VLOG(1) << "Maximum device allocation size: " << string_human_readable_number(free)
+ << " bytes. (" << string_human_readable_size(free) << ").";
+
+ size_t num_elements = max_elements_for_max_buffer_size(kg, data, free / 2);
+ size_t side = round_down((int)sqrt(num_elements), 32);
+ int2 global_size = make_int2(side, round_down(num_elements / side, 16));
+ VLOG(1) << "Global size: " << global_size << ".";
+ return global_size;
+}
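+
+/* Worked example of the sizing above (numbers are purely illustrative): if half
+ * of the free device memory lets max_elements_for_max_buffer_size() return
+ * 2097152 elements, then:
+ *
+ *   side        = round_down((int)sqrt(2097152), 32) = round_down(1448, 32) = 1440
+ *   global_size = make_int2(1440, round_down(2097152 / 1440, 16))
+ *               = make_int2(1440, 1456)
+ *
+ * so the split kernel state is sized for 1440 * 1456 = 2096640 work items. */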
+
+CCL_NAMESPACE_END
+
+#endif
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index 76670351734..d94d409175b 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -25,11 +25,11 @@
#include "util/util_logging.h"
#include "util/util_math.h"
#include "util/util_opengl.h"
-#include "util/util_time.h"
+#include "util/util_string.h"
#include "util/util_system.h"
+#include "util/util_time.h"
#include "util/util_types.h"
#include "util/util_vector.h"
-#include "util/util_string.h"
CCL_NAMESPACE_BEGIN
@@ -366,6 +366,15 @@ void Device::draw_pixels(device_memory &rgba,
Device *Device::create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
{
+#ifdef WITH_MULTI
+ if (!info.multi_devices.empty()) {
+ /* Always create a multi device when info contains multiple devices.
+ * This is done so that the type can still be e.g. DEVICE_CPU to indicate
+ * that it is a homogeneous collection of devices, which simplifies checks. */
+ return device_multi_create(info, stats, profiler, background);
+ }
+#endif
+
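+ /* For example (purely illustrative), rendering on two CUDA GPUs:
+ *
+ *   DeviceInfo info = Device::get_multi_device(subdevices, ...);
+ *   // info.type == DEVICE_CUDA, info.multi_devices.size() == 2
+ *   Device *device = Device::create(info, stats, profiler, background);
+ *   // -> device_multi_create(), which wraps one CUDA device per GPU
+ *
+ * Mixing device types (e.g. CPU + CUDA) instead yields DEVICE_MULTI, see
+ * get_multi_device() below. */
+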
Device *device;
switch (info.type) {
@@ -388,11 +397,6 @@ Device *Device::create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool
device = NULL;
break;
#endif
-#ifdef WITH_MULTI
- case DEVICE_MULTI:
- device = device_multi_create(info, stats, profiler, background);
- break;
-#endif
#ifdef WITH_NETWORK
case DEVICE_NETWORK:
device = device_network_create(info, stats, profiler, "127.0.0.1");
@@ -586,7 +590,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
}
DeviceInfo info;
- info.type = DEVICE_MULTI;
+ info.type = subdevices.front().type;
info.id = "MULTI";
info.description = "Multi Device";
info.num = 0;
@@ -624,6 +628,14 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
info.multi_devices.push_back(device);
}
+ /* Create unique ID for this combination of devices. */
+ info.id += device.id;
+
+ /* Set device type to MULTI if subdevices are not of a common type. */
+ if (device.type != info.type) {
+ info.type = DEVICE_MULTI;
+ }
+
/* Accumulate device info. */
info.has_half_images &= device.has_half_images;
info.has_volume_decoupled &= device.has_volume_decoupled;
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 66fcac921d3..a98ac171709 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -27,8 +27,8 @@
#include "util/util_list.h"
#include "util/util_stats.h"
#include "util/util_string.h"
-#include "util/util_thread.h"
#include "util/util_texture.h"
+#include "util/util_thread.h"
#include "util/util_types.h"
#include "util/util_vector.h"
@@ -83,6 +83,7 @@ class DeviceInfo {
bool has_profiling; /* Supports runtime collection of profiling info. */
int cpu_threads;
vector<DeviceInfo> multi_devices;
+ vector<DeviceInfo> denoising_devices;
DeviceInfo()
{
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index c2843a61e6d..57e8523e02a 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -29,16 +29,19 @@
#include "device/device_intern.h"
#include "device/device_split_kernel.h"
+// clang-format off
#include "kernel/kernel.h"
#include "kernel/kernel_compat_cpu.h"
#include "kernel/kernel_types.h"
#include "kernel/split/kernel_split_data.h"
#include "kernel/kernel_globals.h"
+#include "kernel/kernel_adaptive_sampling.h"
#include "kernel/filter/filter.h"
#include "kernel/osl/osl_shader.h"
#include "kernel/osl/osl_globals.h"
+// clang-format on
#include "render/buffers.h"
#include "render/coverage.h"
@@ -261,7 +264,7 @@ class CPUDevice : public Device {
CPUDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_)
: Device(info_, stats_, profiler_, background_),
- texture_info(this, "__texture_info", MEM_TEXTURE),
+ texture_info(this, "__texture_info", MEM_GLOBAL),
#define REGISTER_KERNEL(name) name##_kernel(KERNEL_FUNCTIONS(name))
REGISTER_KERNEL(path_trace),
REGISTER_KERNEL(convert_to_half_float),
@@ -317,6 +320,10 @@ class CPUDevice : public Device {
REGISTER_SPLIT_KERNEL(next_iteration_setup);
REGISTER_SPLIT_KERNEL(indirect_subsurface);
REGISTER_SPLIT_KERNEL(buffer_update);
+ REGISTER_SPLIT_KERNEL(adaptive_stopping);
+ REGISTER_SPLIT_KERNEL(adaptive_filter_x);
+ REGISTER_SPLIT_KERNEL(adaptive_filter_y);
+ REGISTER_SPLIT_KERNEL(adaptive_adjust_samples);
#undef REGISTER_SPLIT_KERNEL
#undef KERNEL_FUNCTIONS
}
@@ -338,7 +345,10 @@ class CPUDevice : public Device {
if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) {
bvh_layout_mask |= BVH_LAYOUT_BVH4;
}
-#if defined(__x86_64__) || defined(_M_X64)
+ /* MSVC does not support the -march=native switch and you always end up
+ * with an sse2 kernel when you use WITH_KERNEL_NATIVE. We *cannot* feed
+ * that kernel BVH8 even if the CPU flags would allow for it. */
+#if (defined(__x86_64__) || defined(_M_X64)) && !(defined(_MSC_VER) && defined(WITH_KERNEL_NATIVE))
if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) {
bvh_layout_mask |= BVH_LAYOUT_BVH8;
}
@@ -362,6 +372,9 @@ class CPUDevice : public Device {
if (mem.type == MEM_TEXTURE) {
assert(!"mem_alloc not supported for textures.");
}
+ else if (mem.type == MEM_GLOBAL) {
+ assert(!"mem_alloc not supported for global memory.");
+ }
else {
if (mem.name) {
VLOG(1) << "Buffer allocate: " << mem.name << ", "
@@ -386,9 +399,13 @@ class CPUDevice : public Device {
void mem_copy_to(device_memory &mem)
{
- if (mem.type == MEM_TEXTURE) {
- tex_free(mem);
- tex_alloc(mem);
+ if (mem.type == MEM_GLOBAL) {
+ global_free(mem);
+ global_alloc(mem);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ tex_free((device_texture &)mem);
+ tex_alloc((device_texture &)mem);
}
else if (mem.type == MEM_PIXELS) {
assert(!"mem_copy_to not supported for pixels.");
@@ -420,8 +437,11 @@ class CPUDevice : public Device {
void mem_free(device_memory &mem)
{
- if (mem.type == MEM_TEXTURE) {
- tex_free(mem);
+ if (mem.type == MEM_GLOBAL) {
+ global_free(mem);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ tex_free((device_texture &)mem);
}
else if (mem.device_pointer) {
if (mem.type == MEM_DEVICE_ONLY) {
@@ -443,51 +463,50 @@ class CPUDevice : public Device {
kernel_const_copy(&kernel_globals, name, host, size);
}
- void tex_alloc(device_memory &mem)
+ void global_alloc(device_memory &mem)
{
- VLOG(1) << "Texture allocate: " << mem.name << ", "
+ VLOG(1) << "Global memory allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
- if (mem.interpolation == INTERPOLATION_NONE) {
- /* Data texture. */
- kernel_tex_copy(&kernel_globals, mem.name, mem.host_pointer, mem.data_size);
- }
- else {
- /* Image Texture. */
- int flat_slot = 0;
- if (string_startswith(mem.name, "__tex_image")) {
- int pos = string(mem.name).rfind("_");
- flat_slot = atoi(mem.name + pos + 1);
- }
- else {
- assert(0);
- }
-
- if (flat_slot >= texture_info.size()) {
- /* Allocate some slots in advance, to reduce amount
- * of re-allocations. */
- texture_info.resize(flat_slot + 128);
- }
+ kernel_global_memory_copy(&kernel_globals, mem.name, mem.host_pointer, mem.data_size);
- TextureInfo &info = texture_info[flat_slot];
- info.data = (uint64_t)mem.host_pointer;
- info.cl_buffer = 0;
- info.interpolation = mem.interpolation;
- info.extension = mem.extension;
- info.width = mem.data_width;
- info.height = mem.data_height;
- info.depth = mem.data_depth;
+ mem.device_pointer = (device_ptr)mem.host_pointer;
+ mem.device_size = mem.memory_size();
+ stats.mem_alloc(mem.device_size);
+ }
- need_texture_info = true;
+ void global_free(device_memory &mem)
+ {
+ if (mem.device_pointer) {
+ mem.device_pointer = 0;
+ stats.mem_free(mem.device_size);
+ mem.device_size = 0;
}
+ }
+
+ void tex_alloc(device_texture &mem)
+ {
+ VLOG(1) << "Texture allocate: " << mem.name << ", "
+ << string_human_readable_number(mem.memory_size()) << " bytes. ("
+ << string_human_readable_size(mem.memory_size()) << ")";
mem.device_pointer = (device_ptr)mem.host_pointer;
mem.device_size = mem.memory_size();
stats.mem_alloc(mem.device_size);
+
+ const uint slot = mem.slot;
+ if (slot >= texture_info.size()) {
+ /* Allocate some slots in advance, to reduce the number of re-allocations. */
+ texture_info.resize(slot + 128);
+ }
+
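+ /* The slot index and TextureInfo now come directly from the device_texture,
+ * instead of being parsed out of names like "__tex_image_*" with atoi() as in
+ * the removed code above; only the data pointer still has to be patched in. */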
+ texture_info[slot] = mem.info;
+ texture_info[slot].data = (uint64_t)mem.host_pointer;
+ need_texture_info = true;
}
- void tex_free(device_memory &mem)
+ void tex_free(device_texture &mem)
{
if (mem.device_pointer) {
mem.device_pointer = 0;
@@ -508,13 +527,14 @@ class CPUDevice : public Device {
void thread_run(DeviceTask *task)
{
- if (task->type == DeviceTask::RENDER) {
+ if (task->type == DeviceTask::RENDER)
thread_render(*task);
- }
- else if (task->type == DeviceTask::FILM_CONVERT)
- thread_film_convert(*task);
else if (task->type == DeviceTask::SHADER)
thread_shader(*task);
+ else if (task->type == DeviceTask::FILM_CONVERT)
+ thread_film_convert(*task);
+ else if (task->type == DeviceTask::DENOISE_BUFFER)
+ thread_denoise(*task);
}
class CPUDeviceTask : public DeviceTask {
@@ -819,6 +839,49 @@ class CPUDevice : public Device {
return true;
}
+ bool adaptive_sampling_filter(KernelGlobals *kg, RenderTile &tile)
+ {
+ WorkTile wtile;
+ wtile.x = tile.x;
+ wtile.y = tile.y;
+ wtile.w = tile.w;
+ wtile.h = tile.h;
+ wtile.offset = tile.offset;
+ wtile.stride = tile.stride;
+ wtile.buffer = (float *)tile.buffer;
+
+ bool any = false;
+ for (int y = tile.y; y < tile.y + tile.h; ++y) {
+ any |= kernel_do_adaptive_filter_x(kg, y, &wtile);
+ }
+ for (int x = tile.x; x < tile.x + tile.w; ++x) {
+ any |= kernel_do_adaptive_filter_y(kg, x, &wtile);
+ }
+ return (!any);
+ }
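+
+ /* Each filter pass above reports whether its row (x pass) or column (y pass)
+ * still contains pixels that need more samples; returning !any therefore tells
+ * path_trace() below that the whole tile has converged and it can stop before
+ * end_sample is reached. */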
+
+ void adaptive_sampling_post(const RenderTile &tile, KernelGlobals *kg)
+ {
+ float *render_buffer = (float *)tile.buffer;
+ for (int y = tile.y; y < tile.y + tile.h; y++) {
+ for (int x = tile.x; x < tile.x + tile.w; x++) {
+ int index = tile.offset + x + y * tile.stride;
+ ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride;
+ if (buffer[kernel_data.film.pass_sample_count] < 0.0f) {
+ buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count];
+ float sample_multiplier = tile.sample / max((float)tile.start_sample + 1.0f,
+ buffer[kernel_data.film.pass_sample_count]);
+ if (sample_multiplier != 1.0f) {
+ kernel_adaptive_post_adjust(kg, buffer, sample_multiplier);
+ }
+ }
+ else {
+ kernel_adaptive_post_adjust(kg, buffer, tile.sample / (tile.sample - 1.0f));
+ }
+ }
+ }
+ }
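+
+ /* Worked example (illustrative numbers): a pixel that converged after 64 of
+ * tile.sample = 256 samples has its sample count stored negated (-64.0f) to
+ * mark it as converged. The code above flips it back to 64 and scales the
+ * accumulated passes by
+ *
+ *   sample_multiplier = 256 / max(start_sample + 1, 64) = 4   (start_sample = 0)
+ *
+ * so every pixel ends up weighted as if it had received the full 256 samples. */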
+
void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
{
const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE;
@@ -851,14 +914,27 @@ class CPUDevice : public Device {
path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
}
}
-
tile.sample = sample + 1;
+ if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(sample)) {
+ const bool stop = adaptive_sampling_filter(kg, tile);
+ if (stop) {
+ const int num_progress_samples = end_sample - sample;
+ tile.sample = end_sample;
+ task.update_progress(&tile, tile.w * tile.h * num_progress_samples);
+ break;
+ }
+ }
+
task.update_progress(&tile, tile.w * tile.h);
}
if (use_coverage) {
coverage.finalize();
}
+
+ if (task.adaptive_sampling.use) {
+ adaptive_sampling_post(tile, kg);
+ }
}
void denoise(DenoisingTask &denoising, RenderTile &tile)
@@ -923,7 +999,7 @@ class CPUDevice : public Device {
DenoisingTask denoising(this, task);
denoising.profiler = &kg->profiler;
- while (task.acquire_tile(this, tile)) {
+ while (task.acquire_tile(this, tile, task.tile_types)) {
if (tile.task == RenderTile::PATH_TRACE) {
if (use_split_kernel) {
device_only_memory<uchar> void_buffer(this, "void_buffer");
@@ -954,6 +1030,33 @@ class CPUDevice : public Device {
delete split_kernel;
}
+ void thread_denoise(DeviceTask &task)
+ {
+ RenderTile tile;
+ tile.x = task.x;
+ tile.y = task.y;
+ tile.w = task.w;
+ tile.h = task.h;
+ tile.buffer = task.buffer;
+ tile.sample = task.sample + task.num_samples;
+ tile.num_samples = task.num_samples;
+ tile.start_sample = task.sample;
+ tile.offset = task.offset;
+ tile.stride = task.stride;
+ tile.buffers = task.buffers;
+
+ DenoisingTask denoising(this, task);
+
+ ProfilingState denoising_profiler_state;
+ profiler.add_state(&denoising_profiler_state);
+ denoising.profiler = &denoising_profiler_state;
+
+ denoise(denoising, tile);
+ task.update_progress(&tile, tile.w * tile.h);
+
+ profiler.remove_state(&denoising_profiler_state);
+ }
+
void thread_film_convert(DeviceTask &task)
{
float sample_scale = 1.0f / (task.sample + 1);
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index dfd80d678fd..9a703b45c0a 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -14,2562 +14,21 @@
* limitations under the License.
*/
-#include <climits>
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+#ifdef WITH_CUDA
-#include "device/device.h"
-#include "device/device_denoising.h"
-#include "device/device_intern.h"
-#include "device/device_split_kernel.h"
+# include "device/cuda/device_cuda.h"
+# include "device/device.h"
+# include "device/device_intern.h"
-#include "render/buffers.h"
-
-#include "kernel/filter/filter_defines.h"
-
-#ifdef WITH_CUDA_DYNLOAD
-# include "cuew.h"
-#else
-# include "util/util_opengl.h"
-# include <cuda.h>
-# include <cudaGL.h>
-#endif
-#include "util/util_debug.h"
-#include "util/util_foreach.h"
-#include "util/util_logging.h"
-#include "util/util_map.h"
-#include "util/util_md5.h"
-#include "util/util_opengl.h"
-#include "util/util_path.h"
-#include "util/util_string.h"
-#include "util/util_system.h"
-#include "util/util_types.h"
-#include "util/util_time.h"
-#include "util/util_windows.h"
-
-#include "kernel/split/kernel_split_data_types.h"
+# include "util/util_logging.h"
+# include "util/util_string.h"
+# include "util/util_windows.h"
CCL_NAMESPACE_BEGIN
-#ifndef WITH_CUDA_DYNLOAD
-
-/* Transparently implement some functions, so majority of the file does not need
- * to worry about difference between dynamically loaded and linked CUDA at all.
- */
-
-namespace {
-
-const char *cuewErrorString(CUresult result)
-{
- /* We can only give error code here without major code duplication, that
- * should be enough since dynamic loading is only being disabled by folks
- * who knows what they're doing anyway.
- *
- * NOTE: Avoid call from several threads.
- */
- static string error;
- error = string_printf("%d", result);
- return error.c_str();
-}
-
-const char *cuewCompilerPath()
-{
- return CYCLES_CUDA_NVCC_EXECUTABLE;
-}
-
-int cuewCompilerVersion()
-{
- return (CUDA_VERSION / 100) + (CUDA_VERSION % 100 / 10);
-}
-
-} /* namespace */
-#endif /* WITH_CUDA_DYNLOAD */
-
-class CUDADevice;
-
-class CUDASplitKernel : public DeviceSplitKernel {
- CUDADevice *device;
-
- public:
- explicit CUDASplitKernel(CUDADevice *device);
-
- virtual uint64_t state_buffer_size(device_memory &kg, device_memory &data, size_t num_threads);
-
- virtual bool enqueue_split_kernel_data_init(const KernelDimensions &dim,
- RenderTile &rtile,
- int num_global_elements,
- device_memory &kernel_globals,
- device_memory &kernel_data_,
- device_memory &split_data,
- device_memory &ray_state,
- device_memory &queue_index,
- device_memory &use_queues_flag,
- device_memory &work_pool_wgs);
-
- virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name,
- const DeviceRequestedFeatures &);
- virtual int2 split_kernel_local_size();
- virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask *task);
-};
-
-/* Utility to push/pop CUDA context. */
-class CUDAContextScope {
- public:
- CUDAContextScope(CUDADevice *device);
- ~CUDAContextScope();
-
- private:
- CUDADevice *device;
-};
-
-class CUDADevice : public Device {
- public:
- DedicatedTaskPool task_pool;
- CUdevice cuDevice;
- CUcontext cuContext;
- CUmodule cuModule, cuFilterModule;
- size_t device_texture_headroom;
- size_t device_working_headroom;
- bool move_texture_to_host;
- size_t map_host_used;
- size_t map_host_limit;
- int can_map_host;
- int cuDevId;
- int cuDevArchitecture;
- bool first_error;
- CUDASplitKernel *split_kernel;
-
- struct CUDAMem {
- CUDAMem() : texobject(0), array(0), use_mapped_host(false)
- {
- }
-
- CUtexObject texobject;
- CUarray array;
-
- /* If true, a mapped host memory in shared_pointer is being used. */
- bool use_mapped_host;
- };
- typedef map<device_memory *, CUDAMem> CUDAMemMap;
- CUDAMemMap cuda_mem_map;
-
- struct PixelMem {
- GLuint cuPBO;
- CUgraphicsResource cuPBOresource;
- GLuint cuTexId;
- int w, h;
- };
- map<device_ptr, PixelMem> pixel_mem_map;
-
- /* Bindless Textures */
- device_vector<TextureInfo> texture_info;
- bool need_texture_info;
-
- CUdeviceptr cuda_device_ptr(device_ptr mem)
- {
- return (CUdeviceptr)mem;
- }
-
- static bool have_precompiled_kernels()
- {
- string cubins_path = path_get("lib");
- return path_exists(cubins_path);
- }
-
- virtual bool show_samples() const
- {
- /* The CUDADevice only processes one tile at a time, so showing samples is fine. */
- return true;
- }
-
- virtual BVHLayoutMask get_bvh_layout_mask() const
- {
- return BVH_LAYOUT_BVH2;
- }
-
- /*#ifdef NDEBUG
-#define cuda_abort()
-#else
-#define cuda_abort() abort()
-#endif*/
- void cuda_error_documentation()
- {
- if (first_error) {
- fprintf(stderr,
- "\nRefer to the Cycles GPU rendering documentation for possible solutions:\n");
- fprintf(stderr,
- "https://docs.blender.org/manual/en/latest/render/cycles/gpu_rendering.html\n\n");
- first_error = false;
- }
- }
-
-#define cuda_assert(stmt) \
- { \
- CUresult result = stmt; \
-\
- if (result != CUDA_SUCCESS) { \
- string message = string_printf( \
- "CUDA error: %s in %s, line %d", cuewErrorString(result), #stmt, __LINE__); \
- if (error_msg == "") \
- error_msg = message; \
- fprintf(stderr, "%s\n", message.c_str()); \
- /*cuda_abort();*/ \
- cuda_error_documentation(); \
- } \
- } \
- (void)0
-
- bool cuda_error_(CUresult result, const string &stmt)
- {
- if (result == CUDA_SUCCESS)
- return false;
-
- string message = string_printf("CUDA error at %s: %s", stmt.c_str(), cuewErrorString(result));
- if (error_msg == "")
- error_msg = message;
- fprintf(stderr, "%s\n", message.c_str());
- cuda_error_documentation();
- return true;
- }
-
-#define cuda_error(stmt) cuda_error_(stmt, #stmt)
-
- void cuda_error_message(const string &message)
- {
- if (error_msg == "")
- error_msg = message;
- fprintf(stderr, "%s\n", message.c_str());
- cuda_error_documentation();
- }
-
- CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_)
- : Device(info, stats, profiler, background_),
- texture_info(this, "__texture_info", MEM_TEXTURE)
- {
- first_error = true;
- background = background_;
-
- cuDevId = info.num;
- cuDevice = 0;
- cuContext = 0;
-
- cuModule = 0;
- cuFilterModule = 0;
-
- split_kernel = NULL;
-
- need_texture_info = false;
-
- device_texture_headroom = 0;
- device_working_headroom = 0;
- move_texture_to_host = false;
- map_host_limit = 0;
- map_host_used = 0;
- can_map_host = 0;
-
- /* Intialize CUDA. */
- if (cuda_error(cuInit(0)))
- return;
-
- /* Setup device and context. */
- if (cuda_error(cuDeviceGet(&cuDevice, cuDevId)))
- return;
-
- /* CU_CTX_MAP_HOST for mapping host memory when out of device memory.
- * CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render,
- * so we can predict which memory to map to host. */
- cuda_assert(
- cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
-
- unsigned int ctx_flags = CU_CTX_LMEM_RESIZE_TO_MAX;
- if (can_map_host) {
- ctx_flags |= CU_CTX_MAP_HOST;
- init_host_memory();
- }
-
- /* Create context. */
- CUresult result;
-
- if (background) {
- result = cuCtxCreate(&cuContext, ctx_flags, cuDevice);
- }
- else {
- result = cuGLCtxCreate(&cuContext, ctx_flags, cuDevice);
-
- if (result != CUDA_SUCCESS) {
- result = cuCtxCreate(&cuContext, ctx_flags, cuDevice);
- background = true;
- }
- }
-
- if (cuda_error_(result, "cuCtxCreate"))
- return;
-
- int major, minor;
- cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
- cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
- cuDevArchitecture = major * 100 + minor * 10;
-
- /* Pop context set by cuCtxCreate. */
- cuCtxPopCurrent(NULL);
- }
-
- ~CUDADevice()
- {
- task_pool.stop();
-
- delete split_kernel;
-
- texture_info.free();
-
- cuda_assert(cuCtxDestroy(cuContext));
- }
-
- bool support_device(const DeviceRequestedFeatures & /*requested_features*/)
- {
- int major, minor;
- cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
- cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
-
- /* We only support sm_30 and above */
- if (major < 3) {
- cuda_error_message(string_printf(
- "CUDA device supported only with compute capability 3.0 or up, found %d.%d.",
- major,
- minor));
- return false;
- }
-
- return true;
- }
-
- bool use_adaptive_compilation()
- {
- return DebugFlags().cuda.adaptive_compile;
- }
-
- bool use_split_kernel()
- {
- return DebugFlags().cuda.split_kernel;
- }
-
- /* Common NVCC flags which stays the same regardless of shading model,
- * kernel sources md5 and only depends on compiler or compilation settings.
- */
- string compile_kernel_get_common_cflags(const DeviceRequestedFeatures &requested_features,
- bool filter = false,
- bool split = false)
- {
- const int machine = system_cpu_bits();
- const string source_path = path_get("source");
- const string include_path = source_path;
- string cflags = string_printf(
- "-m%d "
- "--ptxas-options=\"-v\" "
- "--use_fast_math "
- "-DNVCC "
- "-I\"%s\"",
- machine,
- include_path.c_str());
- if (!filter && use_adaptive_compilation()) {
- cflags += " " + requested_features.get_build_options();
- }
- const char *extra_cflags = getenv("CYCLES_CUDA_EXTRA_CFLAGS");
- if (extra_cflags) {
- cflags += string(" ") + string(extra_cflags);
- }
-#ifdef WITH_CYCLES_DEBUG
- cflags += " -D__KERNEL_DEBUG__";
-#endif
-
- if (split) {
- cflags += " -D__SPLIT__";
- }
-
- return cflags;
- }
-
- bool compile_check_compiler()
- {
- const char *nvcc = cuewCompilerPath();
- if (nvcc == NULL) {
- cuda_error_message(
- "CUDA nvcc compiler not found. "
- "Install CUDA toolkit in default location.");
- return false;
- }
- const int cuda_version = cuewCompilerVersion();
- VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << cuda_version << ".";
- const int major = cuda_version / 10, minor = cuda_version % 10;
- if (cuda_version == 0) {
- cuda_error_message("CUDA nvcc compiler version could not be parsed.");
- return false;
- }
- if (cuda_version < 80) {
- printf(
- "Unsupported CUDA version %d.%d detected, "
- "you need CUDA 8.0 or newer.\n",
- major,
- minor);
- return false;
- }
- else if (cuda_version != 101) {
- printf(
- "CUDA version %d.%d detected, build may succeed but only "
- "CUDA 10.1 is officially supported.\n",
- major,
- minor);
- }
- return true;
- }
-
- string compile_kernel(const DeviceRequestedFeatures &requested_features,
- bool filter = false,
- bool split = false)
- {
- const char *name, *source;
- if (filter) {
- name = "filter";
- source = "filter.cu";
- }
- else if (split) {
- name = "kernel_split";
- source = "kernel_split.cu";
- }
- else {
- name = "kernel";
- source = "kernel.cu";
- }
- /* Compute cubin name. */
- int major, minor;
- cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
- cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
-
- /* Attempt to use kernel provided with Blender. */
- if (!use_adaptive_compilation()) {
- const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
- VLOG(1) << "Testing for pre-compiled kernel " << cubin << ".";
- if (path_exists(cubin)) {
- VLOG(1) << "Using precompiled kernel.";
- return cubin;
- }
- const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor));
- VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
- if (path_exists(ptx)) {
- VLOG(1) << "Using precompiled kernel.";
- return ptx;
- }
- }
-
- const string common_cflags = compile_kernel_get_common_cflags(
- requested_features, filter, split);
-
- /* Try to use locally compiled kernel. */
- const string source_path = path_get("source");
- const string kernel_md5 = path_files_md5_hash(source_path);
-
- /* We include cflags into md5 so changing cuda toolkit or changing other
- * compiler command line arguments makes sure cubin gets re-built.
- */
- const string cubin_md5 = util_md5_string(kernel_md5 + common_cflags);
-
- const string cubin_file = string_printf(
- "cycles_%s_sm%d%d_%s.cubin", name, major, minor, cubin_md5.c_str());
- const string cubin = path_cache_get(path_join("kernels", cubin_file));
- VLOG(1) << "Testing for locally compiled kernel " << cubin << ".";
- if (path_exists(cubin)) {
- VLOG(1) << "Using locally compiled kernel.";
- return cubin;
- }
-
-#ifdef _WIN32
- if (have_precompiled_kernels()) {
- if (major < 3) {
- cuda_error_message(
- string_printf("CUDA device requires compute capability 3.0 or up, "
- "found %d.%d. Your GPU is not supported.",
- major,
- minor));
- }
- else {
- cuda_error_message(
- string_printf("CUDA binary kernel for this graphics card compute "
- "capability (%d.%d) not found.",
- major,
- minor));
- }
- return "";
- }
-#endif
-
- /* Compile. */
- if (!compile_check_compiler()) {
- return "";
- }
- const char *nvcc = cuewCompilerPath();
- const string kernel = path_join(path_join(source_path, "kernel"),
- path_join("kernels", path_join("cuda", source)));
- double starttime = time_dt();
- printf("Compiling CUDA kernel ...\n");
-
- path_create_directories(cubin);
-
- string command = string_printf(
- "\"%s\" "
- "-arch=sm_%d%d "
- "--cubin \"%s\" "
- "-o \"%s\" "
- "%s ",
- nvcc,
- major,
- minor,
- kernel.c_str(),
- cubin.c_str(),
- common_cflags.c_str());
-
- printf("%s\n", command.c_str());
-
- if (system(command.c_str()) == -1) {
- cuda_error_message(
- "Failed to execute compilation command, "
- "see console for details.");
- return "";
- }
-
- /* Verify if compilation succeeded */
- if (!path_exists(cubin)) {
- cuda_error_message(
- "CUDA kernel compilation failed, "
- "see console for details.");
- return "";
- }
-
- printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);
-
- return cubin;
- }
-
- bool load_kernels(const DeviceRequestedFeatures &requested_features)
- {
- /* TODO(sergey): Support kernels re-load for CUDA devices.
- *
- * Currently re-loading kernel will invalidate memory pointers,
- * causing problems in cuCtxSynchronize.
- */
- if (cuFilterModule && cuModule) {
- VLOG(1) << "Skipping kernel reload, not currently supported.";
- return true;
- }
-
- /* check if cuda init succeeded */
- if (cuContext == 0)
- return false;
-
- /* check if GPU is supported */
- if (!support_device(requested_features))
- return false;
-
- /* get kernel */
- string cubin = compile_kernel(requested_features, false, use_split_kernel());
- if (cubin == "")
- return false;
-
- string filter_cubin = compile_kernel(requested_features, true, false);
- if (filter_cubin == "")
- return false;
-
- /* open module */
- CUDAContextScope scope(this);
-
- string cubin_data;
- CUresult result;
-
- if (path_read_text(cubin, cubin_data))
- result = cuModuleLoadData(&cuModule, cubin_data.c_str());
- else
- result = CUDA_ERROR_FILE_NOT_FOUND;
-
- if (cuda_error_(result, "cuModuleLoad"))
- cuda_error_message(string_printf("Failed loading CUDA kernel %s.", cubin.c_str()));
-
- if (path_read_text(filter_cubin, cubin_data))
- result = cuModuleLoadData(&cuFilterModule, cubin_data.c_str());
- else
- result = CUDA_ERROR_FILE_NOT_FOUND;
-
- if (cuda_error_(result, "cuModuleLoad"))
- cuda_error_message(string_printf("Failed loading CUDA kernel %s.", filter_cubin.c_str()));
-
- if (result == CUDA_SUCCESS) {
- reserve_local_memory(requested_features);
- }
-
- return (result == CUDA_SUCCESS);
- }
-
- void reserve_local_memory(const DeviceRequestedFeatures &requested_features)
- {
- if (use_split_kernel()) {
- /* Split kernel mostly uses global memory and adaptive compilation,
- * difficult to predict how much is needed currently. */
- return;
- }
-
- /* Together with CU_CTX_LMEM_RESIZE_TO_MAX, this reserves local memory
- * needed for kernel launches, so that we can reliably figure out when
- * to allocate scene data in mapped host memory. */
- CUDAContextScope scope(this);
-
- size_t total = 0, free_before = 0, free_after = 0;
- cuMemGetInfo(&free_before, &total);
-
- /* Get kernel function. */
- CUfunction cuPathTrace;
-
- if (requested_features.use_integrator_branched) {
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
- }
- else {
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
- }
-
- cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
-
- int min_blocks, num_threads_per_block;
- cuda_assert(cuOccupancyMaxPotentialBlockSize(
- &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
-
- /* Launch kernel, using just 1 block appears sufficient to reserve
- * memory for all multiprocessors. It would be good to do this in
- * parallel for the multi GPU case still to make it faster. */
- CUdeviceptr d_work_tiles = 0;
- uint total_work_size = 0;
-
- void *args[] = {&d_work_tiles, &total_work_size};
-
- cuda_assert(cuLaunchKernel(cuPathTrace, 1, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
-
- cuda_assert(cuCtxSynchronize());
-
- cuMemGetInfo(&free_after, &total);
- VLOG(1) << "Local memory reserved " << string_human_readable_number(free_before - free_after)
- << " bytes. (" << string_human_readable_size(free_before - free_after) << ")";
-
-#if 0
- /* For testing mapped host memory, fill up device memory. */
- const size_t keep_mb = 1024;
-
- while (free_after > keep_mb * 1024 * 1024LL) {
- CUdeviceptr tmp;
- cuda_assert(cuMemAlloc(&tmp, 10 * 1024 * 1024LL));
- cuMemGetInfo(&free_after, &total);
- }
-#endif
- }
-
- void init_host_memory()
- {
- /* Limit amount of host mapped memory, because allocating too much can
- * cause system instability. Leave at least half or 4 GB of system
- * memory free, whichever is smaller. */
- size_t default_limit = 4 * 1024 * 1024 * 1024LL;
- size_t system_ram = system_physical_ram();
-
- if (system_ram > 0) {
- if (system_ram / 2 > default_limit) {
- map_host_limit = system_ram - default_limit;
- }
- else {
- map_host_limit = system_ram / 2;
- }
- }
- else {
- VLOG(1) << "Mapped host memory disabled, failed to get system RAM";
- map_host_limit = 0;
- }
-
- /* Amount of device memory to keep is free after texture memory
- * and working memory allocations respectively. We set the working
- * memory limit headroom lower so that some space is left after all
- * texture memory allocations. */
- device_working_headroom = 32 * 1024 * 1024LL; // 32MB
- device_texture_headroom = 128 * 1024 * 1024LL; // 128MB
-
- VLOG(1) << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
- << " bytes. (" << string_human_readable_size(map_host_limit) << ")";
- }
-
- void load_texture_info()
- {
- if (need_texture_info) {
- texture_info.copy_to_device();
- need_texture_info = false;
- }
- }
-
- void move_textures_to_host(size_t size, bool for_texture)
- {
- /* Signal to reallocate textures in host memory only. */
- move_texture_to_host = true;
-
- while (size > 0) {
- /* Find suitable memory allocation to move. */
- device_memory *max_mem = NULL;
- size_t max_size = 0;
- bool max_is_image = false;
-
- foreach (CUDAMemMap::value_type &pair, cuda_mem_map) {
- device_memory &mem = *pair.first;
- CUDAMem *cmem = &pair.second;
-
- bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info);
- bool is_image = is_texture && (mem.data_height > 1);
-
- /* Can't move this type of memory. */
- if (!is_texture || cmem->array) {
- continue;
- }
-
- /* Already in host memory. */
- if (cmem->use_mapped_host) {
- continue;
- }
-
- /* For other textures, only move image textures. */
- if (for_texture && !is_image) {
- continue;
- }
-
- /* Try to move largest allocation, prefer moving images. */
- if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
- max_is_image = is_image;
- max_size = mem.device_size;
- max_mem = &mem;
- }
- }
-
- /* Move to host memory. This part is mutex protected since
- * multiple CUDA devices could be moving the memory. The
- * first one will do it, and the rest will adopt the pointer. */
- if (max_mem) {
- VLOG(1) << "Move memory from device to host: " << max_mem->name;
-
- static thread_mutex move_mutex;
- thread_scoped_lock lock(move_mutex);
-
- /* Preserve the original device pointer, in case of multi device
- * we can't change it because the pointer mapping would break. */
- device_ptr prev_pointer = max_mem->device_pointer;
- size_t prev_size = max_mem->device_size;
-
- tex_free(*max_mem);
- tex_alloc(*max_mem);
- size = (max_size >= size) ? 0 : size - max_size;
-
- max_mem->device_pointer = prev_pointer;
- max_mem->device_size = prev_size;
- }
- else {
- break;
- }
- }
-
- /* Update texture info array with new pointers. */
- load_texture_info();
-
- move_texture_to_host = false;
- }
-
- CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0)
- {
- CUDAContextScope scope(this);
-
- CUdeviceptr device_pointer = 0;
- size_t size = mem.memory_size() + pitch_padding;
-
- CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY;
- const char *status = "";
-
- /* First try allocating in device memory, respecting headroom. We make
- * an exception for texture info. It is small and frequently accessed,
- * so treat it as working memory.
- *
- * If there is not enough room for working memory, we will try to move
- * textures to host memory, assuming the performance impact would have
- * been worse for working memory. */
- bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info);
- bool is_image = is_texture && (mem.data_height > 1);
-
- size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
-
- size_t total = 0, free = 0;
- cuMemGetInfo(&free, &total);
-
- /* Move textures to host memory if needed. */
- if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
- move_textures_to_host(size + headroom - free, is_texture);
- cuMemGetInfo(&free, &total);
- }
-
- /* Allocate in device memory. */
- if (!move_texture_to_host && (size + headroom) < free) {
- mem_alloc_result = cuMemAlloc(&device_pointer, size);
- if (mem_alloc_result == CUDA_SUCCESS) {
- status = " in device memory";
- }
- }
-
- /* Fall back to mapped host memory if needed and possible. */
-
- void *shared_pointer = 0;
-
- if (mem_alloc_result != CUDA_SUCCESS && can_map_host) {
- if (mem.shared_pointer) {
- /* Another device already allocated host memory. */
- mem_alloc_result = CUDA_SUCCESS;
- shared_pointer = mem.shared_pointer;
- }
- else if (map_host_used + size < map_host_limit) {
- /* Allocate host memory ourselves. */
- mem_alloc_result = cuMemHostAlloc(
- &shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
-
- assert((mem_alloc_result == CUDA_SUCCESS && shared_pointer != 0) ||
- (mem_alloc_result != CUDA_SUCCESS && shared_pointer == 0));
- }
-
- if (mem_alloc_result == CUDA_SUCCESS) {
- cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, shared_pointer, 0));
- map_host_used += size;
- status = " in host memory";
- }
- else {
- status = " failed, out of host memory";
- }
- }
-
- if (mem_alloc_result != CUDA_SUCCESS) {
- status = " failed, out of device and host memory";
- cuda_assert(mem_alloc_result);
- }
-
- if (mem.name) {
- VLOG(1) << "Buffer allocate: " << mem.name << ", "
- << string_human_readable_number(mem.memory_size()) << " bytes. ("
- << string_human_readable_size(mem.memory_size()) << ")" << status;
- }
-
- mem.device_pointer = (device_ptr)device_pointer;
- mem.device_size = size;
- stats.mem_alloc(size);
-
- if (!mem.device_pointer) {
- return NULL;
- }
-
- /* Insert into map of allocations. */
- CUDAMem *cmem = &cuda_mem_map[&mem];
- if (shared_pointer != 0) {
- /* Replace host pointer with our host allocation. Only works if
- * CUDA memory layout is the same and has no pitch padding. Also
- * does not work if we move textures to host during a render,
- * since other devices might be using the memory. */
-
- if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
- mem.host_pointer != shared_pointer) {
- memcpy(shared_pointer, mem.host_pointer, size);
-
- /* A Call to device_memory::host_free() should be preceded by
- * a call to device_memory::device_free() for host memory
- * allocated by a device to be handled properly. Two exceptions
- * are here and a call in OptiXDevice::generic_alloc(), where
- * the current host memory can be assumed to be allocated by
- * device_memory::host_alloc(), not by a device */
-
- mem.host_free();
- mem.host_pointer = shared_pointer;
- }
- mem.shared_pointer = shared_pointer;
- mem.shared_counter++;
- cmem->use_mapped_host = true;
- }
- else {
- cmem->use_mapped_host = false;
- }
-
- return cmem;
- }
-
- void generic_copy_to(device_memory &mem)
- {
- if (mem.host_pointer && mem.device_pointer) {
- CUDAContextScope scope(this);
-
- /* If use_mapped_host of mem is false, the current device only
- * uses device memory allocated by cuMemAlloc regardless of
- * mem.host_pointer and mem.shared_pointer, and should copy
- * data from mem.host_pointer. */
-
- if (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer) {
- cuda_assert(cuMemcpyHtoD(
- cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size()));
- }
- }
- }
-
- void generic_free(device_memory &mem)
- {
- if (mem.device_pointer) {
- CUDAContextScope scope(this);
- const CUDAMem &cmem = cuda_mem_map[&mem];
-
- /* If cmem.use_mapped_host is true, reference counting is used
- * to safely free a mapped host memory. */
-
- if (cmem.use_mapped_host) {
- assert(mem.shared_pointer);
- if (mem.shared_pointer) {
- assert(mem.shared_counter > 0);
- if (--mem.shared_counter == 0) {
- if (mem.host_pointer == mem.shared_pointer) {
- mem.host_pointer = 0;
- }
- cuMemFreeHost(mem.shared_pointer);
- mem.shared_pointer = 0;
- }
- }
- map_host_used -= mem.device_size;
- }
- else {
- /* Free device memory. */
- cuMemFree(mem.device_pointer);
- }
-
- stats.mem_free(mem.device_size);
- mem.device_pointer = 0;
- mem.device_size = 0;
-
- cuda_mem_map.erase(cuda_mem_map.find(&mem));
- }
- }
-
- void mem_alloc(device_memory &mem)
- {
- if (mem.type == MEM_PIXELS && !background) {
- pixels_alloc(mem);
- }
- else if (mem.type == MEM_TEXTURE) {
- assert(!"mem_alloc not supported for textures.");
- }
- else {
- generic_alloc(mem);
- }
- }
-
- void mem_copy_to(device_memory &mem)
- {
- if (mem.type == MEM_PIXELS) {
- assert(!"mem_copy_to not supported for pixels.");
- }
- else if (mem.type == MEM_TEXTURE) {
- tex_free(mem);
- tex_alloc(mem);
- }
- else {
- if (!mem.device_pointer) {
- generic_alloc(mem);
- }
-
- generic_copy_to(mem);
- }
- }
-
- void mem_copy_from(device_memory &mem, int y, int w, int h, int elem)
- {
- if (mem.type == MEM_PIXELS && !background) {
- pixels_copy_from(mem, y, w, h);
- }
- else if (mem.type == MEM_TEXTURE) {
- assert(!"mem_copy_from not supported for textures.");
- }
- else {
- CUDAContextScope scope(this);
- size_t offset = elem * y * w;
- size_t size = elem * w * h;
-
- if (mem.host_pointer && mem.device_pointer) {
- cuda_assert(cuMemcpyDtoH(
- (uchar *)mem.host_pointer + offset, (CUdeviceptr)(mem.device_pointer + offset), size));
- }
- else if (mem.host_pointer) {
- memset((char *)mem.host_pointer + offset, 0, size);
- }
- }
- }
-
- void mem_zero(device_memory &mem)
- {
- if (!mem.device_pointer) {
- mem_alloc(mem);
- }
-
- if (mem.host_pointer) {
- memset(mem.host_pointer, 0, mem.memory_size());
- }
-
- /* If use_mapped_host of mem is false, mem.device_pointer currently
- * refers to device memory regardless of mem.host_pointer and
- * mem.shared_pointer. */
-
- if (mem.device_pointer &&
- (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer)) {
- CUDAContextScope scope(this);
- cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size()));
- }
- }
-
- void mem_free(device_memory &mem)
- {
- if (mem.type == MEM_PIXELS && !background) {
- pixels_free(mem);
- }
- else if (mem.type == MEM_TEXTURE) {
- tex_free(mem);
- }
- else {
- generic_free(mem);
- }
- }
-
- virtual device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/)
- {
- return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
- }
-
- void const_copy_to(const char *name, void *host, size_t size)
- {
- CUDAContextScope scope(this);
- CUdeviceptr mem;
- size_t bytes;
-
- cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name));
- // assert(bytes == size);
- cuda_assert(cuMemcpyHtoD(mem, host, size));
- }
-
- void tex_alloc(device_memory &mem)
- {
- CUDAContextScope scope(this);
-
- /* General variables for both architectures */
- string bind_name = mem.name;
- size_t dsize = datatype_size(mem.data_type);
- size_t size = mem.memory_size();
-
- CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP;
- switch (mem.extension) {
- case EXTENSION_REPEAT:
- address_mode = CU_TR_ADDRESS_MODE_WRAP;
- break;
- case EXTENSION_EXTEND:
- address_mode = CU_TR_ADDRESS_MODE_CLAMP;
- break;
- case EXTENSION_CLIP:
- address_mode = CU_TR_ADDRESS_MODE_BORDER;
- break;
- default:
- assert(0);
- break;
- }
-
- CUfilter_mode filter_mode;
- if (mem.interpolation == INTERPOLATION_CLOSEST) {
- filter_mode = CU_TR_FILTER_MODE_POINT;
- }
- else {
- filter_mode = CU_TR_FILTER_MODE_LINEAR;
- }
-
- /* Data Storage */
- if (mem.interpolation == INTERPOLATION_NONE) {
- generic_alloc(mem);
- generic_copy_to(mem);
-
- CUdeviceptr cumem;
- size_t cubytes;
-
- cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str()));
-
- if (cubytes == 8) {
- /* 64 bit device pointer */
- uint64_t ptr = mem.device_pointer;
- cuda_assert(cuMemcpyHtoD(cumem, (void *)&ptr, cubytes));
- }
- else {
- /* 32 bit device pointer */
- uint32_t ptr = (uint32_t)mem.device_pointer;
- cuda_assert(cuMemcpyHtoD(cumem, (void *)&ptr, cubytes));
- }
- return;
- }
-
- /* Image Texture Storage */
- CUarray_format_enum format;
- switch (mem.data_type) {
- case TYPE_UCHAR:
- format = CU_AD_FORMAT_UNSIGNED_INT8;
- break;
- case TYPE_UINT16:
- format = CU_AD_FORMAT_UNSIGNED_INT16;
- break;
- case TYPE_UINT:
- format = CU_AD_FORMAT_UNSIGNED_INT32;
- break;
- case TYPE_INT:
- format = CU_AD_FORMAT_SIGNED_INT32;
- break;
- case TYPE_FLOAT:
- format = CU_AD_FORMAT_FLOAT;
- break;
- case TYPE_HALF:
- format = CU_AD_FORMAT_HALF;
- break;
- default:
- assert(0);
- return;
- }
-
- CUDAMem *cmem = NULL;
- CUarray array_3d = NULL;
- size_t src_pitch = mem.data_width * dsize * mem.data_elements;
- size_t dst_pitch = src_pitch;
-
- if (mem.data_depth > 1) {
- /* 3D texture using array, there is no API for linear memory. */
- CUDA_ARRAY3D_DESCRIPTOR desc;
-
- desc.Width = mem.data_width;
- desc.Height = mem.data_height;
- desc.Depth = mem.data_depth;
- desc.Format = format;
- desc.NumChannels = mem.data_elements;
- desc.Flags = 0;
-
- VLOG(1) << "Array 3D allocate: " << mem.name << ", "
- << string_human_readable_number(mem.memory_size()) << " bytes. ("
- << string_human_readable_size(mem.memory_size()) << ")";
-
- cuda_assert(cuArray3DCreate(&array_3d, &desc));
-
- if (!array_3d) {
- return;
- }
-
- CUDA_MEMCPY3D param;
- memset(&param, 0, sizeof(param));
- param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
- param.dstArray = array_3d;
- param.srcMemoryType = CU_MEMORYTYPE_HOST;
- param.srcHost = mem.host_pointer;
- param.srcPitch = src_pitch;
- param.WidthInBytes = param.srcPitch;
- param.Height = mem.data_height;
- param.Depth = mem.data_depth;
-
- cuda_assert(cuMemcpy3D(&param));
-
- mem.device_pointer = (device_ptr)array_3d;
- mem.device_size = size;
- stats.mem_alloc(size);
-
- cmem = &cuda_mem_map[&mem];
- cmem->texobject = 0;
- cmem->array = array_3d;
- }
- else if (mem.data_height > 0) {
- /* 2D texture, using pitch aligned linear memory. */
- int alignment = 0;
- cuda_assert(
- cuDeviceGetAttribute(&alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuDevice));
- dst_pitch = align_up(src_pitch, alignment);
- size_t dst_size = dst_pitch * mem.data_height;
-
- cmem = generic_alloc(mem, dst_size - mem.memory_size());
- if (!cmem) {
- return;
- }
-
- CUDA_MEMCPY2D param;
- memset(&param, 0, sizeof(param));
- param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
- param.dstDevice = mem.device_pointer;
- param.dstPitch = dst_pitch;
- param.srcMemoryType = CU_MEMORYTYPE_HOST;
- param.srcHost = mem.host_pointer;
- param.srcPitch = src_pitch;
- param.WidthInBytes = param.srcPitch;
- param.Height = mem.data_height;
-
- cuda_assert(cuMemcpy2DUnaligned(&param));
- }
- else {
- /* 1D texture, using linear memory. */
- cmem = generic_alloc(mem);
- if (!cmem) {
- return;
- }
-
- cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size));
- }
-
- /* Kepler+, bindless textures. */
- int flat_slot = 0;
- if (string_startswith(mem.name, "__tex_image")) {
- int pos = string(mem.name).rfind("_");
- flat_slot = atoi(mem.name + pos + 1);
- }
- else {
- assert(0);
- }
-
- CUDA_RESOURCE_DESC resDesc;
- memset(&resDesc, 0, sizeof(resDesc));
-
- if (array_3d) {
- resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
- resDesc.res.array.hArray = array_3d;
- resDesc.flags = 0;
- }
- else if (mem.data_height > 0) {
- resDesc.resType = CU_RESOURCE_TYPE_PITCH2D;
- resDesc.res.pitch2D.devPtr = mem.device_pointer;
- resDesc.res.pitch2D.format = format;
- resDesc.res.pitch2D.numChannels = mem.data_elements;
- resDesc.res.pitch2D.height = mem.data_height;
- resDesc.res.pitch2D.width = mem.data_width;
- resDesc.res.pitch2D.pitchInBytes = dst_pitch;
- }
- else {
- resDesc.resType = CU_RESOURCE_TYPE_LINEAR;
- resDesc.res.linear.devPtr = mem.device_pointer;
- resDesc.res.linear.format = format;
- resDesc.res.linear.numChannels = mem.data_elements;
- resDesc.res.linear.sizeInBytes = mem.device_size;
- }
-
- CUDA_TEXTURE_DESC texDesc;
- memset(&texDesc, 0, sizeof(texDesc));
- texDesc.addressMode[0] = address_mode;
- texDesc.addressMode[1] = address_mode;
- texDesc.addressMode[2] = address_mode;
- texDesc.filterMode = filter_mode;
- texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
-
- cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
-
- /* Resize once */
- if (flat_slot >= texture_info.size()) {
- /* Allocate some slots in advance, to reduce amount
- * of re-allocations. */
- texture_info.resize(flat_slot + 128);
- }
-
-    /* Set mapping and tag that we need to (re-)upload to the device. */
- TextureInfo &info = texture_info[flat_slot];
- info.data = (uint64_t)cmem->texobject;
- info.cl_buffer = 0;
- info.interpolation = mem.interpolation;
- info.extension = mem.extension;
- info.width = mem.data_width;
- info.height = mem.data_height;
- info.depth = mem.data_depth;
- need_texture_info = true;
- }
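The slot parsing above extracts the integer after the last underscore of the texture name. A self-contained sketch with a hypothetical name (real names are generated by the Cycles image manager):

#include <cstdlib>
#include <string>

int main()
{
  /* Hypothetical texture name; real names come from the image manager. */
  const char *name = "__tex_image_float4_012";
  size_t pos = std::string(name).rfind('_'); /* index of the last '_' */
  int flat_slot = std::atoi(name + pos + 1); /* -> 12 */
  return flat_slot == 12 ? 0 : 1;
}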
-
- void tex_free(device_memory &mem)
- {
- if (mem.device_pointer) {
- CUDAContextScope scope(this);
- const CUDAMem &cmem = cuda_mem_map[&mem];
-
- if (cmem.texobject) {
- /* Free bindless texture. */
- cuTexObjectDestroy(cmem.texobject);
- }
-
- if (cmem.array) {
- /* Free array. */
- cuArrayDestroy(cmem.array);
- stats.mem_free(mem.device_size);
- mem.device_pointer = 0;
- mem.device_size = 0;
-
- cuda_mem_map.erase(cuda_mem_map.find(&mem));
- }
- else {
- generic_free(mem);
- }
- }
- }
-
-#define CUDA_GET_BLOCKSIZE(func, w, h) \
- int threads_per_block; \
- cuda_assert( \
- cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
- int threads = (int)sqrt((float)threads_per_block); \
- int xblocks = ((w) + threads - 1) / threads; \
- int yblocks = ((h) + threads - 1) / threads;
-
-#define CUDA_LAUNCH_KERNEL(func, args) \
- cuda_assert(cuLaunchKernel(func, xblocks, yblocks, 1, threads, threads, 1, 0, 0, args, 0));
-
-/* Similar to the above, but for 1-dimensional blocks. */
-#define CUDA_GET_BLOCKSIZE_1D(func, w, h) \
- int threads_per_block; \
- cuda_assert( \
- cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
- int xblocks = ((w) + threads_per_block - 1) / threads_per_block; \
- int yblocks = h;
-
-#define CUDA_LAUNCH_KERNEL_1D(func, args) \
- cuda_assert(cuLaunchKernel(func, xblocks, yblocks, 1, threads_per_block, 1, 1, 0, 0, args, 0));
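Roughly, CUDA_GET_BLOCKSIZE followed by CUDA_LAUNCH_KERNEL expands to the equivalent of the helper below, written against the CUDA driver API as a sketch only (error checking via cuda_assert is omitted):

#include <cuda.h>
#include <math.h>

/* Launch `func` over a w x h grid with a square thread block derived from the
 * kernel's max-threads-per-block attribute, mirroring the 2D macros above. */
static void launch_kernel_2d(CUfunction func, int w, int h, void **args)
{
  int threads_per_block = 0;
  cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func);
  int threads = (int)sqrt((float)threads_per_block);
  int xblocks = (w + threads - 1) / threads;
  int yblocks = (h + threads - 1) / threads;
  cuLaunchKernel(func, xblocks, yblocks, 1, threads, threads, 1, 0, 0, args, 0);
}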
-
- bool denoising_non_local_means(device_ptr image_ptr,
- device_ptr guide_ptr,
- device_ptr variance_ptr,
- device_ptr out_ptr,
- DenoisingTask *task)
- {
- if (have_error())
- return false;
-
- CUDAContextScope scope(this);
-
- int stride = task->buffer.stride;
- int w = task->buffer.width;
- int h = task->buffer.h;
- int r = task->nlm_state.r;
- int f = task->nlm_state.f;
- float a = task->nlm_state.a;
- float k_2 = task->nlm_state.k_2;
-
- int pass_stride = task->buffer.pass_stride;
- int num_shifts = (2 * r + 1) * (2 * r + 1);
- int channel_offset = task->nlm_state.is_color ? task->buffer.pass_stride : 0;
- int frame_offset = 0;
-
- if (have_error())
- return false;
-
- CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer);
- CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts;
- CUdeviceptr weightAccum = difference + 2 * sizeof(float) * pass_stride * num_shifts;
- CUdeviceptr scale_ptr = 0;
-
- cuda_assert(cuMemsetD8(weightAccum, 0, sizeof(float) * pass_stride));
- cuda_assert(cuMemsetD8(out_ptr, 0, sizeof(float) * pass_stride));
-
- {
- CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMUpdateOutput;
- cuda_assert(cuModuleGetFunction(
- &cuNLMCalcDifference, cuFilterModule, "kernel_cuda_filter_nlm_calc_difference"));
- cuda_assert(cuModuleGetFunction(&cuNLMBlur, cuFilterModule, "kernel_cuda_filter_nlm_blur"));
- cuda_assert(cuModuleGetFunction(
- &cuNLMCalcWeight, cuFilterModule, "kernel_cuda_filter_nlm_calc_weight"));
- cuda_assert(cuModuleGetFunction(
- &cuNLMUpdateOutput, cuFilterModule, "kernel_cuda_filter_nlm_update_output"));
-
- cuda_assert(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuFuncSetCacheConfig(cuNLMUpdateOutput, CU_FUNC_CACHE_PREFER_L1));
-
- CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference, w * h, num_shifts);
-
- void *calc_difference_args[] = {&guide_ptr,
- &variance_ptr,
- &scale_ptr,
- &difference,
- &w,
- &h,
- &stride,
- &pass_stride,
- &r,
- &channel_offset,
- &frame_offset,
- &a,
- &k_2};
- void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
- void *calc_weight_args[] = {
- &blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
- void *update_output_args[] = {&blurDifference,
- &image_ptr,
- &out_ptr,
- &weightAccum,
- &w,
- &h,
- &stride,
- &pass_stride,
- &channel_offset,
- &r,
- &f};
-
- CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMUpdateOutput, update_output_args);
- }
-
- {
- CUfunction cuNLMNormalize;
- cuda_assert(cuModuleGetFunction(
- &cuNLMNormalize, cuFilterModule, "kernel_cuda_filter_nlm_normalize"));
- cuda_assert(cuFuncSetCacheConfig(cuNLMNormalize, CU_FUNC_CACHE_PREFER_L1));
- void *normalize_args[] = {&out_ptr, &weightAccum, &w, &h, &stride};
- CUDA_GET_BLOCKSIZE(cuNLMNormalize, w, h);
- CUDA_LAUNCH_KERNEL(cuNLMNormalize, normalize_args);
- cuda_assert(cuCtxSynchronize());
- }
-
- return !have_error();
- }
-
- bool denoising_construct_transform(DenoisingTask *task)
- {
- if (have_error())
- return false;
-
- CUDAContextScope scope(this);
-
- CUfunction cuFilterConstructTransform;
- cuda_assert(cuModuleGetFunction(
- &cuFilterConstructTransform, cuFilterModule, "kernel_cuda_filter_construct_transform"));
- cuda_assert(cuFuncSetCacheConfig(cuFilterConstructTransform, CU_FUNC_CACHE_PREFER_SHARED));
- CUDA_GET_BLOCKSIZE(cuFilterConstructTransform, task->storage.w, task->storage.h);
-
- void *args[] = {&task->buffer.mem.device_pointer,
- &task->tile_info_mem.device_pointer,
- &task->storage.transform.device_pointer,
- &task->storage.rank.device_pointer,
- &task->filter_area,
- &task->rect,
- &task->radius,
- &task->pca_threshold,
- &task->buffer.pass_stride,
- &task->buffer.frame_stride,
- &task->buffer.use_time};
- CUDA_LAUNCH_KERNEL(cuFilterConstructTransform, args);
- cuda_assert(cuCtxSynchronize());
-
- return !have_error();
- }
-
- bool denoising_accumulate(device_ptr color_ptr,
- device_ptr color_variance_ptr,
- device_ptr scale_ptr,
- int frame,
- DenoisingTask *task)
- {
- if (have_error())
- return false;
-
- CUDAContextScope scope(this);
-
- int r = task->radius;
- int f = 4;
- float a = 1.0f;
- float k_2 = task->nlm_k_2;
-
- int w = task->reconstruction_state.source_w;
- int h = task->reconstruction_state.source_h;
- int stride = task->buffer.stride;
- int frame_offset = frame * task->buffer.frame_stride;
- int t = task->tile_info->frames[frame];
-
- int pass_stride = task->buffer.pass_stride;
- int num_shifts = (2 * r + 1) * (2 * r + 1);
-
- if (have_error())
- return false;
-
- CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer);
- CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts;
-
- CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian;
- cuda_assert(cuModuleGetFunction(
- &cuNLMCalcDifference, cuFilterModule, "kernel_cuda_filter_nlm_calc_difference"));
- cuda_assert(cuModuleGetFunction(&cuNLMBlur, cuFilterModule, "kernel_cuda_filter_nlm_blur"));
- cuda_assert(cuModuleGetFunction(
- &cuNLMCalcWeight, cuFilterModule, "kernel_cuda_filter_nlm_calc_weight"));
- cuda_assert(cuModuleGetFunction(
- &cuNLMConstructGramian, cuFilterModule, "kernel_cuda_filter_nlm_construct_gramian"));
-
- cuda_assert(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuFuncSetCacheConfig(cuNLMConstructGramian, CU_FUNC_CACHE_PREFER_SHARED));
-
- CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference,
- task->reconstruction_state.source_w *
- task->reconstruction_state.source_h,
- num_shifts);
-
- void *calc_difference_args[] = {&color_ptr,
- &color_variance_ptr,
- &scale_ptr,
- &difference,
- &w,
- &h,
- &stride,
- &pass_stride,
- &r,
- &pass_stride,
- &frame_offset,
- &a,
- &k_2};
- void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
- void *calc_weight_args[] = {
- &blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
- void *construct_gramian_args[] = {&t,
- &blurDifference,
- &task->buffer.mem.device_pointer,
- &task->storage.transform.device_pointer,
- &task->storage.rank.device_pointer,
- &task->storage.XtWX.device_pointer,
- &task->storage.XtWY.device_pointer,
- &task->reconstruction_state.filter_window,
- &w,
- &h,
- &stride,
- &pass_stride,
- &r,
- &f,
- &frame_offset,
- &task->buffer.use_time};
-
- CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args);
- cuda_assert(cuCtxSynchronize());
-
- return !have_error();
- }
-
- bool denoising_solve(device_ptr output_ptr, DenoisingTask *task)
- {
- CUfunction cuFinalize;
- cuda_assert(cuModuleGetFunction(&cuFinalize, cuFilterModule, "kernel_cuda_filter_finalize"));
- cuda_assert(cuFuncSetCacheConfig(cuFinalize, CU_FUNC_CACHE_PREFER_L1));
- void *finalize_args[] = {&output_ptr,
- &task->storage.rank.device_pointer,
- &task->storage.XtWX.device_pointer,
- &task->storage.XtWY.device_pointer,
- &task->filter_area,
- &task->reconstruction_state.buffer_params.x,
- &task->render_buffer.samples};
- CUDA_GET_BLOCKSIZE(
- cuFinalize, task->reconstruction_state.source_w, task->reconstruction_state.source_h);
- CUDA_LAUNCH_KERNEL(cuFinalize, finalize_args);
- cuda_assert(cuCtxSynchronize());
-
- return !have_error();
- }
-
- bool denoising_combine_halves(device_ptr a_ptr,
- device_ptr b_ptr,
- device_ptr mean_ptr,
- device_ptr variance_ptr,
- int r,
- int4 rect,
- DenoisingTask *task)
- {
- if (have_error())
- return false;
-
- CUDAContextScope scope(this);
-
- CUfunction cuFilterCombineHalves;
- cuda_assert(cuModuleGetFunction(
- &cuFilterCombineHalves, cuFilterModule, "kernel_cuda_filter_combine_halves"));
- cuda_assert(cuFuncSetCacheConfig(cuFilterCombineHalves, CU_FUNC_CACHE_PREFER_L1));
- CUDA_GET_BLOCKSIZE(
- cuFilterCombineHalves, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
-
- void *args[] = {&mean_ptr, &variance_ptr, &a_ptr, &b_ptr, &rect, &r};
- CUDA_LAUNCH_KERNEL(cuFilterCombineHalves, args);
- cuda_assert(cuCtxSynchronize());
-
- return !have_error();
- }
-
- bool denoising_divide_shadow(device_ptr a_ptr,
- device_ptr b_ptr,
- device_ptr sample_variance_ptr,
- device_ptr sv_variance_ptr,
- device_ptr buffer_variance_ptr,
- DenoisingTask *task)
- {
- if (have_error())
- return false;
-
- CUDAContextScope scope(this);
-
- CUfunction cuFilterDivideShadow;
- cuda_assert(cuModuleGetFunction(
- &cuFilterDivideShadow, cuFilterModule, "kernel_cuda_filter_divide_shadow"));
- cuda_assert(cuFuncSetCacheConfig(cuFilterDivideShadow, CU_FUNC_CACHE_PREFER_L1));
- CUDA_GET_BLOCKSIZE(
- cuFilterDivideShadow, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
-
- void *args[] = {&task->render_buffer.samples,
- &task->tile_info_mem.device_pointer,
- &a_ptr,
- &b_ptr,
- &sample_variance_ptr,
- &sv_variance_ptr,
- &buffer_variance_ptr,
- &task->rect,
- &task->render_buffer.pass_stride,
- &task->render_buffer.offset};
- CUDA_LAUNCH_KERNEL(cuFilterDivideShadow, args);
- cuda_assert(cuCtxSynchronize());
-
- return !have_error();
- }
-
- bool denoising_get_feature(int mean_offset,
- int variance_offset,
- device_ptr mean_ptr,
- device_ptr variance_ptr,
- float scale,
- DenoisingTask *task)
- {
- if (have_error())
- return false;
-
- CUDAContextScope scope(this);
-
- CUfunction cuFilterGetFeature;
- cuda_assert(cuModuleGetFunction(
- &cuFilterGetFeature, cuFilterModule, "kernel_cuda_filter_get_feature"));
- cuda_assert(cuFuncSetCacheConfig(cuFilterGetFeature, CU_FUNC_CACHE_PREFER_L1));
- CUDA_GET_BLOCKSIZE(
- cuFilterGetFeature, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
-
- void *args[] = {&task->render_buffer.samples,
- &task->tile_info_mem.device_pointer,
- &mean_offset,
- &variance_offset,
- &mean_ptr,
- &variance_ptr,
- &scale,
- &task->rect,
- &task->render_buffer.pass_stride,
- &task->render_buffer.offset};
- CUDA_LAUNCH_KERNEL(cuFilterGetFeature, args);
- cuda_assert(cuCtxSynchronize());
-
- return !have_error();
- }
-
- bool denoising_write_feature(int out_offset,
- device_ptr from_ptr,
- device_ptr buffer_ptr,
- DenoisingTask *task)
- {
- if (have_error())
- return false;
-
- CUDAContextScope scope(this);
-
- CUfunction cuFilterWriteFeature;
- cuda_assert(cuModuleGetFunction(
- &cuFilterWriteFeature, cuFilterModule, "kernel_cuda_filter_write_feature"));
- cuda_assert(cuFuncSetCacheConfig(cuFilterWriteFeature, CU_FUNC_CACHE_PREFER_L1));
- CUDA_GET_BLOCKSIZE(cuFilterWriteFeature, task->filter_area.z, task->filter_area.w);
-
- void *args[] = {&task->render_buffer.samples,
- &task->reconstruction_state.buffer_params,
- &task->filter_area,
- &from_ptr,
- &buffer_ptr,
- &out_offset,
- &task->rect};
- CUDA_LAUNCH_KERNEL(cuFilterWriteFeature, args);
- cuda_assert(cuCtxSynchronize());
-
- return !have_error();
- }
-
- bool denoising_detect_outliers(device_ptr image_ptr,
- device_ptr variance_ptr,
- device_ptr depth_ptr,
- device_ptr output_ptr,
- DenoisingTask *task)
- {
- if (have_error())
- return false;
-
- CUDAContextScope scope(this);
-
- CUfunction cuFilterDetectOutliers;
- cuda_assert(cuModuleGetFunction(
- &cuFilterDetectOutliers, cuFilterModule, "kernel_cuda_filter_detect_outliers"));
- cuda_assert(cuFuncSetCacheConfig(cuFilterDetectOutliers, CU_FUNC_CACHE_PREFER_L1));
- CUDA_GET_BLOCKSIZE(
- cuFilterDetectOutliers, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
-
- void *args[] = {&image_ptr,
- &variance_ptr,
- &depth_ptr,
- &output_ptr,
- &task->rect,
- &task->buffer.pass_stride};
-
- CUDA_LAUNCH_KERNEL(cuFilterDetectOutliers, args);
- cuda_assert(cuCtxSynchronize());
-
- return !have_error();
- }
-
- void denoise(RenderTile &rtile, DenoisingTask &denoising)
- {
- denoising.functions.construct_transform = function_bind(
- &CUDADevice::denoising_construct_transform, this, &denoising);
- denoising.functions.accumulate = function_bind(
- &CUDADevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising);
- denoising.functions.solve = function_bind(&CUDADevice::denoising_solve, this, _1, &denoising);
- denoising.functions.divide_shadow = function_bind(
- &CUDADevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
- denoising.functions.non_local_means = function_bind(
- &CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
- denoising.functions.combine_halves = function_bind(
- &CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
- denoising.functions.get_feature = function_bind(
- &CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, _5, &denoising);
- denoising.functions.write_feature = function_bind(
- &CUDADevice::denoising_write_feature, this, _1, _2, _3, &denoising);
- denoising.functions.detect_outliers = function_bind(
- &CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
-
- denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
- denoising.render_buffer.samples = rtile.sample;
- denoising.buffer.gpu_temporary_mem = true;
-
- denoising.run_denoising(&rtile);
- }
-
- void path_trace(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles)
- {
- scoped_timer timer(&rtile.buffers->render_time);
-
- if (have_error())
- return;
-
- CUDAContextScope scope(this);
- CUfunction cuPathTrace;
-
- /* Get kernel function. */
- if (task.integrator_branched) {
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
- }
- else {
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
- }
-
- if (have_error()) {
- return;
- }
-
- cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
-
- /* Allocate work tile. */
- work_tiles.alloc(1);
-
- WorkTile *wtile = work_tiles.data();
- wtile->x = rtile.x;
- wtile->y = rtile.y;
- wtile->w = rtile.w;
- wtile->h = rtile.h;
- wtile->offset = rtile.offset;
- wtile->stride = rtile.stride;
- wtile->buffer = (float *)cuda_device_ptr(rtile.buffer);
-
- /* Prepare work size. More step samples render faster, but for now we
- * remain conservative for GPUs connected to a display to avoid driver
- * timeouts and display freezing. */
- int min_blocks, num_threads_per_block;
- cuda_assert(cuOccupancyMaxPotentialBlockSize(
- &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
- if (!info.display_device) {
- min_blocks *= 8;
- }
-
- uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h);
-
- /* Render all samples. */
- int start_sample = rtile.start_sample;
- int end_sample = rtile.start_sample + rtile.num_samples;
-
- for (int sample = start_sample; sample < end_sample; sample += step_samples) {
- /* Setup and copy work tile to device. */
- wtile->start_sample = sample;
- wtile->num_samples = min(step_samples, end_sample - sample);
- work_tiles.copy_to_device();
-
- CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
- uint total_work_size = wtile->w * wtile->h * wtile->num_samples;
- uint num_blocks = divide_up(total_work_size, num_threads_per_block);
-
- /* Launch kernel. */
- void *args[] = {&d_work_tiles, &total_work_size};
-
- cuda_assert(cuLaunchKernel(
- cuPathTrace, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
-
- cuda_assert(cuCtxSynchronize());
-
- /* Update progress. */
- rtile.sample = sample + wtile->num_samples;
- task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples);
-
- if (task.get_cancel()) {
- if (task.need_finish_queue == false)
- break;
- }
- }
- }
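To make the step-sample heuristic above concrete, a hedged numeric example with made-up occupancy values: with num_threads_per_block = 256 and min_blocks = 40 from cuOccupancyMaxPotentialBlockSize, a non-display device multiplies min_blocks by 8 to 320, and for a 256x256 tile step_samples = divide_up(320 * 256, 256 * 256) = divide_up(81920, 65536) = 2, so the sample loop advances two samples per kernel launch.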
-
- void film_convert(DeviceTask &task,
- device_ptr buffer,
- device_ptr rgba_byte,
- device_ptr rgba_half)
- {
- if (have_error())
- return;
-
- CUDAContextScope scope(this);
-
- CUfunction cuFilmConvert;
- CUdeviceptr d_rgba = map_pixels((rgba_byte) ? rgba_byte : rgba_half);
- CUdeviceptr d_buffer = cuda_device_ptr(buffer);
-
- /* get kernel function */
- if (rgba_half) {
- cuda_assert(
- cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_half_float"));
- }
- else {
- cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte"));
- }
-
- float sample_scale = 1.0f / (task.sample + 1);
-
- /* pass in parameters */
- void *args[] = {&d_rgba,
- &d_buffer,
- &sample_scale,
- &task.x,
- &task.y,
- &task.w,
- &task.h,
- &task.offset,
- &task.stride};
-
- /* launch kernel */
- int threads_per_block;
- cuda_assert(cuFuncGetAttribute(
- &threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuFilmConvert));
-
- int xthreads = (int)sqrt(threads_per_block);
- int ythreads = (int)sqrt(threads_per_block);
- int xblocks = (task.w + xthreads - 1) / xthreads;
- int yblocks = (task.h + ythreads - 1) / ythreads;
-
- cuda_assert(cuFuncSetCacheConfig(cuFilmConvert, CU_FUNC_CACHE_PREFER_L1));
-
- cuda_assert(cuLaunchKernel(cuFilmConvert,
- xblocks,
- yblocks,
- 1, /* blocks */
- xthreads,
- ythreads,
- 1, /* threads */
- 0,
- 0,
- args,
- 0));
-
- unmap_pixels((rgba_byte) ? rgba_byte : rgba_half);
-
- cuda_assert(cuCtxSynchronize());
- }
-
- void shader(DeviceTask &task)
- {
- if (have_error())
- return;
-
- CUDAContextScope scope(this);
-
- CUfunction cuShader;
- CUdeviceptr d_input = cuda_device_ptr(task.shader_input);
- CUdeviceptr d_output = cuda_device_ptr(task.shader_output);
-
- /* get kernel function */
- if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
- cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_bake"));
- }
- else if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
- cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_displace"));
- }
- else {
- cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_background"));
- }
-
-    /* Do tasks in smaller chunks, so we can cancel them. */
- const int shader_chunk_size = 65536;
- const int start = task.shader_x;
- const int end = task.shader_x + task.shader_w;
- int offset = task.offset;
-
- bool canceled = false;
- for (int sample = 0; sample < task.num_samples && !canceled; sample++) {
- for (int shader_x = start; shader_x < end; shader_x += shader_chunk_size) {
- int shader_w = min(shader_chunk_size, end - shader_x);
-
- /* pass in parameters */
- void *args[8];
- int arg = 0;
- args[arg++] = &d_input;
- args[arg++] = &d_output;
- args[arg++] = &task.shader_eval_type;
- if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
- args[arg++] = &task.shader_filter;
- }
- args[arg++] = &shader_x;
- args[arg++] = &shader_w;
- args[arg++] = &offset;
- args[arg++] = &sample;
-
- /* launch kernel */
- int threads_per_block;
- cuda_assert(cuFuncGetAttribute(
- &threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
-
- int xblocks = (shader_w + threads_per_block - 1) / threads_per_block;
-
- cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuLaunchKernel(cuShader,
- xblocks,
- 1,
- 1, /* blocks */
- threads_per_block,
- 1,
- 1, /* threads */
- 0,
- 0,
- args,
- 0));
-
- cuda_assert(cuCtxSynchronize());
-
- if (task.get_cancel()) {
- canceled = true;
- break;
- }
- }
-
- task.update_progress(NULL);
- }
- }
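For the chunking in shader() above, a worked example with hypothetical sizes: with task.shader_x = 0 and task.shader_w = 200000, the inner loop launches chunks covering [0, 65536), [65536, 131072), [131072, 196608) and [196608, 200000), i.e. a final partial chunk of 3392 elements, with a cancellation check after each launch.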
-
- CUdeviceptr map_pixels(device_ptr mem)
- {
- if (!background) {
- PixelMem pmem = pixel_mem_map[mem];
- CUdeviceptr buffer;
-
- size_t bytes;
- cuda_assert(cuGraphicsMapResources(1, &pmem.cuPBOresource, 0));
- cuda_assert(cuGraphicsResourceGetMappedPointer(&buffer, &bytes, pmem.cuPBOresource));
-
- return buffer;
- }
-
- return cuda_device_ptr(mem);
- }
-
- void unmap_pixels(device_ptr mem)
- {
- if (!background) {
- PixelMem pmem = pixel_mem_map[mem];
-
- cuda_assert(cuGraphicsUnmapResources(1, &pmem.cuPBOresource, 0));
- }
- }
-
- void pixels_alloc(device_memory &mem)
- {
- PixelMem pmem;
-
- pmem.w = mem.data_width;
- pmem.h = mem.data_height;
-
- CUDAContextScope scope(this);
-
- glGenBuffers(1, &pmem.cuPBO);
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
- if (mem.data_type == TYPE_HALF)
- glBufferData(
- GL_PIXEL_UNPACK_BUFFER, pmem.w * pmem.h * sizeof(GLhalf) * 4, NULL, GL_DYNAMIC_DRAW);
- else
- glBufferData(
- GL_PIXEL_UNPACK_BUFFER, pmem.w * pmem.h * sizeof(uint8_t) * 4, NULL, GL_DYNAMIC_DRAW);
-
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-
- glActiveTexture(GL_TEXTURE0);
- glGenTextures(1, &pmem.cuTexId);
- glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
- if (mem.data_type == TYPE_HALF)
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, pmem.w, pmem.h, 0, GL_RGBA, GL_HALF_FLOAT, NULL);
- else
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
- glBindTexture(GL_TEXTURE_2D, 0);
-
- CUresult result = cuGraphicsGLRegisterBuffer(
- &pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
-
- if (result == CUDA_SUCCESS) {
- mem.device_pointer = pmem.cuTexId;
- pixel_mem_map[mem.device_pointer] = pmem;
-
- mem.device_size = mem.memory_size();
- stats.mem_alloc(mem.device_size);
-
- return;
- }
- else {
- /* failed to register buffer, fallback to no interop */
- glDeleteBuffers(1, &pmem.cuPBO);
- glDeleteTextures(1, &pmem.cuTexId);
-
- background = true;
- }
- }
-
- void pixels_copy_from(device_memory &mem, int y, int w, int h)
- {
- PixelMem pmem = pixel_mem_map[mem.device_pointer];
-
- CUDAContextScope scope(this);
-
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
- uchar *pixels = (uchar *)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY);
- size_t offset = sizeof(uchar) * 4 * y * w;
- memcpy((uchar *)mem.host_pointer + offset, pixels + offset, sizeof(uchar) * 4 * w * h);
- glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
- }
-
- void pixels_free(device_memory &mem)
- {
- if (mem.device_pointer) {
- PixelMem pmem = pixel_mem_map[mem.device_pointer];
-
- CUDAContextScope scope(this);
-
- cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource));
- glDeleteBuffers(1, &pmem.cuPBO);
- glDeleteTextures(1, &pmem.cuTexId);
-
- pixel_mem_map.erase(pixel_mem_map.find(mem.device_pointer));
- mem.device_pointer = 0;
-
- stats.mem_free(mem.device_size);
- mem.device_size = 0;
- }
- }
-
- void draw_pixels(device_memory &mem,
- int y,
- int w,
- int h,
- int width,
- int height,
- int dx,
- int dy,
- int dw,
- int dh,
- bool transparent,
- const DeviceDrawParams &draw_params)
- {
- assert(mem.type == MEM_PIXELS);
-
- if (!background) {
- const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL);
- PixelMem pmem = pixel_mem_map[mem.device_pointer];
- float *vpointer;
-
- CUDAContextScope scope(this);
-
-      /* For multi-device rendering, this assumes the inefficient approach of allocating
-       * all pixels on the device even though we only render to a subset. */
- size_t offset = 4 * y * w;
-
- if (mem.data_type == TYPE_HALF)
- offset *= sizeof(GLhalf);
- else
- offset *= sizeof(uint8_t);
-
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
- glActiveTexture(GL_TEXTURE0);
- glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
- if (mem.data_type == TYPE_HALF) {
- glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_HALF_FLOAT, (void *)offset);
- }
- else {
- glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, (void *)offset);
- }
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-
- if (transparent) {
- glEnable(GL_BLEND);
- glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
- }
-
- GLint shader_program;
- if (use_fallback_shader) {
- if (!bind_fallback_display_space_shader(dw, dh)) {
- return;
- }
- shader_program = fallback_shader_program;
- }
- else {
- draw_params.bind_display_space_shader_cb();
- glGetIntegerv(GL_CURRENT_PROGRAM, &shader_program);
- }
-
- if (!vertex_buffer) {
- glGenBuffers(1, &vertex_buffer);
- }
-
- glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
-      /* Invalidate old contents -
-       * avoids stalling if the buffer is still waiting in the queue to be rendered. */
- glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
-
- vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
-
- if (vpointer) {
- /* texture coordinate - vertex pair */
- vpointer[0] = 0.0f;
- vpointer[1] = 0.0f;
- vpointer[2] = dx;
- vpointer[3] = dy;
-
- vpointer[4] = (float)w / (float)pmem.w;
- vpointer[5] = 0.0f;
- vpointer[6] = (float)width + dx;
- vpointer[7] = dy;
-
- vpointer[8] = (float)w / (float)pmem.w;
- vpointer[9] = (float)h / (float)pmem.h;
- vpointer[10] = (float)width + dx;
- vpointer[11] = (float)height + dy;
-
- vpointer[12] = 0.0f;
- vpointer[13] = (float)h / (float)pmem.h;
- vpointer[14] = dx;
- vpointer[15] = (float)height + dy;
-
- glUnmapBuffer(GL_ARRAY_BUFFER);
- }
-
- GLuint vertex_array_object;
- GLuint position_attribute, texcoord_attribute;
-
- glGenVertexArrays(1, &vertex_array_object);
- glBindVertexArray(vertex_array_object);
-
- texcoord_attribute = glGetAttribLocation(shader_program, "texCoord");
- position_attribute = glGetAttribLocation(shader_program, "pos");
-
- glEnableVertexAttribArray(texcoord_attribute);
- glEnableVertexAttribArray(position_attribute);
-
- glVertexAttribPointer(
- texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
- glVertexAttribPointer(position_attribute,
- 2,
- GL_FLOAT,
- GL_FALSE,
- 4 * sizeof(float),
- (const GLvoid *)(sizeof(float) * 2));
-
- glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
-
- if (use_fallback_shader) {
- glUseProgram(0);
- }
- else {
- draw_params.unbind_display_space_shader_cb();
- }
-
- if (transparent) {
- glDisable(GL_BLEND);
- }
-
- glBindTexture(GL_TEXTURE_2D, 0);
-
- return;
- }
-
- Device::draw_pixels(mem, y, w, h, width, height, dx, dy, dw, dh, transparent, draw_params);
- }
-
- void thread_run(DeviceTask *task)
- {
- CUDAContextScope scope(this);
-
- if (task->type == DeviceTask::RENDER) {
- DeviceRequestedFeatures requested_features;
- if (use_split_kernel()) {
- if (split_kernel == NULL) {
- split_kernel = new CUDASplitKernel(this);
- split_kernel->load_kernels(requested_features);
- }
- }
-
- device_vector<WorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY);
-
- /* keep rendering tiles until done */
- RenderTile tile;
- DenoisingTask denoising(this, *task);
-
- while (task->acquire_tile(this, tile)) {
- if (tile.task == RenderTile::PATH_TRACE) {
- if (use_split_kernel()) {
- device_only_memory<uchar> void_buffer(this, "void_buffer");
- split_kernel->path_trace(task, tile, void_buffer, void_buffer);
- }
- else {
- path_trace(*task, tile, work_tiles);
- }
- }
- else if (tile.task == RenderTile::DENOISE) {
- tile.sample = tile.start_sample + tile.num_samples;
-
- denoise(tile, denoising);
-
- task->update_progress(&tile, tile.w * tile.h);
- }
-
- task->release_tile(tile);
-
- if (task->get_cancel()) {
- if (task->need_finish_queue == false)
- break;
- }
- }
-
- work_tiles.free();
- }
- else if (task->type == DeviceTask::SHADER) {
- shader(*task);
-
- cuda_assert(cuCtxSynchronize());
- }
- }
-
- class CUDADeviceTask : public DeviceTask {
- public:
- CUDADeviceTask(CUDADevice *device, DeviceTask &task) : DeviceTask(task)
- {
- run = function_bind(&CUDADevice::thread_run, device, this);
- }
- };
-
- void task_add(DeviceTask &task)
- {
- CUDAContextScope scope(this);
-
- /* Load texture info. */
- load_texture_info();
-
- /* Synchronize all memory copies before executing task. */
- cuda_assert(cuCtxSynchronize());
-
- if (task.type == DeviceTask::FILM_CONVERT) {
- /* must be done in main thread due to opengl access */
- film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
- }
- else {
- task_pool.push(new CUDADeviceTask(this, task));
- }
- }
-
- void task_wait()
- {
- task_pool.wait();
- }
-
- void task_cancel()
- {
- task_pool.cancel();
- }
-
- friend class CUDASplitKernelFunction;
- friend class CUDASplitKernel;
- friend class CUDAContextScope;
-};
-
-/* redefine the cuda_assert macro so it can be used outside of the CUDADevice class
- * now that the definition of that class is complete
- */
-#undef cuda_assert
-#define cuda_assert(stmt) \
- { \
- CUresult result = stmt; \
-\
- if (result != CUDA_SUCCESS) { \
- string message = string_printf("CUDA error: %s in %s", cuewErrorString(result), #stmt); \
- if (device->error_msg == "") \
- device->error_msg = message; \
- fprintf(stderr, "%s\n", message.c_str()); \
- /*cuda_abort();*/ \
- device->cuda_error_documentation(); \
- } \
- } \
- (void)0
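A note on the trailing (void)0 in this macro (and in GET_ATTR further down): it forces call sites to end with a semicolon, so an invocation reads like an ordinary function call. For example:

cuda_assert(cuCtxSynchronize());  /* expands to "{ ... } (void)0;" */
/* Omitting the semicolon would leave a dangling "(void)0" expression and fail to compile. */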
-
-/* CUDA context scope. */
-
-CUDAContextScope::CUDAContextScope(CUDADevice *device) : device(device)
-{
- cuda_assert(cuCtxPushCurrent(device->cuContext));
-}
-
-CUDAContextScope::~CUDAContextScope()
-{
- cuda_assert(cuCtxPopCurrent(NULL));
-}
-
-/* split kernel */
-
-class CUDASplitKernelFunction : public SplitKernelFunction {
- CUDADevice *device;
- CUfunction func;
-
- public:
- CUDASplitKernelFunction(CUDADevice *device, CUfunction func) : device(device), func(func)
- {
- }
-
- /* enqueue the kernel, returns false if there is an error */
- bool enqueue(const KernelDimensions &dim, device_memory & /*kg*/, device_memory & /*data*/)
- {
- return enqueue(dim, NULL);
- }
-
- /* enqueue the kernel, returns false if there is an error */
- bool enqueue(const KernelDimensions &dim, void *args[])
- {
- if (device->have_error())
- return false;
-
- CUDAContextScope scope(device);
-
- /* we ignore dim.local_size for now, as this is faster */
- int threads_per_block;
- cuda_assert(
- cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func));
-
- int xblocks = (dim.global_size[0] * dim.global_size[1] + threads_per_block - 1) /
- threads_per_block;
-
- cuda_assert(cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1));
-
- cuda_assert(cuLaunchKernel(func,
- xblocks,
- 1,
- 1, /* blocks */
- threads_per_block,
- 1,
- 1, /* threads */
- 0,
- 0,
- args,
- 0));
-
- return !device->have_error();
- }
-};
-
-CUDASplitKernel::CUDASplitKernel(CUDADevice *device) : DeviceSplitKernel(device), device(device)
-{
-}
-
-uint64_t CUDASplitKernel::state_buffer_size(device_memory & /*kg*/,
- device_memory & /*data*/,
- size_t num_threads)
-{
- CUDAContextScope scope(device);
-
- device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE);
- size_buffer.alloc(1);
- size_buffer.zero_to_device();
-
- uint threads = num_threads;
- CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer);
-
- struct args_t {
- uint *num_threads;
- CUdeviceptr *size;
- };
-
- args_t args = {&threads, &d_size};
-
- CUfunction state_buffer_size;
- cuda_assert(
- cuModuleGetFunction(&state_buffer_size, device->cuModule, "kernel_cuda_state_buffer_size"));
-
- cuda_assert(cuLaunchKernel(state_buffer_size, 1, 1, 1, 1, 1, 1, 0, 0, (void **)&args, 0));
-
- size_buffer.copy_from_device(0, 1, 1);
- size_t size = size_buffer[0];
- size_buffer.free();
-
- return size;
-}
-
-bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions &dim,
- RenderTile &rtile,
- int num_global_elements,
- device_memory & /*kernel_globals*/,
- device_memory & /*kernel_data*/,
- device_memory &split_data,
- device_memory &ray_state,
- device_memory &queue_index,
- device_memory &use_queues_flag,
- device_memory &work_pool_wgs)
-{
- CUDAContextScope scope(device);
-
- CUdeviceptr d_split_data = device->cuda_device_ptr(split_data.device_pointer);
- CUdeviceptr d_ray_state = device->cuda_device_ptr(ray_state.device_pointer);
- CUdeviceptr d_queue_index = device->cuda_device_ptr(queue_index.device_pointer);
- CUdeviceptr d_use_queues_flag = device->cuda_device_ptr(use_queues_flag.device_pointer);
- CUdeviceptr d_work_pool_wgs = device->cuda_device_ptr(work_pool_wgs.device_pointer);
-
- CUdeviceptr d_buffer = device->cuda_device_ptr(rtile.buffer);
-
- int end_sample = rtile.start_sample + rtile.num_samples;
- int queue_size = dim.global_size[0] * dim.global_size[1];
-
- struct args_t {
- CUdeviceptr *split_data_buffer;
- int *num_elements;
- CUdeviceptr *ray_state;
- int *start_sample;
- int *end_sample;
- int *sx;
- int *sy;
- int *sw;
- int *sh;
- int *offset;
- int *stride;
- CUdeviceptr *queue_index;
- int *queuesize;
- CUdeviceptr *use_queues_flag;
- CUdeviceptr *work_pool_wgs;
- int *num_samples;
- CUdeviceptr *buffer;
- };
-
- args_t args = {&d_split_data,
- &num_global_elements,
- &d_ray_state,
- &rtile.start_sample,
- &end_sample,
- &rtile.x,
- &rtile.y,
- &rtile.w,
- &rtile.h,
- &rtile.offset,
- &rtile.stride,
- &d_queue_index,
- &queue_size,
- &d_use_queues_flag,
- &d_work_pool_wgs,
- &rtile.num_samples,
- &d_buffer};
-
- CUfunction data_init;
- cuda_assert(
- cuModuleGetFunction(&data_init, device->cuModule, "kernel_cuda_path_trace_data_init"));
- if (device->have_error()) {
- return false;
- }
-
- CUDASplitKernelFunction(device, data_init).enqueue(dim, (void **)&args);
-
- return !device->have_error();
-}
-
-SplitKernelFunction *CUDASplitKernel::get_split_kernel_function(const string &kernel_name,
- const DeviceRequestedFeatures &)
-{
- CUDAContextScope scope(device);
- CUfunction func;
-
- cuda_assert(
- cuModuleGetFunction(&func, device->cuModule, (string("kernel_cuda_") + kernel_name).data()));
- if (device->have_error()) {
- device->cuda_error_message(
- string_printf("kernel \"kernel_cuda_%s\" not found in module", kernel_name.data()));
- return NULL;
- }
-
- return new CUDASplitKernelFunction(device, func);
-}
-
-int2 CUDASplitKernel::split_kernel_local_size()
-{
- return make_int2(32, 1);
-}
-
-int2 CUDASplitKernel::split_kernel_global_size(device_memory &kg,
- device_memory &data,
- DeviceTask * /*task*/)
-{
- CUDAContextScope scope(device);
- size_t free;
- size_t total;
-
- cuda_assert(cuMemGetInfo(&free, &total));
-
- VLOG(1) << "Maximum device allocation size: " << string_human_readable_number(free)
- << " bytes. (" << string_human_readable_size(free) << ").";
-
- size_t num_elements = max_elements_for_max_buffer_size(kg, data, free / 2);
- size_t side = round_down((int)sqrt(num_elements), 32);
- int2 global_size = make_int2(side, round_down(num_elements / side, 16));
- VLOG(1) << "Global size: " << global_size << ".";
- return global_size;
-}
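A hedged numeric example of the sizing above: if max_elements_for_max_buffer_size() returns 1,000,000 elements, then side = round_down(1000, 32) = 992 and the resulting global size is make_int2(992, round_down(1000000 / 992, 16)) = (992, 1008).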
-
bool device_cuda_init()
{
-#ifdef WITH_CUDA_DYNLOAD
+# ifdef WITH_CUDA_DYNLOAD
static bool initialized = false;
static bool result = false;
@@ -2584,7 +43,6 @@ bool device_cuda_init()
VLOG(1) << "Found precompiled kernels";
result = true;
}
-# ifndef _WIN32
else if (cuewCompilerPath() != NULL) {
VLOG(1) << "Found CUDA compiler " << cuewCompilerPath();
result = true;
@@ -2593,7 +51,6 @@ bool device_cuda_init()
VLOG(1) << "Neither precompiled kernels nor CUDA compiler was found,"
<< " unable to use CUDA";
}
-# endif
}
else {
VLOG(1) << "CUEW initialization failed: "
@@ -2602,9 +59,9 @@ bool device_cuda_init()
}
return result;
-#else /* WITH_CUDA_DYNLOAD */
+# else /* WITH_CUDA_DYNLOAD */
return true;
-#endif /* WITH_CUDA_DYNLOAD */
+# endif /* WITH_CUDA_DYNLOAD */
}
Device *device_cuda_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
@@ -2614,7 +71,7 @@ Device *device_cuda_create(DeviceInfo &info, Stats &stats, Profiler &profiler, b
static CUresult device_cuda_safe_init()
{
-#ifdef _WIN32
+# ifdef _WIN32
__try {
return cuInit(0);
}
@@ -2625,9 +82,9 @@ static CUresult device_cuda_safe_init()
}
return CUDA_ERROR_NO_DEVICE;
-#else
+# else
return cuInit(0);
-#endif
+# endif
}
void device_cuda_info(vector<DeviceInfo> &devices)
@@ -2739,13 +196,13 @@ string device_cuda_capabilities()
}
capabilities += string("\t") + name + "\n";
int value;
-#define GET_ATTR(attr) \
- { \
- if (cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_##attr, num) == CUDA_SUCCESS) { \
- capabilities += string_printf("\t\tCU_DEVICE_ATTRIBUTE_" #attr "\t\t\t%d\n", value); \
+# define GET_ATTR(attr) \
+ { \
+ if (cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_##attr, num) == CUDA_SUCCESS) { \
+ capabilities += string_printf("\t\tCU_DEVICE_ATTRIBUTE_" #attr "\t\t\t%d\n", value); \
+ } \
} \
- } \
- (void)0
+ (void)0
/* TODO(sergey): Strip all attributes which are not useful for us
   * or do not depend on the driver.
*/
@@ -2836,7 +293,7 @@ string device_cuda_capabilities()
GET_ATTR(MANAGED_MEMORY);
GET_ATTR(MULTI_GPU_BOARD);
GET_ATTR(MULTI_GPU_BOARD_GROUP_ID);
-#undef GET_ATTR
+# undef GET_ATTR
capabilities += "\n";
}
@@ -2844,3 +301,5 @@ string device_cuda_capabilities()
}
CCL_NAMESPACE_END
+
+#endif
diff --git a/intern/cycles/device/device_intern.h b/intern/cycles/device/device_intern.h
index 5b8b86886c4..0c229ac24cf 100644
--- a/intern/cycles/device/device_intern.h
+++ b/intern/cycles/device/device_intern.h
@@ -17,9 +17,15 @@
#ifndef __DEVICE_INTERN_H__
#define __DEVICE_INTERN_H__
+#include "util/util_string.h"
+#include "util/util_vector.h"
+
CCL_NAMESPACE_BEGIN
class Device;
+class DeviceInfo;
+class Profiler;
+class Stats;
Device *device_cpu_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
bool device_opencl_init();
diff --git a/intern/cycles/device/device_memory.cpp b/intern/cycles/device/device_memory.cpp
index 3a99a49dffc..671cd7c29f3 100644
--- a/intern/cycles/device/device_memory.cpp
+++ b/intern/cycles/device/device_memory.cpp
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "device/device.h"
#include "device/device_memory.h"
+#include "device/device.h"
CCL_NAMESPACE_BEGIN
@@ -31,8 +31,6 @@ device_memory::device_memory(Device *device, const char *name, MemoryType type)
data_depth(0),
type(type),
name(name),
- interpolation(INTERPOLATION_NONE),
- extension(EXTENSION_REPEAT),
device(device),
device_pointer(0),
host_pointer(0),
@@ -76,7 +74,7 @@ void device_memory::host_free()
void device_memory::device_alloc()
{
- assert(!device_pointer && type != MEM_TEXTURE);
+ assert(!device_pointer && type != MEM_TEXTURE && type != MEM_GLOBAL);
device->mem_alloc(*this);
}
@@ -96,7 +94,7 @@ void device_memory::device_copy_to()
void device_memory::device_copy_from(int y, int w, int h, int elem)
{
- assert(type != MEM_TEXTURE && type != MEM_READ_ONLY);
+ assert(type != MEM_TEXTURE && type != MEM_READ_ONLY && type != MEM_GLOBAL);
device->mem_copy_from(*this, y, w, h, elem);
}
@@ -139,4 +137,93 @@ device_sub_ptr::~device_sub_ptr()
device->mem_free_sub_ptr(ptr);
}
+/* Device Texture */
+
+device_texture::device_texture(Device *device,
+ const char *name,
+ const uint slot,
+ ImageDataType image_data_type,
+ InterpolationType interpolation,
+ ExtensionType extension)
+ : device_memory(device, name, MEM_TEXTURE), slot(slot)
+{
+ switch (image_data_type) {
+ case IMAGE_DATA_TYPE_FLOAT4:
+ data_type = TYPE_FLOAT;
+ data_elements = 4;
+ break;
+ case IMAGE_DATA_TYPE_FLOAT:
+ data_type = TYPE_FLOAT;
+ data_elements = 1;
+ break;
+ case IMAGE_DATA_TYPE_BYTE4:
+ data_type = TYPE_UCHAR;
+ data_elements = 4;
+ break;
+ case IMAGE_DATA_TYPE_BYTE:
+ data_type = TYPE_UCHAR;
+ data_elements = 1;
+ break;
+ case IMAGE_DATA_TYPE_HALF4:
+ data_type = TYPE_HALF;
+ data_elements = 4;
+ break;
+ case IMAGE_DATA_TYPE_HALF:
+ data_type = TYPE_HALF;
+ data_elements = 1;
+ break;
+ case IMAGE_DATA_TYPE_USHORT4:
+ data_type = TYPE_UINT16;
+ data_elements = 4;
+ break;
+ case IMAGE_DATA_TYPE_USHORT:
+ data_type = TYPE_UINT16;
+ data_elements = 1;
+ break;
+ case IMAGE_DATA_NUM_TYPES:
+ assert(0);
+ return;
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.data_type = image_data_type;
+ info.interpolation = interpolation;
+ info.extension = extension;
+}
+
+device_texture::~device_texture()
+{
+ device_free();
+ host_free();
+}
+
+/* Host memory allocation. */
+void *device_texture::alloc(const size_t width, const size_t height, const size_t depth)
+{
+ const size_t new_size = size(width, height, depth);
+
+ if (new_size != data_size) {
+ device_free();
+ host_free();
+ host_pointer = host_alloc(data_elements * datatype_size(data_type) * new_size);
+ assert(device_pointer == 0);
+ }
+
+ data_size = new_size;
+ data_width = width;
+ data_height = height;
+ data_depth = depth;
+
+ info.width = width;
+ info.height = height;
+ info.depth = depth;
+
+ return host_pointer;
+}
+
+void device_texture::copy_to_device()
+{
+ device_copy_to();
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h
index 60740807568..1c20db900bc 100644
--- a/intern/cycles/device/device_memory.h
+++ b/intern/cycles/device/device_memory.h
@@ -23,6 +23,7 @@
#include "util/util_array.h"
#include "util/util_half.h"
+#include "util/util_string.h"
#include "util/util_texture.h"
#include "util/util_types.h"
#include "util/util_vector.h"
@@ -31,7 +32,14 @@ CCL_NAMESPACE_BEGIN
class Device;
-enum MemoryType { MEM_READ_ONLY, MEM_READ_WRITE, MEM_DEVICE_ONLY, MEM_TEXTURE, MEM_PIXELS };
+enum MemoryType {
+ MEM_READ_ONLY,
+ MEM_READ_WRITE,
+ MEM_DEVICE_ONLY,
+ MEM_GLOBAL,
+ MEM_TEXTURE,
+ MEM_PIXELS
+};
/* Supported Data Types */
@@ -208,8 +216,6 @@ class device_memory {
size_t data_depth;
MemoryType type;
const char *name;
- InterpolationType interpolation;
- ExtensionType extension;
/* Pointers. */
Device *device;
@@ -310,7 +316,7 @@ template<typename T> class device_only_memory : public device_memory {
* in and copied to the device with copy_to_device(). Or alternatively
* allocated and set to zero on the device with zero_to_device().
*
- * When using memory type MEM_TEXTURE, a pointer to this memory will be
+ * When using memory type MEM_GLOBAL, a pointer to this memory will be
* automatically attached to kernel globals, using the provided name
* matching an entry in kernel_textures.h. */
@@ -427,6 +433,11 @@ template<typename T> class device_vector : public device_memory {
device_copy_to();
}
+ void copy_from_device()
+ {
+ device_copy_from(0, data_width, data_height, sizeof(T));
+ }
+
void copy_from_device(int y, int w, int h)
{
device_copy_from(y, w, h, sizeof(T));
@@ -498,6 +509,33 @@ class device_sub_ptr {
device_ptr ptr;
};
+/* Device Texture
+ *
+ * 2D or 3D image texture memory. */
+
+class device_texture : public device_memory {
+ public:
+ device_texture(Device *device,
+ const char *name,
+ const uint slot,
+ ImageDataType image_data_type,
+ InterpolationType interpolation,
+ ExtensionType extension);
+ ~device_texture();
+
+ void *alloc(const size_t width, const size_t height, const size_t depth = 0);
+ void copy_to_device();
+
+ uint slot;
+ TextureInfo info;
+
+ protected:
+ size_t size(const size_t width, const size_t height, const size_t depth)
+ {
+ return width * ((height == 0) ? 1 : height) * ((depth == 0) ? 1 : depth);
+ }
+};
+
CCL_NAMESPACE_END
#endif /* __DEVICE_MEMORY_H__ */
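A minimal usage sketch of the device_texture interface added above, assuming it is compiled inside the Cycles tree with a valid Device pointer; the slot number, name and image size are placeholders:

/* Allocate a hypothetical 512x512 float4 image texture bound to slot 7. */
device_texture tex(device, "__tex_image_float4_007", 7,
                   IMAGE_DATA_TYPE_FLOAT4, INTERPOLATION_LINEAR, EXTENSION_REPEAT);
float4 *pixels = (float4 *)tex.alloc(512, 512);
/* ... fill 512 * 512 texels through `pixels` ... */
tex.copy_to_device(); /* upload the host copy to the device */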
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index b8587eb0a62..3636ecaa7a1 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include <stdlib.h>
#include <sstream>
+#include <stdlib.h>
#include "device/device.h"
#include "device/device_intern.h"
@@ -42,7 +42,7 @@ class MultiDevice : public Device {
map<device_ptr, device_ptr> ptr_map;
};
- list<SubDevice> devices;
+ list<SubDevice> devices, denoising_devices;
device_ptr unique_key;
MultiDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_)
@@ -61,6 +61,12 @@ class MultiDevice : public Device {
}
}
+ foreach (DeviceInfo &subinfo, info.denoising_devices) {
+ Device *device = Device::create(subinfo, sub_stats_, profiler, background);
+
+ denoising_devices.push_back(SubDevice(device));
+ }
+
#ifdef WITH_NETWORK
/* try to add network devices */
ServerDiscovery discovery(true);
@@ -80,17 +86,18 @@ class MultiDevice : public Device {
{
foreach (SubDevice &sub, devices)
delete sub.device;
+ foreach (SubDevice &sub, denoising_devices)
+ delete sub.device;
}
const string &error_message()
{
- foreach (SubDevice &sub, devices) {
- if (sub.device->error_message() != "") {
- if (error_msg == "")
- error_msg = sub.device->error_message();
- break;
- }
- }
+ error_msg.clear();
+
+ foreach (SubDevice &sub, devices)
+ error_msg += sub.device->error_message();
+ foreach (SubDevice &sub, denoising_devices)
+ error_msg += sub.device->error_message();
return error_msg;
}
@@ -118,6 +125,12 @@ class MultiDevice : public Device {
if (!sub.device->load_kernels(requested_features))
return false;
+ if (requested_features.use_denoising) {
+ foreach (SubDevice &sub, denoising_devices)
+ if (!sub.device->load_kernels(requested_features))
+ return false;
+ }
+
return true;
}
@@ -127,6 +140,12 @@ class MultiDevice : public Device {
if (!sub.device->wait_for_availability(requested_features))
return false;
+ if (requested_features.use_denoising) {
+ foreach (SubDevice &sub, denoising_devices)
+ if (!sub.device->wait_for_availability(requested_features))
+ return false;
+ }
+
return true;
}
@@ -150,19 +169,28 @@ class MultiDevice : public Device {
break;
}
}
+
return result;
}
bool build_optix_bvh(BVH *bvh)
{
- // Broadcast acceleration structure build to all devices
- foreach (SubDevice &sub, devices) {
+ // Broadcast acceleration structure build to all render devices
+ foreach (SubDevice &sub, devices)
if (!sub.device->build_optix_bvh(bvh))
return false;
- }
+
return true;
}
+ virtual void *osl_memory()
+ {
+ if (devices.size() > 1) {
+ return NULL;
+ }
+ return devices.front().device->osl_memory();
+ }
+
void mem_alloc(device_memory &mem)
{
device_ptr key = unique_key++;
@@ -236,6 +264,17 @@ class MultiDevice : public Device {
sub.ptr_map[key] = mem.device_pointer;
}
+ if (strcmp(mem.name, "RenderBuffers") == 0) {
+ foreach (SubDevice &sub, denoising_devices) {
+ mem.device = sub.device;
+ mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
+ mem.device_size = existing_size;
+
+ sub.device->mem_zero(mem);
+ sub.ptr_map[key] = mem.device_pointer;
+ }
+ }
+
mem.device = this;
mem.device_pointer = key;
stats.mem_alloc(mem.device_size - existing_size);
@@ -255,6 +294,17 @@ class MultiDevice : public Device {
sub.ptr_map.erase(sub.ptr_map.find(key));
}
+ if (strcmp(mem.name, "RenderBuffers") == 0) {
+ foreach (SubDevice &sub, denoising_devices) {
+ mem.device = sub.device;
+ mem.device_pointer = sub.ptr_map[key];
+ mem.device_size = existing_size;
+
+ sub.device->mem_free(mem);
+ sub.ptr_map.erase(sub.ptr_map.find(key));
+ }
+ }
+
mem.device = this;
mem.device_pointer = 0;
mem.device_size = 0;
@@ -302,10 +352,21 @@ class MultiDevice : public Device {
void map_tile(Device *sub_device, RenderTile &tile)
{
+ if (!tile.buffer) {
+ return;
+ }
+
foreach (SubDevice &sub, devices) {
if (sub.device == sub_device) {
- if (tile.buffer)
- tile.buffer = sub.ptr_map[tile.buffer];
+ tile.buffer = sub.ptr_map[tile.buffer];
+ return;
+ }
+ }
+
+ foreach (SubDevice &sub, denoising_devices) {
+ if (sub.device == sub_device) {
+ tile.buffer = sub.ptr_map[tile.buffer];
+ return;
}
}
}
@@ -320,6 +381,12 @@ class MultiDevice : public Device {
i++;
}
+ foreach (SubDevice &sub, denoising_devices) {
+ if (sub.device == sub_device)
+ return i;
+ i++;
+ }
+
return -1;
}
@@ -330,24 +397,41 @@ class MultiDevice : public Device {
continue;
}
+ device_vector<float> &mem = tiles[i].buffers->buffer;
+ tiles[i].buffer = mem.device_pointer;
+
+ if (mem.device == this && denoising_devices.empty()) {
+ /* Skip unnecessary copies in viewport mode (buffer covers the
+ * whole image), but still need to fix up the tile device pointer. */
+ map_tile(sub_device, tiles[i]);
+ continue;
+ }
+
     /* If the tile was rendered on another device, copy its memory
* to the current device now, for the duration of the denoising task.
* Note that this temporarily modifies the RenderBuffers and calls
* the device, so this function is not thread safe. */
- device_vector<float> &mem = tiles[i].buffers->buffer;
if (mem.device != sub_device) {
/* Only copy from device to host once. This is faster, but
* also required for the case where a CPU thread is denoising
* a tile rendered on the GPU. In that case we have to avoid
- * overwriting the buffer being denoised by the CPU thread. */
+ * overwriting the buffer being de-noised by the CPU thread. */
if (!tiles[i].buffers->map_neighbor_copied) {
tiles[i].buffers->map_neighbor_copied = true;
- mem.copy_from_device(0, mem.data_size, 1);
+ mem.copy_from_device();
}
- mem.swap_device(sub_device, 0, 0);
+ if (mem.device == this) {
+ /* Can re-use memory if tile is already allocated on the sub device. */
+ map_tile(sub_device, tiles[i]);
+ mem.swap_device(sub_device, mem.device_size, tiles[i].buffer);
+ }
+ else {
+ mem.swap_device(sub_device, 0, 0);
+ }
mem.copy_to_device();
+
tiles[i].buffer = mem.device_pointer;
tiles[i].device_size = mem.device_size;
@@ -358,11 +442,17 @@ class MultiDevice : public Device {
void unmap_neighbor_tiles(Device *sub_device, RenderTile *tiles)
{
- /* Copy denoised result back to the host. */
device_vector<float> &mem = tiles[9].buffers->buffer;
+
+ if (mem.device == this && denoising_devices.empty()) {
+ return;
+ }
+
+ /* Copy denoised result back to the host. */
mem.swap_device(sub_device, tiles[9].device_size, tiles[9].buffer);
- mem.copy_from_device(0, mem.data_size, 1);
+ mem.copy_from_device();
mem.restore_device();
+
/* Copy denoised result to the original device. */
mem.copy_to_device();
@@ -372,7 +462,9 @@ class MultiDevice : public Device {
}
device_vector<float> &mem = tiles[i].buffers->buffer;
- if (mem.device != sub_device) {
+
+ if (mem.device != sub_device && mem.device != this) {
+ /* Free up memory again if it was allocated for the copy above. */
mem.swap_device(sub_device, tiles[i].device_size, tiles[i].buffer);
sub_device->mem_free(mem);
mem.restore_device();
@@ -398,10 +490,29 @@ class MultiDevice : public Device {
void task_add(DeviceTask &task)
{
+ list<SubDevice> task_devices = devices;
+ if (!denoising_devices.empty()) {
+ if (task.type == DeviceTask::DENOISE_BUFFER) {
+ /* Denoising tasks should be redirected to the denoising devices entirely. */
+ task_devices = denoising_devices;
+ }
+ else if (task.type == DeviceTask::RENDER && (task.tile_types & RenderTile::DENOISE)) {
+ const uint tile_types = task.tile_types;
+        /* For normal rendering tasks, only redirect the denoising part to the denoising devices.
+         * There is no need to split the task here, since they all run through 'acquire_tile'. */
+ task.tile_types = RenderTile::DENOISE;
+ foreach (SubDevice &sub, denoising_devices) {
+ sub.device->task_add(task);
+ }
+ /* Rendering itself should still be executed on the rendering devices. */
+ task.tile_types = tile_types ^ RenderTile::DENOISE;
+ }
+ }
+
list<DeviceTask> tasks;
- task.split(tasks, devices.size());
+ task.split(tasks, task_devices.size());
- foreach (SubDevice &sub, devices) {
+ foreach (SubDevice &sub, task_devices) {
if (!tasks.empty()) {
DeviceTask subtask = tasks.front();
tasks.pop_front();
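To spell out the bitmask handling in task_add above: if the incoming render task has tile_types = PATH_TRACE | DENOISE, the denoising devices get a copy restricted to DENOISE, and tile_types ^ RenderTile::DENOISE leaves PATH_TRACE for the render devices; the XOR simply clears the DENOISE bit, which is known to be set on that branch.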
@@ -426,12 +537,16 @@ class MultiDevice : public Device {
{
foreach (SubDevice &sub, devices)
sub.device->task_wait();
+ foreach (SubDevice &sub, denoising_devices)
+ sub.device->task_wait();
}
void task_cancel()
{
foreach (SubDevice &sub, devices)
sub.device->task_cancel();
+ foreach (SubDevice &sub, denoising_devices)
+ sub.device->task_cancel();
}
protected:
diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp
index 80334ad8f22..2742cbf53aa 100644
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -14,9 +14,9 @@
* limitations under the License.
*/
+#include "device/device_network.h"
#include "device/device.h"
#include "device/device_intern.h"
-#include "device/device_network.h"
#include "util/util_foreach.h"
#include "util/util_logging.h"
diff --git a/intern/cycles/device/device_network.h b/intern/cycles/device/device_network.h
index 5b69b815cc6..e74c4508ab6 100644
--- a/intern/cycles/device/device_network.h
+++ b/intern/cycles/device/device_network.h
@@ -19,19 +19,19 @@
#ifdef WITH_NETWORK
-# include <boost/archive/text_iarchive.hpp>
-# include <boost/archive/text_oarchive.hpp>
# include <boost/archive/binary_iarchive.hpp>
# include <boost/archive/binary_oarchive.hpp>
+# include <boost/archive/text_iarchive.hpp>
+# include <boost/archive/text_oarchive.hpp>
# include <boost/array.hpp>
# include <boost/asio.hpp>
# include <boost/bind.hpp>
# include <boost/serialization/vector.hpp>
# include <boost/thread.hpp>
+# include <deque>
# include <iostream>
# include <sstream>
-# include <deque>
# include "render/buffers.h"
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index b07596c60ff..891b73351a0 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -16,8 +16,8 @@
#ifdef WITH_OPENCL
-# include "device/opencl/opencl.h"
-
+# include "device/opencl/device_opencl.h"
+# include "device/device.h"
# include "device/device_intern.h"
# include "util/util_foreach.h"
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index c1106b367ca..42d7b00314c 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -17,30 +17,28 @@
#ifdef WITH_OPTIX
-# include "device/device.h"
-# include "device/device_intern.h"
-# include "device/device_denoising.h"
# include "bvh/bvh.h"
-# include "render/scene.h"
+# include "device/cuda/device_cuda.h"
+# include "device/device_denoising.h"
+# include "device/device_intern.h"
+# include "render/buffers.h"
+# include "render/hair.h"
# include "render/mesh.h"
# include "render/object.h"
-# include "render/buffers.h"
+# include "render/scene.h"
+# include "util/util_debug.h"
+# include "util/util_logging.h"
# include "util/util_md5.h"
# include "util/util_path.h"
# include "util/util_time.h"
-# include "util/util_debug.h"
-# include "util/util_logging.h"
-
-# undef _WIN32_WINNT // Need minimum API support for Windows 7
-# define _WIN32_WINNT _WIN32_WINNT_WIN7
# ifdef WITH_CUDA_DYNLOAD
# include <cuew.h>
// Do not use CUDA SDK headers when using CUEW
# define OPTIX_DONT_INCLUDE_CUDA
# endif
-# include <optix_stubs.h>
# include <optix_function_table_definition.h>
+# include <optix_stubs.h>
// TODO(pmours): Disable this once drivers have native support
# define OPTIX_DENOISER_NO_PIXEL_STRIDE 1
@@ -110,31 +108,23 @@ struct KernelParams {
} \
(void)0
-# define CUDA_GET_BLOCKSIZE(func, w, h) \
- int threads; \
- check_result_cuda_ret( \
- cuFuncGetAttribute(&threads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
- threads = (int)sqrt((float)threads); \
- int xblocks = ((w) + threads - 1) / threads; \
- int yblocks = ((h) + threads - 1) / threads;
-
-# define CUDA_LAUNCH_KERNEL(func, args) \
- check_result_cuda_ret(cuLaunchKernel( \
- func, xblocks, yblocks, 1, threads, threads, 1, 0, cuda_stream[thread_index], args, 0));
-
-/* Similar as above, but for 1-dimensional blocks. */
-# define CUDA_GET_BLOCKSIZE_1D(func, w, h) \
- int threads; \
- check_result_cuda_ret( \
- cuFuncGetAttribute(&threads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
- int xblocks = ((w) + threads - 1) / threads; \
- int yblocks = h;
-
-# define CUDA_LAUNCH_KERNEL_1D(func, args) \
- check_result_cuda_ret(cuLaunchKernel( \
- func, xblocks, yblocks, 1, threads, 1, 1, 0, cuda_stream[thread_index], args, 0));
+# define launch_filter_kernel(func_name, w, h, args) \
+ { \
+ CUfunction func; \
+ check_result_cuda_ret(cuModuleGetFunction(&func, cuFilterModule, func_name)); \
+ check_result_cuda_ret(cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1)); \
+ int threads; \
+ check_result_cuda_ret( \
+ cuFuncGetAttribute(&threads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
+ threads = (int)sqrt((float)threads); \
+ int xblocks = ((w) + threads - 1) / threads; \
+ int yblocks = ((h) + threads - 1) / threads; \
+ check_result_cuda_ret( \
+ cuLaunchKernel(func, xblocks, yblocks, 1, threads, threads, 1, 0, 0, args, 0)); \
+ } \
+ (void)0
-class OptiXDevice : public Device {
+class OptiXDevice : public CUDADevice {
// List of OptiX program groups
enum {
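The launch_filter_kernel macro above derives a square 2D launch configuration from the kernel's reported CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: integer square root for the block edge, ceiling division for the grid. A minimal standalone sketch of that arithmetic, with a hypothetical LaunchDims holder and no CUDA dependency:

    #include <cmath>
    #include <cstdio>

    // Hypothetical holder for a 2D launch configuration.
    struct LaunchDims {
      int threads;  // block is threads x threads
      int xblocks;
      int yblocks;
    };

    // Mirrors the macro's arithmetic: square block from the per-kernel
    // thread limit, then ceiling division for the grid size.
    static LaunchDims compute_launch_dims(int max_threads_per_block, int w, int h)
    {
      LaunchDims dims;
      dims.threads = (int)std::sqrt((float)max_threads_per_block);
      dims.xblocks = (w + dims.threads - 1) / dims.threads;
      dims.yblocks = (h + dims.threads - 1) / dims.threads;
      return dims;
    }

    int main()
    {
      // A 1024-thread limit yields 32x32 blocks; 1920x1080 then needs 60x34 blocks.
      LaunchDims dims = compute_launch_dims(1024, 1920, 1080);
      printf("%dx%d threads, %dx%d blocks\n", dims.threads, dims.threads, dims.xblocks, dims.yblocks);
      return 0;
    }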
@@ -183,77 +173,37 @@ class OptiXDevice : public Device {
// Use a pool with multiple threads to support launches with multiple CUDA streams
TaskPool task_pool;
- // CUDA/OptiX context handles
- CUdevice cuda_device = 0;
- CUcontext cuda_context = NULL;
vector<CUstream> cuda_stream;
OptixDeviceContext context = NULL;
- // Need CUDA kernel module for some utility functions
- CUmodule cuda_module = NULL;
- CUmodule cuda_filter_module = NULL;
- // All necessary OptiX kernels are in one module
- OptixModule optix_module = NULL;
+ OptixModule optix_module = NULL; // All necessary OptiX kernels are in one module
OptixPipeline pipelines[NUM_PIPELINES] = {};
bool motion_blur = false;
- bool need_texture_info = false;
device_vector<SbtRecord> sbt_data;
- device_vector<TextureInfo> texture_info;
device_only_memory<KernelParams> launch_params;
vector<CUdeviceptr> as_mem;
OptixTraversableHandle tlas_handle = 0;
- // TODO(pmours): This is copied from device_cuda.cpp, so move to common code eventually
- int can_map_host = 0;
- size_t map_host_used = 0;
- size_t map_host_limit = 0;
- size_t device_working_headroom = 32 * 1024 * 1024LL; // 32MB
- size_t device_texture_headroom = 128 * 1024 * 1024LL; // 128MB
- map<device_memory *, CUDAMem> cuda_mem_map;
- bool move_texture_to_host = false;
-
OptixDenoiser denoiser = NULL;
- vector<pair<int2, CUdeviceptr>> denoiser_state;
+ device_only_memory<unsigned char> denoiser_state;
+ int denoiser_input_passes = 0;
public:
OptiXDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_)
- : Device(info_, stats_, profiler_, background_),
+ : CUDADevice(info_, stats_, profiler_, background_),
sbt_data(this, "__sbt", MEM_READ_ONLY),
- texture_info(this, "__texture_info", MEM_TEXTURE),
- launch_params(this, "__params")
+ launch_params(this, "__params"),
+ denoiser_state(this, "__denoiser_state")
{
// Store number of CUDA streams in device info
info.cpu_threads = DebugFlags().optix.cuda_streams;
- // Initialize CUDA driver API
- check_result_cuda(cuInit(0));
-
- // Retrieve the primary CUDA context for this device
- check_result_cuda(cuDeviceGet(&cuda_device, info.num));
- check_result_cuda(cuDevicePrimaryCtxRetain(&cuda_context, cuda_device));
-
- // Make that CUDA context current
- const CUDAContextScope scope(cuda_context);
-
- // Limit amount of host mapped memory (see init_host_memory in device_cuda.cpp)
- size_t default_limit = 4 * 1024 * 1024 * 1024LL;
- size_t system_ram = system_physical_ram();
- if (system_ram > 0) {
- if (system_ram / 2 > default_limit) {
- map_host_limit = system_ram - default_limit;
- }
- else {
- map_host_limit = system_ram / 2;
- }
- }
- else {
- VLOG(1) << "Mapped host memory disabled, failed to get system RAM";
+ // Make the CUDA context current
+ if (!cuContext) {
+ return; // Do not initialize if CUDA context creation failed already
}
-
- // Check device support for pinned host memory
- check_result_cuda(
- cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuda_device));
+ const CUDAContextScope scope(cuContext);
// Create OptiX context for this device
OptixDeviceContextOptions options = {};
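With the switch to deriving from CUDADevice, the constructor no longer creates its own CUDA context; it checks the base-class cuContext and bails out early if that creation already failed. A minimal sketch of the pattern, using hypothetical BaseDevice/DerivedDevice names in place of the real classes:

    #include <iostream>

    // Hypothetical stand-ins: the base class owns the context, the derived
    // class checks it before doing any of its own setup.
    struct BaseDevice {
      void *context = nullptr;  // non-null only when base init succeeded
      explicit BaseDevice(bool ok) { if (ok) context = this; }
    };

    struct DerivedDevice : BaseDevice {
      bool initialized = false;
      explicit DerivedDevice(bool ok) : BaseDevice(ok)
      {
        if (!context)
          return;  // do not initialize if base-class context creation failed
        initialized = true;
      }
    };

    int main()
    {
      DerivedDevice good(true), bad(false);
      std::cout << good.initialized << " " << bad.initialized << "\n";  // prints: 1 0
    }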
@@ -277,7 +227,7 @@ class OptiXDevice : public Device {
}
};
# endif
- check_result_optix(optixDeviceContextCreate(cuda_context, &options, &context));
+ check_result_optix(optixDeviceContextCreate(cuContext, &options, &context));
# ifdef WITH_CYCLES_LOGGING
check_result_optix(optixDeviceContextSetLogCallback(
context, options.logCallbackFunction, options.logCallbackData, options.logCallbackLevel));
@@ -292,37 +242,26 @@ class OptiXDevice : public Device {
launch_params.data_elements = sizeof(KernelParams);
// Allocate launch parameter buffer memory on device
launch_params.alloc_to_device(info.cpu_threads);
-
- // Create denoiser state entries for all threads (but do not allocate yet)
- denoiser_state.resize(info.cpu_threads);
}
~OptiXDevice()
{
// Stop processing any more tasks
task_pool.stop();
+ // Make CUDA context current
+ const CUDAContextScope scope(cuContext);
+
// Free all acceleration structures
for (CUdeviceptr mem : as_mem) {
cuMemFree(mem);
}
- // Free denoiser state for all threads
- for (const pair<int2, CUdeviceptr> &state : denoiser_state) {
- cuMemFree(state.second);
- }
-
sbt_data.free();
texture_info.free();
launch_params.free();
-
- // Make CUDA context current
- const CUDAContextScope scope(cuda_context);
+ denoiser_state.free();
// Unload modules
- if (cuda_module != NULL)
- cuModuleUnload(cuda_module);
- if (cuda_filter_module != NULL)
- cuModuleUnload(cuda_filter_module);
if (optix_module != NULL)
optixModuleDestroy(optix_module);
for (unsigned int i = 0; i < NUM_PIPELINES; ++i)
@@ -336,9 +275,7 @@ class OptiXDevice : public Device {
if (denoiser != NULL)
optixDenoiserDestroy(denoiser);
- // Destroy OptiX and CUDA context
optixDeviceContextDestroy(context);
- cuDevicePrimaryCtxRelease(cuda_device);
}
private:
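The destructor now makes the CUDA context current once, via a single CUDAContextScope, before freeing device allocations and OptiX objects. A sketch of what such an RAII scope guard typically looks like, using hypothetical push/pop callbacks rather than the real driver API so it stays standalone:

    #include <functional>
    #include <iostream>

    // Hypothetical RAII guard: activate a context on construction, restore it
    // on destruction, so every early return inside the scope stays balanced.
    class ScopedContext {
     public:
      ScopedContext(std::function<void()> push, std::function<void()> pop) : pop_(std::move(pop))
      {
        push();
      }
      ~ScopedContext() { pop_(); }

     private:
      std::function<void()> pop_;
    };

    int main()
    {
      {
        ScopedContext scope([] { std::cout << "push context\n"; },
                            [] { std::cout << "pop context\n"; });
        std::cout << "free resources while context is current\n";
      }  // context popped here, even on early return or exception
    }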
@@ -354,10 +291,34 @@ class OptiXDevice : public Device {
return BVH_LAYOUT_OPTIX;
}
+ string compile_kernel_get_common_cflags(const DeviceRequestedFeatures &requested_features,
+ bool filter,
+ bool /*split*/) override
+ {
+ // Split kernel is not supported in OptiX
+ string common_cflags = CUDADevice::compile_kernel_get_common_cflags(
+ requested_features, filter, false);
+
+ // Add OptiX SDK include directory to include paths
+ const char *optix_sdk_path = getenv("OPTIX_ROOT_DIR");
+ if (optix_sdk_path) {
+ common_cflags += string_printf(" -I\"%s/include\"", optix_sdk_path);
+ }
+
+ return common_cflags;
+ }
+
bool load_kernels(const DeviceRequestedFeatures &requested_features) override
{
- if (have_error())
- return false; // Abort early if context creation failed already
+ if (have_error()) {
+ // Abort early if context creation failed already
+ return false;
+ }
+
+ // Load CUDA modules because we need some of the utility kernels
+ if (!CUDADevice::load_kernels(requested_features)) {
+ return false;
+ }
// Disable baking for now, since its kernel is not well-suited for inlining and is very slow
if (requested_features.use_baking) {
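The new compile_kernel_get_common_cflags override appends an OptiX SDK include directory taken from the OPTIX_ROOT_DIR environment variable. A small standalone sketch of that pattern (std::getenv plus string concatenation; the initial flag string is illustrative only):

    #include <cstdlib>
    #include <iostream>
    #include <string>

    // Append an -I flag for an SDK include directory if the environment
    // variable is set; otherwise leave the flags untouched.
    static std::string add_sdk_include(std::string cflags, const char *env_name)
    {
      if (const char *sdk_path = std::getenv(env_name)) {
        cflags += " -I\"" + std::string(sdk_path) + "/include\"";
      }
      return cflags;
    }

    int main()
    {
      std::cout << add_sdk_include("-O3", "OPTIX_ROOT_DIR") << "\n";
    }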
@@ -370,7 +331,7 @@ class OptiXDevice : public Device {
return false;
}
- const CUDAContextScope scope(cuda_context);
+ const CUDAContextScope scope(cuContext);
// Unload existing OptiX module and pipelines first
if (optix_module != NULL) {
@@ -421,9 +382,11 @@ class OptiXDevice : public Device {
}
{ // Load and compile PTX module with OptiX kernels
- string ptx_data;
- const string ptx_filename = "lib/kernel_optix.ptx";
- if (!path_read_text(path_get(ptx_filename), ptx_data)) {
+ string ptx_data, ptx_filename = path_get("lib/kernel_optix.ptx");
+ if (use_adaptive_compilation()) {
+ ptx_filename = compile_kernel(requested_features, "kernel_optix", "optix", true);
+ }
+ if (ptx_filename.empty() || !path_read_text(ptx_filename, ptx_data)) {
set_error("Failed loading OptiX kernel " + ptx_filename + ".");
return false;
}
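load_kernels now chooses between the precompiled lib/kernel_optix.ptx and an adaptively compiled PTX file, then reads whichever file was selected into a string. A minimal sketch of the file-read half using std::ifstream (the compile step is elided and the path is illustrative):

    #include <fstream>
    #include <sstream>
    #include <string>

    // Read an entire text file into a string; returns false when the file is
    // missing or unreadable, mirroring the path_read_text-style check above.
    static bool read_text_file(const std::string &path, std::string &text)
    {
      std::ifstream file(path);
      if (!file)
        return false;
      std::ostringstream buffer;
      buffer << file.rdbuf();
      text = buffer.str();
      return true;
    }

    int main()
    {
      std::string ptx_data;
      // Illustrative path only; the real code picks between a precompiled
      // file and an adaptively compiled one before reading.
      if (!read_text_file("lib/kernel_optix.ptx", ptx_data))
        return 1;
      return 0;
    }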
@@ -438,34 +401,6 @@ class OptiXDevice : public Device {
&optix_module));
}
- { // Load CUDA modules because we need some of the utility kernels
- int major, minor;
- cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, info.num);
- cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, info.num);
-
- if (cuda_module == NULL) { // Avoid reloading module if it was already loaded
- string cubin_data;
- const string cubin_filename = string_printf("lib/kernel_sm_%d%d.cubin", major, minor);
- if (!path_read_text(path_get(cubin_filename), cubin_data)) {
- set_error("Failed loading pre-compiled CUDA kernel " + cubin_filename + ".");
- return false;
- }
-
- check_result_cuda_ret(cuModuleLoadData(&cuda_module, cubin_data.data()));
- }
-
- if (requested_features.use_denoising && cuda_filter_module == NULL) {
- string filter_data;
- const string filter_filename = string_printf("lib/filter_sm_%d%d.cubin", major, minor);
- if (!path_read_text(path_get(filter_filename), filter_data)) {
- set_error("Failed loading pre-compiled CUDA filter kernel " + filter_filename + ".");
- return false;
- }
-
- check_result_cuda_ret(cuModuleLoadData(&cuda_filter_module, filter_data.data()));
- }
- }
-
// Create program groups
OptixProgramGroup groups[NUM_PROGRAM_GROUPS] = {};
OptixProgramGroupDesc group_descs[NUM_PROGRAM_GROUPS] = {};
@@ -542,9 +477,9 @@ class OptiXDevice : public Device {
// Calculate maximum trace continuation stack size
unsigned int trace_css = stack_size[PG_HITD].cssCH;
// This is based on the maximum of closest-hit and any-hit/intersection programs
- trace_css = max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH);
- trace_css = max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH);
- trace_css = max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH);
+ trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH);
+ trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH);
+ trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH);
OptixPipelineLinkOptions link_options;
link_options.maxTraceDepth = 1;
@@ -613,8 +548,9 @@ class OptiXDevice : public Device {
&pipelines[PIP_SHADER_EVAL]));
// Calculate continuation stack size based on the maximum of all ray generation stack sizes
- const unsigned int css = max(stack_size[PG_BAKE].cssRG,
- max(stack_size[PG_DISP].cssRG, stack_size[PG_BACK].cssRG)) +
+ const unsigned int css = std::max(stack_size[PG_BAKE].cssRG,
+ std::max(stack_size[PG_DISP].cssRG,
+ stack_size[PG_BACK].cssRG)) +
link_options.maxTraceDepth * trace_css;
check_result_optix_ret(optixPipelineSetStackSize(
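The stack-size computation above takes the maximum of the closest-hit size and each any-hit/intersection pair for the trace continuation stack, then adds maxTraceDepth multiples of that on top of the largest ray-generation stack. A standalone arithmetic sketch with purely illustrative byte counts:

    #include <algorithm>
    #include <cstdio>

    struct StackSizes {
      unsigned cssRG, cssCH, cssIS, cssAH;  // per-program continuation stack sizes
    };

    int main()
    {
      // Illustrative values only.
      StackSizes hit_d = {0, 96, 32, 16}, hit_l = {0, 0, 32, 16}, hit_s = {0, 0, 48, 24};
      StackSizes raygen = {128, 0, 0, 0};
      const unsigned max_trace_depth = 1;

      unsigned trace_css = hit_d.cssCH;
      trace_css = std::max(trace_css, hit_d.cssIS + hit_d.cssAH);
      trace_css = std::max(trace_css, hit_l.cssIS + hit_l.cssAH);
      trace_css = std::max(trace_css, hit_s.cssIS + hit_s.cssAH);

      const unsigned css = raygen.cssRG + max_trace_depth * trace_css;
      printf("continuation stack: %u bytes\n", css);  // 128 + 1 * 96 = 224
    }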
@@ -635,12 +571,17 @@ class OptiXDevice : public Device {
return; // Abort early if there was an error previously
if (task.type == DeviceTask::RENDER) {
+ if (thread_index != 0) {
+ // Only execute denoising in a single thread (see also 'task_add')
+ task.tile_types &= ~RenderTile::DENOISE;
+ }
+
RenderTile tile;
- while (task.acquire_tile(this, tile)) {
+ while (task.acquire_tile(this, tile, task.tile_types)) {
if (tile.task == RenderTile::PATH_TRACE)
launch_render(task, tile, thread_index);
else if (tile.task == RenderTile::DENOISE)
- launch_denoise(task, tile, thread_index);
+ launch_denoise(task, tile);
task.release_tile(tile);
if (task.get_cancel() && !task.need_finish_queue)
break; // User requested cancellation
@@ -651,8 +592,21 @@ class OptiXDevice : public Device {
else if (task.type == DeviceTask::SHADER) {
launch_shader_eval(task, thread_index);
}
- else if (task.type == DeviceTask::FILM_CONVERT) {
- launch_film_convert(task, thread_index);
+ else if (task.type == DeviceTask::DENOISE_BUFFER) {
+ // Set up a single tile that covers the whole task and denoise it
+ RenderTile tile;
+ tile.x = task.x;
+ tile.y = task.y;
+ tile.w = task.w;
+ tile.h = task.h;
+ tile.buffer = task.buffer;
+ tile.num_samples = task.num_samples;
+ tile.start_sample = task.sample;
+ tile.offset = task.offset;
+ tile.stride = task.stride;
+ tile.buffers = task.buffers;
+
+ launch_denoise(task, tile);
}
}
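thread_run now restricts denoising to a single thread by clearing the DENOISE bit from the tile types every other thread is allowed to acquire. A tiny flags sketch of that technique; the enum names and values are hypothetical stand-ins for RenderTile's task types:

    #include <cstdio>

    // Hypothetical tile-type flags; values mirror a typical bitmask layout.
    enum TileType { TILE_PATH_TRACE = 1 << 0, TILE_DENOISE = 1 << 1 };

    int main()
    {
      for (int thread_index = 0; thread_index < 2; ++thread_index) {
        unsigned tile_types = TILE_PATH_TRACE | TILE_DENOISE;
        if (thread_index != 0)
          tile_types &= ~static_cast<unsigned>(TILE_DENOISE);  // only thread 0 keeps denoise work
        printf("thread %d acquires types 0x%x\n", thread_index, tile_types);
      }
    }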
@@ -674,21 +628,24 @@ class OptiXDevice : public Device {
const int end_sample = rtile.start_sample + rtile.num_samples;
// Keep this number reasonable to avoid running into TDRs
- const int step_samples = (info.display_device ? 8 : 32);
+ int step_samples = (info.display_device ? 8 : 32);
+ if (task.adaptive_sampling.use) {
+ step_samples = task.adaptive_sampling.align_static_samples(step_samples);
+ }
+
// Offset into launch params buffer so that streams use separate data
device_ptr launch_params_ptr = launch_params.device_pointer +
thread_index * launch_params.data_elements;
- const CUDAContextScope scope(cuda_context);
+ const CUDAContextScope scope(cuContext);
for (int sample = rtile.start_sample; sample < end_sample; sample += step_samples) {
// Copy work tile information to device
wtile.num_samples = min(step_samples, end_sample - sample);
wtile.start_sample = sample;
- check_result_cuda(cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParams, tile),
- &wtile,
- sizeof(wtile),
- cuda_stream[thread_index]));
+ device_ptr d_wtile_ptr = launch_params_ptr + offsetof(KernelParams, tile);
+ check_result_cuda(
+ cuMemcpyHtoDAsync(d_wtile_ptr, &wtile, sizeof(wtile), cuda_stream[thread_index]));
OptixShaderBindingTable sbt_params = {};
sbt_params.raygenRecord = sbt_data.device_pointer + PG_RGEN * sizeof(SbtRecord);
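When adaptive sampling is enabled, step_samples is passed through task.adaptive_sampling.align_static_samples so the per-launch sample count lands on boundaries the adaptive filter expects. The exact alignment rule is not shown in this diff; the following sketch assumes a plain round-up-to-multiple behaviour, which may differ from the real implementation:

    #include <cstdio>

    // Assumed behaviour only: round a static sample step up to the nearest
    // multiple of the adaptive-sampling filter interval.
    static int align_static_samples(int step_samples, int filter_interval)
    {
      return ((step_samples + filter_interval - 1) / filter_interval) * filter_interval;
    }

    int main()
    {
      printf("%d\n", align_static_samples(32, 5));  // 35: next multiple of 5
    }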
@@ -713,6 +670,12 @@ class OptiXDevice : public Device {
wtile.h,
1));
+ // Run the adaptive sampling kernels at selected samples aligned to step samples.
+ uint filter_sample = wtile.start_sample + wtile.num_samples - 1;
+ if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(filter_sample)) {
+ adaptive_sampling_filter(filter_sample, &wtile, d_wtile_ptr, cuda_stream[thread_index]);
+ }
+
// Wait for launch to finish
check_result_cuda(cuStreamSynchronize(cuda_stream[thread_index]));
@@ -724,13 +687,23 @@ class OptiXDevice : public Device {
if (task.get_cancel() && !task.need_finish_queue)
return; // Cancel rendering
}
+
+ // Finalize adaptive sampling
+ if (task.adaptive_sampling.use) {
+ device_ptr d_wtile_ptr = launch_params_ptr + offsetof(KernelParams, tile);
+ adaptive_sampling_post(rtile, &wtile, d_wtile_ptr, cuda_stream[thread_index]);
+ check_result_cuda(cuStreamSynchronize(cuda_stream[thread_index]));
+ task.update_progress(&rtile, rtile.w * rtile.h * wtile.num_samples);
+ }
}
- bool launch_denoise(DeviceTask &task, RenderTile &rtile, int thread_index)
+ bool launch_denoise(DeviceTask &task, RenderTile &rtile)
{
- int total_samples = rtile.start_sample + rtile.num_samples;
+ // Update current sample (for display and NLM denoising task)
+ rtile.sample = rtile.start_sample + rtile.num_samples;
- const CUDAContextScope scope(cuda_context);
+ // Make CUDA context current now, since it is used for both denoising tasks
+ const CUDAContextScope scope(cuContext);
// Choose between OptiX and NLM denoising
if (task.denoising_use_optix) {
@@ -742,6 +715,7 @@ class OptiXDevice : public Device {
RenderTile rtiles[10];
rtiles[4] = rtile;
task.map_neighbor_tiles(rtiles, this);
+ rtile = rtiles[4]; // Tile may have been modified by mapping code
// Calculate size of the tile to denoise (including overlap)
int4 rect = make_int4(
@@ -808,47 +782,40 @@ class OptiXDevice : public Device {
tile_info->y[3] = rtiles[7].y + rtiles[7].h;
tile_info_mem.copy_to_device();
- CUfunction filter_copy_func;
- check_result_cuda_ret(cuModuleGetFunction(
- &filter_copy_func, cuda_filter_module, "kernel_cuda_filter_copy_input"));
- check_result_cuda_ret(cuFuncSetCacheConfig(filter_copy_func, CU_FUNC_CACHE_PREFER_L1));
-
void *args[] = {
&input.device_pointer, &tile_info_mem.device_pointer, &rect.x, &task.pass_stride};
- CUDA_GET_BLOCKSIZE(filter_copy_func, rect_size.x, rect_size.y);
- CUDA_LAUNCH_KERNEL(filter_copy_func, args);
+ launch_filter_kernel("kernel_cuda_filter_copy_input", rect_size.x, rect_size.y, args);
}
# if OPTIX_DENOISER_NO_PIXEL_STRIDE
device_only_memory<float> input_rgb(this, "denoiser input rgb");
- {
- input_rgb.alloc_to_device(rect_size.x * rect_size.y * 3 *
- task.denoising.optix_input_passes);
-
- CUfunction convert_to_rgb_func;
- check_result_cuda_ret(cuModuleGetFunction(
- &convert_to_rgb_func, cuda_filter_module, "kernel_cuda_filter_convert_to_rgb"));
- check_result_cuda_ret(cuFuncSetCacheConfig(convert_to_rgb_func, CU_FUNC_CACHE_PREFER_L1));
-
- void *args[] = {&input_rgb.device_pointer,
- &input_ptr,
- &rect_size.x,
- &rect_size.y,
- &input_stride,
- &task.pass_stride,
- const_cast<int *>(pass_offset),
- &task.denoising.optix_input_passes,
- &total_samples};
- CUDA_GET_BLOCKSIZE(convert_to_rgb_func, rect_size.x, rect_size.y);
- CUDA_LAUNCH_KERNEL(convert_to_rgb_func, args);
-
- input_ptr = input_rgb.device_pointer;
- pixel_stride = 3 * sizeof(float);
- input_stride = rect_size.x * pixel_stride;
- }
+ input_rgb.alloc_to_device(rect_size.x * rect_size.y * 3 * task.denoising.optix_input_passes);
+
+ void *input_args[] = {&input_rgb.device_pointer,
+ &input_ptr,
+ &rect_size.x,
+ &rect_size.y,
+ &input_stride,
+ &task.pass_stride,
+ const_cast<int *>(pass_offset),
+ &task.denoising.optix_input_passes,
+ &rtile.sample};
+ launch_filter_kernel(
+ "kernel_cuda_filter_convert_to_rgb", rect_size.x, rect_size.y, input_args);
+
+ input_ptr = input_rgb.device_pointer;
+ pixel_stride = 3 * sizeof(float);
+ input_stride = rect_size.x * pixel_stride;
# endif
- if (denoiser == NULL) {
+ const bool recreate_denoiser = (denoiser == NULL) ||
+ (task.denoising.optix_input_passes != denoiser_input_passes);
+ if (recreate_denoiser) {
+ // Destroy existing handle before creating new one
+ if (denoiser != NULL) {
+ optixDenoiserDestroy(denoiser);
+ }
+
// Create OptiX denoiser handle on demand when it is first used
OptixDenoiserOptions denoiser_options;
assert(task.denoising.optix_input_passes >= 1 && task.denoising.optix_input_passes <= 3);
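The denoiser handle is now cached together with the input-pass count it was created for and is recreated only when that count changes. A generic sketch of this recreate-on-config-change caching pattern, using a hypothetical handle type in place of OptixDenoiser:

    #include <cstdio>

    // Hypothetical cached handle plus the configuration it was built with.
    struct DenoiserCache {
      void *handle = nullptr;
      int input_passes = 0;
    };

    static void ensure_denoiser(DenoiserCache &cache, int requested_passes)
    {
      const bool recreate = (cache.handle == nullptr) || (cache.input_passes != requested_passes);
      if (!recreate)
        return;
      if (cache.handle) {
        // destroy the old handle here before creating the new one
        cache.handle = nullptr;
      }
      static int dummy;       // stand-in for a real denoiser object
      cache.handle = &dummy;  // create with the requested configuration
      cache.input_passes = requested_passes;
      printf("created denoiser for %d input passes\n", requested_passes);
    }

    int main()
    {
      DenoiserCache cache;
      ensure_denoiser(cache, 3);  // creates
      ensure_denoiser(cache, 3);  // reuses
      ensure_denoiser(cache, 1);  // recreates
    }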
@@ -858,35 +825,35 @@ class OptiXDevice : public Device {
check_result_optix_ret(optixDenoiserCreate(context, &denoiser_options, &denoiser));
check_result_optix_ret(
optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, NULL, 0));
+
+ // OptiX denoiser handle was created with the requested number of input passes
+ denoiser_input_passes = task.denoising.optix_input_passes;
}
OptixDenoiserSizes sizes = {};
check_result_optix_ret(
optixDenoiserComputeMemoryResources(denoiser, rect_size.x, rect_size.y, &sizes));
- auto &state = denoiser_state[thread_index].second;
- auto &state_size = denoiser_state[thread_index].first;
const size_t scratch_size = sizes.recommendedScratchSizeInBytes;
const size_t scratch_offset = sizes.stateSizeInBytes;
// Allocate denoiser state if tile size has changed since last setup
- if (state_size.x != rect_size.x || state_size.y != rect_size.y) {
- if (state) {
- cuMemFree(state);
- state = 0;
- }
- check_result_cuda_ret(cuMemAlloc(&state, scratch_offset + scratch_size));
+ if (recreate_denoiser || (denoiser_state.data_width != rect_size.x ||
+ denoiser_state.data_height != rect_size.y)) {
+ denoiser_state.alloc_to_device(scratch_offset + scratch_size);
+ // Initialize denoiser state for the current tile size
check_result_optix_ret(optixDenoiserSetup(denoiser,
- cuda_stream[thread_index],
+ 0,
rect_size.x,
rect_size.y,
- state,
+ denoiser_state.device_pointer,
scratch_offset,
- state + scratch_offset,
+ denoiser_state.device_pointer + scratch_offset,
scratch_size));
- state_size = rect_size;
+ denoiser_state.data_width = rect_size.x;
+ denoiser_state.data_height = rect_size.y;
}
// Set up input and output layer information
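optixDenoiserComputeMemoryResources reports a state size and a scratch size, and the code above keeps both in one allocation, addressing the scratch region at an offset of stateSizeInBytes. A tiny offset-arithmetic sketch with illustrative sizes standing in for the OptixDenoiserSizes fields:

    #include <cstdio>
    #include <vector>

    int main()
    {
      // Illustrative sizes only.
      const size_t state_size = 4 << 20;     // stateSizeInBytes
      const size_t scratch_size = 16 << 20;  // recommendedScratchSizeInBytes

      // One allocation holds both regions; scratch starts right after the state.
      std::vector<unsigned char> buffer(state_size + scratch_size);
      unsigned char *state_ptr = buffer.data();
      unsigned char *scratch_ptr = buffer.data() + state_size;

      printf("state at +0 (%zu bytes), scratch at +%zu (%zu bytes)\n",
             state_size, state_size, scratch_size);
      (void)state_ptr;
      (void)scratch_ptr;
    }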
@@ -926,94 +893,46 @@ class OptiXDevice : public Device {
// Finally run denoising

OptixDenoiserParams params = {}; // All parameters are disabled/zero
check_result_optix_ret(optixDenoiserInvoke(denoiser,
- cuda_stream[thread_index],
+ 0,
&params,
- state,
+ denoiser_state.device_pointer,
scratch_offset,
input_layers,
task.denoising.optix_input_passes,
overlap_offset.x,
overlap_offset.y,
output_layers,
- state + scratch_offset,
+ denoiser_state.device_pointer + scratch_offset,
scratch_size));
# if OPTIX_DENOISER_NO_PIXEL_STRIDE
- {
- CUfunction convert_from_rgb_func;
- check_result_cuda_ret(cuModuleGetFunction(
- &convert_from_rgb_func, cuda_filter_module, "kernel_cuda_filter_convert_from_rgb"));
- check_result_cuda_ret(
- cuFuncSetCacheConfig(convert_from_rgb_func, CU_FUNC_CACHE_PREFER_L1));
-
- void *args[] = {&input_ptr,
- &rtiles[9].buffer,
- &output_offset.x,
- &output_offset.y,
- &rect_size.x,
- &rect_size.y,
- &rtiles[9].x,
- &rtiles[9].y,
- &rtiles[9].w,
- &rtiles[9].h,
- &rtiles[9].offset,
- &rtiles[9].stride,
- &task.pass_stride};
- CUDA_GET_BLOCKSIZE(convert_from_rgb_func, rtiles[9].w, rtiles[9].h);
- CUDA_LAUNCH_KERNEL(convert_from_rgb_func, args);
- }
+ void *output_args[] = {&input_ptr,
+ &rtiles[9].buffer,
+ &output_offset.x,
+ &output_offset.y,
+ &rect_size.x,
+ &rect_size.y,
+ &rtiles[9].x,
+ &rtiles[9].y,
+ &rtiles[9].w,
+ &rtiles[9].h,
+ &rtiles[9].offset,
+ &rtiles[9].stride,
+ &task.pass_stride};
+ launch_filter_kernel(
+ "kernel_cuda_filter_convert_from_rgb", rtiles[9].w, rtiles[9].h, output_args);
# endif
- check_result_cuda_ret(cuStreamSynchronize(cuda_stream[thread_index]));
+ check_result_cuda_ret(cuStreamSynchronize(0));
task.unmap_neighbor_tiles(rtiles, this);
}
else {
// Run CUDA denoising kernels
DenoisingTask denoising(this, task);
- denoising.functions.construct_transform = function_bind(
- &OptiXDevice::denoising_construct_transform, this, &denoising, thread_index);
- denoising.functions.accumulate = function_bind(
- &OptiXDevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising, thread_index);
- denoising.functions.solve = function_bind(
- &OptiXDevice::denoising_solve, this, _1, &denoising, thread_index);
- denoising.functions.divide_shadow = function_bind(&OptiXDevice::denoising_divide_shadow,
- this,
- _1,
- _2,
- _3,
- _4,
- _5,
- &denoising,
- thread_index);
- denoising.functions.non_local_means = function_bind(
- &OptiXDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising, thread_index);
- denoising.functions.combine_halves = function_bind(&OptiXDevice::denoising_combine_halves,
- this,
- _1,
- _2,
- _3,
- _4,
- _5,
- _6,
- &denoising,
- thread_index);
- denoising.functions.get_feature = function_bind(
- &OptiXDevice::denoising_get_feature, this, _1, _2, _3, _4, _5, &denoising, thread_index);
- denoising.functions.write_feature = function_bind(
- &OptiXDevice::denoising_write_feature, this, _1, _2, _3, &denoising, thread_index);
- denoising.functions.detect_outliers = function_bind(
- &OptiXDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising, thread_index);
-
- denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
- denoising.render_buffer.samples = total_samples;
- denoising.buffer.gpu_temporary_mem = true;
-
- denoising.run_denoising(&rtile);
+ CUDADevice::denoise(rtile, denoising);
}
- // Update current sample, so it is displayed correctly
- rtile.sample = total_samples;
// Update task progress after the denoiser completed processing
task.update_progress(&rtile, rtile.w * rtile.h);
@@ -1028,7 +947,7 @@ class OptiXDevice : public Device {
if (task.shader_eval_type == SHADER_EVAL_DISPLACE)
rgen_index = PG_DISP;
- const CUDAContextScope scope(cuda_context);
+ const CUDAContextScope scope(cuContext);
device_ptr launch_params_ptr = launch_params.device_pointer +
thread_index * launch_params.data_elements;
@@ -1075,62 +994,13 @@ class OptiXDevice : public Device {
}
}
- void launch_film_convert(DeviceTask &task, int thread_index)
- {
- const CUDAContextScope scope(cuda_context);
-
- CUfunction film_convert_func;
- check_result_cuda(cuModuleGetFunction(&film_convert_func,
- cuda_module,
- task.rgba_byte ? "kernel_cuda_convert_to_byte" :
- "kernel_cuda_convert_to_half_float"));
-
- float sample_scale = 1.0f / (task.sample + 1);
- CUdeviceptr rgba = (task.rgba_byte ? task.rgba_byte : task.rgba_half);
-
- void *args[] = {&rgba,
- &task.buffer,
- &sample_scale,
- &task.x,
- &task.y,
- &task.w,
- &task.h,
- &task.offset,
- &task.stride};
-
- int threads_per_block;
- check_result_cuda(cuFuncGetAttribute(
- &threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, film_convert_func));
-
- const int num_threads_x = (int)sqrt(threads_per_block);
- const int num_blocks_x = (task.w + num_threads_x - 1) / num_threads_x;
- const int num_threads_y = (int)sqrt(threads_per_block);
- const int num_blocks_y = (task.h + num_threads_y - 1) / num_threads_y;
-
- check_result_cuda(cuLaunchKernel(film_convert_func,
- num_blocks_x,
- num_blocks_y,
- 1, /* blocks */
- num_threads_x,
- num_threads_y,
- 1, /* threads */
- 0,
- cuda_stream[thread_index],
- args,
- 0));
-
- check_result_cuda(cuStreamSynchronize(cuda_stream[thread_index]));
-
- task.update_progress(NULL);
- }
-
bool build_optix_bvh(const OptixBuildInput &build_input,
uint16_t num_motion_steps,
OptixTraversableHandle &out_handle)
{
out_handle = 0;
- const CUDAContextScope scope(cuda_context);
+ const CUDAContextScope scope(cuContext);
// Compute memory usage
OptixAccelBufferSizes sizes = {};
@@ -1232,8 +1102,8 @@ class OptiXDevice : public Device {
assert(bvh->params.top_level);
unsigned int num_instances = 0;
- unordered_map<Mesh *, vector<OptixTraversableHandle>> meshes;
- meshes.reserve(bvh->meshes.size());
+ unordered_map<Geometry *, OptixTraversableHandle> geometry;
+ geometry.reserve(bvh->geometry.size());
// Free all previous acceleration structures
for (CUdeviceptr mem : as_mem) {
@@ -1244,23 +1114,25 @@ class OptiXDevice : public Device {
// Build bottom level acceleration structures (BLAS)
// Note: Always keep this logic in sync with bvh_optix.cpp!
for (Object *ob : bvh->objects) {
- // Skip meshes for which acceleration structure already exists
- if (meshes.find(ob->mesh) != meshes.end())
+ // Skip geometry for which acceleration structure already exists
+ Geometry *geom = ob->geometry;
+ if (geometry.find(geom) != geometry.end())
continue;
- Mesh *const mesh = ob->mesh;
- vector<OptixTraversableHandle> handles;
- handles.reserve(2);
+ if (geom->type == Geometry::HAIR) {
+ // Build BLAS for curve primitives
+ Hair *const hair = static_cast<Hair *const>(ob->geometry);
+ if (hair->num_curves() == 0) {
+ continue;
+ }
- // Build BLAS for curve primitives
- if (bvh->params.primitive_mask & PRIMITIVE_ALL_CURVE && mesh->num_curves() > 0) {
- const size_t num_curves = mesh->num_curves();
- const size_t num_segments = mesh->num_segments();
+ const size_t num_curves = hair->num_curves();
+ const size_t num_segments = hair->num_segments();
size_t num_motion_steps = 1;
- Attribute *motion_keys = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
- if (motion_blur && mesh->use_motion_blur && motion_keys) {
- num_motion_steps = mesh->motion_steps;
+ Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if (motion_blur && hair->use_motion_blur && motion_keys) {
+ num_motion_steps = hair->motion_steps;
}
device_vector<OptixAabb> aabb_data(this, "temp_aabb_data", MEM_READ_ONLY);
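BLAS construction now keys on Geometry pointers instead of Mesh pointers, skipping any geometry that already has an acceleration structure and storing exactly one handle per geometry. A minimal deduplication sketch using unordered_map, with hypothetical Geometry/Object types and a stand-in handle type:

    #include <cstdint>
    #include <cstdio>
    #include <unordered_map>
    #include <vector>

    struct Geometry { int id; };            // hypothetical shared geometry
    struct Object { Geometry *geometry; };  // hypothetical scene object
    using TraversableHandle = uint64_t;     // stand-in for OptixTraversableHandle

    int main()
    {
      Geometry a{1}, b{2};
      std::vector<Object> objects = {{&a}, {&b}, {&a}};  // two objects share 'a'

      std::unordered_map<Geometry *, TraversableHandle> handles;
      handles.reserve(objects.size());

      TraversableHandle next = 100;
      for (const Object &ob : objects) {
        // Skip geometry for which an acceleration structure already exists.
        if (handles.find(ob.geometry) != handles.end())
          continue;
        handles.insert({ob.geometry, next++});  // pretend-build a BLAS
      }
      printf("built %zu BLAS for %zu objects\n", handles.size(), objects.size());  // 2 for 3
    }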
@@ -1269,21 +1141,21 @@ class OptiXDevice : public Device {
// Get AABBs for each motion step
for (size_t step = 0; step < num_motion_steps; ++step) {
// The center step for motion vertices is not stored in the attribute
- const float3 *keys = mesh->curve_keys.data();
+ const float3 *keys = hair->curve_keys.data();
size_t center_step = (num_motion_steps - 1) / 2;
if (step != center_step) {
size_t attr_offset = (step > center_step) ? step - 1 : step;
// Technically this is a float4 array, but sizeof(float3) is the same as sizeof(float4)
- keys = motion_keys->data_float3() + attr_offset * mesh->curve_keys.size();
+ keys = motion_keys->data_float3() + attr_offset * hair->curve_keys.size();
}
size_t i = step * num_segments;
for (size_t j = 0; j < num_curves; ++j) {
- const Mesh::Curve c = mesh->get_curve(j);
+ const Hair::Curve c = hair->get_curve(j);
for (size_t k = 0; k < c.num_segments(); ++i, ++k) {
BoundBox bounds = BoundBox::empty;
- c.bounds_grow(k, keys, mesh->curve_radius.data(), bounds);
+ c.bounds_grow(k, keys, hair->curve_radius.data(), bounds);
aabb_data[i].minX = bounds.min.x;
aabb_data[i].minY = bounds.min.y;
@@ -1314,16 +1186,24 @@ class OptiXDevice : public Device {
build_input.aabbArray.strideInBytes = sizeof(OptixAabb);
build_input.aabbArray.flags = &build_flags;
build_input.aabbArray.numSbtRecords = 1;
- build_input.aabbArray.primitiveIndexOffset = mesh->prim_offset;
+ build_input.aabbArray.primitiveIndexOffset = hair->optix_prim_offset;
// Allocate memory for new BLAS and build it
- handles.emplace_back();
- if (!build_optix_bvh(build_input, num_motion_steps, handles.back()))
+ OptixTraversableHandle handle;
+ if (build_optix_bvh(build_input, num_motion_steps, handle)) {
+ geometry.insert({ob->geometry, handle});
+ }
+ else {
return false;
+ }
}
+ else if (geom->type == Geometry::MESH) {
+ // Build BLAS for triangle primitives
+ Mesh *const mesh = static_cast<Mesh *const>(ob->geometry);
+ if (mesh->num_triangles() == 0) {
+ continue;
+ }
- // Build BLAS for triangle primitives
- if (bvh->params.primitive_mask & PRIMITIVE_ALL_TRIANGLE && mesh->num_triangles() > 0) {
const size_t num_verts = mesh->verts.size();
size_t num_motion_steps = 1;
@@ -1378,23 +1258,24 @@ class OptiXDevice : public Device {
// buffers for that purpose. OptiX does not allow this to be zero though, so just pass in
// one and rely on that having the same meaning in this case.
build_input.triangleArray.numSbtRecords = 1;
- // Triangle primitives are packed right after the curve primitives of this mesh
- build_input.triangleArray.primitiveIndexOffset = mesh->prim_offset + mesh->num_segments();
+ build_input.triangleArray.primitiveIndexOffset = mesh->optix_prim_offset;
// Allocate memory for new BLAS and build it
- handles.emplace_back();
- if (!build_optix_bvh(build_input, num_motion_steps, handles.back()))
+ OptixTraversableHandle handle;
+ if (build_optix_bvh(build_input, num_motion_steps, handle)) {
+ geometry.insert({ob->geometry, handle});
+ }
+ else {
return false;
+ }
}
-
- meshes.insert({mesh, handles});
}
// Fill instance descriptions
device_vector<OptixAabb> aabbs(this, "tlas_aabbs", MEM_READ_ONLY);
- aabbs.alloc(bvh->objects.size() * 2);
+ aabbs.alloc(bvh->objects.size());
device_vector<OptixInstance> instances(this, "tlas_instances", MEM_READ_ONLY);
- instances.alloc(bvh->objects.size() * 2);
+ instances.alloc(bvh->objects.size());
for (Object *ob : bvh->objects) {
// Skip non-traceable objects
@@ -1402,113 +1283,117 @@ class OptiXDevice : public Device {
continue;
// Create separate instance for triangle/curve meshes of an object
- for (OptixTraversableHandle handle : meshes[ob->mesh]) {
- OptixAabb &aabb = aabbs[num_instances];
- aabb.minX = ob->bounds.min.x;
- aabb.minY = ob->bounds.min.y;
- aabb.minZ = ob->bounds.min.z;
- aabb.maxX = ob->bounds.max.x;
- aabb.maxY = ob->bounds.max.y;
- aabb.maxZ = ob->bounds.max.z;
-
- OptixInstance &instance = instances[num_instances++];
- memset(&instance, 0, sizeof(instance));
-
- // Clear transform to identity matrix
- instance.transform[0] = 1.0f;
- instance.transform[5] = 1.0f;
- instance.transform[10] = 1.0f;
-
- // Set user instance ID to object index
- instance.instanceId = ob->get_device_index();
-
- // Volumes have a special bit set in the visibility mask so a trace can mask only volumes
- // See 'scene_intersect_volume' in bvh.h
- instance.visibilityMask = (ob->mesh->has_volume ? 3 : 1);
-
- // Insert motion traversable if object has motion
- if (motion_blur && ob->use_motion()) {
- size_t motion_keys = max(ob->motion.size(), 2) - 2;
- size_t motion_transform_size = sizeof(OptixSRTMotionTransform) +
- motion_keys * sizeof(OptixSRTData);
-
- const CUDAContextScope scope(cuda_context);
-
- CUdeviceptr motion_transform_gpu = 0;
- check_result_cuda_ret(cuMemAlloc(&motion_transform_gpu, motion_transform_size));
- as_mem.push_back(motion_transform_gpu);
-
- // Allocate host side memory for motion transform and fill it with transform data
- OptixSRTMotionTransform &motion_transform = *reinterpret_cast<OptixSRTMotionTransform *>(
- new uint8_t[motion_transform_size]);
- motion_transform.child = handle;
- motion_transform.motionOptions.numKeys = ob->motion.size();
- motion_transform.motionOptions.flags = OPTIX_MOTION_FLAG_NONE;
- motion_transform.motionOptions.timeBegin = 0.0f;
- motion_transform.motionOptions.timeEnd = 1.0f;
-
- OptixSRTData *const srt_data = motion_transform.srtData;
- array<DecomposedTransform> decomp(ob->motion.size());
- transform_motion_decompose(decomp.data(), ob->motion.data(), ob->motion.size());
-
- for (size_t i = 0; i < ob->motion.size(); ++i) {
- // Scale
- srt_data[i].sx = decomp[i].y.w; // scale.x.x
- srt_data[i].sy = decomp[i].z.w; // scale.y.y
- srt_data[i].sz = decomp[i].w.w; // scale.z.z
-
- // Shear
- srt_data[i].a = decomp[i].z.x; // scale.x.y
- srt_data[i].b = decomp[i].z.y; // scale.x.z
- srt_data[i].c = decomp[i].w.x; // scale.y.z
- assert(decomp[i].z.z == 0.0f); // scale.y.x
- assert(decomp[i].w.y == 0.0f); // scale.z.x
- assert(decomp[i].w.z == 0.0f); // scale.z.y
-
- // Pivot point
- srt_data[i].pvx = 0.0f;
- srt_data[i].pvy = 0.0f;
- srt_data[i].pvz = 0.0f;
-
- // Rotation
- srt_data[i].qx = decomp[i].x.x;
- srt_data[i].qy = decomp[i].x.y;
- srt_data[i].qz = decomp[i].x.z;
- srt_data[i].qw = decomp[i].x.w;
-
- // Translation
- srt_data[i].tx = decomp[i].y.x;
- srt_data[i].ty = decomp[i].y.y;
- srt_data[i].tz = decomp[i].y.z;
- }
+ auto handle_it = geometry.find(ob->geometry);
+ if (handle_it == geometry.end()) {
+ continue;
+ }
+ OptixTraversableHandle handle = handle_it->second;
+
+ OptixAabb &aabb = aabbs[num_instances];
+ aabb.minX = ob->bounds.min.x;
+ aabb.minY = ob->bounds.min.y;
+ aabb.minZ = ob->bounds.min.z;
+ aabb.maxX = ob->bounds.max.x;
+ aabb.maxY = ob->bounds.max.y;
+ aabb.maxZ = ob->bounds.max.z;
+
+ OptixInstance &instance = instances[num_instances++];
+ memset(&instance, 0, sizeof(instance));
+
+ // Clear transform to identity matrix
+ instance.transform[0] = 1.0f;
+ instance.transform[5] = 1.0f;
+ instance.transform[10] = 1.0f;
+
+ // Set user instance ID to object index
+ instance.instanceId = ob->get_device_index();
+
+ // Volumes have a special bit set in the visibility mask so a trace can mask only volumes
+ // See 'scene_intersect_volume' in bvh.h
+ instance.visibilityMask = (ob->geometry->has_volume ? 3 : 1);
+
+ // Insert motion traversable if object has motion
+ if (motion_blur && ob->use_motion()) {
+ size_t motion_keys = max(ob->motion.size(), 2) - 2;
+ size_t motion_transform_size = sizeof(OptixSRTMotionTransform) +
+ motion_keys * sizeof(OptixSRTData);
+
+ const CUDAContextScope scope(cuContext);
+
+ CUdeviceptr motion_transform_gpu = 0;
+ check_result_cuda_ret(cuMemAlloc(&motion_transform_gpu, motion_transform_size));
+ as_mem.push_back(motion_transform_gpu);
+
+ // Allocate host side memory for motion transform and fill it with transform data
+ OptixSRTMotionTransform &motion_transform = *reinterpret_cast<OptixSRTMotionTransform *>(
+ new uint8_t[motion_transform_size]);
+ motion_transform.child = handle;
+ motion_transform.motionOptions.numKeys = ob->motion.size();
+ motion_transform.motionOptions.flags = OPTIX_MOTION_FLAG_NONE;
+ motion_transform.motionOptions.timeBegin = 0.0f;
+ motion_transform.motionOptions.timeEnd = 1.0f;
+
+ OptixSRTData *const srt_data = motion_transform.srtData;
+ array<DecomposedTransform> decomp(ob->motion.size());
+ transform_motion_decompose(decomp.data(), ob->motion.data(), ob->motion.size());
+
+ for (size_t i = 0; i < ob->motion.size(); ++i) {
+ // Scale
+ srt_data[i].sx = decomp[i].y.w; // scale.x.x
+ srt_data[i].sy = decomp[i].z.w; // scale.y.y
+ srt_data[i].sz = decomp[i].w.w; // scale.z.z
+
+ // Shear
+ srt_data[i].a = decomp[i].z.x; // scale.x.y
+ srt_data[i].b = decomp[i].z.y; // scale.x.z
+ srt_data[i].c = decomp[i].w.x; // scale.y.z
+ assert(decomp[i].z.z == 0.0f); // scale.y.x
+ assert(decomp[i].w.y == 0.0f); // scale.z.x
+ assert(decomp[i].w.z == 0.0f); // scale.z.y
+
+ // Pivot point
+ srt_data[i].pvx = 0.0f;
+ srt_data[i].pvy = 0.0f;
+ srt_data[i].pvz = 0.0f;
+
+ // Rotation
+ srt_data[i].qx = decomp[i].x.x;
+ srt_data[i].qy = decomp[i].x.y;
+ srt_data[i].qz = decomp[i].x.z;
+ srt_data[i].qw = decomp[i].x.w;
+
+ // Translation
+ srt_data[i].tx = decomp[i].y.x;
+ srt_data[i].ty = decomp[i].y.y;
+ srt_data[i].tz = decomp[i].y.z;
+ }
- // Upload motion transform to GPU
- cuMemcpyHtoD(motion_transform_gpu, &motion_transform, motion_transform_size);
- delete[] reinterpret_cast<uint8_t *>(&motion_transform);
+ // Upload motion transform to GPU
+ cuMemcpyHtoD(motion_transform_gpu, &motion_transform, motion_transform_size);
+ delete[] reinterpret_cast<uint8_t *>(&motion_transform);
- // Disable instance transform if object uses motion transform already
- instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
+ // Disable instance transform if object uses motion transform already
+ instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
+
+ // Get traversable handle to motion transform
+ optixConvertPointerToTraversableHandle(context,
+ motion_transform_gpu,
+ OPTIX_TRAVERSABLE_TYPE_SRT_MOTION_TRANSFORM,
+ &instance.traversableHandle);
+ }
+ else {
+ instance.traversableHandle = handle;
- // Get traversable handle to motion transform
- optixConvertPointerToTraversableHandle(context,
- motion_transform_gpu,
- OPTIX_TRAVERSABLE_TYPE_SRT_MOTION_TRANSFORM,
- &instance.traversableHandle);
+ if (ob->geometry->is_instanced()) {
+ // Set transform matrix
+ memcpy(instance.transform, &ob->tfm, sizeof(instance.transform));
}
else {
- instance.traversableHandle = handle;
-
- if (ob->mesh->is_instanced()) {
- // Set transform matrix
- memcpy(instance.transform, &ob->tfm, sizeof(instance.transform));
- }
- else {
- // Disable instance transform if mesh already has it applied to vertex data
- instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
- // Non-instanced objects read ID from prim_object, so
- // distinguish them from instanced objects with high bit set
- instance.instanceId |= 0x800000;
- }
+ // Disable instance transform if geometry already has it applied to vertex data
+ instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
+ // Non-instanced objects read ID from prim_object, so
+ // distinguish them from instanced objects with high bit set
+ instance.instanceId |= 0x800000;
}
}
}
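Non-instanced objects keep the identity transform and get a high bit set on the instance ID so the kernel can distinguish them from instanced objects. A tiny sketch of setting and testing such a flag bit; the 0x800000 value comes from the diff, while the helper names are illustrative:

    #include <cstdio>

    // High bit used to mark non-instanced objects in the 24-bit instance ID.
    static const unsigned kNonInstancedBit = 0x800000;

    static unsigned mark_non_instanced(unsigned object_index)
    {
      return object_index | kNonInstancedBit;
    }

    static bool is_non_instanced(unsigned instance_id)
    {
      return (instance_id & kNonInstancedBit) != 0;
    }

    int main()
    {
      unsigned id = mark_non_instanced(42);
      printf("id=0x%x non_instanced=%d object=%u\n", id, is_non_instanced(id), id & ~kNonInstancedBit);
    }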
@@ -1530,655 +1415,76 @@ class OptiXDevice : public Device {
return build_optix_bvh(build_input, 0, tlas_handle);
}
- void update_texture_info()
- {
- if (need_texture_info) {
- texture_info.copy_to_device();
- need_texture_info = false;
- }
- }
-
- void update_launch_params(const char *name, size_t offset, void *data, size_t data_size)
+ void const_copy_to(const char *name, void *host, size_t size) override
{
- const CUDAContextScope scope(cuda_context);
-
- for (int i = 0; i < info.cpu_threads; ++i)
- check_result_cuda(
- cuMemcpyHtoD(launch_params.device_pointer + i * launch_params.data_elements + offset,
- data,
- data_size));
-
// Set constant memory for CUDA module
- // TODO(pmours): This is only used for tonemapping (see 'launch_film_convert').
+ // TODO(pmours): This is only used for tonemapping (see 'film_convert').
// Could be removed by moving those functions to filter CUDA module.
- size_t bytes = 0;
- CUdeviceptr mem = 0;
- check_result_cuda(cuModuleGetGlobal(&mem, &bytes, cuda_module, name));
- assert(mem != 0 && bytes == data_size);
- check_result_cuda(cuMemcpyHtoD(mem, data, data_size));
- }
-
- void mem_alloc(device_memory &mem) override
- {
- if (mem.type == MEM_PIXELS && !background) {
- // Always fall back to no interop for now
- // TODO(pmours): Support OpenGL interop when moving CUDA memory management to common code
- background = true;
- }
- else if (mem.type == MEM_TEXTURE) {
- assert(!"mem_alloc not supported for textures.");
- return;
- }
-
- generic_alloc(mem);
- }
-
- CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0)
- {
- CUDAContextScope scope(cuda_context);
-
- CUdeviceptr device_pointer = 0;
- size_t size = mem.memory_size() + pitch_padding;
-
- CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY;
- const char *status = "";
-
- /* First try allocating in device memory, respecting headroom. We make
- * an exception for texture info. It is small and frequently accessed,
- * so treat it as working memory.
- *
- * If there is not enough room for working memory, we will try to move
- * textures to host memory, assuming the performance impact would have
- * been worse for working memory. */
- bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info);
- bool is_image = is_texture && (mem.data_height > 1);
-
- size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
-
- size_t total = 0, free = 0;
- cuMemGetInfo(&free, &total);
+ CUDADevice::const_copy_to(name, host, size);
- /* Move textures to host memory if needed. */
- if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
- move_textures_to_host(size + headroom - free, is_texture);
- cuMemGetInfo(&free, &total);
- }
-
- /* Allocate in device memory. */
- if (!move_texture_to_host && (size + headroom) < free) {
- mem_alloc_result = cuMemAlloc(&device_pointer, size);
- if (mem_alloc_result == CUDA_SUCCESS) {
- status = " in device memory";
- }
- }
-
- /* Fall back to mapped host memory if needed and possible. */
- void *shared_pointer = 0;
-
- if (mem_alloc_result != CUDA_SUCCESS && can_map_host) {
- if (mem.shared_pointer) {
- /* Another device already allocated host memory. */
- mem_alloc_result = CUDA_SUCCESS;
- shared_pointer = mem.shared_pointer;
- }
- else if (map_host_used + size < map_host_limit) {
- /* Allocate host memory ourselves. */
- mem_alloc_result = cuMemHostAlloc(
- &shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
-
- assert((mem_alloc_result == CUDA_SUCCESS && shared_pointer != 0) ||
- (mem_alloc_result != CUDA_SUCCESS && shared_pointer == 0));
- }
-
- if (mem_alloc_result == CUDA_SUCCESS) {
- cuMemHostGetDevicePointer_v2(&device_pointer, shared_pointer, 0);
- map_host_used += size;
- status = " in host memory";
- }
- else {
- status = " failed, out of host memory";
- }
- }
- else if (mem_alloc_result != CUDA_SUCCESS) {
- status = " failed, out of device and host memory";
- }
-
- if (mem.name) {
- VLOG(1) << "Buffer allocate: " << mem.name << ", "
- << string_human_readable_number(mem.memory_size()) << " bytes. ("
- << string_human_readable_size(mem.memory_size()) << ")" << status;
- }
-
- if (mem_alloc_result != CUDA_SUCCESS) {
- set_error(string_printf("Buffer allocate %s", status));
- return NULL;
- }
-
- mem.device_pointer = (device_ptr)device_pointer;
- mem.device_size = size;
- stats.mem_alloc(size);
-
- if (!mem.device_pointer) {
- return NULL;
- }
-
- /* Insert into map of allocations. */
- CUDAMem *cmem = &cuda_mem_map[&mem];
- if (shared_pointer != 0) {
- /* Replace host pointer with our host allocation. Only works if
- * CUDA memory layout is the same and has no pitch padding. Also
- * does not work if we move textures to host during a render,
- * since other devices might be using the memory. */
-
- if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
- mem.host_pointer != shared_pointer) {
- memcpy(shared_pointer, mem.host_pointer, size);
-
- /* A call to device_memory::host_free() should be preceded by
- * a call to device_memory::device_free() for host memory
- * allocated by a device to be handled properly. Two exceptions
- * are here and a call in CUDADevice::generic_alloc(), where
- * the current host memory can be assumed to be allocated by
- * device_memory::host_alloc(), not by a device */
-
- mem.host_free();
- mem.host_pointer = shared_pointer;
- }
- mem.shared_pointer = shared_pointer;
- mem.shared_counter++;
- cmem->use_mapped_host = true;
- }
- else {
- cmem->use_mapped_host = false;
- }
-
- return cmem;
- }
+ if (strcmp(name, "__data") == 0) {
+ assert(size <= sizeof(KernelData));
- void tex_alloc(device_memory &mem)
- {
- CUDAContextScope scope(cuda_context);
-
- /* General variables for both architectures */
- string bind_name = mem.name;
- size_t dsize = datatype_size(mem.data_type);
- size_t size = mem.memory_size();
-
- CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP;
- switch (mem.extension) {
- case EXTENSION_REPEAT:
- address_mode = CU_TR_ADDRESS_MODE_WRAP;
- break;
- case EXTENSION_EXTEND:
- address_mode = CU_TR_ADDRESS_MODE_CLAMP;
- break;
- case EXTENSION_CLIP:
- address_mode = CU_TR_ADDRESS_MODE_BORDER;
- break;
- default:
- assert(0);
- break;
- }
+ // Fix traversable handle on multi devices
+ KernelData *const data = (KernelData *)host;
+ *(OptixTraversableHandle *)&data->bvh.scene = tlas_handle;
- CUfilter_mode filter_mode;
- if (mem.interpolation == INTERPOLATION_CLOSEST) {
- filter_mode = CU_TR_FILTER_MODE_POINT;
- }
- else {
- filter_mode = CU_TR_FILTER_MODE_LINEAR;
+ update_launch_params(name, offsetof(KernelParams, data), host, size);
+ return;
}
- /* Data Storage */
- if (mem.interpolation == INTERPOLATION_NONE) {
- generic_alloc(mem);
- generic_copy_to(mem);
-
- // Update data storage pointers in launch parameters
+ // Update data storage pointers in launch parameters
# define KERNEL_TEX(data_type, tex_name) \
- if (strcmp(mem.name, #tex_name) == 0) \
- update_launch_params( \
- mem.name, offsetof(KernelParams, tex_name), &mem.device_pointer, sizeof(device_ptr));
+ if (strcmp(name, #tex_name) == 0) { \
+ update_launch_params(name, offsetof(KernelParams, tex_name), host, size); \
+ return; \
+ }
# include "kernel/kernel_textures.h"
# undef KERNEL_TEX
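The per-texture dispatch above relies on the X-macro pattern: KERNEL_TEX is defined locally, kernel_textures.h is included to expand one comparison per known texture, and the macro is undefined again. A standalone sketch of the same technique, with a hypothetical texture list standing in for kernel_textures.h:

    #include <cstdio>
    #include <cstring>

    // Hypothetical stand-in for kernel_textures.h: one KERNEL_TEX entry per texture.
    #define MY_KERNEL_TEXTURES \
      KERNEL_TEX(float, tex_a) \
      KERNEL_TEX(int, tex_b)

    static void update_param(const char *name) { printf("update %s\n", name); }

    static bool dispatch_by_name(const char *name)
    {
    // Expand the list into a chain of string comparisons, one per texture.
    #define KERNEL_TEX(type, tex_name) \
      if (strcmp(name, #tex_name) == 0) { \
        update_param(#tex_name); \
        return true; \
      }
      MY_KERNEL_TEXTURES
    #undef KERNEL_TEX
      return false;
    }

    int main()
    {
      dispatch_by_name("tex_b");    // matches the second entry
      dispatch_by_name("unknown");  // falls through, returns false
    }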
- return;
- }
-
- /* Image Texture Storage */
- CUarray_format_enum format;
- switch (mem.data_type) {
- case TYPE_UCHAR:
- format = CU_AD_FORMAT_UNSIGNED_INT8;
- break;
- case TYPE_UINT16:
- format = CU_AD_FORMAT_UNSIGNED_INT16;
- break;
- case TYPE_UINT:
- format = CU_AD_FORMAT_UNSIGNED_INT32;
- break;
- case TYPE_INT:
- format = CU_AD_FORMAT_SIGNED_INT32;
- break;
- case TYPE_FLOAT:
- format = CU_AD_FORMAT_FLOAT;
- break;
- case TYPE_HALF:
- format = CU_AD_FORMAT_HALF;
- break;
- default:
- assert(0);
- return;
- }
-
- CUDAMem *cmem = NULL;
- CUarray array_3d = NULL;
- size_t src_pitch = mem.data_width * dsize * mem.data_elements;
- size_t dst_pitch = src_pitch;
-
- if (mem.data_depth > 1) {
- /* 3D texture using array, there is no API for linear memory. */
- CUDA_ARRAY3D_DESCRIPTOR desc;
-
- desc.Width = mem.data_width;
- desc.Height = mem.data_height;
- desc.Depth = mem.data_depth;
- desc.Format = format;
- desc.NumChannels = mem.data_elements;
- desc.Flags = 0;
-
- VLOG(1) << "Array 3D allocate: " << mem.name << ", "
- << string_human_readable_number(mem.memory_size()) << " bytes. ("
- << string_human_readable_size(mem.memory_size()) << ")";
-
- check_result_cuda(cuArray3DCreate(&array_3d, &desc));
-
- if (!array_3d) {
- return;
- }
-
- CUDA_MEMCPY3D param;
- memset(&param, 0, sizeof(param));
- param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
- param.dstArray = array_3d;
- param.srcMemoryType = CU_MEMORYTYPE_HOST;
- param.srcHost = mem.host_pointer;
- param.srcPitch = src_pitch;
- param.WidthInBytes = param.srcPitch;
- param.Height = mem.data_height;
- param.Depth = mem.data_depth;
-
- check_result_cuda(cuMemcpy3D(&param));
-
- mem.device_pointer = (device_ptr)array_3d;
- mem.device_size = size;
- stats.mem_alloc(size);
-
- cmem = &cuda_mem_map[&mem];
- cmem->texobject = 0;
- cmem->array = array_3d;
- }
- else if (mem.data_height > 0) {
- /* 2D texture, using pitch aligned linear memory. */
- int alignment = 0;
- check_result_cuda(cuDeviceGetAttribute(
- &alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuda_device));
- dst_pitch = align_up(src_pitch, alignment);
- size_t dst_size = dst_pitch * mem.data_height;
-
- cmem = generic_alloc(mem, dst_size - mem.memory_size());
- if (!cmem) {
- return;
- }
-
- CUDA_MEMCPY2D param;
- memset(&param, 0, sizeof(param));
- param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
- param.dstDevice = mem.device_pointer;
- param.dstPitch = dst_pitch;
- param.srcMemoryType = CU_MEMORYTYPE_HOST;
- param.srcHost = mem.host_pointer;
- param.srcPitch = src_pitch;
- param.WidthInBytes = param.srcPitch;
- param.Height = mem.data_height;
-
- check_result_cuda(cuMemcpy2DUnaligned(&param));
- }
- else {
- /* 1D texture, using linear memory. */
- cmem = generic_alloc(mem);
- if (!cmem) {
- return;
- }
-
- check_result_cuda(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size));
- }
-
- /* Kepler+, bindless textures. */
- int flat_slot = 0;
- if (string_startswith(mem.name, "__tex_image")) {
- int pos = string(mem.name).rfind("_");
- flat_slot = atoi(mem.name + pos + 1);
- }
- else {
- assert(0);
- }
-
- CUDA_RESOURCE_DESC resDesc;
- memset(&resDesc, 0, sizeof(resDesc));
-
- if (array_3d) {
- resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
- resDesc.res.array.hArray = array_3d;
- resDesc.flags = 0;
- }
- else if (mem.data_height > 0) {
- resDesc.resType = CU_RESOURCE_TYPE_PITCH2D;
- resDesc.res.pitch2D.devPtr = mem.device_pointer;
- resDesc.res.pitch2D.format = format;
- resDesc.res.pitch2D.numChannels = mem.data_elements;
- resDesc.res.pitch2D.height = mem.data_height;
- resDesc.res.pitch2D.width = mem.data_width;
- resDesc.res.pitch2D.pitchInBytes = dst_pitch;
- }
- else {
- resDesc.resType = CU_RESOURCE_TYPE_LINEAR;
- resDesc.res.linear.devPtr = mem.device_pointer;
- resDesc.res.linear.format = format;
- resDesc.res.linear.numChannels = mem.data_elements;
- resDesc.res.linear.sizeInBytes = mem.device_size;
- }
-
- CUDA_TEXTURE_DESC texDesc;
- memset(&texDesc, 0, sizeof(texDesc));
- texDesc.addressMode[0] = address_mode;
- texDesc.addressMode[1] = address_mode;
- texDesc.addressMode[2] = address_mode;
- texDesc.filterMode = filter_mode;
- texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
-
- check_result_cuda(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
-
- /* Resize once */
- if (flat_slot >= texture_info.size()) {
- /* Allocate some slots in advance, to reduce amount
- * of re-allocations. */
- texture_info.resize(flat_slot + 128);
- }
-
- /* Set Mapping and tag that we need to (re-)upload to device */
- TextureInfo &info = texture_info[flat_slot];
- info.data = (uint64_t)cmem->texobject;
- info.cl_buffer = 0;
- info.interpolation = mem.interpolation;
- info.extension = mem.extension;
- info.width = mem.data_width;
- info.height = mem.data_height;
- info.depth = mem.data_depth;
- need_texture_info = true;
- }
-
- void mem_copy_to(device_memory &mem) override
- {
- if (mem.type == MEM_PIXELS) {
- assert(!"mem_copy_to not supported for pixels.");
- }
- else if (mem.type == MEM_TEXTURE) {
- tex_free(mem);
- tex_alloc(mem);
- }
- else {
- if (!mem.device_pointer) {
- generic_alloc(mem);
- }
-
- generic_copy_to(mem);
- }
- }
-
- void generic_copy_to(device_memory &mem)
- {
- if (mem.host_pointer && mem.device_pointer) {
- CUDAContextScope scope(cuda_context);
-
- /* If use_mapped_host of mem is false, the current device only
- * uses device memory allocated by cuMemAlloc regardless of
- * mem.host_pointer and mem.shared_pointer, and should copy
- * data from mem.host_pointer. */
-
- if (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer) {
- check_result_cuda(
- cuMemcpyHtoD((CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size()));
- }
- }
- }
-
- void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override
- {
- if (mem.type == MEM_PIXELS && !background) {
- assert(!"mem_copy_from not supported for pixels.");
- }
- else if (mem.type == MEM_TEXTURE) {
- assert(!"mem_copy_from not supported for textures.");
- }
- else {
- // Calculate linear memory offset and size
- const size_t size = elem * w * h;
- const size_t offset = elem * y * w;
-
- if (mem.host_pointer && mem.device_pointer) {
- const CUDAContextScope scope(cuda_context);
- check_result_cuda(cuMemcpyDtoH(
- (char *)mem.host_pointer + offset, (CUdeviceptr)mem.device_pointer + offset, size));
- }
- else if (mem.host_pointer) {
- memset((char *)mem.host_pointer + offset, 0, size);
- }
- }
- }
-
- void mem_zero(device_memory &mem) override
- {
- if (mem.host_pointer)
- memset(mem.host_pointer, 0, mem.memory_size());
-
- if (!mem.device_pointer)
- mem_alloc(mem); // Need to allocate memory first if it does not exist yet
-
- /* If use_mapped_host of mem is false, mem.device_pointer currently
- * refers to device memory regardless of mem.host_pointer and
- * mem.shared_pointer. */
-
- if (mem.device_pointer &&
- (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer)) {
- const CUDAContextScope scope(cuda_context);
- check_result_cuda(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size()));
- }
- }
-
- void mem_free(device_memory &mem) override
- {
- if (mem.type == MEM_PIXELS && !background) {
- assert(!"mem_free not supported for pixels.");
- }
- else if (mem.type == MEM_TEXTURE) {
- tex_free(mem);
- }
- else {
- generic_free(mem);
- }
- }
-
- void generic_free(device_memory &mem)
- {
- if (mem.device_pointer) {
- CUDAContextScope scope(cuda_context);
- const CUDAMem &cmem = cuda_mem_map[&mem];
-
- /* If cmem.use_mapped_host is true, reference counting is used
- * to safely free a mapped host memory. */
-
- if (cmem.use_mapped_host) {
- assert(mem.shared_pointer);
- if (mem.shared_pointer) {
- assert(mem.shared_counter > 0);
- if (--mem.shared_counter == 0) {
- if (mem.host_pointer == mem.shared_pointer) {
- mem.host_pointer = 0;
- }
- cuMemFreeHost(mem.shared_pointer);
- mem.shared_pointer = 0;
- }
- }
- map_host_used -= mem.device_size;
- }
- else {
- /* Free device memory. */
- cuMemFree(mem.device_pointer);
- }
-
- stats.mem_free(mem.device_size);
- mem.device_pointer = 0;
- mem.device_size = 0;
-
- cuda_mem_map.erase(cuda_mem_map.find(&mem));
- }
}
- void tex_free(device_memory &mem)
+ void update_launch_params(const char *name, size_t offset, void *data, size_t data_size)
{
- if (mem.device_pointer) {
- CUDAContextScope scope(cuda_context);
- const CUDAMem &cmem = cuda_mem_map[&mem];
-
- if (cmem.texobject) {
- /* Free bindless texture. */
- cuTexObjectDestroy(cmem.texobject);
- }
+ const CUDAContextScope scope(cuContext);
- if (cmem.array) {
- /* Free array. */
- cuArrayDestroy(cmem.array);
- stats.mem_free(mem.device_size);
- mem.device_pointer = 0;
- mem.device_size = 0;
-
- cuda_mem_map.erase(cuda_mem_map.find(&mem));
- }
- else {
- generic_free(mem);
- }
- }
+ for (int i = 0; i < info.cpu_threads; ++i)
+ check_result_cuda(
+ cuMemcpyHtoD(launch_params.device_pointer + i * launch_params.data_elements + offset,
+ data,
+ data_size));
}
- void move_textures_to_host(size_t size, bool for_texture)
+ void task_add(DeviceTask &task) override
{
- /* Signal to reallocate textures in host memory only. */
- move_texture_to_host = true;
-
- while (size > 0) {
- /* Find suitable memory allocation to move. */
- device_memory *max_mem = NULL;
- size_t max_size = 0;
- bool max_is_image = false;
-
- foreach (auto &pair, cuda_mem_map) {
- device_memory &mem = *pair.first;
- CUDAMem *cmem = &pair.second;
-
- bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info);
- bool is_image = is_texture && (mem.data_height > 1);
-
- /* Can't move this type of memory. */
- if (!is_texture || cmem->array) {
- continue;
- }
-
- /* Already in host memory. */
- if (cmem->use_mapped_host) {
- continue;
- }
-
- /* For other textures, only move image textures. */
- if (for_texture && !is_image) {
- continue;
- }
-
- /* Try to move largest allocation, prefer moving images. */
- if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
- max_is_image = is_image;
- max_size = mem.device_size;
- max_mem = &mem;
- }
+ struct OptiXDeviceTask : public DeviceTask {
+ OptiXDeviceTask(OptiXDevice *device, DeviceTask &task, int task_index) : DeviceTask(task)
+ {
+ // Using task index parameter instead of thread index, since number of CUDA streams may
+ // differ from number of threads
+ run = function_bind(&OptiXDevice::thread_run, device, *this, task_index);
}
+ };
- /* Move to host memory. This part is mutex protected since
- * multiple CUDA devices could be moving the memory. The
- * first one will do it, and the rest will adopt the pointer. */
- if (max_mem) {
- VLOG(1) << "Move memory from device to host: " << max_mem->name;
-
- static thread_mutex move_mutex;
- thread_scoped_lock lock(move_mutex);
-
- /* Preserve the original device pointer, in case of multi device
- * we can't change it because the pointer mapping would break. */
- device_ptr prev_pointer = max_mem->device_pointer;
- size_t prev_size = max_mem->device_size;
-
- tex_free(*max_mem);
- tex_alloc(*max_mem);
- size = (max_size >= size) ? 0 : size - max_size;
+ // Upload texture information to device if it has changed since last launch
+ load_texture_info();
- max_mem->device_pointer = prev_pointer;
- max_mem->device_size = prev_size;
- }
- else {
- break;
- }
+ if (task.type == DeviceTask::FILM_CONVERT) {
+ // Execute in main thread because of OpenGL access
+ film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
+ return;
}
- /* Update texture info array with new pointers. */
- update_texture_info();
-
- move_texture_to_host = false;
- }
-
- void const_copy_to(const char *name, void *host, size_t size) override
- {
- if (strcmp(name, "__data") == 0) {
- assert(size <= sizeof(KernelData));
-
- // Fix traversable handle on multi devices
- KernelData *const data = (KernelData *)host;
- *(OptixTraversableHandle *)&data->bvh.scene = tlas_handle;
-
- update_launch_params(name, offsetof(KernelParams, data), host, size);
+ if (task.type == DeviceTask::DENOISE_BUFFER) {
+ // Execute denoising in a single thread (e.g. to avoid race conditions during creation)
+ task_pool.push(new OptiXDeviceTask(this, task, 0));
+ return;
}
- }
-
- device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) override
- {
- return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
- }
-
- void task_add(DeviceTask &task) override
- {
- // Upload texture information to device if it has changed since last launch
- update_texture_info();
// Split task into smaller ones
list<DeviceTask> tasks;
task.split(tasks, info.cpu_threads);
// Queue tasks in internal task pool
- struct OptiXDeviceTask : public DeviceTask {
- OptiXDeviceTask(OptiXDevice *device, DeviceTask &task, int task_index) : DeviceTask(task)
- {
- // Using task index parameter instead of thread index, since number of CUDA streams may
- // differ from number of threads
- run = function_bind(&OptiXDevice::thread_run, device, *this, task_index);
- }
- };
-
int task_index = 0;
for (DeviceTask &task : tasks)
task_pool.push(new OptiXDeviceTask(this, task, task_index++));
@@ -2195,403 +1501,6 @@ class OptiXDevice : public Device {
// Cancel any remaining tasks in the internal pool
task_pool.cancel();
}
-
- bool denoising_non_local_means(device_ptr image_ptr,
- device_ptr guide_ptr,
- device_ptr variance_ptr,
- device_ptr out_ptr,
- DenoisingTask *task,
- int thread_index)
- {
- if (have_error())
- return false;
-
- int stride = task->buffer.stride;
- int w = task->buffer.width;
- int h = task->buffer.h;
- int r = task->nlm_state.r;
- int f = task->nlm_state.f;
- float a = task->nlm_state.a;
- float k_2 = task->nlm_state.k_2;
-
- int pass_stride = task->buffer.pass_stride;
- int num_shifts = (2 * r + 1) * (2 * r + 1);
- int channel_offset = task->nlm_state.is_color ? task->buffer.pass_stride : 0;
- int frame_offset = 0;
-
- CUdeviceptr difference = (CUdeviceptr)task->buffer.temporary_mem.device_pointer;
- CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts;
- CUdeviceptr weightAccum = difference + 2 * sizeof(float) * pass_stride * num_shifts;
- CUdeviceptr scale_ptr = 0;
-
- check_result_cuda_ret(
- cuMemsetD8Async(weightAccum, 0, sizeof(float) * pass_stride, cuda_stream[thread_index]));
- check_result_cuda_ret(
- cuMemsetD8Async(out_ptr, 0, sizeof(float) * pass_stride, cuda_stream[thread_index]));
-
- {
- CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMUpdateOutput;
- check_result_cuda_ret(cuModuleGetFunction(
- &cuNLMCalcDifference, cuda_filter_module, "kernel_cuda_filter_nlm_calc_difference"));
- check_result_cuda_ret(
- cuModuleGetFunction(&cuNLMBlur, cuda_filter_module, "kernel_cuda_filter_nlm_blur"));
- check_result_cuda_ret(cuModuleGetFunction(
- &cuNLMCalcWeight, cuda_filter_module, "kernel_cuda_filter_nlm_calc_weight"));
- check_result_cuda_ret(cuModuleGetFunction(
- &cuNLMUpdateOutput, cuda_filter_module, "kernel_cuda_filter_nlm_update_output"));
-
- check_result_cuda_ret(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1));
- check_result_cuda_ret(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1));
- check_result_cuda_ret(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1));
- check_result_cuda_ret(cuFuncSetCacheConfig(cuNLMUpdateOutput, CU_FUNC_CACHE_PREFER_L1));
-
- CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference, w * h, num_shifts);
-
- void *calc_difference_args[] = {&guide_ptr,
- &variance_ptr,
- &scale_ptr,
- &difference,
- &w,
- &h,
- &stride,
- &pass_stride,
- &r,
- &channel_offset,
- &frame_offset,
- &a,
- &k_2};
- void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
- void *calc_weight_args[] = {
- &blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
- void *update_output_args[] = {&blurDifference,
- &image_ptr,
- &out_ptr,
- &weightAccum,
- &w,
- &h,
- &stride,
- &pass_stride,
- &channel_offset,
- &r,
- &f};
-
- CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMUpdateOutput, update_output_args);
- }
-
- {
- CUfunction cuNLMNormalize;
- check_result_cuda_ret(cuModuleGetFunction(
- &cuNLMNormalize, cuda_filter_module, "kernel_cuda_filter_nlm_normalize"));
- check_result_cuda_ret(cuFuncSetCacheConfig(cuNLMNormalize, CU_FUNC_CACHE_PREFER_L1));
- void *normalize_args[] = {&out_ptr, &weightAccum, &w, &h, &stride};
- CUDA_GET_BLOCKSIZE(cuNLMNormalize, w, h);
- CUDA_LAUNCH_KERNEL(cuNLMNormalize, normalize_args);
- check_result_cuda_ret(cuStreamSynchronize(cuda_stream[thread_index]));
- }
-
- return !have_error();
- }
-
- bool denoising_construct_transform(DenoisingTask *task, int thread_index)
- {
- if (have_error())
- return false;
-
- CUfunction cuFilterConstructTransform;
- check_result_cuda_ret(cuModuleGetFunction(&cuFilterConstructTransform,
- cuda_filter_module,
- "kernel_cuda_filter_construct_transform"));
- check_result_cuda_ret(
- cuFuncSetCacheConfig(cuFilterConstructTransform, CU_FUNC_CACHE_PREFER_SHARED));
- CUDA_GET_BLOCKSIZE(cuFilterConstructTransform, task->storage.w, task->storage.h);
-
- void *args[] = {&task->buffer.mem.device_pointer,
- &task->tile_info_mem.device_pointer,
- &task->storage.transform.device_pointer,
- &task->storage.rank.device_pointer,
- &task->filter_area,
- &task->rect,
- &task->radius,
- &task->pca_threshold,
- &task->buffer.pass_stride,
- &task->buffer.frame_stride,
- &task->buffer.use_time};
- CUDA_LAUNCH_KERNEL(cuFilterConstructTransform, args);
- check_result_cuda_ret(cuCtxSynchronize());
-
- return !have_error();
- }
-
- bool denoising_accumulate(device_ptr color_ptr,
- device_ptr color_variance_ptr,
- device_ptr scale_ptr,
- int frame,
- DenoisingTask *task,
- int thread_index)
- {
- if (have_error())
- return false;
-
- int r = task->radius;
- int f = 4;
- float a = 1.0f;
- float k_2 = task->nlm_k_2;
-
- int w = task->reconstruction_state.source_w;
- int h = task->reconstruction_state.source_h;
- int stride = task->buffer.stride;
- int frame_offset = frame * task->buffer.frame_stride;
- int t = task->tile_info->frames[frame];
-
- int pass_stride = task->buffer.pass_stride;
- int num_shifts = (2 * r + 1) * (2 * r + 1);
-
- CUdeviceptr difference = (CUdeviceptr)task->buffer.temporary_mem.device_pointer;
- CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts;
-
- CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian;
- check_result_cuda_ret(cuModuleGetFunction(
- &cuNLMCalcDifference, cuda_filter_module, "kernel_cuda_filter_nlm_calc_difference"));
- check_result_cuda_ret(
- cuModuleGetFunction(&cuNLMBlur, cuda_filter_module, "kernel_cuda_filter_nlm_blur"));
- check_result_cuda_ret(cuModuleGetFunction(
- &cuNLMCalcWeight, cuda_filter_module, "kernel_cuda_filter_nlm_calc_weight"));
- check_result_cuda_ret(cuModuleGetFunction(
- &cuNLMConstructGramian, cuda_filter_module, "kernel_cuda_filter_nlm_construct_gramian"));
-
- check_result_cuda_ret(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1));
- check_result_cuda_ret(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1));
- check_result_cuda_ret(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1));
- check_result_cuda_ret(
- cuFuncSetCacheConfig(cuNLMConstructGramian, CU_FUNC_CACHE_PREFER_SHARED));
-
- CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference,
- task->reconstruction_state.source_w *
- task->reconstruction_state.source_h,
- num_shifts);
-
- void *calc_difference_args[] = {&color_ptr,
- &color_variance_ptr,
- &scale_ptr,
- &difference,
- &w,
- &h,
- &stride,
- &pass_stride,
- &r,
- &pass_stride,
- &frame_offset,
- &a,
- &k_2};
- void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
- void *calc_weight_args[] = {
- &blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
- void *construct_gramian_args[] = {&t,
- &blurDifference,
- &task->buffer.mem.device_pointer,
- &task->storage.transform.device_pointer,
- &task->storage.rank.device_pointer,
- &task->storage.XtWX.device_pointer,
- &task->storage.XtWY.device_pointer,
- &task->reconstruction_state.filter_window,
- &w,
- &h,
- &stride,
- &pass_stride,
- &r,
- &f,
- &frame_offset,
- &task->buffer.use_time};
-
- CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args);
- check_result_cuda_ret(cuCtxSynchronize());
-
- return !have_error();
- }
-
- bool denoising_solve(device_ptr output_ptr, DenoisingTask *task, int thread_index)
- {
- if (have_error())
- return false;
-
- CUfunction cuFinalize;
- check_result_cuda_ret(
- cuModuleGetFunction(&cuFinalize, cuda_filter_module, "kernel_cuda_filter_finalize"));
- check_result_cuda_ret(cuFuncSetCacheConfig(cuFinalize, CU_FUNC_CACHE_PREFER_L1));
- void *finalize_args[] = {&output_ptr,
- &task->storage.rank.device_pointer,
- &task->storage.XtWX.device_pointer,
- &task->storage.XtWY.device_pointer,
- &task->filter_area,
- &task->reconstruction_state.buffer_params.x,
- &task->render_buffer.samples};
- CUDA_GET_BLOCKSIZE(
- cuFinalize, task->reconstruction_state.source_w, task->reconstruction_state.source_h);
- CUDA_LAUNCH_KERNEL(cuFinalize, finalize_args);
- check_result_cuda_ret(cuStreamSynchronize(cuda_stream[thread_index]));
-
- return !have_error();
- }
-
- bool denoising_combine_halves(device_ptr a_ptr,
- device_ptr b_ptr,
- device_ptr mean_ptr,
- device_ptr variance_ptr,
- int r,
- int4 rect,
- DenoisingTask *task,
- int thread_index)
- {
- if (have_error())
- return false;
-
- CUfunction cuFilterCombineHalves;
- check_result_cuda_ret(cuModuleGetFunction(
- &cuFilterCombineHalves, cuda_filter_module, "kernel_cuda_filter_combine_halves"));
- check_result_cuda_ret(cuFuncSetCacheConfig(cuFilterCombineHalves, CU_FUNC_CACHE_PREFER_L1));
- CUDA_GET_BLOCKSIZE(
- cuFilterCombineHalves, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
-
- void *args[] = {&mean_ptr, &variance_ptr, &a_ptr, &b_ptr, &rect, &r};
- CUDA_LAUNCH_KERNEL(cuFilterCombineHalves, args);
- check_result_cuda_ret(cuStreamSynchronize(cuda_stream[thread_index]));
-
- return !have_error();
- }
-
- bool denoising_divide_shadow(device_ptr a_ptr,
- device_ptr b_ptr,
- device_ptr sample_variance_ptr,
- device_ptr sv_variance_ptr,
- device_ptr buffer_variance_ptr,
- DenoisingTask *task,
- int thread_index)
- {
- if (have_error())
- return false;
-
- CUfunction cuFilterDivideShadow;
- check_result_cuda_ret(cuModuleGetFunction(
- &cuFilterDivideShadow, cuda_filter_module, "kernel_cuda_filter_divide_shadow"));
- check_result_cuda_ret(cuFuncSetCacheConfig(cuFilterDivideShadow, CU_FUNC_CACHE_PREFER_L1));
- CUDA_GET_BLOCKSIZE(
- cuFilterDivideShadow, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
-
- void *args[] = {&task->render_buffer.samples,
- &task->tile_info_mem.device_pointer,
- &a_ptr,
- &b_ptr,
- &sample_variance_ptr,
- &sv_variance_ptr,
- &buffer_variance_ptr,
- &task->rect,
- &task->render_buffer.pass_stride,
- &task->render_buffer.offset};
- CUDA_LAUNCH_KERNEL(cuFilterDivideShadow, args);
- check_result_cuda_ret(cuStreamSynchronize(cuda_stream[thread_index]));
-
- return !have_error();
- }
-
- bool denoising_get_feature(int mean_offset,
- int variance_offset,
- device_ptr mean_ptr,
- device_ptr variance_ptr,
- float scale,
- DenoisingTask *task,
- int thread_index)
- {
- if (have_error())
- return false;
-
- CUfunction cuFilterGetFeature;
- check_result_cuda_ret(cuModuleGetFunction(
- &cuFilterGetFeature, cuda_filter_module, "kernel_cuda_filter_get_feature"));
- check_result_cuda_ret(cuFuncSetCacheConfig(cuFilterGetFeature, CU_FUNC_CACHE_PREFER_L1));
- CUDA_GET_BLOCKSIZE(
- cuFilterGetFeature, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
-
- void *args[] = {&task->render_buffer.samples,
- &task->tile_info_mem.device_pointer,
- &mean_offset,
- &variance_offset,
- &mean_ptr,
- &variance_ptr,
- &scale,
- &task->rect,
- &task->render_buffer.pass_stride,
- &task->render_buffer.offset};
- CUDA_LAUNCH_KERNEL(cuFilterGetFeature, args);
- check_result_cuda_ret(cuStreamSynchronize(cuda_stream[thread_index]));
-
- return !have_error();
- }
-
- bool denoising_write_feature(int out_offset,
- device_ptr from_ptr,
- device_ptr buffer_ptr,
- DenoisingTask *task,
- int thread_index)
- {
- if (have_error())
- return false;
-
- CUfunction cuFilterWriteFeature;
- check_result_cuda_ret(cuModuleGetFunction(
- &cuFilterWriteFeature, cuda_filter_module, "kernel_cuda_filter_write_feature"));
- check_result_cuda_ret(cuFuncSetCacheConfig(cuFilterWriteFeature, CU_FUNC_CACHE_PREFER_L1));
- CUDA_GET_BLOCKSIZE(cuFilterWriteFeature, task->filter_area.z, task->filter_area.w);
-
- void *args[] = {&task->render_buffer.samples,
- &task->reconstruction_state.buffer_params,
- &task->filter_area,
- &from_ptr,
- &buffer_ptr,
- &out_offset,
- &task->rect};
- CUDA_LAUNCH_KERNEL(cuFilterWriteFeature, args);
- check_result_cuda_ret(cuStreamSynchronize(cuda_stream[thread_index]));
-
- return !have_error();
- }
-
- bool denoising_detect_outliers(device_ptr image_ptr,
- device_ptr variance_ptr,
- device_ptr depth_ptr,
- device_ptr output_ptr,
- DenoisingTask *task,
- int thread_index)
- {
- if (have_error())
- return false;
-
- CUfunction cuFilterDetectOutliers;
- check_result_cuda_ret(cuModuleGetFunction(
- &cuFilterDetectOutliers, cuda_filter_module, "kernel_cuda_filter_detect_outliers"));
- check_result_cuda_ret(cuFuncSetCacheConfig(cuFilterDetectOutliers, CU_FUNC_CACHE_PREFER_L1));
- CUDA_GET_BLOCKSIZE(
- cuFilterDetectOutliers, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
-
- void *args[] = {&image_ptr,
- &variance_ptr,
- &depth_ptr,
- &output_ptr,
- &task->rect,
- &task->buffer.pass_stride};
-
- CUDA_LAUNCH_KERNEL(cuFilterDetectOutliers, args);
- check_result_cuda_ret(cuStreamSynchronize(cuda_stream[thread_index]));
-
- return !have_error();
- }
};
bool device_optix_init()
@@ -2603,14 +1512,6 @@ bool device_optix_init()
if (!device_cuda_init())
return false;
-# ifdef WITH_CUDA_DYNLOAD
- // Load NVRTC function pointers for adaptive kernel compilation
- if (DebugFlags().cuda.adaptive_compile && cuewInit(CUEW_INIT_NVRTC) != CUEW_SUCCESS) {
- VLOG(1)
- << "CUEW initialization failed for NVRTC. Adaptive kernel compilation won't be available.";
- }
-# endif
-
const OptixResult result = optixInit();
if (result == OPTIX_ERROR_UNSUPPORTED_ABI_VERSION) {
@@ -2657,7 +1558,7 @@ void device_optix_info(vector<DeviceInfo> &devices)
}
// Only add devices with RTX support
- if (rtcore_version == 0)
+ if (rtcore_version == 0 && !getenv("CYCLES_OPTIX_TEST"))
it = cuda_devices.erase(it);
else
++it;
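
The reworked OptiXDevice::task_add() earlier in this file's diff splits one DeviceTask into per-stream subtasks and binds the task index (not the thread index) into each queued job, since the number of CUDA streams need not match the number of worker threads. A minimal standalone sketch of that pattern in plain C++, with illustrative names only (Task, split, pool are stand-ins, not the Cycles API):

#include <algorithm>
#include <cstdio>
#include <functional>
#include <list>
#include <vector>

struct Task {
  int begin, end;
};

// Divide one task into roughly equal subtasks, similar in spirit to DeviceTask::split().
static void split(const Task &task, int num, std::list<Task> &out)
{
  const int step = (task.end - task.begin + num - 1) / num;
  for (int b = task.begin; b < task.end; b += step)
    out.push_back({b, std::min(b + step, task.end)});
}

int main()
{
  std::list<Task> subtasks;
  split({0, 100}, 4, subtasks);

  // Stand-in for the internal task pool: each queued job carries its own task index,
  // which would select a per-index resource such as a CUDA stream.
  std::vector<std::function<void()>> pool;
  int index = 0;
  for (const Task &t : subtasks) {
    const int task_index = index++;
    pool.push_back(
        [t, task_index] { std::printf("task %d: [%d, %d)\n", task_index, t.begin, t.end); });
  }
  for (auto &fn : pool)
    fn();
  return 0;
}
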
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index 42e597a34d7..f22d8761058 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -55,6 +55,10 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device)
kernel_next_iteration_setup = NULL;
kernel_indirect_subsurface = NULL;
kernel_buffer_update = NULL;
+ kernel_adaptive_stopping = NULL;
+ kernel_adaptive_filter_x = NULL;
+ kernel_adaptive_filter_y = NULL;
+ kernel_adaptive_adjust_samples = NULL;
}
DeviceSplitKernel::~DeviceSplitKernel()
@@ -83,6 +87,10 @@ DeviceSplitKernel::~DeviceSplitKernel()
delete kernel_next_iteration_setup;
delete kernel_indirect_subsurface;
delete kernel_buffer_update;
+ delete kernel_adaptive_stopping;
+ delete kernel_adaptive_filter_x;
+ delete kernel_adaptive_filter_y;
+ delete kernel_adaptive_adjust_samples;
}
bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures &requested_features)
@@ -114,6 +122,10 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures &requested_fe
LOAD_KERNEL(next_iteration_setup);
LOAD_KERNEL(indirect_subsurface);
LOAD_KERNEL(buffer_update);
+ LOAD_KERNEL(adaptive_stopping);
+ LOAD_KERNEL(adaptive_filter_x);
+ LOAD_KERNEL(adaptive_filter_y);
+ LOAD_KERNEL(adaptive_adjust_samples);
#undef LOAD_KERNEL
@@ -202,13 +214,21 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
/* initial guess to start rolling average */
const int initial_num_samples = 1;
/* approx number of samples per second */
- int samples_per_second = (avg_time_per_sample > 0.0) ?
- int(double(time_multiplier) / avg_time_per_sample) + 1 :
- initial_num_samples;
+ const int samples_per_second = (avg_time_per_sample > 0.0) ?
+ int(double(time_multiplier) / avg_time_per_sample) + 1 :
+ initial_num_samples;
RenderTile subtile = tile;
subtile.start_sample = tile.sample;
- subtile.num_samples = min(samples_per_second,
+ subtile.num_samples = samples_per_second;
+
+ if (task->adaptive_sampling.use) {
+ subtile.num_samples = task->adaptive_sampling.align_dynamic_samples(subtile.start_sample,
+ subtile.num_samples);
+ }
+
+ /* Don't go beyond requested number of samples. */
+ subtile.num_samples = min(subtile.num_samples,
tile.start_sample + tile.num_samples - tile.sample);
if (device->have_error()) {
@@ -302,6 +322,23 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
}
}
+ int filter_sample = tile.sample + subtile.num_samples - 1;
+ if (task->adaptive_sampling.use && task->adaptive_sampling.need_filter(filter_sample)) {
+ size_t buffer_size[2];
+ buffer_size[0] = round_up(tile.w, local_size[0]);
+ buffer_size[1] = round_up(tile.h, local_size[1]);
+ kernel_adaptive_stopping->enqueue(
+ KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data);
+ buffer_size[0] = round_up(tile.h, local_size[0]);
+ buffer_size[1] = round_up(1, local_size[1]);
+ kernel_adaptive_filter_x->enqueue(
+ KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data);
+ buffer_size[0] = round_up(tile.w, local_size[0]);
+ buffer_size[1] = round_up(1, local_size[1]);
+ kernel_adaptive_filter_y->enqueue(
+ KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data);
+ }
+
double time_per_sample = ((time_dt() - start_time) / subtile.num_samples);
if (avg_time_per_sample == 0.0) {
@@ -324,6 +361,28 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
}
}
+ if (task->adaptive_sampling.use) {
+ /* Reset the start samples. */
+ RenderTile subtile = tile;
+ subtile.start_sample = tile.start_sample;
+ subtile.num_samples = tile.sample - tile.start_sample;
+ enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size),
+ subtile,
+ num_global_elements,
+ kgbuffer,
+ kernel_data,
+ split_data,
+ ray_state,
+ queue_index,
+ use_queues_flag,
+ work_pool_wgs);
+ size_t buffer_size[2];
+ buffer_size[0] = round_up(tile.w, local_size[0]);
+ buffer_size[1] = round_up(tile.h, local_size[1]);
+ kernel_adaptive_adjust_samples->enqueue(
+ KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data);
+ }
+
return true;
}
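
The adaptive kernels enqueued above use the usual round-up-to-work-group sizing: kernel_adaptive_stopping covers the whole tile, while the separable filters run one work-item per row (filter_x) and one per column (filter_y). A small sketch of that sizing, assuming a round_up of the common ((x + m - 1) / m) * m form and a hypothetical 64x1 work-group:

#include <cstddef>
#include <cstdio>

// Round x up to the next multiple of 'multiple', as the split kernel does for launch sizes.
static size_t round_up(size_t x, size_t multiple)
{
  return ((x + multiple - 1) / multiple) * multiple;
}

int main()
{
  const size_t local_size[2] = {64, 1}; /* hypothetical work-group size */
  const size_t tile_w = 240, tile_h = 135;

  /* Stopping kernel: one work-item per pixel of the tile (256 x 135 here). */
  std::printf("stopping: %zu x %zu\n",
              round_up(tile_w, local_size[0]),
              round_up(tile_h, local_size[1]));
  /* filter_x: one work-item per row (192 x 1); filter_y: one per column (256 x 1). */
  std::printf("filter_x: %zu x %zu\n",
              round_up(tile_h, local_size[0]),
              round_up(1, local_size[1]));
  std::printf("filter_y: %zu x %zu\n",
              round_up(tile_w, local_size[0]),
              round_up(1, local_size[1]));
  return 0;
}
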
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index 6ff326bf214..9d6b9efdd62 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -75,6 +75,10 @@ class DeviceSplitKernel {
SplitKernelFunction *kernel_next_iteration_setup;
SplitKernelFunction *kernel_indirect_subsurface;
SplitKernelFunction *kernel_buffer_update;
+ SplitKernelFunction *kernel_adaptive_stopping;
+ SplitKernelFunction *kernel_adaptive_filter_x;
+ SplitKernelFunction *kernel_adaptive_filter_y;
+ SplitKernelFunction *kernel_adaptive_adjust_samples;
/* Global memory variables [porting]; This memory is used for
* co-operation between different kernels; Data written by one
diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp
index 376ad06a734..c36b1344c3b 100644
--- a/intern/cycles/device/device_task.cpp
+++ b/intern/cycles/device/device_task.cpp
@@ -115,7 +115,7 @@ void DeviceTask::split(list<DeviceTask> &tasks, int num, int max_size)
void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples)
{
- if ((type != RENDER) && (type != SHADER))
+ if (type == FILM_CONVERT)
return;
if (update_progress_sample) {
@@ -136,4 +136,59 @@ void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples)
}
}
+/* Adaptive Sampling */
+
+AdaptiveSampling::AdaptiveSampling()
+ : use(true), adaptive_step(ADAPTIVE_SAMPLE_STEP), min_samples(0)
+{
+}
+
+/* Render samples in steps that align with the adaptive filtering. */
+int AdaptiveSampling::align_static_samples(int samples) const
+{
+ if (samples > adaptive_step) {
+ /* Make multiple of adaptive_step. */
+ while (samples % adaptive_step != 0) {
+ samples--;
+ }
+ }
+ else if (samples < adaptive_step) {
+ /* Make divisor of adaptive_step. */
+ while (adaptive_step % samples != 0) {
+ samples--;
+ }
+ }
+
+ return max(samples, 1);
+}
+
+/* Render samples in steps that align with the adaptive filtering, with the
+ * suggested number of samples dynamically changing. */
+int AdaptiveSampling::align_dynamic_samples(int offset, int samples) const
+{
+ /* Round so that we end up on multiples of adaptive_step. */
+ samples += offset;
+
+ if (samples > adaptive_step) {
+ /* Make multiple of adaptive_step. */
+ while (samples % adaptive_step != 0) {
+ samples--;
+ }
+ }
+
+ samples -= offset;
+
+ return max(samples, 1);
+}
+
+bool AdaptiveSampling::need_filter(int sample) const
+{
+ if (sample > min_samples) {
+ return (sample & (adaptive_step - 1)) == (adaptive_step - 1);
+ }
+ else {
+ return false;
+ }
+}
+
CCL_NAMESPACE_END
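
The alignment helpers above are easiest to follow with concrete numbers. The standalone sketch below mirrors align_static_samples() and need_filter() with an adaptive step of 4 (a stand-in for ADAPTIVE_SAMPLE_STEP; the bitmask in need_filter assumes the step is a power of two):

#include <algorithm>
#include <cstdio>

// Round 'samples' down so it plays well with an adaptive filtering step.
static int align_static(int samples, int step)
{
  if (samples > step) {
    while (samples % step != 0)
      samples--; /* make it a multiple of step */
  }
  else if (samples < step) {
    while (step % samples != 0)
      samples--; /* make it a divisor of step */
  }
  return std::max(samples, 1);
}

// True on the last sample of each step once past min_samples (step must be a power of two).
static bool need_filter(int sample, int step, int min_samples)
{
  return sample > min_samples && (sample & (step - 1)) == (step - 1);
}

int main()
{
  std::printf("%d %d %d\n", align_static(10, 4), align_static(3, 4), align_static(4, 4)); /* 8 2 4 */
  std::printf("%d %d\n", need_filter(3, 4, 0), need_filter(4, 4, 0));                     /* 1 0 */
  return 0;
}

With a step of 4, a request for 10 samples rounds down to 8, a request for 3 rounds down to the divisor 2, and filtering triggers on samples 3, 7, 11, and so on.
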
diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h
index 1b1e97cdb10..8c4e682adb1 100644
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -47,7 +47,7 @@ class DenoiseParams {
int neighbor_frames;
/* Clamp the input to the range of +-1e8. Should be enough for any legitimate data. */
bool clamp_input;
- /* Controls which passes the OptiX AI denoiser should use as input. */
+ /* Passes handed over to the OptiX denoiser (defaults to color + albedo). */
int optix_input_passes;
DenoiseParams()
@@ -58,13 +58,26 @@ class DenoiseParams {
relative_pca = false;
neighbor_frames = 2;
clamp_input = true;
- optix_input_passes = 1;
+ optix_input_passes = 2;
}
};
+class AdaptiveSampling {
+ public:
+ AdaptiveSampling();
+
+ int align_static_samples(int samples) const;
+ int align_dynamic_samples(int offset, int samples) const;
+ bool need_filter(int sample) const;
+
+ bool use;
+ int adaptive_step;
+ int min_samples;
+};
+
class DeviceTask : public Task {
public:
- typedef enum { RENDER, FILM_CONVERT, SHADER } Type;
+ typedef enum { RENDER, FILM_CONVERT, SHADER, DENOISE_BUFFER } Type;
Type type;
int x, y, w, h;
@@ -81,7 +94,7 @@ class DeviceTask : public Task {
int shader_filter;
int shader_x, shader_w;
- int passes_size;
+ RenderBuffers *buffers;
explicit DeviceTask(Type type = RENDER);
@@ -90,7 +103,7 @@ class DeviceTask : public Task {
void update_progress(RenderTile *rtile, int pixel_samples = -1);
- function<bool(Device *device, RenderTile &)> acquire_tile;
+ function<bool(Device *device, RenderTile &, uint)> acquire_tile;
function<void(long, int)> update_progress_sample;
function<void(RenderTile &)> update_tile_sample;
function<void(RenderTile &)> release_tile;
@@ -98,6 +111,7 @@ class DeviceTask : public Task {
function<void(RenderTile *, Device *)> map_neighbor_tiles;
function<void(RenderTile *, Device *)> unmap_neighbor_tiles;
+ uint tile_types;
DenoiseParams denoising;
bool denoising_from_render;
vector<int> denoising_frames;
@@ -114,7 +128,7 @@ class DeviceTask : public Task {
bool need_finish_queue;
bool integrator_branched;
- int2 requested_tile_size;
+ AdaptiveSampling adaptive_sampling;
protected:
double last_update_time;
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/device_opencl.h
index 61b1e3e3b6b..d6f4fb43061 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/device_opencl.h
@@ -88,9 +88,12 @@ class OpenCLInfo {
static bool device_supported(const string &platform_name, const cl_device_id device_id);
static bool platform_version_check(cl_platform_id platform, string *error = NULL);
static bool device_version_check(cl_device_id device, string *error = NULL);
+ static bool get_device_version(cl_device_id device,
+ int *r_major,
+ int *r_minor,
+ string *error = NULL);
static string get_hardware_id(const string &platform_name, cl_device_id device_id);
- static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
- bool force_all = false);
+ static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices);
/* ** Some handy shortcuts to low level cl*GetInfo() functions. ** */
@@ -428,8 +431,10 @@ class OpenCLDevice : public Device {
int mem_sub_ptr_alignment();
void const_copy_to(const char *name, void *host, size_t size);
- void tex_alloc(device_memory &mem);
- void tex_free(device_memory &mem);
+ void global_alloc(device_memory &mem);
+ void global_free(device_memory &mem);
+ void tex_alloc(device_texture &mem);
+ void tex_free(device_texture &mem);
size_t global_size_round_up(int group_size, int global_size);
void enqueue_kernel(cl_kernel kernel,
@@ -445,6 +450,7 @@ class OpenCLDevice : public Device {
device_ptr rgba_byte,
device_ptr rgba_half);
void shader(DeviceTask &task);
+ void update_adaptive(DeviceTask &task, RenderTile &tile, int sample);
void denoise(RenderTile &tile, DenoisingTask &denoising);
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp
index 76f9ce7a18f..2766f85d17c 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/device_opencl_impl.cpp
@@ -16,7 +16,7 @@
#ifdef WITH_OPENCL
-# include "device/opencl/opencl.h"
+# include "device/opencl/device_opencl.h"
# include "kernel/kernel_types.h"
# include "kernel/split/kernel_split_data_types.h"
@@ -56,7 +56,11 @@ static const string SPLIT_BUNDLE_KERNELS =
"enqueue_inactive "
"next_iteration_setup "
"indirect_subsurface "
- "buffer_update";
+ "buffer_update "
+ "adaptive_stopping "
+ "adaptive_filter_x "
+ "adaptive_filter_y "
+ "adaptive_adjust_samples";
const string OpenCLDevice::get_opencl_program_name(const string &kernel_name)
{
@@ -253,16 +257,16 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels(
/* Ordered with most complex kernels first, to reduce overall compile time. */
ADD_SPLIT_KERNEL_PROGRAM(subsurface_scatter);
+ ADD_SPLIT_KERNEL_PROGRAM(direct_lighting);
+ ADD_SPLIT_KERNEL_PROGRAM(indirect_background);
if (requested_features.use_volume || is_preview) {
ADD_SPLIT_KERNEL_PROGRAM(do_volume);
}
+ ADD_SPLIT_KERNEL_PROGRAM(shader_eval);
+ ADD_SPLIT_KERNEL_PROGRAM(lamp_emission);
+ ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao);
ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_dl);
ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_ao);
- ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao);
- ADD_SPLIT_KERNEL_PROGRAM(lamp_emission);
- ADD_SPLIT_KERNEL_PROGRAM(direct_lighting);
- ADD_SPLIT_KERNEL_PROGRAM(indirect_background);
- ADD_SPLIT_KERNEL_PROGRAM(shader_eval);
/* Quick kernels bundled in a single program to reduce overhead of starting
* Blender processes. */
@@ -283,6 +287,10 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels(
ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(next_iteration_setup);
ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(indirect_subsurface);
ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(buffer_update);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_stopping);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_filter_x);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_filter_y);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_adjust_samples);
programs.push_back(&program_split);
# undef ADD_SPLIT_KERNEL_PROGRAM
@@ -605,7 +613,7 @@ OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, b
kernel_programs(this),
preview_programs(this),
memory_manager(this),
- texture_info(this, "__texture_info", MEM_TEXTURE)
+ texture_info(this, "__texture_info", MEM_GLOBAL)
{
cpPlatform = NULL;
cdDevice = NULL;
@@ -937,7 +945,7 @@ void OpenCLDevice::mem_alloc(device_memory &mem)
cl_mem_flags mem_flag;
void *mem_ptr = NULL;
- if (mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE)
+ if (mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL)
mem_flag = CL_MEM_READ_ONLY;
else
mem_flag = CL_MEM_READ_WRITE;
@@ -961,9 +969,13 @@ void OpenCLDevice::mem_alloc(device_memory &mem)
void OpenCLDevice::mem_copy_to(device_memory &mem)
{
- if (mem.type == MEM_TEXTURE) {
- tex_free(mem);
- tex_alloc(mem);
+ if (mem.type == MEM_GLOBAL) {
+ global_free(mem);
+ global_alloc(mem);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ tex_free((device_texture &)mem);
+ tex_alloc((device_texture &)mem);
}
else {
if (!mem.device_pointer) {
@@ -1069,8 +1081,11 @@ void OpenCLDevice::mem_zero(device_memory &mem)
void OpenCLDevice::mem_free(device_memory &mem)
{
- if (mem.type == MEM_TEXTURE) {
- tex_free(mem);
+ if (mem.type == MEM_GLOBAL) {
+ global_free(mem);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ tex_free((device_texture &)mem);
}
else {
if (mem.device_pointer) {
@@ -1093,7 +1108,7 @@ int OpenCLDevice::mem_sub_ptr_alignment()
device_ptr OpenCLDevice::mem_alloc_sub_ptr(device_memory &mem, int offset, int size)
{
cl_mem_flags mem_flag;
- if (mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE)
+ if (mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL)
mem_flag = CL_MEM_READ_ONLY;
else
mem_flag = CL_MEM_READ_WRITE;
@@ -1133,9 +1148,9 @@ void OpenCLDevice::const_copy_to(const char *name, void *host, size_t size)
data->copy_to_device();
}
-void OpenCLDevice::tex_alloc(device_memory &mem)
+void OpenCLDevice::global_alloc(device_memory &mem)
{
- VLOG(1) << "Texture allocate: " << mem.name << ", "
+ VLOG(1) << "Global memory allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
@@ -1147,7 +1162,7 @@ void OpenCLDevice::tex_alloc(device_memory &mem)
textures_need_update = true;
}
-void OpenCLDevice::tex_free(device_memory &mem)
+void OpenCLDevice::global_free(device_memory &mem)
{
if (mem.device_pointer) {
mem.device_pointer = 0;
@@ -1165,6 +1180,25 @@ void OpenCLDevice::tex_free(device_memory &mem)
}
}
+void OpenCLDevice::tex_alloc(device_texture &mem)
+{
+ VLOG(1) << "Texture allocate: " << mem.name << ", "
+ << string_human_readable_number(mem.memory_size()) << " bytes. ("
+ << string_human_readable_size(mem.memory_size()) << ")";
+
+ memory_manager.alloc(mem.name, mem);
+ /* Set the pointer to non-null to keep code that inspects its value from thinking it is
+ * unallocated. */
+ mem.device_pointer = 1;
+ textures[mem.name] = &mem;
+ textures_need_update = true;
+}
+
+void OpenCLDevice::tex_free(device_texture &mem)
+{
+ global_free(mem);
+}
+
size_t OpenCLDevice::global_size_round_up(int group_size, int global_size)
{
int r = global_size % group_size;
@@ -1265,10 +1299,10 @@ void OpenCLDevice::flush_texture_buffers()
foreach (TexturesMap::value_type &tex, textures) {
string name = tex.first;
+ device_memory *mem = tex.second;
- if (string_startswith(name, "__tex_image")) {
- int pos = name.rfind("_");
- int id = atoi(name.data() + pos + 1);
+ if (mem->type == MEM_TEXTURE) {
+ const uint id = ((device_texture *)mem)->slot;
texture_slots.push_back(texture_slot_t(name, num_data_slots + id));
num_slots = max(num_slots, num_data_slots + id + 1);
}
@@ -1281,22 +1315,20 @@ void OpenCLDevice::flush_texture_buffers()
/* Fill in descriptors */
foreach (texture_slot_t &slot, texture_slots) {
+ device_memory *mem = textures[slot.name];
TextureInfo &info = texture_info[slot.slot];
MemoryManager::BufferDescriptor desc = memory_manager.get_descriptor(slot.name);
- info.data = desc.offset;
- info.cl_buffer = desc.device_buffer;
-
- if (string_startswith(slot.name, "__tex_image")) {
- device_memory *mem = textures[slot.name];
- info.width = mem->data_width;
- info.height = mem->data_height;
- info.depth = mem->data_depth;
-
- info.interpolation = mem->interpolation;
- info.extension = mem->extension;
+ if (mem->type == MEM_TEXTURE) {
+ info = ((device_texture *)mem)->info;
+ }
+ else {
+ memset(&info, 0, sizeof(TextureInfo));
}
+
+ info.data = desc.offset;
+ info.cl_buffer = desc.device_buffer;
}
/* Force write of descriptors. */
@@ -1308,13 +1340,7 @@ void OpenCLDevice::thread_run(DeviceTask *task)
{
flush_texture_buffers();
- if (task->type == DeviceTask::FILM_CONVERT) {
- film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
- }
- else if (task->type == DeviceTask::SHADER) {
- shader(*task);
- }
- else if (task->type == DeviceTask::RENDER) {
+ if (task->type == DeviceTask::RENDER) {
RenderTile tile;
DenoisingTask denoising(this, *task);
@@ -1323,7 +1349,7 @@ void OpenCLDevice::thread_run(DeviceTask *task)
kgbuffer.alloc_to_device(1);
/* Keep rendering tiles until done. */
- while (task->acquire_tile(this, tile)) {
+ while (task->acquire_tile(this, tile, task->tile_types)) {
if (tile.task == RenderTile::PATH_TRACE) {
assert(tile.task == RenderTile::PATH_TRACE);
scoped_timer timer(&tile.buffers->render_time);
@@ -1352,6 +1378,30 @@ void OpenCLDevice::thread_run(DeviceTask *task)
kgbuffer.free();
}
+ else if (task->type == DeviceTask::SHADER) {
+ shader(*task);
+ }
+ else if (task->type == DeviceTask::FILM_CONVERT) {
+ film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
+ }
+ else if (task->type == DeviceTask::DENOISE_BUFFER) {
+ RenderTile tile;
+ tile.x = task->x;
+ tile.y = task->y;
+ tile.w = task->w;
+ tile.h = task->h;
+ tile.buffer = task->buffer;
+ tile.sample = task->sample + task->num_samples;
+ tile.num_samples = task->num_samples;
+ tile.start_sample = task->sample;
+ tile.offset = task->offset;
+ tile.stride = task->stride;
+ tile.buffers = task->buffers;
+
+ DenoisingTask denoising(this, *task);
+ denoise(tile, denoising);
+ task->update_progress(&tile, tile.w * tile.h);
+ }
}
void OpenCLDevice::film_convert(DeviceTask &task,
@@ -1846,6 +1896,17 @@ string OpenCLDevice::kernel_build_options(const string *debug_src)
{
string build_options = "-cl-no-signed-zeros -cl-mad-enable ";
+ /* Build with OpenCL 2.0 if available; this improves performance
+ * with AMD OpenCL drivers on Windows and Linux (legacy drivers).
+ * Note that OpenCL selects the highest 1.x version by default,
+ * only for 2.0 do we need the explicit compiler flag. */
+ int version_major, version_minor;
+ if (OpenCLInfo::get_device_version(cdDevice, &version_major, &version_minor)) {
+ if (version_major >= 2) {
+ build_options += "-cl-std=CL2.0 ";
+ }
+ }
+
if (platform_name == "NVIDIA CUDA") {
build_options +=
"-D__KERNEL_OPENCL_NVIDIA__ "
diff --git a/intern/cycles/device/opencl/memory_manager.cpp b/intern/cycles/device/opencl/memory_manager.cpp
index 06d4746a86e..fedb3ea8c6a 100644
--- a/intern/cycles/device/opencl/memory_manager.cpp
+++ b/intern/cycles/device/opencl/memory_manager.cpp
@@ -18,7 +18,7 @@
# include "util/util_foreach.h"
-# include "device/opencl/opencl.h"
+# include "device/opencl/device_opencl.h"
# include "device/opencl/memory_manager.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/device/opencl/memory_manager.h b/intern/cycles/device/opencl/memory_manager.h
index 2fbc97a0756..23624f837a6 100644
--- a/intern/cycles/device/opencl/memory_manager.h
+++ b/intern/cycles/device/opencl/memory_manager.h
@@ -19,8 +19,8 @@
#include "device/device.h"
#include "util/util_map.h"
-#include "util/util_vector.h"
#include "util/util_string.h"
+#include "util/util_vector.h"
#include "clew.h"
diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp
index 3eeff31f8c2..b8b07cf2947 100644
--- a/intern/cycles/device/opencl/opencl_util.cpp
+++ b/intern/cycles/device/opencl/opencl_util.cpp
@@ -16,15 +16,16 @@
#ifdef WITH_OPENCL
-# include "device/opencl/opencl.h"
# include "device/device_intern.h"
+# include "device/opencl/device_opencl.h"
# include "util/util_debug.h"
# include "util/util_logging.h"
# include "util/util_md5.h"
# include "util/util_path.h"
-# include "util/util_time.h"
+# include "util/util_semaphore.h"
# include "util/util_system.h"
+# include "util/util_time.h"
using std::cerr;
using std::endl;
@@ -390,8 +391,27 @@ static void escape_python_string(string &str)
string_replace(str, "'", "\'");
}
+static int opencl_compile_process_limit()
+{
+ /* Limit the number of concurrent compile processes, using a heuristic based
+ * on total physical RAM and an estimate of the memory needed when compiling
+ * with all Cycles features enabled.
+ *
+ * This is somewhat arbitrary since we don't know the actual available RAM or
+ * how much memory the kernel compilation will need depending on the features,
+ * but it is better than not limiting at all. */
+ static const int64_t GB = 1024LL * 1024LL * 1024LL;
+ static const int64_t process_memory = 2 * GB;
+ static const int64_t base_memory = 2 * GB;
+ static const int64_t system_memory = system_physical_ram();
+ static const int64_t process_limit = (system_memory - base_memory) / process_memory;
+
+ return max((int)process_limit, 1);
+}
+
bool OpenCLDevice::OpenCLProgram::compile_separate(const string &clbin)
{
+ /* Construct arguments. */
vector<string> args;
args.push_back("--background");
args.push_back("--factory-startup");
@@ -419,14 +439,23 @@ bool OpenCLDevice::OpenCLProgram::compile_separate(const string &clbin)
kernel_file_escaped.c_str(),
clbin_escaped.c_str()));
- double starttime = time_dt();
+ /* Limit number of concurrent processes compiling. */
+ static thread_counting_semaphore semaphore(opencl_compile_process_limit());
+ semaphore.acquire();
+
+ /* Compile. */
+ const double starttime = time_dt();
add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false);
add_log(string("Build flags: ") + kernel_build_options, true);
- if (!system_call_self(args) || !path_exists(clbin)) {
+ const bool success = system_call_self(args);
+ const double elapsed = time_dt() - starttime;
+
+ semaphore.release();
+
+ if (!success || !path_exists(clbin)) {
return false;
}
- double elapsed = time_dt() - starttime;
add_log(
string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed),
false);
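
For the limit above, a 16 GB machine with the assumed 2 GB base and 2 GB per process allows (16 - 2) / 2 = 7 concurrent compiles. The sketch below shows the same acquire/compile/release throttling with C++20's std::counting_semaphore standing in for thread_counting_semaphore (illustrative only, not the Cycles utility code):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <semaphore>
#include <thread>
#include <vector>

// Same heuristic as above: reserve ~2 GB for the base process, ~2 GB per compile.
static int compile_process_limit(int64_t system_memory)
{
  const int64_t GB = 1024LL * 1024LL * 1024LL;
  const int64_t process_memory = 2 * GB;
  const int64_t base_memory = 2 * GB;
  return std::max(int((system_memory - base_memory) / process_memory), 1);
}

int main()
{
  const int64_t ram = 16LL * 1024 * 1024 * 1024; /* pretend 16 GB -> 7 slots */
  std::counting_semaphore<64> slots(compile_process_limit(ram));

  std::vector<std::thread> workers;
  for (int i = 0; i < 12; i++) {
    workers.emplace_back([&slots, i] {
      slots.acquire();                                 /* wait for a free compile slot */
      std::printf("compiling kernel variant %d\n", i); /* stand-in for system_call_self() */
      slots.release();
    });
  }
  for (std::thread &t : workers)
    t.join();
  return 0;
}
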
@@ -747,6 +776,10 @@ bool OpenCLInfo::device_supported(const string &platform_name, const cl_device_i
}
VLOG(3) << "OpenCL driver version " << driver_major << "." << driver_minor;
+ if (getenv("CYCLES_OPENCL_TEST")) {
+ return true;
+ }
+
/* It is possible to have Iris GPU on AMD/Apple OpenCL framework
* (aka, it will not be on Intel framework). This isn't supported
* and needs an explicit blacklist.
@@ -806,18 +839,30 @@ bool OpenCLInfo::platform_version_check(cl_platform_id platform, string *error)
return true;
}
-bool OpenCLInfo::device_version_check(cl_device_id device, string *error)
+bool OpenCLInfo::get_device_version(cl_device_id device, int *r_major, int *r_minor, string *error)
{
- const int req_major = 1, req_minor = 1;
- int major, minor;
char version[256];
clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, sizeof(version), &version, NULL);
- if (sscanf(version, "OpenCL C %d.%d", &major, &minor) < 2) {
+ if (sscanf(version, "OpenCL C %d.%d", r_major, r_minor) < 2) {
if (error != NULL) {
*error = string_printf("OpenCL: failed to parse OpenCL C version string (%s).", version);
}
return false;
}
+ if (error != NULL) {
+ *error = "";
+ }
+ return true;
+}
+
+bool OpenCLInfo::device_version_check(cl_device_id device, string *error)
+{
+ const int req_major = 1, req_minor = 1;
+ int major, minor;
+ if (!get_device_version(device, &major, &minor, error)) {
+ return false;
+ }
+
if (!((major == req_major && minor >= req_minor) || (major > req_major))) {
if (error != NULL) {
*error = string_printf("OpenCL: C version 1.1 or later required, found %d.%d", major, minor);
@@ -858,7 +903,7 @@ string OpenCLInfo::get_hardware_id(const string &platform_name, cl_device_id dev
return "";
}
-void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices, bool force_all)
+void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices)
{
const cl_device_type device_type = OpenCLInfo::device_type();
static bool first_time = true;
@@ -924,7 +969,7 @@ void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices
FIRST_VLOG(2) << "Ignoring device " << device_name << " due to old compiler version.";
continue;
}
- if (force_all || device_supported(platform_name, device_id)) {
+ if (device_supported(platform_name, device_id)) {
cl_device_type device_type;
if (!get_device_type(device_id, &device_type, &error)) {
FIRST_VLOG(2) << "Ignoring device " << device_name
diff --git a/intern/cycles/graph/node.cpp b/intern/cycles/graph/node.cpp
index 4f79a7518dc..1439fb5a407 100644
--- a/intern/cycles/graph/node.cpp
+++ b/intern/cycles/graph/node.cpp
@@ -669,4 +669,14 @@ size_t Node::get_total_size_in_bytes() const
return total_size;
}
+bool Node::is_a(const NodeType *type_)
+{
+ for (const NodeType *base = type; base; base = base->base) {
+ if (base == type_) {
+ return true;
+ }
+ }
+ return false;
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/graph/node.h b/intern/cycles/graph/node.h
index d35a1bb489c..4473b8aca28 100644
--- a/intern/cycles/graph/node.h
+++ b/intern/cycles/graph/node.h
@@ -94,6 +94,9 @@ struct Node {
/* Get total size of this node. */
size_t get_total_size_in_bytes() const;
+ /* Type testing, taking into account base classes. */
+ bool is_a(const NodeType *type);
+
ustring name;
const NodeType *type;
};
diff --git a/intern/cycles/graph/node_type.cpp b/intern/cycles/graph/node_type.cpp
index f46d4e48026..0283ed7c817 100644
--- a/intern/cycles/graph/node_type.cpp
+++ b/intern/cycles/graph/node_type.cpp
@@ -135,8 +135,13 @@ bool SocketType::is_float3(Type type)
/* Node Type */
-NodeType::NodeType(Type type_) : type(type_)
+NodeType::NodeType(Type type, const NodeType *base) : type(type), base(base)
{
+ if (base) {
+ /* Inherit sockets. */
+ inputs = base->inputs;
+ outputs = base->outputs;
+ }
}
NodeType::~NodeType()
@@ -209,7 +214,7 @@ unordered_map<ustring, NodeType, ustringHash> &NodeType::types()
return _types;
}
-NodeType *NodeType::add(const char *name_, CreateFunc create_, Type type_)
+NodeType *NodeType::add(const char *name_, CreateFunc create_, Type type_, const NodeType *base_)
{
ustring name(name_);
@@ -219,7 +224,7 @@ NodeType *NodeType::add(const char *name_, CreateFunc create_, Type type_)
return NULL;
}
- types()[name] = NodeType(type_);
+ types()[name] = NodeType(type_, base_);
NodeType *type = &types()[name];
type->name = name;
diff --git a/intern/cycles/graph/node_type.h b/intern/cycles/graph/node_type.h
index e9496a42658..a79d44b82f3 100644
--- a/intern/cycles/graph/node_type.h
+++ b/intern/cycles/graph/node_type.h
@@ -103,7 +103,7 @@ struct SocketType {
struct NodeType {
enum Type { NONE, SHADER };
- explicit NodeType(Type type = NONE);
+ explicit NodeType(Type type = NONE, const NodeType *base = NULL);
~NodeType();
void register_input(ustring name,
@@ -124,11 +124,15 @@ struct NodeType {
ustring name;
Type type;
+ const NodeType *base;
vector<SocketType, std::allocator<SocketType>> inputs;
vector<SocketType, std::allocator<SocketType>> outputs;
CreateFunc create;
- static NodeType *add(const char *name, CreateFunc create, Type type = NONE);
+ static NodeType *add(const char *name,
+ CreateFunc create,
+ Type type = NONE,
+ const NodeType *base = NULL);
static const NodeType *find(ustring name);
static unordered_map<ustring, NodeType, ustringHash> &types();
};
@@ -148,6 +152,14 @@ struct NodeType {
} \
template<typename T> const NodeType *structname::register_type()
+#define NODE_ABSTRACT_DECLARE \
+ template<typename T> static const NodeType *register_base_type(); \
+ static const NodeType *node_base_type;
+
+#define NODE_ABSTRACT_DEFINE(structname) \
+ const NodeType *structname::node_base_type = structname::register_base_type<structname>(); \
+ template<typename T> const NodeType *structname::register_base_type()
+
/* Socket Definition Macros */
#define SOCKET_OFFSETOF(T, name) (((char *)&(((T *)1)->name)) - (char *)1)
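
The base-type support added in this group of changes is a plain linked walk: NodeType keeps a pointer to its base and inherits its sockets, and Node::is_a() follows that chain until it finds the queried type or runs out of bases. A self-contained sketch of the same idea with simplified types (not the Cycles NodeType API):

#include <cassert>
#include <string>
#include <utility>
#include <vector>

struct TypeInfo {
  std::string name;
  const TypeInfo *base;
  std::vector<std::string> sockets;

  TypeInfo(std::string name, const TypeInfo *base = nullptr) : name(std::move(name)), base(base)
  {
    if (base)
      sockets = base->sockets; /* inherit sockets from the base type */
  }

  bool is_a(const TypeInfo *other) const
  {
    for (const TypeInfo *t = this; t; t = t->base)
      if (t == other)
        return true;
    return false;
  }
};

int main()
{
  TypeInfo geometry("geometry");
  geometry.sockets.push_back("use_motion_blur");
  TypeInfo mesh("mesh", &geometry); /* mesh inherits geometry's sockets */

  assert(mesh.is_a(&geometry));  /* a mesh counts as a geometry */
  assert(!geometry.is_a(&mesh)); /* a base is not an instance of a derived type */
  assert(mesh.sockets.size() == 1);
  return 0;
}

This is what lets node_xml.cpp below accept a derived node wherever a socket expects its base type.
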
diff --git a/intern/cycles/graph/node_xml.cpp b/intern/cycles/graph/node_xml.cpp
index a96970cc904..d333400cc4a 100644
--- a/intern/cycles/graph/node_xml.cpp
+++ b/intern/cycles/graph/node_xml.cpp
@@ -200,7 +200,7 @@ void xml_read_node(XMLReader &reader, Node *node, xml_node xml_node)
map<ustring, Node *>::iterator it = reader.node_map.find(value);
if (it != reader.node_map.end()) {
Node *value_node = it->second;
- if (value_node->type == *(socket.node_type))
+ if (value_node->is_a(*(socket.node_type)))
node->set(socket, it->second);
}
break;
@@ -215,7 +215,7 @@ void xml_read_node(XMLReader &reader, Node *node, xml_node xml_node)
map<ustring, Node *>::iterator it = reader.node_map.find(ustring(tokens[i]));
if (it != reader.node_map.end()) {
Node *value_node = it->second;
- value[i] = (value_node->type == *(socket.node_type)) ? value_node : NULL;
+ value[i] = (value_node->is_a(*(socket.node_type))) ? value_node : NULL;
}
else {
value[i] = NULL;
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 99172f30b8b..3264b5afea2 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -36,6 +36,10 @@ set(SRC_CUDA_KERNELS
)
set(SRC_OPENCL_KERNELS
+ kernels/opencl/kernel_adaptive_stopping.cl
+ kernels/opencl/kernel_adaptive_filter_x.cl
+ kernels/opencl/kernel_adaptive_filter_y.cl
+ kernels/opencl/kernel_adaptive_adjust_samples.cl
kernels/opencl/kernel_bake.cl
kernels/opencl/kernel_base.cl
kernels/opencl/kernel_displace.cl
@@ -94,6 +98,7 @@ set(SRC_BVH_HEADERS
set(SRC_HEADERS
kernel_accumulate.h
+ kernel_adaptive_sampling.h
kernel_bake.h
kernel_camera.h
kernel_color.h
@@ -228,6 +233,7 @@ set(SRC_SVM_HEADERS
svm/svm_fractal_noise.h
svm/svm_types.h
svm/svm_value.h
+ svm/svm_vector_rotate.h
svm/svm_vector_transform.h
svm/svm_voronoi.h
svm/svm_voxel.h
@@ -323,6 +329,10 @@ set(SRC_UTIL_HEADERS
)
set(SRC_SPLIT_HEADERS
+ split/kernel_adaptive_adjust_samples.h
+ split/kernel_adaptive_filter_x.h
+ split/kernel_adaptive_filter_y.h
+ split/kernel_adaptive_stopping.h
split/kernel_branched.h
split/kernel_buffer_update.h
split/kernel_data_init.h
@@ -442,7 +452,7 @@ if(WITH_CYCLES_CUDA_BINARIES)
endif()
add_custom_command(
- OUTPUT ${cuda_cubin}
+ OUTPUT ${cuda_file}
COMMAND ${CUBIN_CC_ENV}
"$<TARGET_FILE:cycles_cubin_cc>"
-target ${CUDA_ARCH}
@@ -451,7 +461,6 @@ if(WITH_CYCLES_CUDA_BINARIES)
-v
-cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}"
DEPENDS ${kernel_sources} cycles_cubin_cc)
- set(cuda_file ${cuda_cubin})
else()
add_custom_command(
OUTPUT ${cuda_file}
@@ -507,7 +516,6 @@ if(WITH_CYCLES_DEVICE_OPTIX)
-I "${OPTIX_INCLUDE_DIR}"
-I "${CMAKE_CURRENT_SOURCE_DIR}/.."
-I "${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda"
- -arch=sm_30
--use_fast_math
-o ${output})
@@ -515,25 +523,62 @@ if(WITH_CYCLES_DEVICE_OPTIX)
set(cuda_flags ${cuda_flags}
-D __KERNEL_DEBUG__)
endif()
+ if(WITH_CYCLES_CUBIN_COMPILER)
- add_custom_command(
- OUTPUT
- ${output}
- DEPENDS
- ${input}
- ${SRC_HEADERS}
- ${SRC_KERNELS_CUDA_HEADERS}
- ${SRC_KERNELS_OPTIX_HEADERS}
- ${SRC_BVH_HEADERS}
- ${SRC_SVM_HEADERS}
- ${SRC_GEOM_HEADERS}
- ${SRC_CLOSURE_HEADERS}
- ${SRC_UTIL_HEADERS}
- COMMAND
- ${CUDA_NVCC_EXECUTABLE} --ptx ${cuda_flags} ${input}
- WORKING_DIRECTORY
- "${CMAKE_CURRENT_SOURCE_DIR}")
+ # Needed to find libnvrtc-builtins.so. Can't do it from inside
+ # cycles_cubin_cc since the env variable is read before main()
+ if(APPLE)
+ set(CUBIN_CC_ENV ${CMAKE_COMMAND}
+ -E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib")
+ elseif(UNIX)
+ set(CUBIN_CC_ENV ${CMAKE_COMMAND}
+ -E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64")
+ endif()
+ add_custom_command(
+ OUTPUT ${output}
+ DEPENDS
+ ${input}
+ ${SRC_HEADERS}
+ ${SRC_KERNELS_CUDA_HEADERS}
+ ${SRC_KERNELS_OPTIX_HEADERS}
+ ${SRC_BVH_HEADERS}
+ ${SRC_SVM_HEADERS}
+ ${SRC_GEOM_HEADERS}
+ ${SRC_CLOSURE_HEADERS}
+ ${SRC_UTIL_HEADERS}
+ COMMAND ${CUBIN_CC_ENV}
+ "$<TARGET_FILE:cycles_cubin_cc>"
+ -target 30
+ -ptx
+ -i ${CMAKE_CURRENT_SOURCE_DIR}/${input}
+ ${cuda_flags}
+ -v
+ -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}"
+ DEPENDS ${kernel_sources} cycles_cubin_cc)
+ else()
+ add_custom_command(
+ OUTPUT
+ ${output}
+ DEPENDS
+ ${input}
+ ${SRC_HEADERS}
+ ${SRC_KERNELS_CUDA_HEADERS}
+ ${SRC_KERNELS_OPTIX_HEADERS}
+ ${SRC_BVH_HEADERS}
+ ${SRC_SVM_HEADERS}
+ ${SRC_GEOM_HEADERS}
+ ${SRC_CLOSURE_HEADERS}
+ ${SRC_UTIL_HEADERS}
+ COMMAND
+ ${CUDA_NVCC_EXECUTABLE}
+ --ptx
+ -arch=sm_30
+ ${cuda_flags}
+ ${input}
+ WORKING_DIRECTORY
+ "${CMAKE_CURRENT_SOURCE_DIR}")
+ endif()
list(APPEND optix_ptx ${output})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${output}" ${CYCLES_INSTALL_PATH}/lib)
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index 8e17ab9af7a..b3992c03a9a 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -336,7 +336,9 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
ctx.lcg_state = lcg_state;
ctx.max_hits = max_hits;
ctx.local_isect = local_isect;
- local_isect->num_hits = 0;
+ if (local_isect) {
+ local_isect->num_hits = 0;
+ }
ctx.local_object_id = local_object;
IntersectContext rtc_ctx(&ctx);
RTCRay rtc_ray;
@@ -373,7 +375,9 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
}
- return local_isect->num_hits > 0;
+ /* rtcOccluded1 sets tfar to -inf if a hit was found. */
+ return (local_isect && local_isect->num_hits > 0) || (rtc_ray.tfar < 0);
}
# endif /* __EMBREE__ */
@@ -439,7 +443,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
ctx.num_hits = 0;
IntersectContext rtc_ctx(&ctx);
RTCRay rtc_ray;
- kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_SHADOW);
+ kernel_embree_setup_ray(*ray, rtc_ray, visibility);
rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
if (ctx.num_hits > max_hits) {
diff --git a/intern/cycles/kernel/bvh/bvh_embree.h b/intern/cycles/kernel/bvh/bvh_embree.h
index ffea7d37440..ca637288bee 100644
--- a/intern/cycles/kernel/bvh/bvh_embree.h
+++ b/intern/cycles/kernel/bvh/bvh_embree.h
@@ -17,9 +17,12 @@
#include <embree3/rtcore_ray.h>
#include <embree3/rtcore_scene.h>
+// clang-format off
#include "kernel/kernel_compat_cpu.h"
#include "kernel/split/kernel_split_data_types.h"
#include "kernel/kernel_globals.h"
+// clang-format on
+
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index b282bf5a350..0a9631ad931 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+// clang-format off
#include "kernel/closure/bsdf_ashikhmin_velvet.h"
#include "kernel/closure/bsdf_diffuse.h"
#include "kernel/closure/bsdf_oren_nayar.h"
@@ -32,6 +33,7 @@
#include "kernel/closure/bsdf_principled_sheen.h"
#include "kernel/closure/bssrdf.h"
#include "kernel/closure/volume.h"
+// clang-format on
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/closure/bsdf_hair_principled.h b/intern/cycles/kernel/closure/bsdf_hair_principled.h
index 4db5a6cc830..f78bbeb5d9d 100644
--- a/intern/cycles/kernel/closure/bsdf_hair_principled.h
+++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h
@@ -493,6 +493,36 @@ ccl_device void bsdf_principled_hair_blur(ShaderClosure *sc, float roughness)
bsdf->m0_roughness = fmaxf(roughness, bsdf->m0_roughness);
}
+/* Hair Albedo */
+
+ccl_device_inline float bsdf_principled_hair_albedo_roughness_scale(
+ const float azimuthal_roughness)
+{
+ const float x = azimuthal_roughness;
+ return (((((0.245f * x) + 5.574f) * x - 10.73f) * x + 2.532f) * x - 0.215f) * x + 5.969f;
+}
+
+ccl_device float3 bsdf_principled_hair_albedo(ShaderClosure *sc)
+{
+ PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)sc;
+ return exp3(-sqrt(bsdf->sigma) * bsdf_principled_hair_albedo_roughness_scale(bsdf->v));
+}
+
+ccl_device_inline float3
+bsdf_principled_hair_sigma_from_reflectance(const float3 color, const float azimuthal_roughness)
+{
+ const float3 sigma = log3(color) /
+ bsdf_principled_hair_albedo_roughness_scale(azimuthal_roughness);
+ return sigma * sigma;
+}
+
+ccl_device_inline float3 bsdf_principled_hair_sigma_from_concentration(const float eumelanin,
+ const float pheomelanin)
+{
+ return eumelanin * make_float3(0.506f, 0.841f, 1.653f) +
+ pheomelanin * make_float3(0.343f, 0.733f, 1.924f);
+}
+
CCL_NAMESPACE_END
#endif /* __BSDF_HAIR_PRINCIPLED_H__ */
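
bsdf_principled_hair_sigma_from_reflectance() is the inverse of bsdf_principled_hair_albedo(): sigma = (log(color) / scale(v))^2, so exp(-sqrt(sigma) * scale(v)) recovers the color for values in (0, 1]. A single-channel numeric check of that round trip in plain C++ (not kernel code), using the same roughness polynomial:

#include <cmath>
#include <cstdio>

static float roughness_scale(float x) /* same polynomial as in the kernel header above */
{
  return (((((0.245f * x) + 5.574f) * x - 10.73f) * x + 2.532f) * x - 0.215f) * x + 5.969f;
}

int main()
{
  const float v = 0.3f;     /* azimuthal roughness */
  const float color = 0.6f; /* desired single-channel albedo */

  const float s = std::log(color) / roughness_scale(v);
  const float sigma = s * s;
  const float albedo = std::exp(-std::sqrt(sigma) * roughness_scale(v));

  std::printf("sigma = %f, reconstructed albedo = %f\n", sigma, albedo); /* albedo ~= 0.6 */
  return 0;
}
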
diff --git a/intern/cycles/kernel/filter/filter_features_sse.h b/intern/cycles/kernel/filter/filter_features_sse.h
index 7bbd17066fd..59d4ace2bef 100644
--- a/intern/cycles/kernel/filter/filter_features_sse.h
+++ b/intern/cycles/kernel/filter/filter_features_sse.h
@@ -109,7 +109,6 @@ ccl_device_inline void filter_calculate_scale_sse(float4 *scale, bool use_time)
scale[2] = rcp(max(reduce_max(scale[2]), make_float4(0.01f)));
if (use_time) {
scale[10] = rcp(max(reduce_max(scale[6]), make_float4(0.01f)));
- ;
}
scale[6] = rcp(max(reduce_max(scale[4]), make_float4(0.01f)));
scale[7] = scale[8] = scale[9] = rcp(max(reduce_max(sqrt(scale[5])), make_float4(0.01f)));
diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h
index e81c1b781c8..5ff4d5f7053 100644
--- a/intern/cycles/kernel/geom/geom.h
+++ b/intern/cycles/kernel/geom/geom.h
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+// clang-format off
#include "kernel/geom/geom_attribute.h"
#include "kernel/geom/geom_object.h"
#ifdef __PATCH_EVAL__
@@ -30,3 +31,4 @@
#include "kernel/geom/geom_curve_intersect.h"
#include "kernel/geom/geom_volume.h"
#include "kernel/geom/geom_primitive.h"
+// clang-format on
diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h
index 456608bfa22..e1b0e6fb81c 100644
--- a/intern/cycles/kernel/geom/geom_attribute.h
+++ b/intern/cycles/kernel/geom/geom_attribute.h
@@ -29,17 +29,11 @@ ccl_device_inline uint subd_triangle_patch(KernelGlobals *kg, const ShaderData *
ccl_device_inline uint attribute_primitive_type(KernelGlobals *kg, const ShaderData *sd)
{
-#ifdef __HAIR__
- if (sd->type & PRIMITIVE_ALL_CURVE) {
- return ATTR_PRIM_CURVE;
- }
- else
-#endif
- if (subd_triangle_patch(kg, sd) != ~0) {
+ if ((sd->type & PRIMITIVE_ALL_TRIANGLE) && subd_triangle_patch(kg, sd) != ~0) {
return ATTR_PRIM_SUBD;
}
else {
- return ATTR_PRIM_TRIANGLE;
+ return ATTR_PRIM_GEOMETRY;
}
}
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h
index e0aacb434eb..928cad58452 100644
--- a/intern/cycles/kernel/geom/geom_curve.h
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -83,6 +83,16 @@ ccl_device float curve_attribute_float(
return (1.0f - sd->u) * f0 + sd->u * f1;
}
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = 0.0f;
+ if (dy)
+ *dy = 0.0f;
+# endif
+
+ return kernel_tex_fetch(__attributes_float, desc.offset);
+ }
else {
# ifdef __RAY_DIFFERENTIALS__
if (dx)
@@ -133,6 +143,16 @@ ccl_device float2 curve_attribute_float2(KernelGlobals *kg,
return (1.0f - sd->u) * f0 + sd->u * f1;
}
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = make_float2(0.0f, 0.0f);
+ if (dy)
+ *dy = make_float2(0.0f, 0.0f);
+# endif
+
+ return kernel_tex_fetch(__attributes_float2, desc.offset);
+ }
else {
# ifdef __RAY_DIFFERENTIALS__
if (dx)
@@ -183,6 +203,16 @@ ccl_device float3 curve_attribute_float3(KernelGlobals *kg,
return (1.0f - sd->u) * f0 + sd->u * f1;
}
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = make_float3(0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float3(0.0f, 0.0f, 0.0f);
+# endif
+
+ return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset));
+ }
else {
# ifdef __RAY_DIFFERENTIALS__
if (dx)
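
The new ATTR_ELEMENT_OBJECT / ATTR_ELEMENT_MESH branches added to curve_attribute_float/float2/float3 (and repeated for the subd-triangle and triangle variants further down) all follow the same shape: a per-object or per-mesh attribute is constant over the surface, so its screen-space derivatives are zero and the value is a single fetch at desc.offset. A condensed sketch of that pattern, with kernel_tex_fetch reduced to a plain array lookup for illustration:

  // Sketch: constant (object/mesh-level) attribute lookup; derivatives are zero.
  // 'attributes_float' stands in for the kernel's __attributes_float texture.
  float constant_attribute_float(const float *attributes_float, int offset, float *dx, float *dy)
  {
    if (dx)
      *dx = 0.0f;  // No spatial variation, so screen-space derivatives vanish.
    if (dy)
      *dy = 0.0f;
    return attributes_float[offset];
  }
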
diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h
index 7380c506bf4..0e2a00e9d2e 100644
--- a/intern/cycles/kernel/geom/geom_motion_curve.h
+++ b/intern/cycles/kernel/geom/geom_motion_curve.h
@@ -36,7 +36,7 @@ ccl_device_inline int find_attribute_curve_motion(KernelGlobals *kg,
 * zero iterations and rendering is really slow with motion curves. Until other
 * areas are sped up, it's probably not so crucial to optimize this out.
*/
- uint attr_offset = object_attribute_map_offset(kg, object) + ATTR_PRIM_CURVE;
+ uint attr_offset = object_attribute_map_offset(kg, object) + ATTR_PRIM_GEOMETRY;
uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
while (attr_map.x != id) {
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
index 49d4829af38..859d919f0bb 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
@@ -103,17 +103,21 @@ ccl_device_inline
const Ray *ray,
float3 verts[3])
{
+# ifdef __KERNEL_OPTIX__
+ /* isect->t is always in world space with OptiX. */
+ return motion_triangle_refine(kg, sd, isect, ray, verts);
+# else
float3 P = ray->P;
float3 D = ray->D;
float t = isect->t;
-# ifdef __INTERSECTION_REFINE__
+# ifdef __INTERSECTION_REFINE__
if (isect->object != OBJECT_NONE) {
-# ifdef __OBJECT_MOTION__
+# ifdef __OBJECT_MOTION__
Transform tfm = sd->ob_itfm;
-# else
+# else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
-# endif
+# endif
P = transform_point(&tfm, P);
D = transform_direction(&tfm, D);
@@ -135,19 +139,20 @@ ccl_device_inline
P = P + D * rt;
if (isect->object != OBJECT_NONE) {
-# ifdef __OBJECT_MOTION__
+# ifdef __OBJECT_MOTION__
Transform tfm = sd->ob_tfm;
-# else
+# else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
-# endif
+# endif
P = transform_point(&tfm, P);
}
return P;
-# else /* __INTERSECTION_REFINE__ */
+# else /* __INTERSECTION_REFINE__ */
return P + D * t;
-# endif /* __INTERSECTION_REFINE__ */
+# endif /* __INTERSECTION_REFINE__ */
+# endif
}
#endif /* __BVH_LOCAL__ */
diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h
index af4e6fbd89b..3aa68e1f84e 100644
--- a/intern/cycles/kernel/geom/geom_object.h
+++ b/intern/cycles/kernel/geom/geom_object.h
@@ -81,13 +81,7 @@ ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals *kg,
const uint num_steps = kernel_tex_fetch(__objects, object).numsteps * 2 + 1;
Transform tfm;
-# ifdef __EMBREE__
- if (kernel_data.bvh.scene) {
- transform_motion_array_interpolate_straight(&tfm, motion, num_steps, time);
- }
- else
-# endif
- transform_motion_array_interpolate(&tfm, motion, num_steps, time);
+ transform_motion_array_interpolate(&tfm, motion, num_steps, time);
return tfm;
}
@@ -326,6 +320,26 @@ ccl_device_inline uint object_patch_map_offset(KernelGlobals *kg, int object)
return kernel_tex_fetch(__objects, object).patch_map_offset;
}
+/* Volume step size */
+
+ccl_device_inline float object_volume_density(KernelGlobals *kg, int object)
+{
+ if (object == OBJECT_NONE) {
+ return 1.0f;
+ }
+
+ return kernel_tex_fetch(__objects, object).surface_area;
+}
+
+ccl_device_inline float object_volume_step_size(KernelGlobals *kg, int object)
+{
+ if (object == OBJECT_NONE) {
+ return kernel_data.background.volume_step_size;
+ }
+
+ return kernel_tex_fetch(__object_volume_step, object);
+}
+
/* Pass ID for shader */
ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd)
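
object_volume_step_size added above returns a per-object ray-marching step from the new __object_volume_step array, falling back to the scene-wide background step when no object is attached. A hedged sketch of how per-object steps might be reduced to a single step for a stack of overlapping volumes follows; the min-reduction is an assumption for illustration, the actual policy lives in volume_stack_step_size used later in this patch.

  // Sketch only: combine per-object step sizes by taking the minimum, assuming the
  // smallest step is the conservative choice. Names here are illustrative.
  #include <algorithm>
  #include <vector>

  float combined_step_size(const std::vector<float> &per_object_steps, float background_step)
  {
    float step = background_step;
    for (float s : per_object_steps)
      step = std::min(step, s);
    return step;
  }
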
diff --git a/intern/cycles/kernel/geom/geom_subd_triangle.h b/intern/cycles/kernel/geom/geom_subd_triangle.h
index 81bac6e6ee1..3eef9857ae3 100644
--- a/intern/cycles/kernel/geom/geom_subd_triangle.h
+++ b/intern/cycles/kernel/geom/geom_subd_triangle.h
@@ -217,6 +217,14 @@ ccl_device_noinline float subd_triangle_attribute_float(
return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
}
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+ if (dx)
+ *dx = 0.0f;
+ if (dy)
+ *dy = 0.0f;
+
+ return kernel_tex_fetch(__attributes_float, desc.offset);
+ }
else {
if (dx)
*dx = 0.0f;
@@ -352,6 +360,14 @@ ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals *kg,
return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
}
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+ if (dx)
+ *dx = make_float2(0.0f, 0.0f);
+ if (dy)
+ *dy = make_float2(0.0f, 0.0f);
+
+ return kernel_tex_fetch(__attributes_float2, desc.offset);
+ }
else {
if (dx)
*dx = make_float2(0.0f, 0.0f);
@@ -486,6 +502,14 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg,
return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
}
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+ if (dx)
+ *dx = make_float3(0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float3(0.0f, 0.0f, 0.0f);
+
+ return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset));
+ }
else {
if (dx)
*dx = make_float3(0.0f, 0.0f, 0.0f);
@@ -584,6 +608,14 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals *kg,
return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
}
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+ if (dx)
+ *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+ return color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, desc.offset));
+ }
else {
if (dx)
*dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h
index fdb7f655f64..a2731bf2bd0 100644
--- a/intern/cycles/kernel/geom/geom_triangle.h
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@ -153,6 +153,14 @@ ccl_device float triangle_attribute_float(
return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
}
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+ if (dx)
+ *dx = 0.0f;
+ if (dy)
+ *dy = 0.0f;
+
+ return kernel_tex_fetch(__attributes_float, desc.offset);
+ }
else {
if (dx)
*dx = 0.0f;
@@ -212,6 +220,14 @@ ccl_device float2 triangle_attribute_float2(KernelGlobals *kg,
return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
}
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+ if (dx)
+ *dx = make_float2(0.0f, 0.0f);
+ if (dy)
+ *dy = make_float2(0.0f, 0.0f);
+
+ return kernel_tex_fetch(__attributes_float2, desc.offset);
+ }
else {
if (dx)
*dx = make_float2(0.0f, 0.0f);
@@ -272,6 +288,14 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg,
return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
}
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+ if (dx)
+ *dx = make_float3(0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float3(0.0f, 0.0f, 0.0f);
+
+ return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset));
+ }
else {
if (dx)
*dx = make_float3(0.0f, 0.0f, 0.0f);
@@ -304,6 +328,14 @@ ccl_device float4 triangle_attribute_float4(KernelGlobals *kg,
return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
}
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+ if (dx)
+ *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+ return color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, desc.offset));
+ }
else {
if (dx)
*dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index 68075199402..6604806f73b 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -690,16 +690,20 @@ ccl_device_inline float3 triangle_refine_local(KernelGlobals *kg,
const Intersection *isect,
const Ray *ray)
{
+#ifdef __KERNEL_OPTIX__
+ /* isect->t is always in world space with OptiX. */
+ return triangle_refine(kg, sd, isect, ray);
+#else
float3 P = ray->P;
float3 D = ray->D;
float t = isect->t;
if (isect->object != OBJECT_NONE) {
-#ifdef __OBJECT_MOTION__
+# ifdef __OBJECT_MOTION__
Transform tfm = sd->ob_itfm;
-#else
+# else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
-#endif
+# endif
P = transform_point(&tfm, P);
D = transform_direction(&tfm, D);
@@ -708,7 +712,7 @@ ccl_device_inline float3 triangle_refine_local(KernelGlobals *kg,
P = P + D * t;
-#ifdef __INTERSECTION_REFINE__
+# ifdef __INTERSECTION_REFINE__
const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0),
tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1),
@@ -728,19 +732,20 @@ ccl_device_inline float3 triangle_refine_local(KernelGlobals *kg,
float rt = dot(edge2, qvec) / det;
P = P + D * rt;
}
-#endif /* __INTERSECTION_REFINE__ */
+# endif /* __INTERSECTION_REFINE__ */
if (isect->object != OBJECT_NONE) {
-#ifdef __OBJECT_MOTION__
+# ifdef __OBJECT_MOTION__
Transform tfm = sd->ob_tfm;
-#else
+# else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
-#endif
+# endif
P = transform_point(&tfm, P);
}
return P;
+#endif
}
CCL_NAMESPACE_END
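
Both triangle_refine_local above and motion_triangle_refine_local earlier get the same early-out: with OptiX, isect->t is already reported as a world-space distance, so the object-space advance-and-refine round trip is skipped and the regular refine path is used. A minimal sketch of that branching idea, with the transforms elided:

  // Sketch: illustrative structure of the refine step. With OptiX, t is already a
  // world-space distance, so the object-space round trip can be skipped entirely.
  struct Vec3 { float x, y, z; };

  static Vec3 advance(Vec3 P, Vec3 D, float t)
  {
    return {P.x + D.x * t, P.y + D.y * t, P.z + D.z * t};
  }

  static Vec3 refine_hit(Vec3 P, Vec3 D, float t, bool t_is_world_space)
  {
    if (t_is_world_space) {
      return advance(P, D, t);  // OptiX-style: advance directly in world space.
    }
    // Otherwise (transforms elided in this sketch): move P and D into object space,
    // advance by the object-space t, refine, then transform the hit point back.
    return advance(P, D, t);
  }
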
diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h
index 96cf35a40dc..f43a7841b46 100644
--- a/intern/cycles/kernel/geom/geom_volume.h
+++ b/intern/cycles/kernel/geom/geom_volume.h
@@ -51,10 +51,14 @@ ccl_device float volume_attribute_float(KernelGlobals *kg,
const ShaderData *sd,
const AttributeDescriptor desc)
{
- float3 P = volume_normalized_position(kg, sd, sd->P);
+ /* todo: optimize this so we don't have to transform both here and in
+ * kernel_tex_image_interp_3d when possible. Also could optimize for the
+ * common case where transform is translation/scale only. */
+ float3 P = sd->P;
+ object_inverse_position_transform(kg, sd, &P);
InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC) ? INTERPOLATION_CUBIC :
INTERPOLATION_NONE;
- float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp);
+ float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P, interp);
return average(float4_to_float3(r));
}
@@ -62,10 +66,11 @@ ccl_device float3 volume_attribute_float3(KernelGlobals *kg,
const ShaderData *sd,
const AttributeDescriptor desc)
{
- float3 P = volume_normalized_position(kg, sd, sd->P);
+ float3 P = sd->P;
+ object_inverse_position_transform(kg, sd, &P);
InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC) ? INTERPOLATION_CUBIC :
INTERPOLATION_NONE;
- float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp);
+ float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P, interp);
if (r.w > 1e-6f && r.w != 1.0f) {
/* For RGBA colors, unpremultiply after interpolation. */
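
Instead of normalizing positions against the volume bounds, the volume attribute lookups now move the shading position into object space via object_inverse_position_transform and pass the full float3 to kernel_tex_image_interp_3d. A minimal sketch of applying an affine inverse transform to a point; the row-major 3x4 layout is a stand-in assumption for the kernel's Transform utilities.

  // Sketch: apply a 3x4 affine (inverse) transform to a point before the 3D texture lookup.
  struct Point3 { float x, y, z; };

  Point3 transform_point_3x4(const float m[3][4], Point3 p)
  {
    return {m[0][0] * p.x + m[0][1] * p.y + m[0][2] * p.z + m[0][3],
            m[1][0] * p.x + m[1][1] * p.y + m[1][2] * p.z + m[1][3],
            m[2][0] * p.x + m[2][1] * p.y + m[2][2] * p.z + m[2][3]};
  }
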
diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h
index dfdd8843f29..b907c6a2bac 100644
--- a/intern/cycles/kernel/kernel.h
+++ b/intern/cycles/kernel/kernel.h
@@ -19,8 +19,8 @@
/* CPU Kernel Interface */
-#include "util/util_types.h"
#include "kernel/kernel_types.h"
+#include "util/util_types.h"
CCL_NAMESPACE_BEGIN
@@ -38,7 +38,7 @@ void *kernel_osl_memory(KernelGlobals *kg);
bool kernel_osl_use(KernelGlobals *kg);
void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size);
-void kernel_tex_copy(KernelGlobals *kg, const char *name, void *mem, size_t size);
+void kernel_global_memory_copy(KernelGlobals *kg, const char *name, void *mem, size_t size);
#define KERNEL_ARCH cpu
#include "kernel/kernels/cpu/kernel_cpu.h"
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index 606c288649a..79ea03f4f6f 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -36,21 +36,18 @@ ccl_device_inline void bsdf_eval_init(BsdfEval *eval,
eval->glossy = make_float3(0.0f, 0.0f, 0.0f);
eval->transmission = make_float3(0.0f, 0.0f, 0.0f);
eval->transparent = make_float3(0.0f, 0.0f, 0.0f);
- eval->subsurface = make_float3(0.0f, 0.0f, 0.0f);
- eval->scatter = make_float3(0.0f, 0.0f, 0.0f);
+ eval->volume = make_float3(0.0f, 0.0f, 0.0f);
if (type == CLOSURE_BSDF_TRANSPARENT_ID)
eval->transparent = value;
- else if (CLOSURE_IS_BSDF_DIFFUSE(type))
+ else if (CLOSURE_IS_BSDF_DIFFUSE(type) || CLOSURE_IS_BSDF_BSSRDF(type))
eval->diffuse = value;
else if (CLOSURE_IS_BSDF_GLOSSY(type))
eval->glossy = value;
else if (CLOSURE_IS_BSDF_TRANSMISSION(type))
eval->transmission = value;
- else if (CLOSURE_IS_BSDF_BSSRDF(type))
- eval->subsurface = value;
else if (CLOSURE_IS_PHASE(type))
- eval->scatter = value;
+ eval->volume = value;
}
else
#endif
@@ -73,16 +70,14 @@ ccl_device_inline void bsdf_eval_accum(BsdfEval *eval,
value *= mis_weight;
#ifdef __PASSES__
if (eval->use_light_pass) {
- if (CLOSURE_IS_BSDF_DIFFUSE(type))
+ if (CLOSURE_IS_BSDF_DIFFUSE(type) || CLOSURE_IS_BSDF_BSSRDF(type))
eval->diffuse += value;
else if (CLOSURE_IS_BSDF_GLOSSY(type))
eval->glossy += value;
else if (CLOSURE_IS_BSDF_TRANSMISSION(type))
eval->transmission += value;
- else if (CLOSURE_IS_BSDF_BSSRDF(type))
- eval->subsurface += value;
else if (CLOSURE_IS_PHASE(type))
- eval->scatter += value;
+ eval->volume += value;
 /* skipping transparent, this function is used for eval(), will be zero then */
}
@@ -98,7 +93,7 @@ ccl_device_inline bool bsdf_eval_is_zero(BsdfEval *eval)
#ifdef __PASSES__
if (eval->use_light_pass) {
return is_zero(eval->diffuse) && is_zero(eval->glossy) && is_zero(eval->transmission) &&
- is_zero(eval->transparent) && is_zero(eval->subsurface) && is_zero(eval->scatter);
+ is_zero(eval->transparent) && is_zero(eval->volume);
}
else
#endif
@@ -114,8 +109,7 @@ ccl_device_inline void bsdf_eval_mis(BsdfEval *eval, float value)
eval->diffuse *= value;
eval->glossy *= value;
eval->transmission *= value;
- eval->subsurface *= value;
- eval->scatter *= value;
+ eval->volume *= value;
 /* skipping transparent, this function is used for eval(), will be zero then */
}
@@ -144,8 +138,7 @@ ccl_device_inline void bsdf_eval_mul3(BsdfEval *eval, float3 value)
eval->diffuse *= value;
eval->glossy *= value;
eval->transmission *= value;
- eval->subsurface *= value;
- eval->scatter *= value;
+ eval->volume *= value;
 /* skipping transparent, this function is used for eval(), will be zero then */
}
@@ -160,7 +153,7 @@ ccl_device_inline float3 bsdf_eval_sum(const BsdfEval *eval)
{
#ifdef __PASSES__
if (eval->use_light_pass) {
- return eval->diffuse + eval->glossy + eval->transmission + eval->subsurface + eval->scatter;
+ return eval->diffuse + eval->glossy + eval->transmission + eval->volume;
}
else
#endif
@@ -187,19 +180,16 @@ ccl_device_inline void path_radiance_init(KernelGlobals *kg, PathRadiance *L)
L->color_diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->color_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->color_transmission = make_float3(0.0f, 0.0f, 0.0f);
- L->color_subsurface = make_float3(0.0f, 0.0f, 0.0f);
L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f);
- L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f);
- L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f);
+ L->direct_volume = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f);
- L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f);
- L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f);
+ L->indirect_volume = make_float3(0.0f, 0.0f, 0.0f);
L->transparent = 0.0f;
L->emission = make_float3(0.0f, 0.0f, 0.0f);
@@ -211,8 +201,7 @@ ccl_device_inline void path_radiance_init(KernelGlobals *kg, PathRadiance *L)
L->state.diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->state.glossy = make_float3(0.0f, 0.0f, 0.0f);
L->state.transmission = make_float3(0.0f, 0.0f, 0.0f);
- L->state.subsurface = make_float3(0.0f, 0.0f, 0.0f);
- L->state.scatter = make_float3(0.0f, 0.0f, 0.0f);
+ L->state.volume = make_float3(0.0f, 0.0f, 0.0f);
L->state.direct = make_float3(0.0f, 0.0f, 0.0f);
}
else
@@ -264,11 +253,9 @@ ccl_device_inline void path_radiance_bsdf_bounce(KernelGlobals *kg,
L_state->diffuse = bsdf_eval->diffuse * value;
L_state->glossy = bsdf_eval->glossy * value;
L_state->transmission = bsdf_eval->transmission * value;
- L_state->subsurface = bsdf_eval->subsurface * value;
- L_state->scatter = bsdf_eval->scatter * value;
+ L_state->volume = bsdf_eval->volume * value;
- *throughput = L_state->diffuse + L_state->glossy + L_state->transmission +
- L_state->subsurface + L_state->scatter;
+ *throughput = L_state->diffuse + L_state->glossy + L_state->transmission + L_state->volume;
L_state->direct = *throughput;
}
@@ -449,8 +436,7 @@ ccl_device_inline void path_radiance_accum_light(KernelGlobals *kg,
L->direct_diffuse += shaded_throughput * bsdf_eval->diffuse;
L->direct_glossy += shaded_throughput * bsdf_eval->glossy;
L->direct_transmission += shaded_throughput * bsdf_eval->transmission;
- L->direct_subsurface += shaded_throughput * bsdf_eval->subsurface;
- L->direct_scatter += shaded_throughput * bsdf_eval->scatter;
+ L->direct_volume += shaded_throughput * bsdf_eval->volume;
if (is_lamp) {
L->shadow.x += shadow.x * shadow_fac;
@@ -528,7 +514,8 @@ ccl_device_inline void path_radiance_accum_background(KernelGlobals *kg,
}
#ifdef __DENOISING_FEATURES__
- L->denoising_albedo += state->denoising_feature_weight * value;
+ L->denoising_albedo += state->denoising_feature_weight * state->denoising_feature_throughput *
+ value;
#endif /* __DENOISING_FEATURES__ */
}
@@ -561,15 +548,13 @@ ccl_device_inline void path_radiance_sum_indirect(PathRadiance *L)
L->direct_diffuse += L->state.diffuse * L->direct_emission;
L->direct_glossy += L->state.glossy * L->direct_emission;
L->direct_transmission += L->state.transmission * L->direct_emission;
- L->direct_subsurface += L->state.subsurface * L->direct_emission;
- L->direct_scatter += L->state.scatter * L->direct_emission;
+ L->direct_volume += L->state.volume * L->direct_emission;
L->indirect = safe_divide_color(L->indirect, L->state.direct);
L->indirect_diffuse += L->state.diffuse * L->indirect;
L->indirect_glossy += L->state.glossy * L->indirect;
L->indirect_transmission += L->state.transmission * L->indirect;
- L->indirect_subsurface += L->state.subsurface * L->indirect;
- L->indirect_scatter += L->state.scatter * L->indirect;
+ L->indirect_volume += L->state.volume * L->indirect;
}
#endif
}
@@ -581,8 +566,7 @@ ccl_device_inline void path_radiance_reset_indirect(PathRadiance *L)
L->state.diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->state.glossy = make_float3(0.0f, 0.0f, 0.0f);
L->state.transmission = make_float3(0.0f, 0.0f, 0.0f);
- L->state.subsurface = make_float3(0.0f, 0.0f, 0.0f);
- L->state.scatter = make_float3(0.0f, 0.0f, 0.0f);
+ L->state.volume = make_float3(0.0f, 0.0f, 0.0f);
L->direct_emission = make_float3(0.0f, 0.0f, 0.0f);
L->indirect = make_float3(0.0f, 0.0f, 0.0f);
@@ -646,10 +630,10 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg,
if (L->use_light_pass) {
path_radiance_sum_indirect(L);
- L_direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission +
- L->direct_subsurface + L->direct_scatter + L->emission;
+ L_direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission + L->direct_volume +
+ L->emission;
L_indirect = L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission +
- L->indirect_subsurface + L->indirect_scatter;
+ L->indirect_volume;
if (!kernel_data.background.transparent)
L_direct += L->background;
@@ -665,14 +649,12 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg,
L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f);
- L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f);
- L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f);
+ L->direct_volume = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f);
- L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f);
- L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f);
+ L->indirect_volume = make_float3(0.0f, 0.0f, 0.0f);
L->emission = make_float3(0.0f, 0.0f, 0.0f);
}
@@ -714,7 +696,7 @@ ccl_device_inline void path_radiance_split_denoising(KernelGlobals *kg,
kernel_assert(L->use_light_pass);
*clean = L->emission + L->background;
- *noisy = L->direct_scatter + L->indirect_scatter;
+ *noisy = L->direct_volume + L->indirect_volume;
# define ADD_COMPONENT(flag, component) \
if (kernel_data.film.denoising_flags & flag) \
@@ -728,8 +710,6 @@ ccl_device_inline void path_radiance_split_denoising(KernelGlobals *kg,
ADD_COMPONENT(DENOISING_CLEAN_GLOSSY_IND, L->indirect_glossy);
ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_DIR, L->direct_transmission);
ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_IND, L->indirect_transmission);
- ADD_COMPONENT(DENOISING_CLEAN_SUBSURFACE_DIR, L->direct_subsurface);
- ADD_COMPONENT(DENOISING_CLEAN_SUBSURFACE_IND, L->indirect_subsurface);
# undef ADD_COMPONENT
#else
*noisy = L->emission;
@@ -766,14 +746,12 @@ ccl_device_inline void path_radiance_accum_sample(PathRadiance *L, PathRadiance
safe_float3_add(L->direct_diffuse, L_sample->direct_diffuse);
safe_float3_add(L->direct_glossy, L_sample->direct_glossy);
safe_float3_add(L->direct_transmission, L_sample->direct_transmission);
- safe_float3_add(L->direct_subsurface, L_sample->direct_subsurface);
- safe_float3_add(L->direct_scatter, L_sample->direct_scatter);
+ safe_float3_add(L->direct_volume, L_sample->direct_volume);
safe_float3_add(L->indirect_diffuse, L_sample->indirect_diffuse);
safe_float3_add(L->indirect_glossy, L_sample->indirect_glossy);
safe_float3_add(L->indirect_transmission, L_sample->indirect_transmission);
- safe_float3_add(L->indirect_subsurface, L_sample->indirect_subsurface);
- safe_float3_add(L->indirect_scatter, L_sample->indirect_scatter);
+ safe_float3_add(L->indirect_volume, L_sample->indirect_volume);
safe_float3_add(L->background, L_sample->background);
safe_float3_add(L->ao, L_sample->ao);
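
The kernel_accumulate.h changes collapse the former subsurface and scatter channels: BSSRDF closures now accumulate into the diffuse channel and phase functions into a single volume channel, which shrinks the per-path BsdfEval/PathRadiance state. A hedged sketch of the resulting per-eval channel set; the field names are taken from the diff, but the struct itself is illustrative rather than the full kernel type.

  // Sketch of the consolidated light-pass channels used by bsdf_eval_* after this change.
  // Only the fields visible in the diff are listed; the real BsdfEval carries more state.
  struct Float3 { float x, y, z; };

  struct BsdfEvalChannels {
    Float3 diffuse;       // diffuse BSDFs and (now) BSSRDFs
    Float3 glossy;
    Float3 transmission;
    Float3 transparent;
    Float3 volume;        // phase functions (formerly 'scatter')
  };
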
diff --git a/intern/cycles/kernel/kernel_adaptive_sampling.h b/intern/cycles/kernel/kernel_adaptive_sampling.h
new file mode 100644
index 00000000000..047fe8c92ec
--- /dev/null
+++ b/intern/cycles/kernel/kernel_adaptive_sampling.h
@@ -0,0 +1,230 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __KERNEL_ADAPTIVE_SAMPLING_H__
+#define __KERNEL_ADAPTIVE_SAMPLING_H__
+
+CCL_NAMESPACE_BEGIN
+
+/* Determines whether to continue sampling a given pixel or if it has sufficiently converged. */
+
+ccl_device void kernel_do_adaptive_stopping(KernelGlobals *kg,
+ ccl_global float *buffer,
+ int sample)
+{
+ /* TODO Stefan: Is this better in linear, sRGB or something else? */
+ float4 I = *((ccl_global float4 *)buffer);
+ float4 A = *(ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer);
+ /* The per pixel error as seen in section 2.1 of
+ * "A hierarchical automatic stopping condition for Monte Carlo global illumination"
+ * A small epsilon is added to the divisor to prevent division by zero. */
+ float error = (fabsf(I.x - A.x) + fabsf(I.y - A.y) + fabsf(I.z - A.z)) /
+ (sample * 0.0001f + sqrtf(I.x + I.y + I.z));
+ if (error < kernel_data.integrator.adaptive_threshold * (float)sample) {
+ /* Set the fourth component to a non-zero value to indicate that this pixel has converged. */
+ buffer[kernel_data.film.pass_adaptive_aux_buffer + 3] += 1.0f;
+ }
+}
+
+/* Adjust the values of an adaptively sampled pixel. */
+
+ccl_device void kernel_adaptive_post_adjust(KernelGlobals *kg,
+ ccl_global float *buffer,
+ float sample_multiplier)
+{
+ *(ccl_global float4 *)(buffer) *= sample_multiplier;
+
+ /* Scale the aux pass too; this is necessary for progressive rendering to work properly. */
+ kernel_assert(kernel_data.film.pass_adaptive_aux_buffer);
+ *(ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer) *= sample_multiplier;
+
+#ifdef __PASSES__
+ int flag = kernel_data.film.pass_flag;
+
+ if (flag & PASSMASK(NORMAL))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_normal) *= sample_multiplier;
+
+ if (flag & PASSMASK(UV))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_uv) *= sample_multiplier;
+
+ if (flag & PASSMASK(MOTION)) {
+ *(ccl_global float4 *)(buffer + kernel_data.film.pass_motion) *= sample_multiplier;
+ *(ccl_global float *)(buffer + kernel_data.film.pass_motion_weight) *= sample_multiplier;
+ }
+
+ if (kernel_data.film.use_light_pass) {
+ int light_flag = kernel_data.film.light_pass_flag;
+
+ if (light_flag & PASSMASK(MIST))
+ *(ccl_global float *)(buffer + kernel_data.film.pass_mist) *= sample_multiplier;
+
+ /* Shadow pass omitted on purpose. It has its own scale parameter. */
+
+ if (light_flag & PASSMASK(DIFFUSE_INDIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(GLOSSY_INDIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(TRANSMISSION_INDIRECT))
+ *(ccl_global float3 *)(buffer +
+ kernel_data.film.pass_transmission_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(VOLUME_INDIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_volume_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(DIFFUSE_DIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_direct) *= sample_multiplier;
+ if (light_flag & PASSMASK(GLOSSY_DIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_direct) *= sample_multiplier;
+ if (light_flag & PASSMASK(TRANSMISSION_DIRECT))
+ *(ccl_global float3 *)(buffer +
+ kernel_data.film.pass_transmission_direct) *= sample_multiplier;
+ if (light_flag & PASSMASK(VOLUME_DIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_volume_direct) *= sample_multiplier;
+
+ if (light_flag & PASSMASK(EMISSION))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_emission) *= sample_multiplier;
+ if (light_flag & PASSMASK(BACKGROUND))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_background) *= sample_multiplier;
+ if (light_flag & PASSMASK(AO))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_ao) *= sample_multiplier;
+
+ if (light_flag & PASSMASK(DIFFUSE_COLOR))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_color) *= sample_multiplier;
+ if (light_flag & PASSMASK(GLOSSY_COLOR))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_color) *= sample_multiplier;
+ if (light_flag & PASSMASK(TRANSMISSION_COLOR))
+ *(ccl_global float3 *)(buffer +
+ kernel_data.film.pass_transmission_color) *= sample_multiplier;
+ }
+#endif
+
+#ifdef __DENOISING_FEATURES__
+
+# define scale_float3_variance(buffer, offset, scale) \
+ *(buffer + offset) *= scale; \
+ *(buffer + offset + 1) *= scale; \
+ *(buffer + offset + 2) *= scale; \
+ *(buffer + offset + 3) *= scale * scale; \
+ *(buffer + offset + 4) *= scale * scale; \
+ *(buffer + offset + 5) *= scale * scale;
+
+# define scale_shadow_variance(buffer, offset, scale) \
+ *(buffer + offset) *= scale; \
+ *(buffer + offset + 1) *= scale; \
+ *(buffer + offset + 2) *= scale * scale;
+
+ if (kernel_data.film.pass_denoising_data) {
+ scale_shadow_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_SHADOW_A, sample_multiplier);
+ scale_shadow_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_SHADOW_B, sample_multiplier);
+ if (kernel_data.film.pass_denoising_clean) {
+ scale_float3_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, sample_multiplier);
+ *(buffer + kernel_data.film.pass_denoising_clean) *= sample_multiplier;
+ *(buffer + kernel_data.film.pass_denoising_clean + 1) *= sample_multiplier;
+ *(buffer + kernel_data.film.pass_denoising_clean + 2) *= sample_multiplier;
+ }
+ else {
+ scale_float3_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, sample_multiplier);
+ }
+ scale_float3_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL, sample_multiplier);
+ scale_float3_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO, sample_multiplier);
+ *(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH) *= sample_multiplier;
+ *(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH +
+ 1) *= sample_multiplier * sample_multiplier;
+ }
+#endif /* __DENOISING_FEATURES__ */
+
+ if (kernel_data.film.cryptomatte_passes) {
+ int num_slots = 0;
+ num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) ? 1 : 0;
+ num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) ? 1 : 0;
+ num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) ? 1 : 0;
+ num_slots = num_slots * 2 * kernel_data.film.cryptomatte_depth;
+ ccl_global float2 *id_buffer = (ccl_global float2 *)(buffer +
+ kernel_data.film.pass_cryptomatte);
+ for (int slot = 0; slot < num_slots; slot++) {
+ id_buffer[slot].y *= sample_multiplier;
+ }
+ }
+}
+
+/* This is a simple box filter in two passes.
+ * When a pixel demands more adaptive samples, let its neighboring pixels draw more samples too. */
+
+ccl_device bool kernel_do_adaptive_filter_x(KernelGlobals *kg, int y, ccl_global WorkTile *tile)
+{
+ bool any = false;
+ bool prev = false;
+ for (int x = tile->x; x < tile->x + tile->w; ++x) {
+ int index = tile->offset + x + y * tile->stride;
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if (aux->w == 0.0f) {
+ any = true;
+ if (x > tile->x && !prev) {
+ index = index - 1;
+ buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ aux = (ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer);
+ aux->w = 0.0f;
+ }
+ prev = true;
+ }
+ else {
+ if (prev) {
+ aux->w = 0.0f;
+ }
+ prev = false;
+ }
+ }
+ return any;
+}
+
+ccl_device bool kernel_do_adaptive_filter_y(KernelGlobals *kg, int x, ccl_global WorkTile *tile)
+{
+ bool prev = false;
+ bool any = false;
+ for (int y = tile->y; y < tile->y + tile->h; ++y) {
+ int index = tile->offset + x + y * tile->stride;
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if (aux->w == 0.0f) {
+ any = true;
+ if (y > tile->y && !prev) {
+ index = index - tile->stride;
+ buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ aux = (ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer);
+ aux->w = 0.0f;
+ }
+ prev = true;
+ }
+ else {
+ if (prev) {
+ aux->w = 0.0f;
+ }
+ prev = false;
+ }
+ }
+ return any;
+}
+
+CCL_NAMESPACE_END
+
+#endif /* __KERNEL_ADAPTIVE_SAMPLING_H__ */
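
The stopping test in kernel_do_adaptive_stopping compares a per-pixel error, the RGB sum of |I - A| divided by (sample * 1e-4 + sqrt(I.x + I.y + I.z)), against threshold * sample. A small host-side C++ example of the same arithmetic, handy for sanity-checking threshold values; the numbers below are made up.

  // Host-side sketch of the per-pixel convergence test from kernel_do_adaptive_stopping.
  #include <cmath>
  #include <cstdio>

  bool pixel_converged(const float I[3], const float A[3], int sample, float threshold)
  {
    float error = (std::fabs(I[0] - A[0]) + std::fabs(I[1] - A[1]) + std::fabs(I[2] - A[2])) /
                  (sample * 0.0001f + std::sqrt(I[0] + I[1] + I[2]));
    return error < threshold * (float)sample;
  }

  int main()
  {
    const float I[3] = {12.1f, 11.9f, 12.0f};  // accumulated full estimate (made-up values)
    const float A[3] = {12.0f, 12.0f, 12.0f};  // accumulated half-buffer estimate
    std::printf("converged: %d\n", pixel_converged(I, A, 256, 0.01f));
    return 0;
  }
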
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index a349b225abb..f1fc697553a 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -71,7 +71,7 @@ ccl_device_inline void compute_light_pass(
# ifdef __SUBSURFACE__
/* sample subsurface scattering */
- if ((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
+ if ((pass_filter & BAKE_FILTER_DIFFUSE) && (sd->flag & SD_BSSRDF)) {
/* When mixing BSSRDF and BSDF closures we should skip BSDF lighting
* if scattering was successful. */
SubsurfaceIndirectRays ss_indirect;
@@ -123,7 +123,7 @@ ccl_device_inline void compute_light_pass(
# ifdef __SUBSURFACE__
/* sample subsurface scattering */
- if ((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
+ if ((pass_filter & BAKE_FILTER_DIFFUSE) && (sd->flag & SD_BSSRDF)) {
/* When mixing BSSRDF and BSDF closures we should skip BSDF lighting
* if scattering was successful. */
kernel_branched_path_subsurface_scatter(
@@ -178,10 +178,6 @@ ccl_device_inline float3 kernel_bake_shader_bsdf(KernelGlobals *kg,
return shader_bsdf_glossy(kg, sd);
case SHADER_EVAL_TRANSMISSION:
return shader_bsdf_transmission(kg, sd);
-# ifdef __SUBSURFACE__
- case SHADER_EVAL_SUBSURFACE:
- return shader_bsdf_subsurface(kg, sd);
-# endif
default:
kernel_assert(!"Unknown bake type passed to BSDF evaluate");
return make_float3(0.0f, 0.0f, 0.0f);
@@ -385,11 +381,6 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
if ((pass_filter & BAKE_FILTER_TRANSMISSION_INDIRECT) == BAKE_FILTER_TRANSMISSION_INDIRECT)
out += L.indirect_transmission;
- if ((pass_filter & BAKE_FILTER_SUBSURFACE_DIRECT) == BAKE_FILTER_SUBSURFACE_DIRECT)
- out += L.direct_subsurface;
- if ((pass_filter & BAKE_FILTER_SUBSURFACE_INDIRECT) == BAKE_FILTER_SUBSURFACE_INDIRECT)
- out += L.indirect_subsurface;
-
if ((pass_filter & BAKE_FILTER_EMISSION) != 0)
out += L.emission;
@@ -414,13 +405,6 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
kg, &sd, &state, L.direct_transmission, L.indirect_transmission, type, pass_filter);
break;
}
- case SHADER_EVAL_SUBSURFACE: {
-# ifdef __SUBSURFACE__
- out = kernel_bake_evaluate_direct_indirect(
- kg, &sd, &state, L.direct_subsurface, L.indirect_subsurface, type, pass_filter);
-# endif
- break;
- }
# endif
/* extra */
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index 006dd00dd73..88f6a264a5a 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -35,11 +35,11 @@
# define __NODES_FEATURES__ NODE_FEATURE_ALL
#endif
+#include "util/util_half.h"
#include "util/util_math.h"
#include "util/util_simd.h"
-#include "util/util_half.h"
-#include "util/util_types.h"
#include "util/util_texture.h"
+#include "util/util_types.h"
#define ccl_addr_space
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 4f508d7cdaa..3c5a10540d5 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -37,8 +37,11 @@ typedef unsigned long long uint64_t;
typedef unsigned short half;
typedef unsigned long long CUtexObject;
-#define FLT_MIN 1.175494350822287507969e-38f
-#define FLT_MAX 340282346638528859811704183484516925440.0f
+#ifdef CYCLES_CUBIN_CC
+# define FLT_MIN 1.175494350822287507969e-38f
+# define FLT_MAX 340282346638528859811704183484516925440.0f
+# define FLT_EPSILON 1.192092896e-07F
+#endif
__device__ half __float2half(const float f)
{
diff --git a/intern/cycles/kernel/kernel_compat_optix.h b/intern/cycles/kernel/kernel_compat_optix.h
index 61b9d87a020..7068acc3a32 100644
--- a/intern/cycles/kernel/kernel_compat_optix.h
+++ b/intern/cycles/kernel/kernel_compat_optix.h
@@ -35,9 +35,11 @@ typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
typedef unsigned short half;
typedef unsigned long long CUtexObject;
-
-#define FLT_MIN 1.175494350822287507969e-38f
-#define FLT_MAX 340282346638528859811704183484516925440.0f
+#ifdef CYCLES_CUBIN_CC
+# define FLT_MIN 1.175494350822287507969e-38f
+# define FLT_MAX 340282346638528859811704183484516925440.0f
+# define FLT_EPSILON 1.192092896e-07F
+#endif
__device__ half __float2half(const float f)
{
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index c63d1149d03..71b176a0a8f 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -145,16 +145,14 @@ ccl_device_noinline_cpu bool direct_emission(KernelGlobals *kg,
#ifdef __PASSES__
/* use visibility flag to skip lights */
if (ls->shader & SHADER_EXCLUDE_ANY) {
- if (ls->shader & SHADER_EXCLUDE_DIFFUSE) {
+ if (ls->shader & SHADER_EXCLUDE_DIFFUSE)
eval->diffuse = make_float3(0.0f, 0.0f, 0.0f);
- eval->subsurface = make_float3(0.0f, 0.0f, 0.0f);
- }
if (ls->shader & SHADER_EXCLUDE_GLOSSY)
eval->glossy = make_float3(0.0f, 0.0f, 0.0f);
if (ls->shader & SHADER_EXCLUDE_TRANSMIT)
eval->transmission = make_float3(0.0f, 0.0f, 0.0f);
if (ls->shader & SHADER_EXCLUDE_SCATTER)
- eval->scatter = make_float3(0.0f, 0.0f, 0.0f);
+ eval->volume = make_float3(0.0f, 0.0f, 0.0f);
}
#endif
diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h
index fc3a6152b79..3829426f261 100644
--- a/intern/cycles/kernel/kernel_film.h
+++ b/intern/cycles/kernel/kernel_film.h
@@ -40,15 +40,9 @@ ccl_device float4 film_get_pass_result(KernelGlobals *kg,
if (display_divide_pass_stride != -1) {
ccl_global float4 *divide_in = (ccl_global float4 *)(buffer + display_divide_pass_stride +
index * kernel_data.film.pass_stride);
- if (divide_in->x != 0.0f) {
- pass_result.x /= divide_in->x;
- }
- if (divide_in->y != 0.0f) {
- pass_result.y /= divide_in->y;
- }
- if (divide_in->z != 0.0f) {
- pass_result.z /= divide_in->z;
- }
+ float3 divided = safe_divide_even_color(float4_to_float3(pass_result),
+ float4_to_float3(*divide_in));
+ pass_result = make_float4(divided.x, divided.y, divided.z, pass_result.w);
}
if (kernel_data.film.use_display_exposure) {
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index a440021b6b9..c186e8560eb 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -22,8 +22,8 @@
#include "kernel/kernel_profiling.h"
#ifdef __KERNEL_CPU__
-# include "util/util_vector.h"
# include "util/util_map.h"
+# include "util/util_vector.h"
#endif
#ifdef __KERNEL_OPENCL__
diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h
index e59d8946950..5b6e3bbf501 100644
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -195,4 +195,36 @@ ccl_device void cmj_sample_2D(int s, int N, int p, float *fx, float *fy)
}
#endif
+ccl_device float pmj_sample_1D(KernelGlobals *kg, int sample, int rng_hash, int dimension)
+{
+ /* Fallback to random */
+ if (sample >= NUM_PMJ_SAMPLES) {
+ int p = rng_hash + dimension;
+ return cmj_randfloat(sample, p);
+ }
+ uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
+ int index = ((dimension % NUM_PMJ_PATTERNS) * NUM_PMJ_SAMPLES + sample) * 2;
+ return __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ (tmp_rng & 0x007fffff)) -
+ 1.0f;
+}
+
+ccl_device void pmj_sample_2D(
+ KernelGlobals *kg, int sample, int rng_hash, int dimension, float *fx, float *fy)
+{
+ if (sample >= NUM_PMJ_SAMPLES) {
+ int p = rng_hash + dimension;
+ *fx = cmj_randfloat(sample, p);
+ *fy = cmj_randfloat(sample, p + 1);
+ return;
+ }
+ uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
+ int index = ((dimension % NUM_PMJ_PATTERNS) * NUM_PMJ_SAMPLES + sample) * 2;
+ *fx = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ (tmp_rng & 0x007fffff)) -
+ 1.0f;
+ tmp_rng = cmj_hash_simple(dimension + 1, rng_hash);
+ *fy = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index + 1) ^
+ (tmp_rng & 0x007fffff)) -
+ 1.0f;
+}
+
CCL_NAMESPACE_END
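
pmj_sample_1D/2D scramble the table value by xoring random bits into the low 23 bits and subtracting 1.0 after reinterpreting as float. That works if the stored pattern values are the bit patterns of floats in [1, 2): such floats share a fixed exponent, so xoring only mantissa bits keeps them in [1, 2), and subtracting 1.0 maps the result to [0, 1). A small host-side sketch of that bit trick, with the [1, 2) encoding stated as the assumption it relies on:

  // Sketch of the mantissa-xor scrambling used by the PMJ sampler. Assumes the LUT stores
  // bit patterns of floats in [1, 2), so xoring the 23 mantissa bits stays inside [1, 2).
  #include <cstdint>
  #include <cstring>

  float scrambled_unit_float(uint32_t lut_bits, uint32_t rng)
  {
    uint32_t bits = lut_bits ^ (rng & 0x007fffffu);  // perturb mantissa only
    float f;
    std::memcpy(&f, &bits, sizeof(f));               // __uint_as_float equivalent
    return f - 1.0f;                                 // map [1, 2) -> [0, 1)
  }
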
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index 7345e9ee5bb..98136bc7047 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -29,7 +29,9 @@ ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg,
if (kernel_data.film.pass_denoising_data == 0)
return;
- buffer += (sample & 1) ? DENOISING_PASS_SHADOW_B : DENOISING_PASS_SHADOW_A;
+ buffer += sample_is_even(kernel_data.integrator.sampling_pattern, sample) ?
+ DENOISING_PASS_SHADOW_B :
+ DENOISING_PASS_SHADOW_A;
path_total = ensure_finite(path_total);
path_total_shaded = ensure_finite(path_total_shaded);
@@ -58,7 +60,8 @@ ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg,
}
float3 normal = make_float3(0.0f, 0.0f, 0.0f);
- float3 albedo = make_float3(0.0f, 0.0f, 0.0f);
+ float3 diffuse_albedo = make_float3(0.0f, 0.0f, 0.0f);
+ float3 specular_albedo = make_float3(0.0f, 0.0f, 0.0f);
float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f;
for (int i = 0; i < sd->num_closure; i++) {
@@ -70,24 +73,31 @@ ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg,
/* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */
normal += sc->N * sc->sample_weight;
sum_weight += sc->sample_weight;
- if (bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) {
- float3 closure_albedo = sc->weight;
- /* Closures that include a Fresnel term typically have weights close to 1 even though their
- * actual contribution is significantly lower.
- * To account for this, we scale their weight by the average fresnel factor (the same is also
- * done for the sample weight in the BSDF setup, so we don't need to scale that here). */
- if (CLOSURE_IS_BSDF_MICROFACET_FRESNEL(sc->type)) {
- MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
- closure_albedo *= bsdf->extra->fresnel_color;
- }
- else if (sc->type == CLOSURE_BSDF_PRINCIPLED_SHEEN_ID) {
- PrincipledSheenBsdf *bsdf = (PrincipledSheenBsdf *)sc;
- closure_albedo *= bsdf->avg_value;
- }
- albedo += closure_albedo;
+ float3 closure_albedo = sc->weight;
+ /* Closures that include a Fresnel term typically have weights close to 1 even though their
+ * actual contribution is significantly lower.
+ * To account for this, we scale their weight by the average fresnel factor (the same is also
+ * done for the sample weight in the BSDF setup, so we don't need to scale that here). */
+ if (CLOSURE_IS_BSDF_MICROFACET_FRESNEL(sc->type)) {
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
+ closure_albedo *= bsdf->extra->fresnel_color;
+ }
+ else if (sc->type == CLOSURE_BSDF_PRINCIPLED_SHEEN_ID) {
+ PrincipledSheenBsdf *bsdf = (PrincipledSheenBsdf *)sc;
+ closure_albedo *= bsdf->avg_value;
+ }
+ else if (sc->type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID) {
+ closure_albedo *= bsdf_principled_hair_albedo(sc);
+ }
+
+ if (bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) {
+ diffuse_albedo += closure_albedo;
sum_nonspecular_weight += sc->sample_weight;
}
+ else {
+ specular_albedo += closure_albedo;
+ }
}
/* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. */
@@ -101,10 +111,14 @@ ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg,
normal = transform_direction(&worldtocamera, normal);
L->denoising_normal += ensure_finite3(state->denoising_feature_weight * normal);
- L->denoising_albedo += ensure_finite3(state->denoising_feature_weight * albedo);
+ L->denoising_albedo += ensure_finite3(state->denoising_feature_weight *
+ state->denoising_feature_throughput * diffuse_albedo);
state->denoising_feature_weight = 0.0f;
}
+ else {
+ state->denoising_feature_throughput *= specular_albedo;
+ }
}
#endif /* __DENOISING_FEATURES__ */
@@ -240,8 +254,6 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg,
L->color_glossy += shader_bsdf_glossy(kg, sd) * throughput;
if (light_flag & PASSMASK_COMPONENT(TRANSMISSION))
L->color_transmission += shader_bsdf_transmission(kg, sd) * throughput;
- if (light_flag & PASSMASK_COMPONENT(SUBSURFACE))
- L->color_subsurface += shader_bsdf_subsurface(kg, sd) * throughput;
if (light_flag & PASSMASK(MIST)) {
/* bring depth into 0..1 range */
@@ -287,11 +299,8 @@ ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg,
if (light_flag & PASSMASK(TRANSMISSION_INDIRECT))
kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_indirect,
L->indirect_transmission);
- if (light_flag & PASSMASK(SUBSURFACE_INDIRECT))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_indirect,
- L->indirect_subsurface);
if (light_flag & PASSMASK(VOLUME_INDIRECT))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_indirect, L->indirect_scatter);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_indirect, L->indirect_volume);
if (light_flag & PASSMASK(DIFFUSE_DIRECT))
kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_direct, L->direct_diffuse);
if (light_flag & PASSMASK(GLOSSY_DIRECT))
@@ -299,11 +308,8 @@ ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg,
if (light_flag & PASSMASK(TRANSMISSION_DIRECT))
kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_direct,
L->direct_transmission);
- if (light_flag & PASSMASK(SUBSURFACE_DIRECT))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_direct,
- L->direct_subsurface);
if (light_flag & PASSMASK(VOLUME_DIRECT))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_direct, L->direct_scatter);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_direct, L->direct_volume);
if (light_flag & PASSMASK(EMISSION))
kernel_write_pass_float3(buffer + kernel_data.film.pass_emission, L->emission);
@@ -319,8 +325,6 @@ ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg,
if (light_flag & PASSMASK(TRANSMISSION_COLOR))
kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color,
L->color_transmission);
- if (light_flag & PASSMASK(SUBSURFACE_COLOR))
- kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_color, L->color_subsurface);
if (light_flag & PASSMASK(SHADOW)) {
float4 shadow = L->shadow;
shadow.w = kernel_data.film.pass_shadow_scale;
@@ -387,6 +391,41 @@ ccl_device_inline void kernel_write_result(KernelGlobals *kg,
#ifdef __KERNEL_DEBUG__
kernel_write_debug_passes(kg, buffer, L);
#endif
+
+ /* Adaptive Sampling. Fill the additional buffer with the odd samples and calculate our stopping
+ criterion. This is the heuristic from "A hierarchical automatic stopping condition for Monte
+ Carlo global illumination" except that here it is applied per pixel and not in hierarchical
+ tiles. */
+ if (kernel_data.film.pass_adaptive_aux_buffer &&
+ kernel_data.integrator.adaptive_threshold > 0.0f) {
+ if (sample_is_even(kernel_data.integrator.sampling_pattern, sample)) {
+ kernel_write_pass_float4(buffer + kernel_data.film.pass_adaptive_aux_buffer,
+ make_float4(L_sum.x * 2.0f, L_sum.y * 2.0f, L_sum.z * 2.0f, 0.0f));
+ }
+#ifdef __KERNEL_CPU__
+ if (sample > kernel_data.integrator.adaptive_min_samples &&
+ (sample & (ADAPTIVE_SAMPLE_STEP - 1)) == (ADAPTIVE_SAMPLE_STEP - 1)) {
+ kernel_do_adaptive_stopping(kg, buffer, sample);
+ }
+#endif
+ }
+
+ /* Write the sample count as negative numbers initially to mark the samples as in progress.
+ * Once the tile has finished rendering, the sign gets flipped and all the pixel values
+ * are scaled as if they were taken at a uniform sample count. */
+ if (kernel_data.film.pass_sample_count) {
+ /* Make sure it's a negative number. In progressive refine mode, this bit gets flipped between
+ * passes. */
+#ifdef __ATOMIC_PASS_WRITE__
+ atomic_fetch_and_or_uint32((ccl_global uint *)(buffer + kernel_data.film.pass_sample_count),
+ 0x80000000);
+#else
+ if (buffer[kernel_data.film.pass_sample_count] > 0) {
+ buffer[kernel_data.film.pass_sample_count] *= -1.0f;
+ }
+#endif
+ kernel_write_pass_float(buffer + kernel_data.film.pass_sample_count, -1.0f);
+ }
}
CCL_NAMESPACE_END
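
kernel_write_result now tracks per-pixel sample counts in the new sample-count pass, writing them as negative values while the tile is still rendering; the atomic variant sets the float's sign bit directly with an OR of 0x80000000. A short host-side sketch of that encoding, illustrative only:

  // Sketch: mark an in-progress sample count by forcing the float's sign bit,
  // mirroring the atomic_fetch_and_or_uint32(..., 0x80000000) path above.
  #include <cstdint>
  #include <cstring>

  float mark_in_progress(float sample_count)
  {
    uint32_t bits;
    std::memcpy(&bits, &sample_count, sizeof(bits));
    bits |= 0x80000000u;                 // set the sign bit -> negative count
    std::memcpy(&sample_count, &bits, sizeof(bits));
    return sample_count;
  }
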
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 1a0b67275a7..db35303e3f1 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -18,6 +18,7 @@
# include "kernel/osl/osl_shader.h"
#endif
+// clang-format off
#include "kernel/kernel_random.h"
#include "kernel/kernel_projection.h"
#include "kernel/kernel_montecarlo.h"
@@ -31,6 +32,7 @@
#include "kernel/kernel_accumulate.h"
#include "kernel/kernel_shader.h"
#include "kernel/kernel_light.h"
+#include "kernel/kernel_adaptive_sampling.h"
#include "kernel/kernel_passes.h"
#if defined(__VOLUME__) || defined(__SUBSURFACE__)
@@ -48,6 +50,7 @@
#include "kernel/kernel_path_surface.h"
#include "kernel/kernel_path_volume.h"
#include "kernel/kernel_path_subsurface.h"
+// clang-format on
CCL_NAMESPACE_BEGIN
@@ -168,19 +171,19 @@ ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(KernelGlobals *k
Ray volume_ray = *ray;
volume_ray.t = (hit) ? isect->t : FLT_MAX;
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+ float step_size = volume_stack_step_size(kg, state->volume_stack);
# ifdef __VOLUME_DECOUPLED__
int sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
bool direct = (state->flag & PATH_RAY_CAMERA) != 0;
- bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, direct, sampling_method);
+ bool decoupled = kernel_volume_use_decoupled(kg, step_size, direct, sampling_method);
if (decoupled) {
/* cache steps along volume for repeated sampling */
VolumeSegment volume_segment;
shader_setup_from_volume(kg, sd, &volume_ray);
- kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, heterogeneous);
+ kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, step_size);
volume_segment.sampling_method = sampling_method;
@@ -226,7 +229,7 @@ ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(KernelGlobals *k
{
/* integrate along volume segment with distance sampling */
VolumeIntegrateResult result = kernel_volume_integrate(
- kg, state, sd, &volume_ray, L, throughput, heterogeneous);
+ kg, state, sd, &volume_ray, L, throughput, step_size);
# ifdef __VOLUME_SCATTER__
if (result == VOLUME_PATH_SCATTERED) {
@@ -656,6 +659,14 @@ ccl_device void kernel_path_trace(
buffer += index * pass_stride;
+ if (kernel_data.film.pass_adaptive_aux_buffer) {
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if (aux->w > 0.0f) {
+ return;
+ }
+ }
+
/* Initialize random numbers and sample ray. */
uint rng_hash;
Ray ray;
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index f75e4ab4c97..337c4fb1d10 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -91,7 +91,7 @@ ccl_device_forceinline void kernel_branched_path_volume(KernelGlobals *kg,
Ray volume_ray = *ray;
volume_ray.t = (hit) ? isect->t : FLT_MAX;
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+ float step_size = volume_stack_step_size(kg, state->volume_stack);
# ifdef __VOLUME_DECOUPLED__
/* decoupled ray marching only supported on CPU */
@@ -100,7 +100,7 @@ ccl_device_forceinline void kernel_branched_path_volume(KernelGlobals *kg,
VolumeSegment volume_segment;
shader_setup_from_volume(kg, sd, &volume_ray);
- kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, heterogeneous);
+ kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, step_size);
/* direct light sampling */
if (volume_segment.closure_flag & SD_SCATTER) {
@@ -171,7 +171,7 @@ ccl_device_forceinline void kernel_branched_path_volume(KernelGlobals *kg,
path_state_branch(&ps, j, num_samples);
VolumeIntegrateResult result = kernel_volume_integrate(
- kg, &ps, sd, &volume_ray, L, &tp, heterogeneous);
+ kg, &ps, sd, &volume_ray, L, &tp, step_size);
# ifdef __VOLUME_SCATTER__
if (result == VOLUME_PATH_SCATTERED) {
@@ -523,6 +523,14 @@ ccl_device void kernel_branched_path_trace(
buffer += index * pass_stride;
+ if (kernel_data.film.pass_adaptive_aux_buffer) {
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if (aux->w > 0.0f) {
+ return;
+ }
+ }
+
/* initialize random numbers and ray */
uint rng_hash;
Ray ray;
diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h
index 8735e3208db..c389c815ae2 100644
--- a/intern/cycles/kernel/kernel_path_state.h
+++ b/intern/cycles/kernel/kernel_path_state.h
@@ -41,9 +41,11 @@ ccl_device_inline void path_state_init(KernelGlobals *kg,
if (kernel_data.film.pass_denoising_data) {
state->flag |= PATH_RAY_STORE_SHADOW_INFO;
state->denoising_feature_weight = 1.0f;
+ state->denoising_feature_throughput = make_float3(1.0f, 1.0f, 1.0f);
}
else {
state->denoising_feature_weight = 0.0f;
+ state->denoising_feature_throughput = make_float3(0.0f, 0.0f, 0.0f);
}
#endif /* __DENOISING_FEATURES__ */
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index 80738213d2a..f4c3b36e778 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -43,7 +43,7 @@ ccl_device uint sobol_dimension(KernelGlobals *kg, int index, int dimension)
uint i = index + SOBOL_SKIP;
for (int j = 0, x; (x = find_first_set(i)); i >>= x) {
j += x;
- result ^= kernel_tex_fetch(__sobol_directions, 32 * dimension + j - 1);
+ result ^= kernel_tex_fetch(__sample_pattern_lut, 32 * dimension + j - 1);
}
return result;
}
@@ -56,7 +56,9 @@ ccl_device_forceinline float path_rng_1D(
#ifdef __DEBUG_CORRELATION__
return (float)drand48();
#endif
-
+ if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) {
+ return pmj_sample_1D(kg, sample, rng_hash, dimension);
+ }
#ifdef __CMJ__
# ifdef __SOBOL__
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
@@ -99,7 +101,10 @@ ccl_device_forceinline void path_rng_2D(KernelGlobals *kg,
*fy = (float)drand48();
return;
#endif
-
+ if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) {
+ pmj_sample_2D(kg, sample, rng_hash, dimension, fx, fy);
+ return;
+ }
#ifdef __CMJ__
# ifdef __SOBOL__
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
@@ -284,4 +289,31 @@ ccl_device float lcg_step_float_addrspace(ccl_addr_space uint *rng)
return (float)*rng * (1.0f / (float)0xFFFFFFFF);
}
+ccl_device_inline bool sample_is_even(int pattern, int sample)
+{
+ if (pattern == SAMPLING_PATTERN_PMJ) {
+ /* See Section 10.2.1, "Progressive Multi-Jittered Sample Sequences", Christensen et al.
+ * We can use this to divide the sample sequence into two classes for easier variance
+ * estimation. */
+#if defined(__GNUC__) && !defined(__KERNEL_GPU__)
+ return __builtin_popcount(sample & 0xaaaaaaaa) & 1;
+#elif defined(__NVCC__)
+ return __popc(sample & 0xaaaaaaaa) & 1;
+#elif defined(__KERNEL_OPENCL__)
+ return popcount(sample & 0xaaaaaaaa) & 1;
+#else
+ /* TODO(Stefan): popcnt intrinsic for Windows with fallback for older CPUs. */
+ int i = sample & 0xaaaaaaaa;
+ i = i - ((i >> 1) & 0x55555555);
+ i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
+ i = (((i + (i >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
+ return i & 1;
+#endif
+ }
+ else {
+ /* TODO(Stefan): Are there reliable ways of dividing CMJ and Sobol into two classes? */
+ return sample & 0x1;
+ }
+}
+
CCL_NAMESPACE_END
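
sample_is_even splits the PMJ sequence into two classes by the parity of the population count of the odd-indexed bits of the sample number (mask 0xaaaaaaaa); other patterns simply use the lowest bit. The bit-twiddling fallback is a standard 32-bit SWAR popcount. A tiny host-side check of that fallback against a naive bit loop, purely illustrative:

  // Sketch: verify the SWAR popcount fallback used by sample_is_even against a naive loop.
  #include <cassert>
  #include <cstdint>

  static int popcount_swar(uint32_t i)
  {
    i = i - ((i >> 1) & 0x55555555u);
    i = (i & 0x33333333u) + ((i >> 2) & 0x33333333u);
    return (int)((((i + (i >> 4)) & 0x0F0F0F0Fu) * 0x01010101u) >> 24);
  }

  int main()
  {
    for (uint32_t sample = 0; sample < 4096; sample++) {
      uint32_t masked = sample & 0xaaaaaaaau;  // odd-indexed bits only
      int naive = 0;
      for (uint32_t b = masked; b; b >>= 1)
        naive += (int)(b & 1u);
      assert((popcount_swar(masked) & 1) == (naive & 1));
    }
    return 0;
  }
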
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index d03faff4242..9700aaba80f 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -23,10 +23,12 @@
* Release.
*/
+// clang-format off
#include "kernel/closure/alloc.h"
#include "kernel/closure/bsdf_util.h"
#include "kernel/closure/bsdf.h"
#include "kernel/closure/emissive.h"
+// clang-format on
#include "kernel/svm/svm.h"
@@ -901,7 +903,8 @@ ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd)
for (int i = 0; i < sd->num_closure; i++) {
ShaderClosure *sc = &sd->closure[i];
- if (CLOSURE_IS_BSDF_DIFFUSE(sc->type))
+ if (CLOSURE_IS_BSDF_DIFFUSE(sc->type) || CLOSURE_IS_BSSRDF(sc->type) ||
+ CLOSURE_IS_BSDF_BSSRDF(sc->type))
eval += sc->weight;
}
@@ -936,20 +939,6 @@ ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd)
return eval;
}
-ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd)
-{
- float3 eval = make_float3(0.0f, 0.0f, 0.0f);
-
- for (int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
-
- if (CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type))
- eval += sc->weight;
- }
-
- return eval;
-}
-
ccl_device float3 shader_bsdf_average_normal(KernelGlobals *kg, ShaderData *sd)
{
float3 N = make_float3(0.0f, 0.0f, 0.0f);
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index 23e30db1b08..ed8572467ea 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -428,12 +428,17 @@ ccl_device_noinline
hit = (ss_isect->num_hits > 0);
if (hit) {
+#ifdef __KERNEL_OPTIX__
+ /* t is always in world space with OptiX. */
+ t = ss_isect->hits[0].t;
+#else
/* Compute world space distance to surface hit. */
float3 D = ray->D;
object_inverse_dir_transform(kg, sd, &D);
D = normalize(D) * ss_isect->hits[0].t;
object_dir_transform(kg, sd, &D);
t = len(D);
+#endif
}
/* Advance to new scatter location. */
diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h
index 9eaa6b5516e..c8e01677d09 100644
--- a/intern/cycles/kernel/kernel_textures.h
+++ b/intern/cycles/kernel/kernel_textures.h
@@ -35,6 +35,7 @@ KERNEL_TEX(KernelObject, __objects)
KERNEL_TEX(Transform, __object_motion_pass)
KERNEL_TEX(DecomposedTransform, __object_motion)
KERNEL_TEX(uint, __object_flag)
+KERNEL_TEX(float, __object_volume_step)
/* cameras */
KERNEL_TEX(DecomposedTransform, __camera_motion)
@@ -77,7 +78,7 @@ KERNEL_TEX(KernelShader, __shaders)
KERNEL_TEX(float, __lookup_table)
/* sobol */
-KERNEL_TEX(uint, __sobol_directions)
+KERNEL_TEX(uint, __sample_pattern_lut)
/* image textures */
KERNEL_TEX(TextureInfo, __texture_info)
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index c35e345763a..b6d319311a1 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -63,6 +63,11 @@ CCL_NAMESPACE_BEGIN
#define VOLUME_STACK_SIZE 32
+/* Adaptive sampling constants */
+#define ADAPTIVE_SAMPLE_STEP 4
+static_assert((ADAPTIVE_SAMPLE_STEP & (ADAPTIVE_SAMPLE_STEP - 1)) == 0,
+ "ADAPTIVE_SAMPLE_STEP must be a power of two for bitwise operations to work");
+
/* Split kernel constants */
#define WORK_POOL_SIZE_GPU 64
#define WORK_POOL_SIZE_CPU 1
@@ -106,8 +111,6 @@ CCL_NAMESPACE_BEGIN
#ifndef __KERNEL_AO_PREVIEW__
# define __SVM__
# define __EMISSION__
-# define __TEXTURES__
-# define __EXTRA_NODES__
# define __HOLDOUT__
# define __MULTI_CLOSURE__
# define __TRANSPARENT_SHADOWS__
@@ -220,7 +223,6 @@ typedef enum ShaderEvalType {
SHADER_EVAL_DIFFUSE_COLOR,
SHADER_EVAL_GLOSSY_COLOR,
SHADER_EVAL_TRANSMISSION_COLOR,
- SHADER_EVAL_SUBSURFACE_COLOR,
SHADER_EVAL_EMISSION,
SHADER_EVAL_AOV_COLOR,
SHADER_EVAL_AOV_VALUE,
@@ -232,7 +234,6 @@ typedef enum ShaderEvalType {
SHADER_EVAL_DIFFUSE,
SHADER_EVAL_GLOSSY,
SHADER_EVAL_TRANSMISSION,
- SHADER_EVAL_SUBSURFACE,
/* extra */
SHADER_EVAL_ENVIRONMENT,
@@ -269,6 +270,7 @@ enum PathTraceDimension {
enum SamplingPattern {
SAMPLING_PATTERN_SOBOL = 0,
SAMPLING_PATTERN_CMJ = 1,
+ SAMPLING_PATTERN_PMJ = 2,
SAMPLING_NUM_PATTERNS,
};
@@ -375,6 +377,8 @@ typedef enum PassType {
PASS_CRYPTOMATTE,
PASS_AOV_COLOR,
PASS_AOV_VALUE,
+ PASS_ADAPTIVE_AUX_BUFFER,
+ PASS_SAMPLE_COUNT,
PASS_CATEGORY_MAIN_END = 31,
PASS_MIST = 32,
@@ -392,10 +396,7 @@ typedef enum PassType {
PASS_TRANSMISSION_DIRECT,
PASS_TRANSMISSION_INDIRECT,
PASS_TRANSMISSION_COLOR,
- PASS_SUBSURFACE_DIRECT,
- PASS_SUBSURFACE_INDIRECT,
- PASS_SUBSURFACE_COLOR,
- PASS_VOLUME_DIRECT,
+ PASS_VOLUME_DIRECT = 50,
PASS_VOLUME_INDIRECT,
/* No Scatter color since it's tricky to define what it would even mean. */
PASS_CATEGORY_LIGHT_END = 63,
@@ -445,23 +446,20 @@ typedef enum eBakePassFilter {
BAKE_FILTER_DIFFUSE = (1 << 3),
BAKE_FILTER_GLOSSY = (1 << 4),
BAKE_FILTER_TRANSMISSION = (1 << 5),
- BAKE_FILTER_SUBSURFACE = (1 << 6),
- BAKE_FILTER_EMISSION = (1 << 7),
- BAKE_FILTER_AO = (1 << 8),
+ BAKE_FILTER_EMISSION = (1 << 6),
+ BAKE_FILTER_AO = (1 << 7),
} eBakePassFilter;
typedef enum BakePassFilterCombos {
BAKE_FILTER_COMBINED = (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT | BAKE_FILTER_DIFFUSE |
- BAKE_FILTER_GLOSSY | BAKE_FILTER_TRANSMISSION | BAKE_FILTER_SUBSURFACE |
- BAKE_FILTER_EMISSION | BAKE_FILTER_AO),
+ BAKE_FILTER_GLOSSY | BAKE_FILTER_TRANSMISSION | BAKE_FILTER_EMISSION |
+ BAKE_FILTER_AO),
BAKE_FILTER_DIFFUSE_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_DIFFUSE),
BAKE_FILTER_GLOSSY_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_GLOSSY),
BAKE_FILTER_TRANSMISSION_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_TRANSMISSION),
- BAKE_FILTER_SUBSURFACE_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_SUBSURFACE),
BAKE_FILTER_DIFFUSE_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_DIFFUSE),
BAKE_FILTER_GLOSSY_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_GLOSSY),
BAKE_FILTER_TRANSMISSION_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_TRANSMISSION),
- BAKE_FILTER_SUBSURFACE_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_SUBSURFACE),
} BakePassFilterCombos;
typedef enum DenoiseFlag {
@@ -471,9 +469,7 @@ typedef enum DenoiseFlag {
DENOISING_CLEAN_GLOSSY_IND = (1 << 3),
DENOISING_CLEAN_TRANSMISSION_DIR = (1 << 4),
DENOISING_CLEAN_TRANSMISSION_IND = (1 << 5),
- DENOISING_CLEAN_SUBSURFACE_DIR = (1 << 6),
- DENOISING_CLEAN_SUBSURFACE_IND = (1 << 7),
- DENOISING_CLEAN_ALL_PASSES = (1 << 8) - 1,
+ DENOISING_CLEAN_ALL_PASSES = (1 << 6) - 1,
} DenoiseFlag;
#ifdef __KERNEL_DEBUG__
@@ -493,8 +489,7 @@ typedef ccl_addr_space struct PathRadianceState {
float3 diffuse;
float3 glossy;
float3 transmission;
- float3 subsurface;
- float3 scatter;
+ float3 volume;
float3 direct;
#endif
@@ -517,19 +512,16 @@ typedef ccl_addr_space struct PathRadiance {
float3 color_diffuse;
float3 color_glossy;
float3 color_transmission;
- float3 color_subsurface;
float3 direct_diffuse;
float3 direct_glossy;
float3 direct_transmission;
- float3 direct_subsurface;
- float3 direct_scatter;
+ float3 direct_volume;
float3 indirect_diffuse;
float3 indirect_glossy;
float3 indirect_transmission;
- float3 indirect_subsurface;
- float3 indirect_scatter;
+ float3 indirect_volume;
float4 shadow;
float mist;
@@ -583,8 +575,7 @@ typedef struct BsdfEval {
float3 glossy;
float3 transmission;
float3 transparent;
- float3 subsurface;
- float3 scatter;
+ float3 volume;
#endif
#ifdef __SHADOW_TRICKS__
float3 sum_no_mis;
@@ -725,8 +716,7 @@ typedef enum PrimitiveType {
/* Attributes */
typedef enum AttributePrimitive {
- ATTR_PRIM_TRIANGLE = 0,
- ATTR_PRIM_CURVE,
+ ATTR_PRIM_GEOMETRY = 0,
ATTR_PRIM_SUBD,
ATTR_PRIM_TYPES
@@ -754,6 +744,7 @@ typedef enum AttributeStandard {
ATTR_STD_UV,
ATTR_STD_UV_TANGENT,
ATTR_STD_UV_TANGENT_SIGN,
+ ATTR_STD_VERTEX_COLOR,
ATTR_STD_GENERATED,
ATTR_STD_GENERATED_TRANSFORM,
ATTR_STD_POSITION_UNDEFORMED,
@@ -894,13 +885,13 @@ enum ShaderDataFlag {
SD_HAS_DISPLACEMENT = (1 << 26),
/* Has constant emission (value stored in __shaders) */
SD_HAS_CONSTANT_EMISSION = (1 << 27),
- /* Needs to access attributes */
- SD_NEED_ATTRIBUTES = (1 << 28),
+ /* Needs to access attributes for volume rendering */
+ SD_NEED_VOLUME_ATTRIBUTES = (1 << 28),
SD_SHADER_FLAGS = (SD_USE_MIS | SD_HAS_TRANSPARENT_SHADOW | SD_HAS_VOLUME | SD_HAS_ONLY_VOLUME |
SD_HETEROGENEOUS_VOLUME | SD_HAS_BSSRDF_BUMP | SD_VOLUME_EQUIANGULAR |
SD_VOLUME_MIS | SD_VOLUME_CUBIC | SD_HAS_BUMP | SD_HAS_DISPLACEMENT |
- SD_HAS_CONSTANT_EMISSION | SD_NEED_ATTRIBUTES)
+ SD_HAS_CONSTANT_EMISSION | SD_NEED_VOLUME_ATTRIBUTES)
};
/* Object flags. */
@@ -1057,6 +1048,7 @@ typedef struct PathState {
#ifdef __DENOISING_FEATURES__
float denoising_feature_weight;
+ float3 denoising_feature_throughput;
#endif /* __DENOISING_FEATURES__ */
/* multiple importance sampling */
@@ -1213,18 +1205,15 @@ typedef struct KernelFilm {
int pass_diffuse_color;
int pass_glossy_color;
int pass_transmission_color;
- int pass_subsurface_color;
int pass_diffuse_indirect;
int pass_glossy_indirect;
int pass_transmission_indirect;
- int pass_subsurface_indirect;
int pass_volume_indirect;
int pass_diffuse_direct;
int pass_glossy_direct;
int pass_transmission_direct;
- int pass_subsurface_direct;
int pass_volume_direct;
int pass_emission;
@@ -1239,6 +1228,9 @@ typedef struct KernelFilm {
int cryptomatte_depth;
int pass_cryptomatte;
+ int pass_adaptive_aux_buffer;
+ int pass_sample_count;
+
int pass_mist;
float mist_start;
float mist_inv_depth;
@@ -1251,7 +1243,6 @@ typedef struct KernelFilm {
int pass_aov_color;
int pass_aov_value;
int pad1;
- int pad2;
/* XYZ to rendering color space transform. float4 instead of float3 to
* ensure consistent padding/alignment across devices. */
@@ -1273,6 +1264,8 @@ typedef struct KernelFilm {
int display_divide_pass_stride;
int use_display_exposure;
int use_display_pass_alpha;
+
+ int pad3, pad4, pad5;
} KernelFilm;
static_assert_align(KernelFilm, 16);
@@ -1280,6 +1273,7 @@ typedef struct KernelBackground {
/* only shader index */
int surface_shader;
int volume_shader;
+ float volume_step_size;
int transparent;
float transparent_roughness_squared_threshold;
@@ -1287,7 +1281,6 @@ typedef struct KernelBackground {
float ao_factor;
float ao_distance;
float ao_bounces_factor;
- float ao_pad;
} KernelBackground;
static_assert_align(KernelBackground, 16);
@@ -1354,18 +1347,20 @@ typedef struct KernelIntegrator {
/* sampler */
int sampling_pattern;
int aa_samples;
+ int adaptive_min_samples;
+ float adaptive_threshold;
/* volume render */
int use_volumes;
int volume_max_steps;
- float volume_step_size;
+ float volume_step_rate;
int volume_samples;
int start_sample;
int max_closures;
- int pad1;
+ int pad1, pad2, pad3;
} KernelIntegrator;
static_assert_align(KernelIntegrator, 16);
@@ -1679,12 +1674,16 @@ typedef struct WorkTile {
uint start_sample;
uint num_samples;
- uint offset;
+ int offset;
uint stride;
ccl_global float *buffer;
} WorkTile;
+/* Precomputed sample table sizes for PMJ02 sampler. */
+#define NUM_PMJ_SAMPLES (64 * 64)
+#define NUM_PMJ_PATTERNS 48
+
CCL_NAMESPACE_END
#endif /* __KERNEL_TYPES_H__ */
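
The power-of-two assertion on ADAPTIVE_SAMPLE_STEP exists so the adaptive-sampling kernels can replace a modulo with a bit mask. A small standalone check of that equivalence (plain C++, illustrative only, not part of the patch):

// For a power of two N, (x & (N - 1)) == (x % N), so "every N-th sample" can be
// tested without an integer division.
#include <cassert>

constexpr int ADAPTIVE_SAMPLE_STEP = 4;
static_assert((ADAPTIVE_SAMPLE_STEP & (ADAPTIVE_SAMPLE_STEP - 1)) == 0,
              "ADAPTIVE_SAMPLE_STEP must be a power of two");

int main()
{
  for (int sample = 0; sample < 64; sample++) {
    assert((sample & (ADAPTIVE_SAMPLE_STEP - 1)) == (sample % ADAPTIVE_SAMPLE_STEP));
  }
  return 0;
}
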
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index f443bb88463..b4f9d2186f4 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -48,7 +48,8 @@ ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg,
shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW);
if (sd->flag & SD_EXTINCTION) {
- *extinction = sd->closure_transparent_extinction;
+ const float density = object_volume_density(kg, sd->object);
+ *extinction = sd->closure_transparent_extinction * density;
return true;
}
else {
@@ -84,6 +85,11 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals *kg,
}
}
+ const float density = object_volume_density(kg, sd->object);
+ coeff->sigma_s *= density;
+ coeff->sigma_t *= density;
+ coeff->emission *= density;
+
return true;
}
@@ -101,15 +107,19 @@ ccl_device float kernel_volume_channel_get(float3 value, int channel)
#ifdef __VOLUME__
-ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space VolumeStack *stack)
+ccl_device float volume_stack_step_size(KernelGlobals *kg, ccl_addr_space VolumeStack *stack)
{
+ float step_size = FLT_MAX;
+
for (int i = 0; stack[i].shader != SHADER_NONE; i++) {
int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
+ bool heterogeneous = false;
+
if (shader_flag & SD_HETEROGENEOUS_VOLUME) {
- return true;
+ heterogeneous = true;
}
- else if (shader_flag & SD_NEED_ATTRIBUTES) {
+ else if (shader_flag & SD_NEED_VOLUME_ATTRIBUTES) {
/* We want to render world or objects without any volume grids
* as homogeneous, but can only verify this at run-time since other
* heterogeneous volume objects may be using the same shader. */
@@ -117,13 +127,19 @@ ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space
if (object != OBJECT_NONE) {
int object_flag = kernel_tex_fetch(__object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME_ATTRIBUTES) {
- return true;
+ heterogeneous = true;
}
}
}
+
+ if (heterogeneous) {
+ float object_step_size = object_volume_step_size(kg, stack[i].object);
+ object_step_size *= kernel_data.integrator.volume_step_rate;
+ step_size = fminf(object_step_size, step_size);
+ }
}
- return false;
+ return step_size;
}
ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stack)
@@ -158,12 +174,13 @@ ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stac
ccl_device_inline void kernel_volume_step_init(KernelGlobals *kg,
ccl_addr_space PathState *state,
+ const float object_step_size,
float t,
float *step_size,
float *step_offset)
{
const int max_steps = kernel_data.integrator.volume_max_steps;
- float step = min(kernel_data.integrator.volume_step_size, t);
+ float step = min(object_step_size, t);
/* compute exact steps in advance for malloc */
if (t > max_steps * step) {
@@ -199,7 +216,8 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg,
ccl_addr_space PathState *state,
Ray *ray,
ShaderData *sd,
- float3 *throughput)
+ float3 *throughput,
+ const float object_step_size)
{
float3 tp = *throughput;
const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
@@ -207,7 +225,7 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg,
/* prepare for stepping */
int max_steps = kernel_data.integrator.volume_max_steps;
float step_offset, step_size;
- kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
+ kernel_volume_step_init(kg, state, object_step_size, ray->t, &step_size, &step_offset);
/* compute extinction at the start */
float t = 0.0f;
@@ -264,8 +282,9 @@ ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg,
{
shader_setup_from_volume(kg, shadow_sd, ray);
- if (volume_stack_is_heterogeneous(kg, state->volume_stack))
- kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput);
+ float step_size = volume_stack_step_size(kg, state->volume_stack);
+ if (step_size != FLT_MAX)
+ kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput, step_size);
else
kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
}
@@ -533,7 +552,8 @@ kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
Ray *ray,
ShaderData *sd,
PathRadiance *L,
- ccl_addr_space float3 *throughput)
+ ccl_addr_space float3 *throughput,
+ const float object_step_size)
{
float3 tp = *throughput;
const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
@@ -541,7 +561,7 @@ kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
/* prepare for stepping */
int max_steps = kernel_data.integrator.volume_max_steps;
float step_offset, step_size;
- kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
+ kernel_volume_step_init(kg, state, object_step_size, ray->t, &step_size, &step_offset);
/* compute coefficients at the start */
float t = 0.0f;
@@ -679,12 +699,13 @@ kernel_volume_integrate(KernelGlobals *kg,
Ray *ray,
PathRadiance *L,
ccl_addr_space float3 *throughput,
- bool heterogeneous)
+ float step_size)
{
shader_setup_from_volume(kg, sd, ray);
- if (heterogeneous)
- return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput);
+ if (step_size != FLT_MAX)
+ return kernel_volume_integrate_heterogeneous_distance(
+ kg, state, ray, sd, L, throughput, step_size);
else
return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, true);
}
@@ -735,7 +756,7 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg,
Ray *ray,
ShaderData *sd,
VolumeSegment *segment,
- bool heterogeneous)
+ const float object_step_size)
{
const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
@@ -743,9 +764,9 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg,
int max_steps;
float step_size, step_offset;
- if (heterogeneous) {
+ if (object_step_size != FLT_MAX) {
max_steps = kernel_data.integrator.volume_max_steps;
- kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
+ kernel_volume_step_init(kg, state, object_step_size, ray->t, &step_size, &step_offset);
# ifdef __KERNEL_CPU__
/* NOTE: For the branched path tracing it's possible to have direct
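
A simplified model of the step-size selection introduced in this file, with stand-in types rather than the real kernel structures: each heterogeneous stack entry proposes a step size scaled by the global step rate, the smallest one wins, and FLT_MAX signals that the homogeneous path should be taken.

// Simplified model of volume_stack_step_size(); not Cycles code.
#include <algorithm>
#include <cfloat>
#include <cstdio>
#include <vector>

struct StackEntry {
  bool heterogeneous;     // stand-in for the SD_HETEROGENEOUS_VOLUME shader flag
  float object_step_size; // stand-in for object_volume_step_size()
};

static float volume_stack_step_size(const std::vector<StackEntry> &stack, float step_rate)
{
  float step_size = FLT_MAX;
  for (const StackEntry &entry : stack) {
    if (entry.heterogeneous) {
      step_size = std::min(entry.object_step_size * step_rate, step_size);
    }
  }
  /* Caller falls back to the homogeneous integrator when this stays FLT_MAX. */
  return step_size;
}

int main()
{
  std::vector<StackEntry> stack = {{true, 0.1f}, {false, 0.0f}, {true, 0.25f}};
  std::printf("step size: %f\n", volume_stack_step_size(stack, 1.0f));
  return 0;
}
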
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index 799561a7466..c642d227e4b 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -23,17 +23,41 @@ CCL_NAMESPACE_BEGIN
* Utility functions for work stealing
*/
+/* Map global work index to tile, pixel X/Y and sample. */
+ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile,
+ uint global_work_index,
+ ccl_private uint *x,
+ ccl_private uint *y,
+ ccl_private uint *sample)
+{
+#ifdef __KERNEL_CUDA__
+ /* Keeping threads for the same pixel together improves performance on CUDA. */
+ uint sample_offset = global_work_index % tile->num_samples;
+ uint pixel_offset = global_work_index / tile->num_samples;
+#else /* __KERNEL_CUDA__ */
+ uint tile_pixels = tile->w * tile->h;
+ uint sample_offset = global_work_index / tile_pixels;
+ uint pixel_offset = global_work_index - sample_offset * tile_pixels;
+#endif /* __KERNEL_CUDA__ */
+ uint y_offset = pixel_offset / tile->w;
+ uint x_offset = pixel_offset - y_offset * tile->w;
+
+ *x = tile->x + x_offset;
+ *y = tile->y + y_offset;
+ *sample = tile->start_sample + sample_offset;
+}
+
#ifdef __KERNEL_OPENCL__
# pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#endif
#ifdef __SPLIT_KERNEL__
/* Returns true if there is work */
-ccl_device bool get_next_work(KernelGlobals *kg,
- ccl_global uint *work_pools,
- uint total_work_size,
- uint ray_index,
- ccl_private uint *global_work_index)
+ccl_device bool get_next_work_item(KernelGlobals *kg,
+ ccl_global uint *work_pools,
+ uint total_work_size,
+ uint ray_index,
+ ccl_private uint *global_work_index)
{
/* With a small amount of work there may be more threads than work due to
* rounding up of global size, stop such threads immediately. */
@@ -56,31 +80,37 @@ ccl_device bool get_next_work(KernelGlobals *kg,
/* Test if all work for this pool is done. */
return (*global_work_index < total_work_size);
}
-#endif
-/* Map global work index to tile, pixel X/Y and sample. */
-ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile,
- uint global_work_index,
- ccl_private uint *x,
- ccl_private uint *y,
- ccl_private uint *sample)
+ccl_device bool get_next_work(KernelGlobals *kg,
+ ccl_global uint *work_pools,
+ uint total_work_size,
+ uint ray_index,
+ ccl_private uint *global_work_index)
{
-#ifdef __KERNEL_CUDA__
- /* Keeping threads for the same pixel together improves performance on CUDA. */
- uint sample_offset = global_work_index % tile->num_samples;
- uint pixel_offset = global_work_index / tile->num_samples;
-#else /* __KERNEL_CUDA__ */
- uint tile_pixels = tile->w * tile->h;
- uint sample_offset = global_work_index / tile_pixels;
- uint pixel_offset = global_work_index - sample_offset * tile_pixels;
-#endif /* __KERNEL_CUDA__ */
- uint y_offset = pixel_offset / tile->w;
- uint x_offset = pixel_offset - y_offset * tile->w;
-
- *x = tile->x + x_offset;
- *y = tile->y + y_offset;
- *sample = tile->start_sample + sample_offset;
+ bool got_work = false;
+ if (kernel_data.film.pass_adaptive_aux_buffer) {
+ do {
+ got_work = get_next_work_item(kg, work_pools, total_work_size, ray_index, global_work_index);
+ if (got_work) {
+ ccl_global WorkTile *tile = &kernel_split_params.tile;
+ uint x, y, sample;
+ get_work_pixel(tile, *global_work_index, &x, &y, &sample);
+ uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride;
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if (aux->w == 0.0f) {
+ break;
+ }
+ }
+ } while (got_work);
+ }
+ else {
+ got_work = get_next_work_item(kg, work_pools, total_work_size, ray_index, global_work_index);
+ }
+ return got_work;
}
+#endif
CCL_NAMESPACE_END
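
The relocated get_work_pixel() decomposes a flat work index into sample and pixel offsets. A standalone round-trip check of that decomposition (non-CUDA branch, plain C++, made-up tile dimensions):

// global_work_index = sample_offset * (w * h) + y_offset * w + x_offset.
#include <cassert>

int main()
{
  const unsigned w = 7, h = 5, num_samples = 3;
  for (unsigned index = 0; index < w * h * num_samples; index++) {
    unsigned tile_pixels = w * h;
    unsigned sample_offset = index / tile_pixels;
    unsigned pixel_offset = index - sample_offset * tile_pixels;
    unsigned y_offset = pixel_offset / w;
    unsigned x_offset = pixel_offset - y_offset * w;
    // Recompose and verify the mapping covers the tile exactly once per sample.
    assert(sample_offset * tile_pixels + y_offset * w + x_offset == index);
    assert(x_offset < w && y_offset < h && sample_offset < num_samples);
  }
  return 0;
}
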
diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp
index f2146302a27..8829a14ead5 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp
@@ -72,7 +72,7 @@ void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t s
assert(0);
}
-void kernel_tex_copy(KernelGlobals *kg, const char *name, void *mem, size_t size)
+void kernel_global_memory_copy(KernelGlobals *kg, const char *name, void *mem, size_t size)
{
if (0) {
}
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index f5d981fb71a..683f4b88d79 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -89,5 +89,9 @@ DECLARE_SPLIT_KERNEL_FUNCTION(enqueue_inactive)
DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)
#undef KERNEL_ARCH
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
index 8f311baf010..f87501db258 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@@ -474,7 +474,7 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl
{
const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
- switch (kernel_tex_type(id)) {
+ switch (info.data_type) {
case IMAGE_DATA_TYPE_HALF:
return TextureInterpolator<half>::interp(info, x, y);
case IMAGE_DATA_TYPE_BYTE:
@@ -498,28 +498,34 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl
}
}
-ccl_device float4 kernel_tex_image_interp_3d(
- KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
+ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg,
+ int id,
+ float3 P,
+ InterpolationType interp)
{
const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
- switch (kernel_tex_type(id)) {
+ if (info.use_transform_3d) {
+ P = transform_point(&info.transform_3d, P);
+ }
+
+ switch (info.data_type) {
case IMAGE_DATA_TYPE_HALF:
- return TextureInterpolator<half>::interp_3d(info, x, y, z, interp);
+ return TextureInterpolator<half>::interp_3d(info, P.x, P.y, P.z, interp);
case IMAGE_DATA_TYPE_BYTE:
- return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp);
+ return TextureInterpolator<uchar>::interp_3d(info, P.x, P.y, P.z, interp);
case IMAGE_DATA_TYPE_USHORT:
- return TextureInterpolator<uint16_t>::interp_3d(info, x, y, z, interp);
+ return TextureInterpolator<uint16_t>::interp_3d(info, P.x, P.y, P.z, interp);
case IMAGE_DATA_TYPE_FLOAT:
- return TextureInterpolator<float>::interp_3d(info, x, y, z, interp);
+ return TextureInterpolator<float>::interp_3d(info, P.x, P.y, P.z, interp);
case IMAGE_DATA_TYPE_HALF4:
- return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp);
+ return TextureInterpolator<half4>::interp_3d(info, P.x, P.y, P.z, interp);
case IMAGE_DATA_TYPE_BYTE4:
- return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp);
+ return TextureInterpolator<uchar4>::interp_3d(info, P.x, P.y, P.z, interp);
case IMAGE_DATA_TYPE_USHORT4:
- return TextureInterpolator<ushort4>::interp_3d(info, x, y, z, interp);
+ return TextureInterpolator<ushort4>::interp_3d(info, P.x, P.y, P.z, interp);
case IMAGE_DATA_TYPE_FLOAT4:
- return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp);
+ return TextureInterpolator<float4>::interp_3d(info, P.x, P.y, P.z, interp);
default:
assert(0);
return make_float4(
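
The 3D texture lookups above now take a single float3 position and optionally run it through a per-texture transform before sampling. A minimal sketch of that pre-transform step, with stand-in float3/Transform types (the real Cycles types and transform_point differ):

// Illustrative only: apply an affine 3x4 transform to the lookup position, as
// done when info.use_transform_3d is set, before the per-channel interpolation.
#include <cstdio>

struct float3 { float x, y, z; };

struct Transform {
  float m[3][4]; // row-major 3x4 affine transform
};

static float3 transform_point(const Transform *t, const float3 P)
{
  float3 r;
  r.x = t->m[0][0] * P.x + t->m[0][1] * P.y + t->m[0][2] * P.z + t->m[0][3];
  r.y = t->m[1][0] * P.x + t->m[1][1] * P.y + t->m[1][2] * P.z + t->m[1][3];
  r.z = t->m[2][0] * P.x + t->m[2][1] * P.y + t->m[2][2] * P.z + t->m[2][3];
  return r;
}

int main()
{
  const Transform scale_half = {{{0.5f, 0, 0, 0}, {0, 0.5f, 0, 0}, {0, 0, 0.5f, 0}}};
  float3 P = {1.0f, 2.0f, 3.0f};
  P = transform_point(&scale_half, P); // mirrors the info.use_transform_3d branch
  std::printf("%f %f %f\n", P.x, P.y, P.z);
  return 0;
}
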
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 9ca3f46b5b6..091e53cfd83 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -20,6 +20,7 @@
* simply includes this file without worry of copying actual implementation over.
*/
+// clang-format off
#include "kernel/kernel_compat_cpu.h"
#ifndef KERNEL_STUB
@@ -58,6 +59,10 @@
# include "kernel/split/kernel_next_iteration_setup.h"
# include "kernel/split/kernel_indirect_subsurface.h"
# include "kernel/split/kernel_buffer_update.h"
+# include "kernel/split/kernel_adaptive_stopping.h"
+# include "kernel/split/kernel_adaptive_filter_x.h"
+# include "kernel/split/kernel_adaptive_filter_y.h"
+# include "kernel/split/kernel_adaptive_adjust_samples.h"
# endif /* __SPLIT_KERNEL__ */
#else
# define STUB_ASSERT(arch, name) \
@@ -67,6 +72,7 @@
# include "kernel/split/kernel_data_init.h"
# endif /* __SPLIT_KERNEL__ */
#endif /* KERNEL_STUB */
+// clang-format on
CCL_NAMESPACE_BEGIN
@@ -204,6 +210,10 @@ DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)
#endif /* __SPLIT_KERNEL__ */
#undef KERNEL_STUB
diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu
index fbb773533ce..22fd5ea5634 100644
--- a/intern/cycles/kernel/kernels/cuda/filter.cu
+++ b/intern/cycles/kernel/kernels/cuda/filter.cu
@@ -57,9 +57,9 @@ kernel_cuda_filter_convert_to_rgb(float *rgb, float *buf, int sw, int sh, int st
if (num_inputs > 0) {
float *in = buf + x * pass_stride + (y * stride + pass_offset.x) / sizeof(float);
float *out = rgb + (x + y * sw) * 3;
- out[0] = in[0];
- out[1] = in[1];
- out[2] = in[2];
+ out[0] = clamp(in[0], 0.0f, 10000.0f);
+ out[1] = clamp(in[1], 0.0f, 10000.0f);
+ out[2] = clamp(in[2], 0.0f, 10000.0f);
}
if (num_inputs > 1) {
float *in = buf + x * pass_stride + (y * stride + pass_offset.y) / sizeof(float);
diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu
index af311027f78..c4c810c6a82 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel.cu
@@ -33,6 +33,7 @@
#include "kernel/kernel_path_branched.h"
#include "kernel/kernel_bake.h"
#include "kernel/kernel_work_stealing.h"
+#include "kernel/kernel_adaptive_sampling.h"
/* kernels */
extern "C" __global__ void
@@ -83,6 +84,75 @@ kernel_cuda_branched_path_trace(WorkTile *tile, uint total_work_size)
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_stopping(WorkTile *tile, int sample, uint total_work_size)
+{
+ int work_index = ccl_global_id(0);
+ bool thread_is_active = work_index < total_work_size;
+ KernelGlobals kg;
+ if(thread_is_active && kernel_data.film.pass_adaptive_aux_buffer) {
+ uint x = tile->x + work_index % tile->w;
+ uint y = tile->y + work_index / tile->w;
+ int index = tile->offset + x + y * tile->stride;
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ kernel_do_adaptive_stopping(&kg, buffer, sample);
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_filter_x(WorkTile *tile, int sample, uint)
+{
+ KernelGlobals kg;
+ if(kernel_data.film.pass_adaptive_aux_buffer && sample > kernel_data.integrator.adaptive_min_samples) {
+ if(ccl_global_id(0) < tile->h) {
+ int y = tile->y + ccl_global_id(0);
+ kernel_do_adaptive_filter_x(&kg, y, tile);
+ }
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_filter_y(WorkTile *tile, int sample, uint)
+{
+ KernelGlobals kg;
+ if(kernel_data.film.pass_adaptive_aux_buffer && sample > kernel_data.integrator.adaptive_min_samples) {
+ if(ccl_global_id(0) < tile->w) {
+ int x = tile->x + ccl_global_id(0);
+ kernel_do_adaptive_filter_y(&kg, x, tile);
+ }
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_scale_samples(WorkTile *tile, int start_sample, int sample, uint total_work_size)
+{
+ if(kernel_data.film.pass_adaptive_aux_buffer) {
+ int work_index = ccl_global_id(0);
+ bool thread_is_active = work_index < total_work_size;
+ KernelGlobals kg;
+ if(thread_is_active) {
+ uint x = tile->x + work_index % tile->w;
+ uint y = tile->y + work_index / tile->w;
+ int index = tile->offset + x + y * tile->stride;
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ if(buffer[kernel_data.film.pass_sample_count] < 0.0f) {
+ buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count];
+ float sample_multiplier = sample / max((float)start_sample + 1.0f, buffer[kernel_data.film.pass_sample_count]);
+ if(sample_multiplier != 1.0f) {
+ kernel_adaptive_post_adjust(&kg, buffer, sample_multiplier);
+ }
+ }
+ else {
+ kernel_adaptive_post_adjust(&kg, buffer, sample / (sample - 1.0f));
+ }
+ }
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
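
One way to read the scaling in kernel_cuda_adaptive_scale_samples() above: render passes store running sums and the final resolve divides by the global sample count, so a pixel that stopped early must be multiplied up by global/actual samples to keep its mean unchanged. A toy calculation with made-up numbers:

// Illustrative arithmetic only; values and names are not from the patch.
#include <cstdio>

int main()
{
  const float global_samples = 128.0f;
  const float pixel_samples = 32.0f; // this pixel converged early
  const float pixel_sum = 8.0f;      // sum of its 32 radiance samples

  const float multiplier = global_samples / pixel_samples;
  const float scaled_sum = pixel_sum * multiplier;

  // Both print 0.25: the resolve's division by global_samples still yields the true mean.
  std::printf("true mean: %f, displayed mean: %f\n",
              pixel_sum / pixel_samples, scaled_sum / global_samples);
  return 0;
}
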
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
index 7c68f08ea10..1d425d132a1 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
@@ -124,7 +124,7 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl
CUtexObject tex = (CUtexObject)info.data;
/* float4, byte4, ushort4 and half4 */
- const int texture_type = kernel_tex_type(id);
+ const int texture_type = info.data_type;
if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) {
if (info.interpolation == INTERPOLATION_CUBIC) {
@@ -149,14 +149,25 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl
}
}
-ccl_device float4 kernel_tex_image_interp_3d(
- KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
+ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg,
+ int id,
+ float3 P,
+ InterpolationType interp)
{
const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+
+ if (info.use_transform_3d) {
+ P = transform_point(&info.transform_3d, P);
+ }
+
+ const float x = P.x;
+ const float y = P.y;
+ const float z = P.z;
+
CUtexObject tex = (CUtexObject)info.data;
uint interpolation = (interp == INTERPOLATION_NONE) ? info.interpolation : interp;
- const int texture_type = kernel_tex_type(id);
+ const int texture_type = info.data_type;
if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) {
if (interpolation == INTERPOLATION_CUBIC) {
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index 43b3d0aa0e6..95ad7599cf1 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -43,6 +43,10 @@
#include "kernel/split/kernel_next_iteration_setup.h"
#include "kernel/split/kernel_indirect_subsurface.h"
#include "kernel/split/kernel_buffer_update.h"
+#include "kernel/split/kernel_adaptive_stopping.h"
+#include "kernel/split/kernel_adaptive_filter_x.h"
+#include "kernel/split/kernel_adaptive_filter_y.h"
+#include "kernel/split/kernel_adaptive_adjust_samples.h"
#include "kernel/kernel_film.h"
@@ -121,6 +125,10 @@ DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl
new file mode 100644
index 00000000000..ebdb99d4730
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_adjust_samples.h"
+
+#define KERNEL_NAME adaptive_adjust_samples
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl
new file mode 100644
index 00000000000..76d82d4184e
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_filter_x.h"
+
+#define KERNEL_NAME adaptive_filter_x
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl
new file mode 100644
index 00000000000..1e6d15ba0f2
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_filter_y.h"
+
+#define KERNEL_NAME adaptive_filter_y
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl
new file mode 100644
index 00000000000..51de0059667
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_stopping.h"
+
+#define KERNEL_NAME adaptive_stopping
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
index b6390679331..89fcb0ae60f 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
+++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
@@ -47,7 +47,7 @@ ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg,
int id,
int offset)
{
- const int texture_type = kernel_tex_type(id);
+ const int texture_type = info->data_type;
/* Float4 */
if (texture_type == IMAGE_DATA_TYPE_FLOAT4) {
@@ -202,11 +202,19 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl
}
}
-ccl_device float4
-kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, int interp)
+ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float3 P, int interp)
{
const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
+ if (info->use_transform_3d) {
+ Transform tfm = info->transform_3d;
+ P = transform_point(&tfm, P);
+ }
+
+ const float x = P.x;
+ const float y = P.y;
+ const float z = P.z;
+
if (info->extension == EXTENSION_CLIP) {
if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl
index 6041f13b52b..c3b7b09460a 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl
@@ -28,3 +28,7 @@
#include "kernel/kernels/opencl/kernel_next_iteration_setup.cl"
#include "kernel/kernels/opencl/kernel_indirect_subsurface.cl"
#include "kernel/kernels/opencl/kernel_buffer_update.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_stopping.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_filter_x.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_filter_y.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl"
diff --git a/intern/cycles/kernel/osl/CMakeLists.txt b/intern/cycles/kernel/osl/CMakeLists.txt
index 5be5bd181ec..fc0c845fd4f 100644
--- a/intern/cycles/kernel/osl/CMakeLists.txt
+++ b/intern/cycles/kernel/osl/CMakeLists.txt
@@ -33,6 +33,9 @@ set(LIB
${LLVM_LIBRARY}
)
+# OSL and LLVM are built without RTTI
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${RTTI_DISABLE_FLAGS}")
+
include_directories(${INC})
include_directories(SYSTEM ${INC_SYS})
diff --git a/intern/cycles/kernel/osl/background.cpp b/intern/cycles/kernel/osl/background.cpp
index b395227845d..3f9de5ab33d 100644
--- a/intern/cycles/kernel/osl/background.cpp
+++ b/intern/cycles/kernel/osl/background.cpp
@@ -36,9 +36,11 @@
#include "kernel/osl/osl_closures.h"
+// clang-format off
#include "kernel/kernel_compat_cpu.h"
#include "kernel/closure/alloc.h"
#include "kernel/closure/emissive.h"
+// clang-format on
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
index c5edc7c9be3..76a2e41abfa 100644
--- a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
+++ b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
@@ -37,10 +37,12 @@
#include "kernel/kernel_compat_cpu.h"
#include "kernel/osl/osl_closures.h"
+// clang-format off
#include "kernel/kernel_types.h"
#include "kernel/kernel_montecarlo.h"
#include "kernel/closure/alloc.h"
#include "kernel/closure/bsdf_diffuse_ramp.h"
+// clang-format on
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
index 4b7e59ff932..b78dc8a3a67 100644
--- a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
+++ b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
@@ -37,9 +37,11 @@
#include "kernel/kernel_compat_cpu.h"
#include "kernel/osl/osl_closures.h"
+// clang-format off
#include "kernel/kernel_types.h"
#include "kernel/closure/alloc.h"
#include "kernel/closure/bsdf_phong_ramp.h"
+// clang-format on
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/osl/emissive.cpp b/intern/cycles/kernel/osl/emissive.cpp
index c29ddb13e2e..d656723bac2 100644
--- a/intern/cycles/kernel/osl/emissive.cpp
+++ b/intern/cycles/kernel/osl/emissive.cpp
@@ -36,10 +36,12 @@
#include "kernel/osl/osl_closures.h"
+// clang-format off
#include "kernel/kernel_compat_cpu.h"
#include "kernel/kernel_types.h"
#include "kernel/closure/alloc.h"
#include "kernel/closure/emissive.h"
+// clang-format on
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp
index dd52c33071c..c5ca8616fbd 100644
--- a/intern/cycles/kernel/osl/osl_bssrdf.cpp
+++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp
@@ -35,6 +35,7 @@
#include "kernel/kernel_compat_cpu.h"
#include "kernel/osl/osl_closures.h"
+// clang-format off
#include "kernel/kernel_types.h"
#include "kernel/kernel_montecarlo.h"
@@ -43,6 +44,7 @@
#include "kernel/closure/bsdf_diffuse.h"
#include "kernel/closure/bsdf_principled_diffuse.h"
#include "kernel/closure/bssrdf.h"
+// clang-format on
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp
index 463a65f21a0..ea5e00ec23c 100644
--- a/intern/cycles/kernel/osl/osl_closures.cpp
+++ b/intern/cycles/kernel/osl/osl_closures.cpp
@@ -39,6 +39,7 @@
#include "util/util_math.h"
#include "util/util_param.h"
+// clang-format off
#include "kernel/kernel_types.h"
#include "kernel/kernel_compat_cpu.h"
#include "kernel/split/kernel_split_data_types.h"
@@ -63,6 +64,7 @@
#include "kernel/closure/bsdf_principled_diffuse.h"
#include "kernel/closure/bsdf_principled_sheen.h"
#include "kernel/closure/volume.h"
+// clang-format on
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/osl/osl_closures.h b/intern/cycles/kernel/osl/osl_closures.h
index d3db6b71f5c..d12afdb80dd 100644
--- a/intern/cycles/kernel/osl/osl_closures.h
+++ b/intern/cycles/kernel/osl/osl_closures.h
@@ -33,12 +33,12 @@
#ifndef __OSL_CLOSURES_H__
#define __OSL_CLOSURES_H__
-#include "util/util_types.h"
#include "kernel/kernel_types.h"
+#include "util/util_types.h"
+#include <OSL/genclosure.h>
#include <OSL/oslclosure.h>
#include <OSL/oslexec.h>
-#include <OSL/genclosure.h>
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/osl/osl_globals.h b/intern/cycles/kernel/osl/osl_globals.h
index 0e6c8d21534..c06c9abd4c1 100644
--- a/intern/cycles/kernel/osl/osl_globals.h
+++ b/intern/cycles/kernel/osl/osl_globals.h
@@ -27,8 +27,8 @@
# include "util/util_map.h"
# include "util/util_param.h"
# include "util/util_thread.h"
-# include "util/util_vector.h"
# include "util/util_unique_ptr.h"
+# include "util/util_vector.h"
# ifndef WIN32
using std::isfinite;
diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp
index 767bd7702ae..2857de533f3 100644
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -39,6 +39,7 @@
#include "util/util_logging.h"
#include "util/util_string.h"
+// clang-format off
#include "kernel/kernel_compat_cpu.h"
#include "kernel/split/kernel_split_data_types.h"
#include "kernel/kernel_globals.h"
@@ -56,6 +57,7 @@
#include "kernel/kernel_projection.h"
#include "kernel/kernel_accumulate.h"
#include "kernel/kernel_shader.h"
+// clang-format on
CCL_NAMESPACE_BEGIN
@@ -1220,8 +1222,8 @@ bool OSLRenderServices::texture3d(ustring filename,
ShaderData *sd = (ShaderData *)(sg->renderstate);
KernelGlobals *kernel_globals = sd->osl_globals;
int slot = handle->svm_slot;
- float4 rgba = kernel_tex_image_interp_3d(
- kernel_globals, slot, P.x, P.y, P.z, INTERPOLATION_NONE);
+ float3 P_float3 = make_float3(P.x, P.y, P.z);
+ float4 rgba = kernel_tex_image_interp_3d(kernel_globals, slot, P_float3, INTERPOLATION_NONE);
result[0] = rgba[0];
if (nchannels > 1)
diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h
index 469c5188730..d32dace23bf 100644
--- a/intern/cycles/kernel/osl/osl_services.h
+++ b/intern/cycles/kernel/osl/osl_services.h
@@ -25,8 +25,8 @@
* attributes.
*/
-#include <OSL/oslexec.h>
#include <OSL/oslclosure.h>
+#include <OSL/oslexec.h>
#ifdef WITH_PTEX
class PtexCache;
diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp
index db5ad06d3fc..2318813949e 100644
--- a/intern/cycles/kernel/osl/osl_shader.cpp
+++ b/intern/cycles/kernel/osl/osl_shader.cpp
@@ -16,6 +16,7 @@
#include <OSL/oslexec.h>
+// clang-format off
#include "kernel/kernel_compat_cpu.h"
#include "kernel/kernel_montecarlo.h"
#include "kernel/kernel_types.h"
@@ -28,6 +29,7 @@
#include "kernel/osl/osl_globals.h"
#include "kernel/osl/osl_services.h"
#include "kernel/osl/osl_shader.h"
+// clang-format on
#include "util/util_foreach.h"
@@ -382,10 +384,6 @@ int OSLShader::find_attribute(KernelGlobals *kg,
{
/* for OSL, a hash map is used to lookup the attribute by name. */
int object = sd->object * ATTR_PRIM_TYPES;
-#ifdef __HAIR__
- if (sd->type & PRIMITIVE_ALL_CURVE)
- object += ATTR_PRIM_CURVE;
-#endif
OSLGlobals::AttributeMap &attr_map = kg->osl->attribute_map[object];
ustring stdname(std::string("geom:") +
diff --git a/intern/cycles/kernel/shaders/CMakeLists.txt b/intern/cycles/kernel/shaders/CMakeLists.txt
index f4258da70d3..9dcedc9ba19 100644
--- a/intern/cycles/kernel/shaders/CMakeLists.txt
+++ b/intern/cycles/kernel/shaders/CMakeLists.txt
@@ -78,6 +78,7 @@ set(SRC_OSL
node_value.osl
node_vector_curves.osl
node_vector_math.osl
+ node_vector_rotate.osl
node_vector_transform.osl
node_velvet_bsdf.osl
node_vertex_color.osl
@@ -95,16 +96,19 @@ set(SRC_OSL
node_rgb_to_bw.osl
)
+# The headers that OSL ships differ per release, so we cannot
+# hardcode this list.
+file(GLOB SRC_OSL_HEADER_DIST ${OSL_SHADER_DIR}/*.h)
+
set(SRC_OSL_HEADERS
node_color.h
node_fresnel.h
node_hash.h
+ node_math.h
node_noise.h
node_ramp_util.h
- stdosl.h
- oslutil.h
- vector2.h
- vector4.h
+ stdcycles.h
+ ${SRC_OSL_HEADER_DIST}
)
set(SRC_OSO
@@ -119,7 +123,7 @@ foreach(_file ${SRC_OSL})
string(REPLACE ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} _OSO_FILE ${_OSO_FILE})
add_custom_command(
OUTPUT ${_OSO_FILE}
- COMMAND ${OSL_COMPILER} -q -O2 -I"${CMAKE_CURRENT_SOURCE_DIR}" -o ${_OSO_FILE} ${_OSL_FILE}
+ COMMAND ${OSL_COMPILER} -q -O2 -I"${CMAKE_CURRENT_SOURCE_DIR}" -I"${OSL_SHADER_DIR}" -o ${_OSO_FILE} ${_OSL_FILE}
DEPENDS ${_OSL_FILE} ${SRC_OSL_HEADERS} ${OSL_COMPILER})
list(APPEND SRC_OSO
${_OSO_FILE}
diff --git a/intern/cycles/kernel/shaders/node_absorption_volume.osl b/intern/cycles/kernel/shaders/node_absorption_volume.osl
index e99bd254666..37ccc4c969f 100644
--- a/intern/cycles/kernel/shaders/node_absorption_volume.osl
+++ b/intern/cycles/kernel/shaders/node_absorption_volume.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_absorption_volume(color Color = color(0.8, 0.8, 0.8),
float Density = 1.0,
diff --git a/intern/cycles/kernel/shaders/node_add_closure.osl b/intern/cycles/kernel/shaders/node_add_closure.osl
index 077e2735e61..27ecc9ef0c2 100644
--- a/intern/cycles/kernel/shaders/node_add_closure.osl
+++ b/intern/cycles/kernel/shaders/node_add_closure.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_add_closure(closure color Closure1 = 0,
closure color Closure2 = 0,
diff --git a/intern/cycles/kernel/shaders/node_ambient_occlusion.osl b/intern/cycles/kernel/shaders/node_ambient_occlusion.osl
index 7bf28719e78..22d245d0698 100644
--- a/intern/cycles/kernel/shaders/node_ambient_occlusion.osl
+++ b/intern/cycles/kernel/shaders/node_ambient_occlusion.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_ambient_occlusion(color ColorIn = color(1.0, 1.0, 1.0),
int samples = 16,
diff --git a/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl b/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl
index 165c09eb8e0..739cd375ab2 100644
--- a/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl
@@ -13,8 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_anisotropic_bsdf(color Color = 0.0,
string distribution = "GGX",
diff --git a/intern/cycles/kernel/shaders/node_attribute.osl b/intern/cycles/kernel/shaders/node_attribute.osl
index 336543cc130..abec8ebfbf0 100644
--- a/intern/cycles/kernel/shaders/node_attribute.osl
+++ b/intern/cycles/kernel/shaders/node_attribute.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_attribute(string bump_offset = "center",
string name = "",
diff --git a/intern/cycles/kernel/shaders/node_background.osl b/intern/cycles/kernel/shaders/node_background.osl
index 6ded0d2c65c..3f45db751b3 100644
--- a/intern/cycles/kernel/shaders/node_background.osl
+++ b/intern/cycles/kernel/shaders/node_background.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_background(color Color = 0.8,
float Strength = 1.0,
diff --git a/intern/cycles/kernel/shaders/node_bevel.osl b/intern/cycles/kernel/shaders/node_bevel.osl
index 189c20c52e7..e87ddab716d 100644
--- a/intern/cycles/kernel/shaders/node_bevel.osl
+++ b/intern/cycles/kernel/shaders/node_bevel.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_bevel(int samples = 4,
float Radius = 0.05,
diff --git a/intern/cycles/kernel/shaders/node_blackbody.osl b/intern/cycles/kernel/shaders/node_blackbody.osl
index 8a24bf1e28b..741efae755d 100644
--- a/intern/cycles/kernel/shaders/node_blackbody.osl
+++ b/intern/cycles/kernel/shaders/node_blackbody.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_blackbody(float Temperature = 1200.0, output color Color = 0.0)
{
diff --git a/intern/cycles/kernel/shaders/node_brick_texture.osl b/intern/cycles/kernel/shaders/node_brick_texture.osl
index 30644ef2ff3..075a324c730 100644
--- a/intern/cycles/kernel/shaders/node_brick_texture.osl
+++ b/intern/cycles/kernel/shaders/node_brick_texture.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
/* Brick */
diff --git a/intern/cycles/kernel/shaders/node_brightness.osl b/intern/cycles/kernel/shaders/node_brightness.osl
index 2defbc4b1db..019edfb79a3 100644
--- a/intern/cycles/kernel/shaders/node_brightness.osl
+++ b/intern/cycles/kernel/shaders/node_brightness.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_brightness(color ColorIn = 0.8,
float Bright = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_bump.osl b/intern/cycles/kernel/shaders/node_bump.osl
index 3697bb37fd9..811182f40b5 100644
--- a/intern/cycles/kernel/shaders/node_bump.osl
+++ b/intern/cycles/kernel/shaders/node_bump.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
/* "Bump Mapping Unparameterized Surfaces on the GPU"
* Morten S. Mikkelsen, 2010 */
diff --git a/intern/cycles/kernel/shaders/node_camera.osl b/intern/cycles/kernel/shaders/node_camera.osl
index 833e9e775fe..45ca50c6e1e 100644
--- a/intern/cycles/kernel/shaders/node_camera.osl
+++ b/intern/cycles/kernel/shaders/node_camera.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_camera(output vector ViewVector = vector(0.0, 0.0, 0.0),
output float ViewZDepth = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_checker_texture.osl b/intern/cycles/kernel/shaders/node_checker_texture.osl
index e05cf20099f..d6a30dbdb40 100644
--- a/intern/cycles/kernel/shaders/node_checker_texture.osl
+++ b/intern/cycles/kernel/shaders/node_checker_texture.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
/* Checker */
diff --git a/intern/cycles/kernel/shaders/node_clamp.osl b/intern/cycles/kernel/shaders/node_clamp.osl
index d689ba7f809..ce9392a0d98 100644
--- a/intern/cycles/kernel/shaders/node_clamp.osl
+++ b/intern/cycles/kernel/shaders/node_clamp.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_clamp(string type = "minmax",
float Value = 1.0,
diff --git a/intern/cycles/kernel/shaders/node_combine_hsv.osl b/intern/cycles/kernel/shaders/node_combine_hsv.osl
index 1658cf3d774..05e502b5bc1 100644
--- a/intern/cycles/kernel/shaders/node_combine_hsv.osl
+++ b/intern/cycles/kernel/shaders/node_combine_hsv.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_combine_hsv(float H = 0.0, float S = 0.0, float V = 0.0, output color Color = 0.8)
{
diff --git a/intern/cycles/kernel/shaders/node_combine_rgb.osl b/intern/cycles/kernel/shaders/node_combine_rgb.osl
index aaa95e9c5af..036f371eb5c 100644
--- a/intern/cycles/kernel/shaders/node_combine_rgb.osl
+++ b/intern/cycles/kernel/shaders/node_combine_rgb.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_combine_rgb(float R = 0.0, float G = 0.0, float B = 0.0, output color Image = 0.8)
{
diff --git a/intern/cycles/kernel/shaders/node_combine_xyz.osl b/intern/cycles/kernel/shaders/node_combine_xyz.osl
index 4ab49168704..4ebd86b605c 100644
--- a/intern/cycles/kernel/shaders/node_combine_xyz.osl
+++ b/intern/cycles/kernel/shaders/node_combine_xyz.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_combine_xyz(float X = 0.0, float Y = 0.0, float Z = 0.0, output vector Vector = 0.8)
{
diff --git a/intern/cycles/kernel/shaders/node_convert_from_color.osl b/intern/cycles/kernel/shaders/node_convert_from_color.osl
index 7ea9a1e4fb3..c3f0e118844 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_color.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_color.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_convert_from_color(color value_color = 0.0,
output string value_string = "",
diff --git a/intern/cycles/kernel/shaders/node_convert_from_float.osl b/intern/cycles/kernel/shaders/node_convert_from_float.osl
index 13b5dea0838..61a15a1c2b0 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_float.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_float.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_convert_from_float(float value_float = 0.0,
output string value_string = "",
diff --git a/intern/cycles/kernel/shaders/node_convert_from_int.osl b/intern/cycles/kernel/shaders/node_convert_from_int.osl
index a59e025d822..2e6a99b2765 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_int.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_int.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_convert_from_int(int value_int = 0,
output string value_string = "",
diff --git a/intern/cycles/kernel/shaders/node_convert_from_normal.osl b/intern/cycles/kernel/shaders/node_convert_from_normal.osl
index 7bdd94d1941..64201d63190 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_normal.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_normal.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_convert_from_normal(normal value_normal = normal(0.0, 0.0, 0.0),
output string value_string = "",
diff --git a/intern/cycles/kernel/shaders/node_convert_from_point.osl b/intern/cycles/kernel/shaders/node_convert_from_point.osl
index 79c1719e7a7..11d64f76d6f 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_point.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_point.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_convert_from_point(point value_point = point(0.0, 0.0, 0.0),
output string value_string = "",
diff --git a/intern/cycles/kernel/shaders/node_convert_from_string.osl b/intern/cycles/kernel/shaders/node_convert_from_string.osl
index 48d894a6b3e..b496c4e6d05 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_string.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_string.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_convert_from_string(string value_string = "",
output color value_color = color(0.0, 0.0, 0.0),
diff --git a/intern/cycles/kernel/shaders/node_convert_from_vector.osl b/intern/cycles/kernel/shaders/node_convert_from_vector.osl
index 92ab2313bcb..820faabd32b 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_vector.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_vector.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_convert_from_vector(vector value_vector = vector(0.0, 0.0, 0.0),
output string value_string = "",
diff --git a/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl b/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl
index bd5554b838a..f5886f534eb 100644
--- a/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_diffuse_bsdf(color Color = 0.8,
float Roughness = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_displacement.osl b/intern/cycles/kernel/shaders/node_displacement.osl
index a1f3b7b7737..44a4828d511 100644
--- a/intern/cycles/kernel/shaders/node_displacement.osl
+++ b/intern/cycles/kernel/shaders/node_displacement.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_displacement(string space = "object",
float Height = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_emission.osl b/intern/cycles/kernel/shaders/node_emission.osl
index 57973f57ac6..f289a9711d9 100644
--- a/intern/cycles/kernel/shaders/node_emission.osl
+++ b/intern/cycles/kernel/shaders/node_emission.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_emission(color Color = 0.8, float Strength = 1.0, output closure color Emission = 0)
{
diff --git a/intern/cycles/kernel/shaders/node_environment_texture.osl b/intern/cycles/kernel/shaders/node_environment_texture.osl
index 43f607f7cb0..d04743eb368 100644
--- a/intern/cycles/kernel/shaders/node_environment_texture.osl
+++ b/intern/cycles/kernel/shaders/node_environment_texture.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_color.h"
+#include "stdcycles.h"
vector environment_texture_direction_to_equirectangular(vector dir)
{
diff --git a/intern/cycles/kernel/shaders/node_fresnel.osl b/intern/cycles/kernel/shaders/node_fresnel.osl
index 89250db40f3..cff084c344d 100644
--- a/intern/cycles/kernel/shaders/node_fresnel.osl
+++ b/intern/cycles/kernel/shaders/node_fresnel.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_fresnel.h"
+#include "stdcycles.h"
shader node_fresnel(float IOR = 1.45, normal Normal = N, output float Fac = 0.0)
{
diff --git a/intern/cycles/kernel/shaders/node_gamma.osl b/intern/cycles/kernel/shaders/node_gamma.osl
index 9b9c17dc8af..0816df64fe8 100644
--- a/intern/cycles/kernel/shaders/node_gamma.osl
+++ b/intern/cycles/kernel/shaders/node_gamma.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_gamma(color ColorIn = 0.8, float Gamma = 1.0, output color ColorOut = 0.0)
{
diff --git a/intern/cycles/kernel/shaders/node_geometry.osl b/intern/cycles/kernel/shaders/node_geometry.osl
index 3cf2e974022..55cda71db1b 100644
--- a/intern/cycles/kernel/shaders/node_geometry.osl
+++ b/intern/cycles/kernel/shaders/node_geometry.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_geometry(normal NormalIn = N,
string bump_offset = "center",
diff --git a/intern/cycles/kernel/shaders/node_glass_bsdf.osl b/intern/cycles/kernel/shaders/node_glass_bsdf.osl
index c0b8a002536..0042d573f8d 100644
--- a/intern/cycles/kernel/shaders/node_glass_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_glass_bsdf.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_fresnel.h"
+#include "stdcycles.h"
shader node_glass_bsdf(color Color = 0.8,
string distribution = "sharp",
diff --git a/intern/cycles/kernel/shaders/node_glossy_bsdf.osl b/intern/cycles/kernel/shaders/node_glossy_bsdf.osl
index 2d40ee8d3f6..c73604d3650 100644
--- a/intern/cycles/kernel/shaders/node_glossy_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_glossy_bsdf.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_fresnel.h"
+#include "stdcycles.h"
shader node_glossy_bsdf(color Color = 0.8,
string distribution = "GGX",
diff --git a/intern/cycles/kernel/shaders/node_gradient_texture.osl b/intern/cycles/kernel/shaders/node_gradient_texture.osl
index 6cb181aee72..e9acebc0572 100644
--- a/intern/cycles/kernel/shaders/node_gradient_texture.osl
+++ b/intern/cycles/kernel/shaders/node_gradient_texture.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
/* Gradient */
diff --git a/intern/cycles/kernel/shaders/node_hair_bsdf.osl b/intern/cycles/kernel/shaders/node_hair_bsdf.osl
index bc912087666..3e0ac7af2e0 100644
--- a/intern/cycles/kernel/shaders/node_hair_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_hair_bsdf.osl
@@ -16,7 +16,7 @@
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_hair_bsdf(color Color = 0.8,
string component = "reflection",
diff --git a/intern/cycles/kernel/shaders/node_hair_info.osl b/intern/cycles/kernel/shaders/node_hair_info.osl
index 991a27c4103..ee08ea57e68 100644
--- a/intern/cycles/kernel/shaders/node_hair_info.osl
+++ b/intern/cycles/kernel/shaders/node_hair_info.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_hair_info(output float IsStrand = 0.0,
output float Intercept = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_hash.h b/intern/cycles/kernel/shaders/node_hash.h
index 7affe432bf2..b42e42ff910 100644
--- a/intern/cycles/kernel/shaders/node_hash.h
+++ b/intern/cycles/kernel/shaders/node_hash.h
@@ -1,4 +1,4 @@
-#include "stdosl.h"
+#include "stdcycles.h"
#include "vector2.h"
#include "vector4.h"
diff --git a/intern/cycles/kernel/shaders/node_holdout.osl b/intern/cycles/kernel/shaders/node_holdout.osl
index b51bc0543a5..92e41c92f72 100644
--- a/intern/cycles/kernel/shaders/node_holdout.osl
+++ b/intern/cycles/kernel/shaders/node_holdout.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_holdout(output closure color Holdout = holdout())
{
diff --git a/intern/cycles/kernel/shaders/node_hsv.osl b/intern/cycles/kernel/shaders/node_hsv.osl
index 30c56a20a92..4417057b10f 100644
--- a/intern/cycles/kernel/shaders/node_hsv.osl
+++ b/intern/cycles/kernel/shaders/node_hsv.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_color.h"
+#include "stdcycles.h"
shader node_hsv(float Hue = 0.5,
float Saturation = 1.0,
diff --git a/intern/cycles/kernel/shaders/node_ies_light.osl b/intern/cycles/kernel/shaders/node_ies_light.osl
index 4d881eb3b65..76348b4d758 100644
--- a/intern/cycles/kernel/shaders/node_ies_light.osl
+++ b/intern/cycles/kernel/shaders/node_ies_light.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
/* IES Light */
diff --git a/intern/cycles/kernel/shaders/node_image_texture.osl b/intern/cycles/kernel/shaders/node_image_texture.osl
index 9a0f2d054ea..22d34a1082c 100644
--- a/intern/cycles/kernel/shaders/node_image_texture.osl
+++ b/intern/cycles/kernel/shaders/node_image_texture.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_color.h"
+#include "stdcycles.h"
point texco_remap_square(point co)
{
diff --git a/intern/cycles/kernel/shaders/node_invert.osl b/intern/cycles/kernel/shaders/node_invert.osl
index c7d41e4e129..23c16935ca1 100644
--- a/intern/cycles/kernel/shaders/node_invert.osl
+++ b/intern/cycles/kernel/shaders/node_invert.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_invert(float Fac = 1.0, color ColorIn = 0.8, output color ColorOut = 0.8)
{
diff --git a/intern/cycles/kernel/shaders/node_layer_weight.osl b/intern/cycles/kernel/shaders/node_layer_weight.osl
index 7c46f28b41b..1662be2cad1 100644
--- a/intern/cycles/kernel/shaders/node_layer_weight.osl
+++ b/intern/cycles/kernel/shaders/node_layer_weight.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_fresnel.h"
+#include "stdcycles.h"
shader node_layer_weight(float Blend = 0.5,
normal Normal = N,
diff --git a/intern/cycles/kernel/shaders/node_light_falloff.osl b/intern/cycles/kernel/shaders/node_light_falloff.osl
index d0d7dd9c5aa..3f3c9444a5a 100644
--- a/intern/cycles/kernel/shaders/node_light_falloff.osl
+++ b/intern/cycles/kernel/shaders/node_light_falloff.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_light_falloff(float Strength = 0.0,
float Smooth = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_light_path.osl b/intern/cycles/kernel/shaders/node_light_path.osl
index c4a3624a67f..4ff06915771 100644
--- a/intern/cycles/kernel/shaders/node_light_path.osl
+++ b/intern/cycles/kernel/shaders/node_light_path.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_light_path(output float IsCameraRay = 0.0,
output float IsShadowRay = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_magic_texture.osl b/intern/cycles/kernel/shaders/node_magic_texture.osl
index 26e7d57278b..476c6895f05 100644
--- a/intern/cycles/kernel/shaders/node_magic_texture.osl
+++ b/intern/cycles/kernel/shaders/node_magic_texture.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
/* Magic */
diff --git a/intern/cycles/kernel/shaders/node_map_range.osl b/intern/cycles/kernel/shaders/node_map_range.osl
index 242ec4271ed..1c49027e6dd 100644
--- a/intern/cycles/kernel/shaders/node_map_range.osl
+++ b/intern/cycles/kernel/shaders/node_map_range.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
float safe_divide(float a, float b)
{
diff --git a/intern/cycles/kernel/shaders/node_mapping.osl b/intern/cycles/kernel/shaders/node_mapping.osl
index e8a9d940eda..8d204999630 100644
--- a/intern/cycles/kernel/shaders/node_mapping.osl
+++ b/intern/cycles/kernel/shaders/node_mapping.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
point safe_divide(point a, point b)
{
diff --git a/intern/cycles/kernel/shaders/node_math.h b/intern/cycles/kernel/shaders/node_math.h
new file mode 100644
index 00000000000..4b1a6c5bc16
--- /dev/null
+++ b/intern/cycles/kernel/shaders/node_math.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+float safe_divide(float a, float b)
+{
+ return (b != 0.0) ? a / b : 0.0;
+}
+
+vector safe_divide(vector a, vector b)
+{
+ return vector((b[0] != 0.0) ? a[0] / b[0] : 0.0,
+ (b[1] != 0.0) ? a[1] / b[1] : 0.0,
+ (b[2] != 0.0) ? a[2] / b[2] : 0.0);
+}
+
+float safe_modulo(float a, float b)
+{
+ return (b != 0.0) ? fmod(a, b) : 0.0;
+}
+
+float fract(float a)
+{
+ return a - floor(a);
+}
+
+/* See: https://www.iquilezles.org/www/articles/smin/smin.htm. */
+float smoothmin(float a, float b, float c)
+{
+ if (c != 0.0) {
+ float h = max(c - abs(a - b), 0.0) / c;
+ return min(a, b) - h * h * h * c * (1.0 / 6.0);
+ }
+ else {
+ return min(a, b);
+ }
+}
+
+float pingpong(float a, float b)
+{
+ return (b != 0.0) ? abs(fract((a - b) / (b * 2.0)) * b * 2.0 - b) : 0.0;
+}
+
+float safe_sqrt(float a)
+{
+ return (a > 0.0) ? sqrt(a) : 0.0;
+}
+
+float safe_log(float a, float b)
+{
+ return (a > 0.0 && b > 0.0) ? log(a) / log(b) : 0.0;
+}
+
+vector project(vector v, vector v_proj)
+{
+ float lenSquared = dot(v_proj, v_proj);
+ return (lenSquared != 0.0) ? (dot(v, v_proj) / lenSquared) * v_proj : vector(0.0);
+}
+
+vector snap(vector a, vector b)
+{
+ return floor(safe_divide(a, b)) * b;
+}
+
+/* Adapted from godotengine math_funcs.h. */
+float wrap(float value, float max, float min)
+{
+ float range = max - min;
+ return (range != 0.0) ? value - (range * floor((value - min) / range)) : min;
+}
+
+point wrap(point value, point max, point min)
+{
+ return point(wrap(value[0], max[0], min[0]),
+ wrap(value[1], max[1], min[1]),
+ wrap(value[2], max[2], min[2]));
+}
+
+matrix euler_to_mat(point euler)
+{
+ float cx = cos(euler[0]);
+ float cy = cos(euler[1]);
+ float cz = cos(euler[2]);
+ float sx = sin(euler[0]);
+ float sy = sin(euler[1]);
+ float sz = sin(euler[2]);
+ matrix mat = matrix(1.0);
+ mat[0][0] = cy * cz;
+ mat[0][1] = cy * sz;
+ mat[0][2] = -sy;
+ mat[1][0] = sy * sx * cz - cx * sz;
+ mat[1][1] = sy * sx * sz + cx * cz;
+ mat[1][2] = cy * sx;
+ mat[2][0] = sy * cx * cz + sx * sz;
+ mat[2][1] = sy * cx * sz - sx * cz;
+ mat[2][2] = cy * cx;
+ return mat;
+}
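Editor's note: for reference, here is a minimal Python sketch of the scalar helpers consolidated into node_math.h above (illustrative only, not part of the patch; the function names simply mirror the OSL versions):

import math

def safe_divide(a, b):
    # Mirrors the OSL helper: return 0.0 instead of dividing by zero.
    return a / b if b != 0.0 else 0.0

def fract(a):
    # Fractional part, as in the OSL fract() above.
    return a - math.floor(a)

def pingpong(a, b):
    # Bounces a back and forth within [0, b].
    return abs(fract((a - b) / (b * 2.0)) * b * 2.0 - b) if b != 0.0 else 0.0

def wrap(value, vmax, vmin):
    # Wraps value into [vmin, vmax); matches the godotengine-derived helper above.
    r = vmax - vmin
    return value - (r * math.floor((value - vmin) / r)) if r != 0.0 else vmin

# Quick checks of the behaviour used by the new math node modes.
assert wrap(7.5, 5.0, 0.0) == 2.5
assert pingpong(3.0, 2.0) == 1.0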
diff --git a/intern/cycles/kernel/shaders/node_math.osl b/intern/cycles/kernel/shaders/node_math.osl
index 1eccb56405b..dbaa7ccb60e 100644
--- a/intern/cycles/kernel/shaders/node_math.osl
+++ b/intern/cycles/kernel/shaders/node_math.osl
@@ -14,56 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
-
-float safe_divide(float a, float b)
-{
- return (b != 0.0) ? a / b : 0.0;
-}
-
-float safe_modulo(float a, float b)
-{
- return (b != 0.0) ? fmod(a, b) : 0.0;
-}
-
-float fract(float a)
-{
- return a - floor(a);
-}
-
-/* Adapted from godotengine math_funcs.h. */
-float wrap(float value, float max, float min)
-{
- float range = max - min;
- return (range != 0.0) ? value - (range * floor((value - min) / range)) : min;
-}
-
-/* See: https://www.iquilezles.org/www/articles/smin/smin.htm. */
-float smoothmin(float a, float b, float c)
-{
- if (c != 0.0) {
- float h = max(c - abs(a - b), 0.0) / c;
- return min(a, b) - h * h * h * c * (1.0 / 6.0);
- }
- else {
- return min(a, b);
- }
-}
-
-float pingpong(float a, float b)
-{
- return (b != 0.0) ? abs(fract((a - b) / (b * 2.0)) * b * 2.0 - b) : 0.0;
-}
-
-float safe_sqrt(float a)
-{
- return (a > 0.0) ? sqrt(a) : 0.0;
-}
-
-float safe_log(float a, float b)
-{
- return (a > 0.0 && b > 0.0) ? log(a) / log(b) : 0.0;
-}
+#include "node_math.h"
+#include "stdcycles.h"
/* OSL asin, acos, and pow functions are safe by default. */
shader node_math(string type = "add",
diff --git a/intern/cycles/kernel/shaders/node_mix.osl b/intern/cycles/kernel/shaders/node_mix.osl
index 9fbd3391ade..a13b4bb7b96 100644
--- a/intern/cycles/kernel/shaders/node_mix.osl
+++ b/intern/cycles/kernel/shaders/node_mix.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_color.h"
+#include "stdcycles.h"
color node_mix_blend(float t, color col1, color col2)
{
diff --git a/intern/cycles/kernel/shaders/node_mix_closure.osl b/intern/cycles/kernel/shaders/node_mix_closure.osl
index 517c59c8786..94fc2171c44 100644
--- a/intern/cycles/kernel/shaders/node_mix_closure.osl
+++ b/intern/cycles/kernel/shaders/node_mix_closure.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_mix_closure(float Fac = 0.5,
closure color Closure1 = 0,
diff --git a/intern/cycles/kernel/shaders/node_musgrave_texture.osl b/intern/cycles/kernel/shaders/node_musgrave_texture.osl
index 8861f9a671a..d03b84c1ab4 100644
--- a/intern/cycles/kernel/shaders/node_musgrave_texture.osl
+++ b/intern/cycles/kernel/shaders/node_musgrave_texture.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_noise.h"
+#include "stdcycles.h"
#include "vector2.h"
#include "vector4.h"
@@ -691,7 +691,7 @@ shader node_musgrave_texture(
float Dimension = 2.0,
float Scale = 5.0,
float Detail = 2.0,
- float Lacunarity = 1.0,
+ float Lacunarity = 2.0,
float Offset = 0.0,
float Gain = 1.0,
output float Fac = 0.0)
diff --git a/intern/cycles/kernel/shaders/node_noise_texture.osl b/intern/cycles/kernel/shaders/node_noise_texture.osl
index 6cff1cdab2c..4121b415673 100644
--- a/intern/cycles/kernel/shaders/node_noise_texture.osl
+++ b/intern/cycles/kernel/shaders/node_noise_texture.osl
@@ -14,10 +14,10 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "node_noise.h"
+#include "stdcycles.h"
#include "vector2.h"
#include "vector4.h"
-#include "node_noise.h"
#define vector3 point
diff --git a/intern/cycles/kernel/shaders/node_normal.osl b/intern/cycles/kernel/shaders/node_normal.osl
index 1d20c3e7cac..a0a88445427 100644
--- a/intern/cycles/kernel/shaders/node_normal.osl
+++ b/intern/cycles/kernel/shaders/node_normal.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_normal(normal direction = normal(0.0, 0.0, 0.0),
normal NormalIn = normal(0.0, 0.0, 0.0),
diff --git a/intern/cycles/kernel/shaders/node_normal_map.osl b/intern/cycles/kernel/shaders/node_normal_map.osl
index 90b593d00bc..912960f13ab 100644
--- a/intern/cycles/kernel/shaders/node_normal_map.osl
+++ b/intern/cycles/kernel/shaders/node_normal_map.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_normal_map(normal NormalIn = N,
float Strength = 1.0,
diff --git a/intern/cycles/kernel/shaders/node_object_info.osl b/intern/cycles/kernel/shaders/node_object_info.osl
index 350404bb747..44513d9a1ba 100644
--- a/intern/cycles/kernel/shaders/node_object_info.osl
+++ b/intern/cycles/kernel/shaders/node_object_info.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_object_info(output point Location = point(0.0, 0.0, 0.0),
output color Color = color(1.0, 1.0, 1.0),
diff --git a/intern/cycles/kernel/shaders/node_output_displacement.osl b/intern/cycles/kernel/shaders/node_output_displacement.osl
index fa7f603980b..bd60fc2b7e1 100644
--- a/intern/cycles/kernel/shaders/node_output_displacement.osl
+++ b/intern/cycles/kernel/shaders/node_output_displacement.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
displacement node_output_displacement(vector Displacement = 0.0)
{
diff --git a/intern/cycles/kernel/shaders/node_output_surface.osl b/intern/cycles/kernel/shaders/node_output_surface.osl
index 013666145da..cd746f79c4a 100644
--- a/intern/cycles/kernel/shaders/node_output_surface.osl
+++ b/intern/cycles/kernel/shaders/node_output_surface.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
surface node_output_surface(closure color Surface = 0)
{
diff --git a/intern/cycles/kernel/shaders/node_output_volume.osl b/intern/cycles/kernel/shaders/node_output_volume.osl
index dd479e751b3..4cc14cd6699 100644
--- a/intern/cycles/kernel/shaders/node_output_volume.osl
+++ b/intern/cycles/kernel/shaders/node_output_volume.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
volume node_output_volume(closure color Volume = 0)
{
diff --git a/intern/cycles/kernel/shaders/node_particle_info.osl b/intern/cycles/kernel/shaders/node_particle_info.osl
index e286c33a1ff..2dcdf3d0f3c 100644
--- a/intern/cycles/kernel/shaders/node_particle_info.osl
+++ b/intern/cycles/kernel/shaders/node_particle_info.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_particle_info(output float Index = 0.0,
output float Random = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_principled_bsdf.osl b/intern/cycles/kernel/shaders/node_principled_bsdf.osl
index 657ced9b6e6..1711811ac65 100644
--- a/intern/cycles/kernel/shaders/node_principled_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_principled_bsdf.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_fresnel.h"
+#include "stdcycles.h"
shader node_principled_bsdf(string distribution = "Multiscatter GGX",
string subsurface_method = "burley",
diff --git a/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl b/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl
index bf986438fca..4cf17e0e703 100644
--- a/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
color log3(color a)
{
diff --git a/intern/cycles/kernel/shaders/node_principled_volume.osl b/intern/cycles/kernel/shaders/node_principled_volume.osl
index 39cf6837eb2..0cb4cdebdaa 100644
--- a/intern/cycles/kernel/shaders/node_principled_volume.osl
+++ b/intern/cycles/kernel/shaders/node_principled_volume.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_principled_volume(color Color = color(0.5, 0.5, 0.5),
float Density = 1.0,
diff --git a/intern/cycles/kernel/shaders/node_refraction_bsdf.osl b/intern/cycles/kernel/shaders/node_refraction_bsdf.osl
index 941d99dd44d..9e9b31d9a87 100644
--- a/intern/cycles/kernel/shaders/node_refraction_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_refraction_bsdf.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_refraction_bsdf(color Color = 0.8,
string distribution = "sharp",
diff --git a/intern/cycles/kernel/shaders/node_rgb_curves.osl b/intern/cycles/kernel/shaders/node_rgb_curves.osl
index e34eb027cc3..8850040d580 100644
--- a/intern/cycles/kernel/shaders/node_rgb_curves.osl
+++ b/intern/cycles/kernel/shaders/node_rgb_curves.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_ramp_util.h"
+#include "stdcycles.h"
shader node_rgb_curves(color ramp[] = {0.0},
float min_x = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_rgb_ramp.osl b/intern/cycles/kernel/shaders/node_rgb_ramp.osl
index c9f9746a4fb..2131edb2688 100644
--- a/intern/cycles/kernel/shaders/node_rgb_ramp.osl
+++ b/intern/cycles/kernel/shaders/node_rgb_ramp.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_ramp_util.h"
+#include "stdcycles.h"
shader node_rgb_ramp(color ramp_color[] = {0.0},
float ramp_alpha[] = {0.0},
diff --git a/intern/cycles/kernel/shaders/node_rgb_to_bw.osl b/intern/cycles/kernel/shaders/node_rgb_to_bw.osl
index 837d6caf5fc..f0a094d5b57 100644
--- a/intern/cycles/kernel/shaders/node_rgb_to_bw.osl
+++ b/intern/cycles/kernel/shaders/node_rgb_to_bw.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_rgb_to_bw(color Color = 0.0, output float Val = 0.0)
{
diff --git a/intern/cycles/kernel/shaders/node_scatter_volume.osl b/intern/cycles/kernel/shaders/node_scatter_volume.osl
index fce5716f372..36ad952dee6 100644
--- a/intern/cycles/kernel/shaders/node_scatter_volume.osl
+++ b/intern/cycles/kernel/shaders/node_scatter_volume.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_scatter_volume(color Color = color(0.8, 0.8, 0.8),
float Density = 1.0,
diff --git a/intern/cycles/kernel/shaders/node_separate_hsv.osl b/intern/cycles/kernel/shaders/node_separate_hsv.osl
index c77ed1f3755..2f902b72dbc 100644
--- a/intern/cycles/kernel/shaders/node_separate_hsv.osl
+++ b/intern/cycles/kernel/shaders/node_separate_hsv.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_color.h"
+#include "stdcycles.h"
shader node_separate_hsv(color Color = 0.8,
output float H = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_separate_rgb.osl b/intern/cycles/kernel/shaders/node_separate_rgb.osl
index ee64add27e2..62e4aedb879 100644
--- a/intern/cycles/kernel/shaders/node_separate_rgb.osl
+++ b/intern/cycles/kernel/shaders/node_separate_rgb.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_separate_rgb(color Image = 0.8,
output float R = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_separate_xyz.osl b/intern/cycles/kernel/shaders/node_separate_xyz.osl
index 8a563f5e920..acaf3942b6f 100644
--- a/intern/cycles/kernel/shaders/node_separate_xyz.osl
+++ b/intern/cycles/kernel/shaders/node_separate_xyz.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_separate_xyz(vector Vector = 0.8,
output float X = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_set_normal.osl b/intern/cycles/kernel/shaders/node_set_normal.osl
index 9541b829ef7..26a97e2b5d1 100644
--- a/intern/cycles/kernel/shaders/node_set_normal.osl
+++ b/intern/cycles/kernel/shaders/node_set_normal.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
surface node_set_normal(normal Direction = N, output normal Normal = N)
{
diff --git a/intern/cycles/kernel/shaders/node_sky_texture.osl b/intern/cycles/kernel/shaders/node_sky_texture.osl
index 9b29e5489c2..4def237a2e0 100644
--- a/intern/cycles/kernel/shaders/node_sky_texture.osl
+++ b/intern/cycles/kernel/shaders/node_sky_texture.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_color.h"
+#include "stdcycles.h"
float sky_angle_between(float thetav, float phiv, float theta, float phi)
{
diff --git a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl
index e12199d8c3d..b1e854150ab 100644
--- a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl
+++ b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_subsurface_scattering(color Color = 0.8,
float Scale = 1.0,
diff --git a/intern/cycles/kernel/shaders/node_tangent.osl b/intern/cycles/kernel/shaders/node_tangent.osl
index 44eb9973f3d..83f19a4610b 100644
--- a/intern/cycles/kernel/shaders/node_tangent.osl
+++ b/intern/cycles/kernel/shaders/node_tangent.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_tangent(normal NormalIn = N,
string attr_name = "geom:tangent",
diff --git a/intern/cycles/kernel/shaders/node_texture_coordinate.osl b/intern/cycles/kernel/shaders/node_texture_coordinate.osl
index 13861653d04..ac05e984af2 100644
--- a/intern/cycles/kernel/shaders/node_texture_coordinate.osl
+++ b/intern/cycles/kernel/shaders/node_texture_coordinate.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_texture_coordinate(
normal NormalIn = N,
diff --git a/intern/cycles/kernel/shaders/node_toon_bsdf.osl b/intern/cycles/kernel/shaders/node_toon_bsdf.osl
index ed3a0b25c60..4a44730c70c 100644
--- a/intern/cycles/kernel/shaders/node_toon_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_toon_bsdf.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_toon_bsdf(color Color = 0.8,
string component = "diffuse",
diff --git a/intern/cycles/kernel/shaders/node_translucent_bsdf.osl b/intern/cycles/kernel/shaders/node_translucent_bsdf.osl
index 7ce1ab08c59..23a562bf34d 100644
--- a/intern/cycles/kernel/shaders/node_translucent_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_translucent_bsdf.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_translucent_bsdf(color Color = 0.8, normal Normal = N, output closure color BSDF = 0)
{
diff --git a/intern/cycles/kernel/shaders/node_transparent_bsdf.osl b/intern/cycles/kernel/shaders/node_transparent_bsdf.osl
index a735513ba89..eb737a05c41 100644
--- a/intern/cycles/kernel/shaders/node_transparent_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_transparent_bsdf.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_transparent_bsdf(color Color = 0.8, normal Normal = N, output closure color BSDF = 0)
{
diff --git a/intern/cycles/kernel/shaders/node_uv_map.osl b/intern/cycles/kernel/shaders/node_uv_map.osl
index 6f2887be63c..88d8c5ba394 100644
--- a/intern/cycles/kernel/shaders/node_uv_map.osl
+++ b/intern/cycles/kernel/shaders/node_uv_map.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_uv_map(int from_dupli = 0,
string attribute = "",
diff --git a/intern/cycles/kernel/shaders/node_value.osl b/intern/cycles/kernel/shaders/node_value.osl
index 398e2c0e392..13197b9a27a 100644
--- a/intern/cycles/kernel/shaders/node_value.osl
+++ b/intern/cycles/kernel/shaders/node_value.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_value(float value_value = 0.0,
vector vector_value = vector(0.0, 0.0, 0.0),
diff --git a/intern/cycles/kernel/shaders/node_vector_curves.osl b/intern/cycles/kernel/shaders/node_vector_curves.osl
index e8c8036b550..9d3a2b82b0a 100644
--- a/intern/cycles/kernel/shaders/node_vector_curves.osl
+++ b/intern/cycles/kernel/shaders/node_vector_curves.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_ramp_util.h"
+#include "stdcycles.h"
shader node_vector_curves(color ramp[] = {0.0},
float min_x = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_vector_displacement.osl b/intern/cycles/kernel/shaders/node_vector_displacement.osl
index e9bd336347f..7cd9c2a37f2 100644
--- a/intern/cycles/kernel/shaders/node_vector_displacement.osl
+++ b/intern/cycles/kernel/shaders/node_vector_displacement.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_vector_displacement(color Vector = color(0.0, 0.0, 0.0),
float Midlevel = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_vector_math.osl b/intern/cycles/kernel/shaders/node_vector_math.osl
index 4fa9b3bb57b..218851598b4 100644
--- a/intern/cycles/kernel/shaders/node_vector_math.osl
+++ b/intern/cycles/kernel/shaders/node_vector_math.osl
@@ -14,34 +14,13 @@
* limitations under the License.
*/
-#include "stdosl.h"
-
-float safe_divide(float a, float b)
-{
- return (b != 0.0) ? a / b : 0.0;
-}
-
-vector safe_divide(vector a, vector b)
-{
- return vector((b[0] != 0.0) ? a[0] / b[0] : 0.0,
- (b[1] != 0.0) ? a[1] / b[1] : 0.0,
- (b[2] != 0.0) ? a[2] / b[2] : 0.0);
-}
-
-vector project(vector v, vector v_proj)
-{
- float lenSquared = dot(v_proj, v_proj);
- return (lenSquared != 0.0) ? (dot(v, v_proj) / lenSquared) * v_proj : vector(0.0);
-}
-
-vector snap(vector a, vector b)
-{
- return floor(safe_divide(a, b)) * b;
-}
+#include "node_math.h"
+#include "stdcycles.h"
shader node_vector_math(string type = "add",
vector Vector1 = vector(0.0, 0.0, 0.0),
vector Vector2 = vector(0.0, 0.0, 0.0),
+ vector Vector3 = vector(0.0, 0.0, 0.0),
float Scale = 1.0,
output float Value = 0.0,
output vector Vector = vector(0.0, 0.0, 0.0))
@@ -94,6 +73,9 @@ shader node_vector_math(string type = "add",
else if (type == "modulo") {
Vector = fmod(Vector1, Vector2);
}
+ else if (type == "wrap") {
+ Vector = wrap(Vector1, Vector2, Vector3);
+ }
else if (type == "fraction") {
Vector = Vector1 - floor(Vector1);
}
@@ -106,6 +88,15 @@ shader node_vector_math(string type = "add",
else if (type == "maximum") {
Vector = max(Vector1, Vector2);
}
+ else if (type == "sine") {
+ Vector = sin(Vector1);
+ }
+ else if (type == "cosine") {
+ Vector = cos(Vector1);
+ }
+ else if (type == "tangent") {
+ Vector = tan(Vector1);
+ }
else {
warning("%s", "Unknown vector math operator!");
}
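Editor's note: the new "wrap", "sine", "cosine" and "tangent" modes above operate per component. A short Python sketch of the assumed component-wise behaviour (illustrative, not part of the patch):

import math

def wrap(value, vmax, vmin):
    # Scalar wrap, identical to the helper in node_math.h.
    r = vmax - vmin
    return value - (r * math.floor((value - vmin) / r)) if r != 0.0 else vmin

def vec_wrap(v1, v2, v3):
    # Component-wise wrap, matching Vector = wrap(Vector1, Vector2, Vector3) above.
    return tuple(wrap(a, b, c) for a, b, c in zip(v1, v2, v3))

def vec_sin(v):
    # Component-wise sine, matching Vector = sin(Vector1) above.
    return tuple(math.sin(x) for x in v)

print(vec_wrap((7.5, -1.0, 3.0), (5.0, 4.0, 2.0), (0.0, 0.0, 0.0)))  # (2.5, 3.0, 1.0)
print(vec_sin((0.0, math.pi / 2.0, math.pi)))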
diff --git a/intern/cycles/kernel/shaders/node_vector_rotate.osl b/intern/cycles/kernel/shaders/node_vector_rotate.osl
new file mode 100644
index 00000000000..2efe3470ae2
--- /dev/null
+++ b/intern/cycles/kernel/shaders/node_vector_rotate.osl
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "node_math.h"
+#include "stdcycles.h"
+
+shader node_vector_rotate(int invert = 0,
+ string type = "axis",
+ vector VectorIn = vector(0.0, 0.0, 0.0),
+ point Center = point(0.0, 0.0, 0.0),
+ point Rotation = point(0.0, 0.0, 0.0),
+ vector Axis = vector(0.0, 0.0, 1.0),
+ float Angle = 0.0,
+ output vector VectorOut = vector(0.0, 0.0, 0.0))
+{
+ if (type == "euler_xyz") {
+ matrix rmat = (invert) ? transpose(euler_to_mat(Rotation)) : euler_to_mat(Rotation);
+ VectorOut = transform(rmat, VectorIn - Center) + Center;
+ }
+ else {
+ float a = (invert) ? -Angle : Angle;
+ if (type == "x_axis") {
+ VectorOut = rotate(VectorIn - Center, a, point(0.0), vector(1.0, 0.0, 0.0)) + Center;
+ }
+ else if (type == "y_axis") {
+ VectorOut = rotate(VectorIn - Center, a, point(0.0), vector(0.0, 1.0, 0.0)) + Center;
+ }
+ else if (type == "z_axis") {
+ VectorOut = rotate(VectorIn - Center, a, point(0.0), vector(0.0, 0.0, 1.0)) + Center;
+ }
+ else { // axis
+ VectorOut = (length(Axis) != 0.0) ? rotate(VectorIn - Center, a, point(0.0), Axis) + Center :
+ VectorIn;
+ }
+ }
+}
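Editor's note: the axis/angle branches above rely on OSL's built-in rotate(). A hedged Python sketch of the same operation, rotating a vector about an arbitrary axis through Center with Rodrigues' formula (an illustrative assumption of what rotate() computes, not part of the patch):

import math

def rotate_about_axis(v, angle, center, axis):
    # Rodrigues' rotation of v by angle around an axis passing through center.
    ax, ay, az = axis
    length = math.sqrt(ax * ax + ay * ay + az * az)
    if length == 0.0:
        return v  # Degenerate axis: mirror the shader's fallback of returning the input.
    k = (ax / length, ay / length, az / length)
    p = tuple(a - b for a, b in zip(v, center))
    cos_a, sin_a = math.cos(angle), math.sin(angle)
    cross = (k[1] * p[2] - k[2] * p[1],
             k[2] * p[0] - k[0] * p[2],
             k[0] * p[1] - k[1] * p[0])
    dot = sum(a * b for a, b in zip(k, p))
    rotated = tuple(p[i] * cos_a + cross[i] * sin_a + k[i] * dot * (1.0 - cos_a) for i in range(3))
    return tuple(a + b for a, b in zip(rotated, center))

# Rotating +X by 90 degrees around +Z through the origin gives +Y.
print(rotate_about_axis((1.0, 0.0, 0.0), math.pi / 2.0, (0.0, 0.0, 0.0), (0.0, 0.0, 1.0)))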
diff --git a/intern/cycles/kernel/shaders/node_vector_transform.osl b/intern/cycles/kernel/shaders/node_vector_transform.osl
index 22939577be0..1db799cfc9e 100644
--- a/intern/cycles/kernel/shaders/node_vector_transform.osl
+++ b/intern/cycles/kernel/shaders/node_vector_transform.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_vector_transform(string type = "vector",
string convert_from = "world",
diff --git a/intern/cycles/kernel/shaders/node_velvet_bsdf.osl b/intern/cycles/kernel/shaders/node_velvet_bsdf.osl
index 9290b845325..299acef35ee 100644
--- a/intern/cycles/kernel/shaders/node_velvet_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_velvet_bsdf.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_fresnel.h"
+#include "stdcycles.h"
shader node_velvet_bsdf(color Color = 0.8,
float Sigma = 0.0,
diff --git a/intern/cycles/kernel/shaders/node_vertex_color.osl b/intern/cycles/kernel/shaders/node_vertex_color.osl
index 16bf3dd146e..ffaf7a2f720 100644
--- a/intern/cycles/kernel/shaders/node_vertex_color.osl
+++ b/intern/cycles/kernel/shaders/node_vertex_color.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_vertex_color(string bump_offset = "center",
string layer_name = "",
@@ -22,7 +22,16 @@ shader node_vertex_color(string bump_offset = "center",
output float Alpha = 0.0)
{
float vertex_color[4];
- if (getattribute(layer_name, vertex_color)) {
+ string vertex_color_layer;
+
+ if (layer_name == "") {
+ vertex_color_layer = "geom:vertex_color";
+ }
+ else {
+ vertex_color_layer = layer_name;
+ }
+
+ if (getattribute(vertex_color_layer, vertex_color)) {
Color = color(vertex_color[0], vertex_color[1], vertex_color[2]);
Alpha = vertex_color[3];
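Editor's note: the change above makes an empty layer name fall back to the default vertex-color attribute. A one-line Python equivalent of that lookup choice (illustrative; the attribute name is taken from the patch):

def vertex_color_layer(layer_name):
    # Empty layer name falls back to the default attribute, as in the patched shader.
    return layer_name if layer_name else "geom:vertex_color"

print(vertex_color_layer(""))     # -> geom:vertex_color
print(vertex_color_layer("Col"))  # -> Col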
diff --git a/intern/cycles/kernel/shaders/node_voronoi_texture.osl b/intern/cycles/kernel/shaders/node_voronoi_texture.osl
index 10a9f7a6329..04d61c32f8a 100644
--- a/intern/cycles/kernel/shaders/node_voronoi_texture.osl
+++ b/intern/cycles/kernel/shaders/node_voronoi_texture.osl
@@ -14,10 +14,10 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "node_hash.h"
+#include "stdcycles.h"
#include "vector2.h"
#include "vector4.h"
-#include "node_hash.h"
#define vector3 point
diff --git a/intern/cycles/kernel/shaders/node_voxel_texture.osl b/intern/cycles/kernel/shaders/node_voxel_texture.osl
index 0e4484561d8..14489298367 100644
--- a/intern/cycles/kernel/shaders/node_voxel_texture.osl
+++ b/intern/cycles/kernel/shaders/node_voxel_texture.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_voxel_texture(string filename = "",
string interpolation = "linear",
diff --git a/intern/cycles/kernel/shaders/node_wave_texture.osl b/intern/cycles/kernel/shaders/node_wave_texture.osl
index a706c442368..f17397be243 100644
--- a/intern/cycles/kernel/shaders/node_wave_texture.osl
+++ b/intern/cycles/kernel/shaders/node_wave_texture.osl
@@ -14,45 +14,86 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "node_noise.h"
+#include "stdcycles.h"
/* Wave */
-float wave(point p, string type, string profile, float detail, float distortion, float dscale)
+float wave(point p_input,
+ string type,
+ string bands_direction,
+ string rings_direction,
+ string profile,
+ float detail,
+ float distortion,
+ float dscale,
+ float phase)
{
+ /* Prevent precision issues on unit coordinates. */
+ point p = (p_input + 0.000001) * 0.999999;
+
float n = 0.0;
if (type == "bands") {
- n = (p[0] + p[1] + p[2]) * 10.0;
+ if (bands_direction == "x") {
+ n = p[0] * 20.0;
+ }
+ else if (bands_direction == "y") {
+ n = p[1] * 20.0;
+ }
+ else if (bands_direction == "z") {
+ n = p[2] * 20.0;
+ }
+ else { /* diagonal */
+ n = (p[0] + p[1] + p[2]) * 10.0;
+ }
}
else if (type == "rings") {
- n = length(p) * 20.0;
+ point rp = p;
+ if (rings_direction == "x") {
+ rp *= point(0.0, 1.0, 1.0);
+ }
+ else if (rings_direction == "y") {
+ rp *= point(1.0, 0.0, 1.0);
+ }
+ else if (rings_direction == "z") {
+ rp *= point(1.0, 1.0, 0.0);
+ }
+ /* else: "spherical" */
+
+ n = length(rp) * 20.0;
}
+ n += phase;
+
if (distortion != 0.0) {
n = n + (distortion * (fractal_noise(p * dscale, detail) * 2.0 - 1.0));
}
if (profile == "sine") {
- return 0.5 + 0.5 * sin(n);
+ return 0.5 + 0.5 * sin(n - M_PI_2);
+ }
+ else if (profile == "saw") {
+ n /= M_2PI;
+ return n - floor(n);
}
- else {
- /* Saw profile */
+ else { /* profile tri */
n /= M_2PI;
- n -= (int)n;
- return (n < 0.0) ? n + 1.0 : n;
+ return abs(n - floor(n + 0.5)) * 2.0;
}
}
shader node_wave_texture(int use_mapping = 0,
matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
string type = "bands",
+ string bands_direction = "x",
+ string rings_direction = "x",
string profile = "sine",
float Scale = 5.0,
float Distortion = 0.0,
float Detail = 2.0,
float DetailScale = 1.0,
+ float PhaseOffset = 0.0,
point Vector = P,
output float Fac = 0.0,
output color Color = 0.0)
@@ -62,6 +103,14 @@ shader node_wave_texture(int use_mapping = 0,
if (use_mapping)
p = transform(mapping, p);
- Fac = wave(p * Scale, type, profile, Detail, Distortion, DetailScale);
+ Fac = wave(p * Scale,
+ type,
+ bands_direction,
+ rings_direction,
+ profile,
+ Detail,
+ Distortion,
+ DetailScale,
+ PhaseOffset);
Color = Fac;
}
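Editor's note: the three profiles in the reworked wave() above map the accumulated phase n to a factor in [0, 1]. A Python sketch of just that mapping (illustrative, not part of the patch):

import math

def wave_profile(n, profile):
    # Maps the accumulated wave phase n to [0, 1], mirroring the OSL branches above.
    if profile == "sine":
        return 0.5 + 0.5 * math.sin(n - math.pi / 2.0)
    n /= 2.0 * math.pi
    if profile == "saw":
        return n - math.floor(n)                # Ramp from 0 to 1, then reset.
    return abs(n - math.floor(n + 0.5)) * 2.0   # "tri": triangle wave between 0 and 1.

for n in (0.0, math.pi, 2.0 * math.pi):
    print([round(wave_profile(n, p), 3) for p in ("sine", "saw", "tri")])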
diff --git a/intern/cycles/kernel/shaders/node_wavelength.osl b/intern/cycles/kernel/shaders/node_wavelength.osl
index c8c6eecb171..f484c4b4788 100644
--- a/intern/cycles/kernel/shaders/node_wavelength.osl
+++ b/intern/cycles/kernel/shaders/node_wavelength.osl
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "stdcycles.h"
shader node_wavelength(float Wavelength = 500.0, output color Color = 0.0)
{
diff --git a/intern/cycles/kernel/shaders/node_white_noise_texture.osl b/intern/cycles/kernel/shaders/node_white_noise_texture.osl
index 95f91d25e5e..94735a019d5 100644
--- a/intern/cycles/kernel/shaders/node_white_noise_texture.osl
+++ b/intern/cycles/kernel/shaders/node_white_noise_texture.osl
@@ -14,10 +14,10 @@
* limitations under the License.
*/
-#include "stdosl.h"
+#include "node_hash.h"
+#include "stdcycles.h"
#include "vector2.h"
#include "vector4.h"
-#include "node_hash.h"
#define vector3 point
diff --git a/intern/cycles/kernel/shaders/node_wireframe.osl b/intern/cycles/kernel/shaders/node_wireframe.osl
index ea4bd3a4c87..673a451c928 100644
--- a/intern/cycles/kernel/shaders/node_wireframe.osl
+++ b/intern/cycles/kernel/shaders/node_wireframe.osl
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "stdosl.h"
#include "oslutil.h"
+#include "stdcycles.h"
shader node_wireframe(string bump_offset = "center",
int use_pixel_size = 0,
diff --git a/intern/cycles/kernel/shaders/oslutil.h b/intern/cycles/kernel/shaders/oslutil.h
deleted file mode 100644
index d48bfa4a665..00000000000
--- a/intern/cycles/kernel/shaders/oslutil.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Adapted from Open Shading Language with this license:
- *
- * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al.
- * All Rights Reserved.
- *
- * Modifications Copyright 2011, Blender Foundation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Sony Pictures Imageworks nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef CCL_OSLUTIL_H
-#define CCL_OSLUTIL_H
-
-// Return wireframe opacity factor [0, 1] given a geometry type in
-// ("triangles", "polygons" or "patches"), and a line_width in raster
-// or world space depending on the last (raster) boolean argument.
-//
-float wireframe(string edge_type, float line_width, int raster)
-{
- // ray differentials are so big in diffuse context that this function would always return "wire"
- if (raytype("path:diffuse"))
- return 0.0;
-
- int np = 0;
- point p[64];
- float pixelWidth = 1;
-
- if (edge_type == "triangles") {
- np = 3;
- if (!getattribute("geom:trianglevertices", p))
- return 0.0;
- }
- else if (edge_type == "polygons" || edge_type == "patches") {
- getattribute("geom:numpolyvertices", np);
- if (np < 3 || !getattribute("geom:polyvertices", p))
- return 0.0;
- }
-
- if (raster) {
- // Project the derivatives of P to the viewing plane defined
- // by I so we have a measure of how big is a pixel at this point
- float pixelWidthX = length(Dx(P) - dot(Dx(P), I) * I);
- float pixelWidthY = length(Dy(P) - dot(Dy(P), I) * I);
- // Take the average of both axis' length
- pixelWidth = (pixelWidthX + pixelWidthY) / 2;
- }
-
- // Use half the width as the neighbor face will render the
- // other half. And take the square for fast comparison
- pixelWidth *= 0.5 * line_width;
- pixelWidth *= pixelWidth;
- for (int i = 0; i < np; i++) {
- int i2 = i ? i - 1 : np - 1;
- vector dir = P - p[i];
- vector edge = p[i] - p[i2];
- vector crs = cross(edge, dir);
- // At this point dot(crs, crs) / dot(edge, edge) is
- // the square of area / length(edge) == square of the
- // distance to the edge.
- if (dot(crs, crs) < (dot(edge, edge) * pixelWidth))
- return 1;
- }
- return 0;
-}
-
-float wireframe(string edge_type, float line_width)
-{
- return wireframe(edge_type, line_width, 1);
-}
-float wireframe(string edge_type)
-{
- return wireframe(edge_type, 1.0, 1);
-}
-float wireframe()
-{
- return wireframe("polygons", 1.0, 1);
-}
-
-#endif /* CCL_OSLUTIL_H */
diff --git a/intern/cycles/kernel/shaders/stdcycles.h b/intern/cycles/kernel/shaders/stdcycles.h
new file mode 100644
index 00000000000..dd604da68ce
--- /dev/null
+++ b/intern/cycles/kernel/shaders/stdcycles.h
@@ -0,0 +1,150 @@
+/////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// * Neither the name of Sony Pictures Imageworks nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/////////////////////////////////////////////////////////////////////////////
+
+#ifndef CCL_STDCYCLESOSL_H
+#define CCL_STDCYCLESOSL_H
+
+#include "stdosl.h"
+
+// Declaration of built-in functions and closures, stdosl.h does not make
+// these available so we have to redefine them.
+#define BUILTIN [[int builtin = 1]]
+#define BUILTIN_DERIV [[ int builtin = 1, int deriv = 1 ]]
+
+closure color diffuse_ramp(normal N, color colors[8]) BUILTIN;
+closure color phong_ramp(normal N, float exponent, color colors[8]) BUILTIN;
+closure color diffuse_toon(normal N, float size, float smooth) BUILTIN;
+closure color glossy_toon(normal N, float size, float smooth) BUILTIN;
+closure color microfacet_ggx(normal N, float ag) BUILTIN;
+closure color microfacet_ggx_aniso(normal N, vector T, float ax, float ay) BUILTIN;
+closure color microfacet_ggx_refraction(normal N, float ag, float eta) BUILTIN;
+closure color microfacet_multi_ggx(normal N, float ag, color C) BUILTIN;
+closure color microfacet_multi_ggx_aniso(normal N, vector T, float ax, float ay, color C) BUILTIN;
+closure color microfacet_multi_ggx_glass(normal N, float ag, float eta, color C) BUILTIN;
+closure color microfacet_ggx_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
+closure color microfacet_ggx_aniso_fresnel(
+ normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN;
+closure color
+microfacet_multi_ggx_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
+closure color microfacet_multi_ggx_aniso_fresnel(
+ normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN;
+closure color
+microfacet_multi_ggx_glass_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
+closure color microfacet_beckmann(normal N, float ab) BUILTIN;
+closure color microfacet_beckmann_aniso(normal N, vector T, float ax, float ay) BUILTIN;
+closure color microfacet_beckmann_refraction(normal N, float ab, float eta) BUILTIN;
+closure color ashikhmin_shirley(normal N, vector T, float ax, float ay) BUILTIN;
+closure color ashikhmin_velvet(normal N, float sigma) BUILTIN;
+closure color ambient_occlusion() BUILTIN;
+closure color principled_diffuse(normal N, float roughness) BUILTIN;
+closure color principled_sheen(normal N) BUILTIN;
+closure color principled_clearcoat(normal N, float clearcoat, float clearcoat_roughness) BUILTIN;
+
+// BSSRDF
+closure color bssrdf(string method, normal N, vector radius, color albedo) BUILTIN;
+
+// Hair
+closure color
+hair_reflection(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN;
+closure color
+hair_transmission(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN;
+closure color principled_hair(normal N,
+ color sigma,
+ float roughnessu,
+ float roughnessv,
+ float coat,
+ float alpha,
+ float eta) BUILTIN;
+
+// Volume
+closure color henyey_greenstein(float g) BUILTIN;
+closure color absorption() BUILTIN;
+
+normal ensure_valid_reflection(normal Ng, vector I, normal N)
+{
+ /* The implementation here mirrors the one in kernel_montecarlo.h,
+ * check there for an explanation of the algorithm. */
+
+ float sqr(float x)
+ {
+ return x * x;
+ }
+
+ vector R = 2 * dot(N, I) * N - I;
+
+ float threshold = min(0.9 * dot(Ng, I), 0.01);
+ if (dot(Ng, R) >= threshold) {
+ return N;
+ }
+
+ float NdotNg = dot(N, Ng);
+ vector X = normalize(N - NdotNg * Ng);
+
+ float Ix = dot(I, X), Iz = dot(I, Ng);
+ float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
+ float a = Ix2 + Iz2;
+
+ float b = sqrt(Ix2 * (a - sqr(threshold)));
+ float c = Iz * threshold + a;
+
+ float fac = 0.5 / a;
+ float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c);
+ int valid1 = (N1_z2 > 1e-5) && (N1_z2 <= (1.0 + 1e-5));
+ int valid2 = (N2_z2 > 1e-5) && (N2_z2 <= (1.0 + 1e-5));
+
+ float N_new_x, N_new_z;
+ if (valid1 && valid2) {
+ float N1_x = sqrt(1.0 - N1_z2), N1_z = sqrt(N1_z2);
+ float N2_x = sqrt(1.0 - N2_z2), N2_z = sqrt(N2_z2);
+
+ float R1 = 2 * (N1_x * Ix + N1_z * Iz) * N1_z - Iz;
+ float R2 = 2 * (N2_x * Ix + N2_z * Iz) * N2_z - Iz;
+
+ valid1 = (R1 >= 1e-5);
+ valid2 = (R2 >= 1e-5);
+ if (valid1 && valid2) {
+ N_new_x = (R1 < R2) ? N1_x : N2_x;
+ N_new_z = (R1 < R2) ? N1_z : N2_z;
+ }
+ else {
+ N_new_x = (R1 > R2) ? N1_x : N2_x;
+ N_new_z = (R1 > R2) ? N1_z : N2_z;
+ }
+ }
+ else if (valid1 || valid2) {
+ float Nz2 = valid1 ? N1_z2 : N2_z2;
+ N_new_x = sqrt(1.0 - Nz2);
+ N_new_z = sqrt(Nz2);
+ }
+ else {
+ return Ng;
+ }
+
+ return N_new_x * X + N_new_z * Ng;
+}
+
+#endif /* CCL_STDCYCLESOSL_H */
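Editor's note: the early-out at the top of ensure_valid_reflection() above checks whether the reflection of I around the shading normal N still lies on the correct side of the geometric normal Ng. A Python sketch of just that test (illustrative, not part of the patch):

def dot(a, b):
    return sum(x * y for x, y in zip(a, b))

def reflection_is_valid(Ng, I, N):
    # R = 2 (N . I) N - I, the reflection of I around N, as in the OSL code above.
    nd = dot(N, I)
    R = tuple(2.0 * nd * n - i for n, i in zip(N, I))
    threshold = min(0.9 * dot(Ng, I), 0.01)
    return dot(Ng, R) >= threshold

# With N == Ng the reflection is trivially on the correct side.
print(reflection_is_valid((0.0, 0.0, 1.0), (0.0, 0.0, 1.0), (0.0, 0.0, 1.0)))  # True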
diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h
deleted file mode 100644
index 6515d914909..00000000000
--- a/intern/cycles/kernel/shaders/stdosl.h
+++ /dev/null
@@ -1,880 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. All Rights Reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-// * Neither the name of Sony Pictures Imageworks nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-/////////////////////////////////////////////////////////////////////////////
-
-#ifndef CCL_STDOSL_H
-#define CCL_STDOSL_H
-
-#ifndef M_PI
-# define M_PI 3.1415926535897932 /* pi */
-# define M_PI_2 1.5707963267948966 /* pi/2 */
-# define M_PI_4 0.7853981633974483 /* pi/4 */
-# define M_2_PI 0.6366197723675813 /* 2/pi */
-# define M_2PI 6.2831853071795865 /* 2*pi */
-# define M_4PI 12.566370614359173 /* 4*pi */
-# define M_2_SQRTPI 1.1283791670955126 /* 2/sqrt(pi) */
-# define M_E 2.7182818284590452 /* e (Euler's number) */
-# define M_LN2 0.6931471805599453 /* ln(2) */
-# define M_LN10 2.3025850929940457 /* ln(10) */
-# define M_LOG2E 1.4426950408889634 /* log_2(e) */
-# define M_LOG10E 0.4342944819032518 /* log_10(e) */
-# define M_SQRT2 1.4142135623730950 /* sqrt(2) */
-# define M_SQRT1_2 0.7071067811865475 /* 1/sqrt(2) */
-#endif
-
-// Declaration of built-in functions and closures
-#define BUILTIN [[int builtin = 1]]
-#define BUILTIN_DERIV [[ int builtin = 1, int deriv = 1 ]]
-
-#define PERCOMP1(name) \
- normal name(normal x) BUILTIN; \
- vector name(vector x) BUILTIN; \
- point name(point x) BUILTIN; \
- color name(color x) BUILTIN; \
- float name(float x) BUILTIN;
-
-#define PERCOMP2(name) \
- normal name(normal x, normal y) BUILTIN; \
- vector name(vector x, vector y) BUILTIN; \
- point name(point x, point y) BUILTIN; \
- color name(color x, color y) BUILTIN; \
- float name(float x, float y) BUILTIN;
-
-#define PERCOMP2F(name) \
- normal name(normal x, float y) BUILTIN; \
- vector name(vector x, float y) BUILTIN; \
- point name(point x, float y) BUILTIN; \
- color name(color x, float y) BUILTIN; \
- float name(float x, float y) BUILTIN;
-
-// Basic math
-normal degrees(normal x)
-{
- return x * (180.0 / M_PI);
-}
-vector degrees(vector x)
-{
- return x * (180.0 / M_PI);
-}
-point degrees(point x)
-{
- return x * (180.0 / M_PI);
-}
-color degrees(color x)
-{
- return x * (180.0 / M_PI);
-}
-float degrees(float x)
-{
- return x * (180.0 / M_PI);
-}
-normal radians(normal x)
-{
- return x * (M_PI / 180.0);
-}
-vector radians(vector x)
-{
- return x * (M_PI / 180.0);
-}
-point radians(point x)
-{
- return x * (M_PI / 180.0);
-}
-color radians(color x)
-{
- return x * (M_PI / 180.0);
-}
-float radians(float x)
-{
- return x * (M_PI / 180.0);
-}
-PERCOMP1(cos)
-PERCOMP1(sin)
-PERCOMP1(tan)
-PERCOMP1(acos)
-PERCOMP1(asin)
-PERCOMP1(atan)
-PERCOMP2(atan2)
-PERCOMP1(cosh)
-PERCOMP1(sinh)
-PERCOMP1(tanh)
-PERCOMP2F(pow)
-PERCOMP1(exp)
-PERCOMP1(exp2)
-PERCOMP1(expm1)
-PERCOMP1(log)
-point log(point a, float b)
-{
- return log(a) / log(b);
-}
-vector log(vector a, float b)
-{
- return log(a) / log(b);
-}
-color log(color a, float b)
-{
- return log(a) / log(b);
-}
-float log(float a, float b)
-{
- return log(a) / log(b);
-}
-PERCOMP1(log2)
-PERCOMP1(log10)
-PERCOMP1(logb)
-PERCOMP1(sqrt)
-PERCOMP1(inversesqrt)
-float hypot(float a, float b)
-{
- return sqrt(a * a + b * b);
-}
-float hypot(float a, float b, float c)
-{
- return sqrt(a * a + b * b + c * c);
-}
-PERCOMP1(abs)
-int abs(int x) BUILTIN;
-PERCOMP1(fabs)
-int fabs(int x) BUILTIN;
-PERCOMP1(sign)
-PERCOMP1(floor)
-PERCOMP1(ceil)
-PERCOMP1(round)
-PERCOMP1(trunc)
-PERCOMP2(fmod)
-PERCOMP2F(fmod)
-int mod(int a, int b)
-{
- return a - b * (int)floor(a / b);
-}
-point mod(point a, point b)
-{
- return a - b * floor(a / b);
-}
-vector mod(vector a, vector b)
-{
- return a - b * floor(a / b);
-}
-normal mod(normal a, normal b)
-{
- return a - b * floor(a / b);
-}
-color mod(color a, color b)
-{
- return a - b * floor(a / b);
-}
-point mod(point a, float b)
-{
- return a - b * floor(a / b);
-}
-vector mod(vector a, float b)
-{
- return a - b * floor(a / b);
-}
-normal mod(normal a, float b)
-{
- return a - b * floor(a / b);
-}
-color mod(color a, float b)
-{
- return a - b * floor(a / b);
-}
-float mod(float a, float b)
-{
- return a - b * floor(a / b);
-}
-PERCOMP2(min)
-int min(int a, int b) BUILTIN;
-PERCOMP2(max)
-int max(int a, int b) BUILTIN;
-normal clamp(normal x, normal minval, normal maxval)
-{
- return max(min(x, maxval), minval);
-}
-vector clamp(vector x, vector minval, vector maxval)
-{
- return max(min(x, maxval), minval);
-}
-point clamp(point x, point minval, point maxval)
-{
- return max(min(x, maxval), minval);
-}
-color clamp(color x, color minval, color maxval)
-{
- return max(min(x, maxval), minval);
-}
-float clamp(float x, float minval, float maxval)
-{
- return max(min(x, maxval), minval);
-}
-int clamp(int x, int minval, int maxval)
-{
- return max(min(x, maxval), minval);
-}
-#if 0
-normal mix(normal x, normal y, normal a)
-{
- return x * (1 - a) + y * a;
-}
-normal mix(normal x, normal y, float a)
-{
- return x * (1 - a) + y * a;
-}
-vector mix(vector x, vector y, vector a)
-{
- return x * (1 - a) + y * a;
-}
-vector mix(vector x, vector y, float a)
-{
- return x * (1 - a) + y * a;
-}
-point mix(point x, point y, point a)
-{
- return x * (1 - a) + y * a;
-}
-point mix(point x, point y, float a)
-{
- return x * (1 - a) + y * a;
-}
-color mix(color x, color y, color a)
-{
- return x * (1 - a) + y * a;
-}
-color mix(color x, color y, float a)
-{
- return x * (1 - a) + y * a;
-}
-float mix(float x, float y, float a)
-{
- return x * (1 - a) + y * a;
-}
-#else
-normal mix(normal x, normal y, normal a) BUILTIN;
-normal mix(normal x, normal y, float a) BUILTIN;
-vector mix(vector x, vector y, vector a) BUILTIN;
-vector mix(vector x, vector y, float a) BUILTIN;
-point mix(point x, point y, point a) BUILTIN;
-point mix(point x, point y, float a) BUILTIN;
-color mix(color x, color y, color a) BUILTIN;
-color mix(color x, color y, float a) BUILTIN;
-float mix(float x, float y, float a) BUILTIN;
-#endif
-int isnan(float x) BUILTIN;
-int isinf(float x) BUILTIN;
-int isfinite(float x) BUILTIN;
-float erf(float x) BUILTIN;
-float erfc(float x) BUILTIN;
-
-// Vector functions
-
-vector cross(vector a, vector b) BUILTIN;
-float dot(vector a, vector b) BUILTIN;
-float length(vector v) BUILTIN;
-float distance(point a, point b) BUILTIN;
-float distance(point a, point b, point q)
-{
- vector d = b - a;
- float dd = dot(d, d);
- if (dd == 0.0)
- return distance(q, a);
- float t = dot(q - a, d) / dd;
- return distance(q, a + clamp(t, 0.0, 1.0) * d);
-}
-normal normalize(normal v) BUILTIN;
-vector normalize(vector v) BUILTIN;
-vector faceforward(vector N, vector I, vector Nref) BUILTIN;
-vector faceforward(vector N, vector I) BUILTIN;
-vector reflect(vector I, vector N)
-{
- return I - 2 * dot(N, I) * N;
-}
-vector refract(vector I, vector N, float eta)
-{
- float IdotN = dot(I, N);
- float k = 1 - eta * eta * (1 - IdotN * IdotN);
- return (k < 0) ? vector(0, 0, 0) : (eta * I - N * (eta * IdotN + sqrt(k)));
-}
-void fresnel(vector I,
- normal N,
- float eta,
- output float Kr,
- output float Kt,
- output vector R,
- output vector T)
-{
- float sqr(float x)
- {
- return x * x;
- }
- float c = dot(I, N);
- if (c < 0)
- c = -c;
- R = reflect(I, N);
- float g = 1.0 / sqr(eta) - 1.0 + c * c;
- if (g >= 0.0) {
- g = sqrt(g);
- float beta = g - c;
- float F = (c * (g + c) - 1.0) / (c * beta + 1.0);
- F = 0.5 * (1.0 + sqr(F));
- F *= sqr(beta / (g + c));
- Kr = F;
- Kt = (1.0 - Kr) * eta * eta;
- // OPT: the following recomputes some of the above values, but it
- // gives us the same result as if the shader-writer called refract()
- T = refract(I, N, eta);
- }
- else {
- // total internal reflection
- Kr = 1.0;
- Kt = 0.0;
- T = vector(0, 0, 0);
- }
-}
-
-void fresnel(vector I, normal N, float eta, output float Kr, output float Kt)
-{
- vector R, T;
- fresnel(I, N, eta, Kr, Kt, R, T);
-}
-
-normal transform(matrix Mto, normal p) BUILTIN;
-vector transform(matrix Mto, vector p) BUILTIN;
-point transform(matrix Mto, point p) BUILTIN;
-normal transform(string from, string to, normal p) BUILTIN;
-vector transform(string from, string to, vector p) BUILTIN;
-point transform(string from, string to, point p) BUILTIN;
-normal transform(string to, normal p)
-{
- return transform("common", to, p);
-}
-vector transform(string to, vector p)
-{
- return transform("common", to, p);
-}
-point transform(string to, point p)
-{
- return transform("common", to, p);
-}
-
-float transformu(string tounits, float x) BUILTIN;
-float transformu(string fromunits, string tounits, float x) BUILTIN;
-
-point rotate(point p, float angle, point a, point b)
-{
- vector axis = normalize(b - a);
- float cosang, sinang;
- /* Older OSX has major issues with sincos() function,
-  * it's likely a bug in OSL or LLVM. Until we've
-  * updated to newer versions of these libraries we'll
-  * use a workaround to prevent possible crashes on all
-  * the platforms.
-  *
-  * Shouldn't be that bad because it's mainly used for
-  * the anisotropic shader, where the angle is usually constant.
- */
-#if 0
- sincos(angle, sinang, cosang);
-#else
- sinang = sin(angle);
- cosang = cos(angle);
-#endif
- float cosang1 = 1.0 - cosang;
- float x = axis[0], y = axis[1], z = axis[2];
- matrix M = matrix(x * x + (1.0 - x * x) * cosang,
- x * y * cosang1 + z * sinang,
- x * z * cosang1 - y * sinang,
- 0.0,
- x * y * cosang1 - z * sinang,
- y * y + (1.0 - y * y) * cosang,
- y * z * cosang1 + x * sinang,
- 0.0,
- x * z * cosang1 + y * sinang,
- y * z * cosang1 - x * sinang,
- z * z + (1.0 - z * z) * cosang,
- 0.0,
- 0.0,
- 0.0,
- 0.0,
- 1.0);
- return transform(M, p - a) + a;
-}
-
-normal ensure_valid_reflection(normal Ng, vector I, normal N)
-{
- /* The implementation here mirrors the one in kernel_montecarlo.h,
- * check there for an explanation of the algorithm. */
-
- float sqr(float x)
- {
- return x * x;
- }
-
- vector R = 2 * dot(N, I) * N - I;
-
- float threshold = min(0.9 * dot(Ng, I), 0.01);
- if (dot(Ng, R) >= threshold) {
- return N;
- }
-
- float NdotNg = dot(N, Ng);
- vector X = normalize(N - NdotNg * Ng);
-
- float Ix = dot(I, X), Iz = dot(I, Ng);
- float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
- float a = Ix2 + Iz2;
-
- float b = sqrt(Ix2 * (a - sqr(threshold)));
- float c = Iz * threshold + a;
-
- float fac = 0.5 / a;
- float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c);
- int valid1 = (N1_z2 > 1e-5) && (N1_z2 <= (1.0 + 1e-5));
- int valid2 = (N2_z2 > 1e-5) && (N2_z2 <= (1.0 + 1e-5));
-
- float N_new_x, N_new_z;
- if (valid1 && valid2) {
- float N1_x = sqrt(1.0 - N1_z2), N1_z = sqrt(N1_z2);
- float N2_x = sqrt(1.0 - N2_z2), N2_z = sqrt(N2_z2);
-
- float R1 = 2 * (N1_x * Ix + N1_z * Iz) * N1_z - Iz;
- float R2 = 2 * (N2_x * Ix + N2_z * Iz) * N2_z - Iz;
-
- valid1 = (R1 >= 1e-5);
- valid2 = (R2 >= 1e-5);
- if (valid1 && valid2) {
- N_new_x = (R1 < R2) ? N1_x : N2_x;
- N_new_z = (R1 < R2) ? N1_z : N2_z;
- }
- else {
- N_new_x = (R1 > R2) ? N1_x : N2_x;
- N_new_z = (R1 > R2) ? N1_z : N2_z;
- }
- }
- else if (valid1 || valid2) {
- float Nz2 = valid1 ? N1_z2 : N2_z2;
- N_new_x = sqrt(1.0 - Nz2);
- N_new_z = sqrt(Nz2);
- }
- else {
- return Ng;
- }
-
- return N_new_x * X + N_new_z * Ng;
-}
-
-// Color functions
-
-float luminance(color c) BUILTIN;
-color blackbody(float temperatureK) BUILTIN;
-color wavelength_color(float wavelength_nm) BUILTIN;
-
-color transformc(string to, color x)
-{
- color rgb_to_hsv(color rgb)
- { // See Foley & van Dam
- float r = rgb[0], g = rgb[1], b = rgb[2];
- float mincomp = min(r, min(g, b));
- float maxcomp = max(r, max(g, b));
- float delta = maxcomp - mincomp; // chroma
- float h, s, v;
- v = maxcomp;
- if (maxcomp > 0)
- s = delta / maxcomp;
- else
- s = 0;
- if (s <= 0)
- h = 0;
- else {
- if (r >= maxcomp)
- h = (g - b) / delta;
- else if (g >= maxcomp)
- h = 2 + (b - r) / delta;
- else
- h = 4 + (r - g) / delta;
- h /= 6;
- if (h < 0)
- h += 1;
- }
- return color(h, s, v);
- }
-
- color rgb_to_hsl(color rgb)
- { // See Foley & van Dam
- // First convert rgb to hsv, then to hsl
- float minval = min(rgb[0], min(rgb[1], rgb[2]));
- color hsv = rgb_to_hsv(rgb);
- float maxval = hsv[2]; // v == maxval
- float h = hsv[0], s, l = (minval + maxval) / 2;
- if (minval == maxval)
- s = 0; // special 'achromatic' case, hue is 0
- else if (l <= 0.5)
- s = (maxval - minval) / (maxval + minval);
- else
- s = (maxval - minval) / (2 - maxval - minval);
- return color(h, s, l);
- }
-
- color r;
- if (to == "rgb" || to == "RGB")
- r = x;
- else if (to == "hsv")
- r = rgb_to_hsv(x);
- else if (to == "hsl")
- r = rgb_to_hsl(x);
- else if (to == "YIQ")
- r = color(dot(vector(0.299, 0.587, 0.114), (vector)x),
- dot(vector(0.596, -0.275, -0.321), (vector)x),
- dot(vector(0.212, -0.523, 0.311), (vector)x));
- else if (to == "XYZ")
- r = color(dot(vector(0.412453, 0.357580, 0.180423), (vector)x),
- dot(vector(0.212671, 0.715160, 0.072169), (vector)x),
- dot(vector(0.019334, 0.119193, 0.950227), (vector)x));
- else {
- error("Unknown color space \"%s\"", to);
- r = x;
- }
- return r;
-}
-
-color transformc(string from, string to, color x)
-{
- color hsv_to_rgb(color c)
- { // Reference: Foley & van Dam
- float h = c[0], s = c[1], v = c[2];
- color r;
- if (s < 0.0001) {
- r = v;
- }
- else {
- h = 6 * (h - floor(h)); // expand to [0..6)
- int hi = (int)h;
- float f = h - hi;
- float p = v * (1 - s);
- float q = v * (1 - s * f);
- float t = v * (1 - s * (1 - f));
- if (hi == 0)
- r = color(v, t, p);
- else if (hi == 1)
- r = color(q, v, p);
- else if (hi == 2)
- r = color(p, v, t);
- else if (hi == 3)
- r = color(p, q, v);
- else if (hi == 4)
- r = color(t, p, v);
- else
- r = color(v, p, q);
- }
- return r;
- }
-
- color hsl_to_rgb(color c)
- {
- float h = c[0], s = c[1], l = c[2];
- // Easiest to convert hsl -> hsv, then hsv -> RGB (per Foley & van Dam)
- float v = (l <= 0.5) ? (l * (1 + s)) : (l * (1 - s) + s);
- color r;
- if (v <= 0) {
- r = 0;
- }
- else {
- float min = 2 * l - v;
- s = (v - min) / v;
- r = hsv_to_rgb(color(h, s, v));
- }
- return r;
- }
-
- color r;
- if (from == "rgb" || from == "RGB")
- r = x;
- else if (from == "hsv")
- r = hsv_to_rgb(x);
- else if (from == "hsl")
- r = hsl_to_rgb(x);
- else if (from == "YIQ")
- r = color(dot(vector(1, 0.9557, 0.6199), (vector)x),
- dot(vector(1, -0.2716, -0.6469), (vector)x),
- dot(vector(1, -1.1082, 1.7051), (vector)x));
- else if (from == "XYZ")
- r = color(dot(vector(3.240479, -1.537150, -0.498535), (vector)x),
- dot(vector(-0.969256, 1.875991, 0.041556), (vector)x),
- dot(vector(0.055648, -0.204043, 1.057311), (vector)x));
- else {
-    error("Unknown color space \"%s\"", from);
- r = x;
- }
- return transformc(to, r);
-}
-
-// Matrix functions
-
-float determinant(matrix m) BUILTIN;
-matrix transpose(matrix m) BUILTIN;
-
-// Pattern generation
-
-color step(color edge, color x) BUILTIN;
-point step(point edge, point x) BUILTIN;
-vector step(vector edge, vector x) BUILTIN;
-normal step(normal edge, normal x) BUILTIN;
-float step(float edge, float x) BUILTIN;
-float smoothstep(float edge0, float edge1, float x) BUILTIN;
-
-float linearstep(float edge0, float edge1, float x)
-{
- float result;
- if (edge0 != edge1) {
- float xclamped = clamp(x, edge0, edge1);
- result = (xclamped - edge0) / (edge1 - edge0);
- }
- else { // special case: edges coincide
- result = step(edge0, x);
- }
- return result;
-}
-
-float smooth_linearstep(float edge0, float edge1, float x_, float eps_)
-{
- float result;
- if (edge0 != edge1) {
- float rampup(float x, float r)
- {
- return 0.5 / r * x * x;
- }
- float width_inv = 1.0 / (edge1 - edge0);
- float eps = eps_ * width_inv;
- float x = (x_ - edge0) * width_inv;
- if (x <= -eps)
- result = 0;
- else if (x >= eps && x <= 1.0 - eps)
- result = x;
- else if (x >= 1.0 + eps)
- result = 1;
- else if (x < eps)
- result = rampup(x + eps, 2.0 * eps);
- else /* if (x < 1.0+eps) */
- result = 1.0 - rampup(1.0 + eps - x, 2.0 * eps);
- }
- else {
- result = step(edge0, x_);
- }
- return result;
-}
-
-float aastep(float edge, float s, float dedge, float ds)
-{
- // Box filtered AA step
- float width = fabs(dedge) + fabs(ds);
- float halfwidth = 0.5 * width;
- float e1 = edge - halfwidth;
- return (s <= e1) ? 0.0 : ((s >= (edge + halfwidth)) ? 1.0 : (s - e1) / width);
-}
-float aastep(float edge, float s, float ds)
-{
- return aastep(edge, s, filterwidth(edge), ds);
-}
-float aastep(float edge, float s)
-{
- return aastep(edge, s, filterwidth(edge), filterwidth(s));
-}
-
-// Derivatives and area operators
-
-// Displacement functions
-
-// String functions
-int strlen(string s) BUILTIN;
-int hash(string s) BUILTIN;
-int getchar(string s, int index) BUILTIN;
-int startswith(string s, string prefix) BUILTIN;
-int endswith(string s, string suffix) BUILTIN;
-string substr(string s, int start, int len) BUILTIN;
-string substr(string s, int start)
-{
- return substr(s, start, strlen(s));
-}
-float stof(string str) BUILTIN;
-int stoi(string str) BUILTIN;
-
-// Define concat in terms of shorter concat
-string concat(string a, string b, string c)
-{
- return concat(concat(a, b), c);
-}
-string concat(string a, string b, string c, string d)
-{
- return concat(concat(a, b, c), d);
-}
-string concat(string a, string b, string c, string d, string e)
-{
- return concat(concat(a, b, c, d), e);
-}
-string concat(string a, string b, string c, string d, string e, string f)
-{
- return concat(concat(a, b, c, d, e), f);
-}
-
-// Texture
-
-// Closures
-
-closure color diffuse(normal N) BUILTIN;
-closure color oren_nayar(normal N, float sigma) BUILTIN;
-closure color diffuse_ramp(normal N, color colors[8]) BUILTIN;
-closure color phong_ramp(normal N, float exponent, color colors[8]) BUILTIN;
-closure color diffuse_toon(normal N, float size, float smooth) BUILTIN;
-closure color glossy_toon(normal N, float size, float smooth) BUILTIN;
-closure color translucent(normal N) BUILTIN;
-closure color reflection(normal N) BUILTIN;
-closure color refraction(normal N, float eta) BUILTIN;
-closure color transparent() BUILTIN;
-closure color microfacet_ggx(normal N, float ag) BUILTIN;
-closure color microfacet_ggx_aniso(normal N, vector T, float ax, float ay) BUILTIN;
-closure color microfacet_ggx_refraction(normal N, float ag, float eta) BUILTIN;
-closure color microfacet_multi_ggx(normal N, float ag, color C) BUILTIN;
-closure color microfacet_multi_ggx_aniso(normal N, vector T, float ax, float ay, color C) BUILTIN;
-closure color microfacet_multi_ggx_glass(normal N, float ag, float eta, color C) BUILTIN;
-closure color microfacet_ggx_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
-closure color microfacet_ggx_aniso_fresnel(
- normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN;
-closure color
-microfacet_multi_ggx_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
-closure color microfacet_multi_ggx_aniso_fresnel(
- normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN;
-closure color
-microfacet_multi_ggx_glass_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
-closure color microfacet_beckmann(normal N, float ab) BUILTIN;
-closure color microfacet_beckmann_aniso(normal N, vector T, float ax, float ay) BUILTIN;
-closure color microfacet_beckmann_refraction(normal N, float ab, float eta) BUILTIN;
-closure color ashikhmin_shirley(normal N, vector T, float ax, float ay) BUILTIN;
-closure color ashikhmin_velvet(normal N, float sigma) BUILTIN;
-closure color emission() BUILTIN;
-closure color background() BUILTIN;
-closure color holdout() BUILTIN;
-closure color ambient_occlusion() BUILTIN;
-closure color principled_diffuse(normal N, float roughness) BUILTIN;
-closure color principled_sheen(normal N) BUILTIN;
-closure color principled_clearcoat(normal N, float clearcoat, float clearcoat_roughness) BUILTIN;
-
-// BSSRDF
-closure color bssrdf(string method, normal N, vector radius, color albedo) BUILTIN;
-
-// Hair
-closure color
-hair_reflection(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN;
-closure color
-hair_transmission(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN;
-closure color principled_hair(normal N,
- color sigma,
- float roughnessu,
- float roughnessv,
- float coat,
- float alpha,
- float eta) BUILTIN;
-
-// Volume
-closure color henyey_greenstein(float g) BUILTIN;
-closure color absorption() BUILTIN;
-
-// OSL 1.5 Microfacet functions
-closure color microfacet(
- string distribution, normal N, vector U, float xalpha, float yalpha, float eta, int refract)
-{
- /* GGX */
- if (distribution == "ggx" || distribution == "default") {
- if (!refract) {
- if (xalpha == yalpha) {
- /* Isotropic */
- return microfacet_ggx(N, xalpha);
- }
- else {
- /* Anisotropic */
- return microfacet_ggx_aniso(N, U, xalpha, yalpha);
- }
- }
- else {
- return microfacet_ggx_refraction(N, xalpha, eta);
- }
- }
- /* Beckmann */
- else {
- if (!refract) {
- if (xalpha == yalpha) {
- /* Isotropic */
- return microfacet_beckmann(N, xalpha);
- }
- else {
- /* Anisotropic */
- return microfacet_beckmann_aniso(N, U, xalpha, yalpha);
- }
- }
- else {
- return microfacet_beckmann_refraction(N, xalpha, eta);
- }
- }
-}
-
-closure color microfacet(string distribution, normal N, float alpha, float eta, int refract)
-{
- return microfacet(distribution, N, vector(0), alpha, alpha, eta, refract);
-}
-
-// Renderer state
-int backfacing() BUILTIN;
-int raytype(string typename) BUILTIN;
-// the individual 'isFOOray' functions are deprecated
-int iscameraray()
-{
- return raytype("camera");
-}
-int isdiffuseray()
-{
- return raytype("diffuse");
-}
-int isglossyray()
-{
- return raytype("glossy");
-}
-int isshadowray()
-{
- return raytype("shadow");
-}
-int getmatrix(string fromspace, string tospace, output matrix M) BUILTIN;
-int getmatrix(string fromspace, output matrix M)
-{
- return getmatrix(fromspace, "common", M);
-}
-
-// Miscellaneous
-
-#undef BUILTIN
-#undef BUILTIN_DERIV
-#undef PERCOMP1
-#undef PERCOMP2
-#undef PERCOMP2F
-
-#endif /* CCL_STDOSL_H */
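
The deleted stdosl.h above defines reflect(), refract() and a dielectric fresnel() in OSL. As a reference for the math only, here is a minimal standalone C++ sketch of the reflect/refract formulas from that header; the Vec3 type and free functions are illustrative stand-ins, not Cycles or OSL API.

#include <cmath>

// Minimal 3D vector, only what the two functions below need.
struct Vec3 {
  float x, y, z;
};
static Vec3 operator*(float s, Vec3 v) { return {s * v.x, s * v.y, s * v.z}; }
static Vec3 operator*(Vec3 v, float s) { return s * v; }
static Vec3 operator-(Vec3 a, Vec3 b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; }
static float dot(Vec3 a, Vec3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }

// Mirror reflection: I - 2 * dot(N, I) * N, as in the OSL reflect() above.
static Vec3 reflect(Vec3 I, Vec3 N) {
  return I - 2.0f * dot(N, I) * N;
}

// Snell's law; returns (0, 0, 0) on total internal reflection, matching
// the behaviour of the OSL refract() above.
static Vec3 refract(Vec3 I, Vec3 N, float eta) {
  float IdotN = dot(I, N);
  float k = 1.0f - eta * eta * (1.0f - IdotN * IdotN);
  return (k < 0.0f) ? Vec3{0.0f, 0.0f, 0.0f} : eta * I - N * (eta * IdotN + std::sqrt(k));
}
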
diff --git a/intern/cycles/kernel/shaders/vector2.h b/intern/cycles/kernel/shaders/vector2.h
deleted file mode 100644
index c524735d892..00000000000
--- a/intern/cycles/kernel/shaders/vector2.h
+++ /dev/null
@@ -1,291 +0,0 @@
-// Open Shading Language : Copyright (c) 2009-2017 Sony Pictures Imageworks Inc., et al.
-// https://github.com/imageworks/OpenShadingLanguage/blob/master/LICENSE
-
-#pragma once
-#define VECTOR2_H
-
-// vector2 is a 2D vector
-struct vector2 {
- float x;
- float y;
-};
-
-//
-// For vector2, define math operators to match vector
-//
-
-vector2 __operator__neg__(vector2 a)
-{
- return vector2(-a.x, -a.y);
-}
-
-vector2 __operator__add__(vector2 a, vector2 b)
-{
- return vector2(a.x + b.x, a.y + b.y);
-}
-
-vector2 __operator__add__(vector2 a, int b)
-{
- return a + vector2(b, b);
-}
-
-vector2 __operator__add__(vector2 a, float b)
-{
- return a + vector2(b, b);
-}
-
-vector2 __operator__add__(int a, vector2 b)
-{
- return vector2(a, a) + b;
-}
-
-vector2 __operator__add__(float a, vector2 b)
-{
- return vector2(a, a) + b;
-}
-
-vector2 __operator__sub__(vector2 a, vector2 b)
-{
- return vector2(a.x - b.x, a.y - b.y);
-}
-
-vector2 __operator__sub__(vector2 a, int b)
-{
- return a - vector2(b, b);
-}
-
-vector2 __operator__sub__(vector2 a, float b)
-{
- return a - vector2(b, b);
-}
-
-vector2 __operator__sub__(int a, vector2 b)
-{
- return vector2(a, a) - b;
-}
-
-vector2 __operator__sub__(float a, vector2 b)
-{
- return vector2(a, a) - b;
-}
-
-vector2 __operator__mul__(vector2 a, vector2 b)
-{
- return vector2(a.x * b.x, a.y * b.y);
-}
-
-vector2 __operator__mul__(vector2 a, int b)
-{
- return a * vector2(b, b);
-}
-
-vector2 __operator__mul__(vector2 a, float b)
-{
- return a * vector2(b, b);
-}
-
-vector2 __operator__mul__(int a, vector2 b)
-{
- return b * vector2(a, a);
-}
-
-vector2 __operator__mul__(float a, vector2 b)
-{
- return b * vector2(a, a);
-}
-
-vector2 __operator__div__(vector2 a, vector2 b)
-{
- return vector2(a.x / b.x, a.y / b.y);
-}
-
-vector2 __operator__div__(vector2 a, int b)
-{
- float b_inv = 1 / b;
- return a * vector2(b_inv, b_inv);
-}
-
-vector2 __operator__div__(vector2 a, float b)
-{
- float b_inv = 1 / b;
- return a * vector2(b_inv, b_inv);
-}
-
-vector2 __operator__div__(int a, vector2 b)
-{
- return vector2(a, a) / b;
-}
-
-vector2 __operator__div__(float a, vector2 b)
-{
- return vector2(a, a) / b;
-}
-
-int __operator__eq__(vector2 a, vector2 b)
-{
- return (a.x == b.x) && (a.y == b.y);
-}
-
-int __operator__ne__(vector2 a, vector2 b)
-{
- return (a.x != b.x) || (a.y != b.y);
-}
-
-//
-// For vector2, define most of the stdosl functions to match vector
-//
-
-vector2 abs(vector2 a)
-{
- return vector2(abs(a.x), abs(a.y));
-}
-
-vector2 ceil(vector2 a)
-{
- return vector2(ceil(a.x), ceil(a.y));
-}
-
-vector2 floor(vector2 a)
-{
- return vector2(floor(a.x), floor(a.y));
-}
-
-vector2 sqrt(vector2 a)
-{
- return vector2(sqrt(a.x), sqrt(a.y));
-}
-
-vector2 exp(vector2 a)
-{
- return vector2(exp(a.x), exp(a.y));
-}
-
-vector2 log(vector2 a)
-{
- return vector2(log(a.x), log(a.y));
-}
-
-vector2 log2(vector2 a)
-{
- return vector2(log2(a.x), log2(a.y));
-}
-
-vector2 mix(vector2 a, vector2 b, float x)
-{
- return vector2(mix(a.x, b.x, x), mix(a.y, b.y, x));
-}
-
-float dot(vector2 a, vector2 b)
-{
- return (a.x * b.x + a.y * b.y);
-}
-
-float length(vector2 a)
-{
- return hypot(a.x, a.y);
-}
-
-vector2 smoothstep(vector2 low, vector2 high, vector2 in)
-{
- return vector2(smoothstep(low.x, high.x, in.x), smoothstep(low.y, high.y, in.y));
-}
-
-vector2 smoothstep(float low, float high, vector2 in)
-{
- return vector2(smoothstep(low, high, in.x), smoothstep(low, high, in.y));
-}
-
-vector2 clamp(vector2 in, vector2 low, vector2 high)
-{
- return vector2(clamp(in.x, low.x, high.x), clamp(in.y, low.y, high.y));
-}
-
-vector2 clamp(vector2 in, float low, float high)
-{
- return clamp(in, vector2(low, low), vector2(high, high));
-}
-
-vector2 max(vector2 a, vector2 b)
-{
- return vector2(max(a.x, b.x), max(a.y, b.y));
-}
-
-vector2 max(vector2 a, float b)
-{
- return max(a, vector2(b, b));
-}
-
-vector2 normalize(vector2 a)
-{
- return a / length(a);
-}
-
-vector2 min(vector2 a, vector2 b)
-{
-  return vector2(min(a.x, b.x), min(a.y, b.y));
-}
-
-vector2 min(vector2 a, float b)
-{
- return min(a, vector2(b, b));
-}
-
-vector2 fmod(vector2 a, vector2 b)
-{
- return vector2(fmod(a.x, b.x), fmod(a.y, b.y));
-}
-
-vector2 fmod(vector2 a, float b)
-{
- return fmod(a, vector2(b, b));
-}
-
-vector2 pow(vector2 in, vector2 amount)
-{
- return vector2(pow(in.x, amount.x), pow(in.y, amount.y));
-}
-
-vector2 pow(vector2 in, float amount)
-{
- return pow(in, vector2(amount, amount));
-}
-
-vector2 sign(vector2 a)
-{
- return vector2(sign(a.x), sign(a.y));
-}
-
-vector2 sin(vector2 a)
-{
- return vector2(sin(a.x), sin(a.y));
-}
-
-vector2 cos(vector2 a)
-{
- return vector2(cos(a.x), cos(a.y));
-}
-
-vector2 tan(vector2 a)
-{
- return vector2(tan(a.x), tan(a.y));
-}
-
-vector2 asin(vector2 a)
-{
- return vector2(asin(a.x), asin(a.y));
-}
-
-vector2 acos(vector2 a)
-{
- return vector2(acos(a.x), acos(a.y));
-}
-
-vector2 atan2(vector2 a, float f)
-{
- return vector2(atan2(a.x, f), atan2(a.y, f));
-}
-
-vector2 atan2(vector2 a, vector2 b)
-{
- return vector2(atan2(a.x, b.x), atan2(a.y, b.y));
-}
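
The deleted vector2.h emulates the built-in OSL vector type for 2D data by overloading arithmetic componentwise and deriving length, dot and normalize from those pieces. A roughly equivalent standalone C++ sketch of that pattern follows (illustrative names, not Cycles code); note that a componentwise min takes one component from each argument, i.e. min(a.x, b.x) and min(a.y, b.y).

#include <algorithm>
#include <cmath>

// Componentwise 2D vector in the spirit of the removed vector2.h.
struct vec2 {
  float x, y;
};
static vec2 operator+(vec2 a, vec2 b) { return {a.x + b.x, a.y + b.y}; }
static vec2 operator-(vec2 a, vec2 b) { return {a.x - b.x, a.y - b.y}; }
static vec2 operator*(vec2 a, vec2 b) { return {a.x * b.x, a.y * b.y}; }
static vec2 operator/(vec2 a, float b) { float inv = 1.0f / b; return {a.x * inv, a.y * inv}; }

static float dot(vec2 a, vec2 b) { return a.x * b.x + a.y * b.y; }
static float length(vec2 a) { return std::hypot(a.x, a.y); }
static vec2 normalize(vec2 a) { return a / length(a); }

// Componentwise min/max, one component taken from each argument.
static vec2 vmin(vec2 a, vec2 b) { return {std::min(a.x, b.x), std::min(a.y, b.y)}; }
static vec2 vmax(vec2 a, vec2 b) { return {std::max(a.x, b.x), std::max(a.y, b.y)}; }
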
diff --git a/intern/cycles/kernel/shaders/vector4.h b/intern/cycles/kernel/shaders/vector4.h
deleted file mode 100644
index 58e1b3c2e23..00000000000
--- a/intern/cycles/kernel/shaders/vector4.h
+++ /dev/null
@@ -1,327 +0,0 @@
-// Open Shading Language : Copyright (c) 2009-2017 Sony Pictures Imageworks Inc., et al.
-// https://github.com/imageworks/OpenShadingLanguage/blob/master/LICENSE
-
-#pragma once
-#define VECTOR4_H
-
-// vector4 is a 4D vector
-struct vector4 {
- float x;
- float y;
- float z;
- float w;
-};
-
-//
-// For vector4, define math operators to match vector
-//
-
-vector4 __operator__neg__(vector4 a)
-{
- return vector4(-a.x, -a.y, -a.z, -a.w);
-}
-
-vector4 __operator__add__(vector4 a, vector4 b)
-{
- return vector4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
-}
-
-vector4 __operator__add__(vector4 a, int b)
-{
- return a + vector4(b, b, b, b);
-}
-
-vector4 __operator__add__(vector4 a, float b)
-{
- return a + vector4(b, b, b, b);
-}
-
-vector4 __operator__add__(int a, vector4 b)
-{
- return vector4(a, a, a, a) + b;
-}
-
-vector4 __operator__add__(float a, vector4 b)
-{
- return vector4(a, a, a, a) + b;
-}
-
-vector4 __operator__sub__(vector4 a, vector4 b)
-{
- return vector4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
-}
-
-vector4 __operator__sub__(vector4 a, int b)
-{
- return a - vector4(b, b, b, b);
-}
-
-vector4 __operator__sub__(vector4 a, float b)
-{
- return a - vector4(b, b, b, b);
-}
-
-vector4 __operator__sub__(int a, vector4 b)
-{
- return vector4(a, a, a, a) - b;
-}
-
-vector4 __operator__sub__(float a, vector4 b)
-{
- return vector4(a, a, a, a) - b;
-}
-
-vector4 __operator__mul__(vector4 a, vector4 b)
-{
- return vector4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
-}
-
-vector4 __operator__mul__(vector4 a, int b)
-{
- return a * vector4(b, b, b, b);
-}
-
-vector4 __operator__mul__(vector4 a, float b)
-{
- return a * vector4(b, b, b, b);
-}
-
-vector4 __operator__mul__(int a, vector4 b)
-{
- return vector4(a, a, a, a) * b;
-}
-
-vector4 __operator__mul__(float a, vector4 b)
-{
- return vector4(a, a, a, a) * b;
-}
-
-vector4 __operator__div__(vector4 a, vector4 b)
-{
- return vector4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
-}
-
-vector4 __operator__div__(vector4 a, int b)
-{
- float b_inv = 1 / b;
- return a * vector4(b_inv, b_inv, b_inv, b_inv);
-}
-
-vector4 __operator__div__(vector4 a, float b)
-{
- float b_inv = 1 / b;
- return a * vector4(b_inv, b_inv, b_inv, b_inv);
-}
-
-vector4 __operator__div__(int a, vector4 b)
-{
- return vector4(a, a, a, a) / b;
-}
-
-vector4 __operator__div__(float a, vector4 b)
-{
- return vector4(a, a, a, a) / b;
-}
-
-int __operator__eq__(vector4 a, vector4 b)
-{
- return (a.x == b.x) && (a.y == b.y) && (a.z == b.z) && (a.w == b.w);
-}
-
-int __operator__ne__(vector4 a, vector4 b)
-{
- return (a.x != b.x) || (a.y != b.y) || (a.z != b.z) || (a.w != b.w);
-}
-
-//
-// For vector4, define most of the stdosl functions to match vector
-//
-
-vector4 abs(vector4 in)
-{
- return vector4(abs(in.x), abs(in.y), abs(in.z), abs(in.w));
-}
-
-vector4 ceil(vector4 in)
-{
- return vector4(ceil(in.x), ceil(in.y), ceil(in.z), ceil(in.w));
-}
-
-vector4 floor(vector4 in)
-{
- return vector4(floor(in.x), floor(in.y), floor(in.z), floor(in.w));
-}
-
-vector4 sqrt(vector4 in)
-{
- return vector4(sqrt(in.x), sqrt(in.y), sqrt(in.z), sqrt(in.w));
-}
-
-vector4 exp(vector4 in)
-{
- return vector4(exp(in.x), exp(in.y), exp(in.z), exp(in.w));
-}
-
-vector4 log(vector4 in)
-{
- return vector4(log(in.x), log(in.y), log(in.z), log(in.w));
-}
-
-vector4 log2(vector4 in)
-{
- return vector4(log2(in.x), log2(in.y), log2(in.z), log2(in.w));
-}
-
-vector4 mix(vector4 value1, vector4 value2, float x)
-{
- return vector4(mix(value1.x, value2.x, x),
- mix(value1.y, value2.y, x),
- mix(value1.z, value2.z, x),
- mix(value1.w, value2.w, x));
-}
-
-vector vec4ToVec3(vector4 v)
-{
- return vector(v.x, v.y, v.z) / v.w;
-}
-
-float dot(vector4 a, vector4 b)
-{
- return ((a.x * b.x) + (a.y * b.y) + (a.z * b.z) + (a.w * b.w));
-}
-
-float length(vector4 a)
-{
- return sqrt(a.x * a.x + a.y * a.y + a.z * a.z + a.w * a.w);
-}
-
-vector4 smoothstep(vector4 low, vector4 high, vector4 in)
-{
- return vector4(smoothstep(low.x, high.x, in.x),
- smoothstep(low.y, high.y, in.y),
- smoothstep(low.z, high.z, in.z),
- smoothstep(low.w, high.w, in.w));
-}
-
-vector4 smoothstep(float low, float high, vector4 in)
-{
- return vector4(smoothstep(low, high, in.x),
- smoothstep(low, high, in.y),
- smoothstep(low, high, in.z),
- smoothstep(low, high, in.w));
-}
-
-vector4 clamp(vector4 in, vector4 low, vector4 high)
-{
- return vector4(clamp(in.x, low.x, high.x),
- clamp(in.y, low.y, high.y),
- clamp(in.z, low.z, high.z),
- clamp(in.w, low.w, high.w));
-}
-
-vector4 clamp(vector4 in, float low, float high)
-{
- return vector4(clamp(in.x, low, high),
- clamp(in.y, low, high),
- clamp(in.z, low, high),
- clamp(in.w, low, high));
-}
-
-vector4 max(vector4 a, vector4 b)
-{
- return vector4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w));
-}
-
-vector4 max(vector4 a, float b)
-{
- return max(a, vector4(b, b, b, b));
-}
-
-vector4 normalize(vector4 a)
-{
- return a / length(a);
-}
-
-vector4 min(vector4 a, vector4 b)
-{
- return vector4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w));
-}
-
-vector4 min(vector4 a, float b)
-{
- return min(a, vector4(b, b, b, b));
-}
-
-vector4 fmod(vector4 a, vector4 b)
-{
- return vector4(fmod(a.x, b.x), fmod(a.y, b.y), fmod(a.z, b.z), fmod(a.w, b.w));
-}
-
-vector4 fmod(vector4 a, float b)
-{
- return fmod(a, vector4(b, b, b, b));
-}
-
-vector4 pow(vector4 in, vector4 amount)
-{
- return vector4(
- pow(in.x, amount.x), pow(in.y, amount.y), pow(in.z, amount.z), pow(in.w, amount.w));
-}
-
-vector4 pow(vector4 in, float amount)
-{
- return vector4(pow(in.x, amount), pow(in.y, amount), pow(in.z, amount), pow(in.w, amount));
-}
-
-vector4 sign(vector4 a)
-{
- return vector4(sign(a.x), sign(a.y), sign(a.z), sign(a.w));
-}
-
-vector4 sin(vector4 a)
-{
- return vector4(sin(a.x), sin(a.y), sin(a.z), sin(a.w));
-}
-
-vector4 cos(vector4 a)
-{
- return vector4(cos(a.x), cos(a.y), cos(a.z), cos(a.w));
-}
-
-vector4 tan(vector4 a)
-{
- return vector4(tan(a.x), tan(a.y), tan(a.z), tan(a.w));
-}
-
-vector4 asin(vector4 a)
-{
- return vector4(asin(a.x), asin(a.y), asin(a.z), asin(a.w));
-}
-
-vector4 acos(vector4 a)
-{
- return vector4(acos(a.x), acos(a.y), acos(a.z), acos(a.w));
-}
-
-vector4 atan2(vector4 a, float f)
-{
- return vector4(atan2(a.x, f), atan2(a.y, f), atan2(a.z, f), atan2(a.w, f));
-}
-
-vector4 atan2(vector4 a, vector4 b)
-{
- return vector4(atan2(a.x, b.x), atan2(a.y, b.y), atan2(a.z, b.z), atan2(a.w, b.w));
-}
-
-vector4 transform(matrix M, vector4 p)
-{
-  return vector4(M[0][0] * p.x + M[0][1] * p.y + M[0][2] * p.z + M[0][3] * p.w,
-                 M[1][0] * p.x + M[1][1] * p.y + M[1][2] * p.z + M[1][3] * p.w,
-                 M[2][0] * p.x + M[2][1] * p.y + M[2][2] * p.z + M[2][3] * p.w,
-                 M[3][0] * p.x + M[3][1] * p.y + M[3][2] * p.z + M[3][3] * p.w);
-}
-
-vector4 transform(string fromspace, string tospace, vector4 p)
-{
- return transform(matrix(fromspace, tospace), p);
-}
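
The transform(matrix, vector4) overload at the end of the deleted vector4.h is a plain row-times-column product, where the w term must come from the matrix's fourth column. A minimal C++ sketch of the intended math, assuming row-major M[row][col] indexing (types and names are illustrative):

// 4x4 matrix times 4D vector: each output component is the dot product
// of one matrix row with (x, y, z, w).
struct vec4 {
  float x, y, z, w;
};

static vec4 transform4(const float M[4][4], vec4 p) {
  return {M[0][0] * p.x + M[0][1] * p.y + M[0][2] * p.z + M[0][3] * p.w,
          M[1][0] * p.x + M[1][1] * p.y + M[1][2] * p.z + M[1][3] * p.w,
          M[2][0] * p.x + M[2][1] * p.y + M[2][2] * p.z + M[2][3] * p.w,
          M[3][0] * p.x + M[3][1] * p.y + M[3][2] * p.z + M[3][3] * p.w};
}
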
diff --git a/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h b/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h
new file mode 100644
index 00000000000..60ebf415970
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_adaptive_adjust_samples(KernelGlobals *kg)
+{
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (pixel_index < kernel_split_params.tile.w * kernel_split_params.tile.h) {
+ int x = kernel_split_params.tile.x + pixel_index % kernel_split_params.tile.w;
+ int y = kernel_split_params.tile.y + pixel_index / kernel_split_params.tile.w;
+ int buffer_offset = (kernel_split_params.tile.offset + x +
+ y * kernel_split_params.tile.stride) *
+ kernel_data.film.pass_stride;
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+ int sample = kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples;
+ if (buffer[kernel_data.film.pass_sample_count] < 0.0f) {
+ buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count];
+ float sample_multiplier = sample / max((float)kernel_split_params.tile.start_sample + 1.0f,
+ buffer[kernel_data.film.pass_sample_count]);
+ if (sample_multiplier != 1.0f) {
+ kernel_adaptive_post_adjust(kg, buffer, sample_multiplier);
+ }
+ }
+ else {
+ kernel_adaptive_post_adjust(kg, buffer, sample / (sample - 1.0f));
+ }
+ }
+}
+
+CCL_NAMESPACE_END
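
kernel_adaptive_adjust_samples() rescales each pixel's accumulated passes so that pixels which stopped sampling early (marked by a negated per-pixel sample counter) end up weighted as if they had received the nominal number of samples. A rough standalone C++ sketch of that per-pixel decision; the function name and arguments are hypothetical stand-ins for the real buffer layout:

#include <algorithm>

// Returns the factor the accumulated passes should be multiplied by,
// mirroring the branch structure of the kernel above. A negative
// sample_count marks a pixel that was stopped early by adaptive sampling.
static float adjust_multiplier(float &sample_count, int start_sample, int final_sample) {
  if (sample_count < 0.0f) {
    sample_count = -sample_count;  // un-mark the converged pixel
    return final_sample / std::max((float)start_sample + 1.0f, sample_count);
  }
  // Pixel was never marked as converged: the kernel still applies
  // sample / (sample - 1), as in the else branch above.
  return final_sample / (final_sample - 1.0f);
}
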
diff --git a/intern/cycles/kernel/split/kernel_adaptive_filter_x.h b/intern/cycles/kernel/split/kernel_adaptive_filter_x.h
new file mode 100644
index 00000000000..93f41f7ced4
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_adaptive_filter_x.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_adaptive_filter_x(KernelGlobals *kg)
+{
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (pixel_index < kernel_split_params.tile.h &&
+ kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples >=
+ kernel_data.integrator.adaptive_min_samples) {
+ int y = kernel_split_params.tile.y + pixel_index;
+ kernel_do_adaptive_filter_x(kg, y, &kernel_split_params.tile);
+ }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_adaptive_filter_y.h b/intern/cycles/kernel/split/kernel_adaptive_filter_y.h
new file mode 100644
index 00000000000..eca53d079ec
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_adaptive_filter_y.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_adaptive_filter_y(KernelGlobals *kg)
+{
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (pixel_index < kernel_split_params.tile.w &&
+ kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples >=
+ kernel_data.integrator.adaptive_min_samples) {
+ int x = kernel_split_params.tile.x + pixel_index;
+ kernel_do_adaptive_filter_y(kg, x, &kernel_split_params.tile);
+ }
+}
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_adaptive_stopping.h b/intern/cycles/kernel/split/kernel_adaptive_stopping.h
new file mode 100644
index 00000000000..c8eb1ebd705
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_adaptive_stopping.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_adaptive_stopping(KernelGlobals *kg)
+{
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (pixel_index < kernel_split_params.tile.w * kernel_split_params.tile.h &&
+ kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples >=
+ kernel_data.integrator.adaptive_min_samples) {
+ int x = kernel_split_params.tile.x + pixel_index % kernel_split_params.tile.w;
+ int y = kernel_split_params.tile.y + pixel_index / kernel_split_params.tile.w;
+ int buffer_offset = (kernel_split_params.tile.offset + x +
+ y * kernel_split_params.tile.stride) *
+ kernel_data.film.pass_stride;
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+ kernel_do_adaptive_stopping(kg,
+ buffer,
+ kernel_split_params.tile.start_sample +
+ kernel_split_params.tile.num_samples - 1);
+ }
+}
+CCL_NAMESPACE_END
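
All four new adaptive-sampling kernels above start from the same indexing pattern: a flat work-item index is mapped to a pixel inside the tile, and from there to an offset into the film buffer via the tile offset, stride and the film pass stride. A small C++ sketch of that arithmetic; WorkTile here is a hypothetical stand-in for the real tile struct:

// Hypothetical mirror of the tile fields used by the kernels above.
struct WorkTile {
  int x, y, w, h;      // tile rectangle inside the full frame
  int offset, stride;  // linear offset and row stride of the frame buffer
};

// Map a flat work-item index to a pixel and its buffer offset, as done
// at the top of kernel_adaptive_stopping() and friends. Returns false
// for work items that fall outside the tile.
static bool tile_pixel_offset(const WorkTile &tile, int pixel_index, int pass_stride,
                              int *buffer_offset) {
  if (pixel_index >= tile.w * tile.h) {
    return false;
  }
  int x = tile.x + pixel_index % tile.w;
  int y = tile.y + pixel_index / tile.w;
  *buffer_offset = (tile.offset + x + y * tile.stride) * pass_stride;
  return true;
}
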
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
index 45b839db05f..b24699ec39c 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -44,7 +44,7 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K
branched_state->isect.t :
FLT_MAX;
- bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack);
+ float step_size = volume_stack_step_size(kg, branched_state->path_state.volume_stack);
for (int j = branched_state->next_sample; j < num_samples; j++) {
ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
@@ -61,7 +61,7 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K
/* integrate along volume segment with distance sampling */
VolumeIntegrateResult result = kernel_volume_integrate(
- kg, ps, sd, &volume_ray, L, tp, heterogeneous);
+ kg, ps, sd, &volume_ray, L, tp, step_size);
# ifdef __VOLUME_SCATTER__
if (result == VOLUME_PATH_SCATTERED) {
@@ -164,12 +164,12 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
if (!kernel_data.integrator.branched ||
IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
# endif /* __BRANCHED_PATH__ */
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+ float step_size = volume_stack_step_size(kg, state->volume_stack);
{
/* integrate along volume segment with distance sampling */
VolumeIntegrateResult result = kernel_volume_integrate(
- kg, state, sd, &volume_ray, L, throughput, heterogeneous);
+ kg, state, sd, &volume_ray, L, throughput, step_size);
# ifdef __VOLUME_SCATTER__
if (result == VOLUME_PATH_SCATTERED) {
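
This hunk replaces the boolean "is the volume stack heterogeneous" query with a step size that is passed straight through to kernel_volume_integrate(). The sketch below only illustrates why a single float can subsume the old flag, under the assumption that a non-positive step size means "homogeneous, integrate the whole segment analytically" and a positive one means "ray-march"; it is not the Cycles volume API.

#include <cmath>

// Transmittance over a segment of length t_max with extinction sigma_t.
// step_size <= 0 selects the closed-form homogeneous path; a positive
// step size selects fixed-step marching (sigma_t would normally be
// re-sampled at every step, it is kept constant here for brevity).
static float integrate_transmittance(float sigma_t, float t_max, float step_size) {
  if (step_size <= 0.0f) {
    return std::exp(-sigma_t * t_max);  // Beer-Lambert over the whole segment
  }
  float tr = 1.0f;
  for (float t = 0.0f; t < t_max; t += step_size) {
    float dt = std::fmin(step_size, t_max - t);
    tr *= std::exp(-sigma_t * dt);
  }
  return tr;
}
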
diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h
index 384bc952460..5114f2b03e5 100644
--- a/intern/cycles/kernel/split/kernel_split_common.h
+++ b/intern/cycles/kernel/split/kernel_split_common.h
@@ -17,6 +17,7 @@
#ifndef __KERNEL_SPLIT_H__
#define __KERNEL_SPLIT_H__
+// clang-format off
#include "kernel/kernel_math.h"
#include "kernel/kernel_types.h"
@@ -52,6 +53,7 @@
#ifdef __BRANCHED_PATH__
# include "kernel/split/kernel_branched.h"
#endif
+// clang-format on
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h
index 433b1221a37..decc537b39b 100644
--- a/intern/cycles/kernel/split/kernel_split_data.h
+++ b/intern/cycles/kernel/split/kernel_split_data.h
@@ -18,6 +18,7 @@
#define __KERNEL_SPLIT_DATA_H__
#include "kernel/split/kernel_split_data_types.h"
+
#include "kernel/kernel_globals.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index fd2833ee687..abeb8fa7457 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -161,52 +161,53 @@ CCL_NAMESPACE_END
#include "svm_fractal_noise.h"
#include "kernel/svm/svm_color_util.h"
-#include "kernel/svm/svm_math_util.h"
#include "kernel/svm/svm_mapping_util.h"
+#include "kernel/svm/svm_math_util.h"
#include "kernel/svm/svm_aov.h"
#include "kernel/svm/svm_attribute.h"
-#include "kernel/svm/svm_gradient.h"
#include "kernel/svm/svm_blackbody.h"
+#include "kernel/svm/svm_brick.h"
+#include "kernel/svm/svm_brightness.h"
+#include "kernel/svm/svm_bump.h"
+#include "kernel/svm/svm_camera.h"
+#include "kernel/svm/svm_checker.h"
+#include "kernel/svm/svm_clamp.h"
#include "kernel/svm/svm_closure.h"
-#include "kernel/svm/svm_noisetex.h"
#include "kernel/svm/svm_convert.h"
#include "kernel/svm/svm_displace.h"
#include "kernel/svm/svm_fresnel.h"
-#include "kernel/svm/svm_wireframe.h"
-#include "kernel/svm/svm_wavelength.h"
-#include "kernel/svm/svm_camera.h"
+#include "kernel/svm/svm_gamma.h"
#include "kernel/svm/svm_geometry.h"
+#include "kernel/svm/svm_gradient.h"
#include "kernel/svm/svm_hsv.h"
#include "kernel/svm/svm_ies.h"
#include "kernel/svm/svm_image.h"
-#include "kernel/svm/svm_gamma.h"
-#include "kernel/svm/svm_brightness.h"
#include "kernel/svm/svm_invert.h"
#include "kernel/svm/svm_light_path.h"
#include "kernel/svm/svm_magic.h"
+#include "kernel/svm/svm_map_range.h"
#include "kernel/svm/svm_mapping.h"
-#include "kernel/svm/svm_normal.h"
-#include "kernel/svm/svm_wave.h"
#include "kernel/svm/svm_math.h"
#include "kernel/svm/svm_mix.h"
+#include "kernel/svm/svm_musgrave.h"
+#include "kernel/svm/svm_noisetex.h"
+#include "kernel/svm/svm_normal.h"
#include "kernel/svm/svm_ramp.h"
#include "kernel/svm/svm_sepcomb_hsv.h"
#include "kernel/svm/svm_sepcomb_vector.h"
-#include "kernel/svm/svm_musgrave.h"
#include "kernel/svm/svm_sky.h"
#include "kernel/svm/svm_tex_coord.h"
#include "kernel/svm/svm_value.h"
-#include "kernel/svm/svm_voronoi.h"
-#include "kernel/svm/svm_checker.h"
-#include "kernel/svm/svm_brick.h"
+#include "kernel/svm/svm_vector_rotate.h"
#include "kernel/svm/svm_vector_transform.h"
+#include "kernel/svm/svm_vertex_color.h"
+#include "kernel/svm/svm_voronoi.h"
#include "kernel/svm/svm_voxel.h"
-#include "kernel/svm/svm_bump.h"
-#include "kernel/svm/svm_map_range.h"
-#include "kernel/svm/svm_clamp.h"
+#include "kernel/svm/svm_wave.h"
+#include "kernel/svm/svm_wavelength.h"
#include "kernel/svm/svm_white_noise.h"
-#include "kernel/svm/svm_vertex_color.h"
+#include "kernel/svm/svm_wireframe.h"
#ifdef __SHADER_RAYTRACE__
# include "kernel/svm/svm_ao.h"
@@ -230,6 +231,8 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
uint4 node = read_node(kg, &offset);
switch (node.x) {
+ case NODE_END:
+ return;
#if NODES_GROUP(NODE_GROUP_LEVEL_0)
case NODE_SHADER_JUMP: {
if (type == SHADER_TYPE_SURFACE)
@@ -309,7 +312,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
svm_node_vector_displacement(kg, sd, stack, node, &offset);
break;
# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */
-# ifdef __TEXTURES__
case NODE_TEX_IMAGE:
svm_node_tex_image(kg, sd, stack, node, &offset);
break;
@@ -319,9 +321,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
case NODE_TEX_NOISE:
svm_node_tex_noise(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
-# endif /* __TEXTURES__ */
-# ifdef __EXTRA_NODES__
-# if NODES_FEATURE(NODE_FEATURE_BUMP)
+# if NODES_FEATURE(NODE_FEATURE_BUMP)
case NODE_SET_BUMP:
svm_node_set_bump(kg, sd, stack, node);
break;
@@ -346,20 +346,19 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
case NODE_CLOSURE_SET_NORMAL:
svm_node_set_normal(kg, sd, stack, node.y, node.z);
break;
-# if NODES_FEATURE(NODE_FEATURE_BUMP_STATE)
+# if NODES_FEATURE(NODE_FEATURE_BUMP_STATE)
case NODE_ENTER_BUMP_EVAL:
svm_node_enter_bump_eval(kg, sd, stack, node.y);
break;
case NODE_LEAVE_BUMP_EVAL:
svm_node_leave_bump_eval(kg, sd, stack, node.y);
break;
-# endif /* NODES_FEATURE(NODE_FEATURE_BUMP_STATE) */
-# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */
+# endif /* NODES_FEATURE(NODE_FEATURE_BUMP_STATE) */
+# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */
case NODE_HSV:
svm_node_hsv(kg, sd, stack, node, &offset);
break;
-# endif /* __EXTRA_NODES__ */
-#endif /* NODES_GROUP(NODE_GROUP_LEVEL_0) */
+#endif /* NODES_GROUP(NODE_GROUP_LEVEL_0) */
#if NODES_GROUP(NODE_GROUP_LEVEL_1)
case NODE_CLOSURE_HOLDOUT:
@@ -379,7 +378,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
svm_node_principled_volume(kg, sd, stack, node, type, path_flag, &offset);
break;
# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */
-# ifdef __EXTRA_NODES__
case NODE_MATH:
svm_node_math(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
@@ -404,15 +402,12 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
case NODE_PARTICLE_INFO:
svm_node_particle_info(kg, sd, stack, node.y, node.z);
break;
-# ifdef __HAIR__
-# if NODES_FEATURE(NODE_FEATURE_HAIR)
+# if defined(__HAIR__) && NODES_FEATURE(NODE_FEATURE_HAIR)
case NODE_HAIR_INFO:
svm_node_hair_info(kg, sd, stack, node.y, node.z);
break;
-# endif /* NODES_FEATURE(NODE_FEATURE_HAIR) */
-# endif /* __HAIR__ */
-# endif /* __EXTRA_NODES__ */
-#endif /* NODES_GROUP(NODE_GROUP_LEVEL_1) */
+# endif /* NODES_FEATURE(NODE_FEATURE_HAIR) */
+#endif /* NODES_GROUP(NODE_GROUP_LEVEL_1) */
#if NODES_GROUP(NODE_GROUP_LEVEL_2)
case NODE_TEXTURE_MAPPING:
@@ -427,7 +422,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
case NODE_CAMERA:
svm_node_camera(kg, sd, stack, node.y, node.z, node.w);
break;
-# ifdef __TEXTURES__
case NODE_TEX_ENVIRONMENT:
svm_node_tex_environment(kg, sd, stack, node);
break;
@@ -458,8 +452,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
case NODE_TEX_WHITE_NOISE:
svm_node_tex_white_noise(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
-# endif /* __TEXTURES__ */
-# ifdef __EXTRA_NODES__
case NODE_NORMAL:
svm_node_normal(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
@@ -469,19 +461,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
case NODE_IES:
svm_node_ies(kg, sd, stack, node, &offset);
break;
- case NODE_AOV_START:
- if (!svm_node_aov_check(state, buffer)) {
- return;
- }
- break;
- case NODE_AOV_COLOR:
- svm_node_aov_color(kg, sd, stack, node, buffer);
- break;
- case NODE_AOV_VALUE:
- svm_node_aov_value(kg, sd, stack, node, buffer);
- break;
-# endif /* __EXTRA_NODES__ */
-#endif /* NODES_GROUP(NODE_GROUP_LEVEL_2) */
+#endif /* NODES_GROUP(NODE_GROUP_LEVEL_2) */
#if NODES_GROUP(NODE_GROUP_LEVEL_3)
case NODE_RGB_CURVES:
@@ -494,7 +474,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
case NODE_NORMAL_MAP:
svm_node_normal_map(kg, sd, stack, node);
break;
-# ifdef __EXTRA_NODES__
case NODE_INVERT:
svm_node_invert(sd, stack, node.y, node.z, node.w);
break;
@@ -513,6 +492,9 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
case NODE_COMBINE_HSV:
svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
+ case NODE_VECTOR_ROTATE:
+ svm_node_vector_rotate(sd, stack, node.y, node.z, node.w);
+ break;
case NODE_VECTOR_TRANSFORM:
svm_node_vector_transform(kg, sd, stack, node);
break;
@@ -531,12 +513,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
case NODE_CLAMP:
svm_node_clamp(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
-# endif /* __EXTRA_NODES__ */
-# if NODES_FEATURE(NODE_FEATURE_VOLUME)
- case NODE_TEX_VOXEL:
- svm_node_tex_voxel(kg, sd, stack, node, &offset);
- break;
-# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */
# ifdef __SHADER_RAYTRACE__
case NODE_BEVEL:
svm_node_bevel(kg, sd, state, stack, node);
@@ -546,8 +522,25 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
break;
# endif /* __SHADER_RAYTRACE__ */
#endif /* NODES_GROUP(NODE_GROUP_LEVEL_3) */
- case NODE_END:
- return;
+
+#if NODES_GROUP(NODE_GROUP_LEVEL_4)
+# if NODES_FEATURE(NODE_FEATURE_VOLUME)
+ case NODE_TEX_VOXEL:
+ svm_node_tex_voxel(kg, sd, stack, node, &offset);
+ break;
+# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */
+ case NODE_AOV_START:
+ if (!svm_node_aov_check(state, buffer)) {
+ return;
+ }
+ break;
+ case NODE_AOV_COLOR:
+ svm_node_aov_color(kg, sd, stack, node, buffer);
+ break;
+ case NODE_AOV_VALUE:
+ svm_node_aov_value(kg, sd, stack, node, buffer);
+ break;
+#endif /* NODES_GROUP(NODE_GROUP_LEVEL_4) */
default:
kernel_assert(!"Unknown node type was passed to the SVM machine");
return;
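
The svm.h changes reorder the dispatch switch (NODE_END handled first, the AOV and voxel nodes moved into a new NODE_GROUP_LEVEL_4 block) without changing its shape: the evaluator keeps fetching packed uint4 instructions and switching on the opcode until it hits NODE_END. A stripped-down C++ sketch of that interpreter shape, with made-up opcodes rather than the real ShaderNodeType values:

#include <cstdint>
#include <vector>

// Packed instruction: opcode in x, operands in y/z/w, like the uint4 nodes above.
struct Node {
  uint32_t x, y, z, w;
};

enum : uint32_t { OP_END = 0, OP_VALUE = 1, OP_ADD = 2 };  // made-up opcodes

// Fetch-decode-dispatch loop in the spirit of svm_eval_nodes().
static float eval_nodes(const std::vector<Node> &nodes) {
  float value = 0.0f;
  for (std::size_t offset = 0; offset < nodes.size(); offset++) {
    const Node node = nodes[offset];
    switch (node.x) {
      case OP_END:
        return value;  // checked first, like NODE_END above
      case OP_VALUE:
        value = (float)node.y;
        break;
      case OP_ADD:
        value += (float)node.y;
        break;
      default:
        return value;  // unknown opcode: bail out, like the kernel_assert() path
    }
  }
  return value;
}

The note added to svm_types.h further down ties into this: the enum order is kept in sync with the switch order because that ordering affects OpenCL dispatch performance.
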
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index bf2d3f4fbff..cb1b521c585 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -16,23 +16,6 @@
CCL_NAMESPACE_BEGIN
-/* Hair Melanin */
-
-ccl_device_inline float3 sigma_from_concentration(float eumelanin, float pheomelanin)
-{
- return eumelanin * make_float3(0.506f, 0.841f, 1.653f) +
- pheomelanin * make_float3(0.343f, 0.733f, 1.924f);
-}
-
-ccl_device_inline float3 sigma_from_reflectance(float3 color, float azimuthal_roughness)
-{
- float x = azimuthal_roughness;
- float roughness_fac = (((((0.245f * x) + 5.574f) * x - 10.73f) * x + 2.532f) * x - 0.215f) * x +
- 5.969f;
- float3 sigma = log3(color) / roughness_fac;
- return sigma * sigma;
-}
-
/* Closure Nodes */
ccl_device void svm_node_glass_setup(
@@ -868,24 +851,26 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
/* Benedikt Bitterli's melanin ratio remapping. */
float eumelanin = melanin * (1.0f - melanin_redness);
float pheomelanin = melanin * melanin_redness;
- float3 melanin_sigma = sigma_from_concentration(eumelanin, pheomelanin);
+ float3 melanin_sigma = bsdf_principled_hair_sigma_from_concentration(eumelanin,
+ pheomelanin);
/* Optional tint. */
float3 tint = stack_load_float3(stack, tint_ofs);
- float3 tint_sigma = sigma_from_reflectance(tint, radial_roughness);
+ float3 tint_sigma = bsdf_principled_hair_sigma_from_reflectance(tint,
+ radial_roughness);
bsdf->sigma = melanin_sigma + tint_sigma;
break;
}
case NODE_PRINCIPLED_HAIR_REFLECTANCE: {
float3 color = stack_load_float3(stack, color_ofs);
- bsdf->sigma = sigma_from_reflectance(color, radial_roughness);
+ bsdf->sigma = bsdf_principled_hair_sigma_from_reflectance(color, radial_roughness);
break;
}
default: {
/* Fallback to brownish hair, same as defaults for melanin. */
kernel_assert(!"Invalid Principled Hair parametrization!");
- bsdf->sigma = sigma_from_concentration(0.0f, 0.8054375f);
+ bsdf->sigma = bsdf_principled_hair_sigma_from_concentration(0.0f, 0.8054375f);
break;
}
}
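
The two hair melanin helpers are removed here in favour of bsdf_principled_hair_* functions; the formulas themselves are visible in the deleted lines above. For reference, the same math as a standalone C++ sketch (Color3 and the function names are illustrative):

#include <cmath>

struct Color3 {
  float r, g, b;
};

// Absorption coefficient from melanin concentrations: a weighted sum of
// eumelanin and pheomelanin spectra, as in the removed
// sigma_from_concentration().
static Color3 sigma_from_concentration(float eumelanin, float pheomelanin) {
  return {eumelanin * 0.506f + pheomelanin * 0.343f,
          eumelanin * 0.841f + pheomelanin * 0.733f,
          eumelanin * 1.653f + pheomelanin * 1.924f};
}

// Absorption coefficient from a target reflectance colour and azimuthal
// roughness, as in the removed sigma_from_reflectance():
// sigma = (log(color) / f(roughness))^2, applied per channel.
static Color3 sigma_from_reflectance(Color3 color, float azimuthal_roughness) {
  float x = azimuthal_roughness;
  float f = (((((0.245f * x) + 5.574f) * x - 10.73f) * x + 2.532f) * x - 0.215f) * x + 5.969f;
  auto channel = [f](float c) {
    float s = std::log(c) / f;
    return s * s;
  };
  return {channel(color.r), channel(color.g), channel(color.b)};
}
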
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index 90f1a7845c7..f57c85fc23e 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -16,8 +16,6 @@
CCL_NAMESPACE_BEGIN
-#ifdef __TEXTURES__
-
ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint flags)
{
if (id == -1) {
@@ -30,10 +28,6 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
if ((flags & NODE_IMAGE_ALPHA_UNASSOCIATE) && alpha != 1.0f && alpha != 0.0f) {
r /= alpha;
- const int texture_type = kernel_tex_type(id);
- if (texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_BYTE) {
- r = min(r, make_float4(1.0f, 1.0f, 1.0f, 1.0f));
- }
r.w = alpha;
}
@@ -250,6 +244,4 @@ ccl_device void svm_node_tex_environment(KernelGlobals *kg,
stack_store_float(stack, alpha_offset, f.w);
}
-#endif /* __TEXTURES__ */
-
CCL_NAMESPACE_END
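
The hunk above drops the extra clamp for byte textures from the alpha-unassociate path; what remains is a plain conversion from associated (premultiplied) to unassociated (straight) alpha. A small C++ sketch of that operation, with an illustrative RGBA type:

// Divide colour by alpha when unassociated alpha is requested, leaving
// fully transparent and fully opaque texels untouched, as in
// svm_image_texture() above.
struct RGBA {
  float r, g, b, a;
};

static RGBA unassociate_alpha(RGBA c) {
  if (c.a != 1.0f && c.a != 0.0f) {
    float inv = 1.0f / c.a;
    c.r *= inv;
    c.g *= inv;
    c.b *= inv;
  }
  return c;
}
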
diff --git a/intern/cycles/kernel/svm/svm_math.h b/intern/cycles/kernel/svm/svm_math.h
index 82cae7bbacf..01e01c399ea 100644
--- a/intern/cycles/kernel/svm/svm_math.h
+++ b/intern/cycles/kernel/svm/svm_math.h
@@ -51,11 +51,19 @@ ccl_device void svm_node_vector_math(KernelGlobals *kg,
float3 a = stack_load_float3(stack, a_stack_offset);
float3 b = stack_load_float3(stack, b_stack_offset);
+ float3 c;
float scale = stack_load_float(stack, scale_stack_offset);
float value;
float3 vector;
- svm_vector_math(&value, &vector, (NodeVectorMathType)type, a, b, scale);
+
+ /* 3 Vector Operators */
+ if (type == NODE_VECTOR_MATH_WRAP) {
+ uint4 extra_node = read_node(kg, offset);
+ c = stack_load_float3(stack, extra_node.x);
+ }
+
+ svm_vector_math(&value, &vector, (NodeVectorMathType)type, a, b, c, scale);
if (stack_valid(value_stack_offset))
stack_store_float(stack, value_stack_offset, value);
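
NODE_VECTOR_MATH_WRAP is the first vector-math operation that needs a third vector operand, so svm_node_vector_math() now reads one extra packed node to get the stack offset of c before dispatching. The componentwise wrap itself is sketched below in C++; the wrap definition is a common one and an assumption here, not copied from the Cycles wrapf():

#include <cmath>

struct Float3 {
  float x, y, z;
};

// One common definition of wrap(value, max, min): fold value into the
// half-open range [min, max). Assumed behaviour, for illustration only.
static float wrap1(float value, float max, float min) {
  float range = max - min;
  return (range != 0.0f) ? value - range * std::floor((value - min) / range) : min;
}

// Componentwise wrap, matching the shape of the NODE_VECTOR_MATH_WRAP
// case added to svm_vector_math() in the next file.
static Float3 wrap3(Float3 a, Float3 b, Float3 c) {
  return {wrap1(a.x, b.x, c.x), wrap1(a.y, b.y, c.y), wrap1(a.z, b.z, c.z)};
}
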
diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h
index 7b9eaaeb710..d1e1fa87e53 100644
--- a/intern/cycles/kernel/svm/svm_math_util.h
+++ b/intern/cycles/kernel/svm/svm_math_util.h
@@ -16,8 +16,13 @@
CCL_NAMESPACE_BEGIN
-ccl_device void svm_vector_math(
- float *value, float3 *vector, NodeVectorMathType type, float3 a, float3 b, float scale)
+ccl_device void svm_vector_math(float *value,
+ float3 *vector,
+ NodeVectorMathType type,
+ float3 a,
+ float3 b,
+ float3 c,
+ float scale)
{
switch (type) {
case NODE_VECTOR_MATH_ADD:
@@ -68,6 +73,9 @@ ccl_device void svm_vector_math(
case NODE_VECTOR_MATH_MODULO:
*vector = make_float3(safe_modulo(a.x, b.x), safe_modulo(a.y, b.y), safe_modulo(a.z, b.z));
break;
+ case NODE_VECTOR_MATH_WRAP:
+ *vector = make_float3(wrapf(a.x, b.x, c.x), wrapf(a.y, b.y, c.y), wrapf(a.z, b.z, c.z));
+ break;
case NODE_VECTOR_MATH_FRACTION:
*vector = a - floor(a);
break;
@@ -80,6 +88,15 @@ ccl_device void svm_vector_math(
case NODE_VECTOR_MATH_MAXIMUM:
*vector = max(a, b);
break;
+ case NODE_VECTOR_MATH_SINE:
+ *vector = make_float3(sinf(a.x), sinf(a.y), sinf(a.z));
+ break;
+ case NODE_VECTOR_MATH_COSINE:
+ *vector = make_float3(cosf(a.x), cosf(a.y), cosf(a.z));
+ break;
+ case NODE_VECTOR_MATH_TANGENT:
+ *vector = make_float3(tanf(a.x), tanf(a.y), tanf(a.z));
+ break;
default:
*vector = make_float3(0.0f, 0.0f, 0.0f);
*value = 0.0f;
diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h
index 8dbb147e76a..85ede7770e9 100644
--- a/intern/cycles/kernel/svm/svm_types.h
+++ b/intern/cycles/kernel/svm/svm_types.h
@@ -42,7 +42,8 @@ CCL_NAMESPACE_BEGIN
#define NODE_GROUP_LEVEL_1 1
#define NODE_GROUP_LEVEL_2 2
#define NODE_GROUP_LEVEL_3 3
-#define NODE_GROUP_LEVEL_MAX NODE_GROUP_LEVEL_3
+#define NODE_GROUP_LEVEL_4 4
+#define NODE_GROUP_LEVEL_MAX NODE_GROUP_LEVEL_4
#define NODE_FEATURE_VOLUME (1 << 0)
#define NODE_FEATURE_HAIR (1 << 1)
@@ -62,97 +63,98 @@ CCL_NAMESPACE_BEGIN
typedef enum ShaderNodeType {
NODE_END = 0,
+ NODE_SHADER_JUMP,
NODE_CLOSURE_BSDF,
NODE_CLOSURE_EMISSION,
NODE_CLOSURE_BACKGROUND,
NODE_CLOSURE_SET_WEIGHT,
NODE_CLOSURE_WEIGHT,
+ NODE_EMISSION_WEIGHT,
NODE_MIX_CLOSURE,
NODE_JUMP_IF_ZERO,
NODE_JUMP_IF_ONE,
- NODE_TEX_IMAGE,
- NODE_TEX_IMAGE_BOX,
- NODE_TEX_SKY,
NODE_GEOMETRY,
- NODE_GEOMETRY_DUPLI,
- NODE_LIGHT_PATH,
+ NODE_CONVERT,
+ NODE_TEX_COORD,
NODE_VALUE_F,
NODE_VALUE_V,
- NODE_MIX,
NODE_ATTR,
- NODE_CONVERT,
- NODE_FRESNEL,
- NODE_WIREFRAME,
- NODE_WAVELENGTH,
- NODE_BLACKBODY,
- NODE_EMISSION_WEIGHT,
- NODE_TEX_GRADIENT,
- NODE_TEX_VORONOI,
- NODE_TEX_MUSGRAVE,
- NODE_TEX_WAVE,
- NODE_TEX_MAGIC,
- NODE_TEX_NOISE,
- NODE_SHADER_JUMP,
- NODE_SET_DISPLACEMENT,
+ NODE_VERTEX_COLOR,
NODE_GEOMETRY_BUMP_DX,
NODE_GEOMETRY_BUMP_DY,
+ NODE_SET_DISPLACEMENT,
+ NODE_DISPLACEMENT,
+ NODE_VECTOR_DISPLACEMENT,
+ NODE_TEX_IMAGE,
+ NODE_TEX_IMAGE_BOX,
+ NODE_TEX_NOISE,
NODE_SET_BUMP,
- NODE_MATH,
- NODE_VECTOR_MATH,
- NODE_VECTOR_TRANSFORM,
- NODE_MAPPING,
- NODE_TEX_COORD,
- NODE_TEX_COORD_BUMP_DX,
- NODE_TEX_COORD_BUMP_DY,
NODE_ATTR_BUMP_DX,
NODE_ATTR_BUMP_DY,
- NODE_TEX_ENVIRONMENT,
+ NODE_VERTEX_COLOR_BUMP_DX,
+ NODE_VERTEX_COLOR_BUMP_DY,
+ NODE_TEX_COORD_BUMP_DX,
+ NODE_TEX_COORD_BUMP_DY,
+ NODE_CLOSURE_SET_NORMAL,
+ NODE_ENTER_BUMP_EVAL,
+ NODE_LEAVE_BUMP_EVAL,
+ NODE_HSV,
NODE_CLOSURE_HOLDOUT,
+ NODE_FRESNEL,
NODE_LAYER_WEIGHT,
NODE_CLOSURE_VOLUME,
- NODE_SEPARATE_VECTOR,
- NODE_COMBINE_VECTOR,
- NODE_SEPARATE_HSV,
- NODE_COMBINE_HSV,
- NODE_HSV,
- NODE_CAMERA,
- NODE_INVERT,
- NODE_NORMAL,
+ NODE_PRINCIPLED_VOLUME,
+ NODE_MATH,
+ NODE_VECTOR_MATH,
+ NODE_RGB_RAMP,
NODE_GAMMA,
- NODE_TEX_CHECKER,
NODE_BRIGHTCONTRAST,
- NODE_RGB_RAMP,
- NODE_RGB_CURVES,
- NODE_VECTOR_CURVES,
- NODE_MIN_MAX,
- NODE_LIGHT_FALLOFF,
+ NODE_LIGHT_PATH,
NODE_OBJECT_INFO,
NODE_PARTICLE_INFO,
+ NODE_HAIR_INFO,
+ NODE_TEXTURE_MAPPING,
+ NODE_MAPPING,
+ NODE_MIN_MAX,
+ NODE_CAMERA,
+ NODE_TEX_ENVIRONMENT,
+ NODE_TEX_SKY,
+ NODE_TEX_GRADIENT,
+ NODE_TEX_VORONOI,
+ NODE_TEX_MUSGRAVE,
+ NODE_TEX_WAVE,
+ NODE_TEX_MAGIC,
+ NODE_TEX_CHECKER,
NODE_TEX_BRICK,
- NODE_CLOSURE_SET_NORMAL,
- NODE_AMBIENT_OCCLUSION,
+ NODE_TEX_WHITE_NOISE,
+ NODE_NORMAL,
+ NODE_LIGHT_FALLOFF,
+ NODE_IES,
+ NODE_RGB_CURVES,
+ NODE_VECTOR_CURVES,
NODE_TANGENT,
NODE_NORMAL_MAP,
- NODE_HAIR_INFO,
- NODE_UVMAP,
- NODE_TEX_VOXEL,
- NODE_ENTER_BUMP_EVAL,
- NODE_LEAVE_BUMP_EVAL,
- NODE_BEVEL,
- NODE_DISPLACEMENT,
- NODE_VECTOR_DISPLACEMENT,
- NODE_PRINCIPLED_VOLUME,
- NODE_IES,
+ NODE_INVERT,
+ NODE_MIX,
+ NODE_SEPARATE_VECTOR,
+ NODE_COMBINE_VECTOR,
+ NODE_SEPARATE_HSV,
+ NODE_COMBINE_HSV,
+ NODE_VECTOR_ROTATE,
+ NODE_VECTOR_TRANSFORM,
+ NODE_WIREFRAME,
+ NODE_WAVELENGTH,
+ NODE_BLACKBODY,
NODE_MAP_RANGE,
NODE_CLAMP,
- NODE_TEXTURE_MAPPING,
- NODE_TEX_WHITE_NOISE,
- NODE_VERTEX_COLOR,
- NODE_VERTEX_COLOR_BUMP_DX,
- NODE_VERTEX_COLOR_BUMP_DY,
+ NODE_BEVEL,
+ NODE_AMBIENT_OCCLUSION,
+ NODE_TEX_VOXEL,
NODE_AOV_START,
- NODE_AOV_VALUE,
NODE_AOV_COLOR,
+ NODE_AOV_VALUE,
+ /* NOTE: for best OpenCL performance, the item definitions in this enum must
+ * match the switch case order in svm.h. */
} ShaderNodeType;
typedef enum NodeAttributeType {
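The NOTE appended to ShaderNodeType is the reason for the large reshuffle above: the SVM interpreter in svm.h (svm_eval_nodes()) is one big switch over the node type, and keeping the enum values in the same order as the case labels is intended to help the OpenCL compiler emit a dense, predictable jump table. A simplified sketch of the interpreter shape this ordering targets (names and exact structure are an approximation, not the real svm.h code):

    /* Simplified interpreter loop; cases appear in ShaderNodeType order. */
    for (;;) {
      uint4 node = read_node(kg, &offset);
      switch (node.x) {
        case NODE_END:
          return;
        case NODE_SHADER_JUMP:
          /* ... */
          break;
        case NODE_CLOSURE_BSDF:
          /* ... */
          break;
        /* ... remaining cases in the same order as the enum ... */
      }
    }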
@@ -326,6 +328,10 @@ typedef enum NodeVectorMathType {
NODE_VECTOR_MATH_ABSOLUTE,
NODE_VECTOR_MATH_MINIMUM,
NODE_VECTOR_MATH_MAXIMUM,
+ NODE_VECTOR_MATH_WRAP,
+ NODE_VECTOR_MATH_SINE,
+ NODE_VECTOR_MATH_COSINE,
+ NODE_VECTOR_MATH_TANGENT,
} NodeVectorMathType;
typedef enum NodeClampType {
@@ -347,6 +353,14 @@ typedef enum NodeMappingType {
NODE_MAPPING_TYPE_NORMAL
} NodeMappingType;
+typedef enum NodeVectorRotateType {
+ NODE_VECTOR_ROTATE_TYPE_AXIS,
+ NODE_VECTOR_ROTATE_TYPE_AXIS_X,
+ NODE_VECTOR_ROTATE_TYPE_AXIS_Y,
+ NODE_VECTOR_ROTATE_TYPE_AXIS_Z,
+ NODE_VECTOR_ROTATE_TYPE_EULER_XYZ,
+} NodeVectorRotateType;
+
typedef enum NodeVectorTransformType {
NODE_VECTOR_TRANSFORM_TYPE_VECTOR,
NODE_VECTOR_TRANSFORM_TYPE_POINT,
@@ -380,9 +394,24 @@ typedef enum NodeMusgraveType {
typedef enum NodeWaveType { NODE_WAVE_BANDS, NODE_WAVE_RINGS } NodeWaveType;
-typedef enum NodeWaveProfiles {
+typedef enum NodeWaveBandsDirection {
+ NODE_WAVE_BANDS_DIRECTION_X,
+ NODE_WAVE_BANDS_DIRECTION_Y,
+ NODE_WAVE_BANDS_DIRECTION_Z,
+ NODE_WAVE_BANDS_DIRECTION_DIAGONAL
+} NodeWaveBandsDirection;
+
+typedef enum NodeWaveRingsDirection {
+ NODE_WAVE_RINGS_DIRECTION_X,
+ NODE_WAVE_RINGS_DIRECTION_Y,
+ NODE_WAVE_RINGS_DIRECTION_Z,
+ NODE_WAVE_RINGS_DIRECTION_SPHERICAL
+} NodeWaveRingsDirection;
+
+typedef enum NodeWaveProfile {
NODE_WAVE_PROFILE_SIN,
NODE_WAVE_PROFILE_SAW,
+ NODE_WAVE_PROFILE_TRI,
} NodeWaveProfile;
typedef enum NodeSkyType { NODE_SKY_OLD, NODE_SKY_NEW } NodeSkyType;
@@ -499,6 +528,7 @@ typedef enum ClosureType {
CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID,
CLOSURE_BSDF_PRINCIPLED_SHEEN_ID,
CLOSURE_BSDF_DIFFUSE_TOON_ID,
+ CLOSURE_BSDF_TRANSLUCENT_ID,
/* Glossy */
CLOSURE_BSDF_REFLECTION_ID,
@@ -521,7 +551,6 @@ typedef enum ClosureType {
CLOSURE_BSDF_HAIR_REFLECTION_ID,
/* Transmission */
- CLOSURE_BSDF_TRANSLUCENT_ID,
CLOSURE_BSDF_REFRACTION_ID,
CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID,
CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID,
@@ -562,12 +591,12 @@ typedef enum ClosureType {
/* watch this, being lazy with memory usage */
#define CLOSURE_IS_BSDF(type) (type <= CLOSURE_BSDF_TRANSPARENT_ID)
#define CLOSURE_IS_BSDF_DIFFUSE(type) \
- (type >= CLOSURE_BSDF_DIFFUSE_ID && type <= CLOSURE_BSDF_DIFFUSE_TOON_ID)
+ (type >= CLOSURE_BSDF_DIFFUSE_ID && type <= CLOSURE_BSDF_TRANSLUCENT_ID)
#define CLOSURE_IS_BSDF_GLOSSY(type) \
((type >= CLOSURE_BSDF_REFLECTION_ID && type <= CLOSURE_BSDF_HAIR_REFLECTION_ID) || \
(type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID))
#define CLOSURE_IS_BSDF_TRANSMISSION(type) \
- (type >= CLOSURE_BSDF_TRANSLUCENT_ID && type <= CLOSURE_BSDF_HAIR_TRANSMISSION_ID)
+ (type >= CLOSURE_BSDF_REFRACTION_ID && type <= CLOSURE_BSDF_HAIR_TRANSMISSION_ID)
#define CLOSURE_IS_BSDF_BSSRDF(type) \
(type == CLOSURE_BSDF_BSSRDF_ID || type == CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID)
#define CLOSURE_IS_BSDF_SINGULAR(type) \
diff --git a/intern/cycles/kernel/svm/svm_vector_rotate.h b/intern/cycles/kernel/svm/svm_vector_rotate.h
new file mode 100644
index 00000000000..79a4ec2c40e
--- /dev/null
+++ b/intern/cycles/kernel/svm/svm_vector_rotate.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Vector Rotate */
+
+ccl_device void svm_node_vector_rotate(ShaderData *sd,
+ float *stack,
+ uint input_stack_offsets,
+ uint axis_stack_offsets,
+ uint result_stack_offset)
+{
+ uint type, vector_stack_offset, rotation_stack_offset, center_stack_offset, axis_stack_offset,
+ angle_stack_offset, invert;
+
+ svm_unpack_node_uchar4(
+ input_stack_offsets, &type, &vector_stack_offset, &rotation_stack_offset, &invert);
+ svm_unpack_node_uchar3(
+ axis_stack_offsets, &center_stack_offset, &axis_stack_offset, &angle_stack_offset);
+
+ if (stack_valid(result_stack_offset)) {
+
+ float3 vector = stack_load_float3(stack, vector_stack_offset);
+ float3 center = stack_load_float3(stack, center_stack_offset);
+ float3 result = make_float3(0.0f, 0.0f, 0.0f);
+
+ if (type == NODE_VECTOR_ROTATE_TYPE_EULER_XYZ) {
+ float3 rotation = stack_load_float3(stack, rotation_stack_offset); // Default XYZ.
+ Transform rotationTransform = euler_to_transform(rotation);
+ if (invert) {
+ result = transform_direction_transposed(&rotationTransform, vector - center) + center;
+ }
+ else {
+ result = transform_direction(&rotationTransform, vector - center) + center;
+ }
+ }
+ else {
+ float3 axis;
+ switch (type) {
+ case NODE_VECTOR_ROTATE_TYPE_AXIS_X:
+ axis = make_float3(1.0f, 0.0f, 0.0f);
+ break;
+ case NODE_VECTOR_ROTATE_TYPE_AXIS_Y:
+ axis = make_float3(0.0f, 1.0f, 0.0f);
+ break;
+ case NODE_VECTOR_ROTATE_TYPE_AXIS_Z:
+ axis = make_float3(0.0f, 0.0f, 1.0f);
+ break;
+ default:
+ axis = normalize(stack_load_float3(stack, axis_stack_offset));
+ break;
+ }
+ float angle = stack_load_float(stack, angle_stack_offset);
+ angle = invert ? -angle : angle;
+ result = (len_squared(axis) != 0.0f) ?
+ rotate_around_axis(vector - center, axis, angle) + center :
+ vector;
+ }
+
+ stack_store_float3(stack, result_stack_offset, result);
+ }
+}
+
+CCL_NAMESPACE_END
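The new node covers two paths: Euler rotation through euler_to_transform(), where the inverse is taken with the transposed direction transform (valid because a pure rotation matrix is orthogonal), and axis-angle rotation about either a fixed or an arbitrary normalized axis, always relative to a center point. Assuming rotate_around_axis() implements the standard axis-angle (Rodrigues) rotation, a quick sanity check of the axis branch would be:

    /* Hypothetical sanity check: rotate +X by 90 degrees around +Z. */
    float3 v = make_float3(1.0f, 0.0f, 0.0f);
    float3 center = make_float3(0.0f, 0.0f, 0.0f);
    float3 axis = make_float3(0.0f, 0.0f, 1.0f);
    float angle = M_PI_2_F;
    float3 r = rotate_around_axis(v - center, axis, angle) + center;
    /* Expected: r is approximately (0, 1, 0). */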
diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h
index b79be8e5bde..4bc14f82382 100644
--- a/intern/cycles/kernel/svm/svm_voxel.h
+++ b/intern/cycles/kernel/svm/svm_voxel.h
@@ -39,7 +39,7 @@ ccl_device void svm_node_tex_voxel(
co = transform_point(&tfm, co);
}
- float4 r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z, INTERPOLATION_NONE);
+ float4 r = kernel_tex_image_interp_3d(kg, id, co, INTERPOLATION_NONE);
#else
float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
#endif
diff --git a/intern/cycles/kernel/svm/svm_wave.h b/intern/cycles/kernel/svm/svm_wave.h
index 50c868c0f82..64102535f7d 100644
--- a/intern/cycles/kernel/svm/svm_wave.h
+++ b/intern/cycles/kernel/svm/svm_wave.h
@@ -19,52 +19,101 @@ CCL_NAMESPACE_BEGIN
/* Wave */
ccl_device_noinline_cpu float svm_wave(NodeWaveType type,
+ NodeWaveBandsDirection bands_dir,
+ NodeWaveRingsDirection rings_dir,
NodeWaveProfile profile,
float3 p,
float detail,
float distortion,
- float dscale)
+ float dscale,
+ float phase)
{
+ /* Prevent precision issues on unit coordinates. */
+ p = (p + 0.000001f) * 0.999999f;
+
float n;
- if (type == NODE_WAVE_BANDS)
- n = (p.x + p.y + p.z) * 10.0f;
- else /* NODE_WAVE_RINGS */
- n = len(p) * 20.0f;
+ if (type == NODE_WAVE_BANDS) {
+ if (bands_dir == NODE_WAVE_BANDS_DIRECTION_X) {
+ n = p.x * 20.0f;
+ }
+ else if (bands_dir == NODE_WAVE_BANDS_DIRECTION_Y) {
+ n = p.y * 20.0f;
+ }
+ else if (bands_dir == NODE_WAVE_BANDS_DIRECTION_Z) {
+ n = p.z * 20.0f;
+ }
+ else { /* NODE_WAVE_BANDS_DIRECTION_DIAGONAL */
+ n = (p.x + p.y + p.z) * 10.0f;
+ }
+ }
+ else { /* NODE_WAVE_RINGS */
+ float3 rp = p;
+ if (rings_dir == NODE_WAVE_RINGS_DIRECTION_X) {
+ rp *= make_float3(0.0f, 1.0f, 1.0f);
+ }
+ else if (rings_dir == NODE_WAVE_RINGS_DIRECTION_Y) {
+ rp *= make_float3(1.0f, 0.0f, 1.0f);
+ }
+ else if (rings_dir == NODE_WAVE_RINGS_DIRECTION_Z) {
+ rp *= make_float3(1.0f, 1.0f, 0.0f);
+ }
+ /* else: NODE_WAVE_RINGS_DIRECTION_SPHERICAL */
+
+ n = len(rp) * 20.0f;
+ }
+
+ n += phase;
if (distortion != 0.0f)
n += distortion * (fractal_noise_3d(p * dscale, detail) * 2.0f - 1.0f);
if (profile == NODE_WAVE_PROFILE_SIN) {
- return 0.5f + 0.5f * sinf(n);
+ return 0.5f + 0.5f * sinf(n - M_PI_2_F);
+ }
+ else if (profile == NODE_WAVE_PROFILE_SAW) {
+ n /= M_2PI_F;
+ return n - floorf(n);
}
- else { /* NODE_WAVE_PROFILE_SAW */
+ else { /* NODE_WAVE_PROFILE_TRI */
n /= M_2PI_F;
- n -= (int)n;
- return (n < 0.0f) ? n + 1.0f : n;
+ return fabsf(n - floorf(n + 0.5f)) * 2.0f;
}
}
ccl_device void svm_node_tex_wave(
KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
- uint4 node2 = read_node(kg, offset);
+ uint4 defaults1 = read_node(kg, offset);
+ uint4 defaults2 = read_node(kg, offset);
- uint type;
- uint co_offset, scale_offset, detail_offset, dscale_offset, distortion_offset, color_offset,
- fac_offset;
+ /* RNA properties */
+ uint type_offset, bands_dir_offset, rings_dir_offset, profile_offset;
+ /* Inputs, Outputs */
+ uint co_offset, scale_offset, distortion_offset, detail_offset, dscale_offset, phase_offset;
+ uint color_offset, fac_offset;
- svm_unpack_node_uchar4(node.y, &type, &color_offset, &fac_offset, &dscale_offset);
- svm_unpack_node_uchar4(node.z, &co_offset, &scale_offset, &detail_offset, &distortion_offset);
+ svm_unpack_node_uchar4(
+ node.y, &type_offset, &bands_dir_offset, &rings_dir_offset, &profile_offset);
+ svm_unpack_node_uchar4(node.z, &co_offset, &scale_offset, &distortion_offset, &detail_offset);
+ svm_unpack_node_uchar4(node.w, &dscale_offset, &phase_offset, &color_offset, &fac_offset);
float3 co = stack_load_float3(stack, co_offset);
- float scale = stack_load_float_default(stack, scale_offset, node2.x);
- float detail = stack_load_float_default(stack, detail_offset, node2.y);
- float distortion = stack_load_float_default(stack, distortion_offset, node2.z);
- float dscale = stack_load_float_default(stack, dscale_offset, node2.w);
+ float scale = stack_load_float_default(stack, scale_offset, defaults1.x);
+ float detail = stack_load_float_default(stack, detail_offset, defaults1.y);
+ float distortion = stack_load_float_default(stack, distortion_offset, defaults1.z);
+ float dscale = stack_load_float_default(stack, dscale_offset, defaults1.w);
+ float phase = stack_load_float_default(stack, phase_offset, defaults2.x);
- float f = svm_wave(
- (NodeWaveType)type, (NodeWaveProfile)node.w, co * scale, detail, distortion, dscale);
+ float f = svm_wave((NodeWaveType)type_offset,
+ (NodeWaveBandsDirection)bands_dir_offset,
+ (NodeWaveRingsDirection)rings_dir_offset,
+ (NodeWaveProfile)profile_offset,
+ co * scale,
+ detail,
+ distortion,
+ dscale,
+ phase);
if (stack_valid(fac_offset))
stack_store_float(stack, fac_offset, f);
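With the direction handling factored out, the three profiles reduce to simple periodic functions of n; the -M_PI_2_F offset makes the sine profile start at 0 like the saw and triangle profiles do. A compact restatement of just the profile math from the code above:

    /* Profile shapes as standalone functions of n (period 2*pi). */
    float profile_sine(float n) { return 0.5f + 0.5f * sinf(n - M_PI_2_F); }
    float profile_saw(float n)  { float x = n / M_2PI_F; return x - floorf(x); }
    float profile_tri(float n)  { float x = n / M_2PI_F; return fabsf(x - floorf(x + 0.5f)) * 2.0f; }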
diff --git a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt
index 92578b888a6..472b5a0c101 100644
--- a/intern/cycles/render/CMakeLists.txt
+++ b/intern/cycles/render/CMakeLists.txt
@@ -19,9 +19,14 @@ set(SRC
coverage.cpp
denoising.cpp
film.cpp
+ geometry.cpp
graph.cpp
+ hair.cpp
image.cpp
+ image_oiio.cpp
+ image_vdb.cpp
integrator.cpp
+ jitter.cpp
light.cpp
merge.cpp
mesh.cpp
@@ -54,10 +59,15 @@ set(SRC_HEADERS
coverage.h
denoising.h
film.h
+ geometry.h
graph.h
+ hair.h
image.h
+ image_oiio.h
+ image_vdb.h
integrator.h
light.h
+ jitter.h
merge.h
mesh.h
nodes.h
@@ -86,6 +96,29 @@ if(WITH_CYCLES_OSL)
list(APPEND LIB
cycles_kernel_osl
)
+
+ SET_PROPERTY(SOURCE osl.cpp PROPERTY COMPILE_FLAGS ${RTTI_DISABLE_FLAGS})
+endif()
+
+if(WITH_OPENCOLORIO)
+ add_definitions(-DWITH_OCIO)
+ include_directories(
+ SYSTEM
+ ${OPENCOLORIO_INCLUDE_DIRS}
+ )
+ if(WIN32)
+ add_definitions(-DOpenColorIO_STATIC)
+ endif()
+endif()
+
+if(WITH_OPENVDB)
+ add_definitions(-DWITH_OPENVDB ${OPENVDB_DEFINITIONS})
+ list(APPEND INC_SYS
+ ${OPENVDB_INCLUDE_DIRS}
+ )
+ list(APPEND LIB
+ ${OPENVDB_LIBRARIES}
+ )
endif()
include_directories(${INC})
diff --git a/intern/cycles/render/attribute.cpp b/intern/cycles/render/attribute.cpp
index b65c2faa788..4c26d5e8365 100644
--- a/intern/cycles/render/attribute.cpp
+++ b/intern/cycles/render/attribute.cpp
@@ -14,9 +14,10 @@
* limitations under the License.
*/
+#include "render/attribute.h"
+#include "render/hair.h"
#include "render/image.h"
#include "render/mesh.h"
-#include "render/attribute.h"
#include "util/util_foreach.h"
#include "util/util_transform.h"
@@ -25,46 +26,51 @@ CCL_NAMESPACE_BEGIN
/* Attribute */
-Attribute::~Attribute()
-{
- /* for voxel data, we need to remove the image from the image manager */
- if (element == ATTR_ELEMENT_VOXEL) {
- VoxelAttribute *voxel_data = data_voxel();
-
- if (voxel_data && voxel_data->slot != -1) {
- voxel_data->manager->remove_image(voxel_data->slot);
- }
- }
-}
-
-void Attribute::set(ustring name_, TypeDesc type_, AttributeElement element_)
+Attribute::Attribute(
+ ustring name, TypeDesc type, AttributeElement element, Geometry *geom, AttributePrimitive prim)
+ : name(name), std(ATTR_STD_NONE), type(type), element(element), flags(0)
{
- name = name_;
- type = type_;
- element = element_;
- std = ATTR_STD_NONE;
- flags = 0;
-
/* string and matrix not supported! */
assert(type == TypeDesc::TypeFloat || type == TypeDesc::TypeColor ||
type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
type == TypeDesc::TypeNormal || type == TypeDesc::TypeMatrix || type == TypeFloat2 ||
type == TypeRGBA);
+
+ if (element == ATTR_ELEMENT_VOXEL) {
+ buffer.resize(sizeof(ImageHandle));
+ new (buffer.data()) ImageHandle();
+ }
+ else {
+ resize(geom, prim, false);
+ }
}
-void Attribute::resize(Mesh *mesh, AttributePrimitive prim, bool reserve_only)
+Attribute::~Attribute()
{
- if (reserve_only) {
- buffer.reserve(buffer_size(mesh, prim));
+ /* For voxel data, we need to free the image handle. */
+ if (element == ATTR_ELEMENT_VOXEL && buffer.size()) {
+ ImageHandle &handle = data_voxel();
+ handle.~ImageHandle();
}
- else {
- buffer.resize(buffer_size(mesh, prim), 0);
+}
+
+void Attribute::resize(Geometry *geom, AttributePrimitive prim, bool reserve_only)
+{
+ if (element != ATTR_ELEMENT_VOXEL) {
+ if (reserve_only) {
+ buffer.reserve(buffer_size(geom, prim));
+ }
+ else {
+ buffer.resize(buffer_size(geom, prim), 0);
+ }
}
}
void Attribute::resize(size_t num_elements)
{
- buffer.resize(num_elements * data_sizeof(), 0);
+ if (element != ATTR_ELEMENT_VOXEL) {
+ buffer.resize(num_elements * data_sizeof(), 0);
+ }
}
void Attribute::add(const float &f)
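Because Attribute keeps its payload in a raw byte buffer, a voxel attribute's ImageHandle (a non-trivial C++ object) is constructed with placement new in the constructor above and destroyed with an explicit destructor call in ~Attribute(); the resize() overloads deliberately skip ATTR_ELEMENT_VOXEL so the handle's storage is never reallocated underneath it. A generic, self-contained sketch of that pattern:

    #include <new>
    #include <vector>

    struct Handle { /* non-trivial type, e.g. owns a reference */ };

    void sketch()
    {
      std::vector<char> buffer(sizeof(Handle));
      Handle *h = new (buffer.data()) Handle(); /* construct in place */
      /* ... use *h; never resize `buffer` while the handle lives ... */
      h->~Handle();                             /* destroy manually */
    }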
@@ -122,17 +128,6 @@ void Attribute::add(const Transform &f)
buffer.push_back(data[i]);
}
-void Attribute::add(const VoxelAttribute &f)
-{
- assert(data_sizeof() == sizeof(VoxelAttribute));
-
- char *data = (char *)&f;
- size_t size = sizeof(f);
-
- for (size_t i = 0; i < size; i++)
- buffer.push_back(data[i]);
-}
-
void Attribute::add(const char *data)
{
size_t size = data_sizeof();
@@ -144,7 +139,7 @@ void Attribute::add(const char *data)
size_t Attribute::data_sizeof() const
{
if (element == ATTR_ELEMENT_VOXEL)
- return sizeof(VoxelAttribute);
+ return sizeof(ImageHandle);
else if (element == ATTR_ELEMENT_CORNER_BYTE)
return sizeof(uchar4);
else if (type == TypeDesc::TypeFloat)
@@ -157,13 +152,13 @@ size_t Attribute::data_sizeof() const
return sizeof(float3);
}
-size_t Attribute::element_size(Mesh *mesh, AttributePrimitive prim) const
+size_t Attribute::element_size(Geometry *geom, AttributePrimitive prim) const
{
if (flags & ATTR_FINAL_SIZE) {
return buffer.size() / data_sizeof();
}
- size_t size;
+ size_t size = 0;
switch (element) {
case ATTR_ELEMENT_OBJECT:
@@ -172,54 +167,74 @@ size_t Attribute::element_size(Mesh *mesh, AttributePrimitive prim) const
size = 1;
break;
case ATTR_ELEMENT_VERTEX:
- size = mesh->verts.size() + mesh->num_ngons;
- if (prim == ATTR_PRIM_SUBD) {
- size -= mesh->num_subd_verts;
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ size = mesh->verts.size() + mesh->num_ngons;
+ if (prim == ATTR_PRIM_SUBD) {
+ size -= mesh->num_subd_verts;
+ }
}
break;
case ATTR_ELEMENT_VERTEX_MOTION:
- size = (mesh->verts.size() + mesh->num_ngons) * (mesh->motion_steps - 1);
- if (prim == ATTR_PRIM_SUBD) {
- size -= mesh->num_subd_verts * (mesh->motion_steps - 1);
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ size = (mesh->verts.size() + mesh->num_ngons) * (mesh->motion_steps - 1);
+ if (prim == ATTR_PRIM_SUBD) {
+ size -= mesh->num_subd_verts * (mesh->motion_steps - 1);
+ }
}
break;
case ATTR_ELEMENT_FACE:
- if (prim == ATTR_PRIM_TRIANGLE) {
- size = mesh->num_triangles();
- }
- else {
- size = mesh->subd_faces.size() + mesh->num_ngons;
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (prim == ATTR_PRIM_GEOMETRY) {
+ size = mesh->num_triangles();
+ }
+ else {
+ size = mesh->subd_faces.size() + mesh->num_ngons;
+ }
}
break;
case ATTR_ELEMENT_CORNER:
case ATTR_ELEMENT_CORNER_BYTE:
- if (prim == ATTR_PRIM_TRIANGLE) {
- size = mesh->num_triangles() * 3;
- }
- else {
- size = mesh->subd_face_corners.size() + mesh->num_ngons;
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (prim == ATTR_PRIM_GEOMETRY) {
+ size = mesh->num_triangles() * 3;
+ }
+ else {
+ size = mesh->subd_face_corners.size() + mesh->num_ngons;
+ }
}
break;
case ATTR_ELEMENT_CURVE:
- size = mesh->num_curves();
+ if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ size = hair->num_curves();
+ }
break;
case ATTR_ELEMENT_CURVE_KEY:
- size = mesh->curve_keys.size();
+ if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ size = hair->curve_keys.size();
+ }
break;
case ATTR_ELEMENT_CURVE_KEY_MOTION:
- size = mesh->curve_keys.size() * (mesh->motion_steps - 1);
+ if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ size = hair->curve_keys.size() * (hair->motion_steps - 1);
+ }
break;
default:
- size = 0;
break;
}
return size;
}
-size_t Attribute::buffer_size(Mesh *mesh, AttributePrimitive prim) const
+size_t Attribute::buffer_size(Geometry *geom, AttributePrimitive prim) const
{
- return element_size(mesh, prim) * data_sizeof();
+ return element_size(geom, prim) * data_sizeof();
}
bool Attribute::same_storage(TypeDesc a, TypeDesc b)
@@ -280,6 +295,8 @@ const char *Attribute::standard_name(AttributeStandard std)
return "tangent";
case ATTR_STD_UV_TANGENT_SIGN:
return "tangent_sign";
+ case ATTR_STD_VERTEX_COLOR:
+ return "vertex_color";
case ATTR_STD_POSITION_UNDEFORMED:
return "undeformed";
case ATTR_STD_POSITION_UNDISPLACED:
@@ -336,13 +353,42 @@ AttributeStandard Attribute::name_standard(const char *name)
return ATTR_STD_NONE;
}
+void Attribute::get_uv_tiles(Geometry *geom,
+ AttributePrimitive prim,
+ unordered_set<int> &tiles) const
+{
+ if (type != TypeFloat2) {
+ return;
+ }
+
+ const int num = element_size(geom, prim);
+ const float2 *uv = data_float2();
+ for (int i = 0; i < num; i++, uv++) {
+ float u = uv->x, v = uv->y;
+ int x = (int)u, y = (int)v;
+
+ if (x < 0 || y < 0 || x >= 10) {
+ continue;
+ }
+
+ /* Be conservative in corners - precisely touching the right or upper edge of a tile
+ * should not load its right/upper neighbor as well. */
+ if (x > 0 && (u < x + 1e-6f)) {
+ x--;
+ }
+ if (y > 0 && (v < y + 1e-6f)) {
+ y--;
+ }
+
+ tiles.insert(1001 + 10 * y + x);
+ }
+}
+
/* Attribute Set */
-AttributeSet::AttributeSet()
+AttributeSet::AttributeSet(Geometry *geometry, AttributePrimitive prim)
+ : geometry(geometry), prim(prim)
{
- triangle_mesh = NULL;
- curve_mesh = NULL;
- subd_mesh = NULL;
}
AttributeSet::~AttributeSet()
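The get_uv_tiles() addition above maps each float2 UV coordinate to a UDIM tile number with the conventional 1001 + 10 * y + x scheme (ten tiles per row in U), rejecting coordinates outside the ten-column range and nudging values that sit exactly on a tile's right or top edge back into the tile they belong to. A few worked examples of that mapping:

    /* Worked examples of the tile numbering in get_uv_tiles(): */
    /* UV (0.5,  0.5) -> x = 0, y = 0 -> tile 1001 */
    /* UV (1.25, 0.0) -> x = 1, y = 0 -> tile 1002 */
    /* UV (0.25, 1.5) -> x = 0, y = 1 -> tile 1011 */
    /* UV (1.0,  0.0) -> u touches the right edge, x is decremented -> tile 1001 */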
@@ -362,28 +408,9 @@ Attribute *AttributeSet::add(ustring name, TypeDesc type, AttributeElement eleme
remove(name);
}
-#if __cplusplus >= 201103L
- attributes.emplace_back();
- attr = &attributes.back();
- attr->set(name, type, element);
-#else
- {
- Attribute attr_temp;
- attr_temp.set(name, type, element);
- attributes.push_back(attr_temp);
- attr = &attributes.back();
- }
-#endif
-
- /* this is weak .. */
- if (triangle_mesh)
- attr->resize(triangle_mesh, ATTR_PRIM_TRIANGLE, false);
- if (curve_mesh)
- attr->resize(curve_mesh, ATTR_PRIM_CURVE, false);
- if (subd_mesh)
- attr->resize(subd_mesh, ATTR_PRIM_SUBD, false);
-
- return attr;
+ Attribute new_attr(name, type, element, geometry, prim);
+ attributes.emplace_back(std::move(new_attr));
+ return &attributes.back();
}
Attribute *AttributeSet::find(ustring name) const
@@ -418,7 +445,7 @@ Attribute *AttributeSet::add(AttributeStandard std, ustring name)
if (name == ustring())
name = Attribute::standard_name(std);
- if (triangle_mesh || subd_mesh) {
+ if (geometry->type == Geometry::MESH) {
switch (std) {
case ATTR_STD_VERTEX_NORMAL:
attr = add(name, TypeDesc::TypeNormal, ATTR_ELEMENT_VERTEX);
@@ -435,6 +462,9 @@ Attribute *AttributeSet::add(AttributeStandard std, ustring name)
case ATTR_STD_UV_TANGENT_SIGN:
attr = add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_CORNER);
break;
+ case ATTR_STD_VERTEX_COLOR:
+ attr = add(name, TypeRGBA, ATTR_ELEMENT_CORNER_BYTE);
+ break;
case ATTR_STD_GENERATED:
case ATTR_STD_POSITION_UNDEFORMED:
case ATTR_STD_POSITION_UNDISPLACED:
@@ -478,7 +508,7 @@ Attribute *AttributeSet::add(AttributeStandard std, ustring name)
break;
}
}
- else if (curve_mesh) {
+ else if (geometry->type == Geometry::HAIR) {
switch (std) {
case ATTR_STD_UV:
attr = add(name, TypeFloat2, ATTR_ELEMENT_CURVE);
@@ -561,12 +591,7 @@ void AttributeSet::remove(Attribute *attribute)
void AttributeSet::resize(bool reserve_only)
{
foreach (Attribute &attr, attributes) {
- if (triangle_mesh)
- attr.resize(triangle_mesh, ATTR_PRIM_TRIANGLE, reserve_only);
- if (curve_mesh)
- attr.resize(curve_mesh, ATTR_PRIM_CURVE, reserve_only);
- if (subd_mesh)
- attr.resize(subd_mesh, ATTR_PRIM_SUBD, reserve_only);
+ attr.resize(geometry, prim, reserve_only);
}
}
@@ -596,15 +621,10 @@ AttributeRequest::AttributeRequest(ustring name_)
name = name_;
std = ATTR_STD_NONE;
- triangle_type = TypeDesc::TypeFloat;
- triangle_desc.element = ATTR_ELEMENT_NONE;
- triangle_desc.offset = 0;
- triangle_desc.type = NODE_ATTR_FLOAT;
-
- curve_type = TypeDesc::TypeFloat;
- curve_desc.element = ATTR_ELEMENT_NONE;
- curve_desc.offset = 0;
- curve_desc.type = NODE_ATTR_FLOAT;
+ type = TypeDesc::TypeFloat;
+ desc.element = ATTR_ELEMENT_NONE;
+ desc.offset = 0;
+ desc.type = NODE_ATTR_FLOAT;
subd_type = TypeDesc::TypeFloat;
subd_desc.element = ATTR_ELEMENT_NONE;
@@ -617,15 +637,10 @@ AttributeRequest::AttributeRequest(AttributeStandard std_)
name = ustring();
std = std_;
- triangle_type = TypeDesc::TypeFloat;
- triangle_desc.element = ATTR_ELEMENT_NONE;
- triangle_desc.offset = 0;
- triangle_desc.type = NODE_ATTR_FLOAT;
-
- curve_type = TypeDesc::TypeFloat;
- curve_desc.element = ATTR_ELEMENT_NONE;
- curve_desc.offset = 0;
- curve_desc.type = NODE_ATTR_FLOAT;
+ type = TypeDesc::TypeFloat;
+ desc.element = ATTR_ELEMENT_NONE;
+ desc.offset = 0;
+ desc.type = NODE_ATTR_FLOAT;
subd_type = TypeDesc::TypeFloat;
subd_desc.element = ATTR_ELEMENT_NONE;
diff --git a/intern/cycles/render/attribute.h b/intern/cycles/render/attribute.h
index ebab0fe7f88..5871fa04a31 100644
--- a/intern/cycles/render/attribute.h
+++ b/intern/cycles/render/attribute.h
@@ -17,10 +17,13 @@
#ifndef __ATTRIBUTE_H__
#define __ATTRIBUTE_H__
+#include "render/image.h"
+
#include "kernel/kernel_types.h"
#include "util/util_list.h"
#include "util/util_param.h"
+#include "util/util_set.h"
#include "util/util_types.h"
#include "util/util_vector.h"
@@ -30,17 +33,12 @@ class Attribute;
class AttributeRequest;
class AttributeRequestSet;
class AttributeSet;
-class ImageManager;
+class ImageHandle;
+class Geometry;
+class Hair;
class Mesh;
struct Transform;
-/* Attributes for voxels are images */
-
-struct VoxelAttribute {
- ImageManager *manager;
- int slot;
-};
-
/* Attribute
*
* Arbitrary data layers on meshes.
@@ -56,17 +54,23 @@ class Attribute {
AttributeElement element;
uint flags; /* enum AttributeFlag */
- Attribute()
- {
- }
+ Attribute(ustring name,
+ TypeDesc type,
+ AttributeElement element,
+ Geometry *geom,
+ AttributePrimitive prim);
+ Attribute(Attribute &&other) = default;
+ Attribute(const Attribute &other) = delete;
+ Attribute &operator=(const Attribute &other) = delete;
~Attribute();
+
void set(ustring name, TypeDesc type, AttributeElement element);
- void resize(Mesh *mesh, AttributePrimitive prim, bool reserve_only);
+ void resize(Geometry *geom, AttributePrimitive prim, bool reserve_only);
void resize(size_t num_elements);
size_t data_sizeof() const;
- size_t element_size(Mesh *mesh, AttributePrimitive prim) const;
- size_t buffer_size(Mesh *mesh, AttributePrimitive prim) const;
+ size_t element_size(Geometry *geom, AttributePrimitive prim) const;
+ size_t buffer_size(Geometry *geom, AttributePrimitive prim) const;
char *data()
{
@@ -102,10 +106,12 @@ class Attribute {
assert(data_sizeof() == sizeof(Transform));
return (Transform *)data();
}
- VoxelAttribute *data_voxel()
+
+ /* Attributes for voxels are images */
+ ImageHandle &data_voxel()
{
- assert(data_sizeof() == sizeof(VoxelAttribute));
- return (VoxelAttribute *)data();
+ assert(data_sizeof() == sizeof(ImageHandle));
+ return *(ImageHandle *)data();
}
const char *data() const
@@ -137,10 +143,10 @@ class Attribute {
assert(data_sizeof() == sizeof(Transform));
return (const Transform *)data();
}
- const VoxelAttribute *data_voxel() const
+ const ImageHandle &data_voxel() const
{
- assert(data_sizeof() == sizeof(VoxelAttribute));
- return (const VoxelAttribute *)data();
+ assert(data_sizeof() == sizeof(ImageHandle));
+ return *(const ImageHandle *)data();
}
void zero_data(void *dst);
@@ -150,13 +156,14 @@ class Attribute {
void add(const float2 &f);
void add(const float3 &f);
void add(const uchar4 &f);
- void add(const Transform &f);
- void add(const VoxelAttribute &f);
+ void add(const Transform &tfm);
void add(const char *data);
static bool same_storage(TypeDesc a, TypeDesc b);
static const char *standard_name(AttributeStandard std);
static AttributeStandard name_standard(const char *name);
+
+ void get_uv_tiles(Geometry *geom, AttributePrimitive prim, unordered_set<int> &tiles) const;
};
/* Attribute Set
@@ -165,12 +172,11 @@ class Attribute {
class AttributeSet {
public:
- Mesh *triangle_mesh;
- Mesh *curve_mesh;
- Mesh *subd_mesh;
+ Geometry *geometry;
+ AttributePrimitive prim;
list<Attribute> attributes;
- AttributeSet();
+ AttributeSet(Geometry *geometry, AttributePrimitive prim);
~AttributeSet();
Attribute *add(ustring name, TypeDesc type, AttributeElement element);
@@ -200,9 +206,9 @@ class AttributeRequest {
ustring name;
AttributeStandard std;
- /* temporary variables used by MeshManager */
- TypeDesc triangle_type, curve_type, subd_type;
- AttributeDescriptor triangle_desc, curve_desc, subd_desc;
+ /* temporary variables used by GeometryManager */
+ TypeDesc type, subd_type;
+ AttributeDescriptor desc, subd_desc;
explicit AttributeRequest(ustring name_);
explicit AttributeRequest(AttributeStandard std);
diff --git a/intern/cycles/render/background.cpp b/intern/cycles/render/background.cpp
index 6553ca735e4..694bb640995 100644
--- a/intern/cycles/render/background.cpp
+++ b/intern/cycles/render/background.cpp
@@ -16,8 +16,8 @@
#include "render/background.h"
#include "device/device.h"
-#include "render/integrator.h"
#include "render/graph.h"
+#include "render/integrator.h"
#include "render/nodes.h"
#include "render/scene.h"
#include "render/shader.h"
@@ -43,6 +43,8 @@ NODE_DEFINE(Background)
SOCKET_BOOLEAN(transparent_glass, "Transparent Glass", false);
SOCKET_FLOAT(transparent_roughness_threshold, "Transparent Roughness Threshold", 0.0f);
+ SOCKET_FLOAT(volume_step_size, "Volume Step Size", 0.1f);
+
SOCKET_NODE(shader, "Shader", &Shader::node_type);
return type;
@@ -51,6 +53,7 @@ NODE_DEFINE(Background)
Background::Background() : Node(node_type)
{
need_update = true;
+ shader = NULL;
}
Background::~Background()
@@ -91,6 +94,8 @@ void Background::device_update(Device *device, DeviceScene *dscene, Scene *scene
else
kbackground->volume_shader = SHADER_NONE;
+ kbackground->volume_step_size = volume_step_size * scene->integrator->volume_step_rate;
+
/* No background node, make world shader invisible to all rays, to skip evaluation in kernel. */
if (bg_shader->graph->nodes.size() <= 1) {
kbackground->surface_shader |= SHADER_EXCLUDE_ANY;
diff --git a/intern/cycles/render/background.h b/intern/cycles/render/background.h
index fb27430f9a3..c2ca1f75179 100644
--- a/intern/cycles/render/background.h
+++ b/intern/cycles/render/background.h
@@ -45,6 +45,8 @@ class Background : public Node {
bool transparent_glass;
float transparent_roughness_threshold;
+ float volume_step_size;
+
bool need_update;
Background();
diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp
index b906357b7b5..35f942b3e9b 100644
--- a/intern/cycles/render/bake.cpp
+++ b/intern/cycles/render/bake.cpp
@@ -15,10 +15,10 @@
*/
#include "render/bake.h"
+#include "render/integrator.h"
#include "render/mesh.h"
#include "render/object.h"
#include "render/shader.h"
-#include "render/integrator.h"
#include "util/util_foreach.h"
@@ -253,8 +253,8 @@ int BakeManager::aa_samples(Scene *scene, BakeData *bake_data, ShaderEvalType ty
/* Only antialias normal if mesh has bump mapping. */
Object *object = scene->objects[bake_data->object()];
- if (object->mesh) {
- foreach (Shader *shader, object->mesh->used_shaders) {
+ if (object->geometry) {
+ foreach (Shader *shader, object->geometry->used_shaders) {
if (shader->has_bump) {
return scene->integrator->aa_samples;
}
@@ -285,8 +285,6 @@ int BakeManager::shader_type_to_pass_filter(ShaderEvalType type, const int pass_
return BAKE_FILTER_GLOSSY | component_flags;
case SHADER_EVAL_TRANSMISSION:
return BAKE_FILTER_TRANSMISSION | component_flags;
- case SHADER_EVAL_SUBSURFACE:
- return BAKE_FILTER_SUBSURFACE | component_flags;
case SHADER_EVAL_COMBINED:
return pass_filter;
default:
diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp
index 50308d0d377..2d89fb9ffba 100644
--- a/intern/cycles/render/buffers.cpp
+++ b/intern/cycles/render/buffers.cpp
@@ -16,8 +16,8 @@
#include <stdlib.h>
-#include "render/buffers.h"
#include "device/device.h"
+#include "render/buffers.h"
#include "util/util_foreach.h"
#include "util/util_hash.h"
@@ -43,6 +43,8 @@ BufferParams::BufferParams()
denoising_data_pass = false;
denoising_clean_pass = false;
denoising_prefiltered_pass = false;
+
+ Pass::add(PASS_COMBINED, passes);
}
void BufferParams::get_offset_stride(int &offset, int &stride)
@@ -144,7 +146,7 @@ void RenderBuffers::reset(BufferParams &params_)
params = params_;
/* re-allocate buffer */
- buffer.alloc(params.width * params.height * params.get_passes_size());
+ buffer.alloc(params.width * params.get_passes_size(), params.height);
buffer.zero_to_device();
}
@@ -258,6 +260,22 @@ bool RenderBuffers::get_pass_rect(
return false;
}
+ float *sample_count = NULL;
+ if (name == "Combined") {
+ int sample_offset = 0;
+ for (size_t j = 0; j < params.passes.size(); j++) {
+ Pass &pass = params.passes[j];
+ if (pass.type != PASS_SAMPLE_COUNT) {
+ sample_offset += pass.components;
+ continue;
+ }
+ else {
+ sample_count = buffer.data() + sample_offset;
+ break;
+ }
+ }
+ }
+
int pass_offset = 0;
for (size_t j = 0; j < params.passes.size(); j++) {
@@ -418,6 +436,11 @@ bool RenderBuffers::get_pass_rect(
}
else {
for (int i = 0; i < size; i++, in += pass_stride, pixels += 4) {
+ if (sample_count && sample_count[i * pass_stride] < 0.0f) {
+ scale = (pass.filter) ? -1.0f / (sample_count[i * pass_stride]) : 1.0f;
+ scale_exposure = (pass.exposure) ? scale * exposure : scale;
+ }
+
float4 f = make_float4(in[0], in[1], in[2], in[3]);
pixels[0] = f.x * scale_exposure;
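The Combined branch above first locates a PASS_SAMPLE_COUNT pass and then, for pixels whose stored count is negative, switches from the global 1/num_samples scale to a per-pixel one. Judging from the surrounding adaptive-sampling additions, a negative value appears to mark a pixel whose sampling stopped early, with the magnitude being the number of samples actually accumulated:

    /* Per-pixel rescale convention implied by the code above:
     *   count >= 0 -> pixel used the global sample count, keep the global scale
     *   count <  0 -> pixel stopped after |count| samples:
     *                 scale = -1.0f / count  (== 1.0f / fabsf(count)) */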
diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h
index 1042b42810f..42efb031843 100644
--- a/intern/cycles/render/buffers.h
+++ b/intern/cycles/render/buffers.h
@@ -130,7 +130,7 @@ class DisplayBuffer {
class RenderTile {
public:
- typedef enum { PATH_TRACE, DENOISE } Task;
+ typedef enum { PATH_TRACE = (1 << 0), DENOISE = (1 << 1) } Task;
Task task;
int x, y, w, h;
diff --git a/intern/cycles/render/camera.cpp b/intern/cycles/render/camera.cpp
index 38306a63c74..74953afae9d 100644
--- a/intern/cycles/render/camera.cpp
+++ b/intern/cycles/render/camera.cpp
@@ -29,6 +29,7 @@
#include "util/util_vector.h"
/* needed for calculating differentials */
+// clang-format off
#include "kernel/kernel_compat_cpu.h"
#include "kernel/split/kernel_split_data.h"
#include "kernel/kernel_globals.h"
@@ -36,6 +37,7 @@
#include "kernel/kernel_differential.h"
#include "kernel/kernel_montecarlo.h"
#include "kernel/kernel_camera.h"
+// clang-format on
CCL_NAMESPACE_BEGIN
@@ -498,7 +500,7 @@ void Camera::device_update_volume(Device * /*device*/, DeviceScene *dscene, Scen
BoundBox viewplane_boundbox = viewplane_bounds_get();
for (size_t i = 0; i < scene->objects.size(); ++i) {
Object *object = scene->objects[i];
- if (object->mesh->has_volume && viewplane_boundbox.intersects(object->bounds)) {
+ if (object->geometry->has_volume && viewplane_boundbox.intersects(object->bounds)) {
/* TODO(sergey): Consider adding more grained check. */
VLOG(1) << "Detected camera inside volume.";
kcam->is_inside_volume = 1;
diff --git a/intern/cycles/render/colorspace.cpp b/intern/cycles/render/colorspace.cpp
index 2e5b53057c0..7605fcaf5ff 100644
--- a/intern/cycles/render/colorspace.cpp
+++ b/intern/cycles/render/colorspace.cpp
@@ -17,8 +17,8 @@
#include "render/colorspace.h"
#include "util/util_color.h"
-#include "util/util_image.h"
#include "util/util_half.h"
+#include "util/util_image.h"
#include "util/util_logging.h"
#include "util/util_math.h"
#include "util/util_thread.h"
@@ -262,56 +262,49 @@ template<typename T> inline void cast_from_float4(T *data, float4 value)
/* Slower versions for other all data types, which needs to convert to float and back. */
template<typename T, bool compress_as_srgb = false>
-inline void processor_apply_pixels(const OCIO::Processor *processor,
- T *pixels,
- size_t width,
- size_t height)
+inline void processor_apply_pixels(const OCIO::Processor *processor, T *pixels, size_t num_pixels)
{
/* TODO: implement faster version for when we know the conversion
* is a simple matrix transform between linear spaces. In that case
* unpremultiply is not needed. */
/* Process large images in chunks to keep temporary memory requirement down. */
- size_t y_chunk_size = max(1, 16 * 1024 * 1024 / (sizeof(float4) * width));
- vector<float4> float_pixels(y_chunk_size * width);
-
- for (size_t y0 = 0; y0 < height; y0 += y_chunk_size) {
- size_t y1 = std::min(y0 + y_chunk_size, height);
- size_t i = 0;
+ const size_t chunk_size = std::min((size_t)(16 * 1024 * 1024), num_pixels);
+ vector<float4> float_pixels(chunk_size);
- for (size_t y = y0; y < y1; y++) {
- for (size_t x = 0; x < width; x++, i++) {
- float4 value = cast_to_float4(pixels + 4 * (y * width + x));
+ for (size_t j = 0; j < num_pixels; j += chunk_size) {
+ size_t width = std::min(chunk_size, num_pixels - j);
- if (!(value.w == 0.0f || value.w == 1.0f)) {
- float inv_alpha = 1.0f / value.w;
- value.x *= inv_alpha;
- value.y *= inv_alpha;
- value.z *= inv_alpha;
- }
+ for (size_t i = 0; i < width; i++) {
+ float4 value = cast_to_float4(pixels + 4 * (j + i));
- float_pixels[i] = value;
+ if (!(value.w <= 0.0f || value.w == 1.0f)) {
+ float inv_alpha = 1.0f / value.w;
+ value.x *= inv_alpha;
+ value.y *= inv_alpha;
+ value.z *= inv_alpha;
}
+
+ float_pixels[i] = value;
}
- OCIO::PackedImageDesc desc((float *)float_pixels.data(), width, y_chunk_size, 4);
+ OCIO::PackedImageDesc desc((float *)float_pixels.data(), width, 1, 4);
processor->apply(desc);
- i = 0;
- for (size_t y = y0; y < y1; y++) {
- for (size_t x = 0; x < width; x++, i++) {
- float4 value = float_pixels[i];
+ for (size_t i = 0; i < width; i++) {
+ float4 value = float_pixels[i];
+
+ if (compress_as_srgb) {
+ value = color_linear_to_srgb_v4(value);
+ }
+ if (!(value.w <= 0.0f || value.w == 1.0f)) {
value.x *= value.w;
value.y *= value.w;
value.z *= value.w;
-
- if (compress_as_srgb) {
- value = color_linear_to_srgb_v4(value);
- }
-
- cast_from_float4(pixels + 4 * (y * width + x), value);
}
+
+ cast_from_float4(pixels + 4 * (j + i), value);
}
}
}
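The rewrite treats the image as a flat run of pixels and converts it in fixed-size chunks, while keeping the straight-alpha roundtrip: alpha is divided out before the OCIO transform (nonlinear color transforms are not generally correct on premultiplied values), then optional sRGB compression and re-premultiplication are applied afterwards. Distilled to a single pixel, the roundtrip performed per chunk is:

    /* Single-pixel view of the per-chunk roundtrip (processor->apply() omitted). */
    float4 value = cast_to_float4(pixel);
    if (!(value.w <= 0.0f || value.w == 1.0f)) {
      float inv_alpha = 1.0f / value.w;
      value.x *= inv_alpha; value.y *= inv_alpha; value.z *= inv_alpha;
    }
    /* ... color transform on the straight-alpha color ... */
    if (compress_as_srgb) value = color_linear_to_srgb_v4(value);
    if (!(value.w <= 0.0f || value.w == 1.0f)) {
      value.x *= value.w; value.y *= value.w; value.z *= value.w;
    }
    cast_from_float4(pixel, value);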
@@ -320,9 +313,7 @@ inline void processor_apply_pixels(const OCIO::Processor *processor,
template<typename T>
void ColorSpaceManager::to_scene_linear(ustring colorspace,
T *pixels,
- size_t width,
- size_t height,
- size_t depth,
+ size_t num_pixels,
bool compress_as_srgb)
{
#ifdef WITH_OCIO
@@ -331,23 +322,17 @@ void ColorSpaceManager::to_scene_linear(ustring colorspace,
if (processor) {
if (compress_as_srgb) {
/* Compress output as sRGB. */
- for (size_t z = 0; z < depth; z++) {
- processor_apply_pixels<T, true>(processor, &pixels[z * width * height], width, height);
- }
+ processor_apply_pixels<T, true>(processor, pixels, num_pixels);
}
else {
/* Write output as scene linear directly. */
- for (size_t z = 0; z < depth; z++) {
- processor_apply_pixels<T>(processor, &pixels[z * width * height], width, height);
- }
+ processor_apply_pixels<T>(processor, pixels, num_pixels);
}
}
#else
(void)colorspace;
(void)pixels;
- (void)width;
- (void)height;
- (void)depth;
+ (void)num_pixels;
(void)compress_as_srgb;
#endif
}
@@ -402,9 +387,9 @@ void ColorSpaceManager::free_memory()
}
/* Template instanstations so we don't have to inline functions. */
-template void ColorSpaceManager::to_scene_linear(ustring, uchar *, size_t, size_t, size_t, bool);
-template void ColorSpaceManager::to_scene_linear(ustring, ushort *, size_t, size_t, size_t, bool);
-template void ColorSpaceManager::to_scene_linear(ustring, half *, size_t, size_t, size_t, bool);
-template void ColorSpaceManager::to_scene_linear(ustring, float *, size_t, size_t, size_t, bool);
+template void ColorSpaceManager::to_scene_linear(ustring, uchar *, size_t, bool);
+template void ColorSpaceManager::to_scene_linear(ustring, ushort *, size_t, bool);
+template void ColorSpaceManager::to_scene_linear(ustring, half *, size_t, bool);
+template void ColorSpaceManager::to_scene_linear(ustring, float *, size_t, bool);
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/colorspace.h b/intern/cycles/render/colorspace.h
index 9fea2d6efc6..51d0b121cc0 100644
--- a/intern/cycles/render/colorspace.h
+++ b/intern/cycles/render/colorspace.h
@@ -45,9 +45,7 @@ class ColorSpaceManager {
template<typename T>
static void to_scene_linear(ustring colorspace,
T *pixels,
- size_t width,
- size_t height,
- size_t depth,
+ size_t num_pixels,
bool compress_as_srgb);
/* Efficiently convert pixels to scene linear colorspace at render time,
diff --git a/intern/cycles/render/constant_fold.h b/intern/cycles/render/constant_fold.h
index 7f622488a88..fec4123c361 100644
--- a/intern/cycles/render/constant_fold.h
+++ b/intern/cycles/render/constant_fold.h
@@ -17,8 +17,8 @@
#ifndef __CONSTANT_FOLD_H__
#define __CONSTANT_FOLD_H__
-#include "util/util_types.h"
#include "kernel/svm/svm_types.h"
+#include "util/util_types.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/render/coverage.cpp b/intern/cycles/render/coverage.cpp
index 0a29903728a..99d4daa6961 100644
--- a/intern/cycles/render/coverage.cpp
+++ b/intern/cycles/render/coverage.cpp
@@ -15,13 +15,16 @@
*/
#include "render/coverage.h"
+#include "render/buffers.h"
+
#include "kernel/kernel_compat_cpu.h"
+#include "kernel/kernel_types.h"
#include "kernel/split/kernel_split_data.h"
+
#include "kernel/kernel_globals.h"
#include "kernel/kernel_id_passes.h"
-#include "kernel/kernel_types.h"
+
#include "util/util_map.h"
-#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/render/coverage.h b/intern/cycles/render/coverage.h
index 3d1f6a2b040..12182c614da 100644
--- a/intern/cycles/render/coverage.h
+++ b/intern/cycles/render/coverage.h
@@ -14,18 +14,19 @@
* limitations under the License.
*/
-#include "render/buffers.h"
-#include "kernel/kernel_compat_cpu.h"
-#include "kernel/split/kernel_split_data.h"
-#include "kernel/kernel_globals.h"
+#ifndef __COVERAGE_H__
+#define __COVERAGE_H__
+
#include "util/util_map.h"
#include "util/util_vector.h"
-#ifndef __COVERAGE_H__
-# define __COVERAGE_H__
-
CCL_NAMESPACE_BEGIN
+struct KernelGlobals;
+class RenderTile;
+
+typedef unordered_map<float, float> CoverageMap;
+
class Coverage {
public:
Coverage(KernelGlobals *kg_, RenderTile &tile_) : kg(kg_), tile(tile_)
diff --git a/intern/cycles/render/curves.cpp b/intern/cycles/render/curves.cpp
index 66fbc9eb4a8..1907bb33d06 100644
--- a/intern/cycles/render/curves.cpp
+++ b/intern/cycles/render/curves.cpp
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "device/device.h"
#include "render/curves.h"
+#include "device/device.h"
#include "render/mesh.h"
#include "render/object.h"
#include "render/scene.h"
diff --git a/intern/cycles/render/denoising.h b/intern/cycles/render/denoising.h
index c234d00eb49..5c6f913cb38 100644
--- a/intern/cycles/render/denoising.h
+++ b/intern/cycles/render/denoising.h
@@ -23,8 +23,8 @@
#include "render/buffers.h"
#include "util/util_string.h"
-#include "util/util_vector.h"
#include "util/util_unique_ptr.h"
+#include "util/util_vector.h"
#include <OpenImageIO/imageio.h>
diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp
index bd274844b52..baf02901123 100644
--- a/intern/cycles/render/film.cpp
+++ b/intern/cycles/render/film.cpp
@@ -14,9 +14,9 @@
* limitations under the License.
*/
-#include "render/camera.h"
-#include "device/device.h"
#include "render/film.h"
+#include "device/device.h"
+#include "render/camera.h"
#include "render/integrator.h"
#include "render/mesh.h"
#include "render/scene.h"
@@ -155,7 +155,6 @@ void Pass::add(PassType type, vector<Pass> &passes, const char *name)
case PASS_DIFFUSE_COLOR:
case PASS_GLOSSY_COLOR:
case PASS_TRANSMISSION_COLOR:
- case PASS_SUBSURFACE_COLOR:
pass.components = 4;
break;
case PASS_DIFFUSE_DIRECT:
@@ -176,12 +175,6 @@ void Pass::add(PassType type, vector<Pass> &passes, const char *name)
pass.exposure = true;
pass.divide_type = PASS_TRANSMISSION_COLOR;
break;
- case PASS_SUBSURFACE_DIRECT:
- case PASS_SUBSURFACE_INDIRECT:
- pass.components = 4;
- pass.exposure = true;
- pass.divide_type = PASS_SUBSURFACE_COLOR;
- break;
case PASS_VOLUME_DIRECT:
case PASS_VOLUME_INDIRECT:
pass.components = 4;
@@ -190,6 +183,13 @@ void Pass::add(PassType type, vector<Pass> &passes, const char *name)
case PASS_CRYPTOMATTE:
pass.components = 4;
break;
+ case PASS_ADAPTIVE_AUX_BUFFER:
+ pass.components = 4;
+ break;
+ case PASS_SAMPLE_COUNT:
+ pass.components = 1;
+ pass.exposure = false;
+ break;
case PASS_AOV_COLOR:
pass.components = 4;
break;
@@ -203,9 +203,10 @@ void Pass::add(PassType type, vector<Pass> &passes, const char *name)
passes.push_back(pass);
- /* order from by components, to ensure alignment so passes with size 4
- * come first and then passes with size 1 */
- sort(&passes[0], &passes[0] + passes.size(), compare_pass_order);
+ /* Order by number of components, to ensure alignment so passes with size 4
+ * come first and then passes with size 1. Note this must use a stable sort
+ * so cryptomatte passes remain in the right order. */
+ stable_sort(&passes[0], &passes[0] + passes.size(), compare_pass_order);
if (pass.divide_type != PASS_NONE)
Pass::add(pass.divide_type, passes);
@@ -318,15 +319,19 @@ NODE_DEFINE(Film)
SOCKET_BOOLEAN(denoising_clean_pass, "Generate Denoising Clean Pass", false);
SOCKET_BOOLEAN(denoising_prefiltered_pass, "Generate Denoising Prefiltered Pass", false);
SOCKET_INT(denoising_flags, "Denoising Flags", 0);
+ SOCKET_BOOLEAN(use_adaptive_sampling, "Use Adaptive Sampling", false);
return type;
}
Film::Film() : Node(node_type)
{
+ Pass::add(PASS_COMBINED, passes);
+
use_light_visibility = false;
filter_table_offset = TABLE_OFFSET_INVALID;
cryptomatte_passes = CRYPT_NONE;
+ display_pass = PASS_COMBINED;
need_update = true;
}
@@ -439,9 +444,6 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
case PASS_TRANSMISSION_COLOR:
kfilm->pass_transmission_color = kfilm->pass_stride;
break;
- case PASS_SUBSURFACE_COLOR:
- kfilm->pass_subsurface_color = kfilm->pass_stride;
- break;
case PASS_DIFFUSE_INDIRECT:
kfilm->pass_diffuse_indirect = kfilm->pass_stride;
break;
@@ -451,9 +453,6 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
case PASS_TRANSMISSION_INDIRECT:
kfilm->pass_transmission_indirect = kfilm->pass_stride;
break;
- case PASS_SUBSURFACE_INDIRECT:
- kfilm->pass_subsurface_indirect = kfilm->pass_stride;
- break;
case PASS_VOLUME_INDIRECT:
kfilm->pass_volume_indirect = kfilm->pass_stride;
break;
@@ -466,9 +465,6 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
case PASS_TRANSMISSION_DIRECT:
kfilm->pass_transmission_direct = kfilm->pass_stride;
break;
- case PASS_SUBSURFACE_DIRECT:
- kfilm->pass_subsurface_direct = kfilm->pass_stride;
- break;
case PASS_VOLUME_DIRECT:
kfilm->pass_volume_direct = kfilm->pass_stride;
break;
@@ -495,6 +491,12 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
kfilm->pass_stride;
have_cryptomatte = true;
break;
+ case PASS_ADAPTIVE_AUX_BUFFER:
+ kfilm->pass_adaptive_aux_buffer = kfilm->pass_stride;
+ break;
+ case PASS_SAMPLE_COUNT:
+ kfilm->pass_sample_count = kfilm->pass_stride;
+ break;
case PASS_AOV_COLOR:
if (!have_aov_color) {
kfilm->pass_aov_color = kfilm->pass_stride;
@@ -518,7 +520,7 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
kfilm->use_display_exposure = pass.exposure && (kfilm->exposure != 1.0f);
}
else if (pass.type == PASS_DIFFUSE_COLOR || pass.type == PASS_TRANSMISSION_COLOR ||
- pass.type == PASS_GLOSSY_COLOR || pass.type == PASS_SUBSURFACE_COLOR) {
+ pass.type == PASS_GLOSSY_COLOR) {
kfilm->display_divide_pass_stride = kfilm->pass_stride;
}
@@ -590,13 +592,13 @@ bool Film::modified(const Film &film)
void Film::tag_passes_update(Scene *scene, const vector<Pass> &passes_, bool update_passes)
{
if (Pass::contains(passes, PASS_UV) != Pass::contains(passes_, PASS_UV)) {
- scene->mesh_manager->tag_update(scene);
+ scene->geometry_manager->tag_update(scene);
foreach (Shader *shader, scene->shaders)
- shader->need_update_mesh = true;
+ shader->need_update_geometry = true;
}
else if (Pass::contains(passes, PASS_MOTION) != Pass::contains(passes_, PASS_MOTION)) {
- scene->mesh_manager->tag_update(scene);
+ scene->geometry_manager->tag_update(scene);
}
else if (Pass::contains(passes, PASS_AO) != Pass::contains(passes_, PASS_AO)) {
scene->integrator->tag_update(scene);
diff --git a/intern/cycles/render/film.h b/intern/cycles/render/film.h
index 95e54cb54d8..aae8fb404b0 100644
--- a/intern/cycles/render/film.h
+++ b/intern/cycles/render/film.h
@@ -81,6 +81,8 @@ class Film : public Node {
CryptomatteType cryptomatte_passes;
int cryptomatte_depth;
+ bool use_adaptive_sampling;
+
bool need_update;
Film();
diff --git a/intern/cycles/render/geometry.cpp b/intern/cycles/render/geometry.cpp
new file mode 100644
index 00000000000..d46ed430c4f
--- /dev/null
+++ b/intern/cycles/render/geometry.cpp
@@ -0,0 +1,1470 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "bvh/bvh.h"
+#include "bvh/bvh_build.h"
+
+#ifdef WITH_EMBREE
+# include "bvh/bvh_embree.h"
+#endif
+
+#include "render/attribute.h"
+#include "render/camera.h"
+#include "render/geometry.h"
+#include "render/hair.h"
+#include "render/light.h"
+#include "render/mesh.h"
+#include "render/nodes.h"
+#include "render/object.h"
+#include "render/scene.h"
+#include "render/shader.h"
+#include "render/stats.h"
+
+#include "subd/subd_patch_table.h"
+#include "subd/subd_split.h"
+
+#include "kernel/osl/osl_globals.h"
+
+#include "util/util_foreach.h"
+#include "util/util_logging.h"
+#include "util/util_progress.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Geometry */
+
+NODE_ABSTRACT_DEFINE(Geometry)
+{
+ NodeType *type = NodeType::add("geometry_base", NULL);
+
+ SOCKET_UINT(motion_steps, "Motion Steps", 3);
+ SOCKET_BOOLEAN(use_motion_blur, "Use Motion Blur", false);
+
+ return type;
+}
+
+Geometry::Geometry(const NodeType *node_type, const Type type)
+ : Node(node_type), type(type), attributes(this, ATTR_PRIM_GEOMETRY)
+{
+ need_update = true;
+ need_update_rebuild = false;
+
+ transform_applied = false;
+ transform_negative_scaled = false;
+ transform_normal = transform_identity();
+ bounds = BoundBox::empty;
+
+ has_volume = false;
+ has_surface_bssrdf = false;
+
+ bvh = NULL;
+ attr_map_offset = 0;
+ optix_prim_offset = 0;
+ prim_offset = 0;
+}
+
+Geometry::~Geometry()
+{
+ delete bvh;
+}
+
+void Geometry::clear()
+{
+ used_shaders.clear();
+ transform_applied = false;
+ transform_negative_scaled = false;
+ transform_normal = transform_identity();
+}
+
+bool Geometry::need_attribute(Scene *scene, AttributeStandard std)
+{
+ if (std == ATTR_STD_NONE)
+ return false;
+
+ if (scene->need_global_attribute(std))
+ return true;
+
+ foreach (Shader *shader, used_shaders)
+ if (shader->attributes.find(std))
+ return true;
+
+ return false;
+}
+
+bool Geometry::need_attribute(Scene * /*scene*/, ustring name)
+{
+ if (name == ustring())
+ return false;
+
+ foreach (Shader *shader, used_shaders)
+ if (shader->attributes.find(name))
+ return true;
+
+ return false;
+}
+
+float Geometry::motion_time(int step) const
+{
+ return (motion_steps > 1) ? 2.0f * step / (motion_steps - 1) - 1.0f : 0.0f;
+}
+
+int Geometry::motion_step(float time) const
+{
+ if (motion_steps > 1) {
+ int attr_step = 0;
+
+ for (int step = 0; step < motion_steps; step++) {
+ float step_time = motion_time(step);
+ if (step_time == time) {
+ return attr_step;
+ }
+
+ /* Center step is stored in a separate attribute. */
+ if (step != motion_steps / 2) {
+ attr_step++;
+ }
+ }
+ }
+
+ return -1;
+}
+
+bool Geometry::need_build_bvh(BVHLayout layout) const
+{
+ return !transform_applied || has_surface_bssrdf || layout == BVH_LAYOUT_OPTIX;
+}
+
+bool Geometry::is_instanced() const
+{
+ /* Currently we treat subsurface objects as instanced.
+ *
+ * While this may not be optimal for ray traversal, it avoids duplicating
+ * the BVH in memory, which saves quite some space.
+ */
+ return !transform_applied || has_surface_bssrdf;
+}
+
+bool Geometry::has_true_displacement() const
+{
+ foreach (Shader *shader, used_shaders) {
+ if (shader->has_displacement && shader->displacement_method != DISPLACE_BUMP) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void Geometry::compute_bvh(
+ Device *device, DeviceScene *dscene, SceneParams *params, Progress *progress, int n, int total)
+{
+ if (progress->get_cancel())
+ return;
+
+ compute_bounds();
+
+ const BVHLayout bvh_layout = BVHParams::best_bvh_layout(params->bvh_layout,
+ device->get_bvh_layout_mask());
+ if (need_build_bvh(bvh_layout)) {
+ string msg = "Updating Geometry BVH ";
+ if (name.empty())
+ msg += string_printf("%u/%u", (uint)(n + 1), (uint)total);
+ else
+ msg += string_printf("%s %u/%u", name.c_str(), (uint)(n + 1), (uint)total);
+
+ Object object;
+ object.geometry = this;
+
+ vector<Geometry *> geometry;
+ geometry.push_back(this);
+ vector<Object *> objects;
+ objects.push_back(&object);
+
+ if (bvh && !need_update_rebuild) {
+ progress->set_status(msg, "Refitting BVH");
+
+ bvh->geometry = geometry;
+ bvh->objects = objects;
+
+ bvh->refit(*progress);
+ }
+ else {
+ progress->set_status(msg, "Building BVH");
+
+ BVHParams bparams;
+ bparams.use_spatial_split = params->use_bvh_spatial_split;
+ bparams.bvh_layout = bvh_layout;
+ bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
+ params->use_bvh_unaligned_nodes;
+ bparams.num_motion_triangle_steps = params->num_bvh_time_steps;
+ bparams.num_motion_curve_steps = params->num_bvh_time_steps;
+ bparams.bvh_type = params->bvh_type;
+ bparams.curve_flags = dscene->data.curve.curveflags;
+ bparams.curve_subdivisions = dscene->data.curve.subdivisions;
+
+ delete bvh;
+ bvh = BVH::create(bparams, geometry, objects);
+ MEM_GUARDED_CALL(progress, bvh->build, *progress);
+ }
+ }
+
+ need_update = false;
+ need_update_rebuild = false;
+}
+
+bool Geometry::has_motion_blur() const
+{
+ return (use_motion_blur && attributes.find(ATTR_STD_MOTION_VERTEX_POSITION));
+}
+
+bool Geometry::has_voxel_attributes() const
+{
+ foreach (const Attribute &attr, attributes.attributes) {
+ if (attr.element == ATTR_ELEMENT_VOXEL) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void Geometry::tag_update(Scene *scene, bool rebuild)
+{
+ need_update = true;
+
+ if (rebuild) {
+ need_update_rebuild = true;
+ scene->light_manager->need_update = true;
+ }
+ else {
+ foreach (Shader *shader, used_shaders)
+ if (shader->has_surface_emission)
+ scene->light_manager->need_update = true;
+ }
+
+ scene->geometry_manager->need_update = true;
+ scene->object_manager->need_update = true;
+}
+
+/* Geometry Manager */
+
+GeometryManager::GeometryManager()
+{
+ need_update = true;
+ need_flags_update = true;
+}
+
+GeometryManager::~GeometryManager()
+{
+}
+
+void GeometryManager::update_osl_attributes(Device *device,
+ Scene *scene,
+ vector<AttributeRequestSet> &geom_attributes)
+{
+#ifdef WITH_OSL
+ /* for OSL, a hash map is used to lookup the attribute by name. */
+ OSLGlobals *og = (OSLGlobals *)device->osl_memory();
+
+ og->object_name_map.clear();
+ og->attribute_map.clear();
+ og->object_names.clear();
+
+ og->attribute_map.resize(scene->objects.size() * ATTR_PRIM_TYPES);
+
+ for (size_t i = 0; i < scene->objects.size(); i++) {
+ /* set object name to object index map */
+ Object *object = scene->objects[i];
+ og->object_name_map[object->name] = i;
+ og->object_names.push_back(object->name);
+
+ /* set object attributes */
+ foreach (ParamValue &attr, object->attributes) {
+ OSLGlobals::Attribute osl_attr;
+
+ osl_attr.type = attr.type();
+ osl_attr.desc.element = ATTR_ELEMENT_OBJECT;
+ osl_attr.value = attr;
+ osl_attr.desc.offset = 0;
+ osl_attr.desc.flags = 0;
+
+ og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_GEOMETRY][attr.name()] = osl_attr;
+ og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_SUBD][attr.name()] = osl_attr;
+ }
+
+ /* find geometry attributes */
+ size_t j;
+
+ for (j = 0; j < scene->geometry.size(); j++)
+ if (scene->geometry[j] == object->geometry)
+ break;
+
+ AttributeRequestSet &attributes = geom_attributes[j];
+
+ /* set object attributes */
+ foreach (AttributeRequest &req, attributes.requests) {
+ OSLGlobals::Attribute osl_attr;
+
+ if (req.desc.element != ATTR_ELEMENT_NONE) {
+ osl_attr.desc = req.desc;
+
+ if (req.type == TypeDesc::TypeFloat)
+ osl_attr.type = TypeDesc::TypeFloat;
+ else if (req.type == TypeDesc::TypeMatrix)
+ osl_attr.type = TypeDesc::TypeMatrix;
+ else if (req.type == TypeFloat2)
+ osl_attr.type = TypeFloat2;
+ else if (req.type == TypeRGBA)
+ osl_attr.type = TypeRGBA;
+ else
+ osl_attr.type = TypeDesc::TypeColor;
+
+ if (req.std != ATTR_STD_NONE) {
+ /* if standard attribute, add lookup by geom: name convention */
+ ustring stdname(string("geom:") + string(Attribute::standard_name(req.std)));
+ og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_GEOMETRY][stdname] = osl_attr;
+ }
+ else if (req.name != ustring()) {
+ /* add lookup by geometry attribute name */
+ og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_GEOMETRY][req.name] = osl_attr;
+ }
+ }
+
+ if (req.subd_desc.element != ATTR_ELEMENT_NONE) {
+ osl_attr.desc = req.subd_desc;
+
+ if (req.subd_type == TypeDesc::TypeFloat)
+ osl_attr.type = TypeDesc::TypeFloat;
+ else if (req.subd_type == TypeDesc::TypeMatrix)
+ osl_attr.type = TypeDesc::TypeMatrix;
+ else if (req.subd_type == TypeFloat2)
+ osl_attr.type = TypeFloat2;
+ else if (req.subd_type == TypeRGBA)
+ osl_attr.type = TypeRGBA;
+ else
+ osl_attr.type = TypeDesc::TypeColor;
+
+ if (req.std != ATTR_STD_NONE) {
+ /* if standard attribute, add lookup by geom: name convention */
+ ustring stdname(string("geom:") + string(Attribute::standard_name(req.std)));
+ og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_SUBD][stdname] = osl_attr;
+ }
+ else if (req.name != ustring()) {
+ /* add lookup by geometry attribute name */
+ og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_SUBD][req.name] = osl_attr;
+ }
+ }
+ }
+ }
+#else
+ (void)device;
+ (void)scene;
+ (void)geom_attributes;
+#endif
+}
+
+void GeometryManager::update_svm_attributes(Device *,
+ DeviceScene *dscene,
+ Scene *scene,
+ vector<AttributeRequestSet> &geom_attributes)
+{
+ /* for SVM, the attributes_map table is used to look up the offset of an
+ * attribute, based on a unique shader attribute id. */
+
+ /* compute array stride */
+ int attr_map_size = 0;
+
+ for (size_t i = 0; i < scene->geometry.size(); i++) {
+ Geometry *geom = scene->geometry[i];
+ geom->attr_map_offset = attr_map_size;
+ attr_map_size += (geom_attributes[i].size() + 1) * ATTR_PRIM_TYPES;
+ }
+
+ if (attr_map_size == 0)
+ return;
+
+ /* create attribute map */
+ uint4 *attr_map = dscene->attributes_map.alloc(attr_map_size);
+ memset(attr_map, 0, dscene->attributes_map.size() * sizeof(uint));
+
+ for (size_t i = 0; i < scene->geometry.size(); i++) {
+ Geometry *geom = scene->geometry[i];
+ AttributeRequestSet &attributes = geom_attributes[i];
+
+ /* set object attributes */
+ int index = geom->attr_map_offset;
+
+ foreach (AttributeRequest &req, attributes.requests) {
+ uint id;
+
+ if (req.std == ATTR_STD_NONE)
+ id = scene->shader_manager->get_attribute_id(req.name);
+ else
+ id = scene->shader_manager->get_attribute_id(req.std);
+
+ attr_map[index].x = id;
+ attr_map[index].y = req.desc.element;
+ attr_map[index].z = as_uint(req.desc.offset);
+
+ if (req.type == TypeDesc::TypeFloat)
+ attr_map[index].w = NODE_ATTR_FLOAT;
+ else if (req.type == TypeDesc::TypeMatrix)
+ attr_map[index].w = NODE_ATTR_MATRIX;
+ else if (req.type == TypeFloat2)
+ attr_map[index].w = NODE_ATTR_FLOAT2;
+ else if (req.type == TypeRGBA)
+ attr_map[index].w = NODE_ATTR_RGBA;
+ else
+ attr_map[index].w = NODE_ATTR_FLOAT3;
+
+ attr_map[index].w |= req.desc.flags << 8;
+
+ index++;
+
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (mesh->subd_faces.size()) {
+ attr_map[index].x = id;
+ attr_map[index].y = req.subd_desc.element;
+ attr_map[index].z = as_uint(req.subd_desc.offset);
+
+ if (req.subd_type == TypeDesc::TypeFloat)
+ attr_map[index].w = NODE_ATTR_FLOAT;
+ else if (req.subd_type == TypeDesc::TypeMatrix)
+ attr_map[index].w = NODE_ATTR_MATRIX;
+ else if (req.subd_type == TypeFloat2)
+ attr_map[index].w = NODE_ATTR_FLOAT2;
+ else if (req.subd_type == TypeRGBA)
+ attr_map[index].w = NODE_ATTR_RGBA;
+ else
+ attr_map[index].w = NODE_ATTR_FLOAT3;
+
+ attr_map[index].w |= req.subd_desc.flags << 8;
+ }
+ }
+
+ index++;
+ }
+
+ /* terminator */
+ for (int j = 0; j < ATTR_PRIM_TYPES; j++) {
+ attr_map[index].x = ATTR_STD_NONE;
+ attr_map[index].y = 0;
+ attr_map[index].z = 0;
+ attr_map[index].w = 0;
+
+ index++;
+ }
+ }
+
+ /* copy to device */
+ dscene->attributes_map.copy_to_device();
+}
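
For reference, each attr_map entry written above packs the shader attribute id (x), element type (y), offset (z) and a combined type/flags word (w), with the descriptor flags shifted above the type bits. A small sketch of that packing and a plausible decode; the constants and mask here are illustrative, not the real NODE_ATTR_* values used by the kernel:

#include <cstdint>
#include <cstdio>

/* Illustrative stand-ins for the kernel's attribute type enum. */
enum ToyAttrType : uint32_t { ATTR_FLOAT = 0, ATTR_FLOAT2, ATTR_FLOAT3, ATTR_RGBA, ATTR_MATRIX };

/* Mirrors "attr_map[index].w = type; attr_map[index].w |= flags << 8;". */
static uint32_t pack_type_flags(ToyAttrType type, uint32_t flags)
{
  return uint32_t(type) | (flags << 8);
}

/* Assumed decode: type in the low 8 bits, flags in the bits above. */
static void unpack_type_flags(uint32_t w, ToyAttrType &type, uint32_t &flags)
{
  type = ToyAttrType(w & 0xffu);
  flags = w >> 8;
}

int main()
{
  const uint32_t w = pack_type_flags(ATTR_FLOAT2, /*flags=*/0x3);
  ToyAttrType type;
  uint32_t flags;
  unpack_type_flags(w, type, flags);
  std::printf("type=%u flags=0x%x\n", unsigned(type), (unsigned)flags);
  return 0;
}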
+
+static void update_attribute_element_size(Geometry *geom,
+ Attribute *mattr,
+ AttributePrimitive prim,
+ size_t *attr_float_size,
+ size_t *attr_float2_size,
+ size_t *attr_float3_size,
+ size_t *attr_uchar4_size)
+{
+ if (mattr) {
+ size_t size = mattr->element_size(geom, prim);
+
+ if (mattr->element == ATTR_ELEMENT_VOXEL) {
+ /* pass */
+ }
+ else if (mattr->element == ATTR_ELEMENT_CORNER_BYTE) {
+ *attr_uchar4_size += size;
+ }
+ else if (mattr->type == TypeDesc::TypeFloat) {
+ *attr_float_size += size;
+ }
+ else if (mattr->type == TypeFloat2) {
+ *attr_float2_size += size;
+ }
+ else if (mattr->type == TypeDesc::TypeMatrix) {
+ *attr_float3_size += size * 4;
+ }
+ else {
+ *attr_float3_size += size;
+ }
+ }
+}
+
+static void update_attribute_element_offset(Geometry *geom,
+ device_vector<float> &attr_float,
+ size_t &attr_float_offset,
+ device_vector<float2> &attr_float2,
+ size_t &attr_float2_offset,
+ device_vector<float4> &attr_float3,
+ size_t &attr_float3_offset,
+ device_vector<uchar4> &attr_uchar4,
+ size_t &attr_uchar4_offset,
+ Attribute *mattr,
+ AttributePrimitive prim,
+ TypeDesc &type,
+ AttributeDescriptor &desc)
+{
+ if (mattr) {
+ /* store element and type */
+ desc.element = mattr->element;
+ desc.flags = mattr->flags;
+ type = mattr->type;
+
+ /* store attribute data in arrays */
+ size_t size = mattr->element_size(geom, prim);
+
+ AttributeElement &element = desc.element;
+ int &offset = desc.offset;
+
+ if (mattr->element == ATTR_ELEMENT_VOXEL) {
+ /* store slot in offset value */
+ ImageHandle &handle = mattr->data_voxel();
+ offset = handle.svm_slot();
+ }
+ else if (mattr->element == ATTR_ELEMENT_CORNER_BYTE) {
+ uchar4 *data = mattr->data_uchar4();
+ offset = attr_uchar4_offset;
+
+ assert(attr_uchar4.size() >= offset + size);
+ for (size_t k = 0; k < size; k++) {
+ attr_uchar4[offset + k] = data[k];
+ }
+ attr_uchar4_offset += size;
+ }
+ else if (mattr->type == TypeDesc::TypeFloat) {
+ float *data = mattr->data_float();
+ offset = attr_float_offset;
+
+ assert(attr_float.size() >= offset + size);
+ for (size_t k = 0; k < size; k++) {
+ attr_float[offset + k] = data[k];
+ }
+ attr_float_offset += size;
+ }
+ else if (mattr->type == TypeFloat2) {
+ float2 *data = mattr->data_float2();
+ offset = attr_float2_offset;
+
+ assert(attr_float2.size() >= offset + size);
+ for (size_t k = 0; k < size; k++) {
+ attr_float2[offset + k] = data[k];
+ }
+ attr_float2_offset += size;
+ }
+ else if (mattr->type == TypeDesc::TypeMatrix) {
+ Transform *tfm = mattr->data_transform();
+ offset = attr_float3_offset;
+
+ assert(attr_float3.size() >= offset + size * 3);
+ for (size_t k = 0; k < size * 3; k++) {
+ attr_float3[offset + k] = (&tfm->x)[k];
+ }
+ attr_float3_offset += size * 3;
+ }
+ else {
+ float4 *data = mattr->data_float4();
+ offset = attr_float3_offset;
+
+ assert(attr_float3.size() >= offset + size);
+ for (size_t k = 0; k < size; k++) {
+ attr_float3[offset + k] = data[k];
+ }
+ attr_float3_offset += size;
+ }
+
+ /* mesh vertex/curve index is global, not per object, so we sneak
+ * a correction for that in here */
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (mesh->subdivision_type == Mesh::SUBDIVISION_CATMULL_CLARK &&
+ desc.flags & ATTR_SUBDIVIDED) {
+ /* indices for subdivided attributes are retrieved
+ * from the patch table, so no correction is needed here */
+ }
+ else if (element == ATTR_ELEMENT_VERTEX)
+ offset -= mesh->vert_offset;
+ else if (element == ATTR_ELEMENT_VERTEX_MOTION)
+ offset -= mesh->vert_offset;
+ else if (element == ATTR_ELEMENT_FACE) {
+ if (prim == ATTR_PRIM_GEOMETRY)
+ offset -= mesh->prim_offset;
+ else
+ offset -= mesh->face_offset;
+ }
+ else if (element == ATTR_ELEMENT_CORNER || element == ATTR_ELEMENT_CORNER_BYTE) {
+ if (prim == ATTR_PRIM_GEOMETRY)
+ offset -= 3 * mesh->prim_offset;
+ else
+ offset -= mesh->corner_offset;
+ }
+ }
+ else if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ if (element == ATTR_ELEMENT_CURVE)
+ offset -= hair->prim_offset;
+ else if (element == ATTR_ELEMENT_CURVE_KEY)
+ offset -= hair->curvekey_offset;
+ else if (element == ATTR_ELEMENT_CURVE_KEY_MOTION)
+ offset -= hair->curvekey_offset;
+ }
+ }
+ else {
+ /* attribute not found */
+ desc.element = ATTR_ELEMENT_NONE;
+ desc.offset = 0;
+ }
+}
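
The per-element corrections above exist because the kernel indexes attributes with global vertex/curve indices, while each geometry's attribute data is stored back to back; subtracting the geometry's own offset makes the two agree. A tiny self-contained illustration of that idea, using toy data rather than the real layout:

#include <cassert>
#include <cstdio>
#include <vector>

int main()
{
  /* Two meshes with 4 and 3 vertices; their per-vertex float attribute
   * lives after two unrelated floats in the shared array. */
  std::vector<float> attr_float = {99.f, 98.f, 0.f, 1.f, 2.f, 3.f, 10.f, 11.f, 12.f};
  const int vert_offset[2] = {0, 4}; /* global index of each mesh's first vertex */
  const int raw_offset[2] = {2, 6};  /* where each mesh's attribute data starts */

  /* Stored (corrected) offsets, as update_attribute_element_offset() does
   * for ATTR_ELEMENT_VERTEX: raw offset minus the mesh's vertex offset. */
  const int stored_offset[2] = {raw_offset[0] - vert_offset[0], raw_offset[1] - vert_offset[1]};

  /* Lookup by global vertex index, the way the kernel addresses the array. */
  const int global_vertex = 5; /* second vertex of the second mesh */
  const float value = attr_float[stored_offset[1] + global_vertex];
  assert(value == 11.0f);
  std::printf("value = %g\n", value);
  return 0;
}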
+
+void GeometryManager::device_update_attributes(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ Progress &progress)
+{
+ progress.set_status("Updating Mesh", "Computing attributes");
+
+ /* gather per mesh requested attributes. as meshes may have multiple
+ * shaders assigned, this merges the requested attributes that have
+ * been set per shader by the shader manager */
+ vector<AttributeRequestSet> geom_attributes(scene->geometry.size());
+
+ for (size_t i = 0; i < scene->geometry.size(); i++) {
+ Geometry *geom = scene->geometry[i];
+
+ scene->need_global_attributes(geom_attributes[i]);
+
+ foreach (Shader *shader, geom->used_shaders) {
+ geom_attributes[i].add(shader->attributes);
+ }
+ }
+
+ /* mesh attributes are stored in a single array per data type. here we fill
+ * those arrays, and set the offset and element type to create attribute
+ * maps next */
+
+ /* Pre-allocate attributes to avoid array re-allocations, which would
+ * temporarily take 2x the overall attribute memory usage.
+ */
+ size_t attr_float_size = 0;
+ size_t attr_float2_size = 0;
+ size_t attr_float3_size = 0;
+ size_t attr_uchar4_size = 0;
+ for (size_t i = 0; i < scene->geometry.size(); i++) {
+ Geometry *geom = scene->geometry[i];
+ AttributeRequestSet &attributes = geom_attributes[i];
+ foreach (AttributeRequest &req, attributes.requests) {
+ Attribute *attr = geom->attributes.find(req);
+
+ update_attribute_element_size(geom,
+ attr,
+ ATTR_PRIM_GEOMETRY,
+ &attr_float_size,
+ &attr_float2_size,
+ &attr_float3_size,
+ &attr_uchar4_size);
+
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ Attribute *subd_attr = mesh->subd_attributes.find(req);
+
+ update_attribute_element_size(mesh,
+ subd_attr,
+ ATTR_PRIM_SUBD,
+ &attr_float_size,
+ &attr_float2_size,
+ &attr_float3_size,
+ &attr_uchar4_size);
+ }
+ }
+ }
+
+ dscene->attributes_float.alloc(attr_float_size);
+ dscene->attributes_float2.alloc(attr_float2_size);
+ dscene->attributes_float3.alloc(attr_float3_size);
+ dscene->attributes_uchar4.alloc(attr_uchar4_size);
+
+ size_t attr_float_offset = 0;
+ size_t attr_float2_offset = 0;
+ size_t attr_float3_offset = 0;
+ size_t attr_uchar4_offset = 0;
+
+ /* Fill in attributes. */
+ for (size_t i = 0; i < scene->geometry.size(); i++) {
+ Geometry *geom = scene->geometry[i];
+ AttributeRequestSet &attributes = geom_attributes[i];
+
+ /* todo: we now store std and name attributes from requests even if
+ * they actually refer to the same mesh attributes, optimize */
+ foreach (AttributeRequest &req, attributes.requests) {
+ Attribute *attr = geom->attributes.find(req);
+ update_attribute_element_offset(geom,
+ dscene->attributes_float,
+ attr_float_offset,
+ dscene->attributes_float2,
+ attr_float2_offset,
+ dscene->attributes_float3,
+ attr_float3_offset,
+ dscene->attributes_uchar4,
+ attr_uchar4_offset,
+ attr,
+ ATTR_PRIM_GEOMETRY,
+ req.type,
+ req.desc);
+
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ Attribute *subd_attr = mesh->subd_attributes.find(req);
+
+ update_attribute_element_offset(mesh,
+ dscene->attributes_float,
+ attr_float_offset,
+ dscene->attributes_float2,
+ attr_float2_offset,
+ dscene->attributes_float3,
+ attr_float3_offset,
+ dscene->attributes_uchar4,
+ attr_uchar4_offset,
+ subd_attr,
+ ATTR_PRIM_SUBD,
+ req.subd_type,
+ req.subd_desc);
+ }
+
+ if (progress.get_cancel())
+ return;
+ }
+ }
+
+ /* create attribute lookup maps */
+ if (scene->shader_manager->use_osl())
+ update_osl_attributes(device, scene, geom_attributes);
+
+ update_svm_attributes(device, dscene, scene, geom_attributes);
+
+ if (progress.get_cancel())
+ return;
+
+ /* copy to device */
+ progress.set_status("Updating Mesh", "Copying Attributes to device");
+
+ if (dscene->attributes_float.size()) {
+ dscene->attributes_float.copy_to_device();
+ }
+ if (dscene->attributes_float2.size()) {
+ dscene->attributes_float2.copy_to_device();
+ }
+ if (dscene->attributes_float3.size()) {
+ dscene->attributes_float3.copy_to_device();
+ }
+ if (dscene->attributes_uchar4.size()) {
+ dscene->attributes_uchar4.copy_to_device();
+ }
+
+ if (progress.get_cancel())
+ return;
+
+ /* After mesh attributes and patch tables have been copied to device memory,
+ * we need to update offsets in the objects. */
+ scene->object_manager->device_update_mesh_offsets(device, dscene, scene);
+}
+
+void GeometryManager::mesh_calc_offset(Scene *scene)
+{
+ size_t vert_size = 0;
+ size_t tri_size = 0;
+
+ size_t curve_key_size = 0;
+ size_t curve_size = 0;
+
+ size_t patch_size = 0;
+ size_t face_size = 0;
+ size_t corner_size = 0;
+
+ size_t optix_prim_size = 0;
+
+ foreach (Geometry *geom, scene->geometry) {
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+
+ mesh->vert_offset = vert_size;
+ mesh->prim_offset = tri_size;
+
+ mesh->patch_offset = patch_size;
+ mesh->face_offset = face_size;
+ mesh->corner_offset = corner_size;
+
+ vert_size += mesh->verts.size();
+ tri_size += mesh->num_triangles();
+
+ if (mesh->subd_faces.size()) {
+ Mesh::SubdFace &last = mesh->subd_faces[mesh->subd_faces.size() - 1];
+ patch_size += (last.ptex_offset + last.num_ptex_faces()) * 8;
+
+ /* patch tables are stored in the same array, so include them in patch_size */
+ if (mesh->patch_table) {
+ mesh->patch_table_offset = patch_size;
+ patch_size += mesh->patch_table->total_size();
+ }
+ }
+
+ face_size += mesh->subd_faces.size();
+ corner_size += mesh->subd_face_corners.size();
+
+ mesh->optix_prim_offset = optix_prim_size;
+ optix_prim_size += mesh->num_triangles();
+ }
+ else if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+
+ hair->curvekey_offset = curve_key_size;
+ hair->prim_offset = curve_size;
+
+ curve_key_size += hair->curve_keys.size();
+ curve_size += hair->num_curves();
+
+ hair->optix_prim_offset = optix_prim_size;
+ optix_prim_size += hair->num_segments();
+ }
+ }
+}
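
The offsets computed above are simple running sums over the scene's geometry, so per-geometry data can later be concatenated into single global arrays. The same pattern in a self-contained sketch with toy types:

#include <cstddef>
#include <cstdio>
#include <vector>

/* Toy geometry: only the counts that matter for the offset pass. */
struct ToyMesh {
  size_t num_verts;
  size_t num_triangles;
  size_t vert_offset = 0; /* filled in by calc_offsets() */
  size_t prim_offset = 0;
};

/* Same pattern as GeometryManager::mesh_calc_offset(): each geometry's
 * offset is the total count of all geometries processed before it. */
static void calc_offsets(std::vector<ToyMesh> &meshes)
{
  size_t vert_size = 0, tri_size = 0;
  for (ToyMesh &mesh : meshes) {
    mesh.vert_offset = vert_size;
    mesh.prim_offset = tri_size;
    vert_size += mesh.num_verts;
    tri_size += mesh.num_triangles;
  }
}

int main()
{
  std::vector<ToyMesh> meshes = {{8, 12}, {4, 2}, {100, 96}};
  calc_offsets(meshes);
  for (const ToyMesh &mesh : meshes)
    std::printf("vert_offset=%zu prim_offset=%zu\n", mesh.vert_offset, mesh.prim_offset);
  return 0;
}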
+
+void GeometryManager::device_update_mesh(
+ Device *, DeviceScene *dscene, Scene *scene, bool for_displacement, Progress &progress)
+{
+ /* Count. */
+ size_t vert_size = 0;
+ size_t tri_size = 0;
+
+ size_t curve_key_size = 0;
+ size_t curve_size = 0;
+
+ size_t patch_size = 0;
+
+ foreach (Geometry *geom, scene->geometry) {
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+
+ vert_size += mesh->verts.size();
+ tri_size += mesh->num_triangles();
+
+ if (mesh->subd_faces.size()) {
+ Mesh::SubdFace &last = mesh->subd_faces[mesh->subd_faces.size() - 1];
+ patch_size += (last.ptex_offset + last.num_ptex_faces()) * 8;
+
+ /* patch tables are stored in the same array, so include them in patch_size */
+ if (mesh->patch_table) {
+ mesh->patch_table_offset = patch_size;
+ patch_size += mesh->patch_table->total_size();
+ }
+ }
+ }
+ else if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+
+ curve_key_size += hair->curve_keys.size();
+ curve_size += hair->num_curves();
+ }
+ }
+
+ /* Create mapping from triangle to primitive triangle array. */
+ vector<uint> tri_prim_index(tri_size);
+ if (for_displacement) {
+ /* For displacement kernels we do some trickery to make them believe
+ * we've got all required data ready. However, that data is different
+ * from final render kernels since we don't have BVH yet, so can't
+ * really use same semantic of arrays.
+ */
+ foreach (Geometry *geom, scene->geometry) {
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ for (size_t i = 0; i < mesh->num_triangles(); ++i) {
+ tri_prim_index[i + mesh->prim_offset] = 3 * (i + mesh->prim_offset);
+ }
+ }
+ }
+ }
+ else {
+ for (size_t i = 0; i < dscene->prim_index.size(); ++i) {
+ if ((dscene->prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) {
+ tri_prim_index[dscene->prim_index[i]] = dscene->prim_tri_index[i];
+ }
+ }
+ }
+
+ /* Fill in all the arrays. */
+ if (tri_size != 0) {
+ /* normals */
+ progress.set_status("Updating Mesh", "Computing normals");
+
+ uint *tri_shader = dscene->tri_shader.alloc(tri_size);
+ float4 *vnormal = dscene->tri_vnormal.alloc(vert_size);
+ uint4 *tri_vindex = dscene->tri_vindex.alloc(tri_size);
+ uint *tri_patch = dscene->tri_patch.alloc(tri_size);
+ float2 *tri_patch_uv = dscene->tri_patch_uv.alloc(vert_size);
+
+ foreach (Geometry *geom, scene->geometry) {
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ mesh->pack_shaders(scene, &tri_shader[mesh->prim_offset]);
+ mesh->pack_normals(&vnormal[mesh->vert_offset]);
+ mesh->pack_verts(tri_prim_index,
+ &tri_vindex[mesh->prim_offset],
+ &tri_patch[mesh->prim_offset],
+ &tri_patch_uv[mesh->vert_offset],
+ mesh->vert_offset,
+ mesh->prim_offset);
+ if (progress.get_cancel())
+ return;
+ }
+ }
+
+ /* vertex coordinates */
+ progress.set_status("Updating Mesh", "Copying Mesh to device");
+
+ dscene->tri_shader.copy_to_device();
+ dscene->tri_vnormal.copy_to_device();
+ dscene->tri_vindex.copy_to_device();
+ dscene->tri_patch.copy_to_device();
+ dscene->tri_patch_uv.copy_to_device();
+ }
+
+ if (curve_size != 0) {
+ progress.set_status("Updating Mesh", "Copying Strands to device");
+
+ float4 *curve_keys = dscene->curve_keys.alloc(curve_key_size);
+ float4 *curves = dscene->curves.alloc(curve_size);
+
+ foreach (Geometry *geom, scene->geometry) {
+ if (geom->type == Geometry::HAIR) {
+ Hair *hair = static_cast<Hair *>(geom);
+ hair->pack_curves(scene,
+ &curve_keys[hair->curvekey_offset],
+ &curves[hair->prim_offset],
+ hair->curvekey_offset);
+ if (progress.get_cancel())
+ return;
+ }
+ }
+
+ dscene->curve_keys.copy_to_device();
+ dscene->curves.copy_to_device();
+ }
+
+ if (patch_size != 0) {
+ progress.set_status("Updating Mesh", "Copying Patches to device");
+
+ uint *patch_data = dscene->patches.alloc(patch_size);
+
+ foreach (Geometry *geom, scene->geometry) {
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ mesh->pack_patches(&patch_data[mesh->patch_offset],
+ mesh->vert_offset,
+ mesh->face_offset,
+ mesh->corner_offset);
+
+ if (mesh->patch_table) {
+ mesh->patch_table->copy_adjusting_offsets(&patch_data[mesh->patch_table_offset],
+ mesh->patch_table_offset);
+ }
+
+ if (progress.get_cancel())
+ return;
+ }
+ }
+
+ dscene->patches.copy_to_device();
+ }
+
+ if (for_displacement) {
+ float4 *prim_tri_verts = dscene->prim_tri_verts.alloc(tri_size * 3);
+ foreach (Geometry *geom, scene->geometry) {
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ for (size_t i = 0; i < mesh->num_triangles(); ++i) {
+ Mesh::Triangle t = mesh->get_triangle(i);
+ size_t offset = 3 * (i + mesh->prim_offset);
+ prim_tri_verts[offset + 0] = float3_to_float4(mesh->verts[t.v[0]]);
+ prim_tri_verts[offset + 1] = float3_to_float4(mesh->verts[t.v[1]]);
+ prim_tri_verts[offset + 2] = float3_to_float4(mesh->verts[t.v[2]]);
+ }
+ }
+ }
+ dscene->prim_tri_verts.copy_to_device();
+ }
+}
+
+void GeometryManager::device_update_bvh(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ Progress &progress)
+{
+ /* bvh build */
+ progress.set_status("Updating Scene BVH", "Building");
+
+ BVHParams bparams;
+ bparams.top_level = true;
+ bparams.bvh_layout = BVHParams::best_bvh_layout(scene->params.bvh_layout,
+ device->get_bvh_layout_mask());
+ bparams.use_spatial_split = scene->params.use_bvh_spatial_split;
+ bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
+ scene->params.use_bvh_unaligned_nodes;
+ bparams.num_motion_triangle_steps = scene->params.num_bvh_time_steps;
+ bparams.num_motion_curve_steps = scene->params.num_bvh_time_steps;
+ bparams.bvh_type = scene->params.bvh_type;
+ bparams.curve_flags = dscene->data.curve.curveflags;
+ bparams.curve_subdivisions = dscene->data.curve.subdivisions;
+
+ VLOG(1) << "Using " << bvh_layout_name(bparams.bvh_layout) << " layout.";
+
+#ifdef WITH_EMBREE
+ if (bparams.bvh_layout == BVH_LAYOUT_EMBREE) {
+ if (dscene->data.bvh.scene) {
+ BVHEmbree::destroy(dscene->data.bvh.scene);
+ }
+ }
+#endif
+
+ BVH *bvh = BVH::create(bparams, scene->geometry, scene->objects);
+ bvh->build(progress, &device->stats);
+
+ if (progress.get_cancel()) {
+#ifdef WITH_EMBREE
+ if (bparams.bvh_layout == BVH_LAYOUT_EMBREE) {
+ if (dscene->data.bvh.scene) {
+ BVHEmbree::destroy(dscene->data.bvh.scene);
+ }
+ }
+#endif
+ delete bvh;
+ return;
+ }
+
+ /* copy to device */
+ progress.set_status("Updating Scene BVH", "Copying BVH to device");
+
+ PackedBVH &pack = bvh->pack;
+
+ if (pack.nodes.size()) {
+ dscene->bvh_nodes.steal_data(pack.nodes);
+ dscene->bvh_nodes.copy_to_device();
+ }
+ if (pack.leaf_nodes.size()) {
+ dscene->bvh_leaf_nodes.steal_data(pack.leaf_nodes);
+ dscene->bvh_leaf_nodes.copy_to_device();
+ }
+ if (pack.object_node.size()) {
+ dscene->object_node.steal_data(pack.object_node);
+ dscene->object_node.copy_to_device();
+ }
+ if (pack.prim_tri_index.size()) {
+ dscene->prim_tri_index.steal_data(pack.prim_tri_index);
+ dscene->prim_tri_index.copy_to_device();
+ }
+ if (pack.prim_tri_verts.size()) {
+ dscene->prim_tri_verts.steal_data(pack.prim_tri_verts);
+ dscene->prim_tri_verts.copy_to_device();
+ }
+ if (pack.prim_type.size()) {
+ dscene->prim_type.steal_data(pack.prim_type);
+ dscene->prim_type.copy_to_device();
+ }
+ if (pack.prim_visibility.size()) {
+ dscene->prim_visibility.steal_data(pack.prim_visibility);
+ dscene->prim_visibility.copy_to_device();
+ }
+ if (pack.prim_index.size()) {
+ dscene->prim_index.steal_data(pack.prim_index);
+ dscene->prim_index.copy_to_device();
+ }
+ if (pack.prim_object.size()) {
+ dscene->prim_object.steal_data(pack.prim_object);
+ dscene->prim_object.copy_to_device();
+ }
+ if (pack.prim_time.size()) {
+ dscene->prim_time.steal_data(pack.prim_time);
+ dscene->prim_time.copy_to_device();
+ }
+
+ dscene->data.bvh.root = pack.root_index;
+ dscene->data.bvh.bvh_layout = bparams.bvh_layout;
+ dscene->data.bvh.use_bvh_steps = (scene->params.num_bvh_time_steps != 0);
+
+ bvh->copy_to_device(progress, dscene);
+
+ delete bvh;
+}
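
The steal_data() calls above hand the packed BVH arrays over to the device vectors without copying them. A rough standard-library analogue of that ownership transfer, with std::vector standing in for the Cycles device_vector:

#include <cstdio>
#include <utility>
#include <vector>

int main()
{
  /* Host-side packed data, e.g. BVH nodes produced by the builder. */
  std::vector<int> pack_nodes(1000000, 42);

  /* "Steal" the storage instead of copying element by element; after the
   * move the source no longer owns the buffer. */
  std::vector<int> device_staging = std::move(pack_nodes);

  std::printf("staging holds %zu elements\n", device_staging.size());
  return 0;
}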
+
+void GeometryManager::device_update_preprocess(Device *device, Scene *scene, Progress &progress)
+{
+ if (!need_update && !need_flags_update) {
+ return;
+ }
+
+ progress.set_status("Updating Meshes Flags");
+
+ /* Update flags. */
+ bool volume_images_updated = false;
+
+ foreach (Geometry *geom, scene->geometry) {
+ geom->has_volume = false;
+
+ foreach (const Shader *shader, geom->used_shaders) {
+ if (shader->has_volume) {
+ geom->has_volume = true;
+ }
+ if (shader->has_surface_bssrdf) {
+ geom->has_surface_bssrdf = true;
+ }
+ }
+
+ if (need_update && geom->has_volume && geom->type == Geometry::MESH) {
+ /* Create volume meshes if there is voxel data. */
+ if (geom->has_voxel_attributes()) {
+ if (!volume_images_updated) {
+ progress.set_status("Updating Meshes Volume Bounds");
+ device_update_volume_images(device, scene, progress);
+ volume_images_updated = true;
+ }
+
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ create_volume_mesh(mesh, progress);
+ }
+ }
+ }
+
+ need_flags_update = false;
+}
+
+void GeometryManager::device_update_displacement_images(Device *device,
+ Scene *scene,
+ Progress &progress)
+{
+ progress.set_status("Updating Displacement Images");
+ TaskPool pool;
+ ImageManager *image_manager = scene->image_manager;
+ set<int> bump_images;
+ foreach (Geometry *geom, scene->geometry) {
+ if (geom->need_update) {
+ foreach (Shader *shader, geom->used_shaders) {
+ if (!shader->has_displacement || shader->displacement_method == DISPLACE_BUMP) {
+ continue;
+ }
+ foreach (ShaderNode *node, shader->graph->nodes) {
+ if (node->special_type != SHADER_SPECIAL_TYPE_IMAGE_SLOT) {
+ continue;
+ }
+
+ ImageSlotTextureNode *image_node = static_cast<ImageSlotTextureNode *>(node);
+ for (int i = 0; i < image_node->handle.num_tiles(); i++) {
+ const int slot = image_node->handle.svm_slot(i);
+ if (slot != -1) {
+ bump_images.insert(slot);
+ }
+ }
+ }
+ }
+ }
+ }
+ foreach (int slot, bump_images) {
+ pool.push(function_bind(
+ &ImageManager::device_update_slot, image_manager, device, scene, slot, &progress));
+ }
+ pool.wait_work();
+}
+
+void GeometryManager::device_update_volume_images(Device *device, Scene *scene, Progress &progress)
+{
+ progress.set_status("Updating Volume Images");
+ TaskPool pool;
+ ImageManager *image_manager = scene->image_manager;
+ set<int> volume_images;
+
+ foreach (Geometry *geom, scene->geometry) {
+ if (!geom->need_update) {
+ continue;
+ }
+
+ foreach (Attribute &attr, geom->attributes.attributes) {
+ if (attr.element != ATTR_ELEMENT_VOXEL) {
+ continue;
+ }
+
+ ImageHandle &handle = attr.data_voxel();
+ const int slot = handle.svm_slot();
+ if (slot != -1) {
+ volume_images.insert(slot);
+ }
+ }
+ }
+
+ foreach (int slot, volume_images) {
+ pool.push(function_bind(
+ &ImageManager::device_update_slot, image_manager, device, scene, slot, &progress));
+ }
+ pool.wait_work();
+}
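
Both image-update helpers above follow the same shape: gather the unique image slots into a set, then update each slot as an independent task and wait for all of them. A rough standard-library sketch of that pattern, with std::async standing in for the Cycles TaskPool:

#include <cstdio>
#include <future>
#include <iterator>
#include <set>
#include <vector>

/* Placeholder for ImageManager::device_update_slot(). */
static void update_slot(int slot)
{
  std::printf("updating image slot %d\n", slot);
}

int main()
{
  /* Slots referenced by several shaders/attributes; duplicates are expected. */
  const int referenced_slots[] = {3, 7, 3, 1, 7};
  std::set<int> unique_slots(std::begin(referenced_slots), std::end(referenced_slots));

  /* Update each unique slot concurrently. */
  std::vector<std::future<void>> tasks;
  for (const int slot : unique_slots)
    tasks.push_back(std::async(std::launch::async, update_slot, slot));

  for (std::future<void> &task : tasks)
    task.wait(); /* equivalent of pool.wait_work() */
  return 0;
}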
+
+void GeometryManager::device_update(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ Progress &progress)
+{
+ if (!need_update)
+ return;
+
+ VLOG(1) << "Total " << scene->geometry.size() << " meshes.";
+
+ bool true_displacement_used = false;
+ size_t total_tess_needed = 0;
+
+ foreach (Geometry *geom, scene->geometry) {
+ foreach (Shader *shader, geom->used_shaders) {
+ if (shader->need_update_geometry)
+ geom->need_update = true;
+ }
+
+ if (geom->need_update && geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+
+ /* Update normals. */
+ mesh->add_face_normals();
+ mesh->add_vertex_normals();
+
+ if (mesh->need_attribute(scene, ATTR_STD_POSITION_UNDISPLACED)) {
+ mesh->add_undisplaced();
+ }
+
+ /* Test if we need tessellation. */
+ if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE && mesh->num_subd_verts == 0 &&
+ mesh->subd_params) {
+ total_tess_needed++;
+ }
+
+ /* Test if we need displacement. */
+ if (mesh->has_true_displacement()) {
+ true_displacement_used = true;
+ }
+
+ if (progress.get_cancel())
+ return;
+ }
+ }
+
+ /* Tessellate meshes that are using subdivision */
+ if (total_tess_needed) {
+ Camera *dicing_camera = scene->dicing_camera;
+ dicing_camera->update(scene);
+
+ size_t i = 0;
+ foreach (Geometry *geom, scene->geometry) {
+ if (!(geom->need_update && geom->type == Geometry::MESH)) {
+ continue;
+ }
+
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE && mesh->num_subd_verts == 0 &&
+ mesh->subd_params) {
+ string msg = "Tessellating ";
+ if (mesh->name == "")
+ msg += string_printf("%u/%u", (uint)(i + 1), (uint)total_tess_needed);
+ else
+ msg += string_printf(
+ "%s %u/%u", mesh->name.c_str(), (uint)(i + 1), (uint)total_tess_needed);
+
+ progress.set_status("Updating Mesh", msg);
+
+ mesh->subd_params->camera = dicing_camera;
+ DiagSplit dsplit(*mesh->subd_params);
+ mesh->tessellate(&dsplit);
+
+ i++;
+
+ if (progress.get_cancel())
+ return;
+ }
+ }
+ }
+
+ /* Update images needed for true displacement. */
+ bool old_need_object_flags_update = false;
+ if (true_displacement_used) {
+ VLOG(1) << "Updating images used for true displacement.";
+ device_update_displacement_images(device, scene, progress);
+ old_need_object_flags_update = scene->object_manager->need_flags_update;
+ scene->object_manager->device_update_flags(device, dscene, scene, progress, false);
+ }
+
+ /* Device update. */
+ device_free(device, dscene);
+
+ mesh_calc_offset(scene);
+ if (true_displacement_used) {
+ device_update_mesh(device, dscene, scene, true, progress);
+ }
+ if (progress.get_cancel())
+ return;
+
+ device_update_attributes(device, dscene, scene, progress);
+ if (progress.get_cancel())
+ return;
+
+ /* Update displacement. */
+ bool displacement_done = false;
+ size_t num_bvh = 0;
+ BVHLayout bvh_layout = BVHParams::best_bvh_layout(scene->params.bvh_layout,
+ device->get_bvh_layout_mask());
+
+ foreach (Geometry *geom, scene->geometry) {
+ if (geom->need_update) {
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (displace(device, dscene, scene, mesh, progress)) {
+ displacement_done = true;
+ }
+ }
+
+ if (geom->need_build_bvh(bvh_layout)) {
+ num_bvh++;
+ }
+ }
+
+ if (progress.get_cancel())
+ return;
+ }
+
+ /* Device re-update after displacement. */
+ if (displacement_done) {
+ device_free(device, dscene);
+
+ device_update_attributes(device, dscene, scene, progress);
+ if (progress.get_cancel())
+ return;
+ }
+
+ TaskPool pool;
+
+ size_t i = 0;
+ foreach (Geometry *geom, scene->geometry) {
+ if (geom->need_update) {
+ pool.push(function_bind(
+ &Geometry::compute_bvh, geom, device, dscene, &scene->params, &progress, i, num_bvh));
+ if (geom->need_build_bvh(bvh_layout)) {
+ i++;
+ }
+ }
+ }
+
+ TaskPool::Summary summary;
+ pool.wait_work(&summary);
+ VLOG(2) << "Objects BVH build pool statistics:\n" << summary.full_report();
+
+ foreach (Shader *shader, scene->shaders) {
+ shader->need_update_geometry = false;
+ }
+
+ Scene::MotionType need_motion = scene->need_motion();
+ bool motion_blur = need_motion == Scene::MOTION_BLUR;
+
+ /* Update objects. */
+ vector<Object *> volume_objects;
+ foreach (Object *object, scene->objects) {
+ object->compute_bounds(motion_blur);
+ }
+
+ if (progress.get_cancel())
+ return;
+
+ device_update_bvh(device, dscene, scene, progress);
+ if (progress.get_cancel())
+ return;
+
+ device_update_mesh(device, dscene, scene, false, progress);
+ if (progress.get_cancel())
+ return;
+
+ need_update = false;
+
+ if (true_displacement_used) {
+ /* Re-tag flags for update, so they're re-evaluated
+ * for meshes with correct bounding boxes.
+ *
+ * This wouldn't cause wrong results, just true
+ * displacement might be less optimal to calculate.
+ */
+ scene->object_manager->need_flags_update = old_need_object_flags_update;
+ }
+}
+
+void GeometryManager::device_free(Device *device, DeviceScene *dscene)
+{
+ dscene->bvh_nodes.free();
+ dscene->bvh_leaf_nodes.free();
+ dscene->object_node.free();
+ dscene->prim_tri_verts.free();
+ dscene->prim_tri_index.free();
+ dscene->prim_type.free();
+ dscene->prim_visibility.free();
+ dscene->prim_index.free();
+ dscene->prim_object.free();
+ dscene->prim_time.free();
+ dscene->tri_shader.free();
+ dscene->tri_vnormal.free();
+ dscene->tri_vindex.free();
+ dscene->tri_patch.free();
+ dscene->tri_patch_uv.free();
+ dscene->curves.free();
+ dscene->curve_keys.free();
+ dscene->patches.free();
+ dscene->attributes_map.free();
+ dscene->attributes_float.free();
+ dscene->attributes_float2.free();
+ dscene->attributes_float3.free();
+ dscene->attributes_uchar4.free();
+
+ /* Signal for shaders like displacement not to do ray tracing. */
+ dscene->data.bvh.bvh_layout = BVH_LAYOUT_NONE;
+
+#ifdef WITH_OSL
+ OSLGlobals *og = (OSLGlobals *)device->osl_memory();
+
+ if (og) {
+ og->object_name_map.clear();
+ og->attribute_map.clear();
+ og->object_names.clear();
+ }
+#else
+ (void)device;
+#endif
+}
+
+void GeometryManager::tag_update(Scene *scene)
+{
+ need_update = true;
+ scene->object_manager->need_update = true;
+}
+
+void GeometryManager::collect_statistics(const Scene *scene, RenderStats *stats)
+{
+ foreach (Geometry *geometry, scene->geometry) {
+ stats->mesh.geometry.add_entry(
+ NamedSizeEntry(string(geometry->name.c_str()), geometry->get_total_size_in_bytes()));
+ }
+}
+
+CCL_NAMESPACE_END
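
Stepping back, the manager code in this file revolves around dirty flags: editing code calls tag_update(), and device_update() does work only while a flag is set and clears it afterwards. A minimal sketch of that pattern with a toy class, not the actual Cycles manager:

#include <cstdio>

struct ToyManager {
  bool need_update = true;

  void tag_update() { need_update = true; }

  void device_update()
  {
    if (!need_update)
      return; /* nothing changed since the last sync */
    std::printf("rebuilding device arrays\n");
    need_update = false;
  }
};

int main()
{
  ToyManager manager;
  manager.device_update(); /* first update always runs */
  manager.device_update(); /* no-op, nothing tagged */
  manager.tag_update();
  manager.device_update(); /* runs again after tagging */
  return 0;
}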
diff --git a/intern/cycles/render/geometry.h b/intern/cycles/render/geometry.h
new file mode 100644
index 00000000000..b0284304843
--- /dev/null
+++ b/intern/cycles/render/geometry.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __GEOMETRY_H__
+#define __GEOMETRY_H__
+
+#include "graph/node.h"
+
+#include "bvh/bvh_params.h"
+
+#include "render/attribute.h"
+
+#include "util/util_boundbox.h"
+#include "util/util_set.h"
+#include "util/util_transform.h"
+#include "util/util_types.h"
+#include "util/util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+class BVH;
+class Device;
+class DeviceScene;
+class Mesh;
+class Progress;
+class RenderStats;
+class Scene;
+class SceneParams;
+class Shader;
+
+/* Geometry
+ *
+ * Base class for geometric types like Mesh and Hair. */
+
+class Geometry : public Node {
+ public:
+ NODE_ABSTRACT_DECLARE
+
+ enum Type {
+ MESH,
+ HAIR,
+ };
+
+ Type type;
+
+ /* Attributes */
+ AttributeSet attributes;
+
+ /* Shaders */
+ vector<Shader *> used_shaders;
+
+ /* Transform */
+ BoundBox bounds;
+ bool transform_applied;
+ bool transform_negative_scaled;
+ Transform transform_normal;
+
+ /* Motion Blur */
+ uint motion_steps;
+ bool use_motion_blur;
+
+ /* Maximum number of motion steps supported (due to Embree). */
+ static const uint MAX_MOTION_STEPS = 129;
+
+ /* BVH */
+ BVH *bvh;
+ size_t attr_map_offset;
+ size_t prim_offset;
+ size_t optix_prim_offset;
+
+ /* Shader Properties */
+ bool has_volume; /* Set in device_update_flags(). */
+ bool has_surface_bssrdf; /* Set in device_update_flags(). */
+
+ /* Update Flags */
+ bool need_update;
+ bool need_update_rebuild;
+
+ /* Constructor/Destructor */
+ explicit Geometry(const NodeType *node_type, const Type type);
+ virtual ~Geometry();
+
+ /* Geometry */
+ virtual void clear();
+ virtual void compute_bounds() = 0;
+ virtual void apply_transform(const Transform &tfm, const bool apply_to_motion) = 0;
+
+ /* Attribute Requests */
+ bool need_attribute(Scene *scene, AttributeStandard std);
+ bool need_attribute(Scene *scene, ustring name);
+
+ /* UDIM */
+ virtual void get_uv_tiles(ustring map, unordered_set<int> &tiles) = 0;
+
+ /* Convert between normalized -1..1 motion time and index in the
+ * VERTEX_MOTION attribute. */
+ float motion_time(int step) const;
+ int motion_step(float time) const;
+
+ /* BVH */
+ void compute_bvh(Device *device,
+ DeviceScene *dscene,
+ SceneParams *params,
+ Progress *progress,
+ int n,
+ int total);
+
+ /* Check whether the geometry should have its own BVH built separately.
+ * Briefly, a geometry needs its own BVH if:
+ *
+ * - It is instanced multiple times, so all instance objects can share the
+ * same BVH tree.
+ * - Special ray intersection is needed, for example to limit subsurface rays
+ * to only the geometry itself.
+ * - The BVH layout requires the top level to only contain instances.
+ */
+ bool need_build_bvh(BVHLayout layout) const;
+
+ /* Test if the geometry should be treated as instanced. */
+ bool is_instanced() const;
+
+ bool has_true_displacement() const;
+ bool has_motion_blur() const;
+ bool has_voxel_attributes() const;
+
+ /* Updates */
+ void tag_update(Scene *scene, bool rebuild);
+};
+
+/* Geometry Manager */
+
+class GeometryManager {
+ public:
+ /* Update Flags */
+ bool need_update;
+ bool need_flags_update;
+
+ /* Constructor/Destructor */
+ GeometryManager();
+ ~GeometryManager();
+
+ /* Device Updates */
+ void device_update_preprocess(Device *device, Scene *scene, Progress &progress);
+ void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
+ void device_free(Device *device, DeviceScene *dscene);
+
+ /* Updates */
+ void tag_update(Scene *scene);
+
+ /* Statistics */
+ void collect_statistics(const Scene *scene, RenderStats *stats);
+
+ protected:
+ bool displace(Device *device, DeviceScene *dscene, Scene *scene, Mesh *mesh, Progress &progress);
+
+ void create_volume_mesh(Mesh *mesh, Progress &progress);
+
+ /* Attributes */
+ void update_osl_attributes(Device *device,
+ Scene *scene,
+ vector<AttributeRequestSet> &geom_attributes);
+ void update_svm_attributes(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ vector<AttributeRequestSet> &geom_attributes);
+
+ /* Compute verts/triangles/curves offsets in global arrays. */
+ void mesh_calc_offset(Scene *scene);
+
+ void device_update_object(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
+
+ void device_update_mesh(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ bool for_displacement,
+ Progress &progress);
+
+ void device_update_attributes(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ Progress &progress);
+
+ void device_update_bvh(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
+
+ void device_update_displacement_images(Device *device, Scene *scene, Progress &progress);
+
+ void device_update_volume_images(Device *device, Scene *scene, Progress &progress);
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __GEOMETRY_H__ */
diff --git a/intern/cycles/render/graph.cpp b/intern/cycles/render/graph.cpp
index 0e520c700a7..d2db59894ea 100644
--- a/intern/cycles/render/graph.cpp
+++ b/intern/cycles/render/graph.cpp
@@ -14,12 +14,12 @@
* limitations under the License.
*/
-#include "render/attribute.h"
#include "render/graph.h"
+#include "render/attribute.h"
+#include "render/constant_fold.h"
#include "render/nodes.h"
#include "render/scene.h"
#include "render/shader.h"
-#include "render/constant_fold.h"
#include "util/util_algorithm.h"
#include "util/util_foreach.h"
diff --git a/intern/cycles/render/hair.cpp b/intern/cycles/render/hair.cpp
new file mode 100644
index 00000000000..3daa4cc1e35
--- /dev/null
+++ b/intern/cycles/render/hair.cpp
@@ -0,0 +1,487 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "render/hair.h"
+#include "render/curves.h"
+#include "render/scene.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Hair Curve */
+
+void Hair::Curve::bounds_grow(const int k,
+ const float3 *curve_keys,
+ const float *curve_radius,
+ BoundBox &bounds) const
+{
+ float3 P[4];
+
+ P[0] = curve_keys[max(first_key + k - 1, first_key)];
+ P[1] = curve_keys[first_key + k];
+ P[2] = curve_keys[first_key + k + 1];
+ P[3] = curve_keys[min(first_key + k + 2, first_key + num_keys - 1)];
+
+ float3 lower;
+ float3 upper;
+
+ curvebounds(&lower.x, &upper.x, P, 0);
+ curvebounds(&lower.y, &upper.y, P, 1);
+ curvebounds(&lower.z, &upper.z, P, 2);
+
+ float mr = max(curve_radius[first_key + k], curve_radius[first_key + k + 1]);
+
+ bounds.grow(lower, mr);
+ bounds.grow(upper, mr);
+}
+
+void Hair::Curve::bounds_grow(const int k,
+ const float3 *curve_keys,
+ const float *curve_radius,
+ const Transform &aligned_space,
+ BoundBox &bounds) const
+{
+ float3 P[4];
+
+ P[0] = curve_keys[max(first_key + k - 1, first_key)];
+ P[1] = curve_keys[first_key + k];
+ P[2] = curve_keys[first_key + k + 1];
+ P[3] = curve_keys[min(first_key + k + 2, first_key + num_keys - 1)];
+
+ P[0] = transform_point(&aligned_space, P[0]);
+ P[1] = transform_point(&aligned_space, P[1]);
+ P[2] = transform_point(&aligned_space, P[2]);
+ P[3] = transform_point(&aligned_space, P[3]);
+
+ float3 lower;
+ float3 upper;
+
+ curvebounds(&lower.x, &upper.x, P, 0);
+ curvebounds(&lower.y, &upper.y, P, 1);
+ curvebounds(&lower.z, &upper.z, P, 2);
+
+ float mr = max(curve_radius[first_key + k], curve_radius[first_key + k + 1]);
+
+ bounds.grow(lower, mr);
+ bounds.grow(upper, mr);
+}
+
+void Hair::Curve::bounds_grow(float4 keys[4], BoundBox &bounds) const
+{
+ float3 P[4] = {
+ float4_to_float3(keys[0]),
+ float4_to_float3(keys[1]),
+ float4_to_float3(keys[2]),
+ float4_to_float3(keys[3]),
+ };
+
+ float3 lower;
+ float3 upper;
+
+ curvebounds(&lower.x, &upper.x, P, 0);
+ curvebounds(&lower.y, &upper.y, P, 1);
+ curvebounds(&lower.z, &upper.z, P, 2);
+
+ float mr = max(keys[1].w, keys[2].w);
+
+ bounds.grow(lower, mr);
+ bounds.grow(upper, mr);
+}
+
+void Hair::Curve::motion_keys(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ float time,
+ size_t k0,
+ size_t k1,
+ float4 r_keys[2]) const
+{
+ /* Figure out which steps we need to fetch and their interpolation factor. */
+ const size_t max_step = num_steps - 1;
+ const size_t step = min((int)(time * max_step), max_step - 1);
+ const float t = time * max_step - step;
+ /* Fetch vertex coordinates. */
+ float4 curr_keys[2];
+ float4 next_keys[2];
+ keys_for_step(
+ curve_keys, curve_radius, key_steps, num_curve_keys, num_steps, step, k0, k1, curr_keys);
+ keys_for_step(
+ curve_keys, curve_radius, key_steps, num_curve_keys, num_steps, step + 1, k0, k1, next_keys);
+ /* Interpolate between steps. */
+ r_keys[0] = (1.0f - t) * curr_keys[0] + t * next_keys[0];
+ r_keys[1] = (1.0f - t) * curr_keys[1] + t * next_keys[1];
+}
+
+void Hair::Curve::cardinal_motion_keys(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ float time,
+ size_t k0,
+ size_t k1,
+ size_t k2,
+ size_t k3,
+ float4 r_keys[4]) const
+{
+ /* Figure out which steps we need to fetch and their interpolation factor. */
+ const size_t max_step = num_steps - 1;
+ const size_t step = min((int)(time * max_step), max_step - 1);
+ const float t = time * max_step - step;
+ /* Fetch vertex coordinates. */
+ float4 curr_keys[4];
+ float4 next_keys[4];
+ cardinal_keys_for_step(curve_keys,
+ curve_radius,
+ key_steps,
+ num_curve_keys,
+ num_steps,
+ step,
+ k0,
+ k1,
+ k2,
+ k3,
+ curr_keys);
+ cardinal_keys_for_step(curve_keys,
+ curve_radius,
+ key_steps,
+ num_curve_keys,
+ num_steps,
+ step + 1,
+ k0,
+ k1,
+ k2,
+ k3,
+ next_keys);
+ /* Interpolate between steps. */
+ r_keys[0] = (1.0f - t) * curr_keys[0] + t * next_keys[0];
+ r_keys[1] = (1.0f - t) * curr_keys[1] + t * next_keys[1];
+ r_keys[2] = (1.0f - t) * curr_keys[2] + t * next_keys[2];
+ r_keys[3] = (1.0f - t) * curr_keys[3] + t * next_keys[3];
+}
+
+void Hair::Curve::keys_for_step(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ size_t step,
+ size_t k0,
+ size_t k1,
+ float4 r_keys[2]) const
+{
+ k0 = max(k0, 0);
+ k1 = min(k1, num_keys - 1);
+ const size_t center_step = ((num_steps - 1) / 2);
+ if (step == center_step) {
+ /* Center step: regular key location. */
+ /* TODO(sergey): Consider adding make_float4(float3, float)
+ * function.
+ */
+ r_keys[0] = make_float4(curve_keys[first_key + k0].x,
+ curve_keys[first_key + k0].y,
+ curve_keys[first_key + k0].z,
+ curve_radius[first_key + k0]);
+ r_keys[1] = make_float4(curve_keys[first_key + k1].x,
+ curve_keys[first_key + k1].y,
+ curve_keys[first_key + k1].z,
+ curve_radius[first_key + k1]);
+ }
+ else {
+ /* Center step is not stored in this array. */
+ if (step > center_step) {
+ step--;
+ }
+ const size_t offset = first_key + step * num_curve_keys;
+ r_keys[0] = make_float4(key_steps[offset + k0].x,
+ key_steps[offset + k0].y,
+ key_steps[offset + k0].z,
+ curve_radius[first_key + k0]);
+ r_keys[1] = make_float4(key_steps[offset + k1].x,
+ key_steps[offset + k1].y,
+ key_steps[offset + k1].z,
+ curve_radius[first_key + k1]);
+ }
+}
+
+void Hair::Curve::cardinal_keys_for_step(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ size_t step,
+ size_t k0,
+ size_t k1,
+ size_t k2,
+ size_t k3,
+ float4 r_keys[4]) const
+{
+ k0 = max(k0, 0);
+ k3 = min(k3, num_keys - 1);
+ const size_t center_step = ((num_steps - 1) / 2);
+ if (step == center_step) {
+ /* Center step: regular key location. */
+ r_keys[0] = make_float4(curve_keys[first_key + k0].x,
+ curve_keys[first_key + k0].y,
+ curve_keys[first_key + k0].z,
+ curve_radius[first_key + k0]);
+ r_keys[1] = make_float4(curve_keys[first_key + k1].x,
+ curve_keys[first_key + k1].y,
+ curve_keys[first_key + k1].z,
+ curve_radius[first_key + k1]);
+ r_keys[2] = make_float4(curve_keys[first_key + k2].x,
+ curve_keys[first_key + k2].y,
+ curve_keys[first_key + k2].z,
+ curve_radius[first_key + k2]);
+ r_keys[3] = make_float4(curve_keys[first_key + k3].x,
+ curve_keys[first_key + k3].y,
+ curve_keys[first_key + k3].z,
+ curve_radius[first_key + k3]);
+ }
+ else {
+ /* Center step is not stored in this array. */
+ if (step > center_step) {
+ step--;
+ }
+ const size_t offset = first_key + step * num_curve_keys;
+ r_keys[0] = make_float4(key_steps[offset + k0].x,
+ key_steps[offset + k0].y,
+ key_steps[offset + k0].z,
+ curve_radius[first_key + k0]);
+ r_keys[1] = make_float4(key_steps[offset + k1].x,
+ key_steps[offset + k1].y,
+ key_steps[offset + k1].z,
+ curve_radius[first_key + k1]);
+ r_keys[2] = make_float4(key_steps[offset + k2].x,
+ key_steps[offset + k2].y,
+ key_steps[offset + k2].z,
+ curve_radius[first_key + k2]);
+ r_keys[3] = make_float4(key_steps[offset + k3].x,
+ key_steps[offset + k3].y,
+ key_steps[offset + k3].z,
+ curve_radius[first_key + k3]);
+ }
+}
+
+/* Hair */
+
+NODE_DEFINE(Hair)
+{
+ NodeType *type = NodeType::add("hair", create, NodeType::NONE, Geometry::node_base_type);
+
+ SOCKET_POINT_ARRAY(curve_keys, "Curve Keys", array<float3>());
+ SOCKET_FLOAT_ARRAY(curve_radius, "Curve Radius", array<float>());
+ SOCKET_INT_ARRAY(curve_first_key, "Curve First Key", array<int>());
+ SOCKET_INT_ARRAY(curve_shader, "Curve Shader", array<int>());
+
+ return type;
+}
+
+Hair::Hair() : Geometry(node_type, Geometry::HAIR)
+{
+ curvekey_offset = 0;
+}
+
+Hair::~Hair()
+{
+}
+
+void Hair::resize_curves(int numcurves, int numkeys)
+{
+ curve_keys.resize(numkeys);
+ curve_radius.resize(numkeys);
+ curve_first_key.resize(numcurves);
+ curve_shader.resize(numcurves);
+
+ attributes.resize();
+}
+
+void Hair::reserve_curves(int numcurves, int numkeys)
+{
+ curve_keys.reserve(numkeys);
+ curve_radius.reserve(numkeys);
+ curve_first_key.reserve(numcurves);
+ curve_shader.reserve(numcurves);
+
+ attributes.resize(true);
+}
+
+void Hair::clear()
+{
+ Geometry::clear();
+
+ curve_keys.clear();
+ curve_radius.clear();
+ curve_first_key.clear();
+ curve_shader.clear();
+
+ attributes.clear();
+}
+
+void Hair::add_curve_key(float3 co, float radius)
+{
+ curve_keys.push_back_reserved(co);
+ curve_radius.push_back_reserved(radius);
+}
+
+void Hair::add_curve(int first_key, int shader)
+{
+ curve_first_key.push_back_reserved(first_key);
+ curve_shader.push_back_reserved(shader);
+}
+
+void Hair::copy_center_to_motion_step(const int motion_step)
+{
+ Attribute *attr_mP = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if (attr_mP) {
+ float3 *keys = &curve_keys[0];
+ size_t numkeys = curve_keys.size();
+ memcpy(attr_mP->data_float3() + motion_step * numkeys, keys, sizeof(float3) * numkeys);
+ }
+}
+
+void Hair::get_uv_tiles(ustring map, unordered_set<int> &tiles)
+{
+ Attribute *attr;
+
+ if (map.empty()) {
+ attr = attributes.find(ATTR_STD_UV);
+ }
+ else {
+ attr = attributes.find(map);
+ }
+
+ if (attr) {
+ attr->get_uv_tiles(this, ATTR_PRIM_GEOMETRY, tiles);
+ }
+}
+
+void Hair::compute_bounds()
+{
+ BoundBox bnds = BoundBox::empty;
+ size_t curve_keys_size = curve_keys.size();
+
+ if (curve_keys_size > 0) {
+ for (size_t i = 0; i < curve_keys_size; i++)
+ bnds.grow(curve_keys[i], curve_radius[i]);
+
+ Attribute *curve_attr = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if (use_motion_blur && curve_attr) {
+ size_t steps_size = curve_keys.size() * (motion_steps - 1);
+ float3 *key_steps = curve_attr->data_float3();
+
+ for (size_t i = 0; i < steps_size; i++)
+ bnds.grow(key_steps[i]);
+ }
+
+ if (!bnds.valid()) {
+ bnds = BoundBox::empty;
+
+ /* skip nan or inf coordinates */
+ for (size_t i = 0; i < curve_keys_size; i++)
+ bnds.grow_safe(curve_keys[i], curve_radius[i]);
+
+ if (use_motion_blur && curve_attr) {
+ size_t steps_size = curve_keys.size() * (motion_steps - 1);
+ float3 *key_steps = curve_attr->data_float3();
+
+ for (size_t i = 0; i < steps_size; i++)
+ bnds.grow_safe(key_steps[i]);
+ }
+ }
+ }
+
+ if (!bnds.valid()) {
+ /* empty mesh */
+ bnds.grow(make_float3(0.0f, 0.0f, 0.0f));
+ }
+
+ bounds = bnds;
+}
+
+void Hair::apply_transform(const Transform &tfm, const bool apply_to_motion)
+{
+ /* compute uniform scale */
+ float3 c0 = transform_get_column(&tfm, 0);
+ float3 c1 = transform_get_column(&tfm, 1);
+ float3 c2 = transform_get_column(&tfm, 2);
+ float scalar = powf(fabsf(dot(cross(c0, c1), c2)), 1.0f / 3.0f);
+
+ /* apply transform to curve keys */
+ for (size_t i = 0; i < curve_keys.size(); i++) {
+ float3 co = transform_point(&tfm, curve_keys[i]);
+ float radius = curve_radius[i] * scalar;
+
+ /* scale for curve radius is only correct for uniform scale */
+ curve_keys[i] = co;
+ curve_radius[i] = radius;
+ }
+
+ if (apply_to_motion) {
+ Attribute *curve_attr = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
+ if (curve_attr) {
+ /* apply transform to motion curve keys */
+ size_t steps_size = curve_keys.size() * (motion_steps - 1);
+ float4 *key_steps = curve_attr->data_float4();
+
+ for (size_t i = 0; i < steps_size; i++) {
+ float3 co = transform_point(&tfm, float4_to_float3(key_steps[i]));
+ float radius = key_steps[i].w * scalar;
+
+ /* scale for curve radius is only correct for uniform scale */
+ key_steps[i] = float3_to_float4(co);
+ key_steps[i].w = radius;
+ }
+ }
+ }
+}
+
+void Hair::pack_curves(Scene *scene,
+ float4 *curve_key_co,
+ float4 *curve_data,
+ size_t curvekey_offset)
+{
+ size_t curve_keys_size = curve_keys.size();
+
+ /* pack curve keys */
+ if (curve_keys_size) {
+ float3 *keys_ptr = curve_keys.data();
+ float *radius_ptr = curve_radius.data();
+
+ for (size_t i = 0; i < curve_keys_size; i++)
+ curve_key_co[i] = make_float4(keys_ptr[i].x, keys_ptr[i].y, keys_ptr[i].z, radius_ptr[i]);
+ }
+
+ /* pack curve segments */
+ size_t curve_num = num_curves();
+
+ for (size_t i = 0; i < curve_num; i++) {
+ Curve curve = get_curve(i);
+ int shader_id = curve_shader[i];
+ Shader *shader = (shader_id < used_shaders.size()) ? used_shaders[shader_id] :
+ scene->default_surface;
+ shader_id = scene->shader_manager->get_shader_id(shader, false);
+
+ curve_data[i] = make_float4(__int_as_float(curve.first_key + curvekey_offset),
+ __int_as_float(curve.num_keys),
+ __int_as_float(shader_id),
+ 0.0f);
+ }
+}
+
+CCL_NAMESPACE_END
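
Hair::pack_curves() above stores integer curve indices in float4 components by reinterpreting the bits (__int_as_float). A portable sketch of that bit-preserving conversion using memcpy, with hypothetical helper names:

#include <cstdint>
#include <cstdio>
#include <cstring>

/* Keep the bit pattern, do not perform a numeric int-to-float cast. */
static float int_as_float(int32_t i)
{
  float f;
  std::memcpy(&f, &i, sizeof(f));
  return f;
}

static int32_t float_as_int(float f)
{
  int32_t i;
  std::memcpy(&i, &f, sizeof(i));
  return i;
}

int main()
{
  const int32_t first_key = 12345;
  const float packed = int_as_float(first_key);
  std::printf("round-trip: %d\n", float_as_int(packed)); /* prints 12345 */
  return 0;
}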
diff --git a/intern/cycles/render/hair.h b/intern/cycles/render/hair.h
new file mode 100644
index 00000000000..79f77a78753
--- /dev/null
+++ b/intern/cycles/render/hair.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __HAIR_H__
+#define __HAIR_H__
+
+#include "render/geometry.h"
+
+CCL_NAMESPACE_BEGIN
+
+class Hair : public Geometry {
+ public:
+ NODE_DECLARE
+
+ /* Hair Curve */
+ struct Curve {
+ int first_key;
+ int num_keys;
+
+ int num_segments() const
+ {
+ return num_keys - 1;
+ }
+
+ void bounds_grow(const int k,
+ const float3 *curve_keys,
+ const float *curve_radius,
+ BoundBox &bounds) const;
+ void bounds_grow(float4 keys[4], BoundBox &bounds) const;
+ void bounds_grow(const int k,
+ const float3 *curve_keys,
+ const float *curve_radius,
+ const Transform &aligned_space,
+ BoundBox &bounds) const;
+
+ void motion_keys(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ float time,
+ size_t k0,
+ size_t k1,
+ float4 r_keys[2]) const;
+ void cardinal_motion_keys(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ float time,
+ size_t k0,
+ size_t k1,
+ size_t k2,
+ size_t k3,
+ float4 r_keys[4]) const;
+
+ void keys_for_step(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ size_t step,
+ size_t k0,
+ size_t k1,
+ float4 r_keys[2]) const;
+ void cardinal_keys_for_step(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ size_t step,
+ size_t k0,
+ size_t k1,
+ size_t k2,
+ size_t k3,
+ float4 r_keys[4]) const;
+ };
+
+ array<float3> curve_keys;
+ array<float> curve_radius;
+ array<int> curve_first_key;
+ array<int> curve_shader;
+
+ /* BVH */
+ size_t curvekey_offset;
+
+ /* Constructor/Destructor */
+ Hair();
+ ~Hair();
+
+ /* Geometry */
+ void clear() override;
+
+ void resize_curves(int numcurves, int numkeys);
+ void reserve_curves(int numcurves, int numkeys);
+ void add_curve_key(float3 loc, float radius);
+ void add_curve(int first_key, int shader);
+
+ void copy_center_to_motion_step(const int motion_step);
+
+ void compute_bounds() override;
+ void apply_transform(const Transform &tfm, const bool apply_to_motion) override;
+
+ /* Curves */
+ Curve get_curve(size_t i) const
+ {
+ int first = curve_first_key[i];
+ int next_first = (i + 1 < curve_first_key.size()) ? curve_first_key[i + 1] : curve_keys.size();
+
+ Curve curve = {first, next_first - first};
+ return curve;
+ }
+
+ size_t num_keys() const
+ {
+ return curve_keys.size();
+ }
+
+ size_t num_curves() const
+ {
+ return curve_first_key.size();
+ }
+
+ size_t num_segments() const
+ {
+ return curve_keys.size() - curve_first_key.size();
+ }
+
+ /* UDIM */
+ void get_uv_tiles(ustring map, unordered_set<int> &tiles) override;
+
+ /* BVH */
+ void pack_curves(Scene *scene, float4 *curve_key_co, float4 *curve_data, size_t curvekey_offset);
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __HAIR_H__ */
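
The curve accessors in this header derive everything from the flat key array plus per-curve first-key indices: a curve's key count is the distance to the next curve's first key, and a curve with n keys has n-1 segments. A self-contained sketch of that indexing with toy data:

#include <cstdio>
#include <vector>

/* Same fields as Hair::Curve. */
struct ToyCurve {
  int first_key;
  int num_keys;
};

/* Mirrors Hair::get_curve(): the next curve's first key (or the total key
 * count for the last curve) bounds this curve's key range. */
static ToyCurve get_curve(const std::vector<int> &curve_first_key, int num_total_keys, size_t i)
{
  const int first = curve_first_key[i];
  const int next_first = (i + 1 < curve_first_key.size()) ? curve_first_key[i + 1] :
                                                            num_total_keys;
  return {first, next_first - first};
}

int main()
{
  /* Two curves: keys [0..3] and [4..9]. */
  const std::vector<int> curve_first_key = {0, 4};
  const int num_total_keys = 10;

  int num_segments = 0;
  for (size_t i = 0; i < curve_first_key.size(); i++) {
    const ToyCurve curve = get_curve(curve_first_key, num_total_keys, i);
    num_segments += curve.num_keys - 1; /* n keys -> n-1 segments */
  }

  /* Matches Hair::num_segments() = num_keys - num_curves = 10 - 2 = 8. */
  std::printf("segments: %d\n", num_segments);
  return 0;
}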
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index 212a867f9cd..67ed1176171 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -17,10 +17,12 @@
#include "render/image.h"
#include "device/device.h"
#include "render/colorspace.h"
+#include "render/image_oiio.h"
#include "render/scene.h"
#include "render/stats.h"
#include "util/util_foreach.h"
+#include "util/util_image.h"
#include "util/util_image_impl.h"
#include "util/util_logging.h"
#include "util/util_path.h"
@@ -50,21 +52,6 @@ bool isfinite(uint16_t /*value*/)
return true;
}
-/* The lower three bits of a device texture slot number indicate its type.
- * These functions convert the slot ids from ImageManager "images" ones
- * to device ones and vice verse.
- */
-int type_index_to_flattened_slot(int slot, ImageDataType type)
-{
- return (slot << IMAGE_DATA_TYPE_SHIFT) | (type);
-}
-
-int flattened_slot_to_type_index(int flat_slot, ImageDataType *type)
-{
- *type = (ImageDataType)(flat_slot & IMAGE_DATA_TYPE_MASK);
- return flat_slot >> IMAGE_DATA_TYPE_SHIFT;
-}
-
const char *name_from_type(ImageDataType type)
{
switch (type) {
@@ -94,342 +81,352 @@ const char *name_from_type(ImageDataType type)
} // namespace
-ImageManager::ImageManager(const DeviceInfo &info)
+/* Image Handle */
+
+ImageHandle::ImageHandle() : manager(NULL)
{
- need_update = true;
- osl_texture_system = NULL;
- animation_frame = 0;
+}
- /* Set image limits */
- max_num_images = TEX_NUM_MAX;
- has_half_images = info.has_half_images;
+ImageHandle::ImageHandle(const ImageHandle &other)
+ : tile_slots(other.tile_slots), manager(other.manager)
+{
+ /* Increase image user count. */
+ foreach (const int slot, tile_slots) {
+ manager->add_image_user(slot);
+ }
+}
+
+ImageHandle &ImageHandle::operator=(const ImageHandle &other)
+{
+ clear();
+ manager = other.manager;
+ tile_slots = other.tile_slots;
- for (size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- tex_num_images[type] = 0;
+ foreach (const int slot, tile_slots) {
+ manager->add_image_user(slot);
}
+
+ return *this;
}
-ImageManager::~ImageManager()
+ImageHandle::~ImageHandle()
+{
+ clear();
+}
+
+void ImageHandle::clear()
{
- for (size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- for (size_t slot = 0; slot < images[type].size(); slot++)
- assert(!images[type][slot]);
+ foreach (const int slot, tile_slots) {
+ manager->remove_image_user(slot);
}
+
+ tile_slots.clear();
+ manager = NULL;
}
-void ImageManager::set_osl_texture_system(void *texture_system)
+bool ImageHandle::empty()
{
- osl_texture_system = texture_system;
+ return tile_slots.empty();
}
-bool ImageManager::set_animation_frame_update(int frame)
+int ImageHandle::num_tiles()
{
- if (frame != animation_frame) {
- animation_frame = frame;
+ return tile_slots.size();
+}
- for (size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- for (size_t slot = 0; slot < images[type].size(); slot++) {
- if (images[type][slot] && images[type][slot]->animated)
- return true;
- }
- }
+ImageMetaData ImageHandle::metadata()
+{
+ if (tile_slots.empty()) {
+ return ImageMetaData();
}
- return false;
+ ImageManager::Image *img = manager->images[tile_slots.front()];
+ manager->load_image_metadata(img);
+ return img->metadata;
}
-device_memory *ImageManager::image_memory(int flat_slot)
+int ImageHandle::svm_slot(const int tile_index) const
{
- ImageDataType type;
- int slot = flattened_slot_to_type_index(flat_slot, &type);
+ if (tile_index >= tile_slots.size()) {
+ return -1;
+ }
- Image *img = images[type][slot];
+ if (manager->osl_texture_system) {
+ ImageManager::Image *img = manager->images[tile_slots[tile_index]];
+ if (!img->loader->osl_filepath().empty()) {
+ return -1;
+ }
+ }
- return img->mem;
+ return tile_slots[tile_index];
}
-bool ImageManager::get_image_metadata(int flat_slot, ImageMetaData &metadata)
+device_texture *ImageHandle::image_memory(const int tile_index) const
{
- if (flat_slot == -1) {
- return false;
+ if (tile_index >= tile_slots.size()) {
+ return NULL;
}
- ImageDataType type;
- int slot = flattened_slot_to_type_index(flat_slot, &type);
+ ImageManager::Image *img = manager->images[tile_slots[tile_index]];
+ return img ? img->mem : NULL;
+}
- Image *img = images[type][slot];
- if (img) {
- metadata = img->metadata;
- return true;
- }
+bool ImageHandle::operator==(const ImageHandle &other) const
+{
+ return manager == other.manager && tile_slots == other.tile_slots;
+}
- return false;
+/* Image MetaData */
+
+ImageMetaData::ImageMetaData()
+ : channels(0),
+ width(0),
+ height(0),
+ depth(0),
+ type(IMAGE_DATA_NUM_TYPES),
+ colorspace(u_colorspace_raw),
+ colorspace_file_format(""),
+ use_transform_3d(false),
+ compress_as_srgb(false)
+{
+}
+
+bool ImageMetaData::operator==(const ImageMetaData &other) const
+{
+ return channels == other.channels && width == other.width && height == other.height &&
+ depth == other.depth && use_transform_3d == other.use_transform_3d &&
+ (!use_transform_3d || transform_3d == other.transform_3d) && type == other.type &&
+ colorspace == other.colorspace && compress_as_srgb == other.compress_as_srgb;
+}
+
+bool ImageMetaData::is_float() const
+{
+ return (type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4 ||
+ type == IMAGE_DATA_TYPE_HALF || type == IMAGE_DATA_TYPE_HALF4);
}
-void ImageManager::metadata_detect_colorspace(ImageMetaData &metadata, const char *file_format)
+void ImageMetaData::detect_colorspace()
{
   /* Convert user-specified color spaces to one we know how to handle. */
- metadata.colorspace = ColorSpaceManager::detect_known_colorspace(
- metadata.colorspace, file_format, metadata.is_float || metadata.is_half);
+ colorspace = ColorSpaceManager::detect_known_colorspace(
+ colorspace, colorspace_file_format, is_float());
- if (metadata.colorspace == u_colorspace_raw) {
+ if (colorspace == u_colorspace_raw) {
/* Nothing to do. */
}
- else if (metadata.colorspace == u_colorspace_srgb) {
+ else if (colorspace == u_colorspace_srgb) {
/* Keep sRGB colorspace stored as sRGB, to save memory and/or loading time
* for the common case of 8bit sRGB images like PNG. */
- metadata.compress_as_srgb = true;
+ compress_as_srgb = true;
}
else {
/* Always compress non-raw 8bit images as scene linear + sRGB, as a
* heuristic to keep memory usage the same without too much data loss
* due to quantization in common cases. */
- metadata.compress_as_srgb = (metadata.type == IMAGE_DATA_TYPE_BYTE ||
- metadata.type == IMAGE_DATA_TYPE_BYTE4);
+ compress_as_srgb = (type == IMAGE_DATA_TYPE_BYTE || type == IMAGE_DATA_TYPE_BYTE4);
/* If colorspace conversion needed, use half instead of short so we can
* represent HDR values that might result from conversion. */
- if (metadata.type == IMAGE_DATA_TYPE_USHORT) {
- metadata.type = IMAGE_DATA_TYPE_HALF;
+ if (type == IMAGE_DATA_TYPE_USHORT) {
+ type = IMAGE_DATA_TYPE_HALF;
}
- else if (metadata.type == IMAGE_DATA_TYPE_USHORT4) {
- metadata.type = IMAGE_DATA_TYPE_HALF4;
+ else if (type == IMAGE_DATA_TYPE_USHORT4) {
+ type = IMAGE_DATA_TYPE_HALF4;
}
}
}
-bool ImageManager::get_image_metadata(const string &filename,
- void *builtin_data,
- ustring colorspace,
- ImageMetaData &metadata)
-{
- metadata = ImageMetaData();
- metadata.colorspace = colorspace;
-
- if (builtin_data) {
- if (builtin_image_info_cb) {
- builtin_image_info_cb(filename, builtin_data, metadata);
- }
- else {
- return false;
- }
+/* Image Loader */
- if (metadata.is_float) {
- metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_FLOAT;
- }
- else {
- metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_BYTE4 : IMAGE_DATA_TYPE_BYTE;
- }
+ImageLoader::ImageLoader()
+{
+}
- metadata_detect_colorspace(metadata, "");
+ustring ImageLoader::osl_filepath() const
+{
+ return ustring();
+}
+bool ImageLoader::equals(const ImageLoader *a, const ImageLoader *b)
+{
+ if (a == NULL && b == NULL) {
return true;
}
-
- /* Perform preliminary checks, with meaningful logging. */
- if (!path_exists(filename)) {
- VLOG(1) << "File '" << filename << "' does not exist.";
- return false;
- }
- if (path_is_directory(filename)) {
- VLOG(1) << "File '" << filename << "' is a directory, can't use as image.";
- return false;
+ else {
+ return (a && b && typeid(*a) == typeid(*b) && a->equals(*b));
}
+}
- unique_ptr<ImageInput> in(ImageInput::create(filename));
+/* Image Manager */
- if (!in) {
- return false;
- }
+ImageManager::ImageManager(const DeviceInfo &info)
+{
+ need_update = true;
+ osl_texture_system = NULL;
+ animation_frame = 0;
- ImageSpec spec;
- if (!in->open(filename, spec)) {
- return false;
- }
+ /* Set image limits */
+ has_half_images = info.has_half_images;
+}
- metadata.width = spec.width;
- metadata.height = spec.height;
- metadata.depth = spec.depth;
- metadata.compress_as_srgb = false;
+ImageManager::~ImageManager()
+{
+ for (size_t slot = 0; slot < images.size(); slot++)
+ assert(!images[slot]);
+}
- /* Check the main format, and channel formats. */
- size_t channel_size = spec.format.basesize();
+void ImageManager::set_osl_texture_system(void *texture_system)
+{
+ osl_texture_system = texture_system;
+}
- if (spec.format.is_floating_point()) {
- metadata.is_float = true;
- }
+bool ImageManager::set_animation_frame_update(int frame)
+{
+ if (frame != animation_frame) {
+ animation_frame = frame;
- for (size_t channel = 0; channel < spec.channelformats.size(); channel++) {
- channel_size = max(channel_size, spec.channelformats[channel].basesize());
- if (spec.channelformats[channel].is_floating_point()) {
- metadata.is_float = true;
+ for (size_t slot = 0; slot < images.size(); slot++) {
+ if (images[slot] && images[slot]->params.animated)
+ return true;
}
}
- /* check if it's half float */
- if (spec.format == TypeDesc::HALF) {
- metadata.is_half = true;
- }
-
- /* set type and channels */
- metadata.channels = spec.nchannels;
+ return false;
+}
- if (metadata.is_half) {
- metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_HALF4 : IMAGE_DATA_TYPE_HALF;
+void ImageManager::load_image_metadata(Image *img)
+{
+ if (!img->need_metadata) {
+ return;
}
- else if (metadata.is_float) {
- metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_FLOAT;
+
+ thread_scoped_lock image_lock(img->mutex);
+ if (!img->need_metadata) {
+ return;
}
- else if (spec.format == TypeDesc::USHORT) {
- metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_USHORT4 : IMAGE_DATA_TYPE_USHORT;
+
+ ImageMetaData &metadata = img->metadata;
+ metadata = ImageMetaData();
+ metadata.colorspace = img->params.colorspace;
+
+ if (img->loader->load_metadata(metadata)) {
+ assert(metadata.type != IMAGE_DATA_NUM_TYPES);
}
else {
- metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_BYTE4 : IMAGE_DATA_TYPE_BYTE;
+ metadata.type = IMAGE_DATA_TYPE_BYTE4;
+ }
+
+ metadata.detect_colorspace();
+
+ /* No half textures on OpenCL, use full float instead. */
+ if (!has_half_images) {
+ if (metadata.type == IMAGE_DATA_TYPE_HALF4) {
+ metadata.type = IMAGE_DATA_TYPE_FLOAT4;
+ }
+ else if (metadata.type == IMAGE_DATA_TYPE_HALF) {
+ metadata.type = IMAGE_DATA_TYPE_FLOAT;
+ }
}
- metadata_detect_colorspace(metadata, in->format_name());
+ img->need_metadata = false;
+}
+
+ImageHandle ImageManager::add_image(const string &filename, const ImageParams &params)
+{
+ const int slot = add_image_slot(new OIIOImageLoader(filename), params, false);
- in->close();
+ ImageHandle handle;
+ handle.tile_slots.push_back(slot);
+ handle.manager = this;
+ return handle;
+}
- return true;
+ImageHandle ImageManager::add_image(const string &filename,
+ const ImageParams &params,
+ const vector<int> &tiles)
+{
+ ImageHandle handle;
+ handle.manager = this;
+
+ foreach (int tile, tiles) {
+ string tile_filename = filename;
+ if (tile != 0) {
+ string_replace(tile_filename, "<UDIM>", string_printf("%04d", tile));
+ }
+ const int slot = add_image_slot(new OIIOImageLoader(tile_filename), params, false);
+ handle.tile_slots.push_back(slot);
+ }
+
+ return handle;
}
-static bool image_equals(ImageManager::Image *image,
- const string &filename,
- void *builtin_data,
- InterpolationType interpolation,
- ExtensionType extension,
- ImageAlphaType alpha_type,
- ustring colorspace)
+ImageHandle ImageManager::add_image(ImageLoader *loader, const ImageParams &params)
{
- return image->filename == filename && image->builtin_data == builtin_data &&
- image->interpolation == interpolation && image->extension == extension &&
- image->alpha_type == alpha_type && image->colorspace == colorspace;
+ const int slot = add_image_slot(loader, params, true);
+
+ ImageHandle handle;
+ handle.tile_slots.push_back(slot);
+ handle.manager = this;
+ return handle;
}
-int ImageManager::add_image(const string &filename,
- void *builtin_data,
- bool animated,
- float frame,
- InterpolationType interpolation,
- ExtensionType extension,
- ImageAlphaType alpha_type,
- ustring colorspace,
- ImageMetaData &metadata)
+int ImageManager::add_image_slot(ImageLoader *loader,
+ const ImageParams &params,
+ const bool builtin)
{
Image *img;
size_t slot;
- get_image_metadata(filename, builtin_data, colorspace, metadata);
- ImageDataType type = metadata.type;
-
thread_scoped_lock device_lock(device_mutex);
- /* No half textures on OpenCL, use full float instead. */
- if (!has_half_images) {
- if (type == IMAGE_DATA_TYPE_HALF4) {
- type = IMAGE_DATA_TYPE_FLOAT4;
- }
- else if (type == IMAGE_DATA_TYPE_HALF) {
- type = IMAGE_DATA_TYPE_FLOAT;
- }
- }
-
  /* Find existing image. */
- for (slot = 0; slot < images[type].size(); slot++) {
- img = images[type][slot];
- if (img &&
- image_equals(
- img, filename, builtin_data, interpolation, extension, alpha_type, colorspace)) {
- if (img->frame != frame) {
- img->frame = frame;
- img->need_load = true;
- }
- if (img->alpha_type != alpha_type) {
- img->alpha_type = alpha_type;
- img->need_load = true;
- }
- if (img->colorspace != colorspace) {
- img->colorspace = colorspace;
- img->need_load = true;
- }
- if (!(img->metadata == metadata)) {
- img->metadata = metadata;
- img->need_load = true;
- }
+ for (slot = 0; slot < images.size(); slot++) {
+ img = images[slot];
+ if (img && ImageLoader::equals(img->loader, loader) && img->params == params) {
img->users++;
- return type_index_to_flattened_slot(slot, type);
+ delete loader;
+ return slot;
}
}
/* Find free slot. */
- for (slot = 0; slot < images[type].size(); slot++) {
- if (!images[type][slot])
+ for (slot = 0; slot < images.size(); slot++) {
+ if (!images[slot])
break;
}
- /* Count if we're over the limit.
- * Very unlikely, since max_num_images is insanely big. But better safe
- * than sorry.
- */
- int tex_count = 0;
- for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- tex_count += tex_num_images[type];
- }
- if (tex_count > max_num_images) {
- printf(
- "ImageManager::add_image: Reached image limit (%d), "
- "skipping '%s'\n",
- max_num_images,
- filename.c_str());
- return -1;
- }
-
- if (slot == images[type].size()) {
- images[type].resize(images[type].size() + 1);
+ if (slot == images.size()) {
+ images.resize(images.size() + 1);
}
/* Add new image. */
img = new Image();
- img->filename = filename;
- img->builtin_data = builtin_data;
- img->metadata = metadata;
- img->need_load = true;
- img->animated = animated;
- img->frame = frame;
- img->interpolation = interpolation;
- img->extension = extension;
+ img->params = params;
+ img->loader = loader;
+ img->need_metadata = true;
+ img->need_load = !(osl_texture_system && !img->loader->osl_filepath().empty());
+ img->builtin = builtin;
img->users = 1;
- img->alpha_type = alpha_type;
- img->colorspace = colorspace;
img->mem = NULL;
- images[type][slot] = img;
-
- ++tex_num_images[type];
+ images[slot] = img;
need_update = true;
- return type_index_to_flattened_slot(slot, type);
+ return slot;
}
-void ImageManager::add_image_user(int flat_slot)
+void ImageManager::add_image_user(int slot)
{
- ImageDataType type;
- int slot = flattened_slot_to_type_index(flat_slot, &type);
-
- Image *image = images[type][slot];
+ Image *image = images[slot];
assert(image && image->users >= 1);
image->users++;
}
-void ImageManager::remove_image(int flat_slot)
+void ImageManager::remove_image_user(int slot)
{
- ImageDataType type;
- int slot = flattened_slot_to_type_index(flat_slot, &type);
-
- Image *image = images[type][slot];
+ Image *image = images[slot];
assert(image && image->users >= 1);
/* decrement user count */
@@ -442,119 +439,20 @@ void ImageManager::remove_image(int flat_slot)
need_update = true;
}
-void ImageManager::remove_image(const string &filename,
- void *builtin_data,
- InterpolationType interpolation,
- ExtensionType extension,
- ImageAlphaType alpha_type,
- ustring colorspace)
-{
- size_t slot;
-
- for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- for (slot = 0; slot < images[type].size(); slot++) {
- if (images[type][slot] && image_equals(images[type][slot],
- filename,
- builtin_data,
- interpolation,
- extension,
- alpha_type,
- colorspace)) {
- remove_image(type_index_to_flattened_slot(slot, (ImageDataType)type));
- return;
- }
- }
- }
-}
-
-/* TODO(sergey): Deduplicate with the iteration above, but make it pretty,
- * without bunch of arguments passing around making code readability even
- * more cluttered.
- */
-void ImageManager::tag_reload_image(const string &filename,
- void *builtin_data,
- InterpolationType interpolation,
- ExtensionType extension,
- ImageAlphaType alpha_type,
- ustring colorspace)
-{
- for (size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- for (size_t slot = 0; slot < images[type].size(); slot++) {
- if (images[type][slot] && image_equals(images[type][slot],
- filename,
- builtin_data,
- interpolation,
- extension,
- alpha_type,
- colorspace)) {
- images[type][slot]->need_load = true;
- break;
- }
- }
- }
-}
-
static bool image_associate_alpha(ImageManager::Image *img)
{
/* For typical RGBA images we let OIIO convert to associated alpha,
   * but for some types we want to leave the RGB channels untouched. */
- return !(ColorSpaceManager::colorspace_is_data(img->colorspace) ||
- img->alpha_type == IMAGE_ALPHA_IGNORE || img->alpha_type == IMAGE_ALPHA_CHANNEL_PACKED);
+ return !(ColorSpaceManager::colorspace_is_data(img->params.colorspace) ||
+ img->params.alpha_type == IMAGE_ALPHA_IGNORE ||
+ img->params.alpha_type == IMAGE_ALPHA_CHANNEL_PACKED);
}
-bool ImageManager::file_load_image_generic(Image *img, unique_ptr<ImageInput> *in)
+template<TypeDesc::BASETYPE FileFormat, typename StorageType>
+bool ImageManager::file_load_image(Image *img, int texture_limit)
{
- if (img->filename == "")
- return false;
-
- if (!img->builtin_data) {
- /* NOTE: Error logging is done in meta data acquisition. */
- if (!path_exists(img->filename) || path_is_directory(img->filename)) {
- return false;
- }
-
- /* load image from file through OIIO */
- *in = unique_ptr<ImageInput>(ImageInput::create(img->filename));
-
- if (!*in)
- return false;
-
- ImageSpec spec = ImageSpec();
- ImageSpec config = ImageSpec();
-
- if (!image_associate_alpha(img)) {
- config.attribute("oiio:UnassociatedAlpha", 1);
- }
-
- if (!(*in)->open(img->filename, spec, config)) {
- return false;
- }
- }
- else {
- /* load image using builtin images callbacks */
- if (!builtin_image_info_cb || !builtin_image_pixels_cb)
- return false;
- }
-
/* we only handle certain number of components */
if (!(img->metadata.channels >= 1 && img->metadata.channels <= 4)) {
- if (*in) {
- (*in)->close();
- }
- return false;
- }
-
- return true;
-}
-
-template<TypeDesc::BASETYPE FileFormat, typename StorageType, typename DeviceType>
-bool ImageManager::file_load_image(Image *img,
- ImageDataType type,
- int texture_limit,
- device_vector<DeviceType> &tex_img)
-{
- unique_ptr<ImageInput> in = NULL;
- if (!file_load_image_generic(img, &in)) {
return false;
}
@@ -580,7 +478,7 @@ bool ImageManager::file_load_image(Image *img,
}
else {
thread_scoped_lock device_lock(device_mutex);
- pixels = (StorageType *)tex_img.alloc(width, height, depth);
+ pixels = (StorageType *)img->mem->alloc(width, height, depth);
}
if (pixels == NULL) {
@@ -588,90 +486,21 @@ bool ImageManager::file_load_image(Image *img,
return false;
}
- bool cmyk = false;
const size_t num_pixels = ((size_t)width) * height * depth;
- if (in) {
- /* Read pixels through OpenImageIO. */
- StorageType *readpixels = pixels;
- vector<StorageType> tmppixels;
- if (components > 4) {
- tmppixels.resize(((size_t)width) * height * components);
- readpixels = &tmppixels[0];
- }
-
- if (depth <= 1) {
- size_t scanlinesize = ((size_t)width) * components * sizeof(StorageType);
- in->read_image(FileFormat,
- (uchar *)readpixels + (height - 1) * scanlinesize,
- AutoStride,
- -scanlinesize,
- AutoStride);
- }
- else {
- in->read_image(FileFormat, (uchar *)readpixels);
- }
-
- if (components > 4) {
- size_t dimensions = ((size_t)width) * height;
- for (size_t i = dimensions - 1, pixel = 0; pixel < dimensions; pixel++, i--) {
- pixels[i * 4 + 3] = tmppixels[i * components + 3];
- pixels[i * 4 + 2] = tmppixels[i * components + 2];
- pixels[i * 4 + 1] = tmppixels[i * components + 1];
- pixels[i * 4 + 0] = tmppixels[i * components + 0];
- }
- tmppixels.clear();
- }
-
- cmyk = strcmp(in->format_name(), "jpeg") == 0 && components == 4;
- in->close();
- }
- else {
- /* Read pixels through callback. */
- if (FileFormat == TypeDesc::FLOAT) {
- builtin_image_float_pixels_cb(img->filename,
- img->builtin_data,
- 0, /* TODO(lukas): Support tiles here? */
- (float *)&pixels[0],
- num_pixels * components,
- image_associate_alpha(img),
- img->metadata.builtin_free_cache);
- }
- else if (FileFormat == TypeDesc::UINT8) {
- builtin_image_pixels_cb(img->filename,
- img->builtin_data,
- 0, /* TODO(lukas): Support tiles here? */
- (uchar *)&pixels[0],
- num_pixels * components,
- image_associate_alpha(img),
- img->metadata.builtin_free_cache);
- }
- else {
- /* TODO(dingto): Support half for ImBuf. */
- }
- }
+ img->loader->load_pixels(
+ img->metadata, pixels, num_pixels * components, image_associate_alpha(img));
/* The kernel can handle 1 and 4 channel images. Anything that is not a single
* channel image is converted to RGBA format. */
- bool is_rgba = (type == IMAGE_DATA_TYPE_FLOAT4 || type == IMAGE_DATA_TYPE_HALF4 ||
- type == IMAGE_DATA_TYPE_BYTE4 || type == IMAGE_DATA_TYPE_USHORT4);
+ bool is_rgba = (img->metadata.type == IMAGE_DATA_TYPE_FLOAT4 ||
+ img->metadata.type == IMAGE_DATA_TYPE_HALF4 ||
+ img->metadata.type == IMAGE_DATA_TYPE_BYTE4 ||
+ img->metadata.type == IMAGE_DATA_TYPE_USHORT4);
if (is_rgba) {
const StorageType one = util_image_cast_from_float<StorageType>(1.0f);
- if (cmyk) {
- /* CMYK to RGBA. */
- for (size_t i = num_pixels - 1, pixel = 0; pixel < num_pixels; pixel++, i--) {
- float c = util_image_cast_to_float(pixels[i * 4 + 0]);
- float m = util_image_cast_to_float(pixels[i * 4 + 1]);
- float y = util_image_cast_to_float(pixels[i * 4 + 2]);
- float k = util_image_cast_to_float(pixels[i * 4 + 3]);
- pixels[i * 4 + 0] = util_image_cast_from_float<StorageType>((1.0f - c) * (1.0f - k));
- pixels[i * 4 + 1] = util_image_cast_from_float<StorageType>((1.0f - m) * (1.0f - k));
- pixels[i * 4 + 2] = util_image_cast_from_float<StorageType>((1.0f - y) * (1.0f - k));
- pixels[i * 4 + 3] = one;
- }
- }
- else if (components == 2) {
+ if (components == 2) {
/* Grayscale + alpha to RGBA. */
for (size_t i = num_pixels - 1, pixel = 0; pixel < num_pixels; pixel++, i--) {
pixels[i * 4 + 3] = pixels[i * 2 + 1];
@@ -700,7 +529,7 @@ bool ImageManager::file_load_image(Image *img,
}
/* Disable alpha if requested by the user. */
- if (img->alpha_type == IMAGE_ALPHA_IGNORE) {
+ if (img->params.alpha_type == IMAGE_ALPHA_IGNORE) {
for (size_t i = num_pixels - 1, pixel = 0; pixel < num_pixels; pixel++, i--) {
pixels[i * 4 + 3] = one;
}
@@ -710,7 +539,7 @@ bool ImageManager::file_load_image(Image *img,
img->metadata.colorspace != u_colorspace_srgb) {
/* Convert to scene linear. */
ColorSpaceManager::to_scene_linear(
- img->metadata.colorspace, pixels, width, height, depth, img->metadata.compress_as_srgb);
+ img->metadata.colorspace, pixels, num_pixels, img->metadata.compress_as_srgb);
}
}
@@ -747,7 +576,8 @@ bool ImageManager::file_load_image(Image *img,
while (max_size * scale_factor > texture_limit) {
scale_factor *= 0.5f;
}
- VLOG(1) << "Scaling image " << img->filename << " by a factor of " << scale_factor << ".";
+ VLOG(1) << "Scaling image " << img->loader->name() << " by a factor of " << scale_factor
+ << ".";
vector<StorageType> scaled_pixels;
size_t scaled_width, scaled_height, scaled_depth;
util_image_resize_pixels(pixels_storage,
@@ -765,7 +595,7 @@ bool ImageManager::file_load_image(Image *img,
{
thread_scoped_lock device_lock(device_mutex);
- texture_pixels = (StorageType *)tex_img.alloc(scaled_width, scaled_height, scaled_depth);
+ texture_pixels = (StorageType *)img->mem->alloc(scaled_width, scaled_height, scaled_depth);
}
memcpy(texture_pixels, &scaled_pixels[0], scaled_pixels.size() * sizeof(StorageType));
@@ -774,25 +604,23 @@ bool ImageManager::file_load_image(Image *img,
return true;
}
-void ImageManager::device_load_image(
- Device *device, Scene *scene, ImageDataType type, int slot, Progress *progress)
+void ImageManager::device_load_image(Device *device, Scene *scene, int slot, Progress *progress)
{
- if (progress->get_cancel())
+ if (progress->get_cancel()) {
return;
+ }
- Image *img = images[type][slot];
+ Image *img = images[slot];
- if (osl_texture_system && !img->builtin_data)
- return;
-
- string filename = path_filename(images[type][slot]->filename);
- progress->set_status("Updating Images", "Loading " + filename);
+ progress->set_status("Updating Images", "Loading " + img->loader->name());
const int texture_limit = scene->params.texture_limit;
- /* Slot assignment */
- int flat_slot = type_index_to_flattened_slot(slot, type);
- img->mem_name = string_printf("__tex_image_%s_%03d", name_from_type(type), flat_slot);
+ load_image_metadata(img);
+ ImageDataType type = img->metadata.type;
+
+ /* Name for debugging. */
+ img->mem_name = string_printf("__tex_image_%s_%03d", name_from_type(type), slot);
/* Free previous texture in slot. */
if (img->mem) {
@@ -801,195 +629,131 @@ void ImageManager::device_load_image(
img->mem = NULL;
}
+ img->mem = new device_texture(
+ device, img->mem_name.c_str(), slot, type, img->params.interpolation, img->params.extension);
+ img->mem->info.use_transform_3d = img->metadata.use_transform_3d;
+ img->mem->info.transform_3d = img->metadata.transform_3d;
+
/* Create new texture. */
if (type == IMAGE_DATA_TYPE_FLOAT4) {
- device_vector<float4> *tex_img = new device_vector<float4>(
- device, img->mem_name.c_str(), MEM_TEXTURE);
-
- if (!file_load_image<TypeDesc::FLOAT, float>(img, type, texture_limit, *tex_img)) {
+ if (!file_load_image<TypeDesc::FLOAT, float>(img, texture_limit)) {
/* on failure to load, we set a 1x1 pixels pink image */
thread_scoped_lock device_lock(device_mutex);
- float *pixels = (float *)tex_img->alloc(1, 1);
+ float *pixels = (float *)img->mem->alloc(1, 1);
pixels[0] = TEX_IMAGE_MISSING_R;
pixels[1] = TEX_IMAGE_MISSING_G;
pixels[2] = TEX_IMAGE_MISSING_B;
pixels[3] = TEX_IMAGE_MISSING_A;
}
-
- img->mem = tex_img;
- img->mem->interpolation = img->interpolation;
- img->mem->extension = img->extension;
-
- thread_scoped_lock device_lock(device_mutex);
- tex_img->copy_to_device();
}
else if (type == IMAGE_DATA_TYPE_FLOAT) {
- device_vector<float> *tex_img = new device_vector<float>(
- device, img->mem_name.c_str(), MEM_TEXTURE);
-
- if (!file_load_image<TypeDesc::FLOAT, float>(img, type, texture_limit, *tex_img)) {
+ if (!file_load_image<TypeDesc::FLOAT, float>(img, texture_limit)) {
/* on failure to load, we set a 1x1 pixels pink image */
thread_scoped_lock device_lock(device_mutex);
- float *pixels = (float *)tex_img->alloc(1, 1);
+ float *pixels = (float *)img->mem->alloc(1, 1);
pixels[0] = TEX_IMAGE_MISSING_R;
}
-
- img->mem = tex_img;
- img->mem->interpolation = img->interpolation;
- img->mem->extension = img->extension;
-
- thread_scoped_lock device_lock(device_mutex);
- tex_img->copy_to_device();
}
else if (type == IMAGE_DATA_TYPE_BYTE4) {
- device_vector<uchar4> *tex_img = new device_vector<uchar4>(
- device, img->mem_name.c_str(), MEM_TEXTURE);
-
- if (!file_load_image<TypeDesc::UINT8, uchar>(img, type, texture_limit, *tex_img)) {
+ if (!file_load_image<TypeDesc::UINT8, uchar>(img, texture_limit)) {
/* on failure to load, we set a 1x1 pixels pink image */
thread_scoped_lock device_lock(device_mutex);
- uchar *pixels = (uchar *)tex_img->alloc(1, 1);
+ uchar *pixels = (uchar *)img->mem->alloc(1, 1);
pixels[0] = (TEX_IMAGE_MISSING_R * 255);
pixels[1] = (TEX_IMAGE_MISSING_G * 255);
pixels[2] = (TEX_IMAGE_MISSING_B * 255);
pixels[3] = (TEX_IMAGE_MISSING_A * 255);
}
-
- img->mem = tex_img;
- img->mem->interpolation = img->interpolation;
- img->mem->extension = img->extension;
-
- thread_scoped_lock device_lock(device_mutex);
- tex_img->copy_to_device();
}
else if (type == IMAGE_DATA_TYPE_BYTE) {
- device_vector<uchar> *tex_img = new device_vector<uchar>(
- device, img->mem_name.c_str(), MEM_TEXTURE);
-
- if (!file_load_image<TypeDesc::UINT8, uchar>(img, type, texture_limit, *tex_img)) {
+ if (!file_load_image<TypeDesc::UINT8, uchar>(img, texture_limit)) {
/* on failure to load, we set a 1x1 pixels pink image */
thread_scoped_lock device_lock(device_mutex);
- uchar *pixels = (uchar *)tex_img->alloc(1, 1);
+ uchar *pixels = (uchar *)img->mem->alloc(1, 1);
pixels[0] = (TEX_IMAGE_MISSING_R * 255);
}
-
- img->mem = tex_img;
- img->mem->interpolation = img->interpolation;
- img->mem->extension = img->extension;
-
- thread_scoped_lock device_lock(device_mutex);
- tex_img->copy_to_device();
}
else if (type == IMAGE_DATA_TYPE_HALF4) {
- device_vector<half4> *tex_img = new device_vector<half4>(
- device, img->mem_name.c_str(), MEM_TEXTURE);
-
- if (!file_load_image<TypeDesc::HALF, half>(img, type, texture_limit, *tex_img)) {
+ if (!file_load_image<TypeDesc::HALF, half>(img, texture_limit)) {
/* on failure to load, we set a 1x1 pixels pink image */
thread_scoped_lock device_lock(device_mutex);
- half *pixels = (half *)tex_img->alloc(1, 1);
+ half *pixels = (half *)img->mem->alloc(1, 1);
pixels[0] = TEX_IMAGE_MISSING_R;
pixels[1] = TEX_IMAGE_MISSING_G;
pixels[2] = TEX_IMAGE_MISSING_B;
pixels[3] = TEX_IMAGE_MISSING_A;
}
-
- img->mem = tex_img;
- img->mem->interpolation = img->interpolation;
- img->mem->extension = img->extension;
-
- thread_scoped_lock device_lock(device_mutex);
- tex_img->copy_to_device();
}
else if (type == IMAGE_DATA_TYPE_USHORT) {
- device_vector<uint16_t> *tex_img = new device_vector<uint16_t>(
- device, img->mem_name.c_str(), MEM_TEXTURE);
-
- if (!file_load_image<TypeDesc::USHORT, uint16_t>(img, type, texture_limit, *tex_img)) {
+ if (!file_load_image<TypeDesc::USHORT, uint16_t>(img, texture_limit)) {
/* on failure to load, we set a 1x1 pixels pink image */
thread_scoped_lock device_lock(device_mutex);
- uint16_t *pixels = (uint16_t *)tex_img->alloc(1, 1);
+ uint16_t *pixels = (uint16_t *)img->mem->alloc(1, 1);
pixels[0] = (TEX_IMAGE_MISSING_R * 65535);
}
-
- img->mem = tex_img;
- img->mem->interpolation = img->interpolation;
- img->mem->extension = img->extension;
-
- thread_scoped_lock device_lock(device_mutex);
- tex_img->copy_to_device();
}
else if (type == IMAGE_DATA_TYPE_USHORT4) {
- device_vector<ushort4> *tex_img = new device_vector<ushort4>(
- device, img->mem_name.c_str(), MEM_TEXTURE);
-
- if (!file_load_image<TypeDesc::USHORT, uint16_t>(img, type, texture_limit, *tex_img)) {
+ if (!file_load_image<TypeDesc::USHORT, uint16_t>(img, texture_limit)) {
/* on failure to load, we set a 1x1 pixels pink image */
thread_scoped_lock device_lock(device_mutex);
- uint16_t *pixels = (uint16_t *)tex_img->alloc(1, 1);
+ uint16_t *pixels = (uint16_t *)img->mem->alloc(1, 1);
pixels[0] = (TEX_IMAGE_MISSING_R * 65535);
pixels[1] = (TEX_IMAGE_MISSING_G * 65535);
pixels[2] = (TEX_IMAGE_MISSING_B * 65535);
pixels[3] = (TEX_IMAGE_MISSING_A * 65535);
}
-
- img->mem = tex_img;
- img->mem->interpolation = img->interpolation;
- img->mem->extension = img->extension;
-
- thread_scoped_lock device_lock(device_mutex);
- tex_img->copy_to_device();
}
else if (type == IMAGE_DATA_TYPE_HALF) {
- device_vector<half> *tex_img = new device_vector<half>(
- device, img->mem_name.c_str(), MEM_TEXTURE);
-
- if (!file_load_image<TypeDesc::HALF, half>(img, type, texture_limit, *tex_img)) {
+ if (!file_load_image<TypeDesc::HALF, half>(img, texture_limit)) {
/* on failure to load, we set a 1x1 pixels pink image */
thread_scoped_lock device_lock(device_mutex);
- half *pixels = (half *)tex_img->alloc(1, 1);
+ half *pixels = (half *)img->mem->alloc(1, 1);
pixels[0] = TEX_IMAGE_MISSING_R;
}
+ }
- img->mem = tex_img;
- img->mem->interpolation = img->interpolation;
- img->mem->extension = img->extension;
-
+ {
thread_scoped_lock device_lock(device_mutex);
- tex_img->copy_to_device();
+ img->mem->copy_to_device();
}
+
+ /* Cleanup memory in image loader. */
+ img->loader->cleanup();
img->need_load = false;
}
-void ImageManager::device_free_image(Device *, ImageDataType type, int slot)
+void ImageManager::device_free_image(Device *, int slot)
{
- Image *img = images[type][slot];
+ Image *img = images[slot];
+ if (img == NULL) {
+ return;
+ }
- if (img) {
- if (osl_texture_system && !img->builtin_data) {
+ if (osl_texture_system) {
#ifdef WITH_OSL
- ustring filename(images[type][slot]->filename);
- ((OSL::TextureSystem *)osl_texture_system)->invalidate(filename);
-#endif
- }
-
- if (img->mem) {
- thread_scoped_lock device_lock(device_mutex);
- delete img->mem;
+ ustring filepath = img->loader->osl_filepath();
+ if (!filepath.empty()) {
+ ((OSL::TextureSystem *)osl_texture_system)->invalidate(filepath);
}
+#endif
+ }
- delete img;
- images[type][slot] = NULL;
- --tex_num_images[type];
+ if (img->mem) {
+ thread_scoped_lock device_lock(device_mutex);
+ delete img->mem;
}
+
+ delete img->loader;
+ delete img;
+ images[slot] = NULL;
}
void ImageManager::device_update(Device *device, Scene *scene, Progress &progress)
@@ -999,24 +763,14 @@ void ImageManager::device_update(Device *device, Scene *scene, Progress &progres
}
TaskPool pool;
- for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- for (size_t slot = 0; slot < images[type].size(); slot++) {
- if (!images[type][slot])
- continue;
-
- if (images[type][slot]->users == 0) {
- device_free_image(device, (ImageDataType)type, slot);
- }
- else if (images[type][slot]->need_load) {
- if (!osl_texture_system || images[type][slot]->builtin_data)
- pool.push(function_bind(&ImageManager::device_load_image,
- this,
- device,
- scene,
- (ImageDataType)type,
- slot,
- &progress));
- }
+ for (size_t slot = 0; slot < images.size(); slot++) {
+ Image *img = images[slot];
+ if (img && img->users == 0) {
+ device_free_image(device, slot);
+ }
+ else if (img && img->need_load) {
+ pool.push(
+ function_bind(&ImageManager::device_load_image, this, device, scene, slot, &progress));
}
}
@@ -1025,23 +779,16 @@ void ImageManager::device_update(Device *device, Scene *scene, Progress &progres
need_update = false;
}
-void ImageManager::device_update_slot(Device *device,
- Scene *scene,
- int flat_slot,
- Progress *progress)
+void ImageManager::device_update_slot(Device *device, Scene *scene, int slot, Progress *progress)
{
- ImageDataType type;
- int slot = flattened_slot_to_type_index(flat_slot, &type);
-
- Image *image = images[type][slot];
- assert(image != NULL);
+ Image *img = images[slot];
+ assert(img != NULL);
- if (image->users == 0) {
- device_free_image(device, type, slot);
+ if (img->users == 0) {
+ device_free_image(device, slot);
}
- else if (image->need_load) {
- if (!osl_texture_system || image->builtin_data)
- device_load_image(device, scene, type, slot, progress);
+ else if (img->need_load) {
+ device_load_image(device, scene, slot, progress);
}
}
@@ -1054,22 +801,11 @@ void ImageManager::device_load_builtin(Device *device, Scene *scene, Progress &p
}
TaskPool pool;
- for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- for (size_t slot = 0; slot < images[type].size(); slot++) {
- if (!images[type][slot])
- continue;
-
- if (images[type][slot]->need_load) {
- if (images[type][slot]->builtin_data) {
- pool.push(function_bind(&ImageManager::device_load_image,
- this,
- device,
- scene,
- (ImageDataType)type,
- slot,
- &progress));
- }
- }
+ for (size_t slot = 0; slot < images.size(); slot++) {
+ Image *img = images[slot];
+ if (img && img->need_load && img->builtin) {
+ pool.push(
+ function_bind(&ImageManager::device_load_image, this, device, scene, slot, &progress));
}
}
@@ -1078,31 +814,27 @@ void ImageManager::device_load_builtin(Device *device, Scene *scene, Progress &p
void ImageManager::device_free_builtin(Device *device)
{
- for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- for (size_t slot = 0; slot < images[type].size(); slot++) {
- if (images[type][slot] && images[type][slot]->builtin_data)
- device_free_image(device, (ImageDataType)type, slot);
+ for (size_t slot = 0; slot < images.size(); slot++) {
+ Image *img = images[slot];
+ if (img && img->builtin) {
+ device_free_image(device, slot);
}
}
}
void ImageManager::device_free(Device *device)
{
- for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- for (size_t slot = 0; slot < images[type].size(); slot++) {
- device_free_image(device, (ImageDataType)type, slot);
- }
- images[type].clear();
+ for (size_t slot = 0; slot < images.size(); slot++) {
+ device_free_image(device, slot);
}
+ images.clear();
}
void ImageManager::collect_statistics(RenderStats *stats)
{
- for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
- foreach (const Image *image, images[type]) {
- stats->image.textures.add_entry(
- NamedSizeEntry(path_filename(image->filename), image->mem->memory_size()));
- }
+ foreach (const Image *image, images) {
+ stats->image.textures.add_entry(
+ NamedSizeEntry(image->loader->name(), image->mem->memory_size()));
}
}
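
Editor's note: with the typed flat slots and builtin image callbacks removed above, callers go through ImageParams and ImageHandle instead. A usage sketch (not part of the patch; the file paths and parameter values are made up, and it assumes the scene->image_manager pointer used elsewhere in Cycles):

static void add_example_images(Scene *scene)
{
  ImageParams params;
  params.interpolation = INTERPOLATION_LINEAR;
  params.extension = EXTENSION_REPEAT;
  params.alpha_type = IMAGE_ALPHA_AUTO;
  params.colorspace = u_colorspace_srgb;

  /* Single image: one slot, reference counted through the handle. */
  ImageHandle handle = scene->image_manager->add_image("/tmp/checker.png", params);
  int slot = handle.svm_slot(); /* -1 when OSL loads the file itself. */
  (void)slot;

  /* UDIM set: "<UDIM>" is replaced per tile (tile 1001 gives
   * "/tmp/grid.1001.png"); tile number 0 keeps the filename unchanged. */
  vector<int> tiles;
  tiles.push_back(1001);
  tiles.push_back(1002);
  ImageHandle udim = scene->image_manager->add_image("/tmp/grid.<UDIM>.png", params, tiles);

  /* In real use the handles would be kept alive, e.g. stored on a shader
   * node; when they go out of scope here, the image user counts drop back to
   * zero and device_update() frees the slots again. */
}

Requests with an equal loader and equal ImageParams reuse an existing slot (see add_image_slot above), so creating handles from multiple shader nodes is cheap.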
diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h
index bc04a667953..00ab12afd7a 100644
--- a/intern/cycles/render/image.h
+++ b/intern/cycles/render/image.h
@@ -22,92 +22,157 @@
#include "render/colorspace.h"
-#include "util/util_image.h"
#include "util/util_string.h"
#include "util/util_thread.h"
+#include "util/util_transform.h"
#include "util/util_unique_ptr.h"
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
class Device;
+class ImageHandle;
+class ImageKey;
+class ImageMetaData;
+class ImageManager;
class Progress;
class RenderStats;
class Scene;
class ColorSpaceProcessor;
-class ImageMetaData {
+/* Image Parameters */
+class ImageParams {
public:
- /* Must be set by image file or builtin callback. */
- bool is_float, is_half;
- int channels;
- size_t width, height, depth;
- bool builtin_free_cache;
-
- /* Automatically set. */
- ImageDataType type;
+ bool animated;
+ InterpolationType interpolation;
+ ExtensionType extension;
+ ImageAlphaType alpha_type;
ustring colorspace;
- bool compress_as_srgb;
+ float frame;
- ImageMetaData()
- : is_float(false),
- is_half(false),
- channels(0),
- width(0),
- height(0),
- depth(0),
- builtin_free_cache(false),
- type((ImageDataType)0),
+ ImageParams()
+ : animated(false),
+ interpolation(INTERPOLATION_LINEAR),
+ extension(EXTENSION_CLIP),
+ alpha_type(IMAGE_ALPHA_AUTO),
colorspace(u_colorspace_raw),
- compress_as_srgb(false)
+ frame(0.0f)
{
}
- bool operator==(const ImageMetaData &other) const
+ bool operator==(const ImageParams &other) const
{
- return is_float == other.is_float && is_half == other.is_half && channels == other.channels &&
- width == other.width && height == other.height && depth == other.depth &&
- type == other.type && colorspace == other.colorspace &&
- compress_as_srgb == other.compress_as_srgb;
+ return (animated == other.animated && interpolation == other.interpolation &&
+ extension == other.extension && alpha_type == other.alpha_type &&
+ colorspace == other.colorspace && frame == other.frame);
}
};
+/* Image MetaData
+ *
+ * Information about the image that is available before the image pixels are loaded. */
+class ImageMetaData {
+ public:
+ /* Set by ImageLoader.load_metadata(). */
+ int channels;
+ size_t width, height, depth;
+ ImageDataType type;
+
+ /* Optional color space, defaults to raw. */
+ ustring colorspace;
+ const char *colorspace_file_format;
+
+ /* Optional transform for 3D images. */
+ bool use_transform_3d;
+ Transform transform_3d;
+
+ /* Automatically set. */
+ bool compress_as_srgb;
+
+ ImageMetaData();
+ bool operator==(const ImageMetaData &other) const;
+ bool is_float() const;
+ void detect_colorspace();
+};
+
+/* Image loader base class that can be subclassed to load image data
+ * from custom sources (file, memory, procedurally generated, etc.). */
+class ImageLoader {
+ public:
+ ImageLoader();
+ virtual ~ImageLoader(){};
+
+ /* Load metadata without loading the actual image pixels yet; should be fast. */
+ virtual bool load_metadata(ImageMetaData &metadata) = 0;
+
+ /* Load actual image contents. */
+ virtual bool load_pixels(const ImageMetaData &metadata,
+ void *pixels,
+ const size_t pixels_size,
+ const bool associate_alpha) = 0;
+
+ /* Name for logs and stats. */
+ virtual string name() const = 0;
+
+ /* Optional for OSL texture cache. */
+ virtual ustring osl_filepath() const;
+
+ /* Free any memory used for loading metadata and pixels. */
+ virtual void cleanup(){};
+
+ /* Compare to avoid loading the same image multiple times. */
+ virtual bool equals(const ImageLoader &other) const = 0;
+ static bool equals(const ImageLoader *a, const ImageLoader *b);
+
+ /* Workaround for no RTTI. */
+};
+
+/* Image Handle
+ *
+ * Access handle for an image in the image manager. Multiple shader nodes may
+ * share the same image, and this class handles reference counting for that. */
+class ImageHandle {
+ public:
+ ImageHandle();
+ ImageHandle(const ImageHandle &other);
+ ImageHandle &operator=(const ImageHandle &other);
+ ~ImageHandle();
+
+ bool operator==(const ImageHandle &other) const;
+
+ void clear();
+
+ bool empty();
+ int num_tiles();
+
+ ImageMetaData metadata();
+ int svm_slot(const int tile_index = 0) const;
+ device_texture *image_memory(const int tile_index = 0) const;
+
+ protected:
+ vector<int> tile_slots;
+ ImageManager *manager;
+
+ friend class ImageManager;
+};
+
+/* Image Manager
+ *
+ * Handles loading and storage of all images in the scene. This includes 2D
+ * texture images and 3D volume images. */
class ImageManager {
public:
explicit ImageManager(const DeviceInfo &info);
~ImageManager();
- int add_image(const string &filename,
- void *builtin_data,
- bool animated,
- float frame,
- InterpolationType interpolation,
- ExtensionType extension,
- ImageAlphaType alpha_type,
- ustring colorspace,
- ImageMetaData &metadata);
- void add_image_user(int flat_slot);
- void remove_image(int flat_slot);
- void remove_image(const string &filename,
- void *builtin_data,
- InterpolationType interpolation,
- ExtensionType extension,
- ImageAlphaType alpha_type,
- ustring colorspace);
- void tag_reload_image(const string &filename,
- void *builtin_data,
- InterpolationType interpolation,
- ExtensionType extension,
- ImageAlphaType alpha_type,
- ustring colorspace);
- bool get_image_metadata(const string &filename,
- void *builtin_data,
- ustring colorspace,
- ImageMetaData &metadata);
- bool get_image_metadata(int flat_slot, ImageMetaData &metadata);
+ ImageHandle add_image(const string &filename, const ImageParams &params);
+ ImageHandle add_image(const string &filename,
+ const ImageParams &params,
+ const vector<int> &tiles);
+ ImageHandle add_image(ImageLoader *loader, const ImageParams &params);
void device_update(Device *device, Scene *scene, Progress &progress);
- void device_update_slot(Device *device, Scene *scene, int flat_slot, Progress *progress);
+ void device_update_slot(Device *device, Scene *scene, int slot, Progress *progress);
void device_free(Device *device);
void device_load_builtin(Device *device, Scene *scene, Progress &progress);
@@ -116,78 +181,49 @@ class ImageManager {
void set_osl_texture_system(void *texture_system);
bool set_animation_frame_update(int frame);
- device_memory *image_memory(int flat_slot);
-
void collect_statistics(RenderStats *stats);
bool need_update;
- /* NOTE: Here pixels_size is a size of storage, which equals to
- * width * height * depth.
- * Use this to avoid some nasty memory corruptions.
- */
- function<void(const string &filename, void *data, ImageMetaData &metadata)>
- builtin_image_info_cb;
- function<bool(const string &filename,
- void *data,
- int tile,
- unsigned char *pixels,
- const size_t pixels_size,
- const bool associate_alpha,
- const bool free_cache)>
- builtin_image_pixels_cb;
- function<bool(const string &filename,
- void *data,
- int tile,
- float *pixels,
- const size_t pixels_size,
- const bool associate_alpha,
- const bool free_cache)>
- builtin_image_float_pixels_cb;
-
struct Image {
- string filename;
- void *builtin_data;
+ ImageParams params;
ImageMetaData metadata;
+ ImageLoader *loader;
- ustring colorspace;
- ImageAlphaType alpha_type;
- bool need_load;
- bool animated;
float frame;
- InterpolationType interpolation;
- ExtensionType extension;
+ bool need_metadata;
+ bool need_load;
+ bool builtin;
string mem_name;
- device_memory *mem;
+ device_texture *mem;
int users;
+ thread_mutex mutex;
};
private:
- int tex_num_images[IMAGE_DATA_NUM_TYPES];
- int max_num_images;
bool has_half_images;
thread_mutex device_mutex;
int animation_frame;
- vector<Image *> images[IMAGE_DATA_NUM_TYPES];
+ vector<Image *> images;
void *osl_texture_system;
- bool file_load_image_generic(Image *img, unique_ptr<ImageInput> *in);
+ int add_image_slot(ImageLoader *loader, const ImageParams &params, const bool builtin);
+ void add_image_user(int slot);
+ void remove_image_user(int slot);
+
+ void load_image_metadata(Image *img);
- template<TypeDesc::BASETYPE FileFormat, typename StorageType, typename DeviceType>
- bool file_load_image(Image *img,
- ImageDataType type,
- int texture_limit,
- device_vector<DeviceType> &tex_img);
+ template<TypeDesc::BASETYPE FileFormat, typename StorageType>
+ bool file_load_image(Image *img, int texture_limit);
- void metadata_detect_colorspace(ImageMetaData &metadata, const char *file_format);
+ void device_load_image(Device *device, Scene *scene, int slot, Progress *progress);
+ void device_free_image(Device *device, int slot);
- void device_load_image(
- Device *device, Scene *scene, ImageDataType type, int slot, Progress *progress);
- void device_free_image(Device *device, ImageDataType type, int slot);
+ friend class ImageHandle;
};
CCL_NAMESPACE_END
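
Editor's note: the ImageLoader class declared above is the extension point for custom image sources. A minimal sketch of a subclass (hypothetical, not part of the patch): a procedural loader that fills a single-channel float image with a constant value. The class and field names are illustrative only, and it assumes render/image.h is included:

class ConstantImageLoader : public ImageLoader {
 public:
  ConstantImageLoader(float value, int size) : value(value), size(size)
  {
  }

  bool load_metadata(ImageMetaData &metadata) override
  {
    metadata.width = size;
    metadata.height = size;
    metadata.depth = 1;
    metadata.channels = 1;
    metadata.type = IMAGE_DATA_TYPE_FLOAT;
    return true;
  }

  bool load_pixels(const ImageMetaData & /*metadata*/,
                   void *pixels,
                   const size_t pixels_size,
                   const bool /*associate_alpha*/) override
  {
    /* pixels_size is width * height * depth * channels, as passed in by
     * ImageManager::file_load_image(). */
    float *fpixels = (float *)pixels;
    for (size_t i = 0; i < pixels_size; i++) {
      fpixels[i] = value;
    }
    return true;
  }

  string name() const override
  {
    return "constant";
  }

  bool equals(const ImageLoader &other) const override
  {
    /* ImageLoader::equals(a, b) only calls this after a typeid check, so the
     * cast is safe. */
    const ConstantImageLoader &other_loader = (const ConstantImageLoader &)other;
    return value == other_loader.value && size == other_loader.size;
  }

 protected:
  float value;
  int size;
};

Such a loader would be registered with ImageManager::add_image(new ConstantImageLoader(0.5f, 256), params); the manager takes ownership of the pointer, deleting it either immediately when an equal image already exists or later in device_free_image().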
diff --git a/intern/cycles/render/image_oiio.cpp b/intern/cycles/render/image_oiio.cpp
new file mode 100644
index 00000000000..c4f95c6b4bc
--- /dev/null
+++ b/intern/cycles/render/image_oiio.cpp
@@ -0,0 +1,236 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "render/image_oiio.h"
+
+#include "util/util_image.h"
+#include "util/util_logging.h"
+#include "util/util_path.h"
+
+CCL_NAMESPACE_BEGIN
+
+OIIOImageLoader::OIIOImageLoader(const string &filepath) : filepath(filepath)
+{
+}
+
+OIIOImageLoader::~OIIOImageLoader()
+{
+}
+
+bool OIIOImageLoader::load_metadata(ImageMetaData &metadata)
+{
+ /* Perform preliminary checks, with meaningful logging. */
+ if (!path_exists(filepath.string())) {
+ VLOG(1) << "File '" << filepath.string() << "' does not exist.";
+ return false;
+ }
+ if (path_is_directory(filepath.string())) {
+ VLOG(1) << "File '" << filepath.string() << "' is a directory, can't use as image.";
+ return false;
+ }
+
+ unique_ptr<ImageInput> in(ImageInput::create(filepath.string()));
+
+ if (!in) {
+ return false;
+ }
+
+ ImageSpec spec;
+ if (!in->open(filepath.string(), spec)) {
+ return false;
+ }
+
+ metadata.width = spec.width;
+ metadata.height = spec.height;
+ metadata.depth = spec.depth;
+ metadata.compress_as_srgb = false;
+
+ /* Check the main format, and channel formats. */
+ size_t channel_size = spec.format.basesize();
+
+ bool is_float = false;
+ bool is_half = false;
+
+ if (spec.format.is_floating_point()) {
+ is_float = true;
+ }
+
+ for (size_t channel = 0; channel < spec.channelformats.size(); channel++) {
+ channel_size = max(channel_size, spec.channelformats[channel].basesize());
+ if (spec.channelformats[channel].is_floating_point()) {
+ is_float = true;
+ }
+ }
+
+ /* check if it's half float */
+ if (spec.format == TypeDesc::HALF) {
+ is_half = true;
+ }
+
+ /* set type and channels */
+ metadata.channels = spec.nchannels;
+
+ if (is_half) {
+ metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_HALF4 : IMAGE_DATA_TYPE_HALF;
+ }
+ else if (is_float) {
+ metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_FLOAT;
+ }
+ else if (spec.format == TypeDesc::USHORT) {
+ metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_USHORT4 : IMAGE_DATA_TYPE_USHORT;
+ }
+ else {
+ metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_BYTE4 : IMAGE_DATA_TYPE_BYTE;
+ }
+
+ metadata.colorspace_file_format = in->format_name();
+
+ in->close();
+
+ return true;
+}
+
+template<TypeDesc::BASETYPE FileFormat, typename StorageType>
+static void oiio_load_pixels(const ImageMetaData &metadata,
+ const unique_ptr<ImageInput> &in,
+ StorageType *pixels)
+{
+ const int width = metadata.width;
+ const int height = metadata.height;
+ const int depth = metadata.depth;
+ const int components = metadata.channels;
+
+ /* Read pixels through OpenImageIO. */
+ StorageType *readpixels = pixels;
+ vector<StorageType> tmppixels;
+ if (components > 4) {
+ tmppixels.resize(((size_t)width) * height * components);
+ readpixels = &tmppixels[0];
+ }
+
+ if (depth <= 1) {
+ size_t scanlinesize = ((size_t)width) * components * sizeof(StorageType);
+ in->read_image(FileFormat,
+ (uchar *)readpixels + (height - 1) * scanlinesize,
+ AutoStride,
+ -scanlinesize,
+ AutoStride);
+ }
+ else {
+ in->read_image(FileFormat, (uchar *)readpixels);
+ }
+
+ if (components > 4) {
+ size_t dimensions = ((size_t)width) * height;
+ for (size_t i = dimensions - 1, pixel = 0; pixel < dimensions; pixel++, i--) {
+ pixels[i * 4 + 3] = tmppixels[i * components + 3];
+ pixels[i * 4 + 2] = tmppixels[i * components + 2];
+ pixels[i * 4 + 1] = tmppixels[i * components + 1];
+ pixels[i * 4 + 0] = tmppixels[i * components + 0];
+ }
+ tmppixels.clear();
+ }
+
+ /* CMYK to RGBA. */
+ const bool cmyk = strcmp(in->format_name(), "jpeg") == 0 && components == 4;
+ if (cmyk) {
+ const StorageType one = util_image_cast_from_float<StorageType>(1.0f);
+
+ const size_t num_pixels = ((size_t)width) * height * depth;
+ for (size_t i = num_pixels - 1, pixel = 0; pixel < num_pixels; pixel++, i--) {
+ float c = util_image_cast_to_float(pixels[i * 4 + 0]);
+ float m = util_image_cast_to_float(pixels[i * 4 + 1]);
+ float y = util_image_cast_to_float(pixels[i * 4 + 2]);
+ float k = util_image_cast_to_float(pixels[i * 4 + 3]);
+ pixels[i * 4 + 0] = util_image_cast_from_float<StorageType>((1.0f - c) * (1.0f - k));
+ pixels[i * 4 + 1] = util_image_cast_from_float<StorageType>((1.0f - m) * (1.0f - k));
+ pixels[i * 4 + 2] = util_image_cast_from_float<StorageType>((1.0f - y) * (1.0f - k));
+ pixels[i * 4 + 3] = one;
+ }
+ }
+}
+
+bool OIIOImageLoader::load_pixels(const ImageMetaData &metadata,
+ void *pixels,
+ const size_t,
+ const bool associate_alpha)
+{
+ unique_ptr<ImageInput> in = NULL;
+
+ /* NOTE: Error logging is done in meta data acquisition. */
+ if (!path_exists(filepath.string()) || path_is_directory(filepath.string())) {
+ return false;
+ }
+
+ /* load image from file through OIIO */
+ in = unique_ptr<ImageInput>(ImageInput::create(filepath.string()));
+ if (!in) {
+ return false;
+ }
+
+ ImageSpec spec = ImageSpec();
+ ImageSpec config = ImageSpec();
+
+ if (!associate_alpha) {
+ config.attribute("oiio:UnassociatedAlpha", 1);
+ }
+
+ if (!in->open(filepath.string(), spec, config)) {
+ return false;
+ }
+
+ switch (metadata.type) {
+ case IMAGE_DATA_TYPE_BYTE:
+ case IMAGE_DATA_TYPE_BYTE4:
+ oiio_load_pixels<TypeDesc::UINT8, uchar>(metadata, in, (uchar *)pixels);
+ break;
+ case IMAGE_DATA_TYPE_USHORT:
+ case IMAGE_DATA_TYPE_USHORT4:
+ oiio_load_pixels<TypeDesc::USHORT, uint16_t>(metadata, in, (uint16_t *)pixels);
+ break;
+ case IMAGE_DATA_TYPE_HALF:
+ case IMAGE_DATA_TYPE_HALF4:
+ oiio_load_pixels<TypeDesc::HALF, half>(metadata, in, (half *)pixels);
+ break;
+ case IMAGE_DATA_TYPE_FLOAT:
+ case IMAGE_DATA_TYPE_FLOAT4:
+ oiio_load_pixels<TypeDesc::FLOAT, float>(metadata, in, (float *)pixels);
+ break;
+ case IMAGE_DATA_NUM_TYPES:
+ break;
+ }
+
+ in->close();
+ return true;
+}
+
+string OIIOImageLoader::name() const
+{
+ return path_filename(filepath.string());
+}
+
+ustring OIIOImageLoader::osl_filepath() const
+{
+ return filepath;
+}
+
+bool OIIOImageLoader::equals(const ImageLoader &other) const
+{
+ const OIIOImageLoader &other_loader = (const OIIOImageLoader &)other;
+ return filepath == other_loader.filepath;
+}
+
+CCL_NAMESPACE_END
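
Editor's note on oiio_load_pixels() above: for 2D images the read starts at the last scanline of the destination buffer with a negative ystride, so file row y lands at buffer row (height - 1 - y), keeping the first buffer row at the bottom of the image as Cycles' texture coordinates expect. A sketch of the equivalent manual flip, shown only to make the stride trick explicit (not part of the patch, which lets OIIO do the flip during the read):

template<typename StorageType>
static void flip_rows_in_place(StorageType *pixels, int width, int height, int components)
{
  const size_t row = (size_t)width * components;
  for (int y = 0; y < height / 2; y++) {
    StorageType *top = pixels + (size_t)y * row;
    StorageType *bottom = pixels + (size_t)(height - 1 - y) * row;
    for (size_t x = 0; x < row; x++) {
      /* Swap row y with row (height - 1 - y). */
      StorageType tmp = top[x];
      top[x] = bottom[x];
      bottom[x] = tmp;
    }
  }
}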
diff --git a/intern/cycles/render/image_oiio.h b/intern/cycles/render/image_oiio.h
new file mode 100644
index 00000000000..a234b968557
--- /dev/null
+++ b/intern/cycles/render/image_oiio.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __IMAGE_OIIO__
+#define __IMAGE_OIIO__
+
+#include "render/image.h"
+
+CCL_NAMESPACE_BEGIN
+
+class OIIOImageLoader : public ImageLoader {
+ public:
+ OIIOImageLoader(const string &filepath);
+ ~OIIOImageLoader();
+
+ bool load_metadata(ImageMetaData &metadata) override;
+
+ bool load_pixels(const ImageMetaData &metadata,
+ void *pixels,
+ const size_t pixels_size,
+ const bool associate_alpha) override;
+
+ string name() const override;
+
+ ustring osl_filepath() const override;
+
+ bool equals(const ImageLoader &other) const override;
+
+ protected:
+ ustring filepath;
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __IMAGE_OIIO__ */
diff --git a/intern/cycles/render/image_vdb.cpp b/intern/cycles/render/image_vdb.cpp
new file mode 100644
index 00000000000..500131c2d84
--- /dev/null
+++ b/intern/cycles/render/image_vdb.cpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "render/image_vdb.h"
+
+#ifdef WITH_OPENVDB
+# include <openvdb/openvdb.h>
+# include <openvdb/tools/Dense.h>
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+VDBImageLoader::VDBImageLoader(const string &grid_name) : grid_name(grid_name)
+{
+}
+
+VDBImageLoader::~VDBImageLoader()
+{
+}
+
+bool VDBImageLoader::load_metadata(ImageMetaData &metadata)
+{
+#ifdef WITH_OPENVDB
+ if (!grid) {
+ return false;
+ }
+
+ bbox = grid->evalActiveVoxelBoundingBox();
+ if (bbox.empty()) {
+ return false;
+ }
+
+ /* Set dimensions. */
+ openvdb::Coord dim = bbox.dim();
+ openvdb::Coord min = bbox.min();
+ metadata.width = dim.x();
+ metadata.height = dim.y();
+ metadata.depth = dim.z();
+
+ /* Set data type. */
+ if (grid->isType<openvdb::FloatGrid>()) {
+ metadata.channels = 1;
+ }
+ else if (grid->isType<openvdb::Vec3fGrid>()) {
+ metadata.channels = 3;
+ }
+ else if (grid->isType<openvdb::BoolGrid>()) {
+ metadata.channels = 1;
+ }
+ else if (grid->isType<openvdb::DoubleGrid>()) {
+ metadata.channels = 1;
+ }
+ else if (grid->isType<openvdb::Int32Grid>()) {
+ metadata.channels = 1;
+ }
+ else if (grid->isType<openvdb::Int64Grid>()) {
+ metadata.channels = 1;
+ }
+ else if (grid->isType<openvdb::Vec3IGrid>()) {
+ metadata.channels = 3;
+ }
+ else if (grid->isType<openvdb::Vec3dGrid>()) {
+ metadata.channels = 3;
+ }
+ else if (grid->isType<openvdb::MaskGrid>()) {
+ metadata.channels = 1;
+ }
+ else {
+ return false;
+ }
+
+ if (metadata.channels == 1) {
+ metadata.type = IMAGE_DATA_TYPE_FLOAT;
+ }
+ else {
+ metadata.type = IMAGE_DATA_TYPE_FLOAT4;
+ }
+
+ /* Set transform from object space to voxel index. */
+ openvdb::math::Mat4f grid_matrix = grid->transform().baseMap()->getAffineMap()->getMat4();
+ Transform index_to_object;
+ for (int col = 0; col < 4; col++) {
+ for (int row = 0; row < 3; row++) {
+ index_to_object[row][col] = (float)grid_matrix[col][row];
+ }
+ }
+
+ Transform texture_to_index = transform_translate(min.x(), min.y(), min.z()) *
+ transform_scale(dim.x(), dim.y(), dim.z());
+
+ metadata.transform_3d = transform_inverse(index_to_object * texture_to_index);
+ metadata.use_transform_3d = true;
+
+ return true;
+#else
+ (void)metadata;
+ return false;
+#endif
+}
+
+bool VDBImageLoader::load_pixels(const ImageMetaData &, void *pixels, const size_t, const bool)
+{
+#ifdef WITH_OPENVDB
+ if (grid->isType<openvdb::FloatGrid>()) {
+ openvdb::tools::Dense<float, openvdb::tools::LayoutXYZ> dense(bbox, (float *)pixels);
+ openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::FloatGrid>(grid), dense);
+ }
+ else if (grid->isType<openvdb::Vec3fGrid>()) {
+ openvdb::tools::Dense<openvdb::Vec3f, openvdb::tools::LayoutXYZ> dense(
+ bbox, (openvdb::Vec3f *)pixels);
+ openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::Vec3fGrid>(grid), dense);
+ }
+ else if (grid->isType<openvdb::BoolGrid>()) {
+ openvdb::tools::Dense<float, openvdb::tools::LayoutXYZ> dense(bbox, (float *)pixels);
+ openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::BoolGrid>(grid), dense);
+ }
+ else if (grid->isType<openvdb::DoubleGrid>()) {
+ openvdb::tools::Dense<float, openvdb::tools::LayoutXYZ> dense(bbox, (float *)pixels);
+ openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::DoubleGrid>(grid), dense);
+ }
+ else if (grid->isType<openvdb::Int32Grid>()) {
+ openvdb::tools::Dense<float, openvdb::tools::LayoutXYZ> dense(bbox, (float *)pixels);
+ openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::Int32Grid>(grid), dense);
+ }
+ else if (grid->isType<openvdb::Int64Grid>()) {
+ openvdb::tools::Dense<float, openvdb::tools::LayoutXYZ> dense(bbox, (float *)pixels);
+ openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::Int64Grid>(grid), dense);
+ }
+ else if (grid->isType<openvdb::Vec3IGrid>()) {
+ openvdb::tools::Dense<openvdb::Vec3f, openvdb::tools::LayoutXYZ> dense(
+ bbox, (openvdb::Vec3f *)pixels);
+ openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::Vec3IGrid>(grid), dense);
+ }
+ else if (grid->isType<openvdb::Vec3dGrid>()) {
+ openvdb::tools::Dense<openvdb::Vec3f, openvdb::tools::LayoutXYZ> dense(
+ bbox, (openvdb::Vec3f *)pixels);
+ openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::Vec3dGrid>(grid), dense);
+ }
+ else if (grid->isType<openvdb::MaskGrid>()) {
+ openvdb::tools::Dense<float, openvdb::tools::LayoutXYZ> dense(bbox, (float *)pixels);
+ openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::MaskGrid>(grid), dense);
+ }
+
+ return true;
+#else
+ (void)pixels;
+ return false;
+#endif
+}
+
+string VDBImageLoader::name() const
+{
+ return grid_name;
+}
+
+bool VDBImageLoader::equals(const ImageLoader &other) const
+{
+#ifdef WITH_OPENVDB
+ const VDBImageLoader &other_loader = (const VDBImageLoader &)other;
+ return grid == other_loader.grid;
+#else
+ (void)other;
+ return true;
+#endif
+}
+
+void VDBImageLoader::cleanup()
+{
+#ifdef WITH_OPENVDB
+ /* Free OpenVDB grid memory as soon as we can. */
+ grid.reset();
+#endif
+}
+
+CCL_NAMESPACE_END
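To make the transform logic in load_metadata() above a little more concrete: index_to_object is the OpenVDB grid transform (voxel index space to object space), texture_to_index rescales normalized [0,1]^3 texture coordinates onto the active voxel bounding box, and metadata.transform_3d stores the inverse of their composition so the kernel can map object-space positions back into texture space. A small hypothetical sketch of that mapping, assuming the Transform helpers from util/util_transform.h (the helper name is illustrative only):

/* Hypothetical sketch, not part of this patch. Spells out the composition
 * built in VDBImageLoader::load_metadata(). */
#include "util/util_transform.h"

CCL_NAMESPACE_BEGIN

static float3 vdb_object_to_texture(const Transform &index_to_object,
                                    const float3 bbox_min,
                                    const float3 bbox_dim,
                                    const float3 P_object)
{
  /* Texture [0,1]^3 -> voxel index space over the active bounding box. */
  const Transform texture_to_index = transform_translate(bbox_min) * transform_scale(bbox_dim);
  /* Object space -> texture space, i.e. what metadata.transform_3d stores. */
  const Transform object_to_texture = transform_inverse(index_to_object * texture_to_index);
  return transform_point(&object_to_texture, P_object);
}

CCL_NAMESPACE_END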
diff --git a/intern/cycles/render/image_vdb.h b/intern/cycles/render/image_vdb.h
new file mode 100644
index 00000000000..7dec63b11e6
--- /dev/null
+++ b/intern/cycles/render/image_vdb.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __IMAGE_VDB__
+#define __IMAGE_VDB__
+
+#ifdef WITH_OPENVDB
+# include <openvdb/openvdb.h>
+#endif
+
+#include "render/image.h"
+
+CCL_NAMESPACE_BEGIN
+
+class VDBImageLoader : public ImageLoader {
+ public:
+ VDBImageLoader(const string &grid_name);
+ ~VDBImageLoader();
+
+ virtual bool load_metadata(ImageMetaData &metadata) override;
+
+ virtual bool load_pixels(const ImageMetaData &metadata,
+ void *pixels,
+ const size_t pixels_size,
+ const bool associate_alpha) override;
+
+ virtual string name() const override;
+
+ virtual bool equals(const ImageLoader &other) const override;
+
+ virtual void cleanup() override;
+
+ protected:
+ string grid_name;
+#ifdef WITH_OPENVDB
+ openvdb::GridBase::ConstPtr grid;
+ openvdb::CoordBBox bbox;
+#endif
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __IMAGE_VDB__ */
diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp
index 530c32106b7..2f9d088899e 100644
--- a/intern/cycles/render/integrator.cpp
+++ b/intern/cycles/render/integrator.cpp
@@ -14,17 +14,21 @@
* limitations under the License.
*/
+#include "render/integrator.h"
#include "device/device.h"
#include "render/background.h"
-#include "render/integrator.h"
#include "render/film.h"
+#include "render/jitter.h"
#include "render/light.h"
#include "render/scene.h"
#include "render/shader.h"
#include "render/sobol.h"
+#include "kernel/kernel_types.h"
+
#include "util/util_foreach.h"
#include "util/util_hash.h"
+#include "util/util_logging.h"
CCL_NAMESPACE_BEGIN
@@ -46,7 +50,7 @@ NODE_DEFINE(Integrator)
SOCKET_INT(ao_bounces, "AO Bounces", 0);
SOCKET_INT(volume_max_steps, "Volume Max Steps", 1024);
- SOCKET_FLOAT(volume_step_size, "Volume Step Size", 0.1f);
+ SOCKET_FLOAT(volume_step_rate, "Volume Step Rate", 1.0f);
SOCKET_BOOLEAN(caustics_reflective, "Reflective Caustics", true);
SOCKET_BOOLEAN(caustics_refractive, "Refractive Caustics", true);
@@ -66,6 +70,9 @@ NODE_DEFINE(Integrator)
SOCKET_INT(volume_samples, "Volume Samples", 1);
SOCKET_INT(start_sample, "Start Sample", 0);
+ SOCKET_FLOAT(adaptive_threshold, "Adaptive Threshold", 0.0f);
+ SOCKET_INT(adaptive_min_samples, "Adaptive Min Samples", 0);
+
SOCKET_BOOLEAN(sample_all_lights_direct, "Sample All Lights Direct", true);
SOCKET_BOOLEAN(sample_all_lights_indirect, "Sample All Lights Indirect", true);
SOCKET_FLOAT(light_sampling_threshold, "Light Sampling Threshold", 0.05f);
@@ -78,6 +85,7 @@ NODE_DEFINE(Integrator)
static NodeEnum sampling_pattern_enum;
sampling_pattern_enum.insert("sobol", SAMPLING_PATTERN_SOBOL);
sampling_pattern_enum.insert("cmj", SAMPLING_PATTERN_CMJ);
+ sampling_pattern_enum.insert("pmj", SAMPLING_PATTERN_PMJ);
SOCKET_ENUM(sampling_pattern, "Sampling Pattern", sampling_pattern_enum, SAMPLING_PATTERN_SOBOL);
return type;
@@ -135,7 +143,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
}
kintegrator->volume_max_steps = volume_max_steps;
- kintegrator->volume_step_size = volume_step_size;
+ kintegrator->volume_step_rate = volume_step_rate;
kintegrator->caustics_reflective = caustics_reflective;
kintegrator->caustics_refractive = caustics_refractive;
@@ -174,6 +182,22 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
kintegrator->sampling_pattern = sampling_pattern;
kintegrator->aa_samples = aa_samples;
+ if (aa_samples > 0 && adaptive_min_samples == 0) {
+ kintegrator->adaptive_min_samples = max(4, (int)sqrtf(aa_samples));
+ VLOG(1) << "Cycles adaptive sampling: automatic min samples = "
+ << kintegrator->adaptive_min_samples;
+ }
+ else {
+ kintegrator->adaptive_min_samples = max(4, adaptive_min_samples);
+ }
+ if (aa_samples > 0 && adaptive_threshold == 0.0f) {
+ kintegrator->adaptive_threshold = max(0.001f, 1.0f / (float)aa_samples);
+ VLOG(1) << "Cycles adaptive sampling: automatic threshold = "
+ << kintegrator->adaptive_threshold;
+ }
+ else {
+ kintegrator->adaptive_threshold = adaptive_threshold;
+ }
if (light_sampling_threshold > 0.0f) {
kintegrator->light_inv_rr_threshold = 1.0f / light_sampling_threshold;
@@ -203,18 +227,34 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
int dimensions = PRNG_BASE_NUM + max_samples * PRNG_BOUNCE_NUM;
dimensions = min(dimensions, SOBOL_MAX_DIMENSIONS);
- uint *directions = dscene->sobol_directions.alloc(SOBOL_BITS * dimensions);
+ if (sampling_pattern == SAMPLING_PATTERN_SOBOL) {
+ uint *directions = dscene->sample_pattern_lut.alloc(SOBOL_BITS * dimensions);
- sobol_generate_direction_vectors((uint(*)[SOBOL_BITS])directions, dimensions);
+ sobol_generate_direction_vectors((uint(*)[SOBOL_BITS])directions, dimensions);
- dscene->sobol_directions.copy_to_device();
+ dscene->sample_pattern_lut.copy_to_device();
+ }
+ else {
+ constexpr int sequence_size = NUM_PMJ_SAMPLES;
+ constexpr int num_sequences = NUM_PMJ_PATTERNS;
+ float2 *directions = (float2 *)dscene->sample_pattern_lut.alloc(sequence_size * num_sequences *
+ 2);
+ TaskPool pool;
+ for (int j = 0; j < num_sequences; ++j) {
+ float2 *sequence = directions + j * sequence_size;
+ pool.push(
+ function_bind(&progressive_multi_jitter_02_generate_2D, sequence, sequence_size, j));
+ }
+ pool.wait_work();
+ dscene->sample_pattern_lut.copy_to_device();
+ }
need_update = false;
}
void Integrator::device_free(Device *, DeviceScene *dscene)
{
- dscene->sobol_directions.free();
+ dscene->sample_pattern_lut.free();
}
bool Integrator::modified(const Integrator &integrator)
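For clarity, the adaptive-sampling defaults added to Integrator::device_update() above boil down to: when the user leaves the settings at 0, the minimum sample count falls back to max(4, sqrt(aa_samples)) and the noise threshold to max(0.001, 1/aa_samples). A self-contained sketch of that arithmetic (hypothetical helper name, plain C++, not part of this patch):

/* Hypothetical sketch mirroring the defaulting logic in Integrator::device_update(). */
#include <algorithm>
#include <cmath>
#include <cstdio>

static void adaptive_sampling_defaults(int aa_samples,
                                       int adaptive_min_samples,
                                       float adaptive_threshold,
                                       int *r_min_samples,
                                       float *r_threshold)
{
  if (aa_samples > 0 && adaptive_min_samples == 0) {
    /* Automatic: roughly the square root of the sample count, at least 4. */
    *r_min_samples = std::max(4, (int)sqrtf((float)aa_samples));
  }
  else {
    *r_min_samples = std::max(4, adaptive_min_samples);
  }

  if (aa_samples > 0 && adaptive_threshold == 0.0f) {
    /* Automatic: tighter threshold for higher sample counts, clamped at 0.001. */
    *r_threshold = std::max(0.001f, 1.0f / (float)aa_samples);
  }
  else {
    *r_threshold = adaptive_threshold;
  }
}

int main()
{
  int min_samples;
  float threshold;
  /* E.g. 128 AA samples with both settings left at 0 -> 11 samples, ~0.0078. */
  adaptive_sampling_defaults(128, 0, 0.0f, &min_samples, &threshold);
  printf("min samples %d, threshold %f\n", min_samples, threshold);
  return 0;
}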
diff --git a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h
index 32d84c27072..9804caebe6e 100644
--- a/intern/cycles/render/integrator.h
+++ b/intern/cycles/render/integrator.h
@@ -45,7 +45,7 @@ class Integrator : public Node {
int ao_bounces;
int volume_max_steps;
- float volume_step_size;
+ float volume_step_rate;
bool caustics_reflective;
bool caustics_refractive;
@@ -75,6 +75,9 @@ class Integrator : public Node {
bool sample_all_lights_indirect;
float light_sampling_threshold;
+ int adaptive_min_samples;
+ float adaptive_threshold;
+
enum Method {
BRANCHED_PATH = 0,
PATH = 1,
diff --git a/intern/cycles/render/jitter.cpp b/intern/cycles/render/jitter.cpp
new file mode 100644
index 00000000000..fc47b0e8f0a
--- /dev/null
+++ b/intern/cycles/render/jitter.cpp
@@ -0,0 +1,287 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This file is based on "Progressive Multi-Jittered Sample Sequences"
+ * by Per Christensen, Andrew Kensler and Charlie Kilpatrick.
+ * http://graphics.pixar.com/library/ProgressiveMultiJitteredSampling/paper.pdf
+ *
+ * Performance can be improved in the future by implementing the new
+ * algorithm from Matt Pharr in http://jcgt.org/published/0008/01/04/
+ * "Efficient Generation of Points that Satisfy Two-Dimensional Elementary Intervals"
+ */
+
+#include "render/jitter.h"
+
+#include <math.h>
+#include <vector>
+
+CCL_NAMESPACE_BEGIN
+
+static uint cmj_hash(uint i, uint p)
+{
+ i ^= p;
+ i ^= i >> 17;
+ i ^= i >> 10;
+ i *= 0xb36534e5;
+ i ^= i >> 12;
+ i ^= i >> 21;
+ i *= 0x93fc4795;
+ i ^= 0xdf6e307f;
+ i ^= i >> 17;
+ i *= 1 | p >> 18;
+
+ return i;
+}
+
+static float cmj_randfloat(uint i, uint p)
+{
+ return cmj_hash(i, p) * (1.0f / 4294967808.0f);
+}
+
+class PMJ_Generator {
+ public:
+ static void generate_2D(float2 points[], int size, int rng_seed_in)
+ {
+ PMJ_Generator g(rng_seed_in);
+ points[0].x = g.rnd();
+ points[0].y = g.rnd();
+ int N = 1;
+ while (N < size) {
+ g.extend_sequence_even(points, N);
+ g.extend_sequence_odd(points, 2 * N);
+ N = 4 * N;
+ }
+ }
+
+ protected:
+ PMJ_Generator(int rnd_seed_in) : num_samples(1), rnd_index(2), rnd_seed(rnd_seed_in)
+ {
+ }
+
+ float rnd()
+ {
+ return cmj_randfloat(++rnd_index, rnd_seed);
+ }
+
+ virtual void mark_occupied_strata(float2 points[], int N)
+ {
+ int NN = 2 * N;
+ for (int s = 0; s < NN; ++s) {
+ occupied1Dx[s] = occupied1Dy[s] = false;
+ }
+ for (int s = 0; s < N; ++s) {
+ int xstratum = (int)(NN * points[s].x);
+ int ystratum = (int)(NN * points[s].y);
+ occupied1Dx[xstratum] = true;
+ occupied1Dy[ystratum] = true;
+ }
+ }
+
+ virtual void generate_sample_point(
+ float2 points[], float i, float j, float xhalf, float yhalf, int n, int N)
+ {
+ int NN = 2 * N;
+ float2 pt;
+ int xstratum, ystratum;
+ do {
+ pt.x = (i + 0.5f * (xhalf + rnd())) / n;
+ xstratum = (int)(NN * pt.x);
+ } while (occupied1Dx[xstratum]);
+ do {
+ pt.y = (j + 0.5f * (yhalf + rnd())) / n;
+ ystratum = (int)(NN * pt.y);
+ } while (occupied1Dy[ystratum]);
+ occupied1Dx[xstratum] = true;
+ occupied1Dy[ystratum] = true;
+ points[num_samples] = pt;
+ ++num_samples;
+ }
+
+ void extend_sequence_even(float2 points[], int N)
+ {
+ int n = (int)sqrtf(N);
+ occupied1Dx.resize(2 * N);
+ occupied1Dy.resize(2 * N);
+ mark_occupied_strata(points, N);
+ for (int s = 0; s < N; ++s) {
+ float2 oldpt = points[s];
+ float i = floorf(n * oldpt.x);
+ float j = floorf(n * oldpt.y);
+ float xhalf = floorf(2.0f * (n * oldpt.x - i));
+ float yhalf = floorf(2.0f * (n * oldpt.y - j));
+ xhalf = 1.0f - xhalf;
+ yhalf = 1.0f - yhalf;
+ generate_sample_point(points, i, j, xhalf, yhalf, n, N);
+ }
+ }
+
+ void extend_sequence_odd(float2 points[], int N)
+ {
+ int n = (int)sqrtf(N / 2);
+ occupied1Dx.resize(2 * N);
+ occupied1Dy.resize(2 * N);
+ mark_occupied_strata(points, N);
+ std::vector<float> xhalves(N / 2);
+ std::vector<float> yhalves(N / 2);
+ for (int s = 0; s < N / 2; ++s) {
+ float2 oldpt = points[s];
+ float i = floorf(n * oldpt.x);
+ float j = floorf(n * oldpt.y);
+ float xhalf = floorf(2.0f * (n * oldpt.x - i));
+ float yhalf = floorf(2.0f * (n * oldpt.y - j));
+ if (rnd() > 0.5f) {
+ xhalf = 1.0f - xhalf;
+ }
+ else {
+ yhalf = 1.0f - yhalf;
+ }
+ xhalves[s] = xhalf;
+ yhalves[s] = yhalf;
+ generate_sample_point(points, i, j, xhalf, yhalf, n, N);
+ }
+ for (int s = 0; s < N / 2; ++s) {
+ float2 oldpt = points[s];
+ float i = floorf(n * oldpt.x);
+ float j = floorf(n * oldpt.y);
+ float xhalf = 1.0f - xhalves[s];
+ float yhalf = 1.0f - yhalves[s];
+ generate_sample_point(points, i, j, xhalf, yhalf, n, N);
+ }
+ }
+
+ std::vector<bool> occupied1Dx, occupied1Dy;
+ int num_samples;
+ int rnd_index, rnd_seed;
+};
+
+class PMJ02_Generator : public PMJ_Generator {
+ protected:
+ void generate_sample_point(
+ float2 points[], float i, float j, float xhalf, float yhalf, int n, int N) override
+ {
+ int NN = 2 * N;
+ float2 pt;
+ do {
+ pt.x = (i + 0.5f * (xhalf + rnd())) / n;
+ pt.y = (j + 0.5f * (yhalf + rnd())) / n;
+ } while (is_occupied(pt, NN));
+ mark_occupied_strata1(pt, NN);
+ points[num_samples] = pt;
+ ++num_samples;
+ }
+
+ void mark_occupied_strata(float2 points[], int N) override
+ {
+ int NN = 2 * N;
+ int num_shapes = (int)log2f(NN) + 1;
+ occupiedStrata.resize(num_shapes);
+ for (int shape = 0; shape < num_shapes; ++shape) {
+ occupiedStrata[shape].resize(NN);
+ for (int n = 0; n < NN; ++n) {
+ occupiedStrata[shape][n] = false;
+ }
+ }
+ for (int s = 0; s < N; ++s) {
+ mark_occupied_strata1(points[s], NN);
+ }
+ }
+
+ void mark_occupied_strata1(float2 pt, int NN)
+ {
+ int shape = 0;
+ int xdivs = NN;
+ int ydivs = 1;
+ do {
+ int xstratum = (int)(xdivs * pt.x);
+ int ystratum = (int)(ydivs * pt.y);
+ size_t index = ystratum * xdivs + xstratum;
+ assert(index < NN);
+ occupiedStrata[shape][index] = true;
+ shape = shape + 1;
+ xdivs = xdivs / 2;
+ ydivs = ydivs * 2;
+ } while (xdivs > 0);
+ }
+
+ bool is_occupied(float2 pt, int NN)
+ {
+ int shape = 0;
+ int xdivs = NN;
+ int ydivs = 1;
+ do {
+ int xstratum = (int)(xdivs * pt.x);
+ int ystratum = (int)(ydivs * pt.y);
+ size_t index = ystratum * xdivs + xstratum;
+ assert(index < NN);
+ if (occupiedStrata[shape][index]) {
+ return true;
+ }
+ shape = shape + 1;
+ xdivs = xdivs / 2;
+ ydivs = ydivs * 2;
+ } while (xdivs > 0);
+ return false;
+ }
+
+ private:
+ std::vector<std::vector<bool>> occupiedStrata;
+};
+
+static void shuffle(float2 points[], int size, int rng_seed)
+{
+ /* Offset samples by 1.0 for faster scrambling in kernel_random.h */
+ for (int i = 0; i < size; ++i) {
+ points[i].x += 1.0f;
+ points[i].y += 1.0f;
+ }
+
+ if (rng_seed == 0) {
+ return;
+ }
+
+ constexpr int odd[8] = {0, 1, 4, 5, 10, 11, 14, 15};
+ constexpr int even[8] = {2, 3, 6, 7, 8, 9, 12, 13};
+
+ int rng_index = 0;
+ for (int yy = 0; yy < size / 16; ++yy) {
+ for (int xx = 0; xx < 8; ++xx) {
+ int other = (int)(cmj_randfloat(++rng_index, rng_seed) * (8.0f - xx) + xx);
+ float2 tmp = points[odd[other] + yy * 16];
+ points[odd[other] + yy * 16] = points[odd[xx] + yy * 16];
+ points[odd[xx] + yy * 16] = tmp;
+ }
+ for (int xx = 0; xx < 8; ++xx) {
+ int other = (int)(cmj_randfloat(++rng_index, rng_seed) * (8.0f - xx) + xx);
+ float2 tmp = points[even[other] + yy * 16];
+ points[even[other] + yy * 16] = points[even[xx] + yy * 16];
+ points[even[xx] + yy * 16] = tmp;
+ }
+ }
+}
+
+void progressive_multi_jitter_generate_2D(float2 points[], int size, int rng_seed)
+{
+ PMJ_Generator::generate_2D(points, size, rng_seed);
+ shuffle(points, size, rng_seed);
+}
+
+void progressive_multi_jitter_02_generate_2D(float2 points[], int size, int rng_seed)
+{
+ PMJ02_Generator::generate_2D(points, size, rng_seed);
+ shuffle(points, size, rng_seed);
+}
+
+CCL_NAMESPACE_END
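A minimal standalone use of the generators above (hypothetical sample count; Cycles itself sizes the table with NUM_PMJ_SAMPLES and NUM_PMJ_PATTERNS as seen in integrator.cpp). The generator quadruples the point count per iteration, so the requested size should be a power of four, and shuffle() has already offset every coordinate by +1.0 for the kernel-side scrambling:

/* Hypothetical sketch, not part of this patch. */
#include "render/jitter.h"
#include "util/util_types.h"

#include <vector>

CCL_NAMESPACE_BEGIN

static void example_generate_pmj02()
{
  const int sequence_size = 256; /* Power of four; the kernel uses NUM_PMJ_SAMPLES. */
  const int rng_seed = 1;

  std::vector<float2> points(sequence_size);
  progressive_multi_jitter_02_generate_2D(points.data(), sequence_size, rng_seed);

  /* Undo the +1.0 offset applied by shuffle() to get samples back into [0, 1). */
  for (float2 &p : points) {
    p.x -= 1.0f;
    p.y -= 1.0f;
  }
}

CCL_NAMESPACE_END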
diff --git a/intern/cycles/render/jitter.h b/intern/cycles/render/jitter.h
new file mode 100644
index 00000000000..ed34c7a4f4d
--- /dev/null
+++ b/intern/cycles/render/jitter.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __JITTER_H__
+#define __JITTER_H__
+
+#include "util/util_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+void progressive_multi_jitter_generate_2D(float2 points[], int size, int rng_seed);
+void progressive_multi_jitter_02_generate_2D(float2 points[], int size, int rng_seed);
+
+CCL_NAMESPACE_END
+
+#endif /* __JITTER_H__ */
diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp
index 664217d6f26..9adf8e5341a 100644
--- a/intern/cycles/render/light.cpp
+++ b/intern/cycles/render/light.cpp
@@ -14,12 +14,12 @@
* limitations under the License.
*/
-#include "render/background.h"
+#include "render/light.h"
#include "device/device.h"
-#include "render/integrator.h"
+#include "render/background.h"
#include "render/film.h"
#include "render/graph.h"
-#include "render/light.h"
+#include "render/integrator.h"
#include "render/mesh.h"
#include "render/nodes.h"
#include "render/object.h"
@@ -28,9 +28,9 @@
#include "util/util_foreach.h"
#include "util/util_hash.h"
+#include "util/util_logging.h"
#include "util/util_path.h"
#include "util/util_progress.h"
-#include "util/util_logging.h"
CCL_NAMESPACE_BEGIN
@@ -232,7 +232,10 @@ void LightManager::disable_ineffective_light(Scene *scene)
bool LightManager::object_usable_as_light(Object *object)
{
- Mesh *mesh = object->mesh;
+ Geometry *geom = object->geometry;
+ if (geom->type != Geometry::MESH) {
+ return false;
+ }
/* Skip objects with NaNs */
if (!object->bounds.valid()) {
return false;
@@ -243,10 +246,10 @@ bool LightManager::object_usable_as_light(Object *object)
}
/* Skip if we have no emission shaders. */
/* TODO(sergey): Ideally we want to avoid such duplicated loop, since it'll
- * iterate all mesh shaders twice (when counting and when calculating
+ * iterate all geometry shaders twice (when counting and when calculating
* triangle area.
*/
- foreach (const Shader *shader, mesh->used_shaders) {
+ foreach (const Shader *shader, geom->used_shaders) {
if (shader->use_mis && shader->has_surface_emission) {
return true;
}
@@ -285,8 +288,9 @@ void LightManager::device_update_distribution(Device *,
if (!object_usable_as_light(object)) {
continue;
}
+
/* Count triangles. */
- Mesh *mesh = object->mesh;
+ Mesh *mesh = static_cast<Mesh *>(object->geometry);
size_t mesh_num_triangles = mesh->num_triangles();
for (size_t i = 0; i < mesh_num_triangles; i++) {
int shader_index = mesh->shader[i];
@@ -320,7 +324,7 @@ void LightManager::device_update_distribution(Device *,
continue;
}
/* Sum area. */
- Mesh *mesh = object->mesh;
+ Mesh *mesh = static_cast<Mesh *>(object->geometry);
bool transform_applied = mesh->transform_applied;
Transform tfm = object->tfm;
int object_id = j;
@@ -352,7 +356,7 @@ void LightManager::device_update_distribution(Device *,
if (shader->use_mis && shader->has_surface_emission) {
distribution[offset].totarea = totarea;
- distribution[offset].prim = i + mesh->tri_offset;
+ distribution[offset].prim = i + mesh->prim_offset;
distribution[offset].mesh_light.shader_flag = shader_flag;
distribution[offset].mesh_light.object_id = object_id;
offset++;
@@ -573,8 +577,8 @@ void LightManager::device_update_background(Device *device,
if (node->type == EnvironmentTextureNode::node_type) {
EnvironmentTextureNode *env = (EnvironmentTextureNode *)node;
ImageMetaData metadata;
- if (env->image_manager && !env->slots.empty() &&
- env->image_manager->get_image_metadata(env->slots[0], metadata)) {
+ if (!env->handle.empty()) {
+ ImageMetaData metadata = env->handle.metadata();
res.x = max(res.x, metadata.width);
res.y = max(res.y, metadata.height);
}
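The light-manager edits above follow the mesh/geometry split applied throughout this patch: Object now points at a generic Geometry, so mesh-only code checks the type tag before downcasting. A small hypothetical sketch of that guarded cast, assuming the Object, Geometry and Mesh declarations used elsewhere in the patch:

/* Hypothetical sketch, not part of this patch. */
#include "render/mesh.h"
#include "render/object.h"

CCL_NAMESPACE_BEGIN

static size_t object_triangle_count(const Object *object)
{
  Geometry *geom = object->geometry;
  if (geom->type != Geometry::MESH) {
    /* Non-mesh geometry (e.g. hair) carries no triangles. */
    return 0;
  }
  Mesh *mesh = static_cast<Mesh *>(geom);
  return mesh->num_triangles();
}

CCL_NAMESPACE_END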
diff --git a/intern/cycles/render/merge.cpp b/intern/cycles/render/merge.cpp
index cac07e59fe3..3ea3952b96c 100644
--- a/intern/cycles/render/merge.cpp
+++ b/intern/cycles/render/merge.cpp
@@ -22,8 +22,8 @@
#include "util/util_time.h"
#include "util/util_unique_ptr.h"
-#include <OpenImageIO/imageio.h>
#include <OpenImageIO/filesystem.h>
+#include <OpenImageIO/imageio.h>
OIIO_NAMESPACE_USING
diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp
index d9e6d998ebd..c262d770331 100644
--- a/intern/cycles/render/mesh.cpp
+++ b/intern/cycles/render/mesh.cpp
@@ -17,32 +17,22 @@
#include "bvh/bvh.h"
#include "bvh/bvh_build.h"
-#include "render/camera.h"
-#include "render/curves.h"
#include "device/device.h"
+
#include "render/graph.h"
-#include "render/shader.h"
-#include "render/light.h"
+#include "render/hair.h"
#include "render/mesh.h"
-#include "render/nodes.h"
#include "render/object.h"
#include "render/scene.h"
-#include "render/stats.h"
-
-#include "kernel/osl/osl_globals.h"
-#include "subd/subd_split.h"
#include "subd/subd_patch_table.h"
+#include "subd/subd_split.h"
#include "util/util_foreach.h"
#include "util/util_logging.h"
#include "util/util_progress.h"
#include "util/util_set.h"
-#ifdef WITH_EMBREE
-# include "bvh/bvh_embree.h"
-#endif
-
CCL_NAMESPACE_BEGIN
/* Triangle */
@@ -120,263 +110,6 @@ bool Mesh::Triangle::valid(const float3 *verts) const
return isfinite3_safe(verts[v[0]]) && isfinite3_safe(verts[v[1]]) && isfinite3_safe(verts[v[2]]);
}
-/* Curve */
-
-void Mesh::Curve::bounds_grow(const int k,
- const float3 *curve_keys,
- const float *curve_radius,
- BoundBox &bounds) const
-{
- float3 P[4];
-
- P[0] = curve_keys[max(first_key + k - 1, first_key)];
- P[1] = curve_keys[first_key + k];
- P[2] = curve_keys[first_key + k + 1];
- P[3] = curve_keys[min(first_key + k + 2, first_key + num_keys - 1)];
-
- float3 lower;
- float3 upper;
-
- curvebounds(&lower.x, &upper.x, P, 0);
- curvebounds(&lower.y, &upper.y, P, 1);
- curvebounds(&lower.z, &upper.z, P, 2);
-
- float mr = max(curve_radius[first_key + k], curve_radius[first_key + k + 1]);
-
- bounds.grow(lower, mr);
- bounds.grow(upper, mr);
-}
-
-void Mesh::Curve::bounds_grow(const int k,
- const float3 *curve_keys,
- const float *curve_radius,
- const Transform &aligned_space,
- BoundBox &bounds) const
-{
- float3 P[4];
-
- P[0] = curve_keys[max(first_key + k - 1, first_key)];
- P[1] = curve_keys[first_key + k];
- P[2] = curve_keys[first_key + k + 1];
- P[3] = curve_keys[min(first_key + k + 2, first_key + num_keys - 1)];
-
- P[0] = transform_point(&aligned_space, P[0]);
- P[1] = transform_point(&aligned_space, P[1]);
- P[2] = transform_point(&aligned_space, P[2]);
- P[3] = transform_point(&aligned_space, P[3]);
-
- float3 lower;
- float3 upper;
-
- curvebounds(&lower.x, &upper.x, P, 0);
- curvebounds(&lower.y, &upper.y, P, 1);
- curvebounds(&lower.z, &upper.z, P, 2);
-
- float mr = max(curve_radius[first_key + k], curve_radius[first_key + k + 1]);
-
- bounds.grow(lower, mr);
- bounds.grow(upper, mr);
-}
-
-void Mesh::Curve::bounds_grow(float4 keys[4], BoundBox &bounds) const
-{
- float3 P[4] = {
- float4_to_float3(keys[0]),
- float4_to_float3(keys[1]),
- float4_to_float3(keys[2]),
- float4_to_float3(keys[3]),
- };
-
- float3 lower;
- float3 upper;
-
- curvebounds(&lower.x, &upper.x, P, 0);
- curvebounds(&lower.y, &upper.y, P, 1);
- curvebounds(&lower.z, &upper.z, P, 2);
-
- float mr = max(keys[1].w, keys[2].w);
-
- bounds.grow(lower, mr);
- bounds.grow(upper, mr);
-}
-
-void Mesh::Curve::motion_keys(const float3 *curve_keys,
- const float *curve_radius,
- const float3 *key_steps,
- size_t num_curve_keys,
- size_t num_steps,
- float time,
- size_t k0,
- size_t k1,
- float4 r_keys[2]) const
-{
- /* Figure out which steps we need to fetch and their interpolation factor. */
- const size_t max_step = num_steps - 1;
- const size_t step = min((int)(time * max_step), max_step - 1);
- const float t = time * max_step - step;
- /* Fetch vertex coordinates. */
- float4 curr_keys[2];
- float4 next_keys[2];
- keys_for_step(
- curve_keys, curve_radius, key_steps, num_curve_keys, num_steps, step, k0, k1, curr_keys);
- keys_for_step(
- curve_keys, curve_radius, key_steps, num_curve_keys, num_steps, step + 1, k0, k1, next_keys);
- /* Interpolate between steps. */
- r_keys[0] = (1.0f - t) * curr_keys[0] + t * next_keys[0];
- r_keys[1] = (1.0f - t) * curr_keys[1] + t * next_keys[1];
-}
-
-void Mesh::Curve::cardinal_motion_keys(const float3 *curve_keys,
- const float *curve_radius,
- const float3 *key_steps,
- size_t num_curve_keys,
- size_t num_steps,
- float time,
- size_t k0,
- size_t k1,
- size_t k2,
- size_t k3,
- float4 r_keys[4]) const
-{
- /* Figure out which steps we need to fetch and their interpolation factor. */
- const size_t max_step = num_steps - 1;
- const size_t step = min((int)(time * max_step), max_step - 1);
- const float t = time * max_step - step;
- /* Fetch vertex coordinates. */
- float4 curr_keys[4];
- float4 next_keys[4];
- cardinal_keys_for_step(curve_keys,
- curve_radius,
- key_steps,
- num_curve_keys,
- num_steps,
- step,
- k0,
- k1,
- k2,
- k3,
- curr_keys);
- cardinal_keys_for_step(curve_keys,
- curve_radius,
- key_steps,
- num_curve_keys,
- num_steps,
- step + 1,
- k0,
- k1,
- k2,
- k3,
- next_keys);
- /* Interpolate between steps. */
- r_keys[0] = (1.0f - t) * curr_keys[0] + t * next_keys[0];
- r_keys[1] = (1.0f - t) * curr_keys[1] + t * next_keys[1];
- r_keys[2] = (1.0f - t) * curr_keys[2] + t * next_keys[2];
- r_keys[3] = (1.0f - t) * curr_keys[3] + t * next_keys[3];
-}
-
-void Mesh::Curve::keys_for_step(const float3 *curve_keys,
- const float *curve_radius,
- const float3 *key_steps,
- size_t num_curve_keys,
- size_t num_steps,
- size_t step,
- size_t k0,
- size_t k1,
- float4 r_keys[2]) const
-{
- k0 = max(k0, 0);
- k1 = min(k1, num_keys - 1);
- const size_t center_step = ((num_steps - 1) / 2);
- if (step == center_step) {
- /* Center step: regular key location. */
- /* TODO(sergey): Consider adding make_float4(float3, float)
- * function.
- */
- r_keys[0] = make_float4(curve_keys[first_key + k0].x,
- curve_keys[first_key + k0].y,
- curve_keys[first_key + k0].z,
- curve_radius[first_key + k0]);
- r_keys[1] = make_float4(curve_keys[first_key + k1].x,
- curve_keys[first_key + k1].y,
- curve_keys[first_key + k1].z,
- curve_radius[first_key + k1]);
- }
- else {
- /* Center step is not stored in this array. */
- if (step > center_step) {
- step--;
- }
- const size_t offset = first_key + step * num_curve_keys;
- r_keys[0] = make_float4(key_steps[offset + k0].x,
- key_steps[offset + k0].y,
- key_steps[offset + k0].z,
- curve_radius[first_key + k0]);
- r_keys[1] = make_float4(key_steps[offset + k1].x,
- key_steps[offset + k1].y,
- key_steps[offset + k1].z,
- curve_radius[first_key + k1]);
- }
-}
-
-void Mesh::Curve::cardinal_keys_for_step(const float3 *curve_keys,
- const float *curve_radius,
- const float3 *key_steps,
- size_t num_curve_keys,
- size_t num_steps,
- size_t step,
- size_t k0,
- size_t k1,
- size_t k2,
- size_t k3,
- float4 r_keys[4]) const
-{
- k0 = max(k0, 0);
- k3 = min(k3, num_keys - 1);
- const size_t center_step = ((num_steps - 1) / 2);
- if (step == center_step) {
- /* Center step: regular key location. */
- r_keys[0] = make_float4(curve_keys[first_key + k0].x,
- curve_keys[first_key + k0].y,
- curve_keys[first_key + k0].z,
- curve_radius[first_key + k0]);
- r_keys[1] = make_float4(curve_keys[first_key + k1].x,
- curve_keys[first_key + k1].y,
- curve_keys[first_key + k1].z,
- curve_radius[first_key + k1]);
- r_keys[2] = make_float4(curve_keys[first_key + k2].x,
- curve_keys[first_key + k2].y,
- curve_keys[first_key + k2].z,
- curve_radius[first_key + k2]);
- r_keys[3] = make_float4(curve_keys[first_key + k3].x,
- curve_keys[first_key + k3].y,
- curve_keys[first_key + k3].z,
- curve_radius[first_key + k3]);
- }
- else {
- /* Center step is not stored in this array. */
- if (step > center_step) {
- step--;
- }
- const size_t offset = first_key + step * num_curve_keys;
- r_keys[0] = make_float4(key_steps[offset + k0].x,
- key_steps[offset + k0].y,
- key_steps[offset + k0].z,
- curve_radius[first_key + k0]);
- r_keys[1] = make_float4(key_steps[offset + k1].x,
- key_steps[offset + k1].y,
- key_steps[offset + k1].z,
- curve_radius[first_key + k1]);
- r_keys[2] = make_float4(key_steps[offset + k2].x,
- key_steps[offset + k2].y,
- key_steps[offset + k2].z,
- curve_radius[first_key + k2]);
- r_keys[3] = make_float4(key_steps[offset + k3].x,
- key_steps[offset + k3].y,
- key_steps[offset + k3].z,
- curve_radius[first_key + k3]);
- }
-}
-
/* SubdFace */
float3 Mesh::SubdFace::normal(const Mesh *mesh) const
@@ -392,60 +125,29 @@ float3 Mesh::SubdFace::normal(const Mesh *mesh) const
NODE_DEFINE(Mesh)
{
- NodeType *type = NodeType::add("mesh", create);
-
- SOCKET_UINT(motion_steps, "Motion Steps", 3);
- SOCKET_BOOLEAN(use_motion_blur, "Use Motion Blur", false);
+ NodeType *type = NodeType::add("mesh", create, NodeType::NONE, Geometry::node_base_type);
SOCKET_INT_ARRAY(triangles, "Triangles", array<int>());
SOCKET_POINT_ARRAY(verts, "Vertices", array<float3>());
SOCKET_INT_ARRAY(shader, "Shader", array<int>());
SOCKET_BOOLEAN_ARRAY(smooth, "Smooth", array<bool>());
- SOCKET_POINT_ARRAY(curve_keys, "Curve Keys", array<float3>());
- SOCKET_FLOAT_ARRAY(curve_radius, "Curve Radius", array<float>());
- SOCKET_INT_ARRAY(curve_first_key, "Curve First Key", array<int>());
- SOCKET_INT_ARRAY(curve_shader, "Curve Shader", array<int>());
-
return type;
}
-Mesh::Mesh() : Node(node_type)
+Mesh::Mesh() : Geometry(node_type, Geometry::MESH), subd_attributes(this, ATTR_PRIM_SUBD)
{
- need_update = true;
- need_update_rebuild = false;
- transform_applied = false;
- transform_negative_scaled = false;
- transform_normal = transform_identity();
- bounds = BoundBox::empty;
-
- bvh = NULL;
-
- tri_offset = 0;
vert_offset = 0;
- curve_offset = 0;
- curvekey_offset = 0;
-
patch_offset = 0;
face_offset = 0;
corner_offset = 0;
- attr_map_offset = 0;
-
- prim_offset = 0;
-
num_subd_verts = 0;
- attributes.triangle_mesh = this;
- curve_attributes.curve_mesh = this;
- subd_attributes.subd_mesh = this;
-
- geometry_flags = GEOMETRY_NONE;
-
- volume_isovalue = 0.001f;
- has_volume = false;
- has_surface_bssrdf = false;
+ volume_clipping = 0.001f;
+ volume_step_size = 0.0f;
+ volume_object_space = false;
num_ngons = 0;
@@ -457,7 +159,6 @@ Mesh::Mesh() : Node(node_type)
Mesh::~Mesh()
{
- delete bvh;
delete patch_table;
delete subd_params;
}
@@ -493,26 +194,6 @@ void Mesh::reserve_mesh(int numverts, int numtris)
attributes.resize(true);
}
-void Mesh::resize_curves(int numcurves, int numkeys)
-{
- curve_keys.resize(numkeys);
- curve_radius.resize(numkeys);
- curve_first_key.resize(numcurves);
- curve_shader.resize(numcurves);
-
- curve_attributes.resize();
-}
-
-void Mesh::reserve_curves(int numcurves, int numkeys)
-{
- curve_keys.reserve(numkeys);
- curve_radius.reserve(numkeys);
- curve_first_key.reserve(numcurves);
- curve_shader.reserve(numcurves);
-
- curve_attributes.resize(true);
-}
-
void Mesh::resize_subd_faces(int numfaces, int num_ngons_, int numcorners)
{
subd_faces.resize(numfaces);
@@ -533,6 +214,8 @@ void Mesh::reserve_subd_faces(int numfaces, int num_ngons_, int numcorners)
void Mesh::clear(bool preserve_voxel_data)
{
+ Geometry::clear();
+
/* clear all verts and triangles */
verts.clear();
triangles.clear();
@@ -542,11 +225,6 @@ void Mesh::clear(bool preserve_voxel_data)
triangle_patch.clear();
vert_patch_uv.clear();
- curve_keys.clear();
- curve_radius.clear();
- curve_first_key.clear();
- curve_shader.clear();
-
subd_faces.clear();
subd_face_corners.clear();
@@ -554,27 +232,21 @@ void Mesh::clear(bool preserve_voxel_data)
subd_creases.clear();
- curve_attributes.clear();
subd_attributes.clear();
attributes.clear(preserve_voxel_data);
- used_shaders.clear();
-
vert_to_stitching_key_map.clear();
vert_stitching_map.clear();
- if (!preserve_voxel_data) {
- geometry_flags = GEOMETRY_NONE;
- }
-
- transform_applied = false;
- transform_negative_scaled = false;
- transform_normal = transform_identity();
-
delete patch_table;
patch_table = NULL;
}
+void Mesh::clear()
+{
+ clear(false);
+}
+
void Mesh::add_vertex(float3 P)
{
verts.push_back_reserved(P);
@@ -606,18 +278,6 @@ void Mesh::add_triangle(int v0, int v1, int v2, int shader_, bool smooth_)
}
}
-void Mesh::add_curve_key(float3 co, float radius)
-{
- curve_keys.push_back_reserved(co);
- curve_radius.push_back_reserved(radius);
-}
-
-void Mesh::add_curve(int first_key, int shader)
-{
- curve_first_key.push_back_reserved(first_key);
- curve_shader.push_back_reserved(shader);
-}
-
void Mesh::add_subd_face(int *corners, int num_corners, int shader_, bool smooth_)
{
int start_corner = subd_face_corners.size();
@@ -637,47 +297,41 @@ void Mesh::add_subd_face(int *corners, int num_corners, int shader_, bool smooth
subd_faces.push_back_reserved(face);
}
-static void get_uv_tiles_from_attribute(Attribute *attr, int num, unordered_set<int> &tiles)
+void Mesh::copy_center_to_motion_step(const int motion_step)
{
- if (attr == NULL) {
- return;
- }
+ Attribute *attr_mP = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
- const float2 *uv = attr->data_float2();
- for (int i = 0; i < num; i++, uv++) {
- float u = uv->x, v = uv->y;
- int x = (int)u, y = (int)v;
+ if (attr_mP) {
+ Attribute *attr_mN = attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL);
+ Attribute *attr_N = attributes.find(ATTR_STD_VERTEX_NORMAL);
+ float3 *P = &verts[0];
+ float3 *N = (attr_N) ? attr_N->data_float3() : NULL;
+ size_t numverts = verts.size();
- if (x < 0 || y < 0 || x >= 10) {
- continue;
- }
-
- /* Be conservative in corners - precisely touching the right or upper edge of a tile
- * should not load its right/upper neighbor as well. */
- if (x > 0 && (u < x + 1e-6f)) {
- x--;
- }
- if (y > 0 && (v < y + 1e-6f)) {
- y--;
- }
-
- tiles.insert(1001 + 10 * y + x);
+ memcpy(attr_mP->data_float3() + motion_step * numverts, P, sizeof(float3) * numverts);
+ if (attr_mN)
+ memcpy(attr_mN->data_float3() + motion_step * numverts, N, sizeof(float3) * numverts);
}
}
void Mesh::get_uv_tiles(ustring map, unordered_set<int> &tiles)
{
+ Attribute *attr, *subd_attr;
+
if (map.empty()) {
- get_uv_tiles_from_attribute(attributes.find(ATTR_STD_UV), num_triangles() * 3, tiles);
- get_uv_tiles_from_attribute(
- subd_attributes.find(ATTR_STD_UV), subd_face_corners.size() + num_ngons, tiles);
- get_uv_tiles_from_attribute(curve_attributes.find(ATTR_STD_UV), num_curves(), tiles);
+ attr = attributes.find(ATTR_STD_UV);
+ subd_attr = subd_attributes.find(ATTR_STD_UV);
}
else {
- get_uv_tiles_from_attribute(attributes.find(map), num_triangles() * 3, tiles);
- get_uv_tiles_from_attribute(
- subd_attributes.find(map), subd_face_corners.size() + num_ngons, tiles);
- get_uv_tiles_from_attribute(curve_attributes.find(map), num_curves(), tiles);
+ attr = attributes.find(map);
+ subd_attr = subd_attributes.find(map);
+ }
+
+ if (attr) {
+ attr->get_uv_tiles(this, ATTR_PRIM_GEOMETRY, tiles);
+ }
+ if (subd_attr) {
+ subd_attr->get_uv_tiles(this, ATTR_PRIM_SUBD, tiles);
}
}
@@ -685,15 +339,11 @@ void Mesh::compute_bounds()
{
BoundBox bnds = BoundBox::empty;
size_t verts_size = verts.size();
- size_t curve_keys_size = curve_keys.size();
- if (verts_size + curve_keys_size > 0) {
+ if (verts_size > 0) {
for (size_t i = 0; i < verts_size; i++)
bnds.grow(verts[i]);
- for (size_t i = 0; i < curve_keys_size; i++)
- bnds.grow(curve_keys[i], curve_radius[i]);
-
Attribute *attr = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if (use_motion_blur && attr) {
size_t steps_size = verts.size() * (motion_steps - 1);
@@ -703,15 +353,6 @@ void Mesh::compute_bounds()
bnds.grow(vert_steps[i]);
}
- Attribute *curve_attr = curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
- if (use_motion_blur && curve_attr) {
- size_t steps_size = curve_keys.size() * (motion_steps - 1);
- float3 *key_steps = curve_attr->data_float3();
-
- for (size_t i = 0; i < steps_size; i++)
- bnds.grow(key_steps[i]);
- }
-
if (!bnds.valid()) {
bnds = BoundBox::empty;
@@ -719,9 +360,6 @@ void Mesh::compute_bounds()
for (size_t i = 0; i < verts_size; i++)
bnds.grow_safe(verts[i]);
- for (size_t i = 0; i < curve_keys_size; i++)
- bnds.grow_safe(curve_keys[i], curve_radius[i]);
-
if (use_motion_blur && attr) {
size_t steps_size = verts.size() * (motion_steps - 1);
float3 *vert_steps = attr->data_float3();
@@ -729,14 +367,6 @@ void Mesh::compute_bounds()
for (size_t i = 0; i < steps_size; i++)
bnds.grow_safe(vert_steps[i]);
}
-
- if (use_motion_blur && curve_attr) {
- size_t steps_size = curve_keys.size() * (motion_steps - 1);
- float3 *key_steps = curve_attr->data_float3();
-
- for (size_t i = 0; i < steps_size; i++)
- bnds.grow_safe(key_steps[i]);
- }
}
}
@@ -748,6 +378,38 @@ void Mesh::compute_bounds()
bounds = bnds;
}
+void Mesh::apply_transform(const Transform &tfm, const bool apply_to_motion)
+{
+ transform_normal = transform_transposed_inverse(tfm);
+
+ /* apply to mesh vertices */
+ for (size_t i = 0; i < verts.size(); i++)
+ verts[i] = transform_point(&tfm, verts[i]);
+
+ if (apply_to_motion) {
+ Attribute *attr = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
+ if (attr) {
+ size_t steps_size = verts.size() * (motion_steps - 1);
+ float3 *vert_steps = attr->data_float3();
+
+ for (size_t i = 0; i < steps_size; i++)
+ vert_steps[i] = transform_point(&tfm, vert_steps[i]);
+ }
+
+ Attribute *attr_N = attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL);
+
+ if (attr_N) {
+ Transform ntfm = transform_normal;
+ size_t steps_size = verts.size() * (motion_steps - 1);
+ float3 *normal_steps = attr_N->data_float3();
+
+ for (size_t i = 0; i < steps_size; i++)
+ normal_steps[i] = normalize(transform_direction(&ntfm, normal_steps[i]));
+ }
+ }
+}
+
void Mesh::add_face_normals()
{
/* don't compute if already there */
@@ -885,8 +547,7 @@ void Mesh::add_undisplaced()
float3 *data = attr->data_float3();
/* copy verts */
- size_t size = attr->buffer_size(
- this, (subdivision_type == SUBDIVISION_NONE) ? ATTR_PRIM_TRIANGLE : ATTR_PRIM_SUBD);
+ size_t size = attr->buffer_size(this, attrs.prim);
/* Center points for ngons aren't stored in Mesh::verts but are included in size since they will
* be calculated later, we subtract them from size here so we don't have an overflow while
@@ -975,39 +636,6 @@ void Mesh::pack_verts(const vector<uint> &tri_prim_index,
}
}
-void Mesh::pack_curves(Scene *scene,
- float4 *curve_key_co,
- float4 *curve_data,
- size_t curvekey_offset)
-{
- size_t curve_keys_size = curve_keys.size();
-
- /* pack curve keys */
- if (curve_keys_size) {
- float3 *keys_ptr = curve_keys.data();
- float *radius_ptr = curve_radius.data();
-
- for (size_t i = 0; i < curve_keys_size; i++)
- curve_key_co[i] = make_float4(keys_ptr[i].x, keys_ptr[i].y, keys_ptr[i].z, radius_ptr[i]);
- }
-
- /* pack curve segments */
- size_t curve_num = num_curves();
-
- for (size_t i = 0; i < curve_num; i++) {
- Curve curve = get_curve(i);
- int shader_id = curve_shader[i];
- Shader *shader = (shader_id < used_shaders.size()) ? used_shaders[shader_id] :
- scene->default_surface;
- shader_id = scene->shader_manager->get_shader_id(shader, false);
-
- curve_data[i] = make_float4(__int_as_float(curve.first_key + curvekey_offset),
- __int_as_float(curve.num_keys),
- __int_as_float(shader_id),
- 0.0f);
- }
-}
-
void Mesh::pack_patches(uint *patch_data, uint vert_offset, uint face_offset, uint corner_offset)
{
size_t num_faces = subd_faces.size();
@@ -1054,1391 +682,4 @@ void Mesh::pack_patches(uint *patch_data, uint vert_offset, uint face_offset, ui
}
}
-void Mesh::compute_bvh(
- Device *device, DeviceScene *dscene, SceneParams *params, Progress *progress, int n, int total)
-{
- if (progress->get_cancel())
- return;
-
- compute_bounds();
-
- const BVHLayout bvh_layout = BVHParams::best_bvh_layout(params->bvh_layout,
- device->get_bvh_layout_mask());
- if (need_build_bvh(bvh_layout)) {
- string msg = "Updating Mesh BVH ";
- if (name.empty())
- msg += string_printf("%u/%u", (uint)(n + 1), (uint)total);
- else
- msg += string_printf("%s %u/%u", name.c_str(), (uint)(n + 1), (uint)total);
-
- Object object;
- object.mesh = this;
-
- vector<Mesh *> meshes;
- meshes.push_back(this);
- vector<Object *> objects;
- objects.push_back(&object);
-
- if (bvh && !need_update_rebuild) {
- progress->set_status(msg, "Refitting BVH");
-
- bvh->meshes = meshes;
- bvh->objects = objects;
-
- bvh->refit(*progress);
- }
- else {
- progress->set_status(msg, "Building BVH");
-
- BVHParams bparams;
- bparams.use_spatial_split = params->use_bvh_spatial_split;
- bparams.bvh_layout = bvh_layout;
- bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
- params->use_bvh_unaligned_nodes;
- bparams.num_motion_triangle_steps = params->num_bvh_time_steps;
- bparams.num_motion_curve_steps = params->num_bvh_time_steps;
- bparams.bvh_type = params->bvh_type;
- bparams.curve_flags = dscene->data.curve.curveflags;
- bparams.curve_subdivisions = dscene->data.curve.subdivisions;
-
- delete bvh;
- bvh = BVH::create(bparams, meshes, objects);
- MEM_GUARDED_CALL(progress, bvh->build, *progress);
- }
- }
-
- need_update = false;
- need_update_rebuild = false;
-}
-
-void Mesh::tag_update(Scene *scene, bool rebuild)
-{
- need_update = true;
-
- if (rebuild) {
- need_update_rebuild = true;
- scene->light_manager->need_update = true;
- }
- else {
- foreach (Shader *shader, used_shaders)
- if (shader->has_surface_emission)
- scene->light_manager->need_update = true;
- }
-
- scene->mesh_manager->need_update = true;
- scene->object_manager->need_update = true;
-}
-
-bool Mesh::has_motion_blur() const
-{
- return (use_motion_blur && (attributes.find(ATTR_STD_MOTION_VERTEX_POSITION) ||
- curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)));
-}
-
-bool Mesh::has_true_displacement() const
-{
- foreach (Shader *shader, used_shaders) {
- if (shader->has_displacement && shader->displacement_method != DISPLACE_BUMP) {
- return true;
- }
- }
-
- return false;
-}
-
-bool Mesh::has_voxel_attributes() const
-{
- foreach (const Attribute &attr, attributes.attributes) {
- if (attr.element == ATTR_ELEMENT_VOXEL) {
- return true;
- }
- }
-
- return false;
-}
-
-float Mesh::motion_time(int step) const
-{
- return (motion_steps > 1) ? 2.0f * step / (motion_steps - 1) - 1.0f : 0.0f;
-}
-
-int Mesh::motion_step(float time) const
-{
- if (motion_steps > 1) {
- int attr_step = 0;
-
- for (int step = 0; step < motion_steps; step++) {
- float step_time = motion_time(step);
- if (step_time == time) {
- return attr_step;
- }
-
- /* Center step is stored in a separate attribute. */
- if (step != motion_steps / 2) {
- attr_step++;
- }
- }
- }
-
- return -1;
-}
-
-bool Mesh::need_build_bvh(BVHLayout layout) const
-{
- return !transform_applied || has_surface_bssrdf || layout == BVH_LAYOUT_OPTIX;
-}
-
-bool Mesh::is_instanced() const
-{
- /* Currently we treat subsurface objects as instanced.
- *
- * While it might be not very optimal for ray traversal, it avoids having
- * duplicated BVH in the memory, saving quite some space.
- */
- return !transform_applied || has_surface_bssrdf;
-}
-
-/* Mesh Manager */
-
-MeshManager::MeshManager()
-{
- need_update = true;
- need_flags_update = true;
-}
-
-MeshManager::~MeshManager()
-{
-}
-
-void MeshManager::update_osl_attributes(Device *device,
- Scene *scene,
- vector<AttributeRequestSet> &mesh_attributes)
-{
-#ifdef WITH_OSL
- /* for OSL, a hash map is used to lookup the attribute by name. */
- OSLGlobals *og = (OSLGlobals *)device->osl_memory();
-
- og->object_name_map.clear();
- og->attribute_map.clear();
- og->object_names.clear();
-
- og->attribute_map.resize(scene->objects.size() * ATTR_PRIM_TYPES);
-
- for (size_t i = 0; i < scene->objects.size(); i++) {
- /* set object name to object index map */
- Object *object = scene->objects[i];
- og->object_name_map[object->name] = i;
- og->object_names.push_back(object->name);
-
- /* set object attributes */
- foreach (ParamValue &attr, object->attributes) {
- OSLGlobals::Attribute osl_attr;
-
- osl_attr.type = attr.type();
- osl_attr.desc.element = ATTR_ELEMENT_OBJECT;
- osl_attr.value = attr;
- osl_attr.desc.offset = 0;
- osl_attr.desc.flags = 0;
-
- og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_TRIANGLE][attr.name()] = osl_attr;
- og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_CURVE][attr.name()] = osl_attr;
- og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_SUBD][attr.name()] = osl_attr;
- }
-
- /* find mesh attributes */
- size_t j;
-
- for (j = 0; j < scene->meshes.size(); j++)
- if (scene->meshes[j] == object->mesh)
- break;
-
- AttributeRequestSet &attributes = mesh_attributes[j];
-
- /* set object attributes */
- foreach (AttributeRequest &req, attributes.requests) {
- OSLGlobals::Attribute osl_attr;
-
- if (req.triangle_desc.element != ATTR_ELEMENT_NONE) {
- osl_attr.desc = req.triangle_desc;
-
- if (req.triangle_type == TypeDesc::TypeFloat)
- osl_attr.type = TypeDesc::TypeFloat;
- else if (req.triangle_type == TypeDesc::TypeMatrix)
- osl_attr.type = TypeDesc::TypeMatrix;
- else if (req.triangle_type == TypeFloat2)
- osl_attr.type = TypeFloat2;
- else if (req.triangle_type == TypeRGBA)
- osl_attr.type = TypeRGBA;
- else
- osl_attr.type = TypeDesc::TypeColor;
-
- if (req.std != ATTR_STD_NONE) {
- /* if standard attribute, add lookup by geom: name convention */
- ustring stdname(string("geom:") + string(Attribute::standard_name(req.std)));
- og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_TRIANGLE][stdname] = osl_attr;
- }
- else if (req.name != ustring()) {
- /* add lookup by mesh attribute name */
- og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_TRIANGLE][req.name] = osl_attr;
- }
- }
-
- if (req.curve_desc.element != ATTR_ELEMENT_NONE) {
- osl_attr.desc = req.curve_desc;
-
- if (req.curve_type == TypeDesc::TypeFloat)
- osl_attr.type = TypeDesc::TypeFloat;
- else if (req.curve_type == TypeDesc::TypeMatrix)
- osl_attr.type = TypeDesc::TypeMatrix;
- else if (req.curve_type == TypeFloat2)
- osl_attr.type = TypeFloat2;
- else if (req.curve_type == TypeRGBA)
- osl_attr.type = TypeRGBA;
- else
- osl_attr.type = TypeDesc::TypeColor;
-
- if (req.std != ATTR_STD_NONE) {
- /* if standard attribute, add lookup by geom: name convention */
- ustring stdname(string("geom:") + string(Attribute::standard_name(req.std)));
- og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_CURVE][stdname] = osl_attr;
- }
- else if (req.name != ustring()) {
- /* add lookup by mesh attribute name */
- og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_CURVE][req.name] = osl_attr;
- }
- }
-
- if (req.subd_desc.element != ATTR_ELEMENT_NONE) {
- osl_attr.desc = req.subd_desc;
-
- if (req.subd_type == TypeDesc::TypeFloat)
- osl_attr.type = TypeDesc::TypeFloat;
- else if (req.subd_type == TypeDesc::TypeMatrix)
- osl_attr.type = TypeDesc::TypeMatrix;
- else if (req.subd_type == TypeFloat2)
- osl_attr.type = TypeFloat2;
- else if (req.subd_type == TypeRGBA)
- osl_attr.type = TypeRGBA;
- else
- osl_attr.type = TypeDesc::TypeColor;
-
- if (req.std != ATTR_STD_NONE) {
- /* if standard attribute, add lookup by geom: name convention */
- ustring stdname(string("geom:") + string(Attribute::standard_name(req.std)));
- og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_SUBD][stdname] = osl_attr;
- }
- else if (req.name != ustring()) {
- /* add lookup by mesh attribute name */
- og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_SUBD][req.name] = osl_attr;
- }
- }
- }
- }
-#else
- (void)device;
- (void)scene;
- (void)mesh_attributes;
-#endif
-}
-
-void MeshManager::update_svm_attributes(Device *,
- DeviceScene *dscene,
- Scene *scene,
- vector<AttributeRequestSet> &mesh_attributes)
-{
- /* for SVM, the attributes_map table is used to lookup the offset of an
- * attribute, based on a unique shader attribute id. */
-
- /* compute array stride */
- int attr_map_size = 0;
-
- for (size_t i = 0; i < scene->meshes.size(); i++) {
- Mesh *mesh = scene->meshes[i];
- mesh->attr_map_offset = attr_map_size;
- attr_map_size += (mesh_attributes[i].size() + 1) * ATTR_PRIM_TYPES;
- }
-
- if (attr_map_size == 0)
- return;
-
- /* create attribute map */
- uint4 *attr_map = dscene->attributes_map.alloc(attr_map_size);
- memset(attr_map, 0, dscene->attributes_map.size() * sizeof(uint));
-
- for (size_t i = 0; i < scene->meshes.size(); i++) {
- Mesh *mesh = scene->meshes[i];
- AttributeRequestSet &attributes = mesh_attributes[i];
-
- /* set object attributes */
- int index = mesh->attr_map_offset;
-
- foreach (AttributeRequest &req, attributes.requests) {
- uint id;
-
- if (req.std == ATTR_STD_NONE)
- id = scene->shader_manager->get_attribute_id(req.name);
- else
- id = scene->shader_manager->get_attribute_id(req.std);
-
- if (mesh->num_triangles()) {
- attr_map[index].x = id;
- attr_map[index].y = req.triangle_desc.element;
- attr_map[index].z = as_uint(req.triangle_desc.offset);
-
- if (req.triangle_type == TypeDesc::TypeFloat)
- attr_map[index].w = NODE_ATTR_FLOAT;
- else if (req.triangle_type == TypeDesc::TypeMatrix)
- attr_map[index].w = NODE_ATTR_MATRIX;
- else if (req.triangle_type == TypeFloat2)
- attr_map[index].w = NODE_ATTR_FLOAT2;
- else if (req.triangle_type == TypeRGBA)
- attr_map[index].w = NODE_ATTR_RGBA;
- else
- attr_map[index].w = NODE_ATTR_FLOAT3;
-
- attr_map[index].w |= req.triangle_desc.flags << 8;
- }
-
- index++;
-
- if (mesh->num_curves()) {
- attr_map[index].x = id;
- attr_map[index].y = req.curve_desc.element;
- attr_map[index].z = as_uint(req.curve_desc.offset);
-
- if (req.curve_type == TypeDesc::TypeFloat)
- attr_map[index].w = NODE_ATTR_FLOAT;
- else if (req.curve_type == TypeDesc::TypeMatrix)
- attr_map[index].w = NODE_ATTR_MATRIX;
- else if (req.curve_type == TypeFloat2)
- attr_map[index].w = NODE_ATTR_FLOAT2;
- else
- attr_map[index].w = NODE_ATTR_FLOAT3;
-
- attr_map[index].w |= req.curve_desc.flags << 8;
- }
-
- index++;
-
- if (mesh->subd_faces.size()) {
- attr_map[index].x = id;
- attr_map[index].y = req.subd_desc.element;
- attr_map[index].z = as_uint(req.subd_desc.offset);
-
- if (req.subd_type == TypeDesc::TypeFloat)
- attr_map[index].w = NODE_ATTR_FLOAT;
- else if (req.subd_type == TypeDesc::TypeMatrix)
- attr_map[index].w = NODE_ATTR_MATRIX;
- else if (req.subd_type == TypeFloat2)
- attr_map[index].w = NODE_ATTR_FLOAT2;
- else if (req.triangle_type == TypeRGBA)
- attr_map[index].w = NODE_ATTR_RGBA;
- else
- attr_map[index].w = NODE_ATTR_FLOAT3;
-
- attr_map[index].w |= req.subd_desc.flags << 8;
- }
-
- index++;
- }
-
- /* terminator */
- for (int j = 0; j < ATTR_PRIM_TYPES; j++) {
- attr_map[index].x = ATTR_STD_NONE;
- attr_map[index].y = 0;
- attr_map[index].z = 0;
- attr_map[index].w = 0;
-
- index++;
- }
- }
-
- /* copy to device */
- dscene->attributes_map.copy_to_device();
-}
-
-static void update_attribute_element_size(Mesh *mesh,
- Attribute *mattr,
- AttributePrimitive prim,
- size_t *attr_float_size,
- size_t *attr_float2_size,
- size_t *attr_float3_size,
- size_t *attr_uchar4_size)
-{
- if (mattr) {
- size_t size = mattr->element_size(mesh, prim);
-
- if (mattr->element == ATTR_ELEMENT_VOXEL) {
- /* pass */
- }
- else if (mattr->element == ATTR_ELEMENT_CORNER_BYTE) {
- *attr_uchar4_size += size;
- }
- else if (mattr->type == TypeDesc::TypeFloat) {
- *attr_float_size += size;
- }
- else if (mattr->type == TypeFloat2) {
- *attr_float2_size += size;
- }
- else if (mattr->type == TypeDesc::TypeMatrix) {
- *attr_float3_size += size * 4;
- }
- else {
- *attr_float3_size += size;
- }
- }
-}
-
-static void update_attribute_element_offset(Mesh *mesh,
- device_vector<float> &attr_float,
- size_t &attr_float_offset,
- device_vector<float2> &attr_float2,
- size_t &attr_float2_offset,
- device_vector<float4> &attr_float3,
- size_t &attr_float3_offset,
- device_vector<uchar4> &attr_uchar4,
- size_t &attr_uchar4_offset,
- Attribute *mattr,
- AttributePrimitive prim,
- TypeDesc &type,
- AttributeDescriptor &desc)
-{
- if (mattr) {
- /* store element and type */
- desc.element = mattr->element;
- desc.flags = mattr->flags;
- type = mattr->type;
-
- /* store attribute data in arrays */
- size_t size = mattr->element_size(mesh, prim);
-
- AttributeElement &element = desc.element;
- int &offset = desc.offset;
-
- if (mattr->element == ATTR_ELEMENT_VOXEL) {
- /* store slot in offset value */
- VoxelAttribute *voxel_data = mattr->data_voxel();
- offset = voxel_data->slot;
- }
- else if (mattr->element == ATTR_ELEMENT_CORNER_BYTE) {
- uchar4 *data = mattr->data_uchar4();
- offset = attr_uchar4_offset;
-
- assert(attr_uchar4.size() >= offset + size);
- for (size_t k = 0; k < size; k++) {
- attr_uchar4[offset + k] = data[k];
- }
- attr_uchar4_offset += size;
- }
- else if (mattr->type == TypeDesc::TypeFloat) {
- float *data = mattr->data_float();
- offset = attr_float_offset;
-
- assert(attr_float.size() >= offset + size);
- for (size_t k = 0; k < size; k++) {
- attr_float[offset + k] = data[k];
- }
- attr_float_offset += size;
- }
- else if (mattr->type == TypeFloat2) {
- float2 *data = mattr->data_float2();
- offset = attr_float2_offset;
-
- assert(attr_float2.size() >= offset + size);
- for (size_t k = 0; k < size; k++) {
- attr_float2[offset + k] = data[k];
- }
- attr_float2_offset += size;
- }
- else if (mattr->type == TypeDesc::TypeMatrix) {
- Transform *tfm = mattr->data_transform();
- offset = attr_float3_offset;
-
- assert(attr_float3.size() >= offset + size * 3);
- for (size_t k = 0; k < size * 3; k++) {
- attr_float3[offset + k] = (&tfm->x)[k];
- }
- attr_float3_offset += size * 3;
- }
- else {
- float4 *data = mattr->data_float4();
- offset = attr_float3_offset;
-
- assert(attr_float3.size() >= offset + size);
- for (size_t k = 0; k < size; k++) {
- attr_float3[offset + k] = data[k];
- }
- attr_float3_offset += size;
- }
-
- /* mesh vertex/curve index is global, not per object, so we sneak
- * a correction for that in here */
- if (mesh->subdivision_type == Mesh::SUBDIVISION_CATMULL_CLARK &&
- desc.flags & ATTR_SUBDIVIDED) {
- /* indices for subdivided attributes are retrieved
- * from patch table so no need for correction here*/
- }
- else if (element == ATTR_ELEMENT_VERTEX)
- offset -= mesh->vert_offset;
- else if (element == ATTR_ELEMENT_VERTEX_MOTION)
- offset -= mesh->vert_offset;
- else if (element == ATTR_ELEMENT_FACE) {
- if (prim == ATTR_PRIM_TRIANGLE)
- offset -= mesh->tri_offset;
- else
- offset -= mesh->face_offset;
- }
- else if (element == ATTR_ELEMENT_CORNER || element == ATTR_ELEMENT_CORNER_BYTE) {
- if (prim == ATTR_PRIM_TRIANGLE)
- offset -= 3 * mesh->tri_offset;
- else
- offset -= mesh->corner_offset;
- }
- else if (element == ATTR_ELEMENT_CURVE)
- offset -= mesh->curve_offset;
- else if (element == ATTR_ELEMENT_CURVE_KEY)
- offset -= mesh->curvekey_offset;
- else if (element == ATTR_ELEMENT_CURVE_KEY_MOTION)
- offset -= mesh->curvekey_offset;
- }
- else {
- /* attribute not found */
- desc.element = ATTR_ELEMENT_NONE;
- desc.offset = 0;
- }
-}
-
-void MeshManager::device_update_attributes(Device *device,
- DeviceScene *dscene,
- Scene *scene,
- Progress &progress)
-{
- progress.set_status("Updating Mesh", "Computing attributes");
-
- /* gather per mesh requested attributes. as meshes may have multiple
- * shaders assigned, this merges the requested attributes that have
- * been set per shader by the shader manager */
- vector<AttributeRequestSet> mesh_attributes(scene->meshes.size());
-
- for (size_t i = 0; i < scene->meshes.size(); i++) {
- Mesh *mesh = scene->meshes[i];
-
- scene->need_global_attributes(mesh_attributes[i]);
-
- foreach (Shader *shader, mesh->used_shaders) {
- mesh_attributes[i].add(shader->attributes);
- }
- }
-
- /* mesh attributes are stored in a single array per data type. here we fill
- * those arrays, and set the offset and element type used to create the
- * attribute maps next */
-
- /* Pre-allocate attribute arrays to avoid re-allocations, which would
- * temporarily take 2x the overall attribute memory usage.
- */
- size_t attr_float_size = 0;
- size_t attr_float2_size = 0;
- size_t attr_float3_size = 0;
- size_t attr_uchar4_size = 0;
- for (size_t i = 0; i < scene->meshes.size(); i++) {
- Mesh *mesh = scene->meshes[i];
- AttributeRequestSet &attributes = mesh_attributes[i];
- foreach (AttributeRequest &req, attributes.requests) {
- Attribute *triangle_mattr = mesh->attributes.find(req);
- Attribute *curve_mattr = mesh->curve_attributes.find(req);
- Attribute *subd_mattr = mesh->subd_attributes.find(req);
-
- update_attribute_element_size(mesh,
- triangle_mattr,
- ATTR_PRIM_TRIANGLE,
- &attr_float_size,
- &attr_float2_size,
- &attr_float3_size,
- &attr_uchar4_size);
- update_attribute_element_size(mesh,
- curve_mattr,
- ATTR_PRIM_CURVE,
- &attr_float_size,
- &attr_float2_size,
- &attr_float3_size,
- &attr_uchar4_size);
- update_attribute_element_size(mesh,
- subd_mattr,
- ATTR_PRIM_SUBD,
- &attr_float_size,
- &attr_float2_size,
- &attr_float3_size,
- &attr_uchar4_size);
- }
- }
-
- dscene->attributes_float.alloc(attr_float_size);
- dscene->attributes_float2.alloc(attr_float2_size);
- dscene->attributes_float3.alloc(attr_float3_size);
- dscene->attributes_uchar4.alloc(attr_uchar4_size);
-
- size_t attr_float_offset = 0;
- size_t attr_float2_offset = 0;
- size_t attr_float3_offset = 0;
- size_t attr_uchar4_offset = 0;
-
- /* Fill in attributes. */
- for (size_t i = 0; i < scene->meshes.size(); i++) {
- Mesh *mesh = scene->meshes[i];
- AttributeRequestSet &attributes = mesh_attributes[i];
-
- /* todo: we now store std and name attributes from requests separately even
- * if they actually refer to the same mesh attribute; this could be optimized */
- foreach (AttributeRequest &req, attributes.requests) {
- Attribute *triangle_mattr = mesh->attributes.find(req);
- Attribute *curve_mattr = mesh->curve_attributes.find(req);
- Attribute *subd_mattr = mesh->subd_attributes.find(req);
-
- update_attribute_element_offset(mesh,
- dscene->attributes_float,
- attr_float_offset,
- dscene->attributes_float2,
- attr_float2_offset,
- dscene->attributes_float3,
- attr_float3_offset,
- dscene->attributes_uchar4,
- attr_uchar4_offset,
- triangle_mattr,
- ATTR_PRIM_TRIANGLE,
- req.triangle_type,
- req.triangle_desc);
-
- update_attribute_element_offset(mesh,
- dscene->attributes_float,
- attr_float_offset,
- dscene->attributes_float2,
- attr_float2_offset,
- dscene->attributes_float3,
- attr_float3_offset,
- dscene->attributes_uchar4,
- attr_uchar4_offset,
- curve_mattr,
- ATTR_PRIM_CURVE,
- req.curve_type,
- req.curve_desc);
-
- update_attribute_element_offset(mesh,
- dscene->attributes_float,
- attr_float_offset,
- dscene->attributes_float2,
- attr_float2_offset,
- dscene->attributes_float3,
- attr_float3_offset,
- dscene->attributes_uchar4,
- attr_uchar4_offset,
- subd_mattr,
- ATTR_PRIM_SUBD,
- req.subd_type,
- req.subd_desc);
-
- if (progress.get_cancel())
- return;
- }
- }
-
- /* create attribute lookup maps */
- if (scene->shader_manager->use_osl())
- update_osl_attributes(device, scene, mesh_attributes);
-
- update_svm_attributes(device, dscene, scene, mesh_attributes);
-
- if (progress.get_cancel())
- return;
-
- /* copy to device */
- progress.set_status("Updating Mesh", "Copying Attributes to device");
-
- if (dscene->attributes_float.size()) {
- dscene->attributes_float.copy_to_device();
- }
- if (dscene->attributes_float2.size()) {
- dscene->attributes_float2.copy_to_device();
- }
- if (dscene->attributes_float3.size()) {
- dscene->attributes_float3.copy_to_device();
- }
- if (dscene->attributes_uchar4.size()) {
- dscene->attributes_uchar4.copy_to_device();
- }
-
- if (progress.get_cancel())
- return;
-
- /* After mesh attributes and patch tables have been copied to device memory,
- * we need to update offsets in the objects. */
- scene->object_manager->device_update_mesh_offsets(device, dscene, scene);
-}
-
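
The function above follows a count-then-fill pattern: one pass accumulates the total size per data type, the arrays are allocated exactly once, and a second pass writes each mesh's data at a running offset. A rough sketch of that pattern, using plain std::vector and made-up names as stand-ins for device_vector and the real attribute types:

#include <cstddef>
#include <vector>

/* Hypothetical per-mesh payload; stands in for the attribute data gathered
 * from the AttributeRequestSet in the real code. */
struct ToyMeshAttrs {
  std::vector<float> floats;
};

int main()
{
  std::vector<ToyMeshAttrs> meshes = {{{1.0f, 2.0f}}, {{3.0f}}, {{4.0f, 5.0f, 6.0f}}};

  /* Pass 1: accumulate the total size so the array is allocated exactly once,
   * avoiding re-allocations that would temporarily double memory usage. */
  std::size_t total = 0;
  for (const ToyMeshAttrs &m : meshes) {
    total += m.floats.size();
  }

  std::vector<float> attr_float;
  attr_float.reserve(total); /* single allocation, like dscene->attributes_float.alloc() */

  /* Pass 2: fill at running offsets; the running offset is what the real code
   * records in each attribute descriptor. */
  std::vector<std::size_t> offsets;
  for (const ToyMeshAttrs &m : meshes) {
    offsets.push_back(attr_float.size());
    attr_float.insert(attr_float.end(), m.floats.begin(), m.floats.end());
  }

  return (offsets.size() == meshes.size() && attr_float.size() == total) ? 0 : 1;
}
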
-void MeshManager::mesh_calc_offset(Scene *scene)
-{
- size_t vert_size = 0;
- size_t tri_size = 0;
-
- size_t curve_key_size = 0;
- size_t curve_size = 0;
-
- size_t patch_size = 0;
- size_t face_size = 0;
- size_t corner_size = 0;
-
- size_t prim_size = 0;
-
- foreach (Mesh *mesh, scene->meshes) {
- mesh->vert_offset = vert_size;
- mesh->tri_offset = tri_size;
-
- mesh->curvekey_offset = curve_key_size;
- mesh->curve_offset = curve_size;
-
- mesh->patch_offset = patch_size;
- mesh->face_offset = face_size;
- mesh->corner_offset = corner_size;
-
- vert_size += mesh->verts.size();
- tri_size += mesh->num_triangles();
-
- curve_key_size += mesh->curve_keys.size();
- curve_size += mesh->num_curves();
-
- if (mesh->subd_faces.size()) {
- Mesh::SubdFace &last = mesh->subd_faces[mesh->subd_faces.size() - 1];
- patch_size += (last.ptex_offset + last.num_ptex_faces()) * 8;
-
- /* patch tables are stored in the same array, so include them in patch_size */
- if (mesh->patch_table) {
- mesh->patch_table_offset = patch_size;
- patch_size += mesh->patch_table->total_size();
- }
- }
- face_size += mesh->subd_faces.size();
- corner_size += mesh->subd_face_corners.size();
-
- mesh->prim_offset = prim_size;
- prim_size += mesh->num_primitives();
- }
-}
-
-void MeshManager::device_update_mesh(
- Device *, DeviceScene *dscene, Scene *scene, bool for_displacement, Progress &progress)
-{
- /* Count. */
- size_t vert_size = 0;
- size_t tri_size = 0;
-
- size_t curve_key_size = 0;
- size_t curve_size = 0;
-
- size_t patch_size = 0;
-
- foreach (Mesh *mesh, scene->meshes) {
- vert_size += mesh->verts.size();
- tri_size += mesh->num_triangles();
-
- curve_key_size += mesh->curve_keys.size();
- curve_size += mesh->num_curves();
-
- if (mesh->subd_faces.size()) {
- Mesh::SubdFace &last = mesh->subd_faces[mesh->subd_faces.size() - 1];
- patch_size += (last.ptex_offset + last.num_ptex_faces()) * 8;
-
- /* patch tables are stored in the same array, so include them in patch_size */
- if (mesh->patch_table) {
- mesh->patch_table_offset = patch_size;
- patch_size += mesh->patch_table->total_size();
- }
- }
- }
-
- /* Create mapping from triangle to primitive triangle array. */
- vector<uint> tri_prim_index(tri_size);
- if (for_displacement) {
- /* For displacement kernels we do some trickery to make them believe
- * we've got all required data ready. However, that data is different
- * from the final render kernels since we don't have a BVH yet, so we
- * can't use the same semantics for these arrays.
- */
- foreach (Mesh *mesh, scene->meshes) {
- for (size_t i = 0; i < mesh->num_triangles(); ++i) {
- tri_prim_index[i + mesh->tri_offset] = 3 * (i + mesh->tri_offset);
- }
- }
- }
- else {
- for (size_t i = 0; i < dscene->prim_index.size(); ++i) {
- if ((dscene->prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) {
- tri_prim_index[dscene->prim_index[i]] = dscene->prim_tri_index[i];
- }
- }
- }
-
- /* Fill in all the arrays. */
- if (tri_size != 0) {
- /* normals */
- progress.set_status("Updating Mesh", "Computing normals");
-
- uint *tri_shader = dscene->tri_shader.alloc(tri_size);
- float4 *vnormal = dscene->tri_vnormal.alloc(vert_size);
- uint4 *tri_vindex = dscene->tri_vindex.alloc(tri_size);
- uint *tri_patch = dscene->tri_patch.alloc(tri_size);
- float2 *tri_patch_uv = dscene->tri_patch_uv.alloc(vert_size);
-
- foreach (Mesh *mesh, scene->meshes) {
- mesh->pack_shaders(scene, &tri_shader[mesh->tri_offset]);
- mesh->pack_normals(&vnormal[mesh->vert_offset]);
- mesh->pack_verts(tri_prim_index,
- &tri_vindex[mesh->tri_offset],
- &tri_patch[mesh->tri_offset],
- &tri_patch_uv[mesh->vert_offset],
- mesh->vert_offset,
- mesh->tri_offset);
- if (progress.get_cancel())
- return;
- }
-
- /* vertex coordinates */
- progress.set_status("Updating Mesh", "Copying Mesh to device");
-
- dscene->tri_shader.copy_to_device();
- dscene->tri_vnormal.copy_to_device();
- dscene->tri_vindex.copy_to_device();
- dscene->tri_patch.copy_to_device();
- dscene->tri_patch_uv.copy_to_device();
- }
-
- if (curve_size != 0) {
- progress.set_status("Updating Mesh", "Copying Strands to device");
-
- float4 *curve_keys = dscene->curve_keys.alloc(curve_key_size);
- float4 *curves = dscene->curves.alloc(curve_size);
-
- foreach (Mesh *mesh, scene->meshes) {
- mesh->pack_curves(scene,
- &curve_keys[mesh->curvekey_offset],
- &curves[mesh->curve_offset],
- mesh->curvekey_offset);
- if (progress.get_cancel())
- return;
- }
-
- dscene->curve_keys.copy_to_device();
- dscene->curves.copy_to_device();
- }
-
- if (patch_size != 0) {
- progress.set_status("Updating Mesh", "Copying Patches to device");
-
- uint *patch_data = dscene->patches.alloc(patch_size);
-
- foreach (Mesh *mesh, scene->meshes) {
- mesh->pack_patches(&patch_data[mesh->patch_offset],
- mesh->vert_offset,
- mesh->face_offset,
- mesh->corner_offset);
-
- if (mesh->patch_table) {
- mesh->patch_table->copy_adjusting_offsets(&patch_data[mesh->patch_table_offset],
- mesh->patch_table_offset);
- }
-
- if (progress.get_cancel())
- return;
- }
-
- dscene->patches.copy_to_device();
- }
-
- if (for_displacement) {
- float4 *prim_tri_verts = dscene->prim_tri_verts.alloc(tri_size * 3);
- foreach (Mesh *mesh, scene->meshes) {
- for (size_t i = 0; i < mesh->num_triangles(); ++i) {
- Mesh::Triangle t = mesh->get_triangle(i);
- size_t offset = 3 * (i + mesh->tri_offset);
- prim_tri_verts[offset + 0] = float3_to_float4(mesh->verts[t.v[0]]);
- prim_tri_verts[offset + 1] = float3_to_float4(mesh->verts[t.v[1]]);
- prim_tri_verts[offset + 2] = float3_to_float4(mesh->verts[t.v[2]]);
- }
- }
- dscene->prim_tri_verts.copy_to_device();
- }
-}
-
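
The "trickery" mentioned in device_update_mesh() above comes down to how tri_prim_index is filled: before a BVH exists, the mapping is synthesized directly from global triangle indices (each triangle maps to 3x its global index in prim_tri_verts), whereas for final renders it is recovered from the BVH's prim arrays. A condensed sketch of the displacement-time mapping, using made-up Toy types rather than the real Mesh/DeviceScene:

#include <cstddef>
#include <cstdint>
#include <vector>

/* Hypothetical flattened inputs; in Cycles this information lives in Mesh and
 * DeviceScene. */
struct ToyScene {
  std::vector<std::size_t> mesh_tri_offset; /* global offset of each mesh's first triangle */
  std::vector<std::size_t> mesh_num_tris;
};

/* Displacement path: no BVH exists yet, so triangle i of every mesh simply
 * maps to 3 * (its global index), i.e. the start of its vertex triple in
 * prim_tri_verts. */
static std::vector<std::uint32_t> tri_prim_index_for_displacement(const ToyScene &scene,
                                                                  std::size_t total_tris)
{
  std::vector<std::uint32_t> map(total_tris);
  for (std::size_t m = 0; m < scene.mesh_tri_offset.size(); m++) {
    for (std::size_t i = 0; i < scene.mesh_num_tris[m]; i++) {
      const std::size_t global = i + scene.mesh_tri_offset[m];
      map[global] = static_cast<std::uint32_t>(3 * global);
    }
  }
  return map;
}

int main()
{
  ToyScene scene = {{0, 2}, {2, 3}}; /* two meshes: 2 and 3 triangles */
  std::vector<std::uint32_t> map = tri_prim_index_for_displacement(scene, 5);
  return map[4] == 12 ? 0 : 1; /* triangle 4 -> vertex triple starting at index 12 */
}
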
-void MeshManager::device_update_bvh(Device *device,
- DeviceScene *dscene,
- Scene *scene,
- Progress &progress)
-{
- /* bvh build */
- progress.set_status("Updating Scene BVH", "Building");
-
- BVHParams bparams;
- bparams.top_level = true;
- bparams.bvh_layout = BVHParams::best_bvh_layout(scene->params.bvh_layout,
- device->get_bvh_layout_mask());
- bparams.use_spatial_split = scene->params.use_bvh_spatial_split;
- bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
- scene->params.use_bvh_unaligned_nodes;
- bparams.num_motion_triangle_steps = scene->params.num_bvh_time_steps;
- bparams.num_motion_curve_steps = scene->params.num_bvh_time_steps;
- bparams.bvh_type = scene->params.bvh_type;
- bparams.curve_flags = dscene->data.curve.curveflags;
- bparams.curve_subdivisions = dscene->data.curve.subdivisions;
-
- VLOG(1) << "Using " << bvh_layout_name(bparams.bvh_layout) << " layout.";
-
-#ifdef WITH_EMBREE
- if (bparams.bvh_layout == BVH_LAYOUT_EMBREE) {
- if (dscene->data.bvh.scene) {
- BVHEmbree::destroy(dscene->data.bvh.scene);
- }
- }
-#endif
-
- BVH *bvh = BVH::create(bparams, scene->meshes, scene->objects);
- bvh->build(progress, &device->stats);
-
- if (progress.get_cancel()) {
-#ifdef WITH_EMBREE
- if (bparams.bvh_layout == BVH_LAYOUT_EMBREE) {
- if (dscene->data.bvh.scene) {
- BVHEmbree::destroy(dscene->data.bvh.scene);
- }
- }
-#endif
- delete bvh;
- return;
- }
-
- /* copy to device */
- progress.set_status("Updating Scene BVH", "Copying BVH to device");
-
- PackedBVH &pack = bvh->pack;
-
- if (pack.nodes.size()) {
- dscene->bvh_nodes.steal_data(pack.nodes);
- dscene->bvh_nodes.copy_to_device();
- }
- if (pack.leaf_nodes.size()) {
- dscene->bvh_leaf_nodes.steal_data(pack.leaf_nodes);
- dscene->bvh_leaf_nodes.copy_to_device();
- }
- if (pack.object_node.size()) {
- dscene->object_node.steal_data(pack.object_node);
- dscene->object_node.copy_to_device();
- }
- if (pack.prim_tri_index.size()) {
- dscene->prim_tri_index.steal_data(pack.prim_tri_index);
- dscene->prim_tri_index.copy_to_device();
- }
- if (pack.prim_tri_verts.size()) {
- dscene->prim_tri_verts.steal_data(pack.prim_tri_verts);
- dscene->prim_tri_verts.copy_to_device();
- }
- if (pack.prim_type.size()) {
- dscene->prim_type.steal_data(pack.prim_type);
- dscene->prim_type.copy_to_device();
- }
- if (pack.prim_visibility.size()) {
- dscene->prim_visibility.steal_data(pack.prim_visibility);
- dscene->prim_visibility.copy_to_device();
- }
- if (pack.prim_index.size()) {
- dscene->prim_index.steal_data(pack.prim_index);
- dscene->prim_index.copy_to_device();
- }
- if (pack.prim_object.size()) {
- dscene->prim_object.steal_data(pack.prim_object);
- dscene->prim_object.copy_to_device();
- }
- if (pack.prim_time.size()) {
- dscene->prim_time.steal_data(pack.prim_time);
- dscene->prim_time.copy_to_device();
- }
-
- dscene->data.bvh.root = pack.root_index;
- dscene->data.bvh.bvh_layout = bparams.bvh_layout;
- dscene->data.bvh.use_bvh_steps = (scene->params.num_bvh_time_steps != 0);
-
- bvh->copy_to_device(progress, dscene);
-
- delete bvh;
-}
-
-void MeshManager::device_update_preprocess(Device *device, Scene *scene, Progress &progress)
-{
- if (!need_update && !need_flags_update) {
- return;
- }
-
- progress.set_status("Updating Meshes Flags");
-
- /* Update flags. */
- bool volume_images_updated = false;
-
- foreach (Mesh *mesh, scene->meshes) {
- mesh->has_volume = false;
-
- foreach (const Shader *shader, mesh->used_shaders) {
- if (shader->has_volume) {
- mesh->has_volume = true;
- }
- if (shader->has_surface_bssrdf) {
- mesh->has_surface_bssrdf = true;
- }
- }
-
- if (need_update && mesh->has_volume) {
- /* Create volume meshes if there is voxel data. */
- if (mesh->has_voxel_attributes()) {
- if (!volume_images_updated) {
- progress.set_status("Updating Meshes Volume Bounds");
- device_update_volume_images(device, scene, progress);
- volume_images_updated = true;
- }
-
- create_volume_mesh(scene, mesh, progress);
- }
- }
- }
-
- need_flags_update = false;
-}
-
-void MeshManager::device_update_displacement_images(Device *device,
- Scene *scene,
- Progress &progress)
-{
- progress.set_status("Updating Displacement Images");
- TaskPool pool;
- ImageManager *image_manager = scene->image_manager;
- set<int> bump_images;
- foreach (Mesh *mesh, scene->meshes) {
- if (mesh->need_update) {
- foreach (Shader *shader, mesh->used_shaders) {
- if (!shader->has_displacement || shader->displacement_method == DISPLACE_BUMP) {
- continue;
- }
- foreach (ShaderNode *node, shader->graph->nodes) {
- if (node->special_type != SHADER_SPECIAL_TYPE_IMAGE_SLOT) {
- continue;
- }
-
- ImageSlotTextureNode *image_node = static_cast<ImageSlotTextureNode *>(node);
- foreach (int slot, image_node->slots) {
- if (slot != -1) {
- bump_images.insert(slot);
- }
- }
- }
- }
- }
- }
- foreach (int slot, bump_images) {
- pool.push(function_bind(
- &ImageManager::device_update_slot, image_manager, device, scene, slot, &progress));
- }
- pool.wait_work();
-}
-
-void MeshManager::device_update_volume_images(Device *device, Scene *scene, Progress &progress)
-{
- progress.set_status("Updating Volume Images");
- TaskPool pool;
- ImageManager *image_manager = scene->image_manager;
- set<int> volume_images;
-
- foreach (Mesh *mesh, scene->meshes) {
- if (!mesh->need_update) {
- continue;
- }
-
- foreach (Attribute &attr, mesh->attributes.attributes) {
- if (attr.element != ATTR_ELEMENT_VOXEL) {
- continue;
- }
-
- VoxelAttribute *voxel = attr.data_voxel();
-
- if (voxel->slot != -1) {
- volume_images.insert(voxel->slot);
- }
- }
- }
-
- foreach (int slot, volume_images) {
- pool.push(function_bind(
- &ImageManager::device_update_slot, image_manager, device, scene, slot, &progress));
- }
- pool.wait_work();
-}
-
-void MeshManager::device_update(Device *device,
- DeviceScene *dscene,
- Scene *scene,
- Progress &progress)
-{
- if (!need_update)
- return;
-
- VLOG(1) << "Total " << scene->meshes.size() << " meshes.";
-
- bool true_displacement_used = false;
- size_t total_tess_needed = 0;
-
- foreach (Mesh *mesh, scene->meshes) {
- foreach (Shader *shader, mesh->used_shaders) {
- if (shader->need_update_mesh)
- mesh->need_update = true;
- }
-
- if (mesh->need_update) {
- /* Update normals. */
- mesh->add_face_normals();
- mesh->add_vertex_normals();
-
- if (mesh->need_attribute(scene, ATTR_STD_POSITION_UNDISPLACED)) {
- mesh->add_undisplaced();
- }
-
- /* Test if we need tessellation. */
- if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE && mesh->num_subd_verts == 0 &&
- mesh->subd_params) {
- total_tess_needed++;
- }
-
- /* Test if we need displacement. */
- if (mesh->has_true_displacement()) {
- true_displacement_used = true;
- }
-
- if (progress.get_cancel())
- return;
- }
- }
-
- /* Tessellate meshes that are using subdivision */
- if (total_tess_needed) {
- Camera *dicing_camera = scene->dicing_camera;
- dicing_camera->update(scene);
-
- size_t i = 0;
- foreach (Mesh *mesh, scene->meshes) {
- if (mesh->need_update && mesh->subdivision_type != Mesh::SUBDIVISION_NONE &&
- mesh->num_subd_verts == 0 && mesh->subd_params) {
- string msg = "Tessellating ";
- if (mesh->name == "")
- msg += string_printf("%u/%u", (uint)(i + 1), (uint)total_tess_needed);
- else
- msg += string_printf(
- "%s %u/%u", mesh->name.c_str(), (uint)(i + 1), (uint)total_tess_needed);
-
- progress.set_status("Updating Mesh", msg);
-
- mesh->subd_params->camera = dicing_camera;
- DiagSplit dsplit(*mesh->subd_params);
- mesh->tessellate(&dsplit);
-
- i++;
-
- if (progress.get_cancel())
- return;
- }
- }
- }
-
- /* Update images needed for true displacement. */
- bool old_need_object_flags_update = false;
- if (true_displacement_used) {
- VLOG(1) << "Updating images used for true displacement.";
- device_update_displacement_images(device, scene, progress);
- old_need_object_flags_update = scene->object_manager->need_flags_update;
- scene->object_manager->device_update_flags(device, dscene, scene, progress, false);
- }
-
- /* Device update. */
- device_free(device, dscene);
-
- mesh_calc_offset(scene);
- if (true_displacement_used) {
- device_update_mesh(device, dscene, scene, true, progress);
- }
- if (progress.get_cancel())
- return;
-
- device_update_attributes(device, dscene, scene, progress);
- if (progress.get_cancel())
- return;
-
- /* Update displacement. */
- bool displacement_done = false;
- size_t num_bvh = 0;
- BVHLayout bvh_layout = BVHParams::best_bvh_layout(scene->params.bvh_layout,
- device->get_bvh_layout_mask());
-
- foreach (Mesh *mesh, scene->meshes) {
- if (mesh->need_update) {
- if (displace(device, dscene, scene, mesh, progress)) {
- displacement_done = true;
- }
-
- if (mesh->need_build_bvh(bvh_layout)) {
- num_bvh++;
- }
- }
-
- if (progress.get_cancel())
- return;
- }
-
- /* Device re-update after displacement. */
- if (displacement_done) {
- device_free(device, dscene);
-
- device_update_attributes(device, dscene, scene, progress);
- if (progress.get_cancel())
- return;
- }
-
- TaskPool pool;
-
- size_t i = 0;
- foreach (Mesh *mesh, scene->meshes) {
- if (mesh->need_update) {
- pool.push(function_bind(
- &Mesh::compute_bvh, mesh, device, dscene, &scene->params, &progress, i, num_bvh));
- if (mesh->need_build_bvh(bvh_layout)) {
- i++;
- }
- }
- }
-
- TaskPool::Summary summary;
- pool.wait_work(&summary);
- VLOG(2) << "Objects BVH build pool statistics:\n" << summary.full_report();
-
- foreach (Shader *shader, scene->shaders) {
- shader->need_update_mesh = false;
- }
-
- Scene::MotionType need_motion = scene->need_motion();
- bool motion_blur = need_motion == Scene::MOTION_BLUR;
-
- /* Update objects. */
- vector<Object *> volume_objects;
- foreach (Object *object, scene->objects) {
- object->compute_bounds(motion_blur);
- }
-
- if (progress.get_cancel())
- return;
-
- device_update_bvh(device, dscene, scene, progress);
- if (progress.get_cancel())
- return;
-
- device_update_mesh(device, dscene, scene, false, progress);
- if (progress.get_cancel())
- return;
-
- need_update = false;
-
- if (true_displacement_used) {
- /* Re-tag flags for update, so they're re-evaluated
- * for meshes with correct bounding boxes.
- *
- * This wouldn't cause wrong results, but true
- * displacement might be less optimal to calculate.
- */
- scene->object_manager->need_flags_update = old_need_object_flags_update;
- }
-}
-
-void MeshManager::device_free(Device *device, DeviceScene *dscene)
-{
- dscene->bvh_nodes.free();
- dscene->bvh_leaf_nodes.free();
- dscene->object_node.free();
- dscene->prim_tri_verts.free();
- dscene->prim_tri_index.free();
- dscene->prim_type.free();
- dscene->prim_visibility.free();
- dscene->prim_index.free();
- dscene->prim_object.free();
- dscene->prim_time.free();
- dscene->tri_shader.free();
- dscene->tri_vnormal.free();
- dscene->tri_vindex.free();
- dscene->tri_patch.free();
- dscene->tri_patch_uv.free();
- dscene->curves.free();
- dscene->curve_keys.free();
- dscene->patches.free();
- dscene->attributes_map.free();
- dscene->attributes_float.free();
- dscene->attributes_float2.free();
- dscene->attributes_float3.free();
- dscene->attributes_uchar4.free();
-
- /* Signal for shaders like displacement not to do ray tracing. */
- dscene->data.bvh.bvh_layout = BVH_LAYOUT_NONE;
-
-#ifdef WITH_OSL
- OSLGlobals *og = (OSLGlobals *)device->osl_memory();
-
- if (og) {
- og->object_name_map.clear();
- og->attribute_map.clear();
- og->object_names.clear();
- }
-#else
- (void)device;
-#endif
-}
-
-void MeshManager::tag_update(Scene *scene)
-{
- need_update = true;
- scene->object_manager->need_update = true;
-}
-
-void MeshManager::collect_statistics(const Scene *scene, RenderStats *stats)
-{
- foreach (Mesh *mesh, scene->meshes) {
- stats->mesh.geometry.add_entry(
- NamedSizeEntry(string(mesh->name.c_str()), mesh->get_total_size_in_bytes()));
- }
-}
-
-bool Mesh::need_attribute(Scene *scene, AttributeStandard std)
-{
- if (std == ATTR_STD_NONE)
- return false;
-
- if (scene->need_global_attribute(std))
- return true;
-
- foreach (Shader *shader, used_shaders)
- if (shader->attributes.find(std))
- return true;
-
- return false;
-}
-
-bool Mesh::need_attribute(Scene * /*scene*/, ustring name)
-{
- if (name == ustring())
- return false;
-
- foreach (Shader *shader, used_shaders)
- if (shader->attributes.find(name))
- return true;
-
- return false;
-}
-
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/mesh.h b/intern/cycles/render/mesh.h
index c5be0ba60b9..d0cf4d557aa 100644
--- a/intern/cycles/render/mesh.h
+++ b/intern/cycles/render/mesh.h
@@ -21,6 +21,7 @@
#include "bvh/bvh_params.h"
#include "render/attribute.h"
+#include "render/geometry.h"
#include "render/shader.h"
#include "util/util_array.h"
@@ -29,7 +30,6 @@
#include "util/util_map.h"
#include "util/util_param.h"
#include "util/util_set.h"
-#include "util/util_transform.h"
#include "util/util_types.h"
#include "util/util_vector.h"
@@ -51,7 +51,7 @@ struct PackedPatchTable;
/* Mesh */
-class Mesh : public Node {
+class Mesh : public Geometry {
public:
NODE_DECLARE
@@ -91,94 +91,6 @@ class Mesh : public Node {
return triangles.size() / 3;
}
- /* Mesh Curve */
- struct Curve {
- int first_key;
- int num_keys;
-
- int num_segments() const
- {
- return num_keys - 1;
- }
-
- void bounds_grow(const int k,
- const float3 *curve_keys,
- const float *curve_radius,
- BoundBox &bounds) const;
- void bounds_grow(float4 keys[4], BoundBox &bounds) const;
- void bounds_grow(const int k,
- const float3 *curve_keys,
- const float *curve_radius,
- const Transform &aligned_space,
- BoundBox &bounds) const;
-
- void motion_keys(const float3 *curve_keys,
- const float *curve_radius,
- const float3 *key_steps,
- size_t num_curve_keys,
- size_t num_steps,
- float time,
- size_t k0,
- size_t k1,
- float4 r_keys[2]) const;
- void cardinal_motion_keys(const float3 *curve_keys,
- const float *curve_radius,
- const float3 *key_steps,
- size_t num_curve_keys,
- size_t num_steps,
- float time,
- size_t k0,
- size_t k1,
- size_t k2,
- size_t k3,
- float4 r_keys[4]) const;
-
- void keys_for_step(const float3 *curve_keys,
- const float *curve_radius,
- const float3 *key_steps,
- size_t num_curve_keys,
- size_t num_steps,
- size_t step,
- size_t k0,
- size_t k1,
- float4 r_keys[2]) const;
- void cardinal_keys_for_step(const float3 *curve_keys,
- const float *curve_radius,
- const float3 *key_steps,
- size_t num_curve_keys,
- size_t num_steps,
- size_t step,
- size_t k0,
- size_t k1,
- size_t k2,
- size_t k3,
- float4 r_keys[4]) const;
- };
-
- Curve get_curve(size_t i) const
- {
- int first = curve_first_key[i];
- int next_first = (i + 1 < curve_first_key.size()) ? curve_first_key[i + 1] : curve_keys.size();
-
- Curve curve = {first, next_first - first};
- return curve;
- }
-
- size_t num_curves() const
- {
- return curve_first_key.size();
- }
-
- size_t num_segments() const
- {
- return curve_keys.size() - curve_first_key.size();
- }
-
- size_t num_primitives() const
- {
- return num_triangles() + num_segments();
- }
-
/* Mesh SubdFace */
struct SubdFace {
int start_corner;
@@ -212,14 +124,6 @@ class Mesh : public Node {
SubdivisionType subdivision_type;
/* Mesh Data */
- enum GeometryFlags {
- GEOMETRY_NONE = 0,
- GEOMETRY_TRIANGLES = (1 << 0),
- GEOMETRY_CURVES = (1 << 1),
- };
- int geometry_flags; /* used to distinguish meshes with no verts
- and meshes for which geometry is not created */
-
array<int> triangles;
array<float3> verts;
array<int> shader;
@@ -229,14 +133,9 @@ class Mesh : public Node {
array<int> triangle_patch; /* must be < 0 for non subd triangles */
array<float2> vert_patch_uv;
- float volume_isovalue;
- bool has_volume; /* Set in the device_update_flags(). */
- bool has_surface_bssrdf; /* Set in the device_update_flags(). */
-
- array<float3> curve_keys;
- array<float> curve_radius;
- array<int> curve_first_key;
- array<int> curve_shader;
+ float volume_clipping;
+ float volume_step_size;
+ bool volume_object_space;
array<SubdFace> subd_faces;
array<int> subd_face_corners;
@@ -246,42 +145,18 @@ class Mesh : public Node {
SubdParams *subd_params;
- vector<Shader *> used_shaders;
- AttributeSet attributes;
- AttributeSet curve_attributes;
AttributeSet subd_attributes;
- BoundBox bounds;
- bool transform_applied;
- bool transform_negative_scaled;
- Transform transform_normal;
-
PackedPatchTable *patch_table;
- uint motion_steps;
- bool use_motion_blur;
-
- /* Update Flags */
- bool need_update;
- bool need_update_rebuild;
-
/* BVH */
- BVH *bvh;
- size_t tri_offset;
size_t vert_offset;
- size_t curve_offset;
- size_t curvekey_offset;
-
size_t patch_offset;
size_t patch_table_offset;
size_t face_offset;
size_t corner_offset;
- size_t attr_map_offset;
-
- size_t prim_offset;
-
size_t num_subd_verts;
private:
@@ -289,7 +164,7 @@ class Mesh : public Node {
unordered_multimap<int, int>
vert_stitching_map; /* stitching index -> multiple real vert indices */
friend class DiagSplit;
- friend class MeshManager;
+ friend class GeometryManager;
public:
/* Functions */
@@ -298,24 +173,24 @@ class Mesh : public Node {
void resize_mesh(int numverts, int numfaces);
void reserve_mesh(int numverts, int numfaces);
- void resize_curves(int numcurves, int numkeys);
- void reserve_curves(int numcurves, int numkeys);
void resize_subd_faces(int numfaces, int num_ngons, int numcorners);
void reserve_subd_faces(int numfaces, int num_ngons, int numcorners);
- void clear(bool preserve_voxel_data = false);
+ void clear(bool preserve_voxel_data);
+ void clear() override;
void add_vertex(float3 P);
void add_vertex_slow(float3 P);
void add_triangle(int v0, int v1, int v2, int shader, bool smooth);
- void add_curve_key(float3 loc, float radius);
- void add_curve(int first_key, int shader);
void add_subd_face(int *corners, int num_corners, int shader_, bool smooth_);
- void compute_bounds();
+ void copy_center_to_motion_step(const int motion_step);
+
+ void compute_bounds() override;
+ void apply_transform(const Transform &tfm, const bool apply_to_motion) override;
void add_face_normals();
void add_vertex_normals();
void add_undisplaced();
- void get_uv_tiles(ustring map, unordered_set<int> &tiles);
+ void get_uv_tiles(ustring map, unordered_set<int> &tiles) override;
void pack_shaders(Scene *scene, uint *shader);
void pack_normals(float4 *vnormal);
@@ -325,103 +200,11 @@ class Mesh : public Node {
float2 *tri_patch_uv,
size_t vert_offset,
size_t tri_offset);
- void pack_curves(Scene *scene, float4 *curve_key_co, float4 *curve_data, size_t curvekey_offset);
void pack_patches(uint *patch_data, uint vert_offset, uint face_offset, uint corner_offset);
- void compute_bvh(Device *device,
- DeviceScene *dscene,
- SceneParams *params,
- Progress *progress,
- int n,
- int total);
-
- bool need_attribute(Scene *scene, AttributeStandard std);
- bool need_attribute(Scene *scene, ustring name);
-
- void tag_update(Scene *scene, bool rebuild);
-
- bool has_motion_blur() const;
- bool has_true_displacement() const;
- bool has_voxel_attributes() const;
-
- /* Convert between normalized -1..1 motion time and index
- * in the VERTEX_MOTION attribute. */
- float motion_time(int step) const;
- int motion_step(float time) const;
-
- /* Check whether the mesh should have its own BVH built separately. Briefly,
- * a separate BVH is needed for a mesh if:
- *
- * - It is instanced multiple times, so each instance object should share the
- * same BVH tree.
- * - Special ray intersection is needed, for example to limit subsurface rays
- * to only the mesh itself.
- * - The BVH layout requires the top level to only contain instances.
- */
- bool need_build_bvh(BVHLayout layout) const;
-
- /* Check if the mesh should be treated as instanced. */
- bool is_instanced() const;
-
void tessellate(DiagSplit *split);
};
-/* Mesh Manager */
-
-class MeshManager {
- public:
- bool need_update;
- bool need_flags_update;
-
- MeshManager();
- ~MeshManager();
-
- bool displace(Device *device, DeviceScene *dscene, Scene *scene, Mesh *mesh, Progress &progress);
-
- /* attributes */
- void update_osl_attributes(Device *device,
- Scene *scene,
- vector<AttributeRequestSet> &mesh_attributes);
- void update_svm_attributes(Device *device,
- DeviceScene *dscene,
- Scene *scene,
- vector<AttributeRequestSet> &mesh_attributes);
-
- void device_update_preprocess(Device *device, Scene *scene, Progress &progress);
- void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
-
- void device_free(Device *device, DeviceScene *dscene);
-
- void tag_update(Scene *scene);
-
- void create_volume_mesh(Scene *scene, Mesh *mesh, Progress &progress);
-
- void collect_statistics(const Scene *scene, RenderStats *stats);
-
- protected:
- /* Calculate verts/triangles/curves offsets in global arrays. */
- void mesh_calc_offset(Scene *scene);
-
- void device_update_object(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
-
- void device_update_mesh(Device *device,
- DeviceScene *dscene,
- Scene *scene,
- bool for_displacement,
- Progress &progress);
-
- void device_update_attributes(Device *device,
- DeviceScene *dscene,
- Scene *scene,
- Progress &progress);
-
- void device_update_bvh(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
-
- void device_update_displacement_images(Device *device, Scene *scene, Progress &progress);
-
- void device_update_volume_images(Device *device, Scene *scene, Progress &progress);
-};
-
CCL_NAMESPACE_END
#endif /* __MESH_H__ */
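
The header changes above are the visible part of the geometry refactor: Mesh now derives from a shared Geometry base and overrides virtual hooks (clear, compute_bounds, apply_transform, get_uv_tiles), while the curve data and the old MeshManager move out of this file, presumably into a separate hair geometry type and the GeometryManager seen later in the patch. A heavily reduced, hypothetical sketch of that shape, not the real class definitions:

#include <vector>

/* Hypothetical stand-ins; the real classes carry far more state and hooks. */
class ToyGeometry {
 public:
  virtual ~ToyGeometry() = default;
  virtual void clear() = 0;
  virtual void compute_bounds() = 0;
};

class ToyMesh : public ToyGeometry {
 public:
  void clear() override
  {
    verts.clear();
    triangles.clear();
  }
  void compute_bounds() override { /* grow a bound over verts */ }

  std::vector<float> verts;   /* flattened xyz */
  std::vector<int> triangles; /* vertex index triplets */
};

class ToyHair : public ToyGeometry {
 public:
  void clear() override
  {
    keys.clear();
    first_key.clear();
  }
  void compute_bounds() override { /* grow a bound over curve keys */ }

  std::vector<float> keys;    /* flattened xyz */
  std::vector<int> first_key; /* per-curve start index */
};

/* A single manager can now iterate heterogeneous geometry; this is the role
 * the removed MeshManager used to play for meshes only. */
static void update_all(const std::vector<ToyGeometry *> &scene_geometry)
{
  for (ToyGeometry *geom : scene_geometry) {
    geom->compute_bounds();
  }
}

int main()
{
  ToyMesh mesh;
  ToyHair hair;
  std::vector<ToyGeometry *> geometry = {&mesh, &hair};
  update_all(geometry);
  return 0;
}
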
diff --git a/intern/cycles/render/mesh_displace.cpp b/intern/cycles/render/mesh_displace.cpp
index 6a6c2fbb3eb..467810f9273 100644
--- a/intern/cycles/render/mesh_displace.cpp
+++ b/intern/cycles/render/mesh_displace.cpp
@@ -43,7 +43,7 @@ static float3 compute_face_normal(const Mesh::Triangle &t, float3 *verts)
return norm / normlen;
}
-bool MeshManager::displace(
+bool GeometryManager::displace(
Device *device, DeviceScene *dscene, Scene *scene, Mesh *mesh, Progress &progress)
{
/* verify if we have a displacement shader */
@@ -58,7 +58,7 @@ bool MeshManager::displace(
size_t object_index = OBJECT_NONE;
for (size_t i = 0; i < scene->objects.size(); i++) {
- if (scene->objects[i]->mesh == mesh) {
+ if (scene->objects[i]->geometry == mesh) {
object_index = i;
break;
}
@@ -91,7 +91,7 @@ bool MeshManager::displace(
/* set up object, primitive and barycentric coordinates */
int object = object_index;
- int prim = mesh->tri_offset + i;
+ int prim = mesh->prim_offset + i;
float u, v;
switch (j) {
diff --git a/intern/cycles/render/mesh_subdivision.cpp b/intern/cycles/render/mesh_subdivision.cpp
index 40dd658eadd..3d72b2fab91 100644
--- a/intern/cycles/render/mesh_subdivision.cpp
+++ b/intern/cycles/render/mesh_subdivision.cpp
@@ -14,16 +14,16 @@
* limitations under the License.
*/
-#include "render/mesh.h"
#include "render/attribute.h"
#include "render/camera.h"
+#include "render/mesh.h"
-#include "subd/subd_split.h"
#include "subd/subd_patch.h"
#include "subd/subd_patch_table.h"
+#include "subd/subd_split.h"
-#include "util/util_foreach.h"
#include "util/util_algorithm.h"
+#include "util/util_foreach.h"
#include "util/util_hash.h"
CCL_NAMESPACE_BEGIN
@@ -32,10 +32,10 @@ CCL_NAMESPACE_BEGIN
CCL_NAMESPACE_END
-# include <opensubdiv/far/topologyRefinerFactory.h>
-# include <opensubdiv/far/primvarRefiner.h>
-# include <opensubdiv/far/patchTableFactory.h>
# include <opensubdiv/far/patchMap.h>
+# include <opensubdiv/far/patchTableFactory.h>
+# include <opensubdiv/far/primvarRefiner.h>
+# include <opensubdiv/far/topologyRefinerFactory.h>
/* specializations of TopologyRefinerFactory for ccl::Mesh */
diff --git a/intern/cycles/render/mesh_volume.cpp b/intern/cycles/render/mesh_volume.cpp
index f451b58e92a..d73ba3b06dd 100644
--- a/intern/cycles/render/mesh_volume.cpp
+++ b/intern/cycles/render/mesh_volume.cpp
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "render/mesh.h"
#include "render/attribute.h"
+#include "render/mesh.h"
#include "render/scene.h"
#include "util/util_foreach.h"
@@ -362,7 +362,7 @@ struct VoxelAttributeGrid {
int channels;
};
-void MeshManager::create_volume_mesh(Scene *scene, Mesh *mesh, Progress &progress)
+void GeometryManager::create_volume_mesh(Mesh *mesh, Progress &progress)
{
string msg = string_printf("Computing Volume Mesh %s", mesh->name.c_str());
progress.set_status("Updating Mesh", msg);
@@ -373,13 +373,15 @@ void MeshManager::create_volume_mesh(Scene *scene, Mesh *mesh, Progress &progres
VolumeParams volume_params;
volume_params.resolution = make_int3(0, 0, 0);
+ Transform transform = transform_identity();
+
foreach (Attribute &attr, mesh->attributes.attributes) {
if (attr.element != ATTR_ELEMENT_VOXEL) {
continue;
}
- VoxelAttribute *voxel = attr.data_voxel();
- device_memory *image_memory = scene->image_manager->image_memory(voxel->slot);
+ ImageHandle &handle = attr.data_voxel();
+ device_texture *image_memory = handle.image_memory();
int3 resolution = make_int3(
image_memory->data_width, image_memory->data_height, image_memory->data_depth);
@@ -387,14 +389,20 @@ void MeshManager::create_volume_mesh(Scene *scene, Mesh *mesh, Progress &progres
volume_params.resolution = resolution;
}
else if (volume_params.resolution != resolution) {
- VLOG(1) << "Can't create volume mesh, all voxel grid resolutions must be equal\n";
- return;
+ /* TODO: support this as it's common for OpenVDB. */
+ VLOG(1) << "Can't create accurate volume mesh, all voxel grid resolutions must be equal\n";
+ continue;
}
VoxelAttributeGrid voxel_grid;
voxel_grid.data = static_cast<float *>(image_memory->host_pointer);
voxel_grid.channels = image_memory->data_elements;
voxel_grids.push_back(voxel_grid);
+
+ /* TODO: support multiple transforms. */
+ if (image_memory->info.use_transform_3d) {
+ transform = image_memory->info.transform_3d;
+ }
}
if (voxel_grids.empty()) {
@@ -427,17 +435,14 @@ void MeshManager::create_volume_mesh(Scene *scene, Mesh *mesh, Progress &progres
}
/* Compute start point and cell size from transform. */
- Attribute *attr = mesh->attributes.find(ATTR_STD_GENERATED_TRANSFORM);
const int3 resolution = volume_params.resolution;
float3 start_point = make_float3(0.0f, 0.0f, 0.0f);
float3 cell_size = make_float3(1.0f / resolution.x, 1.0f / resolution.y, 1.0f / resolution.z);
- if (attr) {
- const Transform *tfm = attr->data_transform();
- const Transform itfm = transform_inverse(*tfm);
- start_point = transform_point(&itfm, start_point);
- cell_size = transform_direction(&itfm, cell_size);
- }
+ /* TODO: support arbitrary transforms, not just scale + translate. */
+ const Transform itfm = transform_inverse(transform);
+ start_point = transform_point(&itfm, start_point);
+ cell_size = transform_direction(&itfm, cell_size);
volume_params.start_point = start_point;
volume_params.cell_size = cell_size;
@@ -445,7 +450,7 @@ void MeshManager::create_volume_mesh(Scene *scene, Mesh *mesh, Progress &progres
/* Build bounding mesh around non-empty volume cells. */
VolumeMeshBuilder builder(&volume_params);
- const float isovalue = mesh->volume_isovalue;
+ const float clipping = mesh->volume_clipping;
for (int z = 0; z < resolution.z; ++z) {
for (int y = 0; y < resolution.y; ++y) {
@@ -457,7 +462,7 @@ void MeshManager::create_volume_mesh(Scene *scene, Mesh *mesh, Progress &progres
const int channels = voxel_grid.channels;
for (int c = 0; c < channels; c++) {
- if (voxel_grid.data[voxel_index * channels + c] >= isovalue) {
+ if (voxel_grid.data[voxel_index * channels + c] >= clipping) {
builder.add_node_with_padding(x, y, z);
break;
}
diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp
index bdab2a99897..ac07d91c4ca 100644
--- a/intern/cycles/render/nodes.cpp
+++ b/intern/cycles/render/nodes.cpp
@@ -14,27 +14,28 @@
* limitations under the License.
*/
+#include "render/nodes.h"
#include "render/colorspace.h"
+#include "render/constant_fold.h"
#include "render/film.h"
#include "render/image.h"
#include "render/integrator.h"
#include "render/light.h"
#include "render/mesh.h"
-#include "render/nodes.h"
+#include "render/osl.h"
#include "render/scene.h"
#include "render/svm.h"
-#include "kernel/svm/svm_color_util.h"
-#include "kernel/svm/svm_ramp_util.h"
-#include "kernel/svm/svm_math_util.h"
-#include "kernel/svm/svm_mapping_util.h"
-#include "render/osl.h"
-#include "render/constant_fold.h"
-#include "util/util_sky_model.h"
#include "util/util_foreach.h"
#include "util/util_logging.h"
+#include "util/util_sky_model.h"
#include "util/util_transform.h"
+#include "kernel/svm/svm_color_util.h"
+#include "kernel/svm/svm_mapping_util.h"
+#include "kernel/svm/svm_math_util.h"
+#include "kernel/svm/svm_ramp_util.h"
+
CCL_NAMESPACE_BEGIN
/* Texture Mapping */
@@ -205,27 +206,6 @@ void TextureMapping::compile(OSLCompiler &compiler)
/* Image Texture */
-ImageSlotTextureNode::~ImageSlotTextureNode()
-{
- if (image_manager) {
- foreach (int slot, slots) {
- if (slot != -1) {
- image_manager->remove_image(slot);
- }
- }
- }
-}
-
-void ImageSlotTextureNode::add_image_user() const
-{
- /* Increase image user count for new node. */
- foreach (int slot, slots) {
- if (slot != -1) {
- image_manager->add_image_user(slot);
- }
- }
-}
-
NODE_DEFINE(ImageTextureNode)
{
NodeType *type = NodeType::add("image_texture", create, NodeType::SHADER);
@@ -275,18 +255,27 @@ NODE_DEFINE(ImageTextureNode)
ImageTextureNode::ImageTextureNode() : ImageSlotTextureNode(node_type)
{
- is_float = false;
- compress_as_srgb = false;
colorspace = u_colorspace_raw;
- builtin_data = NULL;
animated = false;
tiles.push_back(1001);
}
ShaderNode *ImageTextureNode::clone() const
{
- add_image_user();
- return new ImageTextureNode(*this);
+ ImageTextureNode *node = new ImageTextureNode(*this);
+ node->handle = handle;
+ return node;
+}
+
+ImageParams ImageTextureNode::image_params() const
+{
+ ImageParams params;
+ params.animated = animated;
+ params.interpolation = interpolation;
+ params.extension = extension;
+ params.alpha_type = alpha_type;
+ params.colorspace = colorspace;
+ return params;
}
void ImageTextureNode::cull_tiles(Scene *scene, ShaderGraph *graph)
@@ -333,10 +322,10 @@ void ImageTextureNode::cull_tiles(Scene *scene, ShaderGraph *graph)
/* TODO(lukas): This is quite inefficient. A fairly simple improvement would
* be to have a cache in each mesh that is indexed by attribute.
* Additionally, building a graph-to-meshes list once could help. */
- foreach (Mesh *mesh, scene->meshes) {
- foreach (Shader *shader, mesh->used_shaders) {
+ foreach (Geometry *geom, scene->geometry) {
+ foreach (Shader *shader, geom->used_shaders) {
if (shader->graph == graph) {
- mesh->get_uv_tiles(attribute, used_tiles);
+ geom->get_uv_tiles(attribute, used_tiles);
}
}
}
@@ -371,123 +360,80 @@ void ImageTextureNode::compile(SVMCompiler &compiler)
ShaderOutput *color_out = output("Color");
ShaderOutput *alpha_out = output("Alpha");
- image_manager = compiler.scene->image_manager;
- if (slots.empty()) {
+ if (handle.empty()) {
cull_tiles(compiler.scene, compiler.current_graph);
- slots.reserve(tiles.size());
-
- bool have_metadata = false;
- foreach (int tile, tiles) {
- string tile_name = filename.string();
- string_replace(tile_name, "<UDIM>", string_printf("%04d", tile));
-
- ImageMetaData metadata;
- int slot = image_manager->add_image(tile_name,
- builtin_data,
- animated,
- 0,
- interpolation,
- extension,
- alpha_type,
- colorspace,
- metadata);
- slots.push_back(slot);
-
- /* We assume that all tiles have the same metadata. */
- if (!have_metadata) {
- is_float = metadata.is_float;
- compress_as_srgb = metadata.compress_as_srgb;
- known_colorspace = metadata.colorspace;
- have_metadata = true;
- }
- }
+ ImageManager *image_manager = compiler.scene->image_manager;
+ handle = image_manager->add_image(filename.string(), image_params(), tiles);
}
- bool has_image = false;
- foreach (int slot, slots) {
- if (slot != -1) {
- has_image = true;
- break;
- }
- }
+ /* All tiles have the same metadata. */
+ const ImageMetaData metadata = handle.metadata();
+ const bool compress_as_srgb = metadata.compress_as_srgb;
+ const ustring known_colorspace = metadata.colorspace;
- if (has_image) {
- int vector_offset = tex_mapping.compile_begin(compiler, vector_in);
- uint flags = 0;
+ int vector_offset = tex_mapping.compile_begin(compiler, vector_in);
+ uint flags = 0;
- if (compress_as_srgb) {
- flags |= NODE_IMAGE_COMPRESS_AS_SRGB;
+ if (compress_as_srgb) {
+ flags |= NODE_IMAGE_COMPRESS_AS_SRGB;
+ }
+ if (!alpha_out->links.empty()) {
+ const bool unassociate_alpha = !(ColorSpaceManager::colorspace_is_data(colorspace) ||
+ alpha_type == IMAGE_ALPHA_CHANNEL_PACKED ||
+ alpha_type == IMAGE_ALPHA_IGNORE);
+
+ if (unassociate_alpha) {
+ flags |= NODE_IMAGE_ALPHA_UNASSOCIATE;
}
- if (!alpha_out->links.empty()) {
- const bool unassociate_alpha = !(ColorSpaceManager::colorspace_is_data(colorspace) ||
- alpha_type == IMAGE_ALPHA_CHANNEL_PACKED ||
- alpha_type == IMAGE_ALPHA_IGNORE);
+ }
- if (unassociate_alpha) {
- flags |= NODE_IMAGE_ALPHA_UNASSOCIATE;
- }
+ if (projection != NODE_IMAGE_PROJ_BOX) {
+ /* If there is only one image (a very common case), we encode it as a negative value. */
+ int num_nodes;
+ if (handle.num_tiles() == 1) {
+ num_nodes = -handle.svm_slot();
+ }
+ else {
+ num_nodes = divide_up(handle.num_tiles(), 2);
}
- if (projection != NODE_IMAGE_PROJ_BOX) {
- /* If there only is one image (a very common case), we encode it as a negative value. */
- int num_nodes;
- if (slots.size() == 1) {
- num_nodes = -slots[0];
- }
- else {
- num_nodes = divide_up(slots.size(), 2);
- }
+ compiler.add_node(NODE_TEX_IMAGE,
+ num_nodes,
+ compiler.encode_uchar4(vector_offset,
+ compiler.stack_assign_if_linked(color_out),
+ compiler.stack_assign_if_linked(alpha_out),
+ flags),
+ projection);
- compiler.add_node(NODE_TEX_IMAGE,
- num_nodes,
- compiler.encode_uchar4(vector_offset,
- compiler.stack_assign_if_linked(color_out),
- compiler.stack_assign_if_linked(alpha_out),
- flags),
- projection);
-
- if (num_nodes > 0) {
- for (int i = 0; i < num_nodes; i++) {
- int4 node;
- node.x = tiles[2 * i];
- node.y = slots[2 * i];
- if (2 * i + 1 < slots.size()) {
- node.z = tiles[2 * i + 1];
- node.w = slots[2 * i + 1];
- }
- else {
- node.z = -1;
- node.w = -1;
- }
- compiler.add_node(node.x, node.y, node.z, node.w);
+ if (num_nodes > 0) {
+ for (int i = 0; i < num_nodes; i++) {
+ int4 node;
+ node.x = tiles[2 * i];
+ node.y = handle.svm_slot(2 * i);
+ if (2 * i + 1 < tiles.size()) {
+ node.z = tiles[2 * i + 1];
+ node.w = handle.svm_slot(2 * i + 1);
}
+ else {
+ node.z = -1;
+ node.w = -1;
+ }
+ compiler.add_node(node.x, node.y, node.z, node.w);
}
}
- else {
- assert(slots.size() == 1);
- compiler.add_node(NODE_TEX_IMAGE_BOX,
- slots[0],
- compiler.encode_uchar4(vector_offset,
- compiler.stack_assign_if_linked(color_out),
- compiler.stack_assign_if_linked(alpha_out),
- flags),
- __float_as_int(projection_blend));
- }
-
- tex_mapping.compile_end(compiler, vector_in, vector_offset);
}
else {
- /* image not found */
- if (!color_out->links.empty()) {
- compiler.add_node(NODE_VALUE_V, compiler.stack_assign(color_out));
- compiler.add_node(
- NODE_VALUE_V,
- make_float3(TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B));
- }
- if (!alpha_out->links.empty())
- compiler.add_node(
- NODE_VALUE_F, __float_as_int(TEX_IMAGE_MISSING_A), compiler.stack_assign(alpha_out));
+ assert(handle.num_tiles() == 1);
+ compiler.add_node(NODE_TEX_IMAGE_BOX,
+ handle.svm_slot(),
+ compiler.encode_uchar4(vector_offset,
+ compiler.stack_assign_if_linked(color_out),
+ compiler.stack_assign_if_linked(alpha_out),
+ flags),
+ __float_as_int(projection_blend));
}
+
+ tex_mapping.compile_end(compiler, vector_in, vector_offset);
}
void ImageTextureNode::compile(OSLCompiler &compiler)
@@ -496,38 +442,22 @@ void ImageTextureNode::compile(OSLCompiler &compiler)
tex_mapping.compile(compiler);
- image_manager = compiler.scene->image_manager;
- if (slots.size() == 0) {
- ImageMetaData metadata;
- if (builtin_data == NULL) {
- string tile_name = filename.string();
- string_replace(tile_name, "<UDIM>", "1001");
- image_manager->get_image_metadata(tile_name, NULL, colorspace, metadata);
- slots.push_back(-1);
- }
- else {
- int slot = image_manager->add_image(filename.string(),
- builtin_data,
- animated,
- 0,
- interpolation,
- extension,
- alpha_type,
- colorspace,
- metadata);
- slots.push_back(slot);
- }
- is_float = metadata.is_float;
- compress_as_srgb = metadata.compress_as_srgb;
- known_colorspace = metadata.colorspace;
+ if (handle.empty()) {
+ ImageManager *image_manager = compiler.scene->image_manager;
+ handle = image_manager->add_image(filename.string(), image_params());
}
- if (slots[0] == -1) {
+ const ImageMetaData metadata = handle.metadata();
+ const bool is_float = metadata.is_float();
+ const bool compress_as_srgb = metadata.compress_as_srgb;
+ const ustring known_colorspace = metadata.colorspace;
+
+ if (handle.svm_slot() == -1) {
compiler.parameter_texture(
"filename", filename, compress_as_srgb ? u_colorspace_raw : known_colorspace);
}
else {
- compiler.parameter_texture("filename", slots[0]);
+ compiler.parameter_texture("filename", handle.svm_slot());
}
const bool unassociate_alpha = !(ColorSpaceManager::colorspace_is_data(colorspace) ||
@@ -589,17 +519,26 @@ NODE_DEFINE(EnvironmentTextureNode)
EnvironmentTextureNode::EnvironmentTextureNode() : ImageSlotTextureNode(node_type)
{
- is_float = false;
- compress_as_srgb = false;
colorspace = u_colorspace_raw;
- builtin_data = NULL;
animated = false;
}
ShaderNode *EnvironmentTextureNode::clone() const
{
- add_image_user();
- return new EnvironmentTextureNode(*this);
+ EnvironmentTextureNode *node = new EnvironmentTextureNode(*this);
+ node->handle = handle;
+ return node;
+}
+
+ImageParams EnvironmentTextureNode::image_params() const
+{
+ ImageParams params;
+ params.animated = animated;
+ params.interpolation = interpolation;
+ params.extension = EXTENSION_REPEAT;
+ params.alpha_type = alpha_type;
+ params.colorspace = colorspace;
+ return params;
}
void EnvironmentTextureNode::attributes(Shader *shader, AttributeRequestSet *attributes)
@@ -621,93 +560,53 @@ void EnvironmentTextureNode::compile(SVMCompiler &compiler)
ShaderOutput *color_out = output("Color");
ShaderOutput *alpha_out = output("Alpha");
- image_manager = compiler.scene->image_manager;
- if (slots.empty()) {
- ImageMetaData metadata;
- int slot = image_manager->add_image(filename.string(),
- builtin_data,
- animated,
- 0,
- interpolation,
- EXTENSION_REPEAT,
- alpha_type,
- colorspace,
- metadata);
- slots.push_back(slot);
- is_float = metadata.is_float;
- compress_as_srgb = metadata.compress_as_srgb;
- known_colorspace = metadata.colorspace;
- }
-
- if (slots[0] != -1) {
- int vector_offset = tex_mapping.compile_begin(compiler, vector_in);
- uint flags = 0;
-
- if (compress_as_srgb) {
- flags |= NODE_IMAGE_COMPRESS_AS_SRGB;
- }
+ if (handle.empty()) {
+ ImageManager *image_manager = compiler.scene->image_manager;
+ handle = image_manager->add_image(filename.string(), image_params());
+ }
- compiler.add_node(NODE_TEX_ENVIRONMENT,
- slots[0],
- compiler.encode_uchar4(vector_offset,
- compiler.stack_assign_if_linked(color_out),
- compiler.stack_assign_if_linked(alpha_out),
- flags),
- projection);
+ const ImageMetaData metadata = handle.metadata();
+ const bool compress_as_srgb = metadata.compress_as_srgb;
+ const ustring known_colorspace = metadata.colorspace;
- tex_mapping.compile_end(compiler, vector_in, vector_offset);
- }
- else {
- /* image not found */
- if (!color_out->links.empty()) {
- compiler.add_node(NODE_VALUE_V, compiler.stack_assign(color_out));
- compiler.add_node(
- NODE_VALUE_V,
- make_float3(TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B));
- }
- if (!alpha_out->links.empty())
- compiler.add_node(
- NODE_VALUE_F, __float_as_int(TEX_IMAGE_MISSING_A), compiler.stack_assign(alpha_out));
+ int vector_offset = tex_mapping.compile_begin(compiler, vector_in);
+ uint flags = 0;
+
+ if (compress_as_srgb) {
+ flags |= NODE_IMAGE_COMPRESS_AS_SRGB;
}
+
+ compiler.add_node(NODE_TEX_ENVIRONMENT,
+ handle.svm_slot(),
+ compiler.encode_uchar4(vector_offset,
+ compiler.stack_assign_if_linked(color_out),
+ compiler.stack_assign_if_linked(alpha_out),
+ flags),
+ projection);
+
+ tex_mapping.compile_end(compiler, vector_in, vector_offset);
}
void EnvironmentTextureNode::compile(OSLCompiler &compiler)
{
+ if (handle.empty()) {
+ ImageManager *image_manager = compiler.scene->image_manager;
+ handle = image_manager->add_image(filename.string(), image_params());
+ }
+
tex_mapping.compile(compiler);
- /* See comments in ImageTextureNode::compile about support
- * of builtin images.
- */
- image_manager = compiler.scene->image_manager;
- if (slots.empty()) {
- ImageMetaData metadata;
- if (builtin_data == NULL) {
- image_manager->get_image_metadata(filename.string(), NULL, colorspace, metadata);
- slots.push_back(-1);
- }
- else {
- int slot = image_manager->add_image(filename.string(),
- builtin_data,
- animated,
- 0,
- interpolation,
- EXTENSION_REPEAT,
- alpha_type,
- colorspace,
- metadata);
- slots.push_back(slot);
- }
- is_float = metadata.is_float;
- compress_as_srgb = metadata.compress_as_srgb;
- known_colorspace = metadata.colorspace;
- }
+ const ImageMetaData metadata = handle.metadata();
+ const bool is_float = metadata.is_float();
+ const bool compress_as_srgb = metadata.compress_as_srgb;
+ const ustring known_colorspace = metadata.colorspace;
- if (slots[0] == -1) {
+ if (handle.svm_slot() == -1) {
compiler.parameter_texture(
"filename", filename, compress_as_srgb ? u_colorspace_raw : known_colorspace);
}
else {
- compiler.parameter_texture("filename", slots[0]);
+ compiler.parameter_texture("filename", handle.svm_slot());
}
compiler.parameter(this, "projection");
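
The image texture nodes above all move to the same pattern: build an ImageParams description once, ask the ImageManager for a handle on first compile, and read slots and metadata through that handle afterwards. A rough sketch of the caching behaviour this pattern implies, using made-up Toy types rather than the real ImageManager API:

#include <map>
#include <string>

/* Hypothetical reductions of the params/handle pattern; not the Cycles API. */
struct ToyImageParams {
  bool animated = false;
  std::string colorspace = "raw";
};

struct ToyImageHandle {
  int slot = -1;
  bool empty() const { return slot == -1; }
  int svm_slot() const { return slot; }
};

class ToyImageManager {
 public:
  /* Repeated requests for the same file resolve to the same slot, which is the
   * sharing problem the old per-node slot lists handled manually with
   * add_image_user() / remove_image(). */
  ToyImageHandle add_image(const std::string &filename, const ToyImageParams & /*params*/)
  {
    auto it = slots_.find(filename);
    if (it == slots_.end()) {
      it = slots_.emplace(filename, int(slots_.size())).first;
    }
    ToyImageHandle handle;
    handle.slot = it->second;
    return handle;
  }

 private:
  std::map<std::string, int> slots_;
};

int main()
{
  ToyImageManager manager;
  ToyImageHandle handle; /* starts empty, like the node member */

  /* First compile: request the image only once. */
  if (handle.empty()) {
    handle = manager.add_image("texture.png", ToyImageParams());
  }

  /* Later compiles reuse the resolved handle and just read the slot. */
  return handle.svm_slot() == 0 ? 0 : 1;
}
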
@@ -1350,7 +1249,7 @@ NODE_DEFINE(MusgraveTextureNode)
SOCKET_IN_FLOAT(scale, "Scale", 1.0f);
SOCKET_IN_FLOAT(detail, "Detail", 2.0f);
SOCKET_IN_FLOAT(dimension, "Dimension", 2.0f);
- SOCKET_IN_FLOAT(lacunarity, "Lacunarity", 1.0f);
+ SOCKET_IN_FLOAT(lacunarity, "Lacunarity", 2.0f);
SOCKET_IN_FLOAT(offset, "Offset", 0.0f);
SOCKET_IN_FLOAT(gain, "Gain", 1.0f);
@@ -1422,15 +1321,33 @@ NODE_DEFINE(WaveTextureNode)
type_enum.insert("rings", NODE_WAVE_RINGS);
SOCKET_ENUM(type, "Type", type_enum, NODE_WAVE_BANDS);
+ static NodeEnum bands_direction_enum;
+ bands_direction_enum.insert("x", NODE_WAVE_BANDS_DIRECTION_X);
+ bands_direction_enum.insert("y", NODE_WAVE_BANDS_DIRECTION_Y);
+ bands_direction_enum.insert("z", NODE_WAVE_BANDS_DIRECTION_Z);
+ bands_direction_enum.insert("diagonal", NODE_WAVE_BANDS_DIRECTION_DIAGONAL);
+ SOCKET_ENUM(
+ bands_direction, "Bands Direction", bands_direction_enum, NODE_WAVE_BANDS_DIRECTION_X);
+
+ static NodeEnum rings_direction_enum;
+ rings_direction_enum.insert("x", NODE_WAVE_RINGS_DIRECTION_X);
+ rings_direction_enum.insert("y", NODE_WAVE_RINGS_DIRECTION_Y);
+ rings_direction_enum.insert("z", NODE_WAVE_RINGS_DIRECTION_Z);
+ rings_direction_enum.insert("spherical", NODE_WAVE_RINGS_DIRECTION_SPHERICAL);
+ SOCKET_ENUM(
+ rings_direction, "Rings Direction", rings_direction_enum, NODE_WAVE_BANDS_DIRECTION_X);
+
static NodeEnum profile_enum;
profile_enum.insert("sine", NODE_WAVE_PROFILE_SIN);
profile_enum.insert("saw", NODE_WAVE_PROFILE_SAW);
+ profile_enum.insert("tri", NODE_WAVE_PROFILE_TRI);
SOCKET_ENUM(profile, "Profile", profile_enum, NODE_WAVE_PROFILE_SIN);
SOCKET_IN_FLOAT(scale, "Scale", 1.0f);
SOCKET_IN_FLOAT(distortion, "Distortion", 0.0f);
SOCKET_IN_FLOAT(detail, "Detail", 2.0f);
SOCKET_IN_FLOAT(detail_scale, "Detail Scale", 0.0f);
+ SOCKET_IN_FLOAT(phase, "Phase Offset", 0.0f);
SOCKET_IN_POINT(
vector, "Vector", make_float3(0.0f, 0.0f, 0.0f), SocketType::LINK_TEXTURE_GENERATED);
@@ -1446,32 +1363,36 @@ WaveTextureNode::WaveTextureNode() : TextureNode(node_type)
void WaveTextureNode::compile(SVMCompiler &compiler)
{
+ ShaderInput *vector_in = input("Vector");
ShaderInput *scale_in = input("Scale");
ShaderInput *distortion_in = input("Distortion");
- ShaderInput *dscale_in = input("Detail Scale");
ShaderInput *detail_in = input("Detail");
- ShaderInput *vector_in = input("Vector");
- ShaderOutput *fac_out = output("Fac");
+ ShaderInput *dscale_in = input("Detail Scale");
+ ShaderInput *phase_in = input("Phase Offset");
ShaderOutput *color_out = output("Color");
+ ShaderOutput *fac_out = output("Fac");
int vector_offset = tex_mapping.compile_begin(compiler, vector_in);
compiler.add_node(NODE_TEX_WAVE,
- compiler.encode_uchar4(type,
- compiler.stack_assign_if_linked(color_out),
- compiler.stack_assign_if_linked(fac_out),
- compiler.stack_assign_if_linked(dscale_in)),
+ compiler.encode_uchar4(type, bands_direction, rings_direction, profile),
compiler.encode_uchar4(vector_offset,
compiler.stack_assign_if_linked(scale_in),
- compiler.stack_assign_if_linked(detail_in),
- compiler.stack_assign_if_linked(distortion_in)),
- profile);
+ compiler.stack_assign_if_linked(distortion_in),
+ compiler.stack_assign_if_linked(detail_in)),
+ compiler.encode_uchar4(compiler.stack_assign_if_linked(dscale_in),
+ compiler.stack_assign_if_linked(phase_in),
+ compiler.stack_assign_if_linked(color_out),
+ compiler.stack_assign_if_linked(fac_out)));
compiler.add_node(__float_as_int(scale),
__float_as_int(detail),
__float_as_int(distortion),
__float_as_int(detail_scale));
+ compiler.add_node(
+ __float_as_int(phase), SVM_STACK_INVALID, SVM_STACK_INVALID, SVM_STACK_INVALID);
+
tex_mapping.compile_end(compiler, vector_in, vector_offset);
}
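
The reworked NODE_TEX_WAVE encoding above packs four small values into a single 32-bit word via compiler.encode_uchar4(). A generic sketch of that kind of byte packing, with my own helper names and assuming each value fits in one byte:

#include <cstdint>

/* Pack four byte-sized values into one 32-bit word, lowest byte first. */
static inline std::uint32_t pack_uchar4(std::uint32_t x,
                                        std::uint32_t y,
                                        std::uint32_t z,
                                        std::uint32_t w)
{
  return (x & 0xFF) | ((y & 0xFF) << 8) | ((z & 0xFF) << 16) | ((w & 0xFF) << 24);
}

/* Unpack the i-th byte again, as the kernel side would. */
static inline std::uint32_t unpack_uchar(std::uint32_t word, int i)
{
  return (word >> (8 * i)) & 0xFF;
}

int main()
{
  const std::uint32_t word = pack_uchar4(/*type*/ 1, /*bands_dir*/ 3, /*rings_dir*/ 0, /*profile*/ 2);
  return unpack_uchar(word, 1) == 3 ? 0 : 1;
}
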
@@ -1480,6 +1401,8 @@ void WaveTextureNode::compile(OSLCompiler &compiler)
tex_mapping.compile(compiler);
compiler.parameter(this, "type");
+ compiler.parameter(this, "bands_direction");
+ compiler.parameter(this, "rings_direction");
compiler.parameter(this, "profile");
compiler.add(this, "node_wave_texture");
@@ -1722,21 +1645,10 @@ NODE_DEFINE(PointDensityTextureNode)
PointDensityTextureNode::PointDensityTextureNode() : ShaderNode(node_type)
{
- image_manager = NULL;
- slot = -1;
- builtin_data = NULL;
}
PointDensityTextureNode::~PointDensityTextureNode()
{
- if (image_manager) {
- image_manager->remove_image(filename.string(),
- builtin_data,
- interpolation,
- EXTENSION_CLIP,
- IMAGE_ALPHA_AUTO,
- ustring());
- }
}
ShaderNode *PointDensityTextureNode::clone() const
@@ -1744,10 +1656,9 @@ ShaderNode *PointDensityTextureNode::clone() const
/* Increase image user count for new node. We need to ensure we do not call
* add_image again, to work around access to freed data on the Blender
* side. A better solution should be found to avoid this. */
- if (slot != -1) {
- image_manager->add_image_user(slot);
- }
- return new PointDensityTextureNode(*this);
+ PointDensityTextureNode *node = new PointDensityTextureNode(*this);
+ node->handle = handle; /* TODO: not needed? */
+ return node;
}
void PointDensityTextureNode::attributes(Shader *shader, AttributeRequestSet *attributes)
@@ -1758,20 +1669,11 @@ void PointDensityTextureNode::attributes(Shader *shader, AttributeRequestSet *at
ShaderNode::attributes(shader, attributes);
}
-void PointDensityTextureNode::add_image()
+ImageParams PointDensityTextureNode::image_params() const
{
- if (slot == -1) {
- ImageMetaData metadata;
- slot = image_manager->add_image(filename.string(),
- builtin_data,
- false,
- 0,
- interpolation,
- EXTENSION_CLIP,
- IMAGE_ALPHA_AUTO,
- u_colorspace_raw,
- metadata);
- }
+ ImageParams params;
+ params.interpolation = interpolation;
+ return params;
}
void PointDensityTextureNode::compile(SVMCompiler &compiler)
@@ -1783,11 +1685,13 @@ void PointDensityTextureNode::compile(SVMCompiler &compiler)
const bool use_density = !density_out->links.empty();
const bool use_color = !color_out->links.empty();
- image_manager = compiler.scene->image_manager;
-
if (use_density || use_color) {
- add_image();
+ if (handle.empty()) {
+ ImageManager *image_manager = compiler.scene->image_manager;
+ handle = image_manager->add_image(filename.string(), image_params());
+ }
+ const int slot = handle.svm_slot();
if (slot != -1) {
compiler.stack_assign(vector_in);
compiler.add_node(NODE_TEX_VOXEL,
@@ -1824,12 +1728,13 @@ void PointDensityTextureNode::compile(OSLCompiler &compiler)
const bool use_density = !density_out->links.empty();
const bool use_color = !color_out->links.empty();
- image_manager = compiler.scene->image_manager;
-
if (use_density || use_color) {
- add_image();
+ if (handle.empty()) {
+ ImageManager *image_manager = compiler.scene->image_manager;
+ handle = image_manager->add_image(filename.string(), image_params());
+ }
- compiler.parameter_texture("filename", slot);
+ compiler.parameter_texture("filename", handle.svm_slot());
if (space == NODE_TEX_VOXEL_SPACE_WORLD) {
compiler.parameter("mapping", tfm);
compiler.parameter("use_mapping", 1);
@@ -3343,7 +3248,7 @@ NODE_DEFINE(PrincipledVolumeNode)
SOCKET_IN_COLOR(emission_color, "Emission Color", make_float3(1.0f, 1.0f, 1.0f));
SOCKET_IN_FLOAT(blackbody_intensity, "Blackbody Intensity", 0.0f);
SOCKET_IN_COLOR(blackbody_tint, "Blackbody Tint", make_float3(1.0f, 1.0f, 1.0f));
- SOCKET_IN_FLOAT(temperature, "Temperature", 1500.0f);
+ SOCKET_IN_FLOAT(temperature, "Temperature", 1000.0f);
SOCKET_IN_FLOAT(volume_mix_weight, "VolumeMixWeight", 0.0f, SocketType::SVM_INTERNAL);
SOCKET_OUT_CLOSURE(volume, "Volume");
@@ -3354,6 +3259,8 @@ NODE_DEFINE(PrincipledVolumeNode)
PrincipledVolumeNode::PrincipledVolumeNode() : VolumeNode(node_type)
{
closure = CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID;
+ density_attribute = ustring("density");
+ temperature_attribute = ustring("temperature");
}
void PrincipledVolumeNode::attributes(Shader *shader, AttributeRequestSet *attributes)
@@ -4495,7 +4402,10 @@ VertexColorNode::VertexColorNode() : ShaderNode(node_type)
void VertexColorNode::attributes(Shader *shader, AttributeRequestSet *attributes)
{
if (!(output("Color")->links.empty() && output("Alpha")->links.empty())) {
- attributes->add_standard(layer_name);
+ if (layer_name != "")
+ attributes->add_standard(layer_name);
+ else
+ attributes->add(ATTR_STD_VERTEX_COLOR);
}
ShaderNode::attributes(shader, attributes);
}
@@ -4504,7 +4414,14 @@ void VertexColorNode::compile(SVMCompiler &compiler)
{
ShaderOutput *color_out = output("Color");
ShaderOutput *alpha_out = output("Alpha");
- int layer_id = compiler.attribute(layer_name);
+ int layer_id = 0;
+
+ if (layer_name != "") {
+ layer_id = compiler.attribute(layer_name);
+ }
+ else {
+ layer_id = compiler.attribute(ATTR_STD_VERTEX_COLOR);
+ }
ShaderNodeType node;
@@ -4531,7 +4448,19 @@ void VertexColorNode::compile(OSLCompiler &compiler)
else {
compiler.parameter("bump_offset", "center");
}
- compiler.parameter("layer_name", layer_name.c_str());
+
+ if (layer_name.empty()) {
+ compiler.parameter("layer_name", ustring("geom:vertex_color"));
+ }
+ else {
+ if (Attribute::name_standard(layer_name.c_str()) != ATTR_STD_NONE) {
+ compiler.parameter("name", (string("geom:") + layer_name.c_str()).c_str());
+ }
+ else {
+ compiler.parameter("layer_name", layer_name.c_str());
+ }
+ }
+
compiler.add(this, "node_vertex_color");
}
@@ -6022,14 +5951,20 @@ NODE_DEFINE(VectorMathNode)
type_enum.insert("floor", NODE_VECTOR_MATH_FLOOR);
type_enum.insert("ceil", NODE_VECTOR_MATH_CEIL);
type_enum.insert("modulo", NODE_VECTOR_MATH_MODULO);
+ type_enum.insert("wrap", NODE_VECTOR_MATH_WRAP);
type_enum.insert("fraction", NODE_VECTOR_MATH_FRACTION);
type_enum.insert("absolute", NODE_VECTOR_MATH_ABSOLUTE);
type_enum.insert("minimum", NODE_VECTOR_MATH_MINIMUM);
type_enum.insert("maximum", NODE_VECTOR_MATH_MAXIMUM);
+
+ type_enum.insert("sine", NODE_VECTOR_MATH_SINE);
+ type_enum.insert("cosine", NODE_VECTOR_MATH_COSINE);
+ type_enum.insert("tangent", NODE_VECTOR_MATH_TANGENT);
SOCKET_ENUM(type, "Type", type_enum, NODE_VECTOR_MATH_ADD);
SOCKET_IN_VECTOR(vector1, "Vector1", make_float3(0.0f, 0.0f, 0.0f));
SOCKET_IN_VECTOR(vector2, "Vector2", make_float3(0.0f, 0.0f, 0.0f));
+ SOCKET_IN_VECTOR(vector3, "Vector3", make_float3(0.0f, 0.0f, 0.0f));
SOCKET_IN_FLOAT(scale, "Scale", 1.0f);
SOCKET_OUT_FLOAT(value, "Value");
@@ -6048,7 +5983,7 @@ void VectorMathNode::constant_fold(const ConstantFolder &folder)
float3 vector = make_float3(0.0f, 0.0f, 0.0f);
if (folder.all_inputs_constant()) {
- svm_vector_math(&value, &vector, type, vector1, vector2, scale);
+ svm_vector_math(&value, &vector, type, vector1, vector2, vector3, scale);
if (folder.output == output("Value")) {
folder.make_constant(value);
}
@@ -6075,11 +6010,24 @@ void VectorMathNode::compile(SVMCompiler &compiler)
int value_stack_offset = compiler.stack_assign_if_linked(value_out);
int vector_stack_offset = compiler.stack_assign_if_linked(vector_out);
- compiler.add_node(
- NODE_VECTOR_MATH,
- type,
- compiler.encode_uchar4(vector1_stack_offset, vector2_stack_offset, scale_stack_offset),
- compiler.encode_uchar4(value_stack_offset, vector_stack_offset));
+ /* 3 Vector Operators */
+ if (type == NODE_VECTOR_MATH_WRAP) {
+ ShaderInput *vector3_in = input("Vector3");
+ int vector3_stack_offset = compiler.stack_assign(vector3_in);
+ compiler.add_node(
+ NODE_VECTOR_MATH,
+ type,
+ compiler.encode_uchar4(vector1_stack_offset, vector2_stack_offset, scale_stack_offset),
+ compiler.encode_uchar4(value_stack_offset, vector_stack_offset));
+ compiler.add_node(vector3_stack_offset);
+ }
+ else {
+ compiler.add_node(
+ NODE_VECTOR_MATH,
+ type,
+ compiler.encode_uchar4(vector1_stack_offset, vector2_stack_offset, scale_stack_offset),
+ compiler.encode_uchar4(value_stack_offset, vector_stack_offset));
+ }
}
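
Wrap is the only operator in this node that consumes a third vector, hence the extra add_node() carrying its stack offset. Assuming the conventional component-wise wrap-into-range formula (value mapped into [min, max), with max == min collapsing to min), a standalone sketch looks like this:

    #include <cmath>
    #include <cstdio>

    struct float3 { float x, y, z; };

    // Wrap a scalar into the range [min, max); if max == min, return min.
    static float wrapf(float value, float max, float min)
    {
      float range = max - min;
      return (range != 0.0f) ? value - range * std::floor((value - min) / range) : min;
    }

    // Component-wise wrap of v into the range given by vmax and vmin -- the
    // assumed semantics of the wrap operator registered above.
    static float3 wrap(const float3 &v, const float3 &vmax, const float3 &vmin)
    {
      return {wrapf(v.x, vmax.x, vmin.x), wrapf(v.y, vmax.y, vmin.y), wrapf(v.z, vmax.z, vmin.z)};
    }

    int main()
    {
      float3 r = wrap({2.5f, -0.25f, 7.0f}, {1.0f, 1.0f, 3.0f}, {0.0f, 0.0f, 1.0f});
      std::printf("%g %g %g\n", r.x, r.y, r.z);  // prints: 0.5 0.75 1
      return 0;
    }
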
void VectorMathNode::compile(OSLCompiler &compiler)
@@ -6088,6 +6036,62 @@ void VectorMathNode::compile(OSLCompiler &compiler)
compiler.add(this, "node_vector_math");
}
+/* Vector Rotate */
+
+NODE_DEFINE(VectorRotateNode)
+{
+ NodeType *type = NodeType::add("vector_rotate", create, NodeType::SHADER);
+
+ static NodeEnum type_enum;
+ type_enum.insert("axis", NODE_VECTOR_ROTATE_TYPE_AXIS);
+ type_enum.insert("x_axis", NODE_VECTOR_ROTATE_TYPE_AXIS_X);
+ type_enum.insert("y_axis", NODE_VECTOR_ROTATE_TYPE_AXIS_Y);
+ type_enum.insert("z_axis", NODE_VECTOR_ROTATE_TYPE_AXIS_Z);
+ type_enum.insert("euler_xyz", NODE_VECTOR_ROTATE_TYPE_EULER_XYZ);
+ SOCKET_ENUM(type, "Type", type_enum, NODE_VECTOR_ROTATE_TYPE_AXIS);
+
+ SOCKET_BOOLEAN(invert, "Invert", false);
+
+ SOCKET_IN_VECTOR(vector, "Vector", make_float3(0.0f, 0.0f, 0.0f));
+ SOCKET_IN_POINT(rotation, "Rotation", make_float3(0.0f, 0.0f, 0.0f));
+ SOCKET_IN_POINT(center, "Center", make_float3(0.0f, 0.0f, 0.0f));
+ SOCKET_IN_VECTOR(axis, "Axis", make_float3(0.0f, 0.0f, 1.0f));
+ SOCKET_IN_FLOAT(angle, "Angle", 0.0f);
+ SOCKET_OUT_VECTOR(vector, "Vector");
+
+ return type;
+}
+
+VectorRotateNode::VectorRotateNode() : ShaderNode(node_type)
+{
+}
+
+void VectorRotateNode::compile(SVMCompiler &compiler)
+{
+ ShaderInput *vector_in = input("Vector");
+ ShaderInput *rotation_in = input("Rotation");
+ ShaderInput *center_in = input("Center");
+ ShaderInput *axis_in = input("Axis");
+ ShaderInput *angle_in = input("Angle");
+ ShaderOutput *vector_out = output("Vector");
+
+ compiler.add_node(
+ NODE_VECTOR_ROTATE,
+ compiler.encode_uchar4(
+ type, compiler.stack_assign(vector_in), compiler.stack_assign(rotation_in), invert),
+ compiler.encode_uchar4(compiler.stack_assign(center_in),
+ compiler.stack_assign(axis_in),
+ compiler.stack_assign(angle_in)),
+ compiler.stack_assign(vector_out));
+}
+
+void VectorRotateNode::compile(OSLCompiler &compiler)
+{
+ compiler.parameter(this, "type");
+ compiler.parameter(this, "invert");
+ compiler.add(this, "node_vector_rotate");
+}
+
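
For the axis-angle modes, rotating about an arbitrary center amounts to translating to the center, applying Rodrigues' rotation formula around the (unit) axis, and translating back. The sketch below shows that math in isolation; the Euler mode and the invert flag are left out, and the helper types are local stand-ins rather than Cycles' own.

    #include <cmath>
    #include <cstdio>

    struct float3 { float x, y, z; };

    static float3 operator-(const float3 &a, const float3 &b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; }
    static float3 operator+(const float3 &a, const float3 &b) { return {a.x + b.x, a.y + b.y, a.z + b.z}; }
    static float3 operator*(float s, const float3 &a) { return {s * a.x, s * a.y, s * a.z}; }
    static float dot(const float3 &a, const float3 &b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
    static float3 cross(const float3 &a, const float3 &b)
    {
      return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x};
    }

    // Rotate `p` by `angle` radians around the line through `center` with
    // unit direction `axis`, using Rodrigues' rotation formula.
    static float3 rotate_around_axis(float3 p, float3 center, float3 axis, float angle)
    {
      float3 v = p - center;
      float c = std::cos(angle), s = std::sin(angle);
      float3 rotated = c * v + s * cross(axis, v) + (dot(axis, v) * (1.0f - c)) * axis;
      return center + rotated;
    }

    int main()
    {
      // Rotate (1, 0, 0) by 90 degrees around the Z axis through the origin.
      float3 r = rotate_around_axis({1, 0, 0}, {0, 0, 0}, {0, 0, 1}, 1.5707963f);
      std::printf("%.3f %.3f %.3f\n", r.x, r.y, r.z);  // prints: 0.000 1.000 0.000
      return 0;
    }
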
/* VectorTransform */
NODE_DEFINE(VectorTransformNode)
diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h
index a8fe7644957..e201118574b 100644
--- a/intern/cycles/render/nodes.h
+++ b/intern/cycles/render/nodes.h
@@ -17,8 +17,9 @@
#ifndef __NODES_H__
#define __NODES_H__
-#include "render/graph.h"
#include "graph/node.h"
+#include "render/graph.h"
+#include "render/image.h"
#include "util/util_array.h"
#include "util/util_string.h"
@@ -77,12 +78,15 @@ class ImageSlotTextureNode : public TextureNode {
explicit ImageSlotTextureNode(const NodeType *node_type) : TextureNode(node_type)
{
special_type = SHADER_SPECIAL_TYPE_IMAGE_SLOT;
- image_manager = NULL;
}
- ~ImageSlotTextureNode();
- void add_image_user() const;
- ImageManager *image_manager;
- vector<int> slots;
+
+ virtual bool equals(const ShaderNode &other)
+ {
+ const ImageSlotTextureNode &other_node = (const ImageSlotTextureNode &)other;
+ return TextureNode::equals(other) && handle == other_node.handle;
+ }
+
+ ImageHandle handle;
};
class ImageTextureNode : public ImageSlotTextureNode {
@@ -97,14 +101,14 @@ class ImageTextureNode : public ImageSlotTextureNode {
virtual bool equals(const ShaderNode &other)
{
- const ImageTextureNode &image_node = (const ImageTextureNode &)other;
- return ImageSlotTextureNode::equals(other) && builtin_data == image_node.builtin_data &&
- animated == image_node.animated;
+ const ImageTextureNode &other_node = (const ImageTextureNode &)other;
+ return ImageSlotTextureNode::equals(other) && animated == other_node.animated;
}
+ ImageParams image_params() const;
+
/* Parameters. */
ustring filename;
- void *builtin_data;
ustring colorspace;
ImageAlphaType alpha_type;
NodeImageProjection projection;
@@ -115,11 +119,6 @@ class ImageTextureNode : public ImageSlotTextureNode {
float3 vector;
ccl::vector<int> tiles;
- /* Runtime. */
- bool is_float;
- bool compress_as_srgb;
- ustring known_colorspace;
-
protected:
void cull_tiles(Scene *scene, ShaderGraph *graph);
};
@@ -140,25 +139,20 @@ class EnvironmentTextureNode : public ImageSlotTextureNode {
virtual bool equals(const ShaderNode &other)
{
- const EnvironmentTextureNode &env_node = (const EnvironmentTextureNode &)other;
- return ImageSlotTextureNode::equals(other) && builtin_data == env_node.builtin_data &&
- animated == env_node.animated;
+ const EnvironmentTextureNode &other_node = (const EnvironmentTextureNode &)other;
+ return ImageSlotTextureNode::equals(other) && animated == other_node.animated;
}
+ ImageParams image_params() const;
+
/* Parameters. */
ustring filename;
- void *builtin_data;
ustring colorspace;
ImageAlphaType alpha_type;
NodeEnvironmentProjection projection;
InterpolationType interpolation;
bool animated;
float3 vector;
-
- /* Runtime. */
- bool is_float;
- bool compress_as_srgb;
- ustring known_colorspace;
};
class SkyTextureNode : public TextureNode {
@@ -203,6 +197,11 @@ class OutputAOVNode : public ShaderNode {
ustring name;
+ virtual int get_group()
+ {
+ return NODE_GROUP_LEVEL_4;
+ }
+
/* Don't allow output node de-duplication. */
virtual bool equals(const ShaderNode & /*other*/)
{
@@ -288,9 +287,11 @@ class WaveTextureNode : public TextureNode {
}
NodeWaveType type;
+ NodeWaveBandsDirection bands_direction;
+ NodeWaveRingsDirection rings_direction;
NodeWaveProfile profile;
- float scale, distortion, detail, detail_scale;
+ float scale, distortion, detail, detail_scale, phase;
float3 vector;
};
@@ -343,7 +344,7 @@ class PointDensityTextureNode : public ShaderNode {
SHADER_NODE_NO_CLONE_CLASS(PointDensityTextureNode)
virtual int get_group()
{
- return NODE_GROUP_LEVEL_3;
+ return NODE_GROUP_LEVEL_4;
}
~PointDensityTextureNode();
@@ -363,24 +364,22 @@ class PointDensityTextureNode : public ShaderNode {
return true;
}
- void add_image();
-
/* Parameters. */
ustring filename;
NodeTexVoxelSpace space;
InterpolationType interpolation;
Transform tfm;
float3 vector;
- void *builtin_data;
/* Runtime. */
- ImageManager *image_manager;
- int slot;
+ ImageHandle handle;
+
+ ImageParams image_params() const;
virtual bool equals(const ShaderNode &other)
{
- const PointDensityTextureNode &point_dendity_node = (const PointDensityTextureNode &)other;
- return ShaderNode::equals(other) && builtin_data == point_dendity_node.builtin_data;
+ const PointDensityTextureNode &other_node = (const PointDensityTextureNode &)other;
+ return ShaderNode::equals(other) && handle == other_node.handle;
}
};
@@ -1377,10 +1376,28 @@ class VectorMathNode : public ShaderNode {
float3 vector1;
float3 vector2;
+ float3 vector3;
float scale;
NodeVectorMathType type;
};
+class VectorRotateNode : public ShaderNode {
+ public:
+ SHADER_NODE_CLASS(VectorRotateNode)
+
+ virtual int get_group()
+ {
+ return NODE_GROUP_LEVEL_3;
+ }
+ NodeVectorRotateType type;
+ bool invert;
+ float3 vector;
+ float3 center;
+ float3 axis;
+ float angle;
+ float3 rotation;
+};
+
class VectorTransformNode : public ShaderNode {
public:
SHADER_NODE_CLASS(VectorTransformNode)
diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp
index 849329a086d..90a1d90019d 100644
--- a/intern/cycles/render/object.cpp
+++ b/intern/cycles/render/object.cpp
@@ -14,22 +14,24 @@
* limitations under the License.
*/
-#include "render/camera.h"
+#include "render/object.h"
#include "device/device.h"
+#include "render/camera.h"
+#include "render/curves.h"
+#include "render/hair.h"
+#include "render/integrator.h"
#include "render/light.h"
#include "render/mesh.h"
-#include "render/curves.h"
-#include "render/object.h"
#include "render/particles.h"
#include "render/scene.h"
#include "util/util_foreach.h"
#include "util/util_logging.h"
#include "util/util_map.h"
+#include "util/util_murmurhash.h"
#include "util/util_progress.h"
#include "util/util_set.h"
#include "util/util_vector.h"
-#include "util/util_murmurhash.h"
#include "subd/subd_patch_table.h"
@@ -64,6 +66,7 @@ struct UpdateObjectTransformState {
KernelObject *objects;
Transform *object_motion_pass;
DecomposedTransform *object_motion;
+ float *object_volume_step;
/* Flags which will be synchronized to Integrator. */
bool have_motion;
@@ -87,7 +90,7 @@ NODE_DEFINE(Object)
{
NodeType *type = NodeType::add("object", create);
- SOCKET_NODE(mesh, "Mesh", &Mesh::node_type);
+ SOCKET_NODE(geometry, "Geometry", &Geometry::node_base_type);
SOCKET_TRANSFORM(tfm, "Transform", transform_identity());
SOCKET_UINT(visibility, "Visibility", ~0);
SOCKET_COLOR(color, "Color", make_float3(0.0f, 0.0f, 0.0f));
@@ -152,7 +155,7 @@ void Object::update_motion()
void Object::compute_bounds(bool motion_blur)
{
- BoundBox mbounds = mesh->bounds;
+ BoundBox mbounds = geometry->bounds;
if (motion_blur && use_motion()) {
array<DecomposedTransform> decomp(motion.size());
@@ -172,7 +175,7 @@ void Object::compute_bounds(bool motion_blur)
}
else {
/* No motion blur case. */
- if (mesh->transform_applied) {
+ if (geometry->transform_applied) {
bounds = mbounds;
}
else {
@@ -183,89 +186,18 @@ void Object::compute_bounds(bool motion_blur)
void Object::apply_transform(bool apply_to_motion)
{
- if (!mesh || tfm == transform_identity())
+ if (!geometry || tfm == transform_identity())
return;
- /* triangles */
- if (mesh->verts.size()) {
- /* store matrix to transform later. when accessing these as attributes we
- * do not want the transform to be applied for consistency between static
- * and dynamic BVH, so we do it on packing. */
- mesh->transform_normal = transform_transposed_inverse(tfm);
-
- /* apply to mesh vertices */
- for (size_t i = 0; i < mesh->verts.size(); i++)
- mesh->verts[i] = transform_point(&tfm, mesh->verts[i]);
-
- if (apply_to_motion) {
- Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-
- if (attr) {
- size_t steps_size = mesh->verts.size() * (mesh->motion_steps - 1);
- float3 *vert_steps = attr->data_float3();
-
- for (size_t i = 0; i < steps_size; i++)
- vert_steps[i] = transform_point(&tfm, vert_steps[i]);
- }
-
- Attribute *attr_N = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL);
-
- if (attr_N) {
- Transform ntfm = mesh->transform_normal;
- size_t steps_size = mesh->verts.size() * (mesh->motion_steps - 1);
- float3 *normal_steps = attr_N->data_float3();
-
- for (size_t i = 0; i < steps_size; i++)
- normal_steps[i] = normalize(transform_direction(&ntfm, normal_steps[i]));
- }
- }
- }
-
- /* curves */
- if (mesh->curve_keys.size()) {
- /* compute uniform scale */
- float3 c0 = transform_get_column(&tfm, 0);
- float3 c1 = transform_get_column(&tfm, 1);
- float3 c2 = transform_get_column(&tfm, 2);
- float scalar = powf(fabsf(dot(cross(c0, c1), c2)), 1.0f / 3.0f);
-
- /* apply transform to curve keys */
- for (size_t i = 0; i < mesh->curve_keys.size(); i++) {
- float3 co = transform_point(&tfm, mesh->curve_keys[i]);
- float radius = mesh->curve_radius[i] * scalar;
-
- /* scale for curve radius is only correct for uniform scale */
- mesh->curve_keys[i] = co;
- mesh->curve_radius[i] = radius;
- }
-
- if (apply_to_motion) {
- Attribute *curve_attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-
- if (curve_attr) {
- /* apply transform to motion curve keys */
- size_t steps_size = mesh->curve_keys.size() * (mesh->motion_steps - 1);
- float4 *key_steps = curve_attr->data_float4();
-
- for (size_t i = 0; i < steps_size; i++) {
- float3 co = transform_point(&tfm, float4_to_float3(key_steps[i]));
- float radius = key_steps[i].w * scalar;
-
- /* scale for curve radius is only correct for uniform scale */
- key_steps[i] = float3_to_float4(co);
- key_steps[i].w = radius;
- }
- }
- }
- }
+ geometry->apply_transform(tfm, apply_to_motion);
/* we keep normals pointing in same direction on negative scale, notify
- * mesh about this in it (re)calculates normals */
+ * geometry about this so it (re)calculates normals */
if (transform_negative_scale(tfm))
- mesh->transform_negative_scaled = true;
+ geometry->transform_negative_scaled = true;
if (bounds.valid()) {
- mesh->compute_bounds();
+ geometry->compute_bounds();
compute_bounds(false);
}
@@ -275,11 +207,11 @@ void Object::apply_transform(bool apply_to_motion)
void Object::tag_update(Scene *scene)
{
- if (mesh) {
- if (mesh->transform_applied)
- mesh->need_update = true;
+ if (geometry) {
+ if (geometry->transform_applied)
+ geometry->need_update = true;
- foreach (Shader *shader, mesh->used_shaders) {
+ foreach (Shader *shader, geometry->used_shaders) {
if (shader->use_mis && shader->has_surface_emission)
scene->light_manager->need_update = true;
}
@@ -287,7 +219,7 @@ void Object::tag_update(Scene *scene)
scene->camera->need_flags_update = true;
scene->curve_system_manager->need_update = true;
- scene->mesh_manager->need_update = true;
+ scene->geometry_manager->need_update = true;
scene->object_manager->need_update = true;
}
@@ -336,6 +268,82 @@ uint Object::visibility_for_tracing() const
return trace_visibility;
}
+float Object::compute_volume_step_size() const
+{
+ if (geometry->type != Geometry::MESH) {
+ return FLT_MAX;
+ }
+
+ Mesh *mesh = static_cast<Mesh *>(geometry);
+
+ if (!mesh->has_volume) {
+ return FLT_MAX;
+ }
+
+ /* Compute step rate from shaders. */
+ float step_rate = FLT_MAX;
+
+ foreach (Shader *shader, mesh->used_shaders) {
+ if (shader->has_volume) {
+ if ((shader->heterogeneous_volume && shader->has_volume_spatial_varying) ||
+ (shader->has_volume_attribute_dependency)) {
+ step_rate = fminf(shader->volume_step_rate, step_rate);
+ }
+ }
+ }
+
+ if (step_rate == FLT_MAX) {
+ return FLT_MAX;
+ }
+
+ /* Compute step size from voxel grids. */
+ float step_size = FLT_MAX;
+
+ foreach (Attribute &attr, mesh->attributes.attributes) {
+ if (attr.element == ATTR_ELEMENT_VOXEL) {
+ ImageHandle &handle = attr.data_voxel();
+ const ImageMetaData &metadata = handle.metadata();
+ if (metadata.width == 0 || metadata.height == 0 || metadata.depth == 0) {
+ continue;
+ }
+
+ /* User specified step size. */
+ float voxel_step_size = mesh->volume_step_size;
+
+ if (voxel_step_size == 0.0f) {
+ /* Auto detect step size. */
+ float3 size = make_float3(
+ 1.0f / metadata.width, 1.0f / metadata.height, 1.0f / metadata.depth);
+
+ /* Step size is transformed from voxel to world space. */
+ Transform voxel_tfm = tfm;
+ if (metadata.use_transform_3d) {
+ voxel_tfm = tfm * transform_inverse(metadata.transform_3d);
+ }
+ voxel_step_size = min3(fabs(transform_direction(&voxel_tfm, size)));
+ }
+ else if (mesh->volume_object_space) {
+ /* User specified step size in object space. */
+ float3 size = make_float3(voxel_step_size, voxel_step_size, voxel_step_size);
+ voxel_step_size = min3(fabs(transform_direction(&tfm, size)));
+ }
+
+ if (voxel_step_size > 0.0f) {
+ step_size = fminf(voxel_step_size, step_size);
+ }
+ }
+ }
+
+ if (step_size == FLT_MAX) {
+ /* Fall back to 1/10th of bounds for procedural volumes. */
+ step_size = 0.1f * average(bounds.size());
+ }
+
+ step_size *= step_rate;
+
+ return step_size;
+}
+
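
In short, compute_volume_step_size() takes the world-space size of one voxel (or the user override), keeps the smallest value across all voxel grids, falls back to a tenth of the object bounds for procedural volumes, and finally multiplies by the shader step rate. A small numeric sketch of the auto-detected case, assuming a plain axis-aligned scale for the object transform:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    // Auto-detected step size for a voxel grid: one voxel expressed in world
    // space, taking the smallest axis, then scaled by the shader step rate.
    static float auto_volume_step_size(int width, int height, int depth,
                                       float scale_x, float scale_y, float scale_z,
                                       float step_rate)
    {
      // Size of one voxel in texture space (the grid spans [0, 1] per axis).
      float vx = 1.0f / width, vy = 1.0f / height, vz = 1.0f / depth;

      // Transform to world space; here the transform is a plain scale for clarity.
      float wx = std::fabs(scale_x * vx);
      float wy = std::fabs(scale_y * vy);
      float wz = std::fabs(scale_z * vz);

      return std::min({wx, wy, wz}) * step_rate;
    }

    int main()
    {
      // A 64x64x32 grid in a 2x2x1 unit box, with a step rate of 1.
      float step = auto_volume_step_size(64, 64, 32, 2.0f, 2.0f, 1.0f, 1.0f);
      std::printf("step size: %g\n", step);  // prints: 0.03125 (one voxel per step)
      return 0;
    }
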
int Object::get_device_index() const
{
return index;
@@ -353,32 +361,33 @@ ObjectManager::~ObjectManager()
{
}
-void ObjectManager::device_update_object_transform(UpdateObjectTransformState *state, Object *ob)
+static float object_surface_area(UpdateObjectTransformState *state,
+ const Transform &tfm,
+ Geometry *geom)
{
- KernelObject &kobject = state->objects[ob->index];
- Transform *object_motion_pass = state->object_motion_pass;
-
- Mesh *mesh = ob->mesh;
- uint flag = 0;
+ if (geom->type != Geometry::MESH) {
+ return 0.0f;
+ }
- /* Compute transformations. */
- Transform tfm = ob->tfm;
- Transform itfm = transform_inverse(tfm);
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (mesh->has_volume) {
+ /* Volume density automatically adjusts to object scale. */
+ if (mesh->volume_object_space) {
+ const float3 unit = normalize(make_float3(1.0f, 1.0f, 1.0f));
+ return 1.0f / len(transform_direction(&tfm, unit));
+ }
+ else {
+ return 1.0f;
+ }
+ }
/* Compute surface area. For uniform scale we can avoid the many
* transform calls and share computation for instances.
*
* TODO(brecht): Correct for displacement, and move to a better place.
*/
- float uniform_scale;
float surface_area = 0.0f;
- float3 color = ob->color;
- float pass_id = ob->pass_id;
- float random_number = (float)ob->random_id * (1.0f / (float)0xFFFFFFFF);
- int particle_index = (ob->particle_system) ?
- ob->particle_index + state->particle_offset[ob->particle_system] :
- 0;
-
+ float uniform_scale;
if (transform_uniform_scale(tfm, uniform_scale)) {
map<Mesh *, float>::iterator it;
@@ -424,9 +433,31 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
}
}
+ return surface_area;
+}
+
+void ObjectManager::device_update_object_transform(UpdateObjectTransformState *state, Object *ob)
+{
+ KernelObject &kobject = state->objects[ob->index];
+ Transform *object_motion_pass = state->object_motion_pass;
+
+ Geometry *geom = ob->geometry;
+ uint flag = 0;
+
+ /* Compute transformations. */
+ Transform tfm = ob->tfm;
+ Transform itfm = transform_inverse(tfm);
+
+ float3 color = ob->color;
+ float pass_id = ob->pass_id;
+ float random_number = (float)ob->random_id * (1.0f / (float)0xFFFFFFFF);
+ int particle_index = (ob->particle_system) ?
+ ob->particle_index + state->particle_offset[ob->particle_system] :
+ 0;
+
kobject.tfm = tfm;
kobject.itfm = itfm;
- kobject.surface_area = surface_area;
+ kobject.surface_area = object_surface_area(state, tfm, geom);
kobject.color[0] = color.x;
kobject.color[1] = color.y;
kobject.color[2] = color.z;
@@ -435,11 +466,16 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
kobject.particle_index = particle_index;
kobject.motion_offset = 0;
- if (mesh->use_motion_blur) {
+ if (geom->use_motion_blur) {
state->have_motion = true;
}
- if (mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) {
- flag |= SD_OBJECT_HAS_VERTEX_MOTION;
+
+ if (geom->type == Geometry::MESH) {
+ /* TODO: why only mesh? */
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) {
+ flag |= SD_OBJECT_HAS_VERTEX_MOTION;
+ }
}
if (state->need_motion == Scene::MOTION_PASS) {
@@ -460,7 +496,7 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
/* Motion transformations are in world or object space, depending on
* whether the mesh comes with deformed positions in object space, or we
* transform the shading point in world space. */
- if (!mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) {
+ if (!(flag & SD_OBJECT_HAS_VERTEX_MOTION)) {
tfm_pre = tfm_pre * itfm;
tfm_post = tfm_post * itfm;
}
@@ -485,12 +521,13 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
kobject.dupli_generated[0] = ob->dupli_generated[0];
kobject.dupli_generated[1] = ob->dupli_generated[1];
kobject.dupli_generated[2] = ob->dupli_generated[2];
- kobject.numkeys = mesh->curve_keys.size();
+ kobject.numkeys = (geom->type == Geometry::HAIR) ? static_cast<Hair *>(geom)->curve_keys.size() :
+ 0;
kobject.dupli_uv[0] = ob->dupli_uv[0];
kobject.dupli_uv[1] = ob->dupli_uv[1];
- int totalsteps = mesh->motion_steps;
+ int totalsteps = geom->motion_steps;
kobject.numsteps = (totalsteps - 1) / 2;
- kobject.numverts = mesh->verts.size();
+ kobject.numverts = (geom->type == Geometry::MESH) ? static_cast<Mesh *>(geom)->verts.size() : 0;
kobject.patch_map_offset = 0;
kobject.attribute_map_offset = 0;
uint32_t hash_name = util_murmur_hash3(ob->name.c_str(), ob->name.length(), 0);
@@ -503,9 +540,10 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
flag |= SD_OBJECT_HOLDOUT_MASK;
}
state->object_flag[ob->index] = flag;
+ state->object_volume_step[ob->index] = FLT_MAX;
/* Have curves. */
- if (mesh->num_curves()) {
+ if (geom->type == Geometry::HAIR) {
state->have_curves = true;
}
}
@@ -556,6 +594,7 @@ void ObjectManager::device_update_transforms(DeviceScene *dscene, Scene *scene,
state.objects = dscene->objects.alloc(scene->objects.size());
state.object_flag = dscene->object_flag.alloc(scene->objects.size());
+ state.object_volume_step = dscene->object_volume_step.alloc(scene->objects.size());
state.object_motion = NULL;
state.object_motion_pass = NULL;
@@ -676,25 +715,30 @@ void ObjectManager::device_update_flags(
/* Object info flag. */
uint *object_flag = dscene->object_flag.data();
+ float *object_volume_step = dscene->object_volume_step.data();
/* Object volume intersection. */
vector<Object *> volume_objects;
bool has_volume_objects = false;
foreach (Object *object, scene->objects) {
- if (object->mesh->has_volume) {
+ if (object->geometry->has_volume) {
if (bounds_valid) {
volume_objects.push_back(object);
}
has_volume_objects = true;
+ object_volume_step[object->index] = object->compute_volume_step_size();
+ }
+ else {
+ object_volume_step[object->index] = FLT_MAX;
}
}
foreach (Object *object, scene->objects) {
- if (object->mesh->has_volume) {
+ if (object->geometry->has_volume) {
object_flag[object->index] |= SD_OBJECT_HAS_VOLUME;
object_flag[object->index] &= ~SD_OBJECT_HAS_VOLUME_ATTRIBUTES;
- foreach (Attribute &attr, object->mesh->attributes.attributes) {
+ foreach (Attribute &attr, object->geometry->attributes.attributes) {
if (attr.element == ATTR_ELEMENT_VOXEL) {
object_flag[object->index] |= SD_OBJECT_HAS_VOLUME_ATTRIBUTES;
}
@@ -703,6 +747,7 @@ void ObjectManager::device_update_flags(
else {
object_flag[object->index] &= ~(SD_OBJECT_HAS_VOLUME | SD_OBJECT_HAS_VOLUME_ATTRIBUTES);
}
+
if (object->is_shadow_catcher) {
object_flag[object->index] |= SD_OBJECT_SHADOW_CATCHER;
}
@@ -731,6 +776,7 @@ void ObjectManager::device_update_flags(
/* Copy object flag. */
dscene->object_flag.copy_to_device();
+ dscene->object_volume_step.copy_to_device();
}
void ObjectManager::device_update_mesh_offsets(Device *, DeviceScene *dscene, Scene *scene)
@@ -744,21 +790,24 @@ void ObjectManager::device_update_mesh_offsets(Device *, DeviceScene *dscene, Sc
bool update = false;
foreach (Object *object, scene->objects) {
- Mesh *mesh = object->mesh;
-
- if (mesh->patch_table) {
- uint patch_map_offset = 2 * (mesh->patch_table_offset + mesh->patch_table->total_size() -
- mesh->patch_table->num_nodes * PATCH_NODE_SIZE) -
- mesh->patch_offset;
-
- if (kobjects[object->index].patch_map_offset != patch_map_offset) {
- kobjects[object->index].patch_map_offset = patch_map_offset;
- update = true;
+ Geometry *geom = object->geometry;
+
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ if (mesh->patch_table) {
+ uint patch_map_offset = 2 * (mesh->patch_table_offset + mesh->patch_table->total_size() -
+ mesh->patch_table->num_nodes * PATCH_NODE_SIZE) -
+ mesh->patch_offset;
+
+ if (kobjects[object->index].patch_map_offset != patch_map_offset) {
+ kobjects[object->index].patch_map_offset = patch_map_offset;
+ update = true;
+ }
}
}
- if (kobjects[object->index].attribute_map_offset != mesh->attr_map_offset) {
- kobjects[object->index].attribute_map_offset = mesh->attr_map_offset;
+ if (kobjects[object->index].attribute_map_offset != geom->attr_map_offset) {
+ kobjects[object->index].attribute_map_offset = geom->attr_map_offset;
update = true;
}
}
@@ -774,15 +823,16 @@ void ObjectManager::device_free(Device *, DeviceScene *dscene)
dscene->object_motion_pass.free();
dscene->object_motion.free();
dscene->object_flag.free();
+ dscene->object_volume_step.free();
}
void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, Progress &progress)
{
/* todo: normals and displacement should be done before applying transform! */
- /* todo: create objects/meshes in right order! */
+ /* todo: create objects/geometry in right order! */
- /* counter mesh users */
- map<Mesh *, int> mesh_users;
+ /* count geometry users */
+ map<Geometry *, int> geometry_users;
Scene::MotionType need_motion = scene->need_motion();
bool motion_blur = need_motion == Scene::MOTION_BLUR;
bool apply_to_motion = need_motion != Scene::MOTION_PASS;
@@ -790,10 +840,10 @@ void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, P
bool have_instancing = false;
foreach (Object *object, scene->objects) {
- map<Mesh *, int>::iterator it = mesh_users.find(object->mesh);
+ map<Geometry *, int>::iterator it = geometry_users.find(object->geometry);
- if (it == mesh_users.end())
- mesh_users[object->mesh] = 1;
+ if (it == geometry_users.end())
+ geometry_users[object->geometry] = 1;
else
it->second++;
}
@@ -803,27 +853,34 @@ void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, P
uint *object_flag = dscene->object_flag.data();
- /* apply transforms for objects with single user meshes */
+ /* apply transforms for objects with single user geometry */
foreach (Object *object, scene->objects) {
/* Annoying feedback loop here: we can't use is_instanced() because
* it'll use uninitialized transform_applied flag.
*
- * Could be solved by moving reference counter to Mesh.
+ * Could be solved by moving reference counter to Geometry.
*/
- if ((mesh_users[object->mesh] == 1 && !object->mesh->has_surface_bssrdf) &&
- !object->mesh->has_true_displacement() &&
- object->mesh->subdivision_type == Mesh::SUBDIVISION_NONE) {
+ Geometry *geom = object->geometry;
+ bool apply = (geometry_users[geom] == 1) && !geom->has_surface_bssrdf &&
+ !geom->has_true_displacement();
+
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+ apply = apply && mesh->subdivision_type == Mesh::SUBDIVISION_NONE;
+ }
+
+ if (apply) {
if (!(motion_blur && object->use_motion())) {
- if (!object->mesh->transform_applied) {
+ if (!geom->transform_applied) {
object->apply_transform(apply_to_motion);
- object->mesh->transform_applied = true;
+ geom->transform_applied = true;
if (progress.get_cancel())
return;
}
object_flag[i] |= SD_OBJECT_TRANSFORM_APPLIED;
- if (object->mesh->transform_negative_scaled)
+ if (geom->transform_negative_scaled)
object_flag[i] |= SD_OBJECT_NEGATIVE_SCALE_APPLIED;
}
else
@@ -842,7 +899,7 @@ void ObjectManager::tag_update(Scene *scene)
{
need_update = true;
scene->curve_system_manager->need_update = true;
- scene->mesh_manager->need_update = true;
+ scene->geometry_manager->need_update = true;
scene->light_manager->need_update = true;
}
diff --git a/intern/cycles/render/object.h b/intern/cycles/render/object.h
index cbbff0d4c6d..7c84c2de4fb 100644
--- a/intern/cycles/render/object.h
+++ b/intern/cycles/render/object.h
@@ -23,8 +23,8 @@
#include "util/util_array.h"
#include "util/util_boundbox.h"
#include "util/util_param.h"
-#include "util/util_transform.h"
#include "util/util_thread.h"
+#include "util/util_transform.h"
#include "util/util_types.h"
#include "util/util_vector.h"
@@ -32,7 +32,7 @@ CCL_NAMESPACE_BEGIN
class Device;
class DeviceScene;
-class Mesh;
+class Geometry;
class ParticleSystem;
class Progress;
class Scene;
@@ -46,7 +46,7 @@ class Object : public Node {
public:
NODE_DECLARE
- Mesh *mesh;
+ Geometry *geometry;
Transform tfm;
BoundBox bounds;
uint random_id;
@@ -81,6 +81,9 @@ class Object : public Node {
int motion_step(float time) const;
void update_motion();
+ /* Maximum number of motion steps supported (due to Embree). */
+ static const uint MAX_MOTION_STEPS = 129;
+
/* Check whether object is traceable and it worth adding it to
* kernel scene.
*/
@@ -94,6 +97,9 @@ class Object : public Node {
/* Returns the index that is used in the kernel for this object. */
int get_device_index() const;
+ /* Compute step size from attributes, shaders, transforms. */
+ float compute_volume_step_size() const;
+
protected:
/* Specifies the position of the object in scene->objects and
* in the device vectors. Gets set in device_update. */
diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp
index 1f0a243e6c1..06d832a29ca 100644
--- a/intern/cycles/render/osl.cpp
+++ b/intern/cycles/render/osl.cpp
@@ -20,10 +20,10 @@
#include "render/colorspace.h"
#include "render/graph.h"
#include "render/light.h"
+#include "render/nodes.h"
#include "render/osl.h"
#include "render/scene.h"
#include "render/shader.h"
-#include "render/nodes.h"
#ifdef WITH_OSL
@@ -102,8 +102,8 @@ void OSLShaderManager::device_update(Device *device,
device_free(device, dscene, scene);
- /* determine which shaders are in use */
- device_update_shaders_used(scene);
+ /* set texture system */
+ scene->image_manager->set_osl_texture_system((void *)ts);
/* create shaders */
OSLGlobals *og = (OSLGlobals *)device->osl_memory();
@@ -142,9 +142,6 @@ void OSLShaderManager::device_update(Device *device,
need_update = false;
- /* set texture system */
- scene->image_manager->set_osl_texture_system((void *)ts);
-
/* add special builtin texture types */
services->textures.insert(ustring("@ao"), new OSLTextureHandle(OSLTextureHandle::AO));
services->textures.insert(ustring("@bevel"), new OSLTextureHandle(OSLTextureHandle::BEVEL));
@@ -319,7 +316,7 @@ bool OSLShaderManager::osl_compile(const string &inputfile, const string &output
string include_path_arg = string("-I") + shader_path;
options.push_back(include_path_arg);
- stdosl_path = path_get("shader/stdosl.h");
+ stdosl_path = path_get("shader/stdcycles.h");
/* compile */
OSL::OSLCompiler *compiler = new OSL::OSLCompiler(&OSL::ErrorHandler::default_handler());
@@ -440,27 +437,35 @@ const char *OSLShaderManager::shader_load_bytecode(const string &hash, const str
return loaded_shaders.find(hash)->first.c_str();
}
-OSLNode *OSLShaderManager::osl_node(const std::string &filepath,
+/* This is a static function to avoid RTTI link errors, since only this
+ * file is compiled without RTTI to match the OSL and LLVM libraries. */
+OSLNode *OSLShaderManager::osl_node(ShaderManager *manager,
+ const std::string &filepath,
const std::string &bytecode_hash,
const std::string &bytecode)
{
+ if (!manager->use_osl()) {
+ return NULL;
+ }
+
/* create query */
+ OSLShaderManager *osl_manager = static_cast<OSLShaderManager *>(manager);
const char *hash;
if (!filepath.empty()) {
- hash = shader_load_filepath(filepath);
+ hash = osl_manager->shader_load_filepath(filepath);
}
else {
- hash = shader_test_loaded(bytecode_hash);
+ hash = osl_manager->shader_test_loaded(bytecode_hash);
if (!hash)
- hash = shader_load_bytecode(bytecode_hash, bytecode);
+ hash = osl_manager->shader_load_bytecode(bytecode_hash, bytecode);
}
if (!hash) {
return NULL;
}
- OSLShaderInfo *info = shader_loaded_info(hash);
+ OSLShaderInfo *info = osl_manager->shader_loaded_info(hash);
/* count number of inputs */
size_t num_inputs = 0;
@@ -755,16 +760,14 @@ void OSLCompiler::add(ShaderNode *node, const char *name, bool isfilepath)
else if (current_type == SHADER_TYPE_VOLUME) {
if (node->has_spatial_varying())
current_shader->has_volume_spatial_varying = true;
+ if (node->has_attribute_dependency())
+ current_shader->has_volume_attribute_dependency = true;
}
if (node->has_object_dependency()) {
current_shader->has_object_dependency = true;
}
- if (node->has_attribute_dependency()) {
- current_shader->has_attribute_dependency = true;
- }
-
if (node->has_integrator_dependency()) {
current_shader->has_integrator_dependency = true;
}
@@ -1138,8 +1141,8 @@ void OSLCompiler::compile(OSLGlobals *og, Shader *shader)
shader->has_displacement = false;
shader->has_surface_spatial_varying = false;
shader->has_volume_spatial_varying = false;
+ shader->has_volume_attribute_dependency = false;
shader->has_object_dependency = false;
- shader->has_attribute_dependency = false;
shader->has_integrator_dependency = false;
/* generate surface shader */
diff --git a/intern/cycles/render/osl.h b/intern/cycles/render/osl.h
index 62cbfebf7eb..4dd9f6630f2 100644
--- a/intern/cycles/render/osl.h
+++ b/intern/cycles/render/osl.h
@@ -93,9 +93,10 @@ class OSLShaderManager : public ShaderManager {
OSLShaderInfo *shader_loaded_info(const string &hash);
/* create OSL node using OSLQuery */
- OSLNode *osl_node(const std::string &filepath,
- const std::string &bytecode_hash = "",
- const std::string &bytecode = "");
+ static OSLNode *osl_node(ShaderManager *manager,
+ const std::string &filepath,
+ const std::string &bytecode_hash = "",
+ const std::string &bytecode = "");
protected:
void texture_system_init();
diff --git a/intern/cycles/render/particles.cpp b/intern/cycles/render/particles.cpp
index 8335404b197..ec9276eff86 100644
--- a/intern/cycles/render/particles.cpp
+++ b/intern/cycles/render/particles.cpp
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "device/device.h"
#include "render/particles.h"
+#include "device/device.h"
#include "render/scene.h"
#include "util/util_foreach.h"
diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp
index 1e75fa0f99b..f5b68d5a4fe 100644
--- a/intern/cycles/render/scene.cpp
+++ b/intern/cycles/render/scene.cpp
@@ -16,11 +16,11 @@
#include <stdlib.h>
+#include "device/device.h"
#include "render/background.h"
#include "render/bake.h"
#include "render/camera.h"
#include "render/curves.h"
-#include "device/device.h"
#include "render/film.h"
#include "render/integrator.h"
#include "render/light.h"
@@ -41,50 +41,59 @@
CCL_NAMESPACE_BEGIN
DeviceScene::DeviceScene(Device *device)
- : bvh_nodes(device, "__bvh_nodes", MEM_TEXTURE),
- bvh_leaf_nodes(device, "__bvh_leaf_nodes", MEM_TEXTURE),
- object_node(device, "__object_node", MEM_TEXTURE),
- prim_tri_index(device, "__prim_tri_index", MEM_TEXTURE),
- prim_tri_verts(device, "__prim_tri_verts", MEM_TEXTURE),
- prim_type(device, "__prim_type", MEM_TEXTURE),
- prim_visibility(device, "__prim_visibility", MEM_TEXTURE),
- prim_index(device, "__prim_index", MEM_TEXTURE),
- prim_object(device, "__prim_object", MEM_TEXTURE),
- prim_time(device, "__prim_time", MEM_TEXTURE),
- tri_shader(device, "__tri_shader", MEM_TEXTURE),
- tri_vnormal(device, "__tri_vnormal", MEM_TEXTURE),
- tri_vindex(device, "__tri_vindex", MEM_TEXTURE),
- tri_patch(device, "__tri_patch", MEM_TEXTURE),
- tri_patch_uv(device, "__tri_patch_uv", MEM_TEXTURE),
- curves(device, "__curves", MEM_TEXTURE),
- curve_keys(device, "__curve_keys", MEM_TEXTURE),
- patches(device, "__patches", MEM_TEXTURE),
- objects(device, "__objects", MEM_TEXTURE),
- object_motion_pass(device, "__object_motion_pass", MEM_TEXTURE),
- object_motion(device, "__object_motion", MEM_TEXTURE),
- object_flag(device, "__object_flag", MEM_TEXTURE),
- camera_motion(device, "__camera_motion", MEM_TEXTURE),
- attributes_map(device, "__attributes_map", MEM_TEXTURE),
- attributes_float(device, "__attributes_float", MEM_TEXTURE),
- attributes_float2(device, "__attributes_float2", MEM_TEXTURE),
- attributes_float3(device, "__attributes_float3", MEM_TEXTURE),
- attributes_uchar4(device, "__attributes_uchar4", MEM_TEXTURE),
- light_distribution(device, "__light_distribution", MEM_TEXTURE),
- lights(device, "__lights", MEM_TEXTURE),
- light_background_marginal_cdf(device, "__light_background_marginal_cdf", MEM_TEXTURE),
- light_background_conditional_cdf(device, "__light_background_conditional_cdf", MEM_TEXTURE),
- particles(device, "__particles", MEM_TEXTURE),
- svm_nodes(device, "__svm_nodes", MEM_TEXTURE),
- shaders(device, "__shaders", MEM_TEXTURE),
- lookup_table(device, "__lookup_table", MEM_TEXTURE),
- sobol_directions(device, "__sobol_directions", MEM_TEXTURE),
- ies_lights(device, "__ies", MEM_TEXTURE)
+ : bvh_nodes(device, "__bvh_nodes", MEM_GLOBAL),
+ bvh_leaf_nodes(device, "__bvh_leaf_nodes", MEM_GLOBAL),
+ object_node(device, "__object_node", MEM_GLOBAL),
+ prim_tri_index(device, "__prim_tri_index", MEM_GLOBAL),
+ prim_tri_verts(device, "__prim_tri_verts", MEM_GLOBAL),
+ prim_type(device, "__prim_type", MEM_GLOBAL),
+ prim_visibility(device, "__prim_visibility", MEM_GLOBAL),
+ prim_index(device, "__prim_index", MEM_GLOBAL),
+ prim_object(device, "__prim_object", MEM_GLOBAL),
+ prim_time(device, "__prim_time", MEM_GLOBAL),
+ tri_shader(device, "__tri_shader", MEM_GLOBAL),
+ tri_vnormal(device, "__tri_vnormal", MEM_GLOBAL),
+ tri_vindex(device, "__tri_vindex", MEM_GLOBAL),
+ tri_patch(device, "__tri_patch", MEM_GLOBAL),
+ tri_patch_uv(device, "__tri_patch_uv", MEM_GLOBAL),
+ curves(device, "__curves", MEM_GLOBAL),
+ curve_keys(device, "__curve_keys", MEM_GLOBAL),
+ patches(device, "__patches", MEM_GLOBAL),
+ objects(device, "__objects", MEM_GLOBAL),
+ object_motion_pass(device, "__object_motion_pass", MEM_GLOBAL),
+ object_motion(device, "__object_motion", MEM_GLOBAL),
+ object_flag(device, "__object_flag", MEM_GLOBAL),
+ object_volume_step(device, "__object_volume_step", MEM_GLOBAL),
+ camera_motion(device, "__camera_motion", MEM_GLOBAL),
+ attributes_map(device, "__attributes_map", MEM_GLOBAL),
+ attributes_float(device, "__attributes_float", MEM_GLOBAL),
+ attributes_float2(device, "__attributes_float2", MEM_GLOBAL),
+ attributes_float3(device, "__attributes_float3", MEM_GLOBAL),
+ attributes_uchar4(device, "__attributes_uchar4", MEM_GLOBAL),
+ light_distribution(device, "__light_distribution", MEM_GLOBAL),
+ lights(device, "__lights", MEM_GLOBAL),
+ light_background_marginal_cdf(device, "__light_background_marginal_cdf", MEM_GLOBAL),
+ light_background_conditional_cdf(device, "__light_background_conditional_cdf", MEM_GLOBAL),
+ particles(device, "__particles", MEM_GLOBAL),
+ svm_nodes(device, "__svm_nodes", MEM_GLOBAL),
+ shaders(device, "__shaders", MEM_GLOBAL),
+ lookup_table(device, "__lookup_table", MEM_GLOBAL),
+ sample_pattern_lut(device, "__sample_pattern_lut", MEM_GLOBAL),
+ ies_lights(device, "__ies", MEM_GLOBAL)
{
memset((void *)&data, 0, sizeof(data));
}
Scene::Scene(const SceneParams &params_, Device *device)
- : name("Scene"), device(device), dscene(device), params(params_)
+ : name("Scene"),
+ default_surface(NULL),
+ default_volume(NULL),
+ default_light(NULL),
+ default_background(NULL),
+ default_empty(NULL),
+ device(device),
+ dscene(device),
+ params(params_)
{
memset((void *)&dscene.data, 0, sizeof(dscene.data));
@@ -94,7 +103,7 @@ Scene::Scene(const SceneParams &params_, Device *device)
film = new Film();
background = new Background();
light_manager = new LightManager();
- mesh_manager = new MeshManager();
+ geometry_manager = new GeometryManager();
object_manager = new ObjectManager();
integrator = new Integrator();
image_manager = new ImageManager(device->info);
@@ -104,9 +113,11 @@ Scene::Scene(const SceneParams &params_, Device *device)
/* OSL only works on the CPU */
if (device->info.has_osl)
- shader_manager = ShaderManager::create(this, params.shadingsystem);
+ shader_manager = ShaderManager::create(params.shadingsystem);
else
- shader_manager = ShaderManager::create(this, SHADINGSYSTEM_SVM);
+ shader_manager = ShaderManager::create(SHADINGSYSTEM_SVM);
+
+ shader_manager->add_default(this);
}
Scene::~Scene()
@@ -118,8 +129,8 @@ void Scene::free_memory(bool final)
{
foreach (Shader *s, shaders)
delete s;
- foreach (Mesh *m, meshes)
- delete m;
+ foreach (Geometry *g, geometry)
+ delete g;
foreach (Object *o, objects)
delete o;
foreach (Light *l, lights)
@@ -128,7 +139,7 @@ void Scene::free_memory(bool final)
delete p;
shaders.clear();
- meshes.clear();
+ geometry.clear();
objects.clear();
lights.clear();
particle_systems.clear();
@@ -140,7 +151,7 @@ void Scene::free_memory(bool final)
integrator->device_free(device, &dscene);
object_manager->device_free(device, &dscene);
- mesh_manager->device_free(device, &dscene);
+ geometry_manager->device_free(device, &dscene);
shader_manager->device_free(device, &dscene, this);
light_manager->device_free(device, &dscene);
@@ -165,7 +176,7 @@ void Scene::free_memory(bool final)
delete background;
delete integrator;
delete object_manager;
- delete mesh_manager;
+ delete geometry_manager;
delete shader_manager;
delete light_manager;
delete particle_system_manager;
@@ -211,7 +222,7 @@ void Scene::device_update(Device *device_, Progress &progress)
if (progress.get_cancel() || device->have_error())
return;
- mesh_manager->device_update_preprocess(device, this, progress);
+ geometry_manager->device_update_preprocess(device, this, progress);
if (progress.get_cancel() || device->have_error())
return;
@@ -235,7 +246,7 @@ void Scene::device_update(Device *device_, Progress &progress)
return;
progress.set_status("Updating Meshes");
- mesh_manager->device_update(device, &dscene, this, progress);
+ geometry_manager->device_update(device, &dscene, this, progress);
if (progress.get_cancel() || device->have_error())
return;
@@ -356,8 +367,8 @@ bool Scene::need_update()
bool Scene::need_data_update()
{
return (background->need_update || image_manager->need_update || object_manager->need_update ||
- mesh_manager->need_update || light_manager->need_update || lookup_tables->need_update ||
- integrator->need_update || shader_manager->need_update ||
+ geometry_manager->need_update || light_manager->need_update ||
+ lookup_tables->need_update || integrator->need_update || shader_manager->need_update ||
particle_system_manager->need_update || curve_system_manager->need_update ||
bake_manager->need_update || film->need_update);
}
@@ -379,7 +390,7 @@ void Scene::reset()
background->tag_update(this);
integrator->tag_update(this);
object_manager->tag_update(this);
- mesh_manager->tag_update(this);
+ geometry_manager->tag_update(this);
light_manager->tag_update(this);
particle_system_manager->tag_update(this);
curve_system_manager->tag_update(this);
@@ -392,7 +403,7 @@ void Scene::device_free()
void Scene::collect_statistics(RenderStats *stats)
{
- mesh_manager->collect_statistics(this, stats);
+ geometry_manager->collect_statistics(this, stats);
image_manager->collect_statistics(stats);
}
diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h
index f99510d2d42..6b10a901d7b 100644
--- a/intern/cycles/render/scene.h
+++ b/intern/cycles/render/scene.h
@@ -44,8 +44,8 @@ class Integrator;
class Light;
class LightManager;
class LookupTables;
-class Mesh;
-class MeshManager;
+class Geometry;
+class GeometryManager;
class Object;
class ObjectManager;
class ParticleSystemManager;
@@ -91,6 +91,7 @@ class DeviceScene {
device_vector<Transform> object_motion_pass;
device_vector<DecomposedTransform> object_motion;
device_vector<uint> object_flag;
+ device_vector<float> object_volume_step;
/* cameras */
device_vector<DecomposedTransform> camera_motion;
@@ -119,7 +120,7 @@ class DeviceScene {
device_vector<float> lookup_table;
/* integrator */
- device_vector<uint> sobol_directions;
+ device_vector<uint> sample_pattern_lut;
/* ies lights */
device_vector<float> ies_lights;
@@ -213,7 +214,7 @@ class Scene {
/* data lists */
vector<Object *> objects;
- vector<Mesh *> meshes;
+ vector<Geometry *> geometry;
vector<Shader *> shaders;
vector<Light *> lights;
vector<ParticleSystem *> particle_systems;
@@ -222,7 +223,7 @@ class Scene {
ImageManager *image_manager;
LightManager *light_manager;
ShaderManager *shader_manager;
- MeshManager *mesh_manager;
+ GeometryManager *geometry_manager;
ObjectManager *object_manager;
ParticleSystemManager *particle_system_manager;
CurveSystemManager *curve_system_manager;
@@ -230,6 +231,7 @@ class Scene {
/* default shaders */
Shader *default_surface;
+ Shader *default_volume;
Shader *default_light;
Shader *default_background;
Shader *default_empty;
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index c77a20787f5..b1b30979b0e 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -14,12 +14,13 @@
* limitations under the License.
*/
-#include <string.h>
#include <limits.h>
+#include <string.h>
+#include "device/device.h"
+#include "render/bake.h"
#include "render/buffers.h"
#include "render/camera.h"
-#include "device/device.h"
#include "render/graph.h"
#include "render/integrator.h"
#include "render/light.h"
@@ -27,7 +28,6 @@
#include "render/object.h"
#include "render/scene.h"
#include "render/session.h"
-#include "render/bake.h"
#include "util/util_foreach.h"
#include "util/util_function.h"
@@ -183,7 +183,8 @@ bool Session::draw_gpu(BufferParams &buffer_params, DeviceDrawParams &draw_param
if (gpu_draw_ready) {
/* then verify the buffers have the expected size, so we don't
* draw previous results in a resized window */
- if (!buffer_params.modified(display->params)) {
+ if (buffer_params.width == display->params.width &&
+ buffer_params.height == display->params.height) {
/* for CUDA we need to do tone-mapping still, since we can
* only access GL buffers from the main thread. */
if (gpu_need_display_buffer_update) {
@@ -211,6 +212,7 @@ void Session::run_gpu()
reset_time = time_dt();
last_update_time = time_dt();
+ last_display_time = last_update_time;
progress.set_render_start_time();
@@ -291,11 +293,15 @@ void Session::run_gpu()
* reset and draw in between */
thread_scoped_lock buffers_lock(buffers_mutex);
+ /* avoid excessive denoising in viewport after reaching a certain number of samples */
+ bool need_denoise = tile_manager.schedule_denoising || tile_manager.state.sample < 20 ||
+ (time_dt() - last_display_time) >= params.progressive_update_timeout;
+
/* update status and timing */
update_status_time();
/* render */
- render();
+ render(need_denoise);
device->task_wait();
@@ -305,7 +311,7 @@ void Session::run_gpu()
/* update status and timing */
update_status_time();
- gpu_need_display_buffer_update = true;
+ gpu_need_display_buffer_update = need_denoise || !params.run_denoising;
gpu_draw_ready = true;
progress.set_update();
@@ -359,7 +365,8 @@ bool Session::draw_cpu(BufferParams &buffer_params, DeviceDrawParams &draw_param
if (display->draw_ready()) {
/* then verify the buffers have the expected size, so we don't
* draw previous results in a resized window */
- if (!buffer_params.modified(display->params)) {
+ if (buffer_params.width == display->params.width &&
+ buffer_params.height == display->params.height) {
display->draw(device, draw_params);
if (display_outdated && (time_dt() - reset_time) > params.text_timeout)
@@ -372,7 +379,7 @@ bool Session::draw_cpu(BufferParams &buffer_params, DeviceDrawParams &draw_param
return false;
}
-bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
+bool Session::acquire_tile(RenderTile &rtile, Device *tile_device, uint tile_types)
{
if (progress.get_cancel()) {
if (params.progressive_refine == false) {
@@ -387,8 +394,14 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
Tile *tile;
int device_num = device->device_number(tile_device);
- if (!tile_manager.next_tile(tile, device_num))
+ while (!tile_manager.next_tile(tile, device_num, tile_types)) {
+ /* Wait for denoising tiles to become available */
+ if ((tile_types & RenderTile::DENOISE) && !progress.get_cancel() && tile_manager.has_tiles()) {
+ denoising_cond.wait(tile_lock);
+ continue;
+ }
return false;
+ }
/* fill render tile */
rtile.x = tile_manager.state.buffer.full_x + tile->x;
@@ -399,7 +412,7 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
rtile.num_samples = tile_manager.state.num_samples;
rtile.resolution = tile_manager.state.resolution_divider;
rtile.tile_index = tile->index;
- rtile.task = (tile->state == Tile::DENOISE) ? RenderTile::DENOISE : RenderTile::PATH_TRACE;
+ rtile.task = tile->state == Tile::DENOISE ? RenderTile::DENOISE : RenderTile::PATH_TRACE;
tile_lock.unlock();
@@ -413,6 +426,9 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
device->map_tile(tile_device, rtile);
+ /* Reset copy state, since buffer contents change after the tile was acquired */
+ buffers->map_neighbor_copied = false;
+
return true;
}
@@ -429,6 +445,8 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
tile->buffers->reset(buffer_params);
}
+ tile->buffers->map_neighbor_copied = false;
+
tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride);
rtile.buffer = tile->buffers->buffer.device_pointer;
@@ -484,45 +502,75 @@ void Session::release_tile(RenderTile &rtile)
}
update_status_time();
+
+ /* Notify denoising thread that a tile was finished. */
+ denoising_cond.notify_all();
}
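
The notify_all() here pairs with the new wait loop in acquire_tile(): a device thread that only has denoising work left blocks on denoising_cond until another thread finishes a tile. The following self-contained sketch shows the same condition-variable handshake with a toy producer and consumer; it illustrates the pattern only and is not the session code itself.

    #include <condition_variable>
    #include <iostream>
    #include <mutex>
    #include <queue>
    #include <thread>

    int main()
    {
      std::mutex tile_mutex;
      std::condition_variable denoising_cond;
      std::queue<int> finished_tiles;
      bool done = false;

      std::thread producer([&] {
        for (int tile = 0; tile < 3; tile++) {
          {
            std::lock_guard<std::mutex> lock(tile_mutex);
            finished_tiles.push(tile);
          }
          denoising_cond.notify_all();  // like release_tile() notifying waiters
        }
        {
          std::lock_guard<std::mutex> lock(tile_mutex);
          done = true;
        }
        denoising_cond.notify_all();
      });

      std::unique_lock<std::mutex> lock(tile_mutex);
      while (!done || !finished_tiles.empty()) {
        if (finished_tiles.empty()) {
          denoising_cond.wait(lock);  // like the new wait loop in acquire_tile()
          continue;
        }
        std::cout << "denoise tile " << finished_tiles.front() << "\n";
        finished_tiles.pop();
      }

      producer.join();
      return 0;
    }
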
void Session::map_neighbor_tiles(RenderTile *tiles, Device *tile_device)
{
thread_scoped_lock tile_lock(tile_mutex);
- int center_idx = tiles[4].tile_index;
- assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE);
- BufferParams buffer_params = tile_manager.params;
- int4 image_region = make_int4(buffer_params.full_x,
- buffer_params.full_y,
- buffer_params.full_x + buffer_params.width,
- buffer_params.full_y + buffer_params.height);
-
- for (int dy = -1, i = 0; dy <= 1; dy++) {
- for (int dx = -1; dx <= 1; dx++, i++) {
- int px = tiles[4].x + dx * params.tile_size.x;
- int py = tiles[4].y + dy * params.tile_size.y;
- if (px >= image_region.x && py >= image_region.y && px < image_region.z &&
- py < image_region.w) {
- int tile_index = center_idx + dy * tile_manager.state.tile_stride + dx;
- Tile *tile = &tile_manager.state.tiles[tile_index];
- assert(tile->buffers);
-
- tiles[i].buffer = tile->buffers->buffer.device_pointer;
- tiles[i].x = tile_manager.state.buffer.full_x + tile->x;
- tiles[i].y = tile_manager.state.buffer.full_y + tile->y;
- tiles[i].w = tile->w;
- tiles[i].h = tile->h;
- tiles[i].buffers = tile->buffers;
-
- tile->buffers->params.get_offset_stride(tiles[i].offset, tiles[i].stride);
- }
- else {
- tiles[i].buffer = (device_ptr)NULL;
- tiles[i].buffers = NULL;
- tiles[i].x = clamp(px, image_region.x, image_region.z);
- tiles[i].y = clamp(py, image_region.y, image_region.w);
- tiles[i].w = tiles[i].h = 0;
+ const int4 image_region = make_int4(
+ tile_manager.state.buffer.full_x,
+ tile_manager.state.buffer.full_y,
+ tile_manager.state.buffer.full_x + tile_manager.state.buffer.width,
+ tile_manager.state.buffer.full_y + tile_manager.state.buffer.height);
+
+ if (!tile_manager.schedule_denoising) {
+ /* Fix up tile slices with overlap. */
+ if (tile_manager.slice_overlap != 0) {
+ int y = max(tiles[4].y - tile_manager.slice_overlap, image_region.y);
+ tiles[4].h = min(tiles[4].y + tiles[4].h + tile_manager.slice_overlap, image_region.w) - y;
+ tiles[4].y = y;
+ }
+
+ /* Tiles are not being denoised individually, which means the entire image is processed. */
+ tiles[3].x = tiles[4].x;
+ tiles[1].y = tiles[4].y;
+ tiles[5].x = tiles[4].x + tiles[4].w;
+ tiles[7].y = tiles[4].y + tiles[4].h;
+ }
+ else {
+ int center_idx = tiles[4].tile_index;
+ assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE);
+
+ for (int dy = -1, i = 0; dy <= 1; dy++) {
+ for (int dx = -1; dx <= 1; dx++, i++) {
+ int nindex = tile_manager.get_neighbor_index(center_idx, i);
+ if (nindex >= 0) {
+ Tile *tile = &tile_manager.state.tiles[nindex];
+
+ tiles[i].x = image_region.x + tile->x;
+ tiles[i].y = image_region.y + tile->y;
+ tiles[i].w = tile->w;
+ tiles[i].h = tile->h;
+
+ if (buffers) {
+ tile_manager.state.buffer.get_offset_stride(tiles[i].offset, tiles[i].stride);
+
+ tiles[i].buffer = buffers->buffer.device_pointer;
+ tiles[i].buffers = buffers;
+ }
+ else {
+ assert(tile->buffers);
+ tile->buffers->params.get_offset_stride(tiles[i].offset, tiles[i].stride);
+
+ tiles[i].buffer = tile->buffers->buffer.device_pointer;
+ tiles[i].buffers = tile->buffers;
+ }
+ }
+ else {
+ int px = tiles[4].x + dx * params.tile_size.x;
+ int py = tiles[4].y + dy * params.tile_size.y;
+
+ tiles[i].x = clamp(px, image_region.x, image_region.z);
+ tiles[i].y = clamp(py, image_region.y, image_region.w);
+ tiles[i].w = tiles[i].h = 0;
+
+ tiles[i].buffer = (device_ptr)NULL;
+ tiles[i].buffers = NULL;
+ }
}
}
}
@@ -545,6 +593,7 @@ void Session::run_cpu()
bool tiles_written = false;
last_update_time = time_dt();
+ last_display_time = last_update_time;
{
/* reset once to start */
@@ -575,7 +624,7 @@ void Session::run_cpu()
}
/* Don't go in pause mode when preview kernels are used
- * When feature kernels become available the session will be resetted. */
+ * When feature kernels become available the session will be reset. */
else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
time_sleep(0.1);
}
@@ -620,11 +669,6 @@ void Session::run_cpu()
}
if (!no_tiles) {
- /* buffers mutex is locked entirely while rendering each
- * sample, and released/reacquired on each iteration to allow
- * reset and draw in between */
- thread_scoped_lock buffers_lock(buffers_mutex);
-
/* update scene */
scoped_timer update_timer;
if (update_scene()) {
@@ -638,17 +682,26 @@ void Session::run_cpu()
if (progress.get_cancel())
break;
+ /* buffers mutex is locked entirely while rendering each
+ * sample, and released/reacquired on each iteration to allow
+ * reset and draw in between */
+ thread_scoped_lock buffers_lock(buffers_mutex);
+
+ /* avoid excessive denoising in viewport after reaching a certain number of samples */
+ bool need_denoise = tile_manager.schedule_denoising || tile_manager.state.sample < 20 ||
+ (time_dt() - last_display_time) >= params.progressive_update_timeout;
+
/* update status and timing */
update_status_time();
/* render */
- render();
+ render(need_denoise);
/* update status and timing */
update_status_time();
if (!params.background)
- need_copy_to_display_buffer = true;
+ need_copy_to_display_buffer = need_denoise || !params.run_denoising;
if (!device->error_message().empty())
progress.set_error(device->error_message());
@@ -701,23 +754,26 @@ DeviceRequestedFeatures Session::get_requested_device_features()
requested_features.use_object_motion = false;
requested_features.use_camera_motion = use_motion && scene->camera->use_motion();
foreach (Object *object, scene->objects) {
- Mesh *mesh = object->mesh;
- if (mesh->num_curves()) {
- requested_features.use_hair = true;
- }
+ Geometry *geom = object->geometry;
if (use_motion) {
- requested_features.use_object_motion |= object->use_motion() | mesh->use_motion_blur;
- requested_features.use_camera_motion |= mesh->use_motion_blur;
+ requested_features.use_object_motion |= object->use_motion() | geom->use_motion_blur;
+ requested_features.use_camera_motion |= geom->use_motion_blur;
}
-#ifdef WITH_OPENSUBDIV
- if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE) {
- requested_features.use_patch_evaluation = true;
- }
-#endif
if (object->is_shadow_catcher) {
requested_features.use_shadow_tricks = true;
}
- requested_features.use_true_displacement |= mesh->has_true_displacement();
+ if (geom->type == Geometry::MESH) {
+ Mesh *mesh = static_cast<Mesh *>(geom);
+#ifdef WITH_OPENSUBDIV
+ if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE) {
+ requested_features.use_patch_evaluation = true;
+ }
+#endif
+ requested_features.use_true_displacement |= mesh->has_true_displacement();
+ }
+ else if (geom->type == Geometry::HAIR) {
+ requested_features.use_hair = true;
+ }
}
requested_features.use_background_light = scene->light_manager->has_background_light(scene);
@@ -842,9 +898,6 @@ void Session::set_samples(int samples)
params.samples = samples;
tile_manager.set_samples(samples);
- {
- thread_scoped_lock pause_lock(pause_mutex);
- }
pause_cond.notify_all();
}
}
@@ -866,6 +919,29 @@ void Session::set_pause(bool pause_)
pause_cond.notify_all();
}
+void Session::set_denoising(bool denoising, bool optix_denoising)
+{
+ /* Lock buffers so no denoising operation is triggered while the settings are changed here. */
+ thread_scoped_lock buffers_lock(buffers_mutex);
+
+ params.run_denoising = denoising;
+ params.full_denoising = !optix_denoising;
+ params.optix_denoising = optix_denoising;
+
+ // TODO(pmours): Query the required overlap value for denoising from the device?
+ tile_manager.slice_overlap = denoising && !params.background ? 64 : 0;
+ tile_manager.schedule_denoising = denoising && !buffers;
+}
+
+void Session::set_denoising_start_sample(int sample)
+{
+ if (sample != params.denoising_start_sample) {
+ params.denoising_start_sample = sample;
+
+ pause_cond.notify_all();
+ }
+}
+
void Session::wait()
{
if (session_thread) {
@@ -900,7 +976,7 @@ bool Session::update_scene()
Integrator *integrator = scene->integrator;
BakeManager *bake_manager = scene->bake_manager;
- if (integrator->sampling_pattern == SAMPLING_PATTERN_CMJ || bake_manager->get_baking()) {
+ if (integrator->sampling_pattern != SAMPLING_PATTERN_SOBOL || bake_manager->get_baking()) {
int aa_samples = tile_manager.num_samples;
if (aa_samples != integrator->aa_samples) {
@@ -911,7 +987,8 @@ bool Session::update_scene()
/* update scene */
if (scene->need_update()) {
- bool new_kernels_needed = load_kernels(false);
+ /* Update the used shader tags so we know which features are needed for the kernel. */
+ scene->shader_manager->update_shaders_used(scene);
/* Update max_closures. */
KernelIntegrator *kintegrator = &scene->dscene.data.integrator;
@@ -923,6 +1000,9 @@ bool Session::update_scene()
kintegrator->max_closures = MAX_CLOSURE;
}
+ /* Load render kernels, before device update where we upload data to the GPU. */
+ bool new_kernels_needed = load_kernels(false);
+
progress.set_status("Updating Scene");
MEM_GUARDED_CALL(&progress, scene->device_update, device, progress);
@@ -1003,17 +1083,21 @@ void Session::update_status_time(bool show_pause, bool show_done)
progress.set_status(status, substatus);
}
-void Session::render()
+void Session::render(bool with_denoising)
{
- /* Clear buffers. */
if (buffers && tile_manager.state.sample == tile_manager.range_start_sample) {
+ /* Clear buffers. */
buffers->zero();
}
+ if (tile_manager.state.buffer.width == 0 || tile_manager.state.buffer.height == 0) {
+ return; /* Avoid empty launches. */
+ }
+
/* Add path trace task. */
DeviceTask task(DeviceTask::RENDER);
- task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2);
+ task.acquire_tile = function_bind(&Session::acquire_tile, this, _2, _1, _3);
task.release_tile = function_bind(&Session::release_tile, this, _1);
task.map_neighbor_tiles = function_bind(&Session::map_neighbor_tiles, this, _1, _2);
task.unmap_neighbor_tiles = function_bind(&Session::unmap_neighbor_tiles, this, _1, _2);
@@ -1022,13 +1106,37 @@ void Session::render()
task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2);
task.need_finish_queue = params.progressive_refine;
task.integrator_branched = scene->integrator->method == Integrator::BRANCHED_PATH;
- task.requested_tile_size = params.tile_size;
- task.passes_size = tile_manager.params.get_passes_size();
- if (params.run_denoising) {
+ task.adaptive_sampling.use = (scene->integrator->sampling_pattern == SAMPLING_PATTERN_PMJ) &&
+ scene->dscene.data.film.pass_adaptive_aux_buffer;
+ task.adaptive_sampling.min_samples = scene->dscene.data.integrator.adaptive_min_samples;
+
+ /* Acquire render tiles by default. */
+ task.tile_types = RenderTile::PATH_TRACE;
+
+ with_denoising = params.run_denoising && with_denoising;
+ if (with_denoising) {
+ /* Do not denoise viewport until the sample at which denoising should start is reached. */
+ if (!params.background && tile_manager.state.sample < params.denoising_start_sample) {
+ with_denoising = false;
+ }
+
+ /* Cannot denoise with resolution divider and separate denoising devices.
+ * It breaks the copy in 'MultiDevice::map_neighbor_tiles' (which operates on the full buffer
+ * dimensions and not the scaled ones). */
+ if (!params.device.denoising_devices.empty() && tile_manager.state.resolution_divider > 1) {
+ with_denoising = false;
+ }
+
+ /* It can happen that denoising was already enabled, but the scene still needs an update. */
+ if (scene->film->need_update || !scene->film->denoising_data_offset) {
+ with_denoising = false;
+ }
+ }
+
+ if (with_denoising) {
task.denoising = params.denoising;
- assert(!scene->film->need_update);
task.pass_stride = scene->film->pass_stride;
task.target_pass_stride = task.pass_stride;
task.pass_denoising_data = scene->film->denoising_data_offset;
@@ -1038,6 +1146,30 @@ void Session::render()
task.denoising_do_filter = params.full_denoising;
task.denoising_use_optix = params.optix_denoising;
task.denoising_write_passes = params.write_denoising_passes;
+
+ if (tile_manager.schedule_denoising) {
+ /* Acquire denoising tiles during rendering. */
+ task.tile_types |= RenderTile::DENOISE;
+ }
+ else {
+ assert(buffers);
+
+ /* Schedule rendering and wait for it to finish. */
+ device->task_add(task);
+ device->task_wait();
+
+ /* Then run denoising on the whole image at once. */
+ task.type = DeviceTask::DENOISE_BUFFER;
+ task.x = tile_manager.state.buffer.full_x;
+ task.y = tile_manager.state.buffer.full_y;
+ task.w = tile_manager.state.buffer.width;
+ task.h = tile_manager.state.buffer.height;
+ task.buffer = buffers->buffer.device_pointer;
+ task.sample = tile_manager.state.sample;
+ task.num_samples = tile_manager.state.num_samples;
+ tile_manager.state.buffer.get_offset_stride(task.offset, task.stride);
+ task.buffers = buffers;
+ }
}
device->task_add(task);
@@ -1064,6 +1196,8 @@ void Session::copy_to_display_buffer(int sample)
/* set display to new size */
display->draw_set(task.w, task.h);
+
+ last_display_time = time_dt();
}
display_outdated = false;
@@ -1141,8 +1275,11 @@ int Session::get_max_closure_count()
int max_closures = 0;
for (int i = 0; i < scene->shaders.size(); i++) {
- int num_closures = scene->shaders[i]->graph->get_num_closures();
- max_closures = max(max_closures, num_closures);
+ Shader *shader = scene->shaders[i];
+ if (shader->used) {
+ int num_closures = shader->graph->get_num_closures();
+ max_closures = max(max_closures, num_closures);
+ }
}
max_closure_global = max(max_closure_global, max_closures);
diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h
index ec465601541..61970d87e9c 100644
--- a/intern/cycles/render/session.h
+++ b/intern/cycles/render/session.h
@@ -17,8 +17,8 @@
#ifndef __SESSION_H__
#define __SESSION_H__
-#include "render/buffers.h"
#include "device/device.h"
+#include "render/buffers.h"
#include "render/shader.h"
#include "render/stats.h"
#include "render/tile.h"
@@ -53,8 +53,10 @@ class SessionParams {
int2 tile_size;
TileOrder tile_order;
int start_resolution;
+ int denoising_start_sample;
int pixel_size;
int threads;
+ bool adaptive_sampling;
bool use_profiling;
@@ -85,8 +87,10 @@ class SessionParams {
samples = 1024;
tile_size = make_int2(64, 64);
start_resolution = INT_MAX;
+ denoising_start_sample = 0;
pixel_size = 1;
threads = 0;
+ adaptive_sampling = false;
use_profiling = false;
@@ -109,11 +113,13 @@ class SessionParams {
bool modified(const SessionParams &params)
{
return !(device == params.device && background == params.background &&
- progressive_refine == params.progressive_refine
- /* && samples == params.samples */
- && progressive == params.progressive && experimental == params.experimental &&
+ progressive_refine == params.progressive_refine &&
+ /* samples == params.samples && denoising_start_sample ==
+ params.denoising_start_sample && */
+ progressive == params.progressive && experimental == params.experimental &&
tile_size == params.tile_size && start_resolution == params.start_resolution &&
pixel_size == params.pixel_size && threads == params.threads &&
+ adaptive_sampling == params.adaptive_sampling &&
use_profiling == params.use_profiling &&
display_buffer_linear == params.display_buffer_linear &&
cancel_timeout == params.cancel_timeout && reset_timeout == params.reset_timeout &&
@@ -152,8 +158,10 @@ class Session {
bool ready_to_reset();
void reset(BufferParams &params, int samples);
- void set_samples(int samples);
void set_pause(bool pause);
+ void set_samples(int samples);
+ void set_denoising(bool denoising, bool optix_denoising);
+ void set_denoising_start_sample(int sample);
bool update_scene();
bool load_kernels(bool lock_scene = true);
@@ -178,8 +186,9 @@ class Session {
void update_status_time(bool show_pause = false, bool show_done = false);
+ void render(bool with_denoising);
void copy_to_display_buffer(int sample);
- void render();
+
void reset_(BufferParams &params, int samples);
void run_cpu();
@@ -190,7 +199,7 @@ class Session {
bool draw_gpu(BufferParams &params, DeviceDrawParams &draw_params);
void reset_gpu(BufferParams &params, int samples);
- bool acquire_tile(Device *tile_device, RenderTile &tile);
+ bool acquire_tile(RenderTile &tile, Device *tile_device, uint tile_types);
void update_tile_sample(RenderTile &tile);
void release_tile(RenderTile &tile);
@@ -213,14 +222,16 @@ class Session {
thread_mutex tile_mutex;
thread_mutex buffers_mutex;
thread_mutex display_mutex;
+ thread_condition_variable denoising_cond;
bool kernels_loaded;
DeviceRequestedFeatures loaded_kernel_features;
double reset_time;
+ double last_update_time;
+ double last_display_time;
/* progressive refine */
- double last_update_time;
bool update_progressive_refine(bool cancel);
DeviceRequestedFeatures get_requested_device_features();
diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp
index 661208c6463..747fc58f81a 100644
--- a/intern/cycles/render/shader.cpp
+++ b/intern/cycles/render/shader.cpp
@@ -168,7 +168,7 @@ NODE_DEFINE(Shader)
SOCKET_ENUM(volume_sampling_method,
"Volume Sampling Method",
volume_sampling_method_enum,
- VOLUME_SAMPLING_DISTANCE);
+ VOLUME_SAMPLING_MULTIPLE_IMPORTANCE);
static NodeEnum volume_interpolation_method_enum;
volume_interpolation_method_enum.insert("linear", VOLUME_INTERPOLATION_LINEAR);
@@ -178,6 +178,8 @@ NODE_DEFINE(Shader)
volume_interpolation_method_enum,
VOLUME_INTERPOLATION_LINEAR);
+ SOCKET_FLOAT(volume_step_rate, "Volume Step Rate", 1.0f);
+
static NodeEnum displacement_method_enum;
displacement_method_enum.insert("bump", DISPLACE_BUMP);
displacement_method_enum.insert("true", DISPLACE_TRUE);
@@ -203,10 +205,11 @@ Shader::Shader() : Node(node_type)
has_bssrdf_bump = false;
has_surface_spatial_varying = false;
has_volume_spatial_varying = false;
+ has_volume_attribute_dependency = false;
has_object_dependency = false;
- has_attribute_dependency = false;
has_integrator_dependency = false;
has_volume_connected = false;
+ prev_volume_step_rate = 0.0f;
displacement_method = DISPLACE_BUMP;
@@ -214,7 +217,7 @@ Shader::Shader() : Node(node_type)
used = false;
need_update = true;
- need_update_mesh = true;
+ need_update_geometry = true;
need_sync_object = false;
}
@@ -288,7 +291,7 @@ void Shader::set_graph(ShaderGraph *graph_)
const char *new_hash = (graph_) ? graph_->displacement_hash.c_str() : "";
if (strcmp(old_hash, new_hash) != 0) {
- need_update_mesh = true;
+ need_update_geometry = true;
}
}
@@ -347,15 +350,16 @@ void Shader::tag_update(Scene *scene)
}
/* compare if the attributes changed, mesh manager will check
- * need_update_mesh, update the relevant meshes and clear it. */
+ * need_update_geometry, update the relevant meshes and clear it. */
if (attributes.modified(prev_attributes)) {
- need_update_mesh = true;
- scene->mesh_manager->need_update = true;
+ need_update_geometry = true;
+ scene->geometry_manager->need_update = true;
}
- if (has_volume != prev_has_volume) {
- scene->mesh_manager->need_flags_update = true;
+ if (has_volume != prev_has_volume || volume_step_rate != prev_volume_step_rate) {
+ scene->geometry_manager->need_flags_update = true;
scene->object_manager->need_flags_update = true;
+ prev_volume_step_rate = volume_step_rate;
}
}
@@ -415,7 +419,7 @@ ShaderManager::~ShaderManager()
{
}
-ShaderManager *ShaderManager::create(Scene *scene, int shadingsystem)
+ShaderManager *ShaderManager::create(int shadingsystem)
{
ShaderManager *manager;
@@ -431,8 +435,6 @@ ShaderManager *ShaderManager::create(Scene *scene, int shadingsystem)
manager = new SVMShaderManager();
}
- add_default(scene);
-
return manager;
}
@@ -471,8 +473,12 @@ int ShaderManager::get_shader_id(Shader *shader, bool smooth)
return id;
}
-void ShaderManager::device_update_shaders_used(Scene *scene)
+void ShaderManager::update_shaders_used(Scene *scene)
{
+ if (!need_update) {
+ return;
+ }
+
/* figure out which shaders are in use, so SVM/OSL can skip compiling them
* for speed and avoid loading image textures into memory */
uint id = 0;
@@ -489,8 +495,8 @@ void ShaderManager::device_update_shaders_used(Scene *scene)
if (scene->background->shader)
scene->background->shader->used = true;
- foreach (Mesh *mesh, scene->meshes)
- foreach (Shader *shader, mesh->used_shaders)
+ foreach (Geometry *geom, scene->geometry)
+ foreach (Shader *shader, geom->used_shaders)
shader->used = true;
foreach (Light *light, scene->lights)
@@ -531,10 +537,12 @@ void ShaderManager::device_update_common(Device *device,
/* in this case we can assume transparent surface */
if (shader->has_volume_connected && !shader->has_surface)
flag |= SD_HAS_ONLY_VOLUME;
- if (shader->heterogeneous_volume && shader->has_volume_spatial_varying)
- flag |= SD_HETEROGENEOUS_VOLUME;
- if (shader->has_attribute_dependency)
- flag |= SD_NEED_ATTRIBUTES;
+ if (shader->has_volume) {
+ if (shader->heterogeneous_volume && shader->has_volume_spatial_varying)
+ flag |= SD_HETEROGENEOUS_VOLUME;
+ }
+ if (shader->has_volume_attribute_dependency)
+ flag |= SD_NEED_VOLUME_ATTRIBUTES;
if (shader->has_bssrdf_bump)
flag |= SD_HAS_BSSRDF_BUMP;
if (device->info.has_volume_decoupled) {
@@ -623,9 +631,27 @@ void ShaderManager::add_default(Scene *scene)
Shader *shader = new Shader();
shader->name = "default_surface";
- shader->graph = graph;
+ shader->set_graph(graph);
scene->shaders.push_back(shader);
scene->default_surface = shader;
+ shader->tag_update(scene);
+ }
+
+ /* default volume */
+ {
+ ShaderGraph *graph = new ShaderGraph();
+
+ PrincipledVolumeNode *principled = new PrincipledVolumeNode();
+ graph->add(principled);
+
+ graph->connect(principled->output("Volume"), graph->output()->input("Volume"));
+
+ Shader *shader = new Shader();
+ shader->name = "default_volume";
+ shader->set_graph(graph);
+ scene->shaders.push_back(shader);
+ scene->default_volume = shader;
+ shader->tag_update(scene);
}
/* default light */
@@ -641,9 +667,10 @@ void ShaderManager::add_default(Scene *scene)
Shader *shader = new Shader();
shader->name = "default_light";
- shader->graph = graph;
+ shader->set_graph(graph);
scene->shaders.push_back(shader);
scene->default_light = shader;
+ shader->tag_update(scene);
}
/* default background */
@@ -652,9 +679,10 @@ void ShaderManager::add_default(Scene *scene)
Shader *shader = new Shader();
shader->name = "default_background";
- shader->graph = graph;
+ shader->set_graph(graph);
scene->shaders.push_back(shader);
scene->default_background = shader;
+ shader->tag_update(scene);
}
/* default empty */
@@ -663,9 +691,10 @@ void ShaderManager::add_default(Scene *scene)
Shader *shader = new Shader();
shader->name = "default_empty";
- shader->graph = graph;
+ shader->set_graph(graph);
scene->shaders.push_back(shader);
scene->default_empty = shader;
+ shader->tag_update(scene);
}
}
@@ -704,6 +733,10 @@ void ShaderManager::get_requested_features(Scene *scene,
requested_features->nodes_features = 0;
for (int i = 0; i < scene->shaders.size(); i++) {
Shader *shader = scene->shaders[i];
+ if (!shader->used) {
+ continue;
+ }
+
/* Gather requested features from all the nodes from the graph nodes. */
get_requested_graph_features(shader->graph, requested_features);
ShaderNode *output_node = shader->graph->output();
diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h
index f74204df355..7801fd29276 100644
--- a/intern/cycles/render/shader.h
+++ b/intern/cycles/render/shader.h
@@ -23,8 +23,8 @@
# include <OSL/oslexec.h>
#endif
-#include "render/attribute.h"
#include "kernel/kernel_types.h"
+#include "render/attribute.h"
#include "graph/node.h"
@@ -92,10 +92,12 @@ class Shader : public Node {
bool heterogeneous_volume;
VolumeSampling volume_sampling_method;
int volume_interpolation_method;
+ float volume_step_rate;
+ float prev_volume_step_rate;
/* synchronization */
bool need_update;
- bool need_update_mesh;
+ bool need_update_geometry;
bool need_sync_object;
/* If the shader has only volume components, the surface is assumed to
@@ -118,8 +120,8 @@ class Shader : public Node {
bool has_bssrdf_bump;
bool has_surface_spatial_varying;
bool has_volume_spatial_varying;
+ bool has_volume_attribute_dependency;
bool has_object_dependency;
- bool has_attribute_dependency;
bool has_integrator_dependency;
/* displacement */
@@ -163,7 +165,7 @@ class ShaderManager {
public:
bool need_update;
- static ShaderManager *create(Scene *scene, int shadingsystem);
+ static ShaderManager *create(int shadingsystem);
virtual ~ShaderManager();
virtual void reset(Scene *scene) = 0;
@@ -180,7 +182,6 @@ class ShaderManager {
Progress &progress) = 0;
virtual void device_free(Device *device, DeviceScene *dscene, Scene *scene) = 0;
- void device_update_shaders_used(Scene *scene);
void device_update_common(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
void device_free_common(Device *device, DeviceScene *dscene, Scene *scene);
@@ -196,6 +197,7 @@ class ShaderManager {
static void add_default(Scene *scene);
/* Selective nodes compilation. */
+ void update_shaders_used(Scene *scene);
void get_requested_features(Scene *scene, DeviceRequestedFeatures *requested_features);
static void free_memory();
diff --git a/intern/cycles/render/svm.cpp b/intern/cycles/render/svm.cpp
index 7c33f6c04ae..b4858f488c3 100644
--- a/intern/cycles/render/svm.cpp
+++ b/intern/cycles/render/svm.cpp
@@ -25,8 +25,8 @@
#include "render/shader.h"
#include "render/svm.h"
-#include "util/util_logging.h"
#include "util/util_foreach.h"
+#include "util/util_logging.h"
#include "util/util_progress.h"
#include "util/util_task.h"
@@ -85,9 +85,6 @@ void SVMShaderManager::device_update(Device *device,
/* test if we need to update */
device_free(device, dscene, scene);
- /* determine which shaders are in use */
- device_update_shaders_used(scene);
-
/* Build all shaders. */
TaskPool task_pool;
vector<array<int4>> shader_svm_nodes(num_shaders);
@@ -447,16 +444,14 @@ void SVMCompiler::generate_node(ShaderNode *node, ShaderNodeSet &done)
else if (current_type == SHADER_TYPE_VOLUME) {
if (node->has_spatial_varying())
current_shader->has_volume_spatial_varying = true;
+ if (node->has_attribute_dependency())
+ current_shader->has_volume_attribute_dependency = true;
}
if (node->has_object_dependency()) {
current_shader->has_object_dependency = true;
}
- if (node->has_attribute_dependency()) {
- current_shader->has_attribute_dependency = true;
- }
-
if (node->has_integrator_dependency()) {
current_shader->has_integrator_dependency = true;
}
@@ -867,8 +862,8 @@ void SVMCompiler::compile(Shader *shader, array<int4> &svm_nodes, int index, Sum
shader->has_displacement = false;
shader->has_surface_spatial_varying = false;
shader->has_volume_spatial_varying = false;
+ shader->has_volume_attribute_dependency = false;
shader->has_object_dependency = false;
- shader->has_attribute_dependency = false;
shader->has_integrator_dependency = false;
/* generate bump shader */
diff --git a/intern/cycles/render/tables.cpp b/intern/cycles/render/tables.cpp
index d88925939e3..270e05abe29 100644
--- a/intern/cycles/render/tables.cpp
+++ b/intern/cycles/render/tables.cpp
@@ -14,9 +14,9 @@
* limitations under the License.
*/
+#include "render/tables.h"
#include "device/device.h"
#include "render/scene.h"
-#include "render/tables.h"
#include "util/util_logging.h"
diff --git a/intern/cycles/render/tables.h b/intern/cycles/render/tables.h
index 12b59bb0aeb..3ed2959ae59 100644
--- a/intern/cycles/render/tables.h
+++ b/intern/cycles/render/tables.h
@@ -18,6 +18,7 @@
#define __TABLES_H__
#include "util/util_list.h"
+#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp
index 9ef0c695667..1480b6d1aab 100644
--- a/intern/cycles/render/tile.cpp
+++ b/intern/cycles/render/tile.cpp
@@ -101,6 +101,7 @@ TileManager::TileManager(bool progressive_,
tile_order = tile_order_;
start_resolution = start_resolution_;
pixel_size = pixel_size_;
+ slice_overlap = 0;
num_samples = num_samples_;
num_devices = num_devices_;
preserve_tile_device = preserve_tile_device_;
@@ -201,8 +202,7 @@ int TileManager::gen_tiles(bool sliced)
int image_h = max(1, params.height / resolution);
int2 center = make_int2(image_w / 2, image_h / 2);
- int num_logical_devices = preserve_tile_device ? num_devices : 1;
- int num = min(image_h, num_logical_devices);
+ int num = preserve_tile_device || sliced ? min(image_h, num_devices) : 1;
int slice_num = sliced ? num : 1;
int tile_w = (tile_size.x >= image_w) ? 1 : divide_up(image_w, tile_size.x);
@@ -216,7 +216,7 @@ int TileManager::gen_tiles(bool sliced)
tile_list = state.render_tiles.begin();
if (tile_order == TILE_HILBERT_SPIRAL) {
- assert(!sliced);
+ assert(!sliced && slice_overlap == 0);
int tile_h = (tile_size.y >= image_h) ? 1 : divide_up(image_h, tile_size.y);
state.tiles.resize(tile_w * tile_h);
@@ -319,6 +319,12 @@ int TileManager::gen_tiles(bool sliced)
int slice_h = (slice == slice_num - 1) ? image_h - slice * (image_h / slice_num) :
image_h / slice_num;
+ if (slice_overlap != 0) {
+ int slice_y_offset = max(slice_y - slice_overlap, 0);
+ slice_h = min(slice_y + slice_h + slice_overlap, image_h) - slice_y_offset;
+ slice_y = slice_y_offset;
+ }
+
int tile_h = (tile_size.y >= slice_h) ? 1 : divide_up(slice_h, tile_size.y);
int tiles_per_device = divide_up(tile_w * tile_h, num);
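A small worked example of the overlap adjustment added above (values are illustrative; slice_overlap = 64 matches what Session::set_denoising sets for viewport denoising):

#include <algorithm>

/* Illustration only, not part of the patch: how a middle slice grows by the denoising overlap. */
static void slice_overlap_example()
{
  const int image_h = 1080;     /* image height at the current resolution */
  const int slice_overlap = 64; /* overlap required by the denoiser */
  int slice_y = 540;            /* third of four 270-row slices */
  int slice_h = 270;

  const int slice_y_offset = std::max(slice_y - slice_overlap, 0);                  /* 476 */
  slice_h = std::min(slice_y + slice_h + slice_overlap, image_h) - slice_y_offset;  /* 874 - 476 = 398 */
  slice_y = slice_y_offset;
  /* The slice now covers rows [476, 874), gaining 64 rows on each side so the
   * denoiser sees pixels across the slice boundary. */
}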
@@ -363,6 +369,7 @@ void TileManager::gen_render_tiles()
{
/* Regenerate just the render tiles for progressive render. */
foreach (Tile &tile, state.tiles) {
+ tile.state = Tile::RENDER;
state.render_tiles[tile.device].push_back(tile.index);
}
}
@@ -386,17 +393,29 @@ void TileManager::set_tiles()
int TileManager::get_neighbor_index(int index, int neighbor)
{
- static const int dx[] = {-1, 0, 1, -1, 1, -1, 0, 1, 0}, dy[] = {-1, -1, -1, 0, 0, 1, 1, 1, 0};
+ /* Neighbor indices:
+ * 0 1 2
+ * 3 4 5
+ * 6 7 8
+ */
+ static const int dx[] = {-1, 0, 1, -1, 0, 1, -1, 0, 1};
+ static const int dy[] = {-1, -1, -1, 0, 0, 0, 1, 1, 1};
int resolution = state.resolution_divider;
int image_w = max(1, params.width / resolution);
int image_h = max(1, params.height / resolution);
+
+ int num = min(image_h, num_devices);
+ int slice_num = !background ? num : 1;
+ int slice_h = image_h / slice_num;
+
int tile_w = (tile_size.x >= image_w) ? 1 : divide_up(image_w, tile_size.x);
- int tile_h = (tile_size.y >= image_h) ? 1 : divide_up(image_h, tile_size.y);
+ int tile_h = (tile_size.y >= slice_h) ? 1 : divide_up(slice_h, tile_size.y);
- int nx = state.tiles[index].x / tile_size.x + dx[neighbor],
- ny = state.tiles[index].y / tile_size.y + dy[neighbor];
- if (nx < 0 || ny < 0 || nx >= tile_w || ny >= tile_h)
+ /* Tiles in the state tile list are always indexed from left to right, top to bottom. */
+ int nx = (index % tile_w) + dx[neighbor];
+ int ny = (index / tile_w) + dy[neighbor];
+ if (nx < 0 || ny < 0 || nx >= tile_w || ny >= tile_h * slice_num)
return -1;
return ny * state.tile_stride + nx;
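The lookup above reduces to the following standalone sketch (the function name and grid values are illustrative; the real code additionally accounts for slices and uses state.tile_stride):

/* Standalone sketch of the 3x3 neighbor lookup (illustration, not part of the patch). */
static int grid_neighbor_index(int index, int neighbor, int tile_w, int tile_h, int tile_stride)
{
  static const int dx[] = {-1, 0, 1, -1, 0, 1, -1, 0, 1};
  static const int dy[] = {-1, -1, -1, 0, 0, 0, 1, 1, 1};

  const int nx = (index % tile_w) + dx[neighbor];
  const int ny = (index / tile_w) + dy[neighbor];
  if (nx < 0 || ny < 0 || nx >= tile_w || ny >= tile_h) {
    return -1; /* Neighbor falls outside the tile grid. */
  }
  return ny * tile_stride + nx;
}

/* Example: in a 4x3 grid with tile_stride == 4, tile 5 sits at column 1, row 1;
 * its up-right neighbor (2) is tile 2 and its down neighbor (7) is tile 9. */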
@@ -426,15 +445,11 @@ bool TileManager::finish_tile(int index, bool &delete_tile)
{
delete_tile = false;
- if (progressive) {
- return true;
- }
-
switch (state.tiles[index].state) {
case Tile::RENDER: {
if (!schedule_denoising) {
state.tiles[index].state = Tile::DONE;
- delete_tile = true;
+ delete_tile = !progressive;
return true;
}
state.tiles[index].state = Tile::RENDERED;
@@ -457,15 +472,18 @@ bool TileManager::finish_tile(int index, bool &delete_tile)
int nindex = get_neighbor_index(index, neighbor);
if (check_neighbor_state(nindex, Tile::DENOISED)) {
state.tiles[nindex].state = Tile::DONE;
- /* It can happen that the tile just finished denoising and already can be freed here.
- * However, in that case it still has to be written before deleting, so we can't delete
- * it yet. */
- if (neighbor == 8) {
- delete_tile = true;
- }
- else {
- delete state.tiles[nindex].buffers;
- state.tiles[nindex].buffers = NULL;
+ /* Do not delete finished tiles in progressive mode. */
+ if (!progressive) {
+ /* It can happen that the tile has just finished denoising and can already be freed here.
+ * However, in that case it still has to be written before deleting, so we can't delete
+ * it yet. */
+ if (neighbor == 4) {
+ delete_tile = true;
+ }
+ else {
+ delete state.tiles[nindex].buffers;
+ state.tiles[nindex].buffers = NULL;
+ }
}
}
}
@@ -477,27 +495,65 @@ bool TileManager::finish_tile(int index, bool &delete_tile)
}
}
-bool TileManager::next_tile(Tile *&tile, int device)
+bool TileManager::next_tile(Tile *&tile, int device, uint tile_types)
{
- int logical_device = preserve_tile_device ? device : 0;
+ /* Preserve device if requested, unless this is a separate denoising device that just wants to
+ * grab any available tile. */
+ const bool preserve_device = preserve_tile_device && device < num_devices;
- if (logical_device >= state.render_tiles.size())
- return false;
+ if (tile_types & RenderTile::DENOISE) {
+ int tile_index = -1;
+ int logical_device = preserve_device ? device : 0;
- if (!state.denoising_tiles[logical_device].empty()) {
- int idx = state.denoising_tiles[logical_device].front();
- state.denoising_tiles[logical_device].pop_front();
- tile = &state.tiles[idx];
- return true;
+ while (logical_device < state.denoising_tiles.size()) {
+ if (state.denoising_tiles[logical_device].empty()) {
+ if (preserve_device) {
+ break;
+ }
+ else {
+ logical_device++;
+ continue;
+ }
+ }
+
+ tile_index = state.denoising_tiles[logical_device].front();
+ state.denoising_tiles[logical_device].pop_front();
+ break;
+ }
+
+ if (tile_index >= 0) {
+ tile = &state.tiles[tile_index];
+ return true;
+ }
}
- if (state.render_tiles[logical_device].empty())
- return false;
+ if (tile_types & RenderTile::PATH_TRACE) {
+ int tile_index = -1;
+ int logical_device = preserve_device ? device : 0;
- int idx = state.render_tiles[logical_device].front();
- state.render_tiles[logical_device].pop_front();
- tile = &state.tiles[idx];
- return true;
+ while (logical_device < state.render_tiles.size()) {
+ if (state.render_tiles[logical_device].empty()) {
+ if (preserve_device) {
+ break;
+ }
+ else {
+ logical_device++;
+ continue;
+ }
+ }
+
+ tile_index = state.render_tiles[logical_device].front();
+ state.render_tiles[logical_device].pop_front();
+ break;
+ }
+
+ if (tile_index >= 0) {
+ tile = &state.tiles[tile_index];
+ return true;
+ }
+ }
+
+ return false;
}
bool TileManager::done()
@@ -508,6 +564,16 @@ bool TileManager::done()
(state.sample + state.num_samples >= end_sample);
}
+bool TileManager::has_tiles()
+{
+ foreach (Tile &tile, state.tiles) {
+ if (tile.state != Tile::DONE) {
+ return true;
+ }
+ }
+ return false;
+}
+
bool TileManager::next()
{
if (done())
diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h
index 017c1af0ead..9fb9c1ca782 100644
--- a/intern/cycles/render/tile.h
+++ b/intern/cycles/render/tile.h
@@ -89,6 +89,7 @@ class TileManager {
} state;
int num_samples;
+ int slice_overlap;
TileManager(bool progressive,
int num_samples,
@@ -105,15 +106,19 @@ class TileManager {
void reset(BufferParams &params, int num_samples);
void set_samples(int num_samples);
bool next();
- bool next_tile(Tile *&tile, int device = 0);
+ bool next_tile(Tile *&tile, int device, uint tile_types);
bool finish_tile(int index, bool &delete_tile);
bool done();
+ bool has_tiles();
void set_tile_order(TileOrder tile_order_)
{
tile_order = tile_order_;
}
+ int get_neighbor_index(int index, int neighbor);
+ bool check_neighbor_state(int index, Tile::State state);
+
/* ** Sample range rendering. ** */
/* Start sample in the range. */
@@ -160,9 +165,6 @@ class TileManager {
/* Generate tile list, return number of tiles. */
int gen_tiles(bool sliced);
void gen_render_tiles();
-
- int get_neighbor_index(int index, int neighbor);
- bool check_neighbor_state(int index, Tile::State state);
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/test/CMakeLists.txt b/intern/cycles/test/CMakeLists.txt
index 98fcc8cd15e..6dcc7f7b3dd 100644
--- a/intern/cycles/test/CMakeLists.txt
+++ b/intern/cycles/test/CMakeLists.txt
@@ -82,25 +82,33 @@ list(APPEND ALL_CYCLES_LIBRARIES
${TIFF_LIBRARY}
${OPENIMAGEIO_LIBRARIES}
${OPENEXR_LIBRARIES}
+ ${OPENVDB_LIBRARIES}
)
include_directories(${INC})
-link_directories(${OPENIMAGEIO_LIBPATH}
- ${BOOST_LIBPATH}
- ${PNG_LIBPATH}
- ${JPEG_LIBPATH}
- ${ZLIB_LIBPATH}
- ${TIFF_LIBPATH}
- ${OPENEXR_LIBPATH}
- ${OPENCOLORIO_LIBPATH})
+link_directories(
+ ${OPENIMAGEIO_LIBPATH}
+ ${BOOST_LIBPATH}
+ ${PNG_LIBPATH}
+ ${JPEG_LIBPATH}
+ ${ZLIB_LIBPATH}
+ ${TIFF_LIBPATH}
+ ${OPENEXR_LIBPATH}
+ ${OPENCOLORIO_LIBPATH}
+ ${OPENVDB_LIBPATH}
+)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${PLATFORM_LINKFLAGS}")
set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} ${PLATFORM_LINKFLAGS_DEBUG}")
CYCLES_TEST(render_graph_finalize "${ALL_CYCLES_LIBRARIES};bf_intern_numaapi")
CYCLES_TEST(util_aligned_malloc "cycles_util")
-CYCLES_TEST(util_path "cycles_util;${BOOST_LIBRARIES};${OPENIMAGEIO_LIBRARIES}")
-CYCLES_TEST(util_string "cycles_util;${BOOST_LIBRARIES};${OPENIMAGEIO_LIBRARIES}")
-CYCLES_TEST(util_task "cycles_util;${BOOST_LIBRARIES};${OPENIMAGEIO_LIBRARIES};bf_intern_numaapi")
-CYCLES_TEST(util_time "cycles_util;${BOOST_LIBRARIES};${OPENIMAGEIO_LIBRARIES}")
+CYCLES_TEST(util_path "cycles_util;${OPENIMAGEIO_LIBRARIES};${BOOST_LIBRARIES}")
+CYCLES_TEST(util_string "cycles_util;${OPENIMAGEIO_LIBRARIES};${BOOST_LIBRARIES}")
+CYCLES_TEST(util_task "cycles_util;${OPENIMAGEIO_LIBRARIES};${BOOST_LIBRARIES};bf_intern_numaapi")
+CYCLES_TEST(util_time "cycles_util;${OPENIMAGEIO_LIBRARIES};${BOOST_LIBRARIES}")
+set_source_files_properties(util_avxf_avx_test.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
+CYCLES_TEST(util_avxf_avx "cycles_util;bf_intern_numaapi;${OPENIMAGEIO_LIBRARIES};${BOOST_LIBRARIES}")
+set_source_files_properties(util_avxf_avx2_test.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
+CYCLES_TEST(util_avxf_avx2 "cycles_util;bf_intern_numaapi;${OPENIMAGEIO_LIBRARIES};${BOOST_LIBRARIES}")
diff --git a/intern/cycles/test/render_graph_finalize_test.cpp b/intern/cycles/test/render_graph_finalize_test.cpp
index ca93f8b02d0..87389ebfb16 100644
--- a/intern/cycles/test/render_graph_finalize_test.cpp
+++ b/intern/cycles/test/render_graph_finalize_test.cpp
@@ -14,12 +14,12 @@
* limitations under the License.
*/
-#include "testing/testing.h"
#include "testing/mock_log.h"
+#include "testing/testing.h"
#include "render/graph.h"
-#include "render/scene.h"
#include "render/nodes.h"
+#include "render/scene.h"
#include "util/util_array.h"
#include "util/util_logging.h"
#include "util/util_string.h"
diff --git a/intern/cycles/test/util_avxf_avx2_test.cpp b/intern/cycles/test/util_avxf_avx2_test.cpp
new file mode 100644
index 00000000000..9b466ddd3a0
--- /dev/null
+++ b/intern/cycles/test/util_avxf_avx2_test.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#define __KERNEL_AVX2__
+#define __KERNEL_CPU__
+
+#if defined(i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+# include "util_avxf_test.h"
+#endif
diff --git a/intern/cycles/test/util_avxf_avx_test.cpp b/intern/cycles/test/util_avxf_avx_test.cpp
new file mode 100644
index 00000000000..cea67649b80
--- /dev/null
+++ b/intern/cycles/test/util_avxf_avx_test.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#define __KERNEL_AVX__
+#define __KERNEL_CPU__
+
+#if defined(i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+# include "util_avxf_test.h"
+#endif
diff --git a/intern/cycles/test/util_avxf_test.h b/intern/cycles/test/util_avxf_test.h
new file mode 100644
index 00000000000..d93563fdb3f
--- /dev/null
+++ b/intern/cycles/test/util_avxf_test.h
@@ -0,0 +1,222 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "testing/testing.h"
+#include "util/util_system.h"
+#include "util/util_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+bool validate_cpu_capabilities()
+{
+
+#ifdef __KERNEL_AVX2__
+ return system_cpu_support_avx2();
+#else
+# ifdef __KERNEL_AVX__
+ return system_cpu_support_avx();
+# endif
+#endif
+}
+
+#define VALIDATECPU \
+ if (!validate_cpu_capabilities()) \
+ return;
+
+#define compare_vector_scalar(a, b) \
+ for (size_t index = 0; index < a.size; index++) \
+ EXPECT_FLOAT_EQ(a[index], b);
+
+#define compare_vector_vector(a, b) \
+ for (size_t index = 0; index < a.size; index++) \
+ EXPECT_FLOAT_EQ(a[index], b[index]);
+
+#define compare_vector_vector_near(a, b, abserror) \
+ for (size_t index = 0; index < a.size; index++) \
+ EXPECT_NEAR(a[index], b[index], abserror);
+
+#define basic_test_vv(a, b, op) \
+ VALIDATECPU \
+ avxf c = a op b; \
+ for (size_t i = 0; i < a.size; i++) \
+ EXPECT_FLOAT_EQ(c[i], a[i] op b[i]);
+
+/* vector op float tests */
+#define basic_test_vf(a, b, op) \
+ VALIDATECPU \
+ avxf c = a op b; \
+ for (size_t i = 0; i < a.size; i++) \
+ EXPECT_FLOAT_EQ(c[i], a[i] op b);
+
+const avxf avxf_a(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
+const avxf avxf_b(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
+const avxf avxf_c(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f);
+const float float_b = 1.5f;
+
+TEST(util_avx, avxf_add_vv){basic_test_vv(avxf_a, avxf_b, +)} TEST(util_avx, avxf_sub_vv){
+ basic_test_vv(avxf_a, avxf_b, -)} TEST(util_avx, avxf_mul_vv){
+ basic_test_vv(avxf_a, avxf_b, *)} TEST(util_avx, avxf_div_vv){
+ basic_test_vv(avxf_a, avxf_b, /)} TEST(util_avx, avxf_add_vf){
+ basic_test_vf(avxf_a, float_b, +)} TEST(util_avx, avxf_sub_vf){
+ basic_test_vf(avxf_a, float_b, -)} TEST(util_avx, avxf_mul_vf){
+ basic_test_vf(avxf_a, float_b, *)} TEST(util_avx,
+ avxf_div_vf){basic_test_vf(avxf_a, float_b, /)}
+
+TEST(util_avx, avxf_ctor)
+{
+ VALIDATECPU
+ compare_vector_scalar(avxf(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f),
+ static_cast<float>(index));
+ compare_vector_scalar(avxf(1.0f), 1.0f);
+ compare_vector_vector(avxf(1.0f, 2.0f), avxf(1.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 2.0f));
+ compare_vector_vector(avxf(1.0f, 2.0f, 3.0f, 4.0f),
+ avxf(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f));
+ compare_vector_vector(avxf(make_float3(1.0f, 2.0f, 3.0f)),
+ avxf(0.0f, 3.0f, 2.0f, 1.0f, 0.0f, 3.0f, 2.0f, 1.0f));
+}
+
+TEST(util_avx, avxf_sqrt)
+{
+ VALIDATECPU
+ compare_vector_vector(mm256_sqrt(avxf(1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f)),
+ avxf(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f));
+}
+
+TEST(util_avx, avxf_min_max)
+{
+ VALIDATECPU
+ compare_vector_vector(min(avxf_a, avxf_b), avxf_a);
+ compare_vector_vector(max(avxf_a, avxf_b), avxf_b);
+}
+
+TEST(util_avx, avxf_set_sign)
+{
+ VALIDATECPU
+ avxf res = set_sign_bit<1, 0, 0, 0, 0, 0, 0, 0>(avxf_a);
+ compare_vector_vector(res, avxf(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, -0.8f));
+}
+
+TEST(util_avx, avxf_msub)
+{
+ VALIDATECPU
+ avxf res = msub(avxf_a, avxf_b, avxf_c);
+ avxf exp = avxf((avxf_a[7] * avxf_b[7]) - avxf_c[7],
+ (avxf_a[6] * avxf_b[6]) - avxf_c[6],
+ (avxf_a[5] * avxf_b[5]) - avxf_c[5],
+ (avxf_a[4] * avxf_b[4]) - avxf_c[4],
+ (avxf_a[3] * avxf_b[3]) - avxf_c[3],
+ (avxf_a[2] * avxf_b[2]) - avxf_c[2],
+ (avxf_a[1] * avxf_b[1]) - avxf_c[1],
+ (avxf_a[0] * avxf_b[0]) - avxf_c[0]);
+ compare_vector_vector(res, exp);
+}
+
+TEST(util_avx, avxf_madd)
+{
+ VALIDATECPU
+ avxf res = madd(avxf_a, avxf_b, avxf_c);
+ avxf exp = avxf((avxf_a[7] * avxf_b[7]) + avxf_c[7],
+ (avxf_a[6] * avxf_b[6]) + avxf_c[6],
+ (avxf_a[5] * avxf_b[5]) + avxf_c[5],
+ (avxf_a[4] * avxf_b[4]) + avxf_c[4],
+ (avxf_a[3] * avxf_b[3]) + avxf_c[3],
+ (avxf_a[2] * avxf_b[2]) + avxf_c[2],
+ (avxf_a[1] * avxf_b[1]) + avxf_c[1],
+ (avxf_a[0] * avxf_b[0]) + avxf_c[0]);
+ compare_vector_vector(res, exp);
+}
+
+TEST(util_avx, avxf_nmadd)
+{
+ VALIDATECPU
+ avxf res = nmadd(avxf_a, avxf_b, avxf_c);
+ avxf exp = avxf(avxf_c[7] - (avxf_a[7] * avxf_b[7]),
+ avxf_c[6] - (avxf_a[6] * avxf_b[6]),
+ avxf_c[5] - (avxf_a[5] * avxf_b[5]),
+ avxf_c[4] - (avxf_a[4] * avxf_b[4]),
+ avxf_c[3] - (avxf_a[3] * avxf_b[3]),
+ avxf_c[2] - (avxf_a[2] * avxf_b[2]),
+ avxf_c[1] - (avxf_a[1] * avxf_b[1]),
+ avxf_c[0] - (avxf_a[0] * avxf_b[0]));
+ compare_vector_vector(res, exp);
+}
+
+TEST(util_avx, avxf_compare)
+{
+ VALIDATECPU
+ avxf a(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f);
+ avxf b(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f);
+ avxb res = a <= b;
+ int exp[8] = {
+ a[0] <= b[0] ? -1 : 0,
+ a[1] <= b[1] ? -1 : 0,
+ a[2] <= b[2] ? -1 : 0,
+ a[3] <= b[3] ? -1 : 0,
+ a[4] <= b[4] ? -1 : 0,
+ a[5] <= b[5] ? -1 : 0,
+ a[6] <= b[6] ? -1 : 0,
+ a[7] <= b[7] ? -1 : 0,
+ };
+ compare_vector_vector(res, exp);
+}
+
+TEST(util_avx, avxf_permute)
+{
+ VALIDATECPU
+ avxf res = permute<3, 0, 1, 7, 6, 5, 2, 4>(avxf_b);
+ compare_vector_vector(res, avxf(4.0f, 6.0f, 3.0f, 2.0f, 1.0f, 7.0f, 8.0f, 5.0f));
+}
+
+TEST(util_avx, avxf_blend)
+{
+ VALIDATECPU
+ avxf res = blend<0, 0, 1, 0, 1, 0, 1, 0>(avxf_a, avxf_b);
+ compare_vector_vector(res, avxf(0.1f, 0.2f, 3.0f, 0.4f, 5.0f, 0.6f, 7.0f, 0.8f));
+}
+
+TEST(util_avx, avxf_shuffle)
+{
+ VALIDATECPU
+ avxf res = shuffle<0, 1, 2, 3, 1, 3, 2, 0>(avxf_a);
+ compare_vector_vector(res, avxf(0.4f, 0.2f, 0.1f, 0.3f, 0.5f, 0.6f, 0.7f, 0.8f));
+}
+
+TEST(util_avx, avxf_cross)
+{
+ VALIDATECPU
+ avxf res = cross(avxf_b, avxf_c);
+ compare_vector_vector_near(res,
+ avxf(0.0f,
+ -9.5367432e-07f,
+ 0.0f,
+ 4.7683716e-07f,
+ 0.0f,
+ -3.8146973e-06f,
+ 3.8146973e-06f,
+ 3.8146973e-06f),
+ 0.000002000f);
+}
+
+TEST(util_avx, avxf_dot3)
+{
+ VALIDATECPU
+ float den, den2;
+ dot3(avxf_a, avxf_b, den, den2);
+ EXPECT_FLOAT_EQ(den, 14.9f);
+ EXPECT_FLOAT_EQ(den2, 2.9f);
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
index ef100c12453..c1f71461dfd 100644
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -102,6 +102,7 @@ set(SRC_HEADERS
util_sky_model_data.h
util_avxf.h
util_avxb.h
+ util_semaphore.h
util_sseb.h
util_ssef.h
util_ssei.h
diff --git a/intern/cycles/util/util_algorithm.h b/intern/cycles/util/util_algorithm.h
index 62093039625..63abd4e92a3 100644
--- a/intern/cycles/util/util_algorithm.h
+++ b/intern/cycles/util/util_algorithm.h
@@ -25,6 +25,7 @@ using std::max;
using std::min;
using std::remove;
using std::sort;
+using std::stable_sort;
using std::swap;
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h
index a8ea1dc925e..13d177d2b25 100644
--- a/intern/cycles/util/util_atomic.h
+++ b/intern/cycles/util/util_atomic.h
@@ -77,6 +77,7 @@ ccl_device_inline float atomic_compare_and_swap_float(volatile ccl_global float
# define atomic_fetch_and_add_uint32(p, x) atomic_add((p), (x))
# define atomic_fetch_and_inc_uint32(p) atomic_inc((p))
# define atomic_fetch_and_dec_uint32(p) atomic_dec((p))
+# define atomic_fetch_and_or_uint32(p, x) atomic_or((p), (x))
# define CCL_LOCAL_MEM_FENCE CLK_LOCAL_MEM_FENCE
# define ccl_barrier(flags) barrier(flags)
@@ -91,6 +92,7 @@ ccl_device_inline float atomic_compare_and_swap_float(volatile ccl_global float
# define atomic_fetch_and_sub_uint32(p, x) atomicSub((unsigned int *)(p), (unsigned int)(x))
# define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1)
# define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_sub_uint32((p), 1)
+# define atomic_fetch_and_or_uint32(p, x) atomicOr((unsigned int *)(p), (unsigned int)(x))
ccl_device_inline float atomic_compare_and_swap_float(volatile float *dest,
const float old_val,
diff --git a/intern/cycles/util/util_boundbox.h b/intern/cycles/util/util_boundbox.h
index b5c3f1a8954..7fab7bd5a15 100644
--- a/intern/cycles/util/util_boundbox.h
+++ b/intern/cycles/util/util_boundbox.h
@@ -17,8 +17,8 @@
#ifndef __UTIL_BOUNDBOX_H__
#define __UTIL_BOUNDBOX_H__
-#include <math.h>
#include <float.h>
+#include <math.h>
#include "util/util_math.h"
#include "util/util_string.h"
diff --git a/intern/cycles/util/util_defines.h b/intern/cycles/util/util_defines.h
index b29d4163133..24a20a969ab 100644
--- a/intern/cycles/util/util_defines.h
+++ b/intern/cycles/util/util_defines.h
@@ -15,6 +15,11 @@
* limitations under the License.
*/
+/* clang-format off */
+
+/* #define __forceinline triggers a bug in some clang-format versions, so disable
+ * formatting for the entire file to keep results consistent. */
+
#ifndef __UTIL_DEFINES_H__
#define __UTIL_DEFINES_H__
diff --git a/intern/cycles/util/util_disjoint_set.h b/intern/cycles/util/util_disjoint_set.h
index 80f3c714a29..946632371d2 100644
--- a/intern/cycles/util/util_disjoint_set.h
+++ b/intern/cycles/util/util_disjoint_set.h
@@ -17,8 +17,8 @@
#ifndef __UTIL_DISJOINT_SET_H__
#define __UTIL_DISJOINT_SET_H__
-#include <utility>
#include "util_array.h"
+#include <utility>
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index 647e9cf2fd6..8de62893ba8 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -17,8 +17,8 @@
#ifndef __UTIL_HALF_H__
#define __UTIL_HALF_H__
-#include "util/util_types.h"
#include "util/util_math.h"
+#include "util/util_types.h"
#ifdef __KERNEL_SSE2__
# include "util/util_simd.h"
diff --git a/intern/cycles/util/util_md5.cpp b/intern/cycles/util/util_md5.cpp
index c11f495f785..0df521c2b58 100644
--- a/intern/cycles/util/util_md5.cpp
+++ b/intern/cycles/util/util_md5.cpp
@@ -26,8 +26,8 @@
#include "util_md5.h"
#include "util_path.h"
-#include <string.h>
#include <stdio.h>
+#include <string.h>
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp
index 77293c45f6b..8905c8bc7f0 100644
--- a/intern/cycles/util/util_path.cpp
+++ b/intern/cycles/util/util_path.cpp
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "util/util_md5.h"
#include "util/util_path.h"
+#include "util/util_md5.h"
#include "util/util_string.h"
#include <OpenImageIO/filesystem.h>
@@ -36,8 +36,8 @@ OIIO_NAMESPACE_USING
# define DIR_SEP '/'
# include <dirent.h>
# include <pwd.h>
-# include <unistd.h>
# include <sys/types.h>
+# include <unistd.h>
#endif
#ifdef HAVE_SHLWAPI_H
diff --git a/intern/cycles/util/util_profiling.cpp b/intern/cycles/util/util_profiling.cpp
index bbefbadd0fe..073b09f719f 100644
--- a/intern/cycles/util/util_profiling.cpp
+++ b/intern/cycles/util/util_profiling.cpp
@@ -14,8 +14,9 @@
* limitations under the License.
*/
-#include "util/util_algorithm.h"
#include "util/util_profiling.h"
+#include "util/util_algorithm.h"
+#include "util/util_foreach.h"
#include "util/util_set.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/util/util_profiling.h b/intern/cycles/util/util_profiling.h
index f5f500239f2..ceec08ed894 100644
--- a/intern/cycles/util/util_profiling.h
+++ b/intern/cycles/util/util_profiling.h
@@ -19,7 +19,6 @@
#include <atomic>
-#include "util/util_foreach.h"
#include "util/util_map.h"
#include "util/util_thread.h"
#include "util/util_vector.h"
diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h
index 379beaeeefa..26534a29dfe 100644
--- a/intern/cycles/util/util_progress.h
+++ b/intern/cycles/util/util_progress.h
@@ -25,8 +25,8 @@
#include "util/util_function.h"
#include "util/util_string.h"
-#include "util/util_time.h"
#include "util/util_thread.h"
+#include "util/util_time.h"
CCL_NAMESPACE_BEGIN
@@ -204,6 +204,8 @@ class Progress {
float get_progress()
{
+ thread_scoped_lock lock(progress_mutex);
+
if (total_pixel_samples > 0) {
return ((float)pixel_samples) / total_pixel_samples;
}
diff --git a/intern/cycles/util/util_semaphore.h b/intern/cycles/util/util_semaphore.h
new file mode 100644
index 00000000000..d995b0732b8
--- /dev/null
+++ b/intern/cycles/util/util_semaphore.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_SEMAPHORE_H__
+#define __UTIL_SEMAPHORE_H__
+
+#include "util/util_thread.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Counting Semaphore
+ *
+ * To restrict concurrent access to a resource to a specified number
+ * of threads. Similar to std::counting_semaphore from C++20. */
+
+class thread_counting_semaphore {
+ public:
+ explicit thread_counting_semaphore(const int count) : count(count)
+ {
+ }
+
+ thread_counting_semaphore(const thread_counting_semaphore &) = delete;
+
+ void acquire()
+ {
+ thread_scoped_lock lock(mutex);
+ while (count == 0) {
+ condition.wait(lock);
+ }
+ count--;
+ }
+
+ void release()
+ {
+ thread_scoped_lock lock(mutex);
+ count++;
+ condition.notify_one();
+ }
+
+ protected:
+ thread_mutex mutex;
+ thread_condition_variable condition;
+ int count;
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_SEMAPHORE_H__ */
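A minimal usage sketch for the new counting semaphore (the function name and the concurrency limit are hypothetical, not taken from this patch):

/* Illustration only: limit how many threads may use a shared resource at once. */
#include "util/util_semaphore.h"

CCL_NAMESPACE_BEGIN

static thread_counting_semaphore resource_slots(2); /* hypothetical: allow 2 concurrent users */

static void do_limited_work()
{
  resource_slots.acquire(); /* blocks while both slots are taken */
  /* ... use the shared resource ... */
  resource_slots.release(); /* wakes one waiting thread, if any */
}

CCL_NAMESPACE_END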
diff --git a/intern/cycles/util/util_sky_model.cpp b/intern/cycles/util/util_sky_model.cpp
index 4a6a9f32607..8cdad8a90a4 100644
--- a/intern/cycles/util/util_sky_model.cpp
+++ b/intern/cycles/util/util_sky_model.cpp
@@ -101,9 +101,9 @@ All instructions on how to use this code are in the accompanying header file.
#include "util/util_sky_model_data.h"
#include <assert.h>
+#include <math.h>
#include <stdio.h>
#include <stdlib.h>
-#include <math.h>
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/util/util_ssef.h b/intern/cycles/util/util_ssef.h
index fa525daf37c..e9f0efb4efb 100644
--- a/intern/cycles/util/util_ssef.h
+++ b/intern/cycles/util/util_ssef.h
@@ -18,6 +18,8 @@
#ifndef __UTIL_SSEF_H__
#define __UTIL_SSEF_H__
+#include "util_ssei.h"
+
CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_SSE2__
diff --git a/intern/cycles/util/util_static_assert.h b/intern/cycles/util/util_static_assert.h
index ceb52830319..d809f2e06d7 100644
--- a/intern/cycles/util/util_static_assert.h
+++ b/intern/cycles/util/util_static_assert.h
@@ -14,34 +14,20 @@
* limitations under the License.
*/
+/* clang-format off */
+
+/* #define static_assert triggers a bug in some clang-format versions, so disable
+ * formatting for the entire file to keep results consistent. */
+
#ifndef __UTIL_STATIC_ASSERT_H__
#define __UTIL_STATIC_ASSERT_H__
CCL_NAMESPACE_BEGIN
-/* TODO(sergey): In theory CUDA might work with own static assert
- * implementation since it's just pure C++.
- */
-#ifdef __KERNEL_GPU__
-# ifndef static_assert
-# define static_assert(statement, message)
-# endif
-#endif /* __KERNEL_GPU__ */
-
-/* TODO(sergey): For until C++11 is a bare minimum for us,
- * we do a bit of a trickery to show meaningful message so
- * it's more or less clear what's wrong when building without
- * C++11.
- *
- * The thing here is: our non-C++11 implementation doesn't
- * have a way to print any message after preprocessor
- * substitution so we rely on the message which is passed to
- * static_assert() since that's the only message visible when
- * compilation fails.
- *
- * After C++11 bump it should be possible to glue structure
- * name to the error message,
- */
+#if defined(__KERNEL_OPENCL__) || defined(CYCLES_CUBIN_CC)
+# define static_assert(statement, message)
+#endif /* __KERNEL_OPENCL__ */
+
#define static_assert_align(st, align) \
static_assert((sizeof(st) % (align) == 0), "Structure must be strictly aligned") // NOLINT
diff --git a/intern/cycles/util/util_string.h b/intern/cycles/util/util_string.h
index f71145741c9..ce2d4acdde4 100644
--- a/intern/cycles/util/util_string.h
+++ b/intern/cycles/util/util_string.h
@@ -17,9 +17,9 @@
#ifndef __UTIL_STRING_H__
#define __UTIL_STRING_H__
+#include <sstream>
#include <string.h>
#include <string>
-#include <sstream>
#include "util/util_vector.h"
diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp
index f700f9bd277..6d32153209a 100644
--- a/intern/cycles/util/util_system.cpp
+++ b/intern/cycles/util/util_system.cpp
@@ -17,8 +17,8 @@
#include "util/util_system.h"
#include "util/util_logging.h"
-#include "util/util_types.h"
#include "util/util_string.h"
+#include "util/util_types.h"
#include <numaapi.h>
@@ -35,8 +35,8 @@ OIIO_NAMESPACE_USING
# include <sys/sysctl.h>
# include <sys/types.h>
#else
-# include <unistd.h>
# include <sys/ioctl.h>
+# include <unistd.h>
#endif
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp
index 24286116dfb..61aa28c6815 100644
--- a/intern/cycles/util/util_task.cpp
+++ b/intern/cycles/util/util_task.cpp
@@ -14,10 +14,10 @@
* limitations under the License.
*/
+#include "util/util_task.h"
#include "util/util_foreach.h"
#include "util/util_logging.h"
#include "util/util_system.h"
-#include "util/util_task.h"
#include "util/util_time.h"
//#define THREADING_DEBUG_ENABLED
diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h
index d43852480d1..863c2ea3124 100644
--- a/intern/cycles/util/util_texture.h
+++ b/intern/cycles/util/util_texture.h
@@ -17,6 +17,8 @@
#ifndef __UTIL_TEXTURE_H__
#define __UTIL_TEXTURE_H__
+#include "util_transform.h"
+
CCL_NAMESPACE_BEGIN
/* Texture limits on devices. */
@@ -91,12 +93,17 @@ typedef enum ExtensionType {
typedef struct TextureInfo {
/* Pointer, offset or texture depending on device. */
uint64_t data;
+ /* Data type. */
+ uint data_type;
/* Buffer number for OpenCL. */
uint cl_buffer;
/* Interpolation and extension type. */
uint interpolation, extension;
/* Dimensions. */
uint width, height, depth;
+ /* Transform for 3D textures. */
+ uint use_transform_3d;
+ Transform transform_3d;
} TextureInfo;
CCL_NAMESPACE_END
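A hedged sketch of how the new TextureInfo fields could be consumed on the kernel side: transform_point() is the existing helper from util_transform.h (hence the new include above), but the lookup function below and the exact use of the flag are illustrative assumptions, not code from this diff.

/* Illustrative only: remap a 3D lookup position when the texture carries a
 * transform (use_transform_3d != 0); otherwise pass the position through. */
ccl_device_inline float3 texture_3d_position(const TextureInfo &info, float3 P)
{
  if (info.use_transform_3d) {
    P = transform_point(&info.transform_3d, P);
  }
  return P;
}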
diff --git a/intern/cycles/util/util_thread.h b/intern/cycles/util/util_thread.h
index 18ec5b32144..f6dbc9186b8 100644
--- a/intern/cycles/util/util_thread.h
+++ b/intern/cycles/util/util_thread.h
@@ -17,11 +17,11 @@
#ifndef __UTIL_THREAD_H__
#define __UTIL_THREAD_H__
-#include <thread>
-#include <mutex>
#include <condition_variable>
#include <functional>
+#include <mutex>
#include <queue>
+#include <thread>
#ifdef _WIN32
# include "util_windows.h"
diff --git a/intern/cycles/util/util_transform.cpp b/intern/cycles/util/util_transform.cpp
index 302a8a386ac..101122740d7 100644
--- a/intern/cycles/util/util_transform.cpp
+++ b/intern/cycles/util/util_transform.cpp
@@ -46,8 +46,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include "util/util_projection.h"
#include "util/util_transform.h"
+#include "util/util_projection.h"
#include "util/util_boundbox.h"
#include "util/util_math.h"
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
index 407654245cb..d0a6264d5cf 100644
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -344,10 +344,10 @@ ccl_device_inline Transform transform_empty()
ccl_device_inline float4 quat_interpolate(float4 q1, float4 q2, float t)
{
- /* use simpe nlerp instead of slerp. it's faster and almost the same */
+ /* OptiX uses lerp to interpolate motion transformations. */
+#ifdef __KERNEL_OPTIX__
return normalize((1.0f - t) * q1 + t * q2);
-
-#if 0
+#else /* __KERNEL_OPTIX__ */
/* note: this does not ensure rotation around shortest angle, q1 and q2
* are assumed to be matched already in transform_motion_decompose */
float costheta = dot(q1, q2);
@@ -365,7 +365,7 @@ ccl_device_inline float4 quat_interpolate(float4 q1, float4 q2, float t)
float thetap = theta * t;
return q1 * cosf(thetap) + qperp * sinf(thetap);
}
-#endif
+#endif /* __KERNEL_OPTIX__ */
}
ccl_device_inline Transform transform_quick_inverse(Transform M)
@@ -468,29 +468,6 @@ ccl_device void transform_motion_array_interpolate(Transform *tfm,
#ifndef __KERNEL_GPU__
-# ifdef WITH_EMBREE
-ccl_device void transform_motion_array_interpolate_straight(
- Transform *tfm, const ccl_global DecomposedTransform *motion, uint numsteps, float time)
-{
- /* Figure out which steps we need to interpolate. */
- int maxstep = numsteps - 1;
- int step = min((int)(time * maxstep), maxstep - 1);
- float t = time * maxstep - step;
-
- const ccl_global DecomposedTransform *a = motion + step;
- const ccl_global DecomposedTransform *b = motion + step + 1;
- Transform step1, step2;
-
- transform_compose(&step1, a);
- transform_compose(&step2, b);
-
- /* matrix lerp */
- tfm->x = (1.0f - t) * step1.x + t * step2.x;
- tfm->y = (1.0f - t) * step1.y + t * step2.y;
- tfm->z = (1.0f - t) * step1.z + t * step2.z;
-}
-# endif
-
class BoundBox2D;
ccl_device_inline bool operator==(const DecomposedTransform &A, const DecomposedTransform &B)
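For reference on the quat_interpolate() change above: on the OptiX path the function now returns a normalized linear blend (nlerp), matching how OptiX interpolates motion transforms, while the other kernels keep the previous arc interpolation (slerp-style) branch. A standalone sketch of the nlerp form, written with plain types purely for illustration:

/* Hypothetical standalone nlerp: blend component-wise, then renormalize so the
 * result stays a unit quaternion. Assumes the inputs are already matched to the
 * same hemisphere, as the existing comment about transform_motion_decompose notes. */
#include <cmath>

struct Quat {
  float x, y, z, w;
};

static Quat nlerp(const Quat &a, const Quat &b, float t)
{
  Quat q = {(1.0f - t) * a.x + t * b.x,
            (1.0f - t) * a.y + t * b.y,
            (1.0f - t) * a.z + t * b.z,
            (1.0f - t) * a.w + t * b.w};
  const float len = std::sqrt(q.x * q.x + q.y * q.y + q.z * q.z + q.w * q.w);
  q.x /= len;
  q.y /= len;
  q.z /= len;
  q.w /= len;
  return q;
}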
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index 48e9983ac8f..f6535848480 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -101,6 +101,11 @@ ccl_device_inline size_t round_down(size_t x, size_t multiple)
return (x / multiple) * multiple;
}
+ccl_device_inline bool is_power_of_two(size_t x)
+{
+ return (x & (x - 1)) == 0;
+}
+
CCL_NAMESPACE_END
/* Vectorized types declaration. */
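A short usage note for the new is_power_of_two() helper: the bit trick (x & (x - 1)) == 0 also reports true for x == 0, so callers that need to reject zero must check for it separately. The alignment-validation wrapper below is an illustrative assumption, not code from this diff.

/* Illustrative only: a valid alignment is non-zero and a power of two. */
ccl_device_inline bool is_valid_alignment(size_t alignment)
{
  return alignment != 0 && is_power_of_two(alignment);
}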
@@ -148,8 +153,8 @@ CCL_NAMESPACE_END
/* SSE types. */
#ifndef __KERNEL_GPU__
# include "util/util_sseb.h"
-# include "util/util_ssei.h"
# include "util/util_ssef.h"
+# include "util/util_ssei.h"
# if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
# include "util/util_avxb.h"
# include "util/util_avxf.h"
diff --git a/intern/cycles/util/util_version.h b/intern/cycles/util/util_version.h
index 38829d3a29c..bb2c99cc6d7 100644
--- a/intern/cycles/util/util_version.h
+++ b/intern/cycles/util/util_version.h
@@ -22,7 +22,7 @@
CCL_NAMESPACE_BEGIN
#define CYCLES_VERSION_MAJOR 1
-#define CYCLES_VERSION_MINOR 9
+#define CYCLES_VERSION_MINOR 12
#define CYCLES_VERSION_PATCH 0
#define CYCLES_MAKE_VERSION_STRING2(a, b, c) #a "." #b "." #c
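For readers unfamiliar with the idiom behind CYCLES_MAKE_VERSION_STRING2: the '#' operator stringifies its argument before macro expansion, so the usual pattern is an outer wrapper whose arguments get expanded first and an inner macro that does the stringification. A minimal standalone sketch; the wrapper name below is illustrative, not taken from this diff.

/* Hypothetical two-level stringification: arguments of the outer macro expand
 * to 1, 12 and 0 before the inner macro glues them into a string literal. */
#define MAKE_VERSION_STRING2(a, b, c) #a "." #b "." #c
#define MAKE_VERSION_STRING(a, b, c) MAKE_VERSION_STRING2(a, b, c)
/* MAKE_VERSION_STRING(CYCLES_VERSION_MAJOR, CYCLES_VERSION_MINOR, CYCLES_VERSION_PATCH)
 * expands to "1.12.0" with the minor version bumped above. */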
diff --git a/intern/cycles/util/util_view.cpp b/intern/cycles/util/util_view.cpp
index f23174fd6dc..9d9ff451b3b 100644
--- a/intern/cycles/util/util_view.cpp
+++ b/intern/cycles/util/util_view.cpp
@@ -134,7 +134,7 @@ static void view_display()
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
- gluOrtho2D(0, V.width, 0, V.height);
+ glOrtho(0, V.width, 0, V.height, -1, 1);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
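On the util_view.cpp change: gluOrtho2D(left, right, bottom, top) is specified by GLU as glOrtho with the near and far planes fixed at -1 and 1, so the replacement keeps the same 2D projection while dropping the GLU call. A small sketch making that equivalence explicit; the helper name is illustrative and assumes the GL headers already pulled in by util_view.cpp.

/* Hypothetical wrapper: 2D orthographic projection without GLU. */
static void view_ortho_2d(int width, int height)
{
  glMatrixMode(GL_PROJECTION);
  glLoadIdentity();
  /* Same projection as gluOrtho2D(0, width, 0, height). */
  glOrtho(0.0, (GLdouble)width, 0.0, (GLdouble)height, -1.0, 1.0);
}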